diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp index 9451a8c0fd..3b8fea52a8 100644 --- a/be/src/olap/primary_key_index.cpp +++ b/be/src/olap/primary_key_index.cpp @@ -105,7 +105,7 @@ Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader, // parse bloom filter segment_v2::ColumnIndexMetaPB column_index_meta = meta.bloom_filter_index(); segment_v2::BloomFilterIndexReader bf_index_reader(std::move(file_reader), - &column_index_meta.bloom_filter_index()); + column_index_meta.bloom_filter_index()); RETURN_IF_ERROR(bf_index_reader.load(!config::disable_pk_storage_page_cache, false)); std::unique_ptr bf_iter; RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter)); diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp b/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp index 69dbf05356..c76de68b7b 100644 --- a/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp @@ -32,16 +32,15 @@ namespace doris { namespace segment_v2 { -Status BitmapIndexReader::load(bool use_page_cache, bool kept_in_memory, - const BitmapIndexPB* index_meta) { +Status BitmapIndexReader::load(bool use_page_cache, bool kept_in_memory) { // TODO yyq: implement a new once flag to avoid status construct. - return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] { - return _load(use_page_cache, kept_in_memory, index_meta); + return _load_once.call([this, use_page_cache, kept_in_memory] { + return _load(use_page_cache, kept_in_memory, std::move(_index_meta)); }); } Status BitmapIndexReader::_load(bool use_page_cache, bool kept_in_memory, - const BitmapIndexPB* index_meta) { + std::unique_ptr index_meta) { const IndexedColumnMetaPB& dict_meta = index_meta->dict_column(); const IndexedColumnMetaPB& bitmap_meta = index_meta->bitmap_column(); _has_null = index_meta->has_null(); diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h index 6f1687354c..0277003fe1 100644 --- a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h +++ b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h @@ -43,11 +43,13 @@ class BitmapIndexPB; class BitmapIndexReader { public: - explicit BitmapIndexReader(io::FileReaderSPtr file_reader) + explicit BitmapIndexReader(io::FileReaderSPtr file_reader, const BitmapIndexPB& index_meta) : _file_reader(std::move(file_reader)), - _type_info(get_scalar_type_info()) {} + _type_info(get_scalar_type_info()) { + _index_meta.reset(new BitmapIndexPB(index_meta)); + } - Status load(bool use_page_cache, bool kept_in_memory, const BitmapIndexPB*); + Status load(bool use_page_cache, bool kept_in_memory); // create a new column iterator. Client should delete returned iterator Status new_iterator(BitmapIndexIterator** iterator); @@ -57,7 +59,7 @@ public: const TypeInfo* type_info() { return _type_info; } private: - Status _load(bool use_page_cache, bool kept_in_memory, const BitmapIndexPB*); + Status _load(bool use_page_cache, bool kept_in_memory, std::unique_ptr); private: friend class BitmapIndexIterator; @@ -68,6 +70,7 @@ private: DorisCallOnce _load_once; std::unique_ptr _dict_column_reader; std::unique_ptr _bitmap_column_reader; + std::unique_ptr _index_meta; }; class BitmapIndexIterator { diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h index dc45e4f692..5e4c848dcb 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h @@ -41,13 +41,16 @@ class BloomFilterIndexPB; class BloomFilterIndexReader { public: explicit BloomFilterIndexReader(io::FileReaderSPtr file_reader, - const BloomFilterIndexPB* bloom_filter_index_meta) + const BloomFilterIndexPB& bloom_filter_index_meta) : _file_reader(std::move(file_reader)), - _type_info(get_scalar_type_info()), - _bloom_filter_index_meta(bloom_filter_index_meta) {} + _type_info(get_scalar_type_info()) { + _bloom_filter_index_meta.reset(new BloomFilterIndexPB(bloom_filter_index_meta)); + } Status load(bool use_page_cache, bool kept_in_memory); + BloomFilterAlgorithmPB algorithm() { return _bloom_filter_index_meta->algorithm(); } + // create a new column iterator. Status new_iterator(std::unique_ptr* iterator); @@ -62,7 +65,7 @@ private: io::FileReaderSPtr _file_reader; DorisCallOnce _load_once; const TypeInfo* _type_info; - const BloomFilterIndexPB* _bloom_filter_index_meta; + std::unique_ptr _bloom_filter_index_meta; std::unique_ptr _bloom_filter_reader; }; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 17a3136231..ef71939e94 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -179,11 +179,11 @@ Status ColumnReader::create(const ColumnReaderOptions& opts, const ColumnMetaPB& ColumnReader::ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB& meta, uint64_t num_rows, io::FileReaderSPtr file_reader) - : _opts(opts), + : _use_index_page_cache(!config::disable_storage_page_cache), + _opts(opts), _num_rows(num_rows), _file_reader(std::move(file_reader)), - _dict_encoding_type(UNKNOWN_DICT_ENCODING), - _use_index_page_cache(!config::disable_storage_page_cache) { + _dict_encoding_type(UNKNOWN_DICT_ENCODING) { _meta_length = meta.length(); _meta_type = (FieldType)meta.type(); if (_meta_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { @@ -207,32 +207,35 @@ Status ColumnReader::init(const ColumnMetaPB* meta) { auto& index_meta = meta->indexes(i); switch (index_meta.type()) { case ORDINAL_INDEX: - _ordinal_index_meta = &index_meta.ordinal_index(); - _ordinal_index.reset(new OrdinalIndexReader(_file_reader, _num_rows)); + _ordinal_index.reset( + new OrdinalIndexReader(_file_reader, _num_rows, index_meta.ordinal_index())); break; case ZONE_MAP_INDEX: - _zone_map_index_meta = &index_meta.zone_map_index(); - _zone_map_index.reset(new ZoneMapIndexReader(_file_reader)); + _segment_zone_map = + std::make_unique(index_meta.zone_map_index().segment_zone_map()); + _zone_map_index.reset(new ZoneMapIndexReader( + _file_reader, index_meta.zone_map_index().page_zone_maps())); break; case BITMAP_INDEX: - _bitmap_index_meta = &index_meta.bitmap_index(); - _bitmap_index.reset(new BitmapIndexReader(_file_reader)); + _bitmap_index.reset(new BitmapIndexReader(_file_reader, index_meta.bitmap_index())); break; case BLOOM_FILTER_INDEX: - _bf_index_meta = &index_meta.bloom_filter_index(); - _bloom_filter_index.reset(new BloomFilterIndexReader(_file_reader, _bf_index_meta)); + _bloom_filter_index.reset( + new BloomFilterIndexReader(_file_reader, index_meta.bloom_filter_index())); break; default: return Status::Corruption("Bad file {}: invalid column index type {}", _file_reader->path().native(), index_meta.type()); } } + // ArrayColumnWriter writes a single empty array and flushes. In this scenario, // the item writer doesn't write any data and the corresponding ordinal index is empty. - if (_ordinal_index_meta == nullptr && !is_empty()) { + if (_ordinal_index == nullptr && !is_empty()) { return Status::Corruption("Bad file {}: missing ordinal index for column {}", _file_reader->path().native(), meta->column_id()); } + return Status::OK(); } @@ -290,8 +293,7 @@ Status ColumnReader::next_batch_of_zone_map(size_t* n, vectorized::MutableColumn FieldType type = _type_info->type(); std::unique_ptr min_value(WrapperField::create_by_type(type, _meta_length)); std::unique_ptr max_value(WrapperField::create_by_type(type, _meta_length)); - _parse_zone_map_skip_null(_zone_map_index_meta->segment_zone_map(), min_value.get(), - max_value.get()); + _parse_zone_map_skip_null(*_segment_zone_map, min_value.get(), max_value.get()); dst->reserve(*n); bool is_string = is_olap_string_type(type); @@ -325,16 +327,16 @@ Status ColumnReader::next_batch_of_zone_map(size_t* n, vectorized::MutableColumn } bool ColumnReader::match_condition(const AndBlockColumnPredicate* col_predicates) const { - if (_zone_map_index_meta == nullptr) { + if (_zone_map_index == nullptr) { return true; } FieldType type = _type_info->type(); std::unique_ptr min_value(WrapperField::create_by_type(type, _meta_length)); std::unique_ptr max_value(WrapperField::create_by_type(type, _meta_length)); - _parse_zone_map(_zone_map_index_meta->segment_zone_map(), min_value.get(), max_value.get()); + _parse_zone_map(*_segment_zone_map, min_value.get(), max_value.get()); - return _zone_map_match_condition(_zone_map_index_meta->segment_zone_map(), min_value.get(), - max_value.get(), col_predicates); + return _zone_map_match_condition(*_segment_zone_map, min_value.get(), max_value.get(), + col_predicates); } void ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, WrapperField* min_value_container, @@ -473,20 +475,19 @@ Status ColumnReader::get_row_ranges_by_bloom_filter(const AndBlockColumnPredicat } Status ColumnReader::_load_ordinal_index(bool use_page_cache, bool kept_in_memory) { - DCHECK(_ordinal_index_meta != nullptr); - return _ordinal_index->load(use_page_cache, kept_in_memory, _ordinal_index_meta); + return _ordinal_index->load(use_page_cache, kept_in_memory); } Status ColumnReader::_load_zone_map_index(bool use_page_cache, bool kept_in_memory) { - if (_zone_map_index_meta != nullptr) { - return _zone_map_index->load(use_page_cache, kept_in_memory, _zone_map_index_meta); + if (_zone_map_index != nullptr) { + return _zone_map_index->load(use_page_cache, kept_in_memory); } return Status::OK(); } Status ColumnReader::_load_bitmap_index(bool use_page_cache, bool kept_in_memory) { - if (_bitmap_index_meta != nullptr) { - return _bitmap_index->load(use_page_cache, kept_in_memory, _bitmap_index_meta); + if (_bitmap_index != nullptr) { + return _bitmap_index->load(use_page_cache, kept_in_memory); } return Status::OK(); } @@ -527,8 +528,18 @@ Status ColumnReader::_load_inverted_index_index(const TabletIndex* index_meta) { return Status::OK(); } +bool ColumnReader::has_bloom_filter_index(bool ngram) const { + if (_bloom_filter_index == nullptr) return false; + + if (ngram) { + return _bloom_filter_index->algorithm() == BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER; + } else { + return _bloom_filter_index->algorithm() != BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER; + } +} + Status ColumnReader::_load_bloom_filter_index(bool use_page_cache, bool kept_in_memory) { - if (_bf_index_meta != nullptr) { + if (_bloom_filter_index != nullptr) { return _bloom_filter_index->load(use_page_cache, kept_in_memory); } return Status::OK(); diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 41a1caf2b8..174aabdefa 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -137,18 +137,9 @@ public: const EncodingInfo* encoding_info() const { return _encoding_info; } - bool has_zone_map() const { return _zone_map_index_meta != nullptr; } - bool has_bitmap_index() const { return _bitmap_index_meta != nullptr; } - bool has_bloom_filter_index(bool ngram) const { - if (_bf_index_meta == nullptr) return false; - - if (ngram) { - return _bf_index_meta->algorithm() == BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER; - } else { - return _bf_index_meta->algorithm() != BloomFilterAlgorithmPB::NGRAM_BLOOM_FILTER; - } - } - + bool has_zone_map() const { return _zone_map_index != nullptr; } + bool has_bitmap_index() const { return _bitmap_index != nullptr; } + bool has_bloom_filter_index(bool ngram) const; // Check if this column could match `cond' using segment zone map. // Since segment zone map is stored in metadata, this function is fast without I/O. // Return true if segment zone map is absent or `cond' could be satisfied, false otherwise. @@ -226,6 +217,8 @@ private: FieldType _meta_type; FieldType _meta_children_column_type; bool _meta_is_nullable; + bool _use_index_page_cache; + PagePointer _meta_dict_page; CompressionTypePB _meta_compression; @@ -241,20 +234,15 @@ private: const EncodingInfo* _encoding_info = nullptr; // initialized in init(), used for create PageDecoder - bool _use_index_page_cache; - // meta for various column indexes (null if the index is absent) - const ZoneMapIndexPB* _zone_map_index_meta = nullptr; - const OrdinalIndexPB* _ordinal_index_meta = nullptr; - const BitmapIndexPB* _bitmap_index_meta = nullptr; - const BloomFilterIndexPB* _bf_index_meta = nullptr; + std::unique_ptr _segment_zone_map; mutable std::mutex _load_index_lock; std::unique_ptr _zone_map_index; std::unique_ptr _ordinal_index; std::unique_ptr _bitmap_index; std::shared_ptr _inverted_index; - std::unique_ptr _bloom_filter_index; + std::shared_ptr _bloom_filter_index; std::vector> _sub_readers; diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp index d3d6d9cb9c..6bae72ea0f 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp +++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp @@ -67,16 +67,15 @@ Status OrdinalIndexWriter::finish(io::FileWriter* file_writer, ColumnIndexMetaPB return Status::OK(); } -Status OrdinalIndexReader::load(bool use_page_cache, bool kept_in_memory, - const OrdinalIndexPB* index_meta) { +Status OrdinalIndexReader::load(bool use_page_cache, bool kept_in_memory) { // TODO yyq: implement a new once flag to avoid status construct. - return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] { - return _load(use_page_cache, kept_in_memory, index_meta); + return _load_once.call([this, use_page_cache, kept_in_memory] { + return _load(use_page_cache, kept_in_memory, std::move(_meta_pb)); }); } Status OrdinalIndexReader::_load(bool use_page_cache, bool kept_in_memory, - const OrdinalIndexPB* index_meta) { + std::unique_ptr index_meta) { if (index_meta->root_page().is_root_data_page()) { // only one data page, no index page _num_pages = 1; diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.h b/be/src/olap/rowset/segment_v2/ordinal_page_index.h index 84a25c78a7..648ed2c6b3 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_page_index.h +++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.h @@ -66,11 +66,14 @@ class OrdinalPageIndexIterator; class OrdinalIndexReader { public: - explicit OrdinalIndexReader(io::FileReaderSPtr file_reader, ordinal_t num_values) - : _file_reader(std::move(file_reader)), _num_values(num_values) {} + explicit OrdinalIndexReader(io::FileReaderSPtr file_reader, ordinal_t num_values, + const OrdinalIndexPB& meta_pb) + : _file_reader(std::move(file_reader)), _num_values(num_values) { + _meta_pb.reset(new OrdinalIndexPB(meta_pb)); + } // load and parse the index page into memory - Status load(bool use_page_cache, bool kept_in_memory, const OrdinalIndexPB* index_meta); + Status load(bool use_page_cache, bool kept_in_memory); // the returned iter points to the largest element which is less than `ordinal`, // or points to the first element if all elements are greater than `ordinal`, @@ -88,7 +91,8 @@ public: int32_t num_data_pages() const { return _num_pages; } private: - Status _load(bool use_page_cache, bool kept_in_memory, const OrdinalIndexPB* index_meta); + Status _load(bool use_page_cache, bool kept_in_memory, + std::unique_ptr index_meta); private: friend OrdinalPageIndexIterator; @@ -96,6 +100,8 @@ private: io::FileReaderSPtr _file_reader; DorisCallOnce _load_once; + std::unique_ptr _meta_pb; + // total number of values (including NULLs) in the indexed column, // equals to 1 + 'last ordinal of last data pages' ordinal_t _num_values; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 7eb660b3c2..bc64e5cf4b 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -84,9 +84,9 @@ Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t se Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema) : _segment_id(segment_id), + _meta_mem_usage(0), _rowset_id(rowset_id), _tablet_schema(tablet_schema), - _meta_mem_usage(0), _segment_meta_mem_tracker(StorageEngine::instance()->segment_meta_mem_tracker()) {} Segment::~Segment() { @@ -96,8 +96,16 @@ Segment::~Segment() { } Status Segment::_open() { - RETURN_IF_ERROR(_parse_footer()); - RETURN_IF_ERROR(_create_column_readers()); + SegmentFooterPB footer; + RETURN_IF_ERROR(_parse_footer(&footer)); + RETURN_IF_ERROR(_create_column_readers(footer)); + _pk_index_meta.reset(footer.has_primary_key_index_meta() + ? new PrimaryKeyIndexMetaPB(footer.primary_key_index_meta()) + : nullptr); + // delete_bitmap_calculator_test.cpp + // DCHECK(footer.has_short_key_index_page()); + _sk_index_page = footer.short_key_index_page(); + _num_rows = footer.num_rows(); return Status::OK(); } @@ -154,7 +162,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o return iter->get()->init(read_options); } -Status Segment::_parse_footer() { +Status Segment::_parse_footer(SegmentFooterPB* footer) { // Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4), MagicNumber(4) auto file_size = _file_reader->size(); if (file_size < 12) { @@ -170,7 +178,6 @@ Status Segment::_parse_footer() { _file_reader->read_at(file_size - 12, Slice(fixed_buf, 12), &bytes_read, &io_ctx)); DCHECK_EQ(bytes_read, 12); - // validate magic number if (memcmp(fixed_buf + 8, k_segment_magic, k_segment_magic_length) != 0) { return Status::Corruption("Bad segment file {}: magic number not match", _file_reader->path().native()); @@ -182,8 +189,6 @@ Status Segment::_parse_footer() { return Status::Corruption("Bad segment file {}: file size {} < {}", _file_reader->path().native(), file_size, 12 + footer_length); } - _meta_mem_usage += footer_length; - _segment_meta_mem_tracker->consume(footer_length); std::string footer_buf; footer_buf.resize(footer_length); @@ -201,7 +206,7 @@ Status Segment::_parse_footer() { } // deserialize footer PB - if (!_footer.ParseFromString(footer_buf)) { + if (!footer->ParseFromString(footer_buf)) { return Status::Corruption("Bad segment file {}: failed to parse SegmentFooterPB", _file_reader->path().native()); } @@ -210,10 +215,10 @@ Status Segment::_parse_footer() { Status Segment::_load_pk_bloom_filter() { DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS); - DCHECK(_footer.has_primary_key_index_meta()); + DCHECK(_pk_index_meta != nullptr); DCHECK(_pk_index_reader != nullptr); return _load_pk_bf_once.call([this] { - RETURN_IF_ERROR(_pk_index_reader->parse_bf(_file_reader, _footer.primary_key_index_meta())); + RETURN_IF_ERROR(_pk_index_reader->parse_bf(_file_reader, *_pk_index_meta)); _meta_mem_usage += _pk_index_reader->get_bf_memory_size(); _segment_meta_mem_tracker->consume(_pk_index_reader->get_bf_memory_size()); return Status::OK(); @@ -227,10 +232,9 @@ Status Segment::load_pk_index_and_bf() { } Status Segment::load_index() { return _load_index_once.call([this] { - if (_tablet_schema->keys_type() == UNIQUE_KEYS && _footer.has_primary_key_index_meta()) { + if (_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr) { _pk_index_reader.reset(new PrimaryKeyIndexReader()); - RETURN_IF_ERROR( - _pk_index_reader->parse_index(_file_reader, _footer.primary_key_index_meta())); + RETURN_IF_ERROR(_pk_index_reader->parse_index(_file_reader, *_pk_index_meta)); _meta_mem_usage += _pk_index_reader->get_memory_size(); _segment_meta_mem_tracker->consume(_pk_index_reader->get_memory_size()); return Status::OK(); @@ -238,7 +242,7 @@ Status Segment::load_index() { // read and parse short key index page PageReadOptions opts; opts.file_reader = _file_reader.get(); - opts.page_pointer = PagePointer(_footer.short_key_index_page()); + opts.page_pointer = PagePointer(_sk_index_page); opts.codec = nullptr; // short key index page uses NO_COMPRESSION for now OlapReaderStatistics tmp_stats; opts.use_page_cache = true; @@ -259,24 +263,26 @@ Status Segment::load_index() { }); } -Status Segment::_create_column_readers() { - for (uint32_t ordinal = 0; ordinal < _footer.columns().size(); ++ordinal) { - auto& column_pb = _footer.columns(ordinal); - _column_id_to_footer_ordinal.emplace(column_pb.unique_id(), ordinal); +Status Segment::_create_column_readers(const SegmentFooterPB& footer) { + std::unordered_map column_id_to_footer_ordinal; + + for (uint32_t ordinal = 0; ordinal < footer.columns().size(); ++ordinal) { + auto& column_pb = footer.columns(ordinal); + column_id_to_footer_ordinal.emplace(column_pb.unique_id(), ordinal); } for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); ++ordinal) { auto& column = _tablet_schema->column(ordinal); - auto iter = _column_id_to_footer_ordinal.find(column.unique_id()); - if (iter == _column_id_to_footer_ordinal.end()) { + auto iter = column_id_to_footer_ordinal.find(column.unique_id()); + if (iter == column_id_to_footer_ordinal.end()) { continue; } ColumnReaderOptions opts; opts.kept_in_memory = _tablet_schema->is_in_memory(); std::unique_ptr reader; - RETURN_IF_ERROR(ColumnReader::create(opts, _footer.columns(iter->second), - _footer.num_rows(), _file_reader, &reader)); + RETURN_IF_ERROR(ColumnReader::create(opts, footer.columns(iter->second), footer.num_rows(), + _file_reader, &reader)); _column_readers.emplace(column.unique_id(), std::move(reader)); } return Status::OK(); diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 382ae69a7b..67616d60e2 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -81,7 +81,7 @@ public: RowsetId rowset_id() const { return _rowset_id; } - uint32_t num_rows() const { return _footer.num_rows(); } + uint32_t num_rows() const { return _num_rows; } Status new_column_iterator(const TabletColumn& tablet_column, std::unique_ptr* iter); @@ -108,20 +108,17 @@ public: Status read_key_by_rowid(uint32_t row_id, std::string* key); - // only used by UT - const SegmentFooterPB& footer() const { return _footer; } - Status load_index(); Status load_pk_index_and_bf(); std::string min_key() { - DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _footer.has_primary_key_index_meta()); - return _footer.primary_key_index_meta().min_key(); + DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr); + return _pk_index_meta->min_key(); } std::string max_key() { - DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _footer.has_primary_key_index_meta()); - return _footer.primary_key_index_meta().max_key(); + DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr); + return _pk_index_meta->max_key(); } io::FileReaderSPtr file_reader() { return _file_reader; } @@ -133,8 +130,8 @@ private: Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema); // open segment file and read the minimum amount of necessary information (footer) Status _open(); - Status _parse_footer(); - Status _create_column_readers(); + Status _parse_footer(SegmentFooterPB* footer); + Status _create_column_readers(const SegmentFooterPB& footer); Status _load_pk_bloom_filter(); private: @@ -142,16 +139,14 @@ private: io::FileReaderSPtr _file_reader; uint32_t _segment_id; + uint32_t _num_rows; + int64_t _meta_mem_usage; + RowsetId _rowset_id; TabletSchemaSPtr _tablet_schema; - int64_t _meta_mem_usage; - SegmentFooterPB _footer; - - // Map from column unique id to column ordinal in footer's ColumnMetaPB - // If we can't find unique id from it, it means this segment is created - // with an old schema. - std::unordered_map _column_id_to_footer_ordinal; + std::unique_ptr _pk_index_meta; + PagePointerPB _sk_index_page; // map column unique id ---> column reader // ColumnReader for each column in TabletSchema. If ColumnReader is nullptr, @@ -171,6 +166,7 @@ private: std::unique_ptr _pk_index_reader; // Segment may be destructed after StorageEngine, in order to exit gracefully. std::shared_ptr _segment_meta_mem_tracker; + std::mutex _open_lock; }; } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.cpp b/be/src/olap/rowset/segment_v2/zone_map_index.cpp index 4306224c13..9368862fff 100644 --- a/be/src/olap/rowset/segment_v2/zone_map_index.cpp +++ b/be/src/olap/rowset/segment_v2/zone_map_index.cpp @@ -145,17 +145,16 @@ Status TypedZoneMapIndexWriter::finish(io::FileWriter* file_writer, return writer.finish(meta->mutable_page_zone_maps()); } -Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory, - const ZoneMapIndexPB* index_meta) { +Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory) { // TODO yyq: implement a new once flag to avoid status construct. - return _load_once.call([this, use_page_cache, kept_in_memory, index_meta] { - return _load(use_page_cache, kept_in_memory, index_meta); + return _load_once.call([this, use_page_cache, kept_in_memory] { + return _load(use_page_cache, kept_in_memory, std::move(_page_zone_maps_meta)); }); } Status ZoneMapIndexReader::_load(bool use_page_cache, bool kept_in_memory, - const ZoneMapIndexPB* index_meta) { - IndexedColumnReader reader(_file_reader, index_meta->page_zone_maps()); + std::unique_ptr page_zone_maps_meta) { + IndexedColumnReader reader(_file_reader, *page_zone_maps_meta); RETURN_IF_ERROR(reader.load(use_page_cache, kept_in_memory)); IndexedColumnIterator iter(&reader); diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.h b/be/src/olap/rowset/segment_v2/zone_map_index.h index b6a6ae9075..686b63a9de 100644 --- a/be/src/olap/rowset/segment_v2/zone_map_index.h +++ b/be/src/olap/rowset/segment_v2/zone_map_index.h @@ -147,23 +147,27 @@ private: class ZoneMapIndexReader { public: - explicit ZoneMapIndexReader(io::FileReaderSPtr file_reader) - : _file_reader(std::move(file_reader)) {} + explicit ZoneMapIndexReader(io::FileReaderSPtr file_reader, + const IndexedColumnMetaPB& page_zone_maps) + : _file_reader(std::move(file_reader)) { + _page_zone_maps_meta.reset(new IndexedColumnMetaPB(page_zone_maps)); + } // load all page zone maps into memory - Status load(bool use_page_cache, bool kept_in_memory, const ZoneMapIndexPB*); + Status load(bool use_page_cache, bool kept_in_memory); const std::vector& page_zone_maps() const { return _page_zone_maps; } int32_t num_pages() const { return _page_zone_maps.size(); } private: - Status _load(bool use_page_cache, bool kept_in_memory, const ZoneMapIndexPB*); + Status _load(bool use_page_cache, bool kept_in_memory, std::unique_ptr); private: DorisCallOnce _load_once; // TODO: yyq, we shoud remove file_reader from here. io::FileReaderSPtr _file_reader; + std::unique_ptr _page_zone_maps_meta; std::vector _page_zone_maps; }; diff --git a/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp b/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp index 2e803e4ff1..cfcfbb7a01 100644 --- a/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp +++ b/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp @@ -80,8 +80,8 @@ void get_bitmap_reader_iter(const std::string& file_name, const ColumnIndexMetaP BitmapIndexReader** reader, BitmapIndexIterator** iter) { io::FileReaderSPtr file_reader; ASSERT_EQ(io::global_local_filesystem()->open_file(file_name, &file_reader), Status::OK()); - *reader = new BitmapIndexReader(std::move(file_reader)); - auto st = (*reader)->load(true, false, &meta.bitmap_index()); + *reader = new BitmapIndexReader(std::move(file_reader), meta.bitmap_index()); + auto st = (*reader)->load(true, false); EXPECT_TRUE(st.ok()); st = (*reader)->new_iterator(iter); diff --git a/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp index d77325d2a6..e02ed57295 100644 --- a/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp +++ b/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp @@ -97,7 +97,7 @@ void get_bloom_filter_reader_iter(const std::string& file_name, const ColumnInde std::string fname = dname + "/" + file_name; io::FileReaderSPtr file_reader; ASSERT_EQ(io::global_local_filesystem()->open_file(fname, &file_reader), Status::OK()); - *reader = new BloomFilterIndexReader(std::move(file_reader), &meta.bloom_filter_index()); + *reader = new BloomFilterIndexReader(std::move(file_reader), meta.bloom_filter_index()); auto st = (*reader)->load(true, false); EXPECT_TRUE(st.ok()); diff --git a/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp b/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp index a5024f4147..0485b086d9 100644 --- a/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp +++ b/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp @@ -70,8 +70,8 @@ TEST_F(OrdinalPageIndexTest, normal) { io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); - OrdinalIndexReader index(file_reader, 16 * 1024 * 4096 + 1); - EXPECT_TRUE(index.load(true, false, &index_meta.ordinal_index()).ok()); + OrdinalIndexReader index(file_reader, 16 * 1024 * 4096 + 1, index_meta.ordinal_index()); + EXPECT_TRUE(index.load(true, false).ok()); EXPECT_EQ(16 * 1024, index.num_data_pages()); EXPECT_EQ(1, index.get_first_ordinal(0)); EXPECT_EQ(4096, index.get_last_ordinal(0)); @@ -124,8 +124,8 @@ TEST_F(OrdinalPageIndexTest, one_data_page) { EXPECT_EQ(data_page_pointer, root_page_pointer); } - OrdinalIndexReader index(nullptr, num_values); - EXPECT_TRUE(index.load(true, false, &index_meta.ordinal_index()).ok()); + OrdinalIndexReader index(nullptr, num_values, index_meta.ordinal_index()); + EXPECT_TRUE(index.load(true, false).ok()); EXPECT_EQ(1, index.num_data_pages()); EXPECT_EQ(0, index.get_first_ordinal(0)); EXPECT_EQ(num_values - 1, index.get_last_ordinal(0)); diff --git a/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp b/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp index 22b69c5cae..137f0986f0 100644 --- a/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp +++ b/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp @@ -81,8 +81,9 @@ public: io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); - ZoneMapIndexReader column_zone_map(file_reader); - Status status = column_zone_map.load(true, false, &index_meta.zone_map_index()); + ZoneMapIndexReader column_zone_map(file_reader, + index_meta.zone_map_index().page_zone_maps()); + Status status = column_zone_map.load(true, false); EXPECT_TRUE(status.ok()); EXPECT_EQ(3, column_zone_map.num_pages()); const std::vector& zone_maps = column_zone_map.page_zone_maps(); @@ -128,8 +129,9 @@ public: io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); - ZoneMapIndexReader column_zone_map(file_reader); - Status status = column_zone_map.load(true, false, &index_meta.zone_map_index()); + ZoneMapIndexReader column_zone_map(file_reader, + index_meta.zone_map_index().page_zone_maps()); + Status status = column_zone_map.load(true, false); EXPECT_TRUE(status.ok()); EXPECT_EQ(1, column_zone_map.num_pages()); const std::vector& zone_maps = column_zone_map.page_zone_maps(); @@ -181,8 +183,8 @@ TEST_F(ColumnZoneMapTest, NormalTestIntPage) { io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); - ZoneMapIndexReader column_zone_map(file_reader); - Status status = column_zone_map.load(true, false, &index_meta.zone_map_index()); + ZoneMapIndexReader column_zone_map(file_reader, index_meta.zone_map_index().page_zone_maps()); + Status status = column_zone_map.load(true, false); EXPECT_TRUE(status.ok()); EXPECT_EQ(3, column_zone_map.num_pages()); const std::vector& zone_maps = column_zone_map.page_zone_maps();