diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp index 7b2f1593ca..6c276dd262 100644 --- a/be/src/olap/primary_key_index.cpp +++ b/be/src/olap/primary_key_index.cpp @@ -24,6 +24,7 @@ // IWYU pragma: no_include #include "common/compiler_util.h" // IWYU pragma: keep #include "common/config.h" +#include "io/fs/file_writer.h" #include "olap/olap_common.h" #include "olap/rowset/segment_v2/bloom_filter_index_reader.h" #include "olap/rowset/segment_v2/bloom_filter_index_writer.h" @@ -71,6 +72,7 @@ Status PrimaryKeyIndexBuilder::add_item(const Slice& key) { Status PrimaryKeyIndexBuilder::finalize(segment_v2::PrimaryKeyIndexMetaPB* meta) { // finish primary key index RETURN_IF_ERROR(_primary_key_index_builder->finish(meta->mutable_primary_key_index())); + _disk_size += _primary_key_index_builder->disk_size(); // set min_max key, the sequence column should be removed meta->set_min_key(min_key().to_string()); @@ -78,7 +80,11 @@ Status PrimaryKeyIndexBuilder::finalize(segment_v2::PrimaryKeyIndexMetaPB* meta) // finish bloom filter index RETURN_IF_ERROR(_bloom_filter_index_builder->flush()); - return _bloom_filter_index_builder->finish(_file_writer, meta->mutable_bloom_filter_index()); + uint64_t start_size = _file_writer->bytes_appended(); + RETURN_IF_ERROR( + _bloom_filter_index_builder->finish(_file_writer, meta->mutable_bloom_filter_index())); + _disk_size += _file_writer->bytes_appended() - start_size; + return Status::OK(); } Status PrimaryKeyIndexReader::parse_index(io::FileReaderSPtr file_reader, diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h index 65cc64f0cd..233644b4e0 100644 --- a/be/src/olap/primary_key_index.h +++ b/be/src/olap/primary_key_index.h @@ -51,7 +51,11 @@ class PrimaryKeyIndexMetaPB; class PrimaryKeyIndexBuilder { public: PrimaryKeyIndexBuilder(io::FileWriter* file_writer, size_t seq_col_length) - : _file_writer(file_writer), _num_rows(0), _size(0), _seq_col_length(seq_col_length) {} + : _file_writer(file_writer), + _num_rows(0), + _size(0), + _disk_size(0), + _seq_col_length(seq_col_length) {} Status init(); @@ -61,6 +65,8 @@ public: uint64_t size() const { return _size; } + uint64_t disk_size() const { return _disk_size; } + Slice min_key() { return Slice(_min_key.data(), _min_key.size() - _seq_col_length); } Slice max_key() { return Slice(_max_key.data(), _max_key.size() - _seq_col_length); } @@ -70,6 +76,7 @@ private: io::FileWriter* _file_writer = nullptr; uint32_t _num_rows; uint64_t _size; + uint64_t _disk_size; size_t _seq_col_length; faststring _min_key; diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp index 28a44b7b75..acbbfd0934 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp @@ -23,6 +23,7 @@ #include #include "common/logging.h" +#include "io/fs/file_writer.h" #include "olap/key_coder.h" #include "olap/olap_common.h" #include "olap/rowset/segment_v2/encoding_info.h" @@ -45,6 +46,7 @@ IndexedColumnWriter::IndexedColumnWriter(const IndexedColumnWriterOptions& optio _file_writer(file_writer), _num_values(0), _num_data_pages(0), + _disk_size(0), _value_key_coder(nullptr), _compress_codec(nullptr) { _first_value.resize(_type_info->size()); @@ -116,10 +118,12 @@ Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) { footer.mutable_data_page_footer()->set_num_values(num_values_in_page); footer.mutable_data_page_footer()->set_nullmap_size(0); + uint64_t start_size = _file_writer->bytes_appended(); RETURN_IF_ERROR(PageIO::compress_and_write_page( _compress_codec, _options.compression_min_space_saving, _file_writer, {page_body.slice()}, footer, &_last_data_page)); _num_data_pages++; + _disk_size += (_file_writer->bytes_appended() - start_size); if (_options.write_ordinal_index) { std::string key; @@ -171,9 +175,11 @@ Status IndexedColumnWriter::_flush_index(IndexPageBuilder* index_builder, BTreeM index_builder->finish(&page_body, &page_footer); PagePointer pp; + uint64_t start_size = _file_writer->bytes_appended(); RETURN_IF_ERROR(PageIO::compress_and_write_page( _compress_codec, _options.compression_min_space_saving, _file_writer, {page_body.slice()}, page_footer, &pp)); + _disk_size += (_file_writer->bytes_appended() - start_size); meta->set_is_root_data_page(false); pp.to_proto(meta->mutable_root_page()); diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.h b/be/src/olap/rowset/segment_v2/indexed_column_writer.h index a95a9fce7f..ba61708dd9 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_writer.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.h @@ -83,6 +83,8 @@ public: Status finish(IndexedColumnMetaPB* meta); + uint64_t disk_size() const { return _disk_size; } + private: Status _finish_current_data_page(size_t& num_val); @@ -96,6 +98,7 @@ private: ordinal_t _num_values; uint32_t _num_data_pages; + uint64_t _disk_size; // remember the first value in current page faststring _first_value; PagePointer _last_data_page; diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 561da9c3c6..d7d539c916 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -764,10 +764,13 @@ Status SegmentWriter::finalize_columns_index(uint64_t* index_size) { if (_has_key) { if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) { RETURN_IF_ERROR(_write_primary_key_index()); + // IndexedColumnWriter write data pages mixed with segment data, we should use + // the stat from primary key index builder. + *index_size += _primary_key_index_builder->disk_size(); } else { RETURN_IF_ERROR(_write_short_key_index()); + *index_size = _file_writer->bytes_appended() - index_start; } - *index_size = _file_writer->bytes_appended() - index_start; } _inverted_index_file_size = try_get_inverted_index_file_size(); // reset all column writers and data_conveter diff --git a/be/test/olap/primary_key_index_test.cpp b/be/test/olap/primary_key_index_test.cpp index 81cea0530d..ac277fecec 100644 --- a/be/test/olap/primary_key_index_test.cpp +++ b/be/test/olap/primary_key_index_test.cpp @@ -71,6 +71,7 @@ TEST_F(PrimaryKeyIndexTest, builder) { EXPECT_EQ("9998", builder.max_key().to_string()); segment_v2::PrimaryKeyIndexMetaPB index_meta; EXPECT_TRUE(builder.finalize(&index_meta)); + EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended()); EXPECT_TRUE(file_writer->close().ok()); EXPECT_EQ(num_rows, builder.num_rows());