[enhancement](merge-on-write) record precise primary key index size (#21196)
This commit is contained in:
@ -24,6 +24,7 @@
|
||||
// IWYU pragma: no_include <opentelemetry/common/threadlocal.h>
|
||||
#include "common/compiler_util.h" // IWYU pragma: keep
|
||||
#include "common/config.h"
|
||||
#include "io/fs/file_writer.h"
|
||||
#include "olap/olap_common.h"
|
||||
#include "olap/rowset/segment_v2/bloom_filter_index_reader.h"
|
||||
#include "olap/rowset/segment_v2/bloom_filter_index_writer.h"
|
||||
@ -71,6 +72,7 @@ Status PrimaryKeyIndexBuilder::add_item(const Slice& key) {
|
||||
Status PrimaryKeyIndexBuilder::finalize(segment_v2::PrimaryKeyIndexMetaPB* meta) {
|
||||
// finish primary key index
|
||||
RETURN_IF_ERROR(_primary_key_index_builder->finish(meta->mutable_primary_key_index()));
|
||||
_disk_size += _primary_key_index_builder->disk_size();
|
||||
|
||||
// set min_max key, the sequence column should be removed
|
||||
meta->set_min_key(min_key().to_string());
|
||||
@ -78,7 +80,11 @@ Status PrimaryKeyIndexBuilder::finalize(segment_v2::PrimaryKeyIndexMetaPB* meta)
|
||||
|
||||
// finish bloom filter index
|
||||
RETURN_IF_ERROR(_bloom_filter_index_builder->flush());
|
||||
return _bloom_filter_index_builder->finish(_file_writer, meta->mutable_bloom_filter_index());
|
||||
uint64_t start_size = _file_writer->bytes_appended();
|
||||
RETURN_IF_ERROR(
|
||||
_bloom_filter_index_builder->finish(_file_writer, meta->mutable_bloom_filter_index()));
|
||||
_disk_size += _file_writer->bytes_appended() - start_size;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PrimaryKeyIndexReader::parse_index(io::FileReaderSPtr file_reader,
|
||||
|
||||
@ -51,7 +51,11 @@ class PrimaryKeyIndexMetaPB;
|
||||
class PrimaryKeyIndexBuilder {
|
||||
public:
|
||||
PrimaryKeyIndexBuilder(io::FileWriter* file_writer, size_t seq_col_length)
|
||||
: _file_writer(file_writer), _num_rows(0), _size(0), _seq_col_length(seq_col_length) {}
|
||||
: _file_writer(file_writer),
|
||||
_num_rows(0),
|
||||
_size(0),
|
||||
_disk_size(0),
|
||||
_seq_col_length(seq_col_length) {}
|
||||
|
||||
Status init();
|
||||
|
||||
@ -61,6 +65,8 @@ public:
|
||||
|
||||
// Returns the current value of the _size counter, which the constructor
// initializes to 0.
// NOTE(review): what _size measures is not visible in this excerpt —
// presumably the accumulated size of the added keys; confirm against add_item().
uint64_t size() const { return _size; }
|
||||
|
||||
// Returns the number of on-disk bytes this builder has accounted for.
// finalize() adds _primary_key_index_builder->disk_size() and the growth of
// _file_writer->bytes_appended() while the bloom filter index is finished, so
// the value is only complete after finalize() has returned successfully.
uint64_t disk_size() const { return _disk_size; }
|
||||
|
||||
// Returns a Slice over _min_key's buffer that leaves out the trailing
// _seq_col_length bytes (the sequence column part of the encoded key).
// NOTE(review): assumes _min_key holds at least _seq_col_length bytes; an
// empty _min_key would presumably make the subtraction wrap — confirm callers.
Slice min_key() {
    const auto key_len_without_seq = _min_key.size() - _seq_col_length;
    return Slice(_min_key.data(), key_len_without_seq);
}
|
||||
// Returns a Slice over _max_key's buffer that leaves out the trailing
// _seq_col_length bytes (the sequence column part of the encoded key).
// NOTE(review): assumes _max_key holds at least _seq_col_length bytes; an
// empty _max_key would presumably make the subtraction wrap — confirm callers.
Slice max_key() {
    const auto key_len_without_seq = _max_key.size() - _seq_col_length;
    return Slice(_max_key.data(), key_len_without_seq);
}
|
||||
|
||||
@ -70,6 +76,7 @@ private:
|
||||
io::FileWriter* _file_writer = nullptr;
|
||||
uint32_t _num_rows;
|
||||
uint64_t _size;
|
||||
uint64_t _disk_size;
|
||||
size_t _seq_col_length;
|
||||
|
||||
faststring _min_key;
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
#include <string>
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "io/fs/file_writer.h"
|
||||
#include "olap/key_coder.h"
|
||||
#include "olap/olap_common.h"
|
||||
#include "olap/rowset/segment_v2/encoding_info.h"
|
||||
@ -45,6 +46,7 @@ IndexedColumnWriter::IndexedColumnWriter(const IndexedColumnWriterOptions& optio
|
||||
_file_writer(file_writer),
|
||||
_num_values(0),
|
||||
_num_data_pages(0),
|
||||
_disk_size(0),
|
||||
_value_key_coder(nullptr),
|
||||
_compress_codec(nullptr) {
|
||||
_first_value.resize(_type_info->size());
|
||||
@ -116,10 +118,12 @@ Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) {
|
||||
footer.mutable_data_page_footer()->set_num_values(num_values_in_page);
|
||||
footer.mutable_data_page_footer()->set_nullmap_size(0);
|
||||
|
||||
uint64_t start_size = _file_writer->bytes_appended();
|
||||
RETURN_IF_ERROR(PageIO::compress_and_write_page(
|
||||
_compress_codec, _options.compression_min_space_saving, _file_writer,
|
||||
{page_body.slice()}, footer, &_last_data_page));
|
||||
_num_data_pages++;
|
||||
_disk_size += (_file_writer->bytes_appended() - start_size);
|
||||
|
||||
if (_options.write_ordinal_index) {
|
||||
std::string key;
|
||||
@ -171,9 +175,11 @@ Status IndexedColumnWriter::_flush_index(IndexPageBuilder* index_builder, BTreeM
|
||||
index_builder->finish(&page_body, &page_footer);
|
||||
|
||||
PagePointer pp;
|
||||
uint64_t start_size = _file_writer->bytes_appended();
|
||||
RETURN_IF_ERROR(PageIO::compress_and_write_page(
|
||||
_compress_codec, _options.compression_min_space_saving, _file_writer,
|
||||
{page_body.slice()}, page_footer, &pp));
|
||||
_disk_size += (_file_writer->bytes_appended() - start_size);
|
||||
|
||||
meta->set_is_root_data_page(false);
|
||||
pp.to_proto(meta->mutable_root_page());
|
||||
|
||||
@ -83,6 +83,8 @@ public:
|
||||
|
||||
Status finish(IndexedColumnMetaPB* meta);
|
||||
|
||||
// Returns the bytes this writer has recorded as appended to the file.
// In the code visible here, _finish_current_data_page() and _flush_index()
// each add the growth of _file_writer->bytes_appended() measured around their
// PageIO::compress_and_write_page() call.
uint64_t disk_size() const { return _disk_size; }
|
||||
|
||||
private:
|
||||
Status _finish_current_data_page(size_t& num_val);
|
||||
|
||||
@ -96,6 +98,7 @@ private:
|
||||
|
||||
ordinal_t _num_values;
|
||||
uint32_t _num_data_pages;
|
||||
uint64_t _disk_size;
|
||||
// remember the first value in current page
|
||||
faststring _first_value;
|
||||
PagePointer _last_data_page;
|
||||
|
||||
@ -764,10 +764,13 @@ Status SegmentWriter::finalize_columns_index(uint64_t* index_size) {
|
||||
if (_has_key) {
|
||||
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
|
||||
RETURN_IF_ERROR(_write_primary_key_index());
|
||||
// IndexedColumnWriter writes data pages mixed with segment data, so we should use
|
||||
// the stat from primary key index builder.
|
||||
*index_size += _primary_key_index_builder->disk_size();
|
||||
} else {
|
||||
RETURN_IF_ERROR(_write_short_key_index());
|
||||
*index_size = _file_writer->bytes_appended() - index_start;
|
||||
}
|
||||
*index_size = _file_writer->bytes_appended() - index_start;
|
||||
}
|
||||
_inverted_index_file_size = try_get_inverted_index_file_size();
|
||||
// reset all column writers and the data converter
|
||||
|
||||
@ -71,6 +71,7 @@ TEST_F(PrimaryKeyIndexTest, builder) {
|
||||
EXPECT_EQ("9998", builder.max_key().to_string());
|
||||
segment_v2::PrimaryKeyIndexMetaPB index_meta;
|
||||
EXPECT_TRUE(builder.finalize(&index_meta));
|
||||
EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
|
||||
EXPECT_TRUE(file_writer->close().ok());
|
||||
EXPECT_EQ(num_rows, builder.num_rows());
|
||||
|
||||
|
||||
Reference in New Issue
Block a user