[enhancement](load) shrink reserved buffer for page builder (#14012) (#14014)

* [enhancement](load) shrink reserved buffer for page builder (#14012)

For a table with hundreds of text-type columns, flushing its memtable may consume a huge amount of memory.
This memory is consumed when initializing the page builder, as it reserves 1MB for each column,
so memory consumption grows in proportion to the number of columns. Shrinking the reservation may
reduce memory usage substantially during the load process.

Signed-off-by: freemandealer <freeman.zhang1992@gmail.com>

* Respond to the review

Signed-off-by: freemandealer <freeman.zhang1992@gmail.com>

* Update binary_plain_page.h

* Update binary_dict_page.cpp

* Update binary_plain_page.h

Signed-off-by: freemandealer <freeman.zhang1992@gmail.com>
This commit is contained in:
zhengyu
2022-11-09 08:40:07 +08:00
committed by GitHub
parent a0f136a0bc
commit 6a1c7fac9d
3 changed files with 20 additions and 4 deletions

View File

@ -37,7 +37,9 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options)
// TODO: the data page builder type can be created by Factory according to user config
_data_page_builder.reset(new BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>(options));
PageBuilderOptions dict_builder_options;
dict_builder_options.data_page_size = _options.dict_page_size;
dict_builder_options.data_page_size =
std::min(_options.data_page_size, _options.dict_page_size);
dict_builder_options.is_dict_page = true;
_dict_builder.reset(new BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>(dict_builder_options));
reset();
}
@ -118,6 +120,10 @@ Status BinaryDictPageBuilder::add(const uint8_t* vals, size_t* count) {
}
OwnedSlice BinaryDictPageBuilder::finish() {
if (VLOG_DEBUG_IS_ON && _encoding_type == DICT_ENCODING) {
VLOG_DEBUG << "dict page size:" << _dict_builder->size();
}
DCHECK(!_finished);
_finished = true;

View File

@ -53,8 +53,14 @@ public:
}
bool is_page_full() override {
// data_page_size is 0, do not limit the page size
return _options.data_page_size != 0 && _size_estimate > _options.data_page_size;
bool ret = false;
if (_options.is_dict_page) {
// dict_page_size is 0, do not limit the page size
ret = _options.dict_page_size != 0 && _size_estimate > _options.dict_page_size;
} else {
ret = _options.data_page_size != 0 && _size_estimate > _options.data_page_size;
}
return ret;
}
Status add(const uint8_t* vals, size_t* count) override {
@ -104,7 +110,9 @@ public:
void reset() override {
_offsets.clear();
_buffer.clear();
_buffer.reserve(_options.data_page_size == 0 ? 1024 : _options.data_page_size);
_buffer.reserve(_options.data_page_size == 0
? 1024
: std::min(_options.data_page_size, _options.dict_page_size));
_size_estimate = sizeof(uint32_t);
_finished = false;
_last_value_size = 0;

View File

@ -30,6 +30,8 @@ struct PageBuilderOptions {
size_t dict_page_size = DEFAULT_PAGE_SIZE;
bool need_check_bitmap = true;
bool is_dict_page = false; // page used for saving dictionary
};
struct PageDecoderOptions {