[improvement](page builder) avoid allocating big memory in ctor (#35493)
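## Proposed changes

Issue Number: close #xxx

<!--Describe your changes.-->

Page builders no longer reserve their buffers in the constructor. `PageBuilderHelper<Derived>::create()` constructs a builder and then calls the new `init()` method, and `reset()` now returns `Status` so that callers can propagate errors with `RETURN_IF_ERROR`.

## Further comments

If this is a relatively large or complex change, kick off the discussion at [dev@doris.apache.org](mailto:dev@doris.apache.org) by explaining why you chose the solution you did and what alternatives you considered, etc...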
This commit is contained in:
@ -45,17 +45,26 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options)
|
||||
_finished(false),
|
||||
_data_page_builder(nullptr),
|
||||
_dict_builder(nullptr),
|
||||
_encoding_type(DICT_ENCODING) {
|
||||
_encoding_type(DICT_ENCODING) {}
|
||||
|
||||
Status BinaryDictPageBuilder::init() {
|
||||
// initially use DICT_ENCODING
|
||||
// TODO: the data page builder type can be created by Factory according to user config
|
||||
_data_page_builder.reset(new BitshufflePageBuilder<FieldType::OLAP_FIELD_TYPE_INT>(options));
|
||||
PageBuilder* data_page_builder_ptr = nullptr;
|
||||
RETURN_IF_ERROR(BitshufflePageBuilder<FieldType::OLAP_FIELD_TYPE_INT>::create(
|
||||
&data_page_builder_ptr, _options));
|
||||
_data_page_builder.reset(data_page_builder_ptr);
|
||||
PageBuilderOptions dict_builder_options;
|
||||
dict_builder_options.data_page_size =
|
||||
std::min(_options.data_page_size, _options.dict_page_size);
|
||||
dict_builder_options.is_dict_page = true;
|
||||
_dict_builder.reset(
|
||||
new BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>(dict_builder_options));
|
||||
reset();
|
||||
|
||||
PageBuilder* dict_builder_ptr = nullptr;
|
||||
RETURN_IF_ERROR(BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>::create(
|
||||
&dict_builder_ptr, dict_builder_options));
|
||||
_dict_builder.reset(static_cast<BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>*>(
|
||||
dict_builder_ptr));
|
||||
return reset();
|
||||
}
|
||||
|
||||
bool BinaryDictPageBuilder::is_page_full() {
|
||||
@ -148,18 +157,21 @@ OwnedSlice BinaryDictPageBuilder::finish() {
|
||||
return _buffer.build();
|
||||
}
|
||||
|
||||
void BinaryDictPageBuilder::reset() {
|
||||
Status BinaryDictPageBuilder::reset() {
|
||||
_finished = false;
|
||||
_buffer.reserve(_options.data_page_size + BINARY_DICT_PAGE_HEADER_SIZE);
|
||||
_buffer.resize(BINARY_DICT_PAGE_HEADER_SIZE);
|
||||
|
||||
if (_encoding_type == DICT_ENCODING && _dict_builder->is_page_full()) {
|
||||
_data_page_builder.reset(
|
||||
new BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>(_options));
|
||||
PageBuilder* data_page_builder_ptr = nullptr;
|
||||
RETURN_IF_ERROR(BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>::create(
|
||||
&data_page_builder_ptr, _options));
|
||||
_data_page_builder.reset(data_page_builder_ptr);
|
||||
_encoding_type = PLAIN_ENCODING;
|
||||
} else {
|
||||
_data_page_builder->reset();
|
||||
RETURN_IF_ERROR(_data_page_builder->reset());
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
size_t BinaryDictPageBuilder::count() const {
|
||||
|
||||
@ -57,9 +57,12 @@ enum { BINARY_DICT_PAGE_HEADER_SIZE = 4 };
|
||||
// Data pages start with _encoding_type = DICT_ENCODING; when the size of the dictionary
|
||||
// page goes beyond _options.dict_page_size, the subsequent data pages will switch
|
||||
// to string plain page automatically.
|
||||
class BinaryDictPageBuilder : public PageBuilder {
|
||||
class BinaryDictPageBuilder : public PageBuilderHelper<BinaryDictPageBuilder> {
|
||||
public:
|
||||
BinaryDictPageBuilder(const PageBuilderOptions& options);
|
||||
using Self = BinaryDictPageBuilder;
|
||||
friend class PageBuilderHelper<Self>;
|
||||
|
||||
Status init() override;
|
||||
|
||||
bool is_page_full() override;
|
||||
|
||||
@ -67,7 +70,7 @@ public:
|
||||
|
||||
OwnedSlice finish() override;
|
||||
|
||||
void reset() override;
|
||||
Status reset() override;
|
||||
|
||||
size_t count() const override;
|
||||
|
||||
@ -80,6 +83,8 @@ public:
|
||||
Status get_last_value(void* value) const override;
|
||||
|
||||
private:
|
||||
BinaryDictPageBuilder(const PageBuilderOptions& options);
|
||||
|
||||
PageBuilderOptions _options;
|
||||
bool _finished;
|
||||
|
||||
|
||||
@ -44,12 +44,12 @@ namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
template <FieldType Type>
|
||||
class BinaryPlainPageBuilder : public PageBuilder {
|
||||
class BinaryPlainPageBuilder : public PageBuilderHelper<BinaryPlainPageBuilder<Type>> {
|
||||
public:
|
||||
BinaryPlainPageBuilder(const PageBuilderOptions& options)
|
||||
: _size_estimate(0), _options(options) {
|
||||
reset();
|
||||
}
|
||||
using Self = BinaryPlainPageBuilder<Type>;
|
||||
friend class PageBuilderHelper<Self>;
|
||||
|
||||
Status init() override { return reset(); }
|
||||
|
||||
bool is_page_full() override {
|
||||
bool ret = false;
|
||||
@ -108,7 +108,7 @@ public:
|
||||
return _buffer.build();
|
||||
}
|
||||
|
||||
void reset() override {
|
||||
Status reset() override {
|
||||
_offsets.clear();
|
||||
_buffer.clear();
|
||||
_buffer.reserve(_options.data_page_size == 0
|
||||
@ -117,6 +117,7 @@ public:
|
||||
_size_estimate = sizeof(uint32_t);
|
||||
_finished = false;
|
||||
_last_value_size = 0;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
size_t count() const override { return _offsets.size(); }
|
||||
@ -151,6 +152,9 @@ public:
|
||||
inline Slice get(std::size_t idx) const { return (*this)[idx]; }
|
||||
|
||||
private:
|
||||
BinaryPlainPageBuilder(const PageBuilderOptions& options)
|
||||
: _size_estimate(0), _options(options) {}
|
||||
|
||||
void _copy_value_at(size_t idx, faststring* value) const {
|
||||
size_t value_size =
|
||||
(idx < _offsets.size() - 1) ? _offsets[idx + 1] - _offsets[idx] : _last_value_size;
|
||||
|
||||
@ -41,9 +41,12 @@ namespace segment_v2 {
|
||||
// Entry := SharedPrefixLength(vint), UnsharedLength(vint), Byte^UnsharedLength
|
||||
// Trailer := NumEntry(uint32_t), RESTART_POINT_INTERVAL(uint8_t)
|
||||
// RestartPointStartOffset(uint32_t)^NumRestartPoints,NumRestartPoints(uint32_t)
|
||||
class BinaryPrefixPageBuilder : public PageBuilder {
|
||||
class BinaryPrefixPageBuilder : public PageBuilderHelper<BinaryPrefixPageBuilder> {
|
||||
public:
|
||||
BinaryPrefixPageBuilder(const PageBuilderOptions& options) : _options(options) { reset(); }
|
||||
using Self = BinaryPrefixPageBuilder;
|
||||
friend class PageBuilderHelper<Self>;
|
||||
|
||||
Status init() override { return reset(); }
|
||||
|
||||
bool is_page_full() override { return size() >= _options.data_page_size; }
|
||||
|
||||
@ -51,12 +54,13 @@ public:
|
||||
|
||||
OwnedSlice finish() override;
|
||||
|
||||
void reset() override {
|
||||
Status reset() override {
|
||||
_restart_points_offset.clear();
|
||||
_last_entry.clear();
|
||||
_count = 0;
|
||||
_buffer.clear();
|
||||
_finished = false;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
uint64_t size() const override {
|
||||
@ -88,6 +92,8 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
BinaryPrefixPageBuilder(const PageBuilderOptions& options) : _options(options) {}
|
||||
|
||||
PageBuilderOptions _options;
|
||||
std::vector<uint32_t> _restart_points_offset;
|
||||
faststring _first_entry;
|
||||
|
||||
@ -84,12 +84,12 @@ void warn_with_bitshuffle_error(int64_t val);
|
||||
// The header is followed by the bitshuffle-compressed element data.
|
||||
//
|
||||
template <FieldType Type>
|
||||
class BitshufflePageBuilder : public PageBuilder {
|
||||
class BitshufflePageBuilder : public PageBuilderHelper<BitshufflePageBuilder<Type>> {
|
||||
public:
|
||||
BitshufflePageBuilder(const PageBuilderOptions& options)
|
||||
: _options(options), _count(0), _remain_element_capacity(0), _finished(false) {
|
||||
reset();
|
||||
}
|
||||
using Self = BitshufflePageBuilder<Type>;
|
||||
friend class PageBuilderHelper<Self>;
|
||||
|
||||
Status init() override { return reset(); }
|
||||
|
||||
bool is_page_full() override { return _remain_element_capacity == 0; }
|
||||
|
||||
@ -149,7 +149,7 @@ public:
|
||||
return _finish(SIZE_OF_TYPE);
|
||||
}
|
||||
|
||||
void reset() override {
|
||||
Status reset() override {
|
||||
auto block_size = _options.data_page_size;
|
||||
_count = 0;
|
||||
_data.clear();
|
||||
@ -160,6 +160,7 @@ public:
|
||||
_buffer.resize(BITSHUFFLE_PAGE_HEADER_SIZE);
|
||||
_finished = false;
|
||||
_remain_element_capacity = block_size / SIZE_OF_TYPE;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
size_t count() const override { return _count; }
|
||||
@ -184,6 +185,9 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
BitshufflePageBuilder(const PageBuilderOptions& options)
|
||||
: _options(options), _count(0), _remain_element_capacity(0), _finished(false) {}
|
||||
|
||||
OwnedSlice _finish(int final_size_of_type) {
|
||||
_data.resize(final_size_of_type * _count);
|
||||
|
||||
|
||||
@ -718,7 +718,7 @@ Status ScalarColumnWriter::finish_current_page() {
|
||||
// build data page body : encoded values + [nullmap]
|
||||
std::vector<Slice> body;
|
||||
OwnedSlice encoded_values = _page_builder->finish();
|
||||
_page_builder->reset();
|
||||
RETURN_IF_ERROR(_page_builder->reset());
|
||||
body.push_back(encoded_values.slice());
|
||||
|
||||
OwnedSlice nullmap;
|
||||
|
||||
@ -51,8 +51,7 @@ struct TypeEncodingTraits {};
|
||||
template <FieldType type, typename CppType>
|
||||
struct TypeEncodingTraits<type, PLAIN_ENCODING, CppType> {
|
||||
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
|
||||
*builder = new PlainPageBuilder<type>(opts);
|
||||
return Status::OK();
|
||||
return PlainPageBuilder<type>::create(builder, opts);
|
||||
}
|
||||
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
|
||||
PageDecoder** decoder) {
|
||||
@ -64,8 +63,7 @@ struct TypeEncodingTraits<type, PLAIN_ENCODING, CppType> {
|
||||
template <FieldType type>
|
||||
struct TypeEncodingTraits<type, PLAIN_ENCODING, Slice> {
|
||||
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
|
||||
*builder = new BinaryPlainPageBuilder<type>(opts);
|
||||
return Status::OK();
|
||||
return BinaryPlainPageBuilder<type>::create(builder, opts);
|
||||
}
|
||||
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
|
||||
PageDecoder** decoder) {
|
||||
@ -78,8 +76,7 @@ template <FieldType type, typename CppType>
|
||||
struct TypeEncodingTraits<type, BIT_SHUFFLE, CppType,
|
||||
typename std::enable_if<!std::is_same<CppType, Slice>::value>::type> {
|
||||
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
|
||||
*builder = new BitshufflePageBuilder<type>(opts);
|
||||
return Status::OK();
|
||||
return BitshufflePageBuilder<type>::create(builder, opts);
|
||||
}
|
||||
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
|
||||
PageDecoder** decoder) {
|
||||
@ -91,8 +88,7 @@ struct TypeEncodingTraits<type, BIT_SHUFFLE, CppType,
|
||||
template <>
|
||||
struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_BOOL, RLE, bool> {
|
||||
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
|
||||
*builder = new RlePageBuilder<FieldType::OLAP_FIELD_TYPE_BOOL>(opts);
|
||||
return Status::OK();
|
||||
return RlePageBuilder<FieldType::OLAP_FIELD_TYPE_BOOL>::create(builder, opts);
|
||||
}
|
||||
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
|
||||
PageDecoder** decoder) {
|
||||
@ -104,8 +100,7 @@ struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_BOOL, RLE, bool> {
|
||||
template <FieldType type>
|
||||
struct TypeEncodingTraits<type, DICT_ENCODING, Slice> {
|
||||
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
|
||||
*builder = new BinaryDictPageBuilder(opts);
|
||||
return Status::OK();
|
||||
return BinaryDictPageBuilder::create(builder, opts);
|
||||
}
|
||||
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
|
||||
PageDecoder** decoder) {
|
||||
@ -118,8 +113,7 @@ template <>
|
||||
struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATE, FOR_ENCODING,
|
||||
typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::CppType> {
|
||||
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
|
||||
*builder = new FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATE>(opts);
|
||||
return Status::OK();
|
||||
return FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATE>::create(builder, opts);
|
||||
}
|
||||
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
|
||||
PageDecoder** decoder) {
|
||||
@ -132,8 +126,8 @@ template <>
|
||||
struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATEV2, FOR_ENCODING,
|
||||
typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATEV2>::CppType> {
|
||||
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
|
||||
*builder = new FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATEV2>(opts);
|
||||
return Status::OK();
|
||||
return FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATEV2>::create(builder,
|
||||
opts);
|
||||
}
|
||||
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
|
||||
PageDecoder** decoder) {
|
||||
@ -146,8 +140,8 @@ template <>
|
||||
struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2, FOR_ENCODING,
|
||||
typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>::CppType> {
|
||||
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
|
||||
*builder = new FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>(opts);
|
||||
return Status::OK();
|
||||
return FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>::create(builder,
|
||||
opts);
|
||||
}
|
||||
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
|
||||
PageDecoder** decoder) {
|
||||
@ -161,8 +155,7 @@ template <FieldType type, typename CppType>
|
||||
struct TypeEncodingTraits<type, FOR_ENCODING, CppType,
|
||||
typename std::enable_if<std::is_integral<CppType>::value>::type> {
|
||||
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
|
||||
*builder = new FrameOfReferencePageBuilder<type>(opts);
|
||||
return Status::OK();
|
||||
return FrameOfReferencePageBuilder<type>::create(builder, opts);
|
||||
}
|
||||
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
|
||||
PageDecoder** decoder) {
|
||||
@ -174,8 +167,7 @@ struct TypeEncodingTraits<type, FOR_ENCODING, CppType,
|
||||
template <FieldType type>
|
||||
struct TypeEncodingTraits<type, PREFIX_ENCODING, Slice> {
|
||||
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
|
||||
*builder = new BinaryPrefixPageBuilder(opts);
|
||||
return Status::OK();
|
||||
return BinaryPrefixPageBuilder::create(builder, opts);
|
||||
}
|
||||
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
|
||||
PageDecoder** decoder) {
|
||||
|
||||
@ -27,11 +27,14 @@ namespace segment_v2 {
|
||||
|
||||
// Encodes a page using frame-of-reference coding
|
||||
template <FieldType Type>
|
||||
class FrameOfReferencePageBuilder : public PageBuilder {
|
||||
class FrameOfReferencePageBuilder : public PageBuilderHelper<FrameOfReferencePageBuilder<Type>> {
|
||||
public:
|
||||
explicit FrameOfReferencePageBuilder(const PageBuilderOptions& options)
|
||||
: _options(options), _count(0), _finished(false) {
|
||||
using Self = FrameOfReferencePageBuilder<Type>;
|
||||
friend class PageBuilderHelper<Self>;
|
||||
|
||||
Status init() override {
|
||||
_encoder.reset(new ForEncoder<CppType>(&_buf));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
bool is_page_full() override { return _encoder->len() >= _options.data_page_size; }
|
||||
@ -58,10 +61,11 @@ public:
|
||||
return _buf.build();
|
||||
}
|
||||
|
||||
void reset() override {
|
||||
Status reset() override {
|
||||
_count = 0;
|
||||
_finished = false;
|
||||
_encoder->clear();
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
size_t count() const override { return _count; }
|
||||
@ -85,6 +89,9 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
explicit FrameOfReferencePageBuilder(const PageBuilderOptions& options)
|
||||
: _options(options), _count(0), _finished(false) {}
|
||||
|
||||
typedef typename TypeTraits<Type>::CppType CppType;
|
||||
PageBuilderOptions _options;
|
||||
size_t _count;
|
||||
|
||||
@ -118,7 +118,7 @@ Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) {
|
||||
|
||||
// IndexedColumn doesn't have NULLs, thus data page body only contains encoded values
|
||||
OwnedSlice page_body = _data_page_builder->finish();
|
||||
_data_page_builder->reset();
|
||||
RETURN_IF_ERROR(_data_page_builder->reset());
|
||||
|
||||
PageFooterPB footer;
|
||||
footer.set_type(DATA_PAGE);
|
||||
|
||||
@ -19,6 +19,7 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common/status.h"
|
||||
@ -42,6 +43,9 @@ public:
|
||||
|
||||
virtual ~PageBuilder() {}
|
||||
|
||||
// Init the internal state of the page builder.
|
||||
virtual Status init() = 0;
|
||||
|
||||
// Used by column writer to determine whether the current page is full.
|
||||
// Column writer depends on the result to decide whether to flush current page.
|
||||
virtual bool is_page_full() = 0;
|
||||
@ -69,7 +73,7 @@ public:
|
||||
// Reset the internal state of the page builder.
|
||||
//
|
||||
// Any data previously returned by finish may be invalidated by this call.
|
||||
virtual void reset() = 0;
|
||||
virtual Status reset() = 0;
|
||||
|
||||
// Return the number of entries that have been added to the page.
|
||||
virtual size_t count() const = 0;
|
||||
@ -91,5 +95,17 @@ private:
|
||||
DISALLOW_COPY_AND_ASSIGN(PageBuilder);
|
||||
};
|
||||
|
||||
// CRTP-style helper that gives every concrete page builder a static factory.
//
// A concrete builder derives from PageBuilderHelper<ItsOwnType> and thereby
// inherits create(), which performs two-phase construction: it constructs the
// object and then calls the virtual init() so that initialization can report
// failure through a Status.
//
// Template parameter:
//   Derived - the concrete builder type to instantiate; it must be
//             constructible from the arguments passed to create().
template <typename Derived>
|
||||
class PageBuilderHelper : public PageBuilder {
|
||||
public:
|
||||
// Constructs a Derived from `args`, runs init() on it, and on success hands
// ownership of the new object to the caller through `*builder`.
//
// Parameters:
//   builder - output; receives the raw owning pointer only when init() succeeds.
//   args    - forwarded unchanged to Derived's constructor.
//
// Returns the Status produced by init() on failure, otherwise Status::OK().
template <typename... Args>
|
||||
static Status create(PageBuilder** builder, Args&&... args) {
|
||||
// Hold the object in a unique_ptr so it is destroyed automatically if init()
// fails and RETURN_IF_ERROR returns early. Plain `new` is used here; the
// builders visible in this change declare their constructors private and
// befriend PageBuilderHelper<Self>, so the object is created from inside this class.
std::unique_ptr<PageBuilder> builder_uniq_ptr(new Derived(std::forward<Args>(args)...));
|
||||
RETURN_IF_ERROR(builder_uniq_ptr->init());
|
||||
// Initialization succeeded: release ownership to the caller. `*builder` is
// left untouched on the failure path above.
*builder = builder_uniq_ptr.release();
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace segment_v2
|
||||
} // namespace doris
|
||||
|
||||
@ -31,13 +31,16 @@ namespace segment_v2 {
|
||||
static const size_t PLAIN_PAGE_HEADER_SIZE = sizeof(uint32_t);
|
||||
|
||||
template <FieldType Type>
|
||||
class PlainPageBuilder : public PageBuilder {
|
||||
class PlainPageBuilder : public PageBuilderHelper<PlainPageBuilder<Type> > {
|
||||
public:
|
||||
PlainPageBuilder(const PageBuilderOptions& options) : _options(options) {
|
||||
using Self = PlainPageBuilder<Type>;
|
||||
friend class PageBuilderHelper<Self>;
|
||||
|
||||
Status init() override {
|
||||
// Reserve enough space for the page, plus a bit of slop since
|
||||
// we often overrun the page by a few values.
|
||||
_buffer.reserve(_options.data_page_size + 1024);
|
||||
reset();
|
||||
return reset();
|
||||
}
|
||||
|
||||
bool is_page_full() override { return _buffer.size() > _options.data_page_size; }
|
||||
@ -66,11 +69,12 @@ public:
|
||||
return _buffer.build();
|
||||
}
|
||||
|
||||
void reset() override {
|
||||
Status reset() override {
|
||||
_buffer.reserve(_options.data_page_size + 1024);
|
||||
_count = 0;
|
||||
_buffer.clear();
|
||||
_buffer.resize(PLAIN_PAGE_HEADER_SIZE);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
size_t count() const override { return _count; }
|
||||
@ -94,6 +98,8 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
PlainPageBuilder(const PageBuilderOptions& options) : _options(options) {}
|
||||
|
||||
faststring _buffer;
|
||||
PageBuilderOptions _options;
|
||||
size_t _count;
|
||||
|
||||
@ -51,10 +51,12 @@ enum { RLE_PAGE_HEADER_SIZE = 4 };
|
||||
//
|
||||
// TODO(hkp): optimize rle algorithm
|
||||
template <FieldType Type>
|
||||
class RlePageBuilder : public PageBuilder {
|
||||
class RlePageBuilder : public PageBuilderHelper<RlePageBuilder<Type> > {
|
||||
public:
|
||||
RlePageBuilder(const PageBuilderOptions& options)
|
||||
: _options(options), _count(0), _finished(false), _bit_width(0), _rle_encoder(nullptr) {
|
||||
using Self = RlePageBuilder<Type>;
|
||||
friend class PageBuilderHelper<Self>;
|
||||
|
||||
Status init() override {
|
||||
switch (Type) {
|
||||
case FieldType::OLAP_FIELD_TYPE_BOOL: {
|
||||
_bit_width = 1;
|
||||
@ -66,7 +68,7 @@ public:
|
||||
}
|
||||
}
|
||||
_rle_encoder = new RleEncoder<CppType>(&_buf, _bit_width);
|
||||
reset();
|
||||
return reset();
|
||||
}
|
||||
|
||||
~RlePageBuilder() { delete _rle_encoder; }
|
||||
@ -102,11 +104,12 @@ public:
|
||||
return _buf.build();
|
||||
}
|
||||
|
||||
void reset() override {
|
||||
Status reset() override {
|
||||
_count = 0;
|
||||
_finished = false;
|
||||
_rle_encoder->Clear();
|
||||
_rle_encoder->Reserve(RLE_PAGE_HEADER_SIZE, 0);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
size_t count() const override { return _count; }
|
||||
@ -132,6 +135,13 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
RlePageBuilder(const PageBuilderOptions& options)
|
||||
: _options(options),
|
||||
_count(0),
|
||||
_finished(false),
|
||||
_bit_width(0),
|
||||
_rle_encoder(nullptr) {}
|
||||
|
||||
typedef typename TypeTraits<Type>::CppType CppType;
|
||||
enum { SIZE_OF_TYPE = TypeTraits<Type>::size };
|
||||
|
||||
|
||||
@ -43,6 +43,8 @@ public:
|
||||
options.data_page_size = 256 * 1024;
|
||||
options.dict_page_size = 256 * 1024;
|
||||
BinaryDictPageBuilder page_builder(options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
size_t count = slices.size();
|
||||
|
||||
const Slice* ptr = &slices[0];
|
||||
@ -132,6 +134,8 @@ public:
|
||||
options.data_page_size = 1 * 1024 * 1024;
|
||||
options.dict_page_size = 1 * 1024 * 1024;
|
||||
BinaryDictPageBuilder page_builder(options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
size_t count = contents.size();
|
||||
std::vector<OwnedSlice> results;
|
||||
std::vector<size_t> page_start_ids;
|
||||
|
||||
@ -47,6 +47,8 @@ public:
|
||||
PageBuilderOptions options;
|
||||
options.data_page_size = 256 * 1024;
|
||||
PageBuilderType page_builder(options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
size_t count = slices.size();
|
||||
|
||||
Slice* ptr = &slices[0];
|
||||
|
||||
@ -48,6 +48,8 @@ public:
|
||||
// encode
|
||||
PageBuilderOptions options;
|
||||
BinaryPrefixPageBuilder page_builder(options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
|
||||
size_t count = slices.size();
|
||||
const Slice* ptr = &slices[0];
|
||||
@ -162,6 +164,8 @@ public:
|
||||
// encode
|
||||
PageBuilderOptions options;
|
||||
BinaryPrefixPageBuilder page_builder(options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
|
||||
size_t count = slices.size();
|
||||
const Slice* ptr = &slices[0];
|
||||
@ -273,6 +277,8 @@ public:
|
||||
// encode
|
||||
PageBuilderOptions options;
|
||||
BinaryPrefixPageBuilder page_builder(options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
|
||||
size_t count = slices.size();
|
||||
const Slice* ptr = &slices[0];
|
||||
|
||||
@ -56,6 +56,8 @@ public:
|
||||
PageBuilderOptions options;
|
||||
options.data_page_size = 256 * 1024;
|
||||
PageBuilderType page_builder(options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
|
||||
page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
|
||||
OwnedSlice s = page_builder.finish();
|
||||
@ -121,6 +123,8 @@ public:
|
||||
PageBuilderOptions options;
|
||||
options.data_page_size = 256 * 1024;
|
||||
PageBuilderType page_builder(options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
|
||||
page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
|
||||
OwnedSlice s = page_builder.finish();
|
||||
|
||||
@ -50,6 +50,8 @@ public:
|
||||
PageBuilderOptions builder_options;
|
||||
builder_options.data_page_size = 256 * 1024;
|
||||
PageBuilderType for_page_builder(builder_options);
|
||||
Status ret0 = for_page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
for_page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
|
||||
OwnedSlice s = for_page_builder.finish();
|
||||
EXPECT_EQ(size, for_page_builder.count());
|
||||
@ -234,6 +236,8 @@ TEST_F(FrameOfReferencePageTest, TestInt32SequenceBlockEncoderSize) {
|
||||
builder_options.data_page_size = 256 * 1024;
|
||||
segment_v2::FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> page_builder(
|
||||
builder_options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size);
|
||||
OwnedSlice s = page_builder.finish();
|
||||
// body: 4 bytes min value + 128 * 1 /8 packing value = 20
|
||||
@ -251,6 +255,8 @@ TEST_F(FrameOfReferencePageTest, TestFirstLastValue) {
|
||||
builder_options.data_page_size = 256 * 1024;
|
||||
segment_v2::FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> page_builder(
|
||||
builder_options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size);
|
||||
OwnedSlice s = page_builder.finish();
|
||||
int32_t first_value = -1;
|
||||
@ -271,6 +277,8 @@ TEST_F(FrameOfReferencePageTest, TestInt32NormalBlockEncoderSize) {
|
||||
builder_options.data_page_size = 256 * 1024;
|
||||
segment_v2::FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> page_builder(
|
||||
builder_options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size);
|
||||
OwnedSlice s = page_builder.finish();
|
||||
// body: 4 bytes min value + 128 * 7 /8 packing value = 116
|
||||
|
||||
@ -64,6 +64,8 @@ public:
|
||||
PageBuilderOptions options;
|
||||
options.data_page_size = 256 * 1024;
|
||||
PageBuilderType page_builder(options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
|
||||
page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
|
||||
OwnedSlice s = page_builder.finish();
|
||||
@ -120,6 +122,8 @@ public:
|
||||
PageBuilderOptions options;
|
||||
options.data_page_size = 256 * 1024;
|
||||
PageBuilderType page_builder(options);
|
||||
Status ret0 = page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
|
||||
page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
|
||||
OwnedSlice s = page_builder.finish();
|
||||
|
||||
@ -54,6 +54,8 @@ public:
|
||||
PageBuilderOptions builder_options;
|
||||
builder_options.data_page_size = 256 * 1024;
|
||||
PageBuilderType rle_page_builder(builder_options);
|
||||
Status ret0 = rle_page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
rle_page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
|
||||
OwnedSlice s = rle_page_builder.finish();
|
||||
EXPECT_EQ(size, rle_page_builder.count());
|
||||
@ -155,6 +157,8 @@ TEST_F(RlePageTest, TestRleInt32BlockEncoderSize) {
|
||||
PageBuilderOptions builder_options;
|
||||
builder_options.data_page_size = 256 * 1024;
|
||||
segment_v2::RlePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> rle_page_builder(builder_options);
|
||||
Status ret0 = rle_page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
rle_page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size);
|
||||
OwnedSlice s = rle_page_builder.finish();
|
||||
// 4 bytes header
|
||||
@ -191,6 +195,8 @@ TEST_F(RlePageTest, TestRleBoolBlockEncoderSize) {
|
||||
PageBuilderOptions builder_options;
|
||||
builder_options.data_page_size = 256 * 1024;
|
||||
segment_v2::RlePageBuilder<FieldType::OLAP_FIELD_TYPE_BOOL> rle_page_builder(builder_options);
|
||||
Status ret0 = rle_page_builder.init();
|
||||
EXPECT_TRUE(ret0.ok());
|
||||
rle_page_builder.add(reinterpret_cast<const uint8_t*>(bools.get()), &size);
|
||||
OwnedSlice s = rle_page_builder.finish();
|
||||
// 4 bytes header
|
||||
|
||||
Reference in New Issue
Block a user