Fix segment v2 comment (#1769)
This commit is contained in:
@ -162,7 +162,6 @@ Status BinaryDictPageDecoder::init() {
|
||||
_data_page_decoder.reset(new BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options));
|
||||
} else if (_encoding_type == PLAIN_ENCODING) {
|
||||
DCHECK_EQ(_encoding_type, PLAIN_ENCODING);
|
||||
// use plain page decoder to decode data
|
||||
_data_page_decoder.reset(new BinaryPlainPageDecoder(_data, _options));
|
||||
} else {
|
||||
LOG(WARNING) << "invalide encoding type:" << _encoding_type;
|
||||
|
||||
@ -137,7 +137,6 @@ public:
|
||||
|
||||
private:
|
||||
Slice _finish(int final_size_of_type) {
|
||||
//_data.resize(BITSHUFFLE_PAGE_HEADER_SIZE + final_size_of_type * _count);
|
||||
_data.resize(final_size_of_type * _count);
|
||||
|
||||
// Do padding so that the input num of element is multiple of 8.
|
||||
|
||||
@ -114,7 +114,7 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
|
||||
*handle = PageHandle(std::move(cache_handle));
|
||||
return Status::OK();
|
||||
}
|
||||
// Now we read this from file. we
|
||||
// Now we read this from file.
|
||||
size_t page_size = pp.size;
|
||||
if (page_size < sizeof(uint32_t)) {
|
||||
return Status::Corruption(Substitute("Bad page, page size is too small, size=$0", page_size));
|
||||
@ -173,7 +173,7 @@ void ColumnReader::_get_filtered_pages(CondColumn* cond_column, std::vector<uint
|
||||
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type));
|
||||
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type));
|
||||
for (int32_t i = 0; i < page_size; ++i) {
|
||||
// min value and max value are valid if exisst_none_null is true
|
||||
// min value and max value are valid if has_not_null is true
|
||||
if (zone_maps[i].has_not_null()) {
|
||||
min_value->from_string(zone_maps[i].min());
|
||||
max_value->from_string(zone_maps[i].max());
|
||||
@ -264,10 +264,8 @@ Status FileColumnIterator::seek_to_first() {
|
||||
}
|
||||
|
||||
Status FileColumnIterator::seek_to_ordinal(rowid_t rid) {
|
||||
if (_page != nullptr && _page->contains(rid)) {
|
||||
// current page contains this row, we just
|
||||
} else {
|
||||
// we need to seek to
|
||||
// if current page contains this row, we don't need to seek
|
||||
if (_page == nullptr || !_page->contains(rid)) {
|
||||
RETURN_IF_ERROR(_reader->seek_at_or_before(rid, &_page_iter));
|
||||
_page.reset(new ParsedPage());
|
||||
RETURN_IF_ERROR(_read_page(_page_iter, _page.get()));
|
||||
|
||||
@ -46,12 +46,10 @@ class ParsedPage;
|
||||
class ColumnIterator;
|
||||
|
||||
struct ColumnReaderOptions {
|
||||
// If verify checksum when read page
|
||||
// whether verify checksum when read page
|
||||
bool verify_checksum = true;
|
||||
};
|
||||
|
||||
// Used to read one column's data. And user should pass ColumnData meta
|
||||
// when he want to read this column's data.
|
||||
// There will be concurrent users to read the same column. So
|
||||
// we should do our best to reduce resource usage through share
|
||||
// same information, such as OrdinalPageIndex and Page data.
|
||||
@ -91,10 +89,7 @@ private:
|
||||
void _calculate_row_ranges(const std::vector<uint32_t>& page_indexes, RowRanges* row_ranges);
|
||||
|
||||
private:
|
||||
// input param
|
||||
ColumnReaderOptions _opts;
|
||||
// we need colun data to parse column data.
|
||||
// use shared_ptr here is to make things simple
|
||||
ColumnMetaPB _meta;
|
||||
uint64_t _num_rows;
|
||||
RandomAccessFile* _file = nullptr;
|
||||
@ -130,8 +125,7 @@ public:
|
||||
// from Arena
|
||||
virtual Status next_batch(size_t* n, ColumnBlock* dst) = 0;
|
||||
|
||||
// Get current oridinal
|
||||
virtual rowid_t get_current_oridinal() const = 0;
|
||||
virtual rowid_t get_current_ordinal() const = 0;
|
||||
|
||||
#if 0
|
||||
// Call this function every time before next_batch.
|
||||
@ -170,8 +164,7 @@ public:
|
||||
|
||||
Status next_batch(size_t* n, ColumnBlock* dst) override;
|
||||
|
||||
// Get current oridinal
|
||||
rowid_t get_current_oridinal() const override { return _current_rowid; }
|
||||
rowid_t get_current_ordinal() const override { return _current_rowid; }
|
||||
|
||||
private:
|
||||
void _seek_to_pos_in_page(ParsedPage* page, uint32_t offset_in_page);
|
||||
@ -181,10 +174,10 @@ private:
|
||||
private:
|
||||
ColumnReader* _reader;
|
||||
|
||||
// We define an operation is one seek and follwing read.
|
||||
// If new seek is issued, there will be a new operation
|
||||
// current page
|
||||
// When _page is null, it means that this reader can not be read
|
||||
// 1. The _page represents current page.
|
||||
// 2. We define an operation is one seek and following read,
|
||||
// If new seek is issued, the _page will be reset.
|
||||
// 3. When _page is null, it means that this reader can not be read.
|
||||
std::unique_ptr<ParsedPage> _page;
|
||||
|
||||
// page iterator used to get next page when current page is finished.
|
||||
|
||||
@ -129,8 +129,8 @@ Status ColumnWriter::append(const void* data, size_t num_rows) {
|
||||
return _append_data((const uint8_t**)&data, num_rows);
|
||||
}
|
||||
|
||||
// append data to page builder. this funciton will make sure that
|
||||
// num_rows must be written before return. And ptr will be modifed
|
||||
// append data to page builder. this function will make sure that
|
||||
// num_rows must be written before return. And ptr will be modified
|
||||
// to next data should be written
|
||||
Status ColumnWriter::_append_data(const uint8_t** ptr, size_t num_rows) {
|
||||
size_t remaining = num_rows;
|
||||
@ -247,7 +247,7 @@ Status ColumnWriter::_write_physical_page(std::vector<Slice>* origin_data, PageP
|
||||
std::vector<Slice>* output_data = origin_data;
|
||||
std::vector<Slice> compressed_data;
|
||||
|
||||
// Put compressor out of if block, because we should use compressor's
|
||||
// Put compressor out of if block, because we will use compressor's
|
||||
// content until this function finished.
|
||||
PageCompressor compressor(_compress_codec);
|
||||
if (_compress_codec != nullptr) {
|
||||
|
||||
@ -26,7 +26,7 @@ namespace segment_v2 {
|
||||
|
||||
using strings::Substitute;
|
||||
|
||||
Status PageDecompressor::decompress_to(Slice* content) {
|
||||
Status PageDecompressor::decompress_to(Slice* uncompressed_data) {
|
||||
if (_data.size < 4) {
|
||||
return Status::Corruption(
|
||||
Substitute("Compressed page's size is too small, size=$0, needed=$1",
|
||||
@ -40,7 +40,7 @@ Status PageDecompressor::decompress_to(Slice* content) {
|
||||
// If compressed_slice's size is equal with _uncompressed_bytes, it means
|
||||
// compressor store this directly without compression. So we just copy
|
||||
// this to buf and return.
|
||||
*content = compressed_slice;
|
||||
*uncompressed_data = compressed_slice;
|
||||
return Status::OK();
|
||||
}
|
||||
std::unique_ptr<char[]> buf(new char[uncompressed_bytes]);
|
||||
@ -54,7 +54,7 @@ Status PageDecompressor::decompress_to(Slice* content) {
|
||||
Substitute("Uncompressed size not match, record=$0 vs decompress=$1",
|
||||
uncompressed_bytes, uncompressed_slice.size));
|
||||
}
|
||||
*content = Slice(buf.release(), uncompressed_bytes);
|
||||
*uncompressed_data = Slice(buf.release(), uncompressed_bytes);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@ -38,7 +38,7 @@ namespace segment_v2 {
|
||||
// The type of compression codec for Data is stored elsewhere and should
|
||||
// be passed into the constructor.
|
||||
// Usage example:
|
||||
// // page_size refers to page read from storage
|
||||
// // page_slice refers to page read from storage
|
||||
// PageDecompressor decompressor(page_slice, codec);
|
||||
// // points to decompressed Data of the page (without footer)
|
||||
// Slice uncompressed_slice;
|
||||
@ -46,21 +46,21 @@ namespace segment_v2 {
|
||||
// // use uncompressed_slice
|
||||
// // we have a new buffer for decompressed page
|
||||
// if (uncompressed_slice.data != page_slice.data) {
|
||||
// delete[] uncompressed_bytes.data;
|
||||
// delete[] page_slice.data;
|
||||
// }
|
||||
class PageDecompressor {
|
||||
public:
|
||||
PageDecompressor(const Slice& data, const BlockCompressionCodec* codec)
|
||||
: _data(data), _codec(codec) {
|
||||
PageDecompressor(const Slice& compressed_data, const BlockCompressionCodec* codec)
|
||||
: _data(compressed_data), _codec(codec) {
|
||||
}
|
||||
|
||||
// This client will set uncompress content to input param.
|
||||
// In normal case(content.data != input_data.data) client should
|
||||
// call delete[] content.data to free heap memory. However
|
||||
// when the data is not compressed, this function will return input data
|
||||
// directly. In this case content.data == input_data.data,
|
||||
// This client will set uncompress content to uncompressed_data.
|
||||
// In normal case(compressed_data.data != uncompressed_data.data) client should
|
||||
// call delete[] compressed_data.data to free heap memory. However
|
||||
// when the data is not compressed, this function will return compressed_data
|
||||
// directly. In this case compressed_data.data == uncompressed_data.data,
|
||||
// client should not free content.
|
||||
Status decompress_to(Slice* content);
|
||||
Status decompress_to(Slice* uncompressed_data);
|
||||
private:
|
||||
Slice _data;
|
||||
const BlockCompressionCodec* _codec;
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
// PageDecoder is used to decode page page.
|
||||
// PageDecoder is used to decode page.
|
||||
class PageDecoder {
|
||||
public:
|
||||
PageDecoder() { }
|
||||
|
||||
@ -28,7 +28,7 @@ namespace segment_v2 {
|
||||
// A page's data may be in cache, or may not in cache. We use this
|
||||
// class to unify these two cases.
|
||||
// If client use this struct to wrap data not in cache, this class
|
||||
// will free data's memory when it is destoryed.
|
||||
// will free data's memory when it is destroyed.
|
||||
class PageHandle {
|
||||
public:
|
||||
PageHandle() : _is_data_owner(false) { }
|
||||
|
||||
@ -51,7 +51,7 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if the two ranges are overlapped or false.
|
||||
// Returns true if the two ranges are intersected or false.
|
||||
// The intersection of the two ranges is returned through range.
|
||||
static bool range_intersection(const RowRange& left, const RowRange& right, RowRange* range) {
|
||||
if (left._from <= right._from) {
|
||||
|
||||
@ -52,8 +52,8 @@ using SegmentSharedPtr = std::shared_ptr<Segment>;
|
||||
// And user can create a SegmentIterator through new_iterator function.
|
||||
//
|
||||
// NOTE: This segment is used to a specified TabletSchema, when TabletSchema
|
||||
// is changed, this segemnt can not be used any more. For eample, after a schema
|
||||
// change finished, client should disalbe all cahced Segment for old TabletSchema.
|
||||
// is changed, this segment can not be used any more. For example, after a schema
|
||||
// change finished, client should disable all cached Segment for old TabletSchema.
|
||||
class Segment : public std::enable_shared_from_this<Segment> {
|
||||
public:
|
||||
Segment(std::string fname, uint32_t segment_id,
|
||||
|
||||
Reference in New Issue
Block a user