Fix segment v2 comment (#1769)

This commit is contained in:
kangkaisen
2019-09-09 18:26:48 +08:00
committed by ZHAO Chun
parent cd5cfea5cc
commit 0f44ce99ce
11 changed files with 32 additions and 43 deletions

View File

@ -162,7 +162,6 @@ Status BinaryDictPageDecoder::init() {
_data_page_decoder.reset(new BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options));
} else if (_encoding_type == PLAIN_ENCODING) {
DCHECK_EQ(_encoding_type, PLAIN_ENCODING);
// use plain page decoder to decode data
_data_page_decoder.reset(new BinaryPlainPageDecoder(_data, _options));
} else {
LOG(WARNING) << "invalide encoding type:" << _encoding_type;

View File

@ -137,7 +137,6 @@ public:
private:
Slice _finish(int final_size_of_type) {
//_data.resize(BITSHUFFLE_PAGE_HEADER_SIZE + final_size_of_type * _count);
_data.resize(final_size_of_type * _count);
// Do padding so that the input num of element is multiple of 8.

View File

@ -114,7 +114,7 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
*handle = PageHandle(std::move(cache_handle));
return Status::OK();
}
// Now we read this from file. we
// Now we read this from file.
size_t page_size = pp.size;
if (page_size < sizeof(uint32_t)) {
return Status::Corruption(Substitute("Bad page, page size is too small, size=$0", page_size));
@ -173,7 +173,7 @@ void ColumnReader::_get_filtered_pages(CondColumn* cond_column, std::vector<uint
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type));
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type));
for (int32_t i = 0; i < page_size; ++i) {
// min value and max value are valid if exisst_none_null is true
// min value and max value are valid if has_not_null is true
if (zone_maps[i].has_not_null()) {
min_value->from_string(zone_maps[i].min());
max_value->from_string(zone_maps[i].max());
@ -264,10 +264,8 @@ Status FileColumnIterator::seek_to_first() {
}
Status FileColumnIterator::seek_to_ordinal(rowid_t rid) {
if (_page != nullptr && _page->contains(rid)) {
// current page contains this row, we just
} else {
// we need to seek to
// if current page contains this row, we don't need to seek
if (_page == nullptr || !_page->contains(rid)) {
RETURN_IF_ERROR(_reader->seek_at_or_before(rid, &_page_iter));
_page.reset(new ParsedPage());
RETURN_IF_ERROR(_read_page(_page_iter, _page.get()));

View File

@ -46,12 +46,10 @@ class ParsedPage;
class ColumnIterator;
struct ColumnReaderOptions {
// If verify checksum when read page
// whether to verify checksum when reading a page
bool verify_checksum = true;
};
// Used to read one column's data. And user should pass ColumnData meta
// when he want to read this column's data.
// There will be concurrent users to read the same column. So
// we should do our best to reduce resource usage through share
// same information, such as OrdinalPageIndex and Page data.
@ -91,10 +89,7 @@ private:
void _calculate_row_ranges(const std::vector<uint32_t>& page_indexes, RowRanges* row_ranges);
private:
// input param
ColumnReaderOptions _opts;
// we need colun data to parse column data.
// use shared_ptr here is to make things simple
ColumnMetaPB _meta;
uint64_t _num_rows;
RandomAccessFile* _file = nullptr;
@ -130,8 +125,7 @@ public:
// from Arena
virtual Status next_batch(size_t* n, ColumnBlock* dst) = 0;
// Get current oridinal
virtual rowid_t get_current_oridinal() const = 0;
virtual rowid_t get_current_ordinal() const = 0;
#if 0
// Call this function every time before next_batch.
@ -170,8 +164,7 @@ public:
Status next_batch(size_t* n, ColumnBlock* dst) override;
// Get current oridinal
rowid_t get_current_oridinal() const override { return _current_rowid; }
rowid_t get_current_ordinal() const override { return _current_rowid; }
private:
void _seek_to_pos_in_page(ParsedPage* page, uint32_t offset_in_page);
@ -181,10 +174,10 @@ private:
private:
ColumnReader* _reader;
// We define an operation is one seek and follwing read.
// If new seek is issued, there will be a new operation
// current page
// When _page is null, it means that this reader can not be read
// 1. _page represents the current page.
// 2. We define an operation as one seek and the following read.
//    If a new seek is issued, _page will be reset.
// 3. When _page is null, it means that this reader can not be read.
std::unique_ptr<ParsedPage> _page;
// page iterator used to get next page when current page is finished.

View File

@ -129,8 +129,8 @@ Status ColumnWriter::append(const void* data, size_t num_rows) {
return _append_data((const uint8_t**)&data, num_rows);
}
// append data to page builder. this funciton will make sure that
// num_rows must be written before return. And ptr will be modifed
// append data to page builder. this function will make sure that
// num_rows must be written before return. And ptr will be modified
// to next data should be written
Status ColumnWriter::_append_data(const uint8_t** ptr, size_t num_rows) {
size_t remaining = num_rows;
@ -247,7 +247,7 @@ Status ColumnWriter::_write_physical_page(std::vector<Slice>* origin_data, PageP
std::vector<Slice>* output_data = origin_data;
std::vector<Slice> compressed_data;
// Put compressor out of if block, because we should use compressor's
// Put compressor out of if block, because we will use compressor's
// content until this function finished.
PageCompressor compressor(_compress_codec);
if (_compress_codec != nullptr) {

View File

@ -26,7 +26,7 @@ namespace segment_v2 {
using strings::Substitute;
Status PageDecompressor::decompress_to(Slice* content) {
Status PageDecompressor::decompress_to(Slice* uncompressed_data) {
if (_data.size < 4) {
return Status::Corruption(
Substitute("Compressed page's size is too small, size=$0, needed=$1",
@ -40,7 +40,7 @@ Status PageDecompressor::decompress_to(Slice* content) {
// If compressed_slice's size is equal to _uncompressed_bytes, it means
// the compressor stored this directly without compression. So we just
// return the compressed slice as-is, without copying.
*content = compressed_slice;
*uncompressed_data = compressed_slice;
return Status::OK();
}
std::unique_ptr<char[]> buf(new char[uncompressed_bytes]);
@ -54,7 +54,7 @@ Status PageDecompressor::decompress_to(Slice* content) {
Substitute("Uncompressed size not match, record=$0 vs decompress=$1",
uncompressed_bytes, uncompressed_slice.size));
}
*content = Slice(buf.release(), uncompressed_bytes);
*uncompressed_data = Slice(buf.release(), uncompressed_bytes);
return Status::OK();
}

View File

@ -38,7 +38,7 @@ namespace segment_v2 {
// The type of compression codec for Data is stored elsewhere and should
// be passed into the constructor.
// Usage example:
// // page_size refers to page read from storage
// // page_slice refers to page read from storage
// PageDecompressor decompressor(page_slice, codec);
// // points to decompressed Data of the page (without footer)
// Slice uncompressed_slice;
@ -46,21 +46,21 @@ namespace segment_v2 {
// // use uncompressed_slice
// // we have a new buffer for decompressed page
// if (uncompressed_slice.data != page_slice.data) {
// delete[] uncompressed_bytes.data;
// delete[] uncompressed_slice.data;
// }
class PageDecompressor {
public:
PageDecompressor(const Slice& data, const BlockCompressionCodec* codec)
: _data(data), _codec(codec) {
PageDecompressor(const Slice& compressed_data, const BlockCompressionCodec* codec)
: _data(compressed_data), _codec(codec) {
}
// This client will set uncompress content to input param.
// In normal case(content.data != input_data.data) client should
// call delete[] content.data to free heap memory. However
// when the data is not compressed, this function will return input data
// directly. In this case content.data == input_data.data,
// This function will set the uncompressed content to uncompressed_data.
// In normal case(compressed_data.data != uncompressed_data.data) client should
// call delete[] uncompressed_data.data to free heap memory. However
// when the data is not compressed, this function will return compressed_data
// directly. In this case compressed_data.data == uncompressed_data.data,
// client should not free uncompressed_data.
Status decompress_to(Slice* content);
Status decompress_to(Slice* uncompressed_data);
private:
Slice _data;
const BlockCompressionCodec* _codec;

View File

@ -24,7 +24,7 @@
namespace doris {
namespace segment_v2 {
// PageDecoder is used to decode page page.
// PageDecoder is used to decode page.
class PageDecoder {
public:
PageDecoder() { }

View File

@ -28,7 +28,7 @@ namespace segment_v2 {
// A page's data may be in cache, or may not in cache. We use this
// class to unify these two cases.
// If client use this struct to wrap data not in cache, this class
// will free data's memory when it is destoryed.
// will free data's memory when it is destroyed.
class PageHandle {
public:
PageHandle() : _is_data_owner(false) { }

View File

@ -51,7 +51,7 @@ public:
return false;
}
// Returns true if the two ranges are overlapped or false.
// Returns true if the two ranges intersect, false otherwise.
// The intersection of the two ranges is returned through range.
static bool range_intersection(const RowRange& left, const RowRange& right, RowRange* range) {
if (left._from <= right._from) {

View File

@ -52,8 +52,8 @@ using SegmentSharedPtr = std::shared_ptr<Segment>;
// And user can create a SegmentIterator through new_iterator function.
//
// NOTE: This segment is used to a specified TabletSchema, when TabletSchema
// is changed, this segemnt can not be used any more. For eample, after a schema
// change finished, client should disalbe all cahced Segment for old TabletSchema.
// is changed, this segment can not be used any more. For example, after a schema
// change finished, client should disable all cached Segment for old TabletSchema.
class Segment : public std::enable_shared_from_this<Segment> {
public:
Segment(std::string fname, uint32_t segment_id,