From 0f44ce99ced0a82899c545abbb2420af2822e6d4 Mon Sep 17 00:00:00 2001 From: kangkaisen Date: Mon, 9 Sep 2019 18:26:48 +0800 Subject: [PATCH] Fix segment v2 comment (#1769) --- .../rowset/segment_v2/binary_dict_page.cpp | 1 - .../olap/rowset/segment_v2/bitshuffle_page.h | 1 - .../olap/rowset/segment_v2/column_reader.cpp | 10 ++++----- be/src/olap/rowset/segment_v2/column_reader.h | 21 +++++++------------ .../olap/rowset/segment_v2/column_writer.cpp | 6 +++--- .../rowset/segment_v2/page_compression.cpp | 6 +++--- .../olap/rowset/segment_v2/page_compression.h | 20 +++++++++--------- be/src/olap/rowset/segment_v2/page_decoder.h | 2 +- be/src/olap/rowset/segment_v2/page_handle.h | 2 +- be/src/olap/rowset/segment_v2/row_ranges.h | 2 +- be/src/olap/rowset/segment_v2/segment.h | 4 ++-- 11 files changed, 32 insertions(+), 43 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp index f903547c1d..b3f468fa52 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp @@ -162,7 +162,6 @@ Status BinaryDictPageDecoder::init() { _data_page_decoder.reset(new BitShufflePageDecoder(_data, _options)); } else if (_encoding_type == PLAIN_ENCODING) { DCHECK_EQ(_encoding_type, PLAIN_ENCODING); - // use plain page decoder to decode data _data_page_decoder.reset(new BinaryPlainPageDecoder(_data, _options)); } else { LOG(WARNING) << "invalide encoding type:" << _encoding_type; diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index dd76de3c88..ad3f242cd8 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -137,7 +137,6 @@ public: private: Slice _finish(int final_size_of_type) { - //_data.resize(BITSHUFFLE_PAGE_HEADER_SIZE + final_size_of_type * _count); _data.resize(final_size_of_type * _count); // Do padding so that the input num of element is multiple of 8. diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 7f659d54ac..b5d803870e 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -114,7 +114,7 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) { *handle = PageHandle(std::move(cache_handle)); return Status::OK(); } - // Now we read this from file. we + // Now we read this from file. size_t page_size = pp.size; if (page_size < sizeof(uint32_t)) { return Status::Corruption(Substitute("Bad page, page size is too small, size=$0", page_size)); @@ -173,7 +173,7 @@ void ColumnReader::_get_filtered_pages(CondColumn* cond_column, std::vector min_value(WrapperField::create_by_type(type)); std::unique_ptr max_value(WrapperField::create_by_type(type)); for (int32_t i = 0; i < page_size; ++i) { - // min value and max value are valid if exisst_none_null is true + // min value and max value are valid if has_not_null is true if (zone_maps[i].has_not_null()) { min_value->from_string(zone_maps[i].min()); max_value->from_string(zone_maps[i].max()); @@ -264,10 +264,8 @@ Status FileColumnIterator::seek_to_first() { } Status FileColumnIterator::seek_to_ordinal(rowid_t rid) { - if (_page != nullptr && _page->contains(rid)) { - // current page contains this row, we just - } else { - // we need to seek to + // if current page contains this row, we don't need to seek + if (_page == nullptr || !_page->contains(rid)) { RETURN_IF_ERROR(_reader->seek_at_or_before(rid, &_page_iter)); _page.reset(new ParsedPage()); RETURN_IF_ERROR(_read_page(_page_iter, _page.get())); diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index d3f25553a9..7f50f87066 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -46,12 +46,10 @@ class ParsedPage; class ColumnIterator; struct ColumnReaderOptions { - // If verify checksum when read page + // whether verify checksum when read page bool verify_checksum = true; }; -// Used to read one column's data. And user should pass ColumnData meta -// when he want to read this column's data. // There will be concurrent users to read the same column. So // we should do our best to reduce resource usage through share // same information, such as OrdinalPageIndex and Page data. @@ -91,10 +89,7 @@ private: void _calculate_row_ranges(const std::vector& page_indexes, RowRanges* row_ranges); private: - // input param ColumnReaderOptions _opts; - // we need colun data to parse column data. - // use shared_ptr here is to make things simple ColumnMetaPB _meta; uint64_t _num_rows; RandomAccessFile* _file = nullptr; @@ -130,8 +125,7 @@ public: // from Arena virtual Status next_batch(size_t* n, ColumnBlock* dst) = 0; - // Get current oridinal - virtual rowid_t get_current_oridinal() const = 0; + virtual rowid_t get_current_ordinal() const = 0; #if 0 // Call this function every time before next_batch. @@ -170,8 +164,7 @@ public: Status next_batch(size_t* n, ColumnBlock* dst) override; - // Get current oridinal - rowid_t get_current_oridinal() const override { return _current_rowid; } + rowid_t get_current_ordinal() const override { return _current_rowid; } private: void _seek_to_pos_in_page(ParsedPage* page, uint32_t offset_in_page); @@ -181,10 +174,10 @@ private: private: ColumnReader* _reader; - // We define an operation is one seek and follwing read. - // If new seek is issued, there will be a new operation - // current page - // When _page is null, it means that this reader can not be read + // 1. The _page represents current page. + // 2. We define an operation is one seek and following read, + // If new seek is issued, the _page will be reset. + // 3. When _page is null, it means that this reader can not be read. std::unique_ptr _page; // page iterator used to get next page when current page is finished. diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 1c569cf380..6f474ef0b5 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -129,8 +129,8 @@ Status ColumnWriter::append(const void* data, size_t num_rows) { return _append_data((const uint8_t**)&data, num_rows); } -// append data to page builder. this funciton will make sure that -// num_rows must be written before return. And ptr will be modifed +// append data to page builder. this function will make sure that +// num_rows must be written before return. And ptr will be modified // to next data should be written Status ColumnWriter::_append_data(const uint8_t** ptr, size_t num_rows) { size_t remaining = num_rows; @@ -247,7 +247,7 @@ Status ColumnWriter::_write_physical_page(std::vector* origin_data, PageP std::vector* output_data = origin_data; std::vector compressed_data; - // Put compressor out of if block, because we should use compressor's + // Put compressor out of if block, because we will use compressor's // content until this function finished. PageCompressor compressor(_compress_codec); if (_compress_codec != nullptr) { diff --git a/be/src/olap/rowset/segment_v2/page_compression.cpp b/be/src/olap/rowset/segment_v2/page_compression.cpp index 8f8faf7f79..e0121b2332 100644 --- a/be/src/olap/rowset/segment_v2/page_compression.cpp +++ b/be/src/olap/rowset/segment_v2/page_compression.cpp @@ -26,7 +26,7 @@ namespace segment_v2 { using strings::Substitute; -Status PageDecompressor::decompress_to(Slice* content) { +Status PageDecompressor::decompress_to(Slice* uncompressed_data) { if (_data.size < 4) { return Status::Corruption( Substitute("Compressed page's size is too small, size=$0, needed=$1", @@ -40,7 +40,7 @@ Status PageDecompressor::decompress_to(Slice* content) { // If compressed_slice's size is equal with _uncompressed_bytes, it means // compressor store this directly without compression. So we just copy // this to buf and return. - *content = compressed_slice; + *uncompressed_data = compressed_slice; return Status::OK(); } std::unique_ptr buf(new char[uncompressed_bytes]); @@ -54,7 +54,7 @@ Status PageDecompressor::decompress_to(Slice* content) { Substitute("Uncompressed size not match, record=$0 vs decompress=$1", uncompressed_bytes, uncompressed_slice.size)); } - *content = Slice(buf.release(), uncompressed_bytes); + *uncompressed_data = Slice(buf.release(), uncompressed_bytes); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/page_compression.h b/be/src/olap/rowset/segment_v2/page_compression.h index 670fbf67e7..d88e0f3743 100644 --- a/be/src/olap/rowset/segment_v2/page_compression.h +++ b/be/src/olap/rowset/segment_v2/page_compression.h @@ -38,7 +38,7 @@ namespace segment_v2 { // The type of compression codec for Data is stored elsewhere and should // be passed into the constructor. // Usage example: -// // page_size refers to page read from storage +// // page_slice refers to page read from storage // PageDecompressor decompressor(page_slice, codec); // // points to decompressed Data of the page (without footer) // Slice uncompressed_slice; @@ -46,21 +46,21 @@ namespace segment_v2 { // // use uncompressed_slice // // we have a new buffer for decompressed page // if (uncompressed_slice.data != page_slice.data) { -// delete[] uncompressed_bytes.data; +// delete[] page_slice.data; // } class PageDecompressor { public: - PageDecompressor(const Slice& data, const BlockCompressionCodec* codec) - : _data(data), _codec(codec) { + PageDecompressor(const Slice& compressed_data, const BlockCompressionCodec* codec) + : _data(compressed_data), _codec(codec) { } - // This client will set uncompress content to input param. - // In normal case(content.data != input_data.data) client should - // call delete[] content.data to free heap memory. However - // when the data is not compressed, this function will return input data - // directly. In this case content.data == input_data.data, + // This client will set uncompress content to uncompressed_data. + // In normal case(compressed_data.data != uncompressed_data.data) client should + // call delete[] compressed_data.data to free heap memory. However + // when the data is not compressed, this function will return compressed_data + // directly. In this case compressed_data.data == uncompressed_data.data, // client should not free content. - Status decompress_to(Slice* content); + Status decompress_to(Slice* uncompressed_data); private: Slice _data; const BlockCompressionCodec* _codec; diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h index 285e49a1db..a6e4c47ef2 100644 --- a/be/src/olap/rowset/segment_v2/page_decoder.h +++ b/be/src/olap/rowset/segment_v2/page_decoder.h @@ -24,7 +24,7 @@ namespace doris { namespace segment_v2 { -// PageDecoder is used to decode page page. +// PageDecoder is used to decode page. class PageDecoder { public: PageDecoder() { } diff --git a/be/src/olap/rowset/segment_v2/page_handle.h b/be/src/olap/rowset/segment_v2/page_handle.h index 890ec56d90..04f49bd614 100644 --- a/be/src/olap/rowset/segment_v2/page_handle.h +++ b/be/src/olap/rowset/segment_v2/page_handle.h @@ -28,7 +28,7 @@ namespace segment_v2 { // A page's data may be in cache, or may not in cache. We use this // class to unify these two cases. // If client use this struct to wrap data not in cache, this class -// will free data's memory when it is destoryed. +// will free data's memory when it is destroyed. class PageHandle { public: PageHandle() : _is_data_owner(false) { } diff --git a/be/src/olap/rowset/segment_v2/row_ranges.h b/be/src/olap/rowset/segment_v2/row_ranges.h index a0664e86cb..a6a57a2643 100644 --- a/be/src/olap/rowset/segment_v2/row_ranges.h +++ b/be/src/olap/rowset/segment_v2/row_ranges.h @@ -51,7 +51,7 @@ public: return false; } - // Returns true if the two ranges are overlapped or false. + // Returns true if the two ranges are intersected or false. // The intersection of the two ranges is returned through range. static bool range_intersection(const RowRange& left, const RowRange& right, RowRange* range) { if (left._from <= right._from) { diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 45b17c6c5c..ca442c01cd 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -52,8 +52,8 @@ using SegmentSharedPtr = std::shared_ptr; // And user can create a SegmentIterator through new_iterator function. // // NOTE: This segment is used to a specified TabletSchema, when TabletSchema -// is changed, this segemnt can not be used any more. For eample, after a schema -// change finished, client should disalbe all cahced Segment for old TabletSchema. +// is changed, this segment can not be used any more. For example, after a schema +// change finished, client should disable all cached Segment for old TabletSchema. class Segment : public std::enable_shared_from_this { public: Segment(std::string fname, uint32_t segment_id,