Fix segment v2 comment (#1769)

2019-09-09 18:26:48 +08:00
parent cd5cfea5cc
commit 0f44ce99ce
11 changed files with 32 additions and 43 deletions
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
@ -162,7 +162,6 @@ Status BinaryDictPageDecoder::init() {
        _data_page_decoder.reset(new BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options));
    } else if (_encoding_type == PLAIN_ENCODING) {
        DCHECK_EQ(_encoding_type, PLAIN_ENCODING);
-        // use plain page decoder to decode data
        _data_page_decoder.reset(new BinaryPlainPageDecoder(_data, _options));
    } else {
        LOG(WARNING) << "invalide encoding type:" << _encoding_type;
--- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h
+++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
@ -137,7 +137,6 @@ public:

 private:
    Slice _finish(int final_size_of_type) {
-        //_data.resize(BITSHUFFLE_PAGE_HEADER_SIZE + final_size_of_type * _count);
        _data.resize(final_size_of_type * _count);

        // Do padding so that the input num of element is multiple of 8.
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@ -114,7 +114,7 @@ Status ColumnReader::read_page(const PagePointer& pp, PageHandle* handle) {
        *handle = PageHandle(std::move(cache_handle));
        return Status::OK();
    }
-    // Now we read this from file. we 
+    // Now we read this from file.
    size_t page_size = pp.size;
    if (page_size < sizeof(uint32_t)) {
        return Status::Corruption(Substitute("Bad page, page size is too small, size=$0", page_size));
@ -173,7 +173,7 @@ void ColumnReader::_get_filtered_pages(CondColumn* cond_column, std::vector<uint
    std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type));
    std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type));
    for (int32_t i = 0; i < page_size; ++i) {
-        // min value and max value are valid if exisst_none_null is true
+        // min value and max value are valid if has_not_null is true
        if (zone_maps[i].has_not_null()) {
            min_value->from_string(zone_maps[i].min());
            max_value->from_string(zone_maps[i].max());
@ -264,10 +264,8 @@ Status FileColumnIterator::seek_to_first() {
 }

 Status FileColumnIterator::seek_to_ordinal(rowid_t rid) {
-    if (_page != nullptr && _page->contains(rid)) {
-        // current page contains this row, we just
-    } else {
-        // we need to seek to
+    // if current page contains this row, we don't need to seek
+    if (_page == nullptr || !_page->contains(rid)) {
        RETURN_IF_ERROR(_reader->seek_at_or_before(rid, &_page_iter));
        _page.reset(new ParsedPage());
        RETURN_IF_ERROR(_read_page(_page_iter, _page.get()));
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@ -46,12 +46,10 @@ class ParsedPage;
 class ColumnIterator;

 struct ColumnReaderOptions {
-    // If verify checksum when read page
+    // whether verify checksum when read page
    bool verify_checksum = true;
 };

-// Used to read one column's data. And user should pass ColumnData meta
-// when he want to read this column's data.
 // There will be concurrent users to read the same column. So
 // we should do our best to reduce resource usage through share
 // same information, such as OrdinalPageIndex and Page data.
@ -91,10 +89,7 @@ private:
    void _calculate_row_ranges(const std::vector<uint32_t>& page_indexes, RowRanges* row_ranges);

 private:
-    // input param
    ColumnReaderOptions _opts;
-    // we need colun data to parse column data.
-    // use shared_ptr here is to make things simple
    ColumnMetaPB _meta;
    uint64_t _num_rows;
    RandomAccessFile* _file = nullptr;
@ -130,8 +125,7 @@ public:
    // from Arena
    virtual Status next_batch(size_t* n, ColumnBlock* dst) = 0;

-    // Get current oridinal
-    virtual rowid_t get_current_oridinal() const = 0;
+    virtual rowid_t get_current_ordinal() const = 0;

 #if 0
    // Call this function every time before next_batch.
@ -170,8 +164,7 @@ public:

    Status next_batch(size_t* n, ColumnBlock* dst) override;

-    // Get current oridinal
-    rowid_t get_current_oridinal() const override { return _current_rowid; }
+    rowid_t get_current_ordinal() const override { return _current_rowid; }

 private:
    void _seek_to_pos_in_page(ParsedPage* page, uint32_t offset_in_page);
@ -181,10 +174,10 @@ private:
 private:
    ColumnReader* _reader;

-    // We define an operation is one seek and follwing read.
-    // If new seek is issued, there will be a new operation
-    // current page
-    // When _page is null, it means that this reader can not be read
+    // 1. The _page represents current page.
+    // 2. We define an operation is one seek and following read,
+    //    If new seek is issued, the _page will be reset.
+    // 3. When _page is null, it means that this reader can not be read.
    std::unique_ptr<ParsedPage> _page;

    // page iterator used to get next page when current page is finished.
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@ -129,8 +129,8 @@ Status ColumnWriter::append(const void* data, size_t num_rows) {
    return _append_data((const uint8_t**)&data, num_rows);
 }

-// append data to page builder. this funciton will make sure that
-// num_rows must be written before return. And ptr will be modifed
+// append data to page builder. this function will make sure that
+// num_rows must be written before return. And ptr will be modified
 // to next data should be written
 Status ColumnWriter::_append_data(const uint8_t** ptr, size_t num_rows) {
    size_t remaining = num_rows;
@ -247,7 +247,7 @@ Status ColumnWriter::_write_physical_page(std::vector<Slice>* origin_data, PageP
    std::vector<Slice>* output_data = origin_data;
    std::vector<Slice> compressed_data;

-    // Put compressor out of if block, because we should use compressor's
+    // Put compressor out of if block, because we will use compressor's
    // content until this function finished.
    PageCompressor compressor(_compress_codec);
    if (_compress_codec != nullptr) {
--- a/be/src/olap/rowset/segment_v2/page_compression.cpp
+++ b/be/src/olap/rowset/segment_v2/page_compression.cpp
@ -26,7 +26,7 @@ namespace segment_v2 {

 using strings::Substitute;

-Status PageDecompressor::decompress_to(Slice* content) {
+Status PageDecompressor::decompress_to(Slice* uncompressed_data) {
    if (_data.size < 4) {
        return Status::Corruption(
            Substitute("Compressed page's size is too small, size=$0, needed=$1",
@ -40,7 +40,7 @@ Status PageDecompressor::decompress_to(Slice* content) {
        // If compressed_slice's size is equal with _uncompressed_bytes, it means
        // compressor store this directly without compression. So we just copy
        // this to buf and return.
-        *content = compressed_slice;
+        *uncompressed_data = compressed_slice;
        return Status::OK();
    }
    std::unique_ptr<char[]> buf(new char[uncompressed_bytes]);
@ -54,7 +54,7 @@ Status PageDecompressor::decompress_to(Slice* content) {
            Substitute("Uncompressed size not match, record=$0 vs decompress=$1",
                       uncompressed_bytes, uncompressed_slice.size));
    }
-    *content = Slice(buf.release(), uncompressed_bytes);
+    *uncompressed_data = Slice(buf.release(), uncompressed_bytes);
    return Status::OK();
 }

--- a/be/src/olap/rowset/segment_v2/page_compression.h
+++ b/be/src/olap/rowset/segment_v2/page_compression.h
@ -38,7 +38,7 @@ namespace segment_v2 {
 // The type of compression codec for Data is stored elsewhere and should
 // be passed into the constructor.
 // Usage example:
-//     // page_size refers to page read from storage
+//     // page_slice refers to page read from storage
 //     PageDecompressor decompressor(page_slice, codec);
 //     // points to decompressed Data of the page (without footer)
 //     Slice uncompressed_slice;
@ -46,21 +46,21 @@ namespace segment_v2 {
 //     // use uncompressed_slice
 //     // we have a new buffer for decompressed page
 //     if (uncompressed_slice.data != page_slice.data) {
-//         delete[] uncompressed_bytes.data;
+//         delete[] page_slice.data;
 //     }
 class PageDecompressor {
 public:
-    PageDecompressor(const Slice& data, const BlockCompressionCodec* codec)
-        : _data(data), _codec(codec) {
+    PageDecompressor(const Slice& compressed_data, const BlockCompressionCodec* codec)
+        : _data(compressed_data), _codec(codec) {
    }
    
-    // This client will set uncompress content to input param.
-    // In normal case(content.data != input_data.data) client should
-    // call delete[] content.data to free heap memory. However
-    // when the data is not compressed, this function will return input data
-    // directly. In this case content.data == input_data.data,
+    // This client will set uncompress content to uncompressed_data.
+    // In normal case(compressed_data.data != uncompressed_data.data) client should
+    // call delete[] compressed_data.data to free heap memory. However
+    // when the data is not compressed, this function will return compressed_data
+    // directly. In this case compressed_data.data == uncompressed_data.data,
    // client should not free content.
-    Status decompress_to(Slice* content);
+    Status decompress_to(Slice* uncompressed_data);
 private:
    Slice _data;
    const BlockCompressionCodec* _codec;
--- a/be/src/olap/rowset/segment_v2/page_decoder.h
+++ b/be/src/olap/rowset/segment_v2/page_decoder.h
@ -24,7 +24,7 @@
 namespace doris {
 namespace segment_v2 {

-// PageDecoder is used to decode page page.
+// PageDecoder is used to decode page.
 class PageDecoder {
 public:
    PageDecoder() { }
--- a/be/src/olap/rowset/segment_v2/page_handle.h
+++ b/be/src/olap/rowset/segment_v2/page_handle.h
@ -28,7 +28,7 @@ namespace segment_v2 {
 // A page's data may be in cache, or may not in cache. We use this
 // class to unify these two cases.
 // If client use this struct to wrap data not in cache, this class
-// will free data's memory when it is destoryed.
+// will free data's memory when it is destroyed.
 class PageHandle {
 public:
    PageHandle() : _is_data_owner(false) { }
--- a/be/src/olap/rowset/segment_v2/row_ranges.h
+++ b/be/src/olap/rowset/segment_v2/row_ranges.h
@ -51,7 +51,7 @@ public:
        return false;
    }

-    // Returns true if the two ranges are overlapped or false.
+    // Returns true if the two ranges are intersected or false.
    // The intersection of the two ranges is returned through range.
    static bool range_intersection(const RowRange& left, const RowRange& right, RowRange* range) {
        if (left._from <= right._from) {
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@ -52,8 +52,8 @@ using SegmentSharedPtr = std::shared_ptr<Segment>;
 // And user can create a SegmentIterator through new_iterator function.
 //
 // NOTE: This segment is used to a specified TabletSchema, when TabletSchema
-// is changed, this segemnt can not be used any more. For eample, after a schema
-// change finished, client should disalbe all cahced Segment for old TabletSchema.
+// is changed, this segment can not be used any more. For example, after a schema
+// change finished, client should disable all cached Segment for old TabletSchema.
 class Segment : public std::enable_shared_from_this<Segment> {
 public:
    Segment(std::string fname, uint32_t segment_id,