From 95397ff05d8fd1ecb11e95bd95475e0dcb48e7fc Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Tue, 17 Jan 2023 19:16:16 +0800 Subject: [PATCH] [refactor](array) remove dependency of ColumnBlock, ColumnBlockView (#16002) change to vectorized::MutableColumnPtr --- be/src/olap/rowset/segment_v2/bitshuffle_page.h | 14 ++++++++++---- be/src/olap/rowset/segment_v2/column_reader.cpp | 14 +++++--------- be/src/olap/rowset/segment_v2/column_reader.h | 1 - .../rowset/segment_v2/frame_of_reference_page.h | 4 ++-- be/src/olap/rowset/segment_v2/page_decoder.h | 4 ++-- be/src/olap/rowset/segment_v2/plain_page.h | 4 ---- 6 files changed, 19 insertions(+), 22 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index 02163a6a7f..4d2f2fc46d 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -383,7 +383,8 @@ public: return Status::OK(); } - Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override { + template + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) { DCHECK(_parsed); if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) { *n = 0; @@ -393,13 +394,18 @@ public: size_t max_fetch = std::min(*n, static_cast(_num_elements - _cur_index)); dst->insert_many_fix_len_data(get_data(_cur_index), max_fetch); - *n = max_fetch; - _cur_index += max_fetch; + if constexpr (forward_index) { + _cur_index += max_fetch; + } return Status::OK(); }; + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override { + return next_batch<>(n, dst); + }; + Status read_by_rowids(const rowid_t* rowids, ordinal_t page_first_ordinal, size_t* n, vectorized::MutableColumnPtr& dst) override { DCHECK(_parsed); @@ -426,7 +432,7 @@ public: return Status::OK(); } - Status peek_next_batch(size_t* n, ColumnBlockView* dst) override { + Status peek_next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override { 
return next_batch(n, dst); } diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 237e8abf8f..9cc6b9d4a6 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -486,21 +486,17 @@ Status ArrayFileColumnIterator::init(const ColumnIteratorOptions& opts) { if (_array_reader->is_nullable()) { RETURN_IF_ERROR(_null_iterator->init(opts)); } - const auto* offset_type_info = get_scalar_type_info(); - RETURN_IF_ERROR( - ColumnVectorBatch::create(1024, false, offset_type_info, nullptr, &_length_batch)); return Status::OK(); } Status ArrayFileColumnIterator::_peek_one_offset(ordinal_t* offset) { if (_offset_iterator->get_current_page()->has_remaining()) { PageDecoder* offset_page_decoder = _offset_iterator->get_current_page()->data_decoder; - ColumnBlock ordinal_block(_length_batch.get(), nullptr); - ColumnBlockView ordinal_view(&ordinal_block); - size_t i = 1; - RETURN_IF_ERROR(offset_page_decoder->peek_next_batch(&i, &ordinal_view)); // not null - DCHECK(i == 1); - *offset = *reinterpret_cast(_length_batch->data()); + vectorized::MutableColumnPtr offset_col = vectorized::ColumnUInt64::create(); + size_t n = 1; + RETURN_IF_ERROR(offset_page_decoder->peek_next_batch(&n, offset_col)); // not null + DCHECK(offset_col->size() == 1); + *offset = offset_col->get_uint(0); } else { *offset = _offset_iterator->get_current_page()->next_array_item_ordinal; } diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index c5dd729d82..30d5cb48a2 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -429,7 +429,6 @@ private: std::unique_ptr _offset_iterator; std::unique_ptr _null_iterator; std::unique_ptr _item_iterator; - std::unique_ptr _length_batch; Status _peek_one_offset(ordinal_t* offset); Status _seek_by_offsets(ordinal_t ord); diff --git 
a/be/src/olap/rowset/segment_v2/frame_of_reference_page.h b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h index 909aa35b76..283384ed2d 100644 --- a/be/src/olap/rowset/segment_v2/frame_of_reference_page.h +++ b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h @@ -165,8 +165,8 @@ public: return Status::NotSupported("frame page not implement vec op now"); }; - Status peek_next_batch(size_t* n, ColumnBlockView* dst) override { - return next_batch(n, dst); + Status peek_next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override { + return Status::NotSupported("frame page not implement vec op now"); } size_t count() const override { return _num_elements; } diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h index 9207825de3..70df3bf9bc 100644 --- a/be/src/olap/rowset/segment_v2/page_decoder.h +++ b/be/src/olap/rowset/segment_v2/page_decoder.h @@ -92,8 +92,8 @@ public: // Same as `next_batch` except for not moving forward the cursor. // When read array's ordinals in `ArrayFileColumnIterator`, we want to read one extra ordinal // but do not want to move forward the cursor. - virtual Status peek_next_batch(size_t* n, ColumnBlockView* dst) { - return Status::NotSupported("peek_next_batch"); + virtual Status peek_next_batch(size_t* n, vectorized::MutableColumnPtr& dst) { + return Status::NotSupported("not implement vec op now"); } // Return the number of elements in this page. diff --git a/be/src/olap/rowset/segment_v2/plain_page.h b/be/src/olap/rowset/segment_v2/plain_page.h index a38a3d7dc9..a69e71b31b 100644 --- a/be/src/olap/rowset/segment_v2/plain_page.h +++ b/be/src/olap/rowset/segment_v2/plain_page.h @@ -206,10 +206,6 @@ public: return Status::OK(); } - Status peek_next_batch(size_t* n, ColumnBlockView* dst) override { - return next_batch(n, dst); - } - size_t count() const override { DCHECK(_parsed); return _num_elems;