From ccf2e5bb9ea686b104ec270890bbad68b7945534 Mon Sep 17 00:00:00 2001
From: kangpinghuang <40422952+kangpinghuang@users.noreply.github.com>
Date: Tue, 11 Jun 2019 10:37:16 +0800
Subject: [PATCH] Add page api for new format segment (#1270)

---
 be/src/olap/rowset/segment_v2/page_builder.h |  95 ++++++++++++++++++++
 be/src/olap/rowset/segment_v2/page_decoder.h | 104 ++++++++++++++++++++++
 be/src/runtime/vectorized_row_batch.h        |  33 +++++++
 3 files changed, 232 insertions(+)
 create mode 100644 be/src/olap/rowset/segment_v2/page_builder.h
 create mode 100644 be/src/olap/rowset/segment_v2/page_decoder.h

diff --git a/be/src/olap/rowset/segment_v2/page_builder.h b/be/src/olap/rowset/segment_v2/page_builder.h
new file mode 100644
index 0000000000..e6952766f4
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/page_builder.h
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+#include "util/slice.h"
+#include "common/status.h"
+#include "olap/rowset/segment_v2/common.h"
+
+namespace doris {
+
+namespace segment_v2 {
+
+// PageBuilder is the interface used to build a page.
+// A page is a data management unit. Page kinds include:
+// 1. Data Page: store encoded and compressed data
+// 2. BloomFilter Page: store bloom filter of data
+// 3. Ordinal Index Page: store ordinal index of data
+// 4. Short Key Index Page: store short key index of data
+// 5. Bitmap Index Page: store bitmap index of data
+class PageBuilder {
+public:
+    // Must be declared explicitly: DISALLOW_COPY_AND_ASSIGN below declares a
+    // copy constructor, which suppresses the implicit default constructor and
+    // would otherwise make every subclass impossible to construct.
+    PageBuilder() { }
+
+    virtual ~PageBuilder() { }
+
+    // Used by column writer to determine whether the current page is full.
+    // Column writer depends on the result to decide whether to flush current page.
+    virtual bool is_page_full() = 0;
+
+    // Add a sequence of values to the page.
+    // The number of values actually added will be returned through count, which may be less
+    // than requested if the page is full.
+    //
+    // vals size should be decided according to the page build type
+    virtual doris::Status add(const uint8_t* vals, size_t* count) = 0;
+
+    // Get the dictionary page for dictionary encoding mode column.
+    //
+    // Not pure virtual, so a builder without a dictionary page need not
+    // override it. This default reports NotSupported; it is defined inline
+    // because a declared-only virtual would leave such builders unlinkable.
+    virtual doris::Status get_dictionary_page(doris::Slice* dictionary_page) {
+        return doris::Status::NotSupported("get_dictionary_page not implemented");
+    }
+
+    // Get the bitmap page for bitmap indexed column.
+    //
+    // Not pure virtual, so a builder without a bitmap page need not
+    // override it. This default reports NotSupported; it is defined inline
+    // because a declared-only virtual would leave such builders unlinkable.
+    virtual doris::Status get_bitmap_page(doris::Slice* bitmap_page) {
+        return doris::Status::NotSupported("get_bitmap_page not implemented");
+    }
+
+    // Return a Slice which represents the encoded data of current page.
+    //
+    // This Slice points to internal data of this builder.
+    virtual Slice finish(const rowid_t page_first_rowid) = 0;
+
+    // Reset the internal state of the page builder.
+    //
+    // Any data previously returned by finish may be invalidated by this call.
+    virtual void reset() = 0;
+
+    // Return the number of entries that have been added to the page.
+    virtual size_t count() const = 0;
+
+private:
+    DISALLOW_COPY_AND_ASSIGN(PageBuilder);
+};
+
+} // namespace segment_v2
+
+} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h
new file mode 100644
index 0000000000..972864235a
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/page_decoder.h
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+
+#include "runtime/vectorized_row_batch.h"
+#include "common/status.h"
+#include "olap/rowset/segment_v2/common.h"
+
+namespace doris {
+
+namespace segment_v2 {
+
+// PageDecoder is the interface used to decode a page.
+class PageDecoder {
+public:
+    // Must be declared explicitly: DISALLOW_COPY_AND_ASSIGN below declares a
+    // copy constructor, which suppresses the implicit default constructor and
+    // would otherwise make every subclass impossible to construct.
+    PageDecoder() { }
+
+    virtual ~PageDecoder() { }
+
+    // Call this to do some preparation for decoder.
+    // eg: parse data page header
+    virtual doris::Status init() = 0;
+
+    // Seek the decoder to the given positional index of the page.
+    // For example, seek_to_position_in_page(0) seeks to the first
+    // stored entry.
+    //
+    // It is an error to call this with a value larger than count().
+    // Doing so has undefined results.
+    virtual doris::Status seek_to_position_in_page(size_t pos) = 0;
+
+    // Seek the decoder forward by a given number of rows, or to the end
+    // of the page. This is primarily used to skip over data.
+    //
+    // Return the number of rows actually skipped. This is less than n when
+    // fewer than n rows remain in the page, and 0 when the underlying
+    // seek_to_position_in_page() call reports a failure.
+    virtual size_t seek_forward(size_t n) {
+        const size_t cur = current_index();
+        const size_t total = count();
+        // Both values are unsigned: guard the subtraction so that a decoder
+        // positioned past the end cannot wrap around to a huge step.
+        const size_t remaining = cur < total ? total - cur : 0;
+        const size_t step = std::min(n, remaining);
+        if (step == 0) {
+            // Nothing to skip; the decoder stays at its current position.
+            return 0;
+        }
+        // Do not report rows as skipped if the seek itself was rejected.
+        if (!seek_to_position_in_page(cur + step).ok()) {
+            return 0;
+        }
+        return step;
+    }
+
+    // Fetch the next vector of values from the page into 'column_vector_view'.
+    // The output vector must have space for up to n cells.
+    //
+    // The return value is a Status, so the number of entries actually read
+    // is presumably reported back through n (confirm against implementations).
+    //
+    // In the case that the values are themselves references
+    // to other memory (eg Slices), the referred-to memory is
+    // allocated in the column_vector_view's mem_pool.
+    virtual doris::Status next_batch(size_t* n, doris::ColumnVectorView* column_vector_view) = 0;
+
+    // Return the number of elements in this page.
+    virtual size_t count() const = 0;
+
+    // Return the position within the page of the currently seeked
+    // entry (ie the entry that will next be returned by next_batch())
+    virtual size_t current_index() const = 0;
+
+    // Return the first rowid stored in this page.
+    virtual rowid_t get_first_rowid() const = 0;
+
+private:
+    DISALLOW_COPY_AND_ASSIGN(PageDecoder);
+};
+
+} // namespace segment_v2
+
+} // namespace doris
diff --git a/be/src/runtime/vectorized_row_batch.h b/be/src/runtime/vectorized_row_batch.h
index cdfe664cc2..f62601f8fb 100644
--- a/be/src/runtime/vectorized_row_batch.h
+++ b/be/src/runtime/vectorized_row_batch.h
@@ -70,6 +70,39 @@ private:
     bool* _is_null = nullptr;
 };
 
+// ColumnVectorView bundles a ColumnVector with a row offset and a MemPool.
+// It only stores the pointers it is given and never frees them.
+class ColumnVectorView {
+public:
+    explicit ColumnVectorView(ColumnVector* column_vector, size_t row_offset, MemPool* mem_pool)
+            : _column_vector(column_vector), _row_offset(row_offset), _mem_pool(mem_pool) { }
+
+    // Advance the row offset by 'skip' rows.
+    void advance(size_t skip) {
+        _row_offset += skip;
+    }
+
+    // Return the current row offset.
+    size_t first_row_index() const {
+        return _row_offset;
+    }
+
+    // Return the column vector passed to the constructor.
+    ColumnVector* column_vector() const {
+        return _column_vector;
+    }
+
+    // Return the mem pool passed to the constructor.
+    MemPool* mem_pool() const {
+        return _mem_pool;
+    }
+
+private:
+    ColumnVector* _column_vector;
+    size_t _row_offset;
+    MemPool* _mem_pool;
+};
+
 class VectorizedRowBatch {
 public:
     VectorizedRowBatch(