Add page api for new format segment (#1270)
This commit is contained in:
78
be/src/olap/rowset/segment_v2/page_builder.h
Normal file
78
be/src/olap/rowset/segment_v2/page_builder.h
Normal file
@ -0,0 +1,78 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
#include "util/slice.h"
|
||||
#include "common/status.h"
|
||||
#include "olap/rowset/segment_v2/common.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
// PageBuilder is used to build page
|
||||
// Page is a data management unit, including:
|
||||
// 1. Data Page: store encoded and compressed data
|
||||
// 2. BloomFilter Page: store bloom filter of data
|
||||
// 3. Ordinal Index Page: store ordinal index of data
|
||||
// 4. Short Key Index Page: store short key index of data
|
||||
// 5. Bitmap Index Page: store bitmap index of data
|
||||
class PageBuilder {
|
||||
public:
|
||||
virtual ~PageBuilder() { }
|
||||
|
||||
// Used by column writer to determine whether the current page is full.
|
||||
// Column writer depends on the result to decide whether to flush current page.
|
||||
virtual bool is_page_full() = 0;
|
||||
|
||||
// Add a sequence of values to the page.
|
||||
// The number of values actually added will be returned through count, which may be less
|
||||
// than requested if the page is full.
|
||||
//
|
||||
// vals size should be decided according to the page build type
|
||||
virtual doris::Status add(const uint8_t* vals, size_t* count) = 0;
|
||||
|
||||
// Get the dictionary page for dictionary encoding mode column.
|
||||
virtual doris::Status get_dictionary_page(doris::Slice* dictionary_page);
|
||||
|
||||
// Get the bitmap page for bitmap indexed column.
|
||||
virtual doris::Status get_bitmap_page(doris::Slice* bitmap_page);
|
||||
|
||||
// Return a Slice which represents the encoded data of current page.
|
||||
//
|
||||
// This Slice points to internal data of this builder.
|
||||
virtual Slice finish(const rowid_t page_first_rowid) = 0;
|
||||
|
||||
// Reset the internal state of the page builder.
|
||||
//
|
||||
// Any data previously returned by finish may be invalidated by this call.
|
||||
virtual void reset() = 0;
|
||||
|
||||
// Return the number of entries that have been added to the page.
|
||||
virtual size_t count() const = 0;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(PageBuilder);
|
||||
};
|
||||
|
||||
} // namespace segment_v2
|
||||
|
||||
} // namespace doris
|
||||
81
be/src/olap/rowset/segment_v2/page_decoder.h
Normal file
81
be/src/olap/rowset/segment_v2/page_decoder.h
Normal file
@ -0,0 +1,81 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/vectorized_row_batch.h"
|
||||
#include "common/status.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
// PageDecoder is used to decode page page.
|
||||
class PageDecoder {
|
||||
public:
|
||||
virtual ~PageDecoder() { }
|
||||
|
||||
// Call this to do some preparation for decoder.
|
||||
// eg: parse data page header
|
||||
virtual doris::Status init() = 0;
|
||||
|
||||
// Seek the decoder to the given positional index of the page.
|
||||
// For example, seek_to_position_in_page(0) seeks to the first
|
||||
// stored entry.
|
||||
//
|
||||
// It is an error to call this with a value larger than Count().
|
||||
// Doing so has undefined results.
|
||||
virtual doris::Status seek_to_position_in_page(size_t pos) = 0;
|
||||
|
||||
// Seek the decoder forward by a given number of rows, or to the end
|
||||
// of the page. This is primarily used to skip over data.
|
||||
//
|
||||
// Return the step skipped.
|
||||
virtual size_t seek_forward(size_t n) {
|
||||
size_t step = std::min(n, count() - current_index());
|
||||
DCHECK_GE(step, 0);
|
||||
seek_to_position_in_page(current_index() + step);
|
||||
return step;
|
||||
}
|
||||
|
||||
// Fetch the next vector of values from the page into 'column_vector_view'.
|
||||
// The output vector must have space for up to n cells.
|
||||
//
|
||||
// Return the size of read entries .
|
||||
//
|
||||
// In the case that the values are themselves references
|
||||
// to other memory (eg Slices), the referred-to memory is
|
||||
// allocated in the column_vector_view's mem_pool.
|
||||
virtual doris::Status next_batch(size_t* n, doris::ColumnVectorView* column_vector_view) = 0;
|
||||
|
||||
// Return the number of elements in this page.
|
||||
virtual size_t count() const = 0;
|
||||
|
||||
// Return the position within the page of the currently seeked
|
||||
// entry (ie the entry that will next be returned by next_vector())
|
||||
virtual size_t current_index() const = 0;
|
||||
|
||||
// Return the first rowid stored in this page.
|
||||
virtual rowid_t get_first_rowid() const = 0;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(PageDecoder);
|
||||
};
|
||||
|
||||
} // namespace segment_v2
|
||||
|
||||
} // namespace doris
|
||||
@ -70,6 +70,33 @@ private:
|
||||
bool* _is_null = nullptr;
|
||||
};
|
||||
|
||||
class ColumnVectorView {
|
||||
public:
|
||||
explicit ColumnVectorView(ColumnVector* column_vector, size_t row_offset, MemPool* mem_pool)
|
||||
: _column_vector(column_vector), _row_offset(row_offset), _mem_pool(mem_pool) { }
|
||||
|
||||
void advance(size_t skip) {
|
||||
_row_offset += skip;
|
||||
}
|
||||
|
||||
size_t first_row_index() const {
|
||||
return _row_offset;
|
||||
}
|
||||
|
||||
ColumnVector* column_vector() {
|
||||
return _column_vector;
|
||||
}
|
||||
|
||||
MemPool* mem_pool() {
|
||||
return _mem_pool;
|
||||
}
|
||||
|
||||
private:
|
||||
ColumnVector* _column_vector;
|
||||
size_t _row_offset;
|
||||
MemPool* _mem_pool;
|
||||
};
|
||||
|
||||
class VectorizedRowBatch {
|
||||
public:
|
||||
VectorizedRowBatch(
|
||||
|
||||
Reference in New Issue
Block a user