Add page api for new format segment (#1270)

This commit is contained in:
kangpinghuang
2019-06-11 10:37:16 +08:00
committed by ZHAO Chun
parent 922fa28097
commit ccf2e5bb9e
3 changed files with 186 additions and 0 deletions

View File

@ -0,0 +1,78 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stdint.h>
#include <vector>
#include "util/slice.h"
#include "common/status.h"
#include "olap/rowset/segment_v2/common.h"
namespace doris {
namespace segment_v2 {
// PageBuilder is the abstract interface used to build a page.
// A page is a data management unit, including:
// 1. Data Page: store encoded and compressed data
// 2. BloomFilter Page: store bloom filter of data
// 3. Ordinal Index Page: store ordinal index of data
// 4. Short Key Index Page: store short key index of data
// 5. Bitmap Index Page: store bitmap index of data
class PageBuilder {
public:
    // Declared explicitly because DISALLOW_COPY_AND_ASSIGN below declares a
    // copy constructor, which suppresses the implicitly generated default
    // constructor. Without it, subclasses have no base constructor to call.
    PageBuilder() { }

    virtual ~PageBuilder() { }

    // Used by column writer to determine whether the current page is full.
    // Column writer depends on the result to decide whether to flush current page.
    virtual bool is_page_full() = 0;

    // Add a sequence of values to the page.
    // The number of values actually added will be returned through count, which may be less
    // than requested if the page is full.
    //
    // The size of each element in vals should be decided according to the page build type.
    virtual doris::Status add(const uint8_t* vals, size_t* count) = 0;

    // Get the dictionary page for dictionary encoding mode column.
    //
    // NOTE(review): this is a non-pure virtual with no definition visible in
    // this header. Confirm a definition is provided elsewhere, otherwise
    // subclasses that do not override it will fail to link.
    virtual doris::Status get_dictionary_page(doris::Slice* dictionary_page);

    // Get the bitmap page for bitmap indexed column.
    //
    // NOTE(review): same as get_dictionary_page() -- declared without a
    // definition in this header; confirm one exists elsewhere.
    virtual doris::Status get_bitmap_page(doris::Slice* bitmap_page);

    // Return a Slice which represents the encoded data of current page.
    // page_first_rowid is the rowid of the first row stored in this page.
    //
    // This Slice points to internal data of this builder.
    virtual Slice finish(const rowid_t page_first_rowid) = 0;

    // Reset the internal state of the page builder.
    //
    // Any data previously returned by finish may be invalidated by this call.
    virtual void reset() = 0;

    // Return the number of entries that have been added to the page.
    virtual size_t count() const = 0;

private:
    DISALLOW_COPY_AND_ASSIGN(PageBuilder);
};
} // namespace segment_v2
} // namespace doris

View File

@ -0,0 +1,81 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "runtime/vectorized_row_batch.h"
#include "common/status.h"
namespace doris {
namespace segment_v2 {
// PageDecoder is used to decode page page.
class PageDecoder {
public:
virtual ~PageDecoder() { }
// Call this to do some preparation for decoder.
// eg: parse data page header
virtual doris::Status init() = 0;
// Seek the decoder to the given positional index of the page.
// For example, seek_to_position_in_page(0) seeks to the first
// stored entry.
//
// It is an error to call this with a value larger than Count().
// Doing so has undefined results.
virtual doris::Status seek_to_position_in_page(size_t pos) = 0;
// Seek the decoder forward by a given number of rows, or to the end
// of the page. This is primarily used to skip over data.
//
// Return the step skipped.
virtual size_t seek_forward(size_t n) {
size_t step = std::min(n, count() - current_index());
DCHECK_GE(step, 0);
seek_to_position_in_page(current_index() + step);
return step;
}
// Fetch the next vector of values from the page into 'column_vector_view'.
// The output vector must have space for up to n cells.
//
// Return the size of read entries .
//
// In the case that the values are themselves references
// to other memory (eg Slices), the referred-to memory is
// allocated in the column_vector_view's mem_pool.
virtual doris::Status next_batch(size_t* n, doris::ColumnVectorView* column_vector_view) = 0;
// Return the number of elements in this page.
virtual size_t count() const = 0;
// Return the position within the page of the currently seeked
// entry (ie the entry that will next be returned by next_vector())
virtual size_t current_index() const = 0;
// Return the first rowid stored in this page.
virtual rowid_t get_first_rowid() const = 0;
private:
DISALLOW_COPY_AND_ASSIGN(PageDecoder);
};
} // namespace segment_v2
} // namespace doris

View File

@ -70,6 +70,33 @@ private:
bool* _is_null = nullptr;
};
// ColumnVectorView bundles a ColumnVector pointer, a row offset into it and
// a MemPool pointer. The class defines no destructor, so it does not release
// either pointer; the offset can be moved forward with advance().
class ColumnVectorView {
public:
    // Stores the given column vector, starting row offset and memory pool.
    explicit ColumnVectorView(ColumnVector* column_vector, size_t row_offset, MemPool* mem_pool)
            : _column_vector(column_vector),
              _row_offset(row_offset),
              _mem_pool(mem_pool) {}

    // Move the view forward by `skip` rows.
    void advance(size_t skip) { _row_offset = _row_offset + skip; }

    // Current row offset of the view.
    size_t first_row_index() const { return _row_offset; }

    // The column vector this view was constructed with.
    ColumnVector* column_vector() { return _column_vector; }

    // The memory pool this view was constructed with.
    MemPool* mem_pool() { return _mem_pool; }

private:
    ColumnVector* _column_vector;
    size_t _row_offset;
    MemPool* _mem_pool;
};
class VectorizedRowBatch {
public:
VectorizedRowBatch(