Add OrdinalPageIndex and EncodingInfo (#1282)
This commit is contained in:
@ -579,7 +579,7 @@ FUNCTION(ADD_BE_TEST TEST_NAME)
|
||||
TARGET_LINK_LIBRARIES(${TEST_FILE_NAME} ${TEST_LINK_LIBS})
|
||||
SET_TARGET_PROPERTIES(${TEST_FILE_NAME} PROPERTIES COMPILE_FLAGS "-fno-access-control")
|
||||
if (NOT "${TEST_DIR_NAME}" STREQUAL "")
|
||||
SET_TARGET_PROPERTIES(${TEST_FILE_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}/${DIR_NAME}/${TEST_DIR_NAME}")
|
||||
SET_TARGET_PROPERTIES(${TEST_FILE_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}/${TEST_DIR_NAME}")
|
||||
endif()
|
||||
ADD_TEST(${TEST_FILE_NAME} "${BUILD_OUTPUT_ROOT_DIRECTORY}/${TEST_NAME}")
|
||||
ENDFUNCTION()
|
||||
|
||||
@ -83,4 +83,6 @@ add_library(Olap STATIC
|
||||
types.cpp
|
||||
utils.cpp
|
||||
wrapper_field.cpp
|
||||
rowset/segment_v2/ordinal_page_index.cpp
|
||||
rowset/segment_v2/encoding_info.cpp
|
||||
)
|
||||
|
||||
@ -1,28 +1,29 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace doris {
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
typedef uint32_t rowid_t;
|
||||
|
||||
} // namespace segment_v2
|
||||
|
||||
} // namespace doris
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
using rowid_t = uint32_t;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
131
be/src/olap/rowset/segment_v2/encoding_info.cpp
Normal file
131
be/src/olap/rowset/segment_v2/encoding_info.cpp
Normal file
@ -0,0 +1,131 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "olap/rowset/segment_v2/encoding_info.h"
|
||||
|
||||
#include "olap/olap_common.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
struct EncodingMapHash {
|
||||
size_t operator()(const std::pair<FieldType, EncodingTypePB>& pair) const {
|
||||
return (pair.first << 5) ^ pair.second;
|
||||
}
|
||||
};
|
||||
|
||||
template<FieldType type, EncodingTypePB encoding>
|
||||
struct TypeEncodingTraits { };
|
||||
|
||||
template<FieldType type>
|
||||
struct TypeEncodingTraits<type, PLAIN_ENCODING> {
|
||||
static Status create_page_builder(PageBuilder** builder) {
|
||||
return Status::OK;
|
||||
}
|
||||
static Status create_page_decoder(PageDecoder** decoder) {
|
||||
return Status::OK;
|
||||
}
|
||||
};
|
||||
|
||||
class EncodingInfoResolver {
|
||||
public:
|
||||
EncodingInfoResolver();
|
||||
~EncodingInfoResolver();
|
||||
|
||||
EncodingTypePB get_default_encoding_type(FieldType type) const {
|
||||
auto it = _default_encoding_type_map.find(type);
|
||||
if (it != std::end(_default_encoding_type_map)) {
|
||||
return it->second;
|
||||
}
|
||||
return DEFAULT_ENCODING;
|
||||
}
|
||||
|
||||
Status get(FieldType data_type, EncodingTypePB encoding_type, const EncodingInfo** out);
|
||||
|
||||
private:
|
||||
template<FieldType type, EncodingTypePB encoding_type>
|
||||
void _add_map() {
|
||||
TypeEncodingTraits<type, encoding_type> traits;
|
||||
std::unique_ptr<EncodingInfo> encoding(new EncodingInfo(traits));
|
||||
if (_default_encoding_type_map.find(type) == std::end(_default_encoding_type_map)) {
|
||||
_default_encoding_type_map[type] = encoding_type;
|
||||
}
|
||||
auto key = std::make_pair(type, encoding_type);
|
||||
_encoding_map.emplace(key, encoding.release());
|
||||
}
|
||||
|
||||
std::unordered_map<FieldType, EncodingTypePB, std::hash<int>> _default_encoding_type_map;
|
||||
|
||||
std::unordered_map<std::pair<FieldType, EncodingTypePB>,
|
||||
EncodingInfo*, EncodingMapHash> _encoding_map;
|
||||
};
|
||||
|
||||
EncodingInfoResolver::EncodingInfoResolver() {
|
||||
_add_map<OLAP_FIELD_TYPE_TINYINT, PLAIN_ENCODING>();
|
||||
_add_map<OLAP_FIELD_TYPE_SMALLINT, PLAIN_ENCODING>();
|
||||
_add_map<OLAP_FIELD_TYPE_INT, PLAIN_ENCODING>();
|
||||
_add_map<OLAP_FIELD_TYPE_BIGINT, PLAIN_ENCODING>();
|
||||
_add_map<OLAP_FIELD_TYPE_LARGEINT, PLAIN_ENCODING>();
|
||||
_add_map<OLAP_FIELD_TYPE_FLOAT, PLAIN_ENCODING>();
|
||||
_add_map<OLAP_FIELD_TYPE_DOUBLE, PLAIN_ENCODING>();
|
||||
}
|
||||
|
||||
// Deletes every EncodingInfo stored in the registry, then empties the map.
EncodingInfoResolver::~EncodingInfoResolver() {
    for (auto entry = _encoding_map.begin(); entry != _encoding_map.end(); ++entry) {
        delete entry->second;
    }
    _encoding_map.clear();
}
|
||||
|
||||
// Finds the EncodingInfo registered for (data_type, encoding_type).
// DEFAULT_ENCODING is first replaced by the default encoding of data_type.
// On success *out points at the registered EncodingInfo and Status::OK is
// returned; otherwise an error status is returned and *out is not written.
Status EncodingInfoResolver::get(FieldType data_type,
                                 EncodingTypePB encoding_type,
                                 const EncodingInfo** out) {
    const EncodingTypePB resolved_type = (encoding_type == DEFAULT_ENCODING)
            ? get_default_encoding_type(data_type)
            : encoding_type;
    const auto found = _encoding_map.find(std::make_pair(data_type, resolved_type));
    if (found != std::end(_encoding_map)) {
        *out = found->second;
        return Status::OK;
    }
    return Status("fail to find valid type encoding");
}
|
||||
|
||||
static EncodingInfoResolver s_encoding_info_resolver;
|
||||
|
||||
template<typename TraitsClass>
|
||||
EncodingInfo::EncodingInfo(TraitsClass traits)
|
||||
: _create_buidler_func(TraitsClass::create_page_builder),
|
||||
_create_decoder_func(TraitsClass::create_page_decoder) {
|
||||
}
|
||||
|
||||
Status EncodingInfo::get(const TypeInfo* type_info,
|
||||
EncodingTypePB encoding_type,
|
||||
const EncodingInfo** out) {
|
||||
// TODO(zc): type_info is ignored for now and BIGINT is always used
|
||||
return s_encoding_info_resolver.get(OLAP_FIELD_TYPE_BIGINT, encoding_type, out);
|
||||
}
|
||||
|
||||
EncodingTypePB EncodingInfo::get_default_encoding_type(const TypeInfo* type_info) {
|
||||
// TODO(zc): type_info is ignored for now and BIGINT is always used
|
||||
return s_encoding_info_resolver.get_default_encoding_type(OLAP_FIELD_TYPE_BIGINT);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
63
be/src/olap/rowset/segment_v2/encoding_info.h
Normal file
63
be/src/olap/rowset/segment_v2/encoding_info.h
Normal file
@ -0,0 +1,63 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "common/status.h"
|
||||
#include "gen_cpp/segment_v2.pb.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
class TypeInfo;
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
class PageBuilder;
|
||||
class PageDecoder;
|
||||
|
||||
class EncodingInfo {
|
||||
public:
|
||||
// Get EncodingInfo for TypeInfo and EncodingTypePB
|
||||
static Status get(const TypeInfo* type_info,
|
||||
EncodingTypePB encoding_type,
|
||||
const EncodingInfo** encoding);
|
||||
// Get the default encoding type for the given TypeInfo
|
||||
static EncodingTypePB get_default_encoding_type(const TypeInfo* type_info);
|
||||
|
||||
Status create_page_builder(PageBuilder** builder) const {
|
||||
return _create_buidler_func(builder);
|
||||
}
|
||||
Status create_page_decoder(PageDecoder** decoder) const {
|
||||
return _create_decoder_func(decoder);
|
||||
}
|
||||
private:
|
||||
friend class EncodingInfoResolver;
|
||||
|
||||
template<typename TypeEncodingTraits>
|
||||
EncodingInfo(TypeEncodingTraits traits);
|
||||
|
||||
using CreateBuilderFunc = std::function<Status(PageBuilder**)>;
|
||||
CreateBuilderFunc _create_buidler_func;
|
||||
|
||||
using CreateDecoderFunc = std::function<Status(PageDecoder**)>;
|
||||
CreateDecoderFunc _create_decoder_func;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
73
be/src/olap/rowset/segment_v2/ordinal_page_index.cpp
Normal file
73
be/src/olap/rowset/segment_v2/ordinal_page_index.cpp
Normal file
@ -0,0 +1,73 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "olap/rowset/segment_v2/ordinal_page_index.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
// Releases the arrays allocated by load().
OrdinalPageIndex::~OrdinalPageIndex() {
    // Both arrays come from new[], so they must be released with delete[];
    // scalar delete on them is undefined behavior. Deleting nullptr (index
    // never loaded) is a no-op.
    delete[] _rowids;
    delete[] _pages;
}
|
||||
|
||||
// Parses the index block held in _data into the _rowids and _pages arrays.
// Returns Status::OK on success. Returns a "Data corruption" status when the
// block is shorter than the header, when the declared entry count cannot fit
// in the block, or when an entry fails to decode. After a failure the index is
// left empty (_num_pages == 0, no arrays allocated).
Status OrdinalPageIndex::load() {
    // Drops whatever is currently loaded and returns the index to its
    // freshly-constructed state.
    auto reset = [this]() {
        delete[] _rowids;
        delete[] _pages;
        _rowids = nullptr;
        _pages = nullptr;
        _num_pages = 0;
    };
    // Release the result of a previous load() so that reloading does not leak.
    reset();

    // Checked at runtime (not only in debug builds): reading the header of a
    // shorter block would run past the end of the buffer.
    if (_data.size < _header_size()) {
        return Status("Data corruption");
    }
    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(_data.data);
    const uint8_t* limit = ptr + _data.size;

    const uint32_t num_pages = decode_fixed32_le(ptr);
    ptr += _header_size();

    // Reject counts that cannot be genuine before allocating anything: the
    // count must fit in the signed _num_pages member, and every entry needs at
    // least one byte of payload, so it cannot exceed the remaining byte count.
    const size_t payload_size = static_cast<size_t>(limit - ptr);
    if (num_pages > static_cast<uint32_t>(INT32_MAX) || num_pages > payload_size) {
        return Status("Data corruption");
    }

    _rowids = new rowid_t[num_pages];
    _pages = new PagePointer[num_pages];
    for (uint32_t i = 0; i < num_pages; ++i) {
        ptr = decode_varint32_ptr(ptr, limit, &_rowids[i]);
        if (ptr == nullptr) {
            reset();
            return Status("Data corruption");
        }
        ptr = _pages[i].decode_from(ptr, limit);
        if (ptr == nullptr) {
            reset();
            return Status("Data corruption");
        }
    }
    // Publish the count only after every entry has been decoded.
    _num_pages = static_cast<int32_t>(num_pages);
    return Status::OK;
}
|
||||
|
||||
// Binary-searches _rowids for the last entry whose rowid is <= rid.
// Returns an iterator positioned on that entry, or an iterator at position
// _num_pages (for which valid() is false) when the index is empty or every
// entry has a rowid greater than rid.
OrdinalPageIndexIterator OrdinalPageIndex::seek_at_or_before(rowid_t rid) {
    // An empty (or never loaded) index has no _rowids[0] to inspect.
    if (_num_pages <= 0) {
        return OrdinalPageIndexIterator(this, _num_pages);
    }
    int32_t left = 0;
    int32_t right = _num_pages - 1;
    while (left < right) {
        // Upper midpoint, written so that the sum cannot overflow int32_t.
        const int32_t mid = left + (right - left + 1) / 2;

        if (_rowids[mid] < rid) {
            left = mid;
        } else if (_rowids[mid] > rid) {
            right = mid - 1;
        } else {
            left = mid;
            break;
        }
    }
    // `left` is the only remaining candidate; it fails only when even this
    // entry starts after rid.
    if (_rowids[left] > rid) {
        return OrdinalPageIndexIterator(this, _num_pages);
    }
    return OrdinalPageIndexIterator(this, left);
}
|
||||
|
||||
}
|
||||
}
|
||||
133
be/src/olap/rowset/segment_v2/ordinal_page_index.h
Normal file
133
be/src/olap/rowset/segment_v2/ordinal_page_index.h
Normal file
@ -0,0 +1,133 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "common/status.h"
|
||||
#include "olap/rowset/segment_v2/common.h"
|
||||
#include "olap/rowset/segment_v2/page_pointer.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/slice.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
// This class encodes the ordinal page index.
// The binary format is:
// Header | Content
// Header:
//   number of elements (4 bytes, fixed-width little-endian)
// Content:
//   array of index_pair
// index_pair:
//   Ordinal (varint32)
//   PagePointer (offset as varint64 followed by size as varint32)
|
||||
class OrdinalPageIndexBuilder {
|
||||
public:
|
||||
OrdinalPageIndexBuilder() : _num_pages(0) {
|
||||
_buffer.reserve(4 * 1024);
|
||||
// reserve space for number of elements
|
||||
_buffer.resize(4);
|
||||
}
|
||||
|
||||
void append_entry(rowid_t rid, const PagePointer& page) {
|
||||
// rid
|
||||
put_varint32(&_buffer, rid);
|
||||
// page pointer
|
||||
page.encode_to(&_buffer);
|
||||
_num_pages++;
|
||||
}
|
||||
|
||||
Slice finish() {
|
||||
// encoded number of elements
|
||||
encode_fixed32_le((uint8_t*)_buffer.data(), _num_pages);
|
||||
return Slice(_buffer);
|
||||
}
|
||||
|
||||
private:
|
||||
std::string _buffer;
|
||||
uint32_t _num_pages;
|
||||
};
|
||||
|
||||
class OrdinalPageIndex;
|
||||
class OrdinalPageIndexIterator {
|
||||
public:
|
||||
OrdinalPageIndexIterator(OrdinalPageIndex* index) : _index(index), _cur_idx(-1) { }
|
||||
OrdinalPageIndexIterator(OrdinalPageIndex* index, int cur_idx) : _index(index), _cur_idx(cur_idx) { }
|
||||
inline bool valid() const;
|
||||
inline void next();
|
||||
inline rowid_t rowid() const;
|
||||
inline const PagePointer& page() const;
|
||||
private:
|
||||
OrdinalPageIndex* _index;
|
||||
int32_t _cur_idx;
|
||||
};
|
||||
|
||||
// Page index
|
||||
class OrdinalPageIndex {
|
||||
public:
|
||||
OrdinalPageIndex(const Slice& data)
|
||||
: _data(data), _num_pages(0), _rowids(nullptr), _pages(nullptr) {
|
||||
}
|
||||
~OrdinalPageIndex();
|
||||
|
||||
Status load();
|
||||
|
||||
OrdinalPageIndexIterator seek_at_or_before(rowid_t rid);
|
||||
OrdinalPageIndexIterator begin() {
|
||||
return OrdinalPageIndexIterator(this);
|
||||
}
|
||||
OrdinalPageIndexIterator end() {
|
||||
return OrdinalPageIndexIterator(this, _num_pages);
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t _header_size() const { return 4; }
|
||||
|
||||
private:
|
||||
friend OrdinalPageIndexIterator;
|
||||
|
||||
Slice _data;
|
||||
|
||||
// valid after laod
|
||||
int32_t _num_pages;
|
||||
rowid_t* _rowids;
|
||||
PagePointer* _pages;
|
||||
};
|
||||
|
||||
inline bool OrdinalPageIndexIterator::valid() const {
|
||||
return _cur_idx < _index->_num_pages;
|
||||
}
|
||||
|
||||
inline void OrdinalPageIndexIterator::next() {
|
||||
_cur_idx++;
|
||||
DCHECK_LT(_cur_idx, _index->_num_pages);
|
||||
}
|
||||
|
||||
inline rowid_t OrdinalPageIndexIterator::rowid() const {
|
||||
return _index->_rowids[_cur_idx];
|
||||
}
|
||||
|
||||
inline const PagePointer& OrdinalPageIndexIterator::page() const {
|
||||
return _index->_pages[_cur_idx];
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
58
be/src/olap/rowset/segment_v2/page_pointer.h
Normal file
58
be/src/olap/rowset/segment_v2/page_pointer.h
Normal file
@ -0,0 +1,58 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "gen_cpp/segment_v2.pb.h"
|
||||
#include "util/coding.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
struct PagePointer {
|
||||
uint64_t offset;
|
||||
uint32_t size;
|
||||
|
||||
PagePointer() : offset(0), size(0) { }
|
||||
PagePointer(uint64_t offset_, uint32_t size_) : offset(offset_), size(size_) { }
|
||||
PagePointer(const PagePointerPB& from) : offset(from.offset()), size(from.size()) { }
|
||||
|
||||
void to_proto(PagePointerPB* to) {
|
||||
to->set_offset(offset);
|
||||
to->set_size(size);
|
||||
}
|
||||
|
||||
const uint8_t* decode_from(const uint8_t* data, const uint8_t* limit) {
|
||||
data = decode_varint64_ptr(data, limit, &offset);
|
||||
if (data == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
return decode_varint32_ptr(data, limit, &size);
|
||||
}
|
||||
void encode_to(std::string* dst) const {
|
||||
put_varint64_varint32(dst, offset, size);
|
||||
}
|
||||
|
||||
bool operator==(const PagePointer& other) const {
|
||||
return offset == other.offset && size == other.size;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
@ -44,3 +44,5 @@ ADD_BE_TEST(serialize_test)
|
||||
ADD_BE_TEST(olap_meta_test)
|
||||
ADD_BE_TEST(olap_header_manager_test)
|
||||
ADD_BE_TEST(field_info_test)
|
||||
ADD_BE_TEST(rowset/segment_v2/ordinal_page_index_test)
|
||||
ADD_BE_TEST(rowset/segment_v2/encoding_info_test)
|
||||
|
||||
59
be/test/olap/rowset/segment_v2/encoding_info_test.cpp
Normal file
59
be/test/olap/rowset/segment_v2/encoding_info_test.cpp
Normal file
@ -0,0 +1,59 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "olap/rowset/segment_v2/encoding_info.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "olap/olap_common.h"
|
||||
#include "olap/types.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
class EncodingInfoTest : public testing::Test {
|
||||
public:
|
||||
EncodingInfoTest() { }
|
||||
virtual ~EncodingInfoTest() {
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(EncodingInfoTest, normal) {
|
||||
auto type_info = get_type_info(OLAP_FIELD_TYPE_BIGINT);
|
||||
const EncodingInfo* encoding_info = nullptr;
|
||||
auto status = EncodingInfo::get(type_info, PLAIN_ENCODING, &encoding_info);
|
||||
ASSERT_TRUE(status.ok());
|
||||
ASSERT_NE(nullptr, encoding_info);
|
||||
}
|
||||
|
||||
TEST_F(EncodingInfoTest, no_encoding) {
|
||||
auto type_info = get_type_info(OLAP_FIELD_TYPE_BIGINT);
|
||||
const EncodingInfo* encoding_info = nullptr;
|
||||
auto status = EncodingInfo::get(type_info, DICT_ENCODING, &encoding_info);
|
||||
ASSERT_FALSE(status.ok());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
105
be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp
Normal file
105
be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp
Normal file
@ -0,0 +1,105 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "olap/rowset/segment_v2/ordinal_page_index.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "common/logging.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
class OrdinalPageIndexTest : public testing::Test {
|
||||
public:
|
||||
OrdinalPageIndexTest() { }
|
||||
virtual ~OrdinalPageIndexTest() {
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(OrdinalPageIndexTest, normal) {
|
||||
// rowid, page pointer (offset, size)
// 1, (0, 16KB)
// 1 + 4096, (1 * 16KB, 16KB)
// every page is 16KB and holds 4096 rows
|
||||
OrdinalPageIndexBuilder builder;
|
||||
|
||||
// build an index with 16K (16 * 1024) page entries
|
||||
for (uint64_t i = 0; i < 16 * 1024; ++i) {
|
||||
builder.append_entry(1 + 4096 * i, {16 * 1024 * i, 16 * 1024});
|
||||
}
|
||||
|
||||
auto slice = builder.finish();
|
||||
LOG(INFO) << "index block's size=" << slice.size;
|
||||
|
||||
OrdinalPageIndex index(slice);
|
||||
auto st = index.load();
|
||||
ASSERT_TRUE(st.ok());
|
||||
|
||||
PagePointer page;
|
||||
{
|
||||
auto iter = index.seek_at_or_before(1);
|
||||
ASSERT_TRUE(iter.valid());
|
||||
ASSERT_EQ(1, iter.rowid());
|
||||
ASSERT_EQ(PagePointer(0, 16 * 1024), iter.page());
|
||||
}
|
||||
{
|
||||
auto iter = index.seek_at_or_before(4095);
|
||||
ASSERT_TRUE(iter.valid());
|
||||
ASSERT_EQ(1, iter.rowid());
|
||||
ASSERT_EQ(PagePointer(0, 16 * 1024), iter.page());
|
||||
}
|
||||
{
|
||||
auto iter = index.seek_at_or_before(4098);
|
||||
ASSERT_TRUE(iter.valid());
|
||||
ASSERT_EQ(4097, iter.rowid());
|
||||
ASSERT_EQ(PagePointer(1 * 16 * 1024, 16 * 1024), iter.page());
|
||||
|
||||
iter.next();
|
||||
ASSERT_TRUE(iter.valid());
|
||||
ASSERT_EQ(4097 + 4096, iter.rowid());
|
||||
ASSERT_EQ(PagePointer(2 * 16 * 1024, 16 * 1024), iter.page());
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
auto iter = index.seek_at_or_before(0);
|
||||
ASSERT_FALSE(iter.valid());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(OrdinalPageIndexTest, corrupt) {
|
||||
std::string str;
|
||||
str.resize(4);
|
||||
|
||||
encode_fixed32_le((uint8_t*)str.data(), 1);
|
||||
|
||||
Slice slice(str);
|
||||
OrdinalPageIndex index(slice);
|
||||
auto st = index.load();
|
||||
ASSERT_FALSE(st.ok());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
@ -16,6 +16,8 @@
|
||||
// under the License.
|
||||
// Define file format struct, like data header, index header.
|
||||
|
||||
syntax="proto2";
|
||||
|
||||
package doris.segment_v2;
|
||||
|
||||
message ColumnSchemaPB {
|
||||
@ -35,7 +37,7 @@ message ColumnSchemaPB {
|
||||
// page position info
|
||||
message PagePointerPB {
|
||||
required uint64 offset = 1; // offset in segment file
|
||||
required uint32 length = 2; // size of page in byte
|
||||
required uint32 size = 2; // size of page in byte
|
||||
}
|
||||
|
||||
message MetadataPairPB {
|
||||
@ -44,23 +46,23 @@ message MetadataPairPB {
|
||||
}
|
||||
|
||||
enum EncodingTypePB {
|
||||
PLAIN_ENCODING = 1;
|
||||
PREFIX_ENCODING = 2;
|
||||
UNKNOWN_ENCODING = 0;
|
||||
DEFAULT_ENCODING = 1;
|
||||
PLAIN_ENCODING = 2;
|
||||
PREFIX_ENCODING = 3;
|
||||
RLE = 4;
|
||||
DICT_ENCODING = 5;
|
||||
BIT_SHUFFLE = 6;
|
||||
UNKNOWN_ENCODING = 1000;
|
||||
}
|
||||
|
||||
enum CompressionTypePB {
|
||||
DEFAULT_COMPRESSION = 0;
|
||||
NO_COMPRESSION = 1;
|
||||
SNAPPY = 2;
|
||||
LZ4 = 3;
|
||||
ZLIB = 4;
|
||||
ZSTB = 5;
|
||||
LZO = 6;
|
||||
UNKNOWN_COMPRESSION = 1000;
|
||||
UNKNOWN_COMPRESSION = 0;
|
||||
DEFAULT_COMPRESSION = 1;
|
||||
NO_COMPRESSION = 2;
|
||||
SNAPPY = 3;
|
||||
LZ4 = 4;
|
||||
ZLIB = 5;
|
||||
ZSTB = 6;
|
||||
}
|
||||
|
||||
message ZoneMapPB {
|
||||
@ -70,23 +72,34 @@ message ZoneMapPB {
|
||||
}
|
||||
|
||||
message ColumnMetaPB {
|
||||
optional EncodingTypePB encoding = 1;
|
||||
// this field is FieldType's value
|
||||
optional int32 type = 1;
|
||||
optional EncodingTypePB encoding = 2;
|
||||
// compress type for column
|
||||
optional CompressionTypePB compress_type = 3;
|
||||
// if this column can be nullable
|
||||
optional bool is_nullable = 4;
|
||||
|
||||
optional PagePointerPB dict_page = 2;// dictionary page for DICT_ENCODING
|
||||
repeated PagePointerPB bloom_filter_pages = 3; // bloom filter pages for bloom filter column
|
||||
optional PagePointerPB ordinal_index_page = 4; // ordinal index page
|
||||
optional PagePointerPB page_zonemap_page = 5; // page zonemap info of column
|
||||
// // dictionary page for DICT_ENCODING
|
||||
// optional PagePointerPB dict_page = 2;
|
||||
|
||||
optional PagePointerPB bitmap_index_page = 6; // bitmap index page
|
||||
// // bloom filter pages for bloom filter column
|
||||
// repeated PagePointerPB bloom_filter_pages = 3;
|
||||
|
||||
optional uint64 data_footprint = 7; // data footprint of column after encoding and compress
|
||||
optional uint64 index_footprint = 8; // index footprint of column after encoding and compress
|
||||
optional uint64 raw_data_footprint = 9; // raw column data footprint
|
||||
// optional PagePointerPB ordinal_index_page = 4; // ordinal index page
|
||||
// optional PagePointerPB page_zonemap_page = 5; // page zonemap info of column
|
||||
|
||||
optional CompressionTypePB compress_type = 10; // compress type for column
|
||||
// optional PagePointerPB bitmap_index_page = 6; // bitmap index page
|
||||
|
||||
optional ZoneMapPB column_zonemap = 11; // column zonemap info
|
||||
repeated MetadataPairPB column_meta_datas = 12;
|
||||
// // data footprint of column after encoding and compress
|
||||
// optional uint64 data_footprint = 7;
|
||||
// // index footprint of column after encoding and compress
|
||||
// optional uint64 index_footprint = 8;
|
||||
// // raw column data footprint
|
||||
// optional uint64 raw_data_footprint = 9;
|
||||
|
||||
// optional ZoneMapPB column_zonemap = 11; // column zonemap info
|
||||
// repeated MetadataPairPB column_meta_datas = 12;
|
||||
}
|
||||
|
||||
message FileFooterPB {
|
||||
|
||||
@ -228,6 +228,8 @@ ${DORIS_TEST_BINARY_DIR}/olap/olap_header_manager_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/olap_meta_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/delta_writer_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/field_info_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/encoding_info_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/ordinal_page_index_test
|
||||
|
||||
# Running routine load test
|
||||
${DORIS_TEST_BINARY_DIR}/runtime/kafka_consumer_pipe_test
|
||||
|
||||
Reference in New Issue
Block a user