diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index b9886571dc..c4f0672a53 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -579,7 +579,7 @@ FUNCTION(ADD_BE_TEST TEST_NAME) TARGET_LINK_LIBRARIES(${TEST_FILE_NAME} ${TEST_LINK_LIBS}) SET_TARGET_PROPERTIES(${TEST_FILE_NAME} PROPERTIES COMPILE_FLAGS "-fno-access-control") if (NOT "${TEST_DIR_NAME}" STREQUAL "") - SET_TARGET_PROPERTIES(${TEST_FILE_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}/${DIR_NAME}/${TEST_DIR_NAME}") + SET_TARGET_PROPERTIES(${TEST_FILE_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}/${TEST_DIR_NAME}") endif() ADD_TEST(${TEST_FILE_NAME} "${BUILD_OUTPUT_ROOT_DIRECTORY}/${TEST_NAME}") ENDFUNCTION() diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index e0e1a82e05..51c76663a0 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -83,4 +83,6 @@ add_library(Olap STATIC types.cpp utils.cpp wrapper_field.cpp + rowset/segment_v2/ordinal_page_index.cpp + rowset/segment_v2/encoding_info.cpp ) diff --git a/be/src/olap/rowset/segment_v2/common.h b/be/src/olap/rowset/segment_v2/common.h index 892424c266..61e276f2dd 100644 --- a/be/src/olap/rowset/segment_v2/common.h +++ b/be/src/olap/rowset/segment_v2/common.h @@ -1,28 +1,29 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -namespace doris { - -namespace segment_v2 { - -typedef uint32_t rowid_t; - -} // namespace segment_v2 - -} // namespace doris +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +namespace doris { +namespace segment_v2 { + +using rowid_t = uint32_t; + +} +} diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp new file mode 100644 index 0000000000..33037b10a2 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/encoding_info.h" + +#include "olap/olap_common.h" + +namespace doris { +namespace segment_v2 { + +struct EncodingMapHash { + size_t operator()(const std::pair& pair) const { + return (pair.first << 5) ^ pair.second; + } +}; + +template +struct TypeEncodingTraits { }; + +template +struct TypeEncodingTraits { + static Status create_page_builder(PageBuilder** builder) { + return Status::OK; + } + static Status create_page_decoder(PageDecoder** decoder) { + return Status::OK; + } +}; + +class EncodingInfoResolver { +public: + EncodingInfoResolver(); + ~EncodingInfoResolver(); + + EncodingTypePB get_default_encoding_type(FieldType type) const { + auto it = _default_encoding_type_map.find(type); + if (it != std::end(_default_encoding_type_map)) { + return it->second; + } + return DEFAULT_ENCODING; + } + + Status get(FieldType data_type, EncodingTypePB encoding_type, const EncodingInfo** out); + +private: + template + void _add_map() { + TypeEncodingTraits traits; + std::unique_ptr encoding(new EncodingInfo(traits)); + if (_default_encoding_type_map.find(type) == std::end(_default_encoding_type_map)) { + _default_encoding_type_map[type] = encoding_type; + } + auto key = std::make_pair(type, encoding_type); + _encoding_map.emplace(key, encoding.release()); + } + + std::unordered_map> _default_encoding_type_map; + + std::unordered_map, + EncodingInfo*, 
EncodingMapHash> _encoding_map; +}; + +EncodingInfoResolver::EncodingInfoResolver() { + _add_map(); + _add_map(); + _add_map(); + _add_map(); + _add_map(); + _add_map(); + _add_map(); +} + +EncodingInfoResolver::~EncodingInfoResolver() { + for (auto& it : _encoding_map) { + delete it.second; + } + _encoding_map.clear(); +} + +Status EncodingInfoResolver::get( + FieldType data_type, + EncodingTypePB encoding_type, + const EncodingInfo** out) { + if (encoding_type == DEFAULT_ENCODING) { + encoding_type = get_default_encoding_type(data_type); + } + auto key = std::make_pair(data_type, encoding_type); + auto it = _encoding_map.find(key); + if (it == std::end(_encoding_map)) { + return Status("fail to find valid type encoding"); + } + *out = it->second; + return Status::OK; +} + +static EncodingInfoResolver s_encoding_info_resolver; + +template +EncodingInfo::EncodingInfo(TraitsClass traits) + : _create_buidler_func(TraitsClass::create_page_builder), + _create_decoder_func(TraitsClass::create_page_decoder) { +} + +Status EncodingInfo::get(const TypeInfo* type_info, + EncodingTypePB encoding_type, + const EncodingInfo** out) { + // TODO(zc): use BIGINT + return s_encoding_info_resolver.get(OLAP_FIELD_TYPE_BIGINT, encoding_type, out); +} + +EncodingTypePB EncodingInfo::get_default_encoding_type(const TypeInfo* type_info) { + // TODO(zc): use BIGINT + return s_encoding_info_resolver.get_default_encoding_type(OLAP_FIELD_TYPE_BIGINT); +} + +} +} diff --git a/be/src/olap/rowset/segment_v2/encoding_info.h b/be/src/olap/rowset/segment_v2/encoding_info.h new file mode 100644 index 0000000000..887636a9a7 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/encoding_info.h @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/status.h" +#include "gen_cpp/segment_v2.pb.h" + +namespace doris { + +class TypeInfo; + +namespace segment_v2 { + +class PageBuilder; +class PageDecoder; + +class EncodingInfo { +public: + // Get EncodingInfo for TypeInfo and EncodingTypePB + static Status get(const TypeInfo* type_info, + EncodingTypePB encoding_type, + const EncodingInfo** encoding); + // Get default type info + static EncodingTypePB get_default_encoding_type(const TypeInfo* type_info); + + Status create_page_builder(PageBuilder** builder) const { + return _create_buidler_func(builder); + } + Status create_page_decoder(PageDecoder** decoder) const { + return _create_decoder_func(decoder); + } +private: + friend class EncodingInfoResolver; + + template + EncodingInfo(TypeEncodingTraits traits); + + using CreateBuilderFunc = std::function; + CreateBuilderFunc _create_buidler_func; + + using CreateDecoderFunc = std::function; + CreateDecoderFunc _create_decoder_func; +}; + +} +} diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp new file mode 100644 index 0000000000..9bd96b1fc0 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/ordinal_page_index.h"
+
+namespace doris {
+namespace segment_v2 {
+
+// Frees the arrays built by load(). Both pointers are still nullptr when
+// load() was never called, and delete[] on nullptr is a no-op.
+OrdinalPageIndex::~OrdinalPageIndex() {
+    // both arrays come from new[] in load(), so they need the array form of delete
+    delete[] _rowids;
+    delete[] _pages;
+}
+
+// Parses the serialized index held in _data: a fixed32 little-endian entry
+// count followed by one (varint32 rowid, PagePointer) pair per entry.
+// On success _num_pages, _rowids and _pages are populated.
+// Returns a "Data corruption" status when the buffer is shorter than the
+// header, when the entry count cannot fit in the remaining bytes, or when an
+// entry fails to decode.
+Status OrdinalPageIndex::load() {
+    // Checked at runtime instead of with DCHECK so that a truncated block is
+    // rejected in every build type before the header is read.
+    if (_data.size < _header_size()) {
+        return Status("Data corruption");
+    }
+    const uint8_t* ptr = (const uint8_t*)_data.data;
+    const uint8_t* limit = ptr + _data.size;
+
+    const uint32_t num_pages = decode_fixed32_le(ptr);
+    ptr += _header_size();
+
+    // Every entry occupies at least one byte, so a count larger than the bytes
+    // left cannot be genuine. Rejecting it here stops a corrupt header from
+    // driving an oversized allocation below.
+    if (num_pages > static_cast<size_t>(limit - ptr)) {
+        return Status("Data corruption");
+    }
+    _num_pages = num_pages;
+
+    _rowids = new rowid_t[_num_pages];
+    _pages = new PagePointer[_num_pages];
+    for (int i = 0; i < _num_pages; ++i) {
+        // both decoders return nullptr when they would run past `limit`
+        ptr = decode_varint32_ptr(ptr, limit, &_rowids[i]);
+        if (ptr == nullptr) {
+            return Status("Data corruption");
+        }
+        ptr = _pages[i].decode_from(ptr, limit);
+        if (ptr == nullptr) {
+            return Status("Data corruption");
+        }
+    }
+    return Status::OK;
+}
+
+// Returns an iterator positioned on the last entry whose rowid is <= rid.
+// Returns end() when no such entry exists, which includes an empty or
+// not-yet-loaded index.
+OrdinalPageIndexIterator OrdinalPageIndex::seek_at_or_before(rowid_t rid) {
+    int32_t left = 0;
+    int32_t right = _num_pages - 1;
+    while (left < right) {
+        // round the midpoint up so that `left = mid` always makes progress
+        int32_t mid = (left + right + 1) / 2;
+
+        if (_rowids[mid] < rid) {
+            left = mid;
+        } else if (_rowids[mid] > rid) {
+            right = mid - 1;
+        } else {
+            left = mid;
+            break;
+        }
+    }
+    // With zero entries there is no _rowids[0] to inspect, so test the count
+    // first; otherwise `left` is the only remaining candidate.
+    if (_num_pages == 0 || _rowids[left] > rid) {
+        return OrdinalPageIndexIterator(this, _num_pages);
+    }
+    return OrdinalPageIndexIterator(this, left);
+}
+
+}
+}
diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.h b/be/src/olap/rowset/segment_v2/ordinal_page_index.h
new file mode 100644
index 0000000000..a1845ab30c
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.h
@@ -0,0 +1,133 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "common/status.h"
+#include "olap/rowset/segment_v2/common.h"
+#include "olap/rowset/segment_v2/page_pointer.h"
+#include "util/coding.h"
+#include "util/slice.h"
+
+namespace doris {
+namespace segment_v2 {
+
+// Builds the serialized form of an ordinal page index.
+// The binary layout is:
+//   Header | Content
+// Header:
+//      number of entries (fixed 4 bytes, little endian)
+// Content:
+//      array of index_pair
+// index_pair:
+//      rowid (varint32, so variable length)
+//      PagePointer (as written by PagePointer::encode_to, also variable length)
+class OrdinalPageIndexBuilder {
+public:
+    OrdinalPageIndexBuilder() : _num_pages(0) {
+        _buffer.reserve(4 * 1024);
+        // reserve space for the entry count, which finish() fills in
+        _buffer.resize(4);
+    }
+
+    // Appends one (rowid, page pointer) entry to the buffer.
+    void append_entry(rowid_t rid, const PagePointer& page) {
+        // rid
+        put_varint32(&_buffer, rid);
+        // page pointer
+        page.encode_to(&_buffer);
+        _num_pages++;
+    }
+
+    // Writes the entry count into the reserved header bytes and returns a
+    // Slice constructed from the internal buffer.
+    // NOTE(review): presumably the Slice only references _buffer, so it would
+    // be usable only while this builder is alive and unmodified -- confirm.
+    Slice finish() {
+        // encoded number of elements
+        encode_fixed32_le((uint8_t*)_buffer.data(), _num_pages);
+        return Slice(_buffer);
+    }
+
+private:
+    std::string _buffer;
+    uint32_t _num_pages;
+};
+
+class OrdinalPageIndex;
+// Cursor over the entries of an OrdinalPageIndex.
+class OrdinalPageIndexIterator {
+public:
+    // Positions the iterator on the first entry. It starts at 0 rather than -1
+    // because valid() only checks the upper bound: an index of -1 would be
+    // reported as valid while rowid()/page() read in front of the arrays.
+    OrdinalPageIndexIterator(OrdinalPageIndex* index) : _index(index), _cur_idx(0) { }
+    // Positions the iterator on entry `cur_idx`; passing the entry count
+    // yields the end() position.
+    OrdinalPageIndexIterator(OrdinalPageIndex* index, int cur_idx) : _index(index), _cur_idx(cur_idx) { }
+    // True while the iterator is positioned before the end of the index.
+    inline bool valid() const;
+    // Advances by one entry; must only be called while valid().
+    inline void next();
+    // Rowid of the current entry; only meaningful while valid().
+    inline rowid_t rowid() const;
+    // Page pointer of the current entry; only meaningful while valid().
+    inline const PagePointer& page() const;
+private:
+    OrdinalPageIndex* _index;
+    int32_t _cur_idx;
+};
+
+// Read-side view of a serialized ordinal page index. load() has to succeed
+// before the decoded entries can be searched or iterated.
+class OrdinalPageIndex {
+public:
+    // `data` holds the serialized index; only the Slice itself is stored here.
+    // NOTE(review): presumably the caller must keep the underlying bytes alive
+    // until load() has run -- confirm.
+    OrdinalPageIndex(const Slice& data)
+        : _data(data), _num_pages(0), _rowids(nullptr), _pages(nullptr) {
+    }
+    ~OrdinalPageIndex();
+
+    // Decodes _data into _rowids/_pages; returns an error status on bad input.
+    Status load();
+
+    // Iterator on the last entry whose rowid is <= rid, or end() if none.
+    OrdinalPageIndexIterator seek_at_or_before(rowid_t rid);
+    // Iterator on the first entry (equal to end() when there are no entries).
+    OrdinalPageIndexIterator begin() {
+        return OrdinalPageIndexIterator(this);
+    }
+    // Iterator one past the last entry; valid() is false for it.
+    OrdinalPageIndexIterator end() {
+        return OrdinalPageIndexIterator(this, _num_pages);
+    }
+
+private:
+    // size in bytes of the fixed32 entry count at the start of the index
+    uint32_t _header_size() const { return 4; }
+
+private:
+    friend OrdinalPageIndexIterator;
+
+    Slice _data;
+
+    // valid after load
+    int32_t _num_pages;
+    rowid_t* _rowids;
+    PagePointer* _pages;
+};
+
+inline bool OrdinalPageIndexIterator::valid() const {
+    return _cur_idx < _index->_num_pages;
+}
+
+inline void OrdinalPageIndexIterator::next() {
+    // Assert before incrementing: moving from the last entry onto end() is
+    // legal and must not trip the check, while advancing an iterator that is
+    // already at end() still does.
+    DCHECK_LT(_cur_idx, _index->_num_pages);
+    _cur_idx++;
+}
+
+inline rowid_t OrdinalPageIndexIterator::rowid() const {
+    return _index->_rowids[_cur_idx];
+}
+
+inline const PagePointer& OrdinalPageIndexIterator::page() const {
+    return _index->_pages[_cur_idx];
+}
+
+}
+}
diff --git a/be/src/olap/rowset/segment_v2/page_pointer.h b/be/src/olap/rowset/segment_v2/page_pointer.h
new file mode 100644
index 0000000000..339ba45e88
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/page_pointer.h
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "gen_cpp/segment_v2.pb.h"
+#include "util/coding.h"
+
+namespace doris {
+namespace segment_v2 {
+
+// Location of a page: a 64-bit offset plus a 32-bit size. Convertible to and
+// from PagePointerPB and to and from a compact varint byte encoding.
+struct PagePointer {
+    uint64_t offset;
+    uint32_t size;
+
+    PagePointer() : offset(0), size(0) { }
+    PagePointer(uint64_t offset_, uint32_t size_) : offset(offset_), size(size_) { }
+    PagePointer(const PagePointerPB& from) : offset(from.offset()), size(from.size()) { }
+
+    // Copies offset and size into `to`. Declared const because it does not
+    // modify this object, which lets it be called through a const reference.
+    void to_proto(PagePointerPB* to) const {
+        to->set_offset(offset);
+        to->set_size(size);
+    }
+
+    // Decodes a varint64 offset followed by a varint32 size from [data, limit).
+    // Returns the position just past the decoded bytes, or nullptr when either
+    // field cannot be decoded. If only the size fails, `offset` has already
+    // been overwritten.
+    const uint8_t* decode_from(const uint8_t* data, const uint8_t* limit) {
+        data = decode_varint64_ptr(data, limit, &offset);
+        if (data == nullptr) {
+            return nullptr;
+        }
+        return decode_varint32_ptr(data, limit, &size);
+    }
+    // Appends the varint64 offset and varint32 size to `dst`, the inverse of
+    // decode_from().
+    void encode_to(std::string* dst) const {
+        put_varint64_varint32(dst, offset, size);
+    }
+
+    bool operator==(const PagePointer& other) const {
+        return offset == other.offset && size == other.size;
+    }
+};
+
+}
+}
diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt
index 921c1dbedd..3084c03f04 100644
--- a/be/test/olap/CMakeLists.txt
+++ b/be/test/olap/CMakeLists.txt
@@ -44,3 +44,5 @@ ADD_BE_TEST(serialize_test)
 ADD_BE_TEST(olap_meta_test)
 ADD_BE_TEST(olap_header_manager_test)
 ADD_BE_TEST(field_info_test)
+ADD_BE_TEST(rowset/segment_v2/ordinal_page_index_test)
+ADD_BE_TEST(rowset/segment_v2/encoding_info_test)
diff --git a/be/test/olap/rowset/segment_v2/encoding_info_test.cpp b/be/test/olap/rowset/segment_v2/encoding_info_test.cpp
new file mode 100644
index 0000000000..b4690f48d5
--- /dev/null
+++ b/be/test/olap/rowset/segment_v2/encoding_info_test.cpp
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/encoding_info.h" + +#include +#include + +#include "common/logging.h" +#include "olap/olap_common.h" +#include "olap/types.h" + +namespace doris { +namespace segment_v2 { + +class EncodingInfoTest : public testing::Test { +public: + EncodingInfoTest() { } + virtual ~EncodingInfoTest() { + } +}; + +TEST_F(EncodingInfoTest, normal) { + auto type_info = get_type_info(OLAP_FIELD_TYPE_BIGINT); + const EncodingInfo* encoding_info = nullptr; + auto status = EncodingInfo::get(type_info, PLAIN_ENCODING, &encoding_info); + ASSERT_TRUE(status.ok()); + ASSERT_NE(nullptr, encoding_info); +} + +TEST_F(EncodingInfoTest, no_encoding) { + auto type_info = get_type_info(OLAP_FIELD_TYPE_BIGINT); + const EncodingInfo* encoding_info = nullptr; + auto status = EncodingInfo::get(type_info, DICT_ENCODING, &encoding_info); + ASSERT_FALSE(status.ok()); +} + +} +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp b/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp new file mode 100644 index 0000000000..f2789621a8 --- /dev/null +++ b/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/ordinal_page_index.h" + +#include +#include + +#include "common/logging.h" + +namespace doris { +namespace segment_v2 { + +class OrdinalPageIndexTest : public testing::Test { +public: + OrdinalPageIndexTest() { } + virtual ~OrdinalPageIndexTest() { + } +}; + +TEST_F(OrdinalPageIndexTest, normal) { + // rowid, page pointer + // 1, (0, 4096) + // 1 + 4096, (1 * 4096, 4096) + // a page have 16KB, and have 4096 rows + OrdinalPageIndexBuilder builder; + + // we test a 16KB page + for (uint64_t i = 0; i < 16 * 1024; ++i) { + builder.append_entry(1 + 4096 * i, {16 * 1024 * i, 16 * 1024}); + } + + auto slice = builder.finish(); + LOG(INFO) << "index block's size=" << slice.size; + + OrdinalPageIndex index(slice); + auto st = index.load(); + ASSERT_TRUE(st.ok()); + + PagePointer page; + { + auto iter = index.seek_at_or_before(1); + ASSERT_TRUE(iter.valid()); + ASSERT_EQ(1, iter.rowid()); + ASSERT_EQ(PagePointer(0, 16 * 1024), iter.page()); + } + { + auto iter = index.seek_at_or_before(4095); + ASSERT_TRUE(iter.valid()); + ASSERT_EQ(1, iter.rowid()); + ASSERT_EQ(PagePointer(0, 16 * 1024), iter.page()); + } + { + auto iter = index.seek_at_or_before(4098); + ASSERT_TRUE(iter.valid()); + ASSERT_EQ(4097, iter.rowid()); + ASSERT_EQ(PagePointer(1 * 16 * 
1024, 16 * 1024), iter.page()); + + iter.next(); + ASSERT_TRUE(iter.valid()); + ASSERT_EQ(4097 + 4096, iter.rowid()); + ASSERT_EQ(PagePointer(2 * 16 * 1024, 16 * 1024), iter.page()); + + } + + { + auto iter = index.seek_at_or_before(0); + ASSERT_FALSE(iter.valid()); + } +} + +TEST_F(OrdinalPageIndexTest, corrupt) { + std::string str; + str.resize(4); + + encode_fixed32_le((uint8_t*)str.data(), 1); + + Slice slice(str); + OrdinalPageIndex index(slice); + auto st = index.load(); + ASSERT_FALSE(st.ok()); +} + +} +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto index 68603b54e3..5057ee7f19 100644 --- a/gensrc/proto/segment_v2.proto +++ b/gensrc/proto/segment_v2.proto @@ -16,6 +16,8 @@ // under the License. // Define file format struct, like data header, index header. +syntax="proto2"; + package doris.segment_v2; message ColumnSchemaPB { @@ -35,7 +37,7 @@ message ColumnSchemaPB { // page position info message PagePointerPB { required uint64 offset = 1; // offset in segment file - required uint32 length = 2; // size of page in byte + required uint32 size = 2; // size of page in byte } message MetadataPairPB { @@ -44,23 +46,23 @@ message MetadataPairPB { } enum EncodingTypePB { - PLAIN_ENCODING = 1; - PREFIX_ENCODING = 2; + UNKNOWN_ENCODING = 0; + DEFAULT_ENCODING = 1; + PLAIN_ENCODING = 2; + PREFIX_ENCODING = 3; RLE = 4; DICT_ENCODING = 5; BIT_SHUFFLE = 6; - UNKNOWN_ENCODING = 1000; } enum CompressionTypePB { - DEFAULT_COMPRESSION = 0; - NO_COMPRESSION = 1; - SNAPPY = 2; - LZ4 = 3; - ZLIB = 4; - ZSTB = 5; - LZO = 6; - UNKNOWN_COMPRESSION = 1000; + UNKNOWN_COMPRESSION = 0; + DEFAULT_COMPRESSION = 1; + NO_COMPRESSION = 2; + SNAPPY = 3; + LZ4 = 4; + ZLIB = 5; + ZSTB = 6; } message ZoneMapPB { @@ -70,23 +72,34 @@ message ZoneMapPB { } message ColumnMetaPB { - optional EncodingTypePB encoding = 1; + // this field is FieldType's value + 
optional int32 type = 1; + optional EncodingTypePB encoding = 2; + // compress type for column + optional CompressionTypePB compress_type = 3; + // if this column can be nullable + optional bool is_nullable = 4; - optional PagePointerPB dict_page = 2;// dictionary page for DICT_ENCODING - repeated PagePointerPB bloom_filter_pages = 3; // bloom filter pages for bloom filter column - optional PagePointerPB ordinal_index_page = 4; // ordinal index page - optional PagePointerPB page_zonemap_page = 5; // page zonemap info of column + // // dictionary page for DICT_ENCODING + // optional PagePointerPB dict_page = 2; - optional PagePointerPB bitmap_index_page = 6; // bitmap index page + // // bloom filter pages for bloom filter column + // repeated PagePointerPB bloom_filter_pages = 3; - optional uint64 data_footprint = 7; // data footprint of column after encoding and compress - optional uint64 index_footprint = 8; // index footprint of column after encoding and compress - optional uint64 raw_data_footprint = 9; // raw column data footprint + // optional PagePointerPB ordinal_index_page = 4; // ordinal index page + // optional PagePointerPB page_zonemap_page = 5; // page zonemap info of column - optional CompressionTypePB compress_type = 10; // compress type for column + // optional PagePointerPB bitmap_index_page = 6; // bitmap index page - optional ZoneMapPB column_zonemap = 11; // column zonemap info - repeated MetadataPairPB column_meta_datas = 12; + // // data footprint of column after encoding and compress + // optional uint64 data_footprint = 7; + // // index footprint of column after encoding and compress + // optional uint64 index_footprint = 8; + // // raw column data footprint + // optional uint64 raw_data_footprint = 9; + + // optional ZoneMapPB column_zonemap = 11; // column zonemap info + // repeated MetadataPairPB column_meta_datas = 12; } message FileFooterPB { diff --git a/run-ut.sh b/run-ut.sh index 001622bff6..326aff2b0a 100755 --- a/run-ut.sh +++ 
b/run-ut.sh @@ -228,6 +228,8 @@ ${DORIS_TEST_BINARY_DIR}/olap/olap_header_manager_test ${DORIS_TEST_BINARY_DIR}/olap/olap_meta_test ${DORIS_TEST_BINARY_DIR}/olap/delta_writer_test ${DORIS_TEST_BINARY_DIR}/olap/field_info_test +${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/encoding_info_test +${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/ordinal_page_index_test # Running routine load test ${DORIS_TEST_BINARY_DIR}/runtime/kafka_consumer_pipe_test