From de725d5d44892df35c459979eaadfa06c54c678a Mon Sep 17 00:00:00 2001 From: yixiutt <102007456+yixiutt@users.noreply.github.com> Date: Tue, 14 Feb 2023 00:06:14 +0800 Subject: [PATCH] [bugfix](column_reader) index_page should not be pre-decoded (#16605) In our current logic, an index page is pre-decoded like any other page, but the pre-decode step usually just returns OK: an index page is built with BinaryPlainPageBuilder and the first 4 bytes of the page are an offset, so with high probability they are not equal to EncodingTypePB::DICT_ENCODING, which is 5. Code in bitshuffle_page_pre_decode.h: ``` if constexpr (USED_IN_DICT_ENCODING) { auto type = decode_fixed32_le((const uint8_t*)&data.data[0]); if (static_cast<EncodingTypePB>(type) != EncodingTypePB::DICT_ENCODING) { return Status::OK(); } size_of_dict_header = BINARY_DICT_PAGE_HEADER_SIZE; data.remove_prefix(4); } ``` But if those 4 bytes happen to equal EncodingTypePB::DICT_ENCODING, BitShuffle will be used to decode the BinaryPlainPage, which leads to a fatal error. --- be/src/olap/rowset/segment_v2/column_reader.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index abc2726e2a..9079118ecd 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -211,6 +211,10 @@ Status ColumnReader::read_page(const ColumnIteratorOptions& iter_opts, const Pag opts.type = iter_opts.type; opts.encoding_info = _encoding_info; opts.io_ctx = iter_opts.io_ctx; + // index page should not pre decode + if (iter_opts.type == INDEX_PAGE) { + opts.pre_decode = false; + } return PageIO::read_and_decompress_page(opts, handle, page_body, footer); }