diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h b/be/src/olap/rowset/segment_v2/binary_plain_page.h index 1a48894c97..0e9836bf0e 100644 --- a/be/src/olap/rowset/segment_v2/binary_plain_page.h +++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h @@ -293,16 +293,23 @@ public: return Slice(&_data[start_offset], len); } - void get_dict_word_info(StringRef* dict_word_info) { + Status get_dict_word_info(StringRef* dict_word_info) { if (UNLIKELY(_num_elems <= 0)) { - return; + return Status::OK(); } char* data_begin = (char*)&_data[0]; char* offset_ptr = (char*)&_data[_offsets_pos]; for (uint32_t i = 0; i < _num_elems; ++i) { - dict_word_info[i].data = data_begin + decode_fixed32_le((uint8_t*)offset_ptr); + uint32_t offset = decode_fixed32_le((uint8_t*)offset_ptr); + if (offset > _offsets_pos) { + return Status::Corruption( + "file corruption: offsets pos beyonds data_size: {}, num_element: {}" + ", offset_pos: {}, offset: {}", + _data.size, _num_elems, _offsets_pos, offset); + } + dict_word_info[i].data = data_begin + offset; offset_ptr += sizeof(uint32_t); } @@ -313,6 +320,7 @@ public: dict_word_info[_num_elems - 1].size = (data_begin + _offsets_pos) - (char*)dict_word_info[_num_elems - 1].data; + return Status::OK(); } private: diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 545a1ac8a3..f908232367 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -1289,7 +1289,7 @@ Status FileColumnIterator::_read_dict_data() { auto* pd_decoder = (BinaryPlainPageDecoder*)_dict_decoder.get(); _dict_word_info.reset(new StringRef[pd_decoder->_num_elems]); - pd_decoder->get_dict_word_info(_dict_word_info.get()); + RETURN_IF_ERROR(pd_decoder->get_dict_word_info(_dict_word_info.get())); return Status::OK(); }