diff --git a/src/storage/blocksstable/encoding/ob_dict_encoder.cpp b/src/storage/blocksstable/encoding/ob_dict_encoder.cpp
index 768b023f5c..c7af965df6 100644
--- a/src/storage/blocksstable/encoding/ob_dict_encoder.cpp
+++ b/src/storage/blocksstable/encoding/ob_dict_encoder.cpp
@@ -95,7 +95,7 @@ int ObDictEncoder::traverse(bool &suitable)
     count_ = ht_->size();
 
     const bool enable_bit_packing = ctx_->encoding_ctx_->encoder_opt_.enable_bit_packing_;
-    if (0 > dict_fix_data_size_) {
+    if (store_var_dict()) {
       dict_index_byte_ = var_data_size_ <= UINT8_MAX ? 1 : 2;
       if (OB_UNLIKELY(var_data_size_ > UINT16_MAX)) {
         dict_index_byte_ = 4;
@@ -183,7 +183,7 @@ int ObDictEncoder::store_meta(ObBufferWriter &buf_writer)
     dict_meta_header_ = reinterpret_cast<ObDictMetaHeader *>(buf);
     buf += sizeof(ObDictMetaHeader);
 
-    if (0 > dict_fix_data_size_) { // fill var dict data
+    if (store_var_dict()) { // fill var dict data
       // var: the first var do not need index
       const int64_t meta_size = sizeof(ObDictMetaHeader) +
           (count_ - 1) * dict_index_byte_ + var_data_size_;
diff --git a/src/storage/blocksstable/encoding/ob_dict_encoder.h b/src/storage/blocksstable/encoding/ob_dict_encoder.h
index 0638d6a2e4..e2c4d515bb 100644
--- a/src/storage/blocksstable/encoding/ob_dict_encoder.h
+++ b/src/storage/blocksstable/encoding/ob_dict_encoder.h
@@ -108,7 +108,7 @@ public:
   inline int64_t calc_meta_size() const
   {
     int64_t size = sizeof(ObDictMetaHeader);
-    if (0 > dict_fix_data_size_) { // dict
+    if (store_var_dict()) { // dict
       // we do not store the index for the first
       // element, since it is always 0
       size += dict_index_byte_ * (count_ - 1) + var_data_size_;
@@ -127,6 +127,10 @@ private:
   {
     return sizeof(obj.nmb_desc_) + obj.nmb_desc_.len_ * sizeof(obj.v_.nmb_digits_[0]);
   }
+  // True when the dictionary is stored with the variable-length layout:
+  // dict_fix_data_size_ is negative or larger than UINT16_MAX.
+  // NOTE(review): presumably the fixed size must fit a 16-bit field - confirm.
+  bool store_var_dict() const { return 0 > dict_fix_data_size_ || UINT16_MAX < dict_fix_data_size_; }
 
   struct ColumnStoreFiller;
 private:
diff --git a/unittest/storage/blocksstable/encoding/test_micro_block_encoder.cpp b/unittest/storage/blocksstable/encoding/test_micro_block_encoder.cpp
index d2eaf72dd6..0916eb4e6b 100644
--- a/unittest/storage/blocksstable/encoding/test_micro_block_encoder.cpp
+++ b/unittest/storage/blocksstable/encoding/test_micro_block_encoder.cpp
@@ -41,7 +41,8 @@ enum EncodeTestCase
 class TestIColumnEncoder : public ::testing::Test
 {
 public:
-  TestIColumnEncoder() {}
+  TestIColumnEncoder(const bool is_multi_version_row = false)
+    : is_multi_version_row_(is_multi_version_row) {}
   virtual ~TestIColumnEncoder() {}
   virtual void SetUp();
   virtual void TearDown() {}
@@ -55,6 +56,7 @@ protected:
   ObTableReadInfo read_info_;
   ObArenaAllocator allocator_;
   common::ObArray<share::schema::ObColDesc> col_descs_;
+  bool is_multi_version_row_;
 };
 
 void TestIColumnEncoder::SetUp()
@@ -89,18 +91,23 @@ void TestIColumnEncoder::SetUp()
     ASSERT_EQ(OB_SUCCESS, table.add_column(col));
   }
 
-  ASSERT_EQ(OB_SUCCESS, row_generate_.init(table));
-  ASSERT_EQ(OB_SUCCESS, row_generate_.get_schema().get_column_ids(col_descs_));
+  ASSERT_EQ(OB_SUCCESS, row_generate_.init(table, is_multi_version_row_));
+  if (is_multi_version_row_) {
+    ASSERT_EQ(OB_SUCCESS, row_generate_.get_schema().get_multi_version_column_descs(col_descs_));
+  } else {
+    ASSERT_EQ(OB_SUCCESS, row_generate_.get_schema().get_column_ids(col_descs_));
+  }
   ASSERT_EQ(OB_SUCCESS, read_info_.init(allocator_,
       row_generate_.get_schema().get_column_count(),
       row_generate_.get_schema().get_rowkey_column_num(),
       lib::is_oracle_mode(),
-      col_descs_));
+      col_descs_,
+      true));
 
   ctx_.micro_block_size_ = 1L << 20; // 1MB, maximum micro block size;
   ctx_.macro_block_size_ = 2L << 20;
   ctx_.rowkey_column_cnt_ = rowkey_cnt_;
-  ctx_.column_cnt_ = column_cnt_;
+  ctx_.column_cnt_ = is_multi_version_row_ ? column_cnt_ + 2 : column_cnt_;
   ctx_.col_descs_ = &col_descs_;
   ctx_.major_working_cluster_version_=cal_version(3, 1, 0, 0);
   ctx_.row_store_type_ = common::ENCODING_ROW_STORE;
@@ -160,6 +167,70 @@ TEST_F(TestEncoderOverFlow, test_append_row_with_timestamp_and_max_estimate_limi
   }
 }
 
+static ObObjType test_dict_large_varchar[2] = {ObIntType, ObVarcharType};
+// Fixture for dict-encoding a varchar longer than UINT16_MAX bytes; the base
+// fixture is constructed with is_multi_version_row = true.
+class TestDictLargeVarchar : public TestIColumnEncoder
+{
+public:
+  TestDictLargeVarchar() : TestIColumnEncoder(true)
+  {
+    rowkey_cnt_ = 1;
+    column_cnt_ = 2;
+    col_types_ = reinterpret_cast<ObObjType *>(allocator_.alloc(sizeof(ObObjType) * column_cnt_));
+    for (int64_t i = 0; i < column_cnt_; ++i) {
+      col_types_[i] = test_dict_large_varchar[i];
+    }
+  }
+  virtual ~TestDictLargeVarchar()
+  {
+    allocator_.free(col_types_);
+  }
+
+  // column_cnt_ plus the two extra columns SetUp() counts for multi-version rows.
+  int64_t full_column_cnt_ = 4;
+};
+
+// Encodes one row whose varchar is UINT16_MAX * 2 + 1 bytes with DICT encoding
+// requested for every column, then decodes it and checks the varchar is intact.
+TEST_F(TestDictLargeVarchar, test_dict_large_varchar)
+{
+  ctx_.column_encodings_ = static_cast<int64_t *>(allocator_.alloc(sizeof(int64_t) * full_column_cnt_));
+  // Fail fast instead of writing through a null pointer if the allocation fails.
+  ASSERT_TRUE(nullptr != ctx_.column_encodings_);
+  for (int64_t i = 0; i < full_column_cnt_; ++i) {
+    ctx_.column_encodings_[i] = ObColumnHeader::Type::DICT;
+  }
+  ObMicroBlockEncoder encoder;
+  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
+
+  ObDatumRow row;
+  ASSERT_EQ(OB_SUCCESS, row.init(allocator_, full_column_cnt_));
+  const int64_t varchar_data_size = UINT16_MAX * 2 + 1;
+  char *varchar_data = static_cast<char *>(allocator_.alloc(varchar_data_size));
+  ASSERT_TRUE(nullptr != varchar_data);
+  MEMSET(varchar_data, 7, varchar_data_size);
+  row.storage_datums_[0].set_int(1);
+  row.storage_datums_[3].set_string(varchar_data, varchar_data_size);
+
+  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
+
+  char *buf = nullptr;
+  int64_t size = 0;
+  ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, size));
+
+  ObMicroBlockData micro_data(buf, size);
+  ObMicroBlockDecoder decoder;
+  ObDatumRow read_row;
+  ASSERT_EQ(OB_SUCCESS, read_row.init(full_column_cnt_));
+  ASSERT_EQ(OB_SUCCESS, decoder.init(micro_data, read_info_));
+  ASSERT_EQ(OB_SUCCESS, decoder.get_row(0, read_row));
+  STORAGE_LOG(INFO, "[Salton]", K(read_row));
+
+  ASSERT_EQ(row.storage_datums_[3].len_, read_row.storage_datums_[3].len_);
+  ASSERT_TRUE(ObDatum::binary_equal(row.storage_datums_[3], read_row.storage_datums_[3]));
+}
+
 class TestEncodingRowBufHolder : public ::testing::Test
 {
 public: