diff --git a/src/storage/blocksstable/encoding/ob_column_equal_encoder.h b/src/storage/blocksstable/encoding/ob_column_equal_encoder.h index 036190a2a..ab3085296 100644 --- a/src/storage/blocksstable/encoding/ob_column_equal_encoder.h +++ b/src/storage/blocksstable/encoding/ob_column_equal_encoder.h @@ -72,6 +72,7 @@ public: UNUSED(buf_writer); return common::OB_NOT_SUPPORTED; } + INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(ref_col_idx)); private: OB_INLINE int is_datum_equal( const common::ObDatum &left, diff --git a/src/storage/blocksstable/encoding/ob_const_encoder.h b/src/storage/blocksstable/encoding/ob_const_encoder.h index cdb49b642..6cadb382f 100644 --- a/src/storage/blocksstable/encoding/ob_const_encoder.h +++ b/src/storage/blocksstable/encoding/ob_const_encoder.h @@ -72,6 +72,7 @@ public: virtual void reuse() override ; virtual int store_fix_data(ObBufferWriter &buf_writer) override; + INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(count), K_(row_id_byte), KPC_(const_meta_header), K_(dict_encoder)); private: int store_meta_without_dict(ObBufferWriter &buf_writer); int get_cell_len(const common::ObDatum &datum, int64_t &length) const; diff --git a/src/storage/blocksstable/encoding/ob_dict_encoder.h b/src/storage/blocksstable/encoding/ob_dict_encoder.h index 5cf109958..c771b4869 100644 --- a/src/storage/blocksstable/encoding/ob_dict_encoder.h +++ b/src/storage/blocksstable/encoding/ob_dict_encoder.h @@ -121,6 +121,8 @@ public: virtual ObColumnHeader::Type get_type() const override { return type_; } virtual int store_fix_data(ObBufferWriter &buf_writer) override; + INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(type_store_size), K_(dict_fix_data_size), + K_(var_data_size), K_(dict_index_byte), K_(max_integer), KPC_(dict_meta_header), K_(count), K_(need_sort)); private: int build_dict(); int store_dict(const common::ObDatum &datum, char *buf, int64_t &len); diff --git 
a/src/storage/blocksstable/encoding/ob_hex_string_encoder.h b/src/storage/blocksstable/encoding/ob_hex_string_encoder.h index 0b50effd0..9cf48327b 100644 --- a/src/storage/blocksstable/encoding/ob_hex_string_encoder.h +++ b/src/storage/blocksstable/encoding/ob_hex_string_encoder.h @@ -175,7 +175,8 @@ public: virtual void reuse() override; virtual int store_fix_data(ObBufferWriter &buf_writer) override; - + INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(min_string_size), K_(max_string_size), + K_(sum_size), K_(null_cnt), K_(nope_cnt), KPC_(header)); private: struct ColumnStoreFiller; diff --git a/src/storage/blocksstable/encoding/ob_integer_base_diff_encoder.h b/src/storage/blocksstable/encoding/ob_integer_base_diff_encoder.h index f4609cd19..037aa7c61 100644 --- a/src/storage/blocksstable/encoding/ob_integer_base_diff_encoder.h +++ b/src/storage/blocksstable/encoding/ob_integer_base_diff_encoder.h @@ -63,6 +63,7 @@ public: virtual int64_t calc_size() const override; virtual ObColumnHeader::Type get_type() const { return type_; } virtual int store_fix_data(ObBufferWriter &buf_writer) override; + INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(type_store_size), KPC_(header)); private: class ObIIntegerData { diff --git a/src/storage/blocksstable/encoding/ob_inter_column_substring_encoder.h b/src/storage/blocksstable/encoding/ob_inter_column_substring_encoder.h index 8f292ef7e..3e839e44e 100644 --- a/src/storage/blocksstable/encoding/ob_inter_column_substring_encoder.h +++ b/src/storage/blocksstable/encoding/ob_inter_column_substring_encoder.h @@ -92,6 +92,8 @@ public: virtual int64_t get_ref_col_idx() const override; virtual void reuse() override; virtual int store_fix_data(ObBufferWriter &buf_writer) override; + INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(ref_col_idx), KPC_(ref_ctx), + K_(same_start_pos), K_(fix_data_size), K_(start_pos_byte), K_(val_len_byte)); private: // return: diff --git 
a/src/storage/blocksstable/encoding/ob_micro_block_encoder.cpp b/src/storage/blocksstable/encoding/ob_micro_block_encoder.cpp index 1b4c62154..390bd4543 100644 --- a/src/storage/blocksstable/encoding/ob_micro_block_encoder.cpp +++ b/src/storage/blocksstable/encoding/ob_micro_block_encoder.cpp @@ -342,7 +342,8 @@ void ObMicroBlockEncoder::print_micro_block_encoder_status() const { FLOG_INFO("Build micro block failed, print encoder status: ", K_(ctx), K_(estimate_size), K_(estimate_size_limit), K_(header_size), - K_(expand_pct), K_(string_col_cnt), K_(estimate_base_store_size), K_(length)); + K_(expand_pct), K_(string_col_cnt), K_(estimate_base_store_size), K_(length), + K(get_row_count())); int64_t idx = 0; FOREACH(e, encoders_) { FLOG_INFO("Print column encoder: ", K(idx), KPC(*e)); diff --git a/src/storage/blocksstable/encoding/ob_raw_encoder.h b/src/storage/blocksstable/encoding/ob_raw_encoder.h index e7510fdc7..3e9e91f56 100644 --- a/src/storage/blocksstable/encoding/ob_raw_encoder.h +++ b/src/storage/blocksstable/encoding/ob_raw_encoder.h @@ -64,6 +64,8 @@ public: virtual ObColumnHeader::Type get_type() const override { return type_; } virtual int store_fix_data(ObBufferWriter &buf_writer) override; + INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(type_store_size), K_(null_cnt), + K_(nope_cnt), K_(fix_data_size), K_(max_integer), K_(var_data_size)); private: struct DatumDataSetter; diff --git a/src/storage/blocksstable/encoding/ob_rle_encoder.h b/src/storage/blocksstable/encoding/ob_rle_encoder.h index 6feaefc21..9dfe4ead3 100644 --- a/src/storage/blocksstable/encoding/ob_rle_encoder.h +++ b/src/storage/blocksstable/encoding/ob_rle_encoder.h @@ -73,6 +73,8 @@ public: virtual int64_t calc_size() const override; virtual ObColumnHeader::Type get_type() const override { return type_; } virtual int store_fix_data(ObBufferWriter &buf_writer) override; + INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(count), K_(row_id_byte), + 
K_(ref_byte), KPC_(rle_meta_header), K_(dict_encoder)); private: int64_t count_; int64_t row_id_byte_; diff --git a/src/storage/blocksstable/encoding/ob_string_diff_encoder.cpp b/src/storage/blocksstable/encoding/ob_string_diff_encoder.cpp index 2e992501b..c7e82e95d 100644 --- a/src/storage/blocksstable/encoding/ob_string_diff_encoder.cpp +++ b/src/storage/blocksstable/encoding/ob_string_diff_encoder.cpp @@ -366,7 +366,17 @@ int ObStringDiffEncoder::get_var_length(const int64_t row_id, int64_t &length) ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(row_id)); } else { - length = row_store_size_; + const int64_t &data_version = ctx_->encoding_ctx_->major_working_cluster_version_; + // For version 4.x, fix string diff var-length encoding overflow on versions: + // from 4.2.5 bp2 to 4.3.0, and all versions after 4.3.5 + const bool enable_var_length_fix = (data_version > DATA_VERSION_4_3_5_0) + || (data_version >= MOCK_DATA_VERSION_4_2_5_2 && data_version < DATA_VERSION_4_3_0_0); + const ObDatum &datum = rows_->at(row_id).get_datum(column_index_); + if (enable_var_length_fix && (datum.is_null() || datum.is_nop())) { + length = 0; + } else { + length = row_store_size_; + } } return ret; } diff --git a/src/storage/blocksstable/encoding/ob_string_diff_encoder.h b/src/storage/blocksstable/encoding/ob_string_diff_encoder.h index 8dcb2d56a..f2191353f 100644 --- a/src/storage/blocksstable/encoding/ob_string_diff_encoder.h +++ b/src/storage/blocksstable/encoding/ob_string_diff_encoder.h @@ -130,6 +130,8 @@ public: virtual int64_t calc_size() const override; virtual ObColumnHeader::Type get_type() const override { return type_; } virtual int store_fix_data(ObBufferWriter &buf_writer) override; + INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(string_size), K_(common_size), K_(row_store_size), + K_(null_cnt), K_(nope_cnt), KPC_(header), K_(last_change_diff_row_id)); private: int traverse_cell( char *&data, diff --git 
a/src/storage/blocksstable/encoding/ob_string_prefix_encoder.h b/src/storage/blocksstable/encoding/ob_string_prefix_encoder.h index 1d443bcb5..efe7497ba 100644 --- a/src/storage/blocksstable/encoding/ob_string_prefix_encoder.h +++ b/src/storage/blocksstable/encoding/ob_string_prefix_encoder.h @@ -132,6 +132,8 @@ public: virtual int64_t calc_size() const override; virtual ObColumnHeader::Type get_type() const override { return type_; } virtual int store_fix_data(ObBufferWriter &buf_writer) override; + INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, KPC_(meta_header), K_(prefix_count), + K_(prefix_index_byte), K_(prefix_length), K_(calc_size)); private: ObStringPrefixMetaHeader *meta_header_; int64_t prefix_count_; diff --git a/unittest/storage/blocksstable/encoding/test_micro_block_encoder.cpp b/unittest/storage/blocksstable/encoding/test_micro_block_encoder.cpp index 1bbf1a8a9..513f20f2e 100644 --- a/unittest/storage/blocksstable/encoding/test_micro_block_encoder.cpp +++ b/unittest/storage/blocksstable/encoding/test_micro_block_encoder.cpp @@ -332,6 +332,95 @@ TEST_F(TestColumnEqualExceptionList, test_column_equal_ext_offset_overflow) } +static ObObjType test_string_diff[2] = {ObIntType, ObVarcharType}; +class TestStringDiffNullLength : public TestIColumnEncoder +{ +public: + TestStringDiffNullLength() {} + virtual ~TestStringDiffNullLength() {} + virtual void SetUp(); + virtual void TearDown(); +}; + +void TestStringDiffNullLength::SetUp() +{ + rowkey_cnt_ = 1; + column_cnt_ = 2; + col_types_ = reinterpret_cast<ObObjType *>(allocator_.alloc(sizeof(ObObjType) * column_cnt_)); + ASSERT_NE(nullptr, col_types_); + for (int64_t i = 0; i < column_cnt_; ++i) { + col_types_[i] = test_string_diff[i]; + } + TestIColumnEncoder::SetUp(); +} + +void TestStringDiffNullLength::TearDown() +{ + TestIColumnEncoder::TearDown(); + allocator_.free(col_types_); +} + +TEST_F(TestStringDiffNullLength, test_string_diff_null_length) +{ + int64_t column_encoding_array[2] = + 
{ObColumnHeader::Type::RAW, + ObColumnHeader::Type::STRING_DIFF}; + ctx_.column_encodings_ = column_encoding_array; + ctx_.micro_block_size_ = 1 << 20; // 1M + ctx_.major_working_cluster_version_ = DATA_VERSION_4_3_5_1; + ObMicroBlockEncoder encoder; + encoder.data_buffer_.allocator_.set_tenant_id(500); + encoder.row_buf_holder_.allocator_.set_tenant_id(500); + ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_)); + + ObDatumRow row; + ASSERT_EQ(OB_SUCCESS, row.init(allocator_, 2)); + const char *str1 = "this is a meaningless string for a test case to verify fixed string diff encoding"; + const char *str2 = "this is 7777777777777777777777777777777777777777777777777777777777777777 encoding"; + const char *str3 = "this is 8888888888888888888888888888888888888888888888888888888888888888 encoding"; + row.storage_datums_[0].set_int(0); + row.storage_datums_[1].set_string(ObString(str1)); + const int64_t single_str_len = row.storage_datums_[1].len_; + ASSERT_EQ(OB_SUCCESS, encoder.append_row(row)); + row.storage_datums_[0].set_int(1); + row.storage_datums_[1].set_string(ObString(str2)); + ASSERT_EQ(OB_SUCCESS, encoder.append_row(row)); + row.storage_datums_[0].set_int(2); + row.storage_datums_[1].set_string(ObString(str3)); + ASSERT_EQ(OB_SUCCESS, encoder.append_row(row)); + for (int64_t i = 0; i < 52400; ++i) { + row.storage_datums_[0].set_int(i + 3); + row.storage_datums_[1].set_null(); + ASSERT_EQ(OB_SUCCESS, encoder.append_row(row)); + } + + char *buf = nullptr; + int64_t size = 0; + ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, size)); + + LOG_INFO("show sizes", K(encoder.length_), K(size)); + + ObMicroBlockData micro_data(buf, size); + ObMicroBlockDecoder decoder; + ObDatumRow read_row; + ASSERT_EQ(OB_SUCCESS, read_row.init(2)); + ASSERT_EQ(OB_SUCCESS, decoder.init(micro_data, nullptr)); + ASSERT_EQ(OB_SUCCESS, decoder.get_row(0, read_row)); + const ObString &read_string1 = read_row.storage_datums_[1].get_string(); + ASSERT_EQ(0, MEMCMP(str1, read_string1.ptr(), 
read_string1.length())); + ASSERT_EQ(OB_SUCCESS, decoder.get_row(1, read_row)); + const ObString &read_string2 = read_row.storage_datums_[1].get_string(); + ASSERT_EQ(0, MEMCMP(str2, read_string2.ptr(), read_string2.length())); + ASSERT_EQ(OB_SUCCESS, decoder.get_row(2, read_row)); + const ObString &read_string3 = read_row.storage_datums_[1].get_string(); + ASSERT_EQ(0, MEMCMP(str3, read_string3.ptr(), read_string3.length())); + for (int64_t i = 0; i < 52400; ++i) { + ASSERT_EQ(OB_SUCCESS, decoder.get_row(i + 3, read_row)); + ASSERT_TRUE(read_row.storage_datums_[1].is_null()); + } + +} + class TestEncodingRowBufHolder : public ::testing::Test { public: