[bugfix] fix string diff var-length encoding overflow

This commit is contained in:
saltonz 2025-01-06 06:14:41 +00:00 committed by ob-robot
parent a2f07a7c48
commit a0e0511e18
13 changed files with 119 additions and 3 deletions

View File

@ -72,6 +72,7 @@ public:
UNUSED(buf_writer);
return common::OB_NOT_SUPPORTED;
}
INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(ref_col_idx));
private:
OB_INLINE int is_datum_equal(
const common::ObDatum &left,

View File

@ -72,6 +72,7 @@ public:
virtual void reuse() override ;
virtual int store_fix_data(ObBufferWriter &buf_writer) override;
INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(count), K_(row_id_byte), KPC_(const_meta_header), K_(dict_encoder));
private:
int store_meta_without_dict(ObBufferWriter &buf_writer);
int get_cell_len(const common::ObDatum &datum, int64_t &length) const;

View File

@ -121,6 +121,8 @@ public:
virtual ObColumnHeader::Type get_type() const override { return type_; }
virtual int store_fix_data(ObBufferWriter &buf_writer) override;
INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(type_store_size), K_(dict_fix_data_size),
K_(var_data_size), K_(dict_index_byte), K_(max_integer), KPC_(dict_meta_header), K_(count), K_(need_sort));
private:
int build_dict();
int store_dict(const common::ObDatum &datum, char *buf, int64_t &len);

View File

@ -175,7 +175,8 @@ public:
virtual void reuse() override;
virtual int store_fix_data(ObBufferWriter &buf_writer) override;
INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(min_string_size), K_(max_string_size),
K_(sum_size), K_(null_cnt), K_(nope_cnt), KPC_(header));
private:
struct ColumnStoreFiller;

View File

@ -63,6 +63,7 @@ public:
virtual int64_t calc_size() const override;
// Returns the column header type stored in type_. Marked override so the compiler
// verifies it keeps matching the base-class virtual, like the sibling methods here.
virtual ObColumnHeader::Type get_type() const override { return type_; }
virtual int store_fix_data(ObBufferWriter &buf_writer) override;
INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(type_store_size), KPC_(header));
private:
class ObIIntegerData
{

View File

@ -92,6 +92,8 @@ public:
virtual int64_t get_ref_col_idx() const override;
virtual void reuse() override;
virtual int store_fix_data(ObBufferWriter &buf_writer) override;
INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(ref_col_idx), KPC_(ref_ctx),
K_(same_start_pos), K_(fix_data_size), K_(start_pos_byte), K_(val_len_byte));
private:
// return:

View File

@ -342,7 +342,8 @@ void ObMicroBlockEncoder::print_micro_block_encoder_status() const
{
FLOG_INFO("Build micro block failed, print encoder status: ", K_(ctx),
K_(estimate_size), K_(estimate_size_limit), K_(header_size),
K_(expand_pct), K_(string_col_cnt), K_(estimate_base_store_size), K_(length));
K_(expand_pct), K_(string_col_cnt), K_(estimate_base_store_size), K_(length),
K(get_row_count()));
int64_t idx = 0;
FOREACH(e, encoders_) {
FLOG_INFO("Print column encoder: ", K(idx), KPC(*e));

View File

@ -64,6 +64,8 @@ public:
virtual ObColumnHeader::Type get_type() const override { return type_; }
virtual int store_fix_data(ObBufferWriter &buf_writer) override;
INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(type_store_size), K_(null_cnt),
K_(nope_cnt), K_(fix_data_size), K_(max_integer), K_(var_data_size));
private:
struct DatumDataSetter;

View File

@ -73,6 +73,8 @@ public:
virtual int64_t calc_size() const override;
virtual ObColumnHeader::Type get_type() const override { return type_; }
virtual int store_fix_data(ObBufferWriter &buf_writer) override;
INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(count), K_(row_id_byte),
K_(ref_byte), KPC_(rle_meta_header), K_(dict_encoder));
private:
int64_t count_;
int64_t row_id_byte_;

View File

@ -366,7 +366,17 @@ int ObStringDiffEncoder::get_var_length(const int64_t row_id, int64_t &length)
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(row_id));
} else {
length = row_store_size_;
const int64_t &data_version = ctx_->encoding_ctx_->major_working_cluster_version_;
// For version 4.x, the fix for the string diff var-length encoding overflow is enabled only on
// data versions in [4.2.5 bp2, 4.3.0) and on all data versions after 4.3.5.0 (exclusive).
const bool enable_var_length_fix = (data_version > DATA_VERSION_4_3_5_0)
|| (data_version >= MOCK_DATA_VERSION_4_2_5_2 && data_version < DATA_VERSION_4_3_0_0);
const ObDatum &datum = rows_->at(row_id).get_datum(column_index_);
if (enable_var_length_fix && (datum.is_null() || datum.is_nop())) {
length = 0;
} else {
length = row_store_size_;
}
}
return ret;
}

View File

@ -130,6 +130,8 @@ public:
virtual int64_t calc_size() const override;
virtual ObColumnHeader::Type get_type() const override { return type_; }
virtual int store_fix_data(ObBufferWriter &buf_writer) override;
INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, K_(string_size), K_(common_size), K_(row_store_size),
K_(null_cnt), K_(nope_cnt), KPC_(header), K_(last_change_diff_row_id));
private:
int traverse_cell(
char *&data,

View File

@ -132,6 +132,8 @@ public:
virtual int64_t calc_size() const override;
virtual ObColumnHeader::Type get_type() const override { return type_; }
virtual int store_fix_data(ObBufferWriter &buf_writer) override;
INHERIT_TO_STRING_KV("ObIColumnEncoder", ObIColumnEncoder, KPC_(meta_header), K_(prefix_count),
K_(prefix_index_byte), K_(prefix_length), K_(calc_size));
private:
ObStringPrefixMetaHeader *meta_header_;
int64_t prefix_count_;

View File

@ -332,6 +332,95 @@ TEST_F(TestColumnEqualExceptionList, test_column_equal_ext_offset_overflow)
}
// Column types used by the string diff fixture: column 0 is an int, column 1 is a varchar.
static ObObjType test_string_diff[2] = {ObIntType, ObVarcharType};
class TestStringDiffNullLength : public TestIColumnEncoder
{
public:
TestStringDiffNullLength() {}
virtual ~TestStringDiffNullLength() {}
virtual void SetUp();
virtual void TearDown();
};
void TestStringDiffNullLength::SetUp()
{
rowkey_cnt_ = 1;
column_cnt_ = 2;
col_types_ = reinterpret_cast<ObObjType *>(allocator_.alloc(sizeof(ObObjType) * column_cnt_));
ASSERT_NE(nullptr, col_types_);
for (int64_t i = 0; i < column_cnt_; ++i) {
col_types_[i] = test_string_diff[i];
}
TestIColumnEncoder::SetUp();
}
// Runs the base fixture's TearDown first, then hands col_types_ back to allocator_.
// NOTE(review): the free happens after the base TearDown and col_types_ is not reset
// afterwards — confirm allocator_ is still usable at this point.
void TestStringDiffNullLength::TearDown()
{
TestIColumnEncoder::TearDown();
allocator_.free(col_types_);
}
// Regression test for the string diff var-length handling of NULL rows.
// Appends three non-null strings followed by a long run of NULLs to a column forced to
// STRING_DIFF encoding, builds the micro block, and checks that every row decodes back
// to what was appended. Both the length and the bytes of each decoded string are
// verified, so an empty or truncated result cannot pass.
TEST_F(TestStringDiffNullLength, test_string_diff_null_length)
{
  int64_t column_encoding_array[2] =
      {ObColumnHeader::Type::RAW,
       ObColumnHeader::Type::STRING_DIFF};
  ctx_.column_encodings_ = column_encoding_array;
  ctx_.micro_block_size_ = 1 << 20; // 1M
  ctx_.major_working_cluster_version_ = DATA_VERSION_4_3_5_1;

  ObMicroBlockEncoder encoder;
  encoder.data_buffer_.allocator_.set_tenant_id(500);
  encoder.row_buf_holder_.allocator_.set_tenant_id(500);
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));

  const int64_t string_row_cnt = 3;
  const int64_t null_row_cnt = 52400;
  const char *test_strs[string_row_cnt] = {
      "this is a meaningless string for a test case to verify fixed string diff encoding",
      "this is 7777777777777777777777777777777777777777777777777777777777777777 encoding",
      "this is 8888888888888888888888888888888888888888888888888888888888888888 encoding"};

  // Rows [0, string_row_cnt) carry the strings; the remaining rows are NULL in column 1.
  ObDatumRow row;
  ASSERT_EQ(OB_SUCCESS, row.init(allocator_, 2));
  for (int64_t i = 0; i < string_row_cnt; ++i) {
    row.storage_datums_[0].set_int(i);
    row.storage_datums_[1].set_string(ObString(test_strs[i]));
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  for (int64_t i = 0; i < null_row_cnt; ++i) {
    row.storage_datums_[0].set_int(i + string_row_cnt);
    row.storage_datums_[1].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }

  char *buf = nullptr;
  int64_t size = 0;
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, size));
  LOG_INFO("show sizes", K(encoder.length_), K(size));

  ObMicroBlockData micro_data(buf, size);
  ObMicroBlockDecoder decoder;
  ObDatumRow read_row;
  ASSERT_EQ(OB_SUCCESS, read_row.init(2));
  ASSERT_EQ(OB_SUCCESS, decoder.init(micro_data, nullptr));

  for (int64_t i = 0; i < string_row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, decoder.get_row(i, read_row));
    ASSERT_FALSE(read_row.storage_datums_[1].is_null());
    const ObString expected = ObString(test_strs[i]);
    const ObString &actual = read_row.storage_datums_[1].get_string();
    // Compare lengths first: MEMCMP over a zero or short length would pass vacuously.
    ASSERT_EQ(expected.length(), actual.length());
    ASSERT_EQ(0, MEMCMP(expected.ptr(), actual.ptr(), expected.length()));
  }
  for (int64_t i = 0; i < null_row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, decoder.get_row(i + string_row_cnt, read_row));
    ASSERT_TRUE(read_row.storage_datums_[1].is_null());
  }
}
class TestEncodingRowBufHolder : public ::testing::Test
{
public: