 566e920620
			
		
	
	566e920620
	
	
	
		
			
			Co-authored-by: wangt1xiuyi <13547954130@163.com> Co-authored-by: yangqise7en <877793735@qq.com> Co-authored-by: Zach41 <zach_41@163.com>
		
			
				
	
	
		
			771 lines
		
	
	
		
			33 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			771 lines
		
	
	
		
			33 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /**
 | |
|  * Copyright (c) 2021 OceanBase
 | |
|  * OceanBase CE is licensed under Mulan PubL v2.
 | |
|  * You can use this software according to the terms and conditions of the Mulan PubL v2.
 | |
|  * You may obtain a copy of Mulan PubL v2 at:
 | |
|  *          http://license.coscl.org.cn/MulanPubL-2.0
 | |
|  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | |
|  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | |
|  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | |
|  * See the Mulan PubL v2 for more details.
 | |
|  */
 | |
| 
 | |
| #define USING_LOG_PREFIX STORAGE
 | |
| 
 | |
| #include <gtest/gtest.h>
 | |
| #define protected public
 | |
| #define private public
 | |
| #include "ob_cs_encoding_test_base.h"
 | |
| #include "storage/blocksstable/cs_encoding/ob_cs_encoding_util.h"
 | |
| #include "storage/blocksstable/cs_encoding/ob_micro_block_cs_encoder.h"
 | |
| #include "lib/wide_integer/ob_wide_integer.h"
 | |
| 
 | |
| namespace oceanbase
 | |
| {
 | |
| namespace blocksstable
 | |
| {
 | |
| 
 | |
| using namespace common;
 | |
| using namespace storage;
 | |
| using namespace share::schema;
 | |
| 
 | |
| class TestCSEncoder : public ObCSEncodingTestBase, public ::testing::Test
 | |
| {
 | |
| public:
 | |
|   TestCSEncoder() {}
 | |
|   virtual ~TestCSEncoder() {}
 | |
| 
 | |
|   virtual void SetUp() {}
 | |
|   virtual void TearDown() {}
 | |
| };
 | |
| 
 | |
| TEST_F(TestCSEncoder, test_integer_encoder)
 | |
| {
 | |
|   const int64_t rowkey_cnt = 1;
 | |
|   const int64_t col_cnt = 1;
 | |
|   ObObjType col_types[col_cnt] = {ObInt32Type};
 | |
|   ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
 | |
|   ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
 | |
| 
 | |
|   int64_t row_cnt = 100;
 | |
|   ObMicroBlockCSEncoder encoder;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
 | |
|   encoder.is_all_column_force_raw_ = true;
 | |
|   // Generate data and encode
 | |
|   ObDatumRow row;
 | |
|   ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));
 | |
| 
 | |
|   // <1>integer range is -50 to 49
 | |
|   int64_t seed = 0;
 | |
|   for (int64_t i = 0; i < row_cnt; ++i) {
 | |
|     seed = i - 50;  // -50 ~ 49
 | |
|     ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed, row));
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
| 
 | |
|   char *buf = nullptr;
 | |
|   int64_t buf_size = 0;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   ObIColumnCSEncoder *e = encoder.encoders_[0];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INTEGER);
 | |
|   ASSERT_EQ(-50, (int64_t)e->ctx_->integer_min_);
 | |
|   ASSERT_EQ(49, (int64_t)e->ctx_->integer_max_);
 | |
| 
 | |
|   ASSERT_EQ(sizeof(uint64_t) * row_cnt, encoder.ctx_.estimate_block_size_);
 | |
|   ASSERT_EQ(buf_size, encoder.ctx_.real_block_size_ + encoder.all_headers_size_);
 | |
|   LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));
 | |
| 
 | |
|   ObIntegerColumnEncoder *int_col_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, int_col_encoder->enc_ctx_.meta_.is_raw_encoding());
 | |
|   ASSERT_EQ(1, int_col_encoder->enc_ctx_.meta_.get_uint_width_size());
 | |
|   ASSERT_EQ(false, int_col_encoder->enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(true, int_col_encoder->enc_ctx_.meta_.is_use_base()); // raw encoding also use base
 | |
|   ASSERT_EQ(-50, int_col_encoder->enc_ctx_.meta_.base_value_);
 | |
| 
 | |
|   // <2> write 3 row: INT32_MIN/-1/NULL
 | |
|   // row count less than ObCSEncodingUtil::ENCODING_ROW_COUNT_THRESHOLD,
 | |
|   // so use will force raw encoding in encoder.
 | |
|   encoder.reuse();
 | |
|   seed = INT32_MIN;
 | |
|   ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed, row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
| 
 | |
|   row.storage_datums_[0].set_null();
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
| 
 | |
|   seed = -1;
 | |
|   ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed, row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
| 
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   e = encoder.encoders_[0];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INTEGER);
 | |
|   ASSERT_EQ(INT32_MIN, (int64_t)e->ctx_->integer_min_);
 | |
|   ASSERT_EQ(-1, (int64_t)e->ctx_->integer_max_);
 | |
|   // estimate_block_size_ won't reset to zero when reuse, so total row cnt is 100 + 3
 | |
|   ASSERT_EQ(sizeof(uint64_t) * 103, encoder.ctx_.estimate_block_size_);
 | |
| 
 | |
|   int_col_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, int_col_encoder->enc_ctx_.meta_.is_raw_encoding());
 | |
|   ASSERT_EQ(4, int_col_encoder->enc_ctx_.meta_.get_uint_width_size());
 | |
|   ASSERT_EQ(true, int_col_encoder->enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(true, int_col_encoder->enc_ctx_.meta_.is_use_base()); // raw encoding also use base
 | |
|   ASSERT_EQ(INT32_MIN, int_col_encoder->enc_ctx_.meta_.base_value());
 | |
|   ASSERT_EQ(0, int_col_encoder->enc_ctx_.meta_.null_replaced_value_);
 | |
| 
 | |
|   // <3> write 3 row: INT32_MIN/INT32_MAX/NULL + 200 rows
 | |
|   encoder.reuse();
 | |
|   seed = INT32_MIN;
 | |
|   ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed, row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   seed = INT32_MAX;
 | |
|   ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed, row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   row.storage_datums_[0].set_null();
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
| 
 | |
|   row_cnt = 200;
 | |
|   for (int64_t i = 0; i < row_cnt; ++i) {
 | |
|     seed = i - 100;
 | |
|     ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed, row));
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
| 
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   e = encoder.encoders_[0];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INTEGER);
 | |
|   ASSERT_EQ(INT32_MIN, (int64_t)e->ctx_->integer_min_);
 | |
|   ASSERT_EQ(INT32_MAX, (int64_t)e->ctx_->integer_max_);
 | |
|   // estimate_block_size_ won't reset to zero when reuse, so total row cnt is 100 + 3 + 3 + 200
 | |
|   ASSERT_EQ(sizeof(uint64_t) * 306, encoder.ctx_.estimate_block_size_);
 | |
| 
 | |
|   int_col_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(e);
 | |
|   ASSERT_EQ(4, int_col_encoder->enc_ctx_.meta_.get_uint_width_size());
 | |
|   ASSERT_EQ(false, int_col_encoder->enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(true, int_col_encoder->enc_ctx_.meta_.is_use_base());
 | |
|   ASSERT_EQ(INT32_MIN, int_col_encoder->enc_ctx_.meta_.base_value());
 | |
|   ASSERT_EQ(true, e->get_column_header().has_null_bitmap());
 | |
| 
 | |
|   // <4> write 3 row: 0/INT32_MAX/NULL
 | |
|   // row count less than ObCSEncodingUtil::ENCODING_ROW_COUNT_THRESHOLD,
 | |
|   // so use will force raw encoding in encoder.
 | |
|   encoder.reuse();
 | |
|   ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(0, row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   row.storage_datums_[0].set_null();
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(INT32_MAX, row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   e = encoder.encoders_[0];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INTEGER);
 | |
|   ASSERT_EQ(0, (int64_t)e->ctx_->integer_min_);
 | |
|   ASSERT_EQ(INT32_MAX, (int64_t)e->ctx_->integer_max_);
 | |
|   int_col_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, int_col_encoder->enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(-1, int_col_encoder->enc_ctx_.meta_.null_replaced_value_);
 | |
| 
 | |
|   // <5> write 3 row: 0/INT32_MAX - 1/NULL
 | |
|   // row count less than ObCSEncodingUtil::ENCODING_ROW_COUNT_THRESHOLD,
 | |
|   // so use will force raw encoding in encoder.
 | |
|   encoder.reuse();
 | |
|   ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(0, row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   row.storage_datums_[0].set_null();
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(INT32_MAX - 1, row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   e = encoder.encoders_[0];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INTEGER);
 | |
|   ASSERT_EQ(0, (int64_t)e->ctx_->integer_min_);
 | |
|   ASSERT_EQ(INT32_MAX - 1, (int64_t)e->ctx_->integer_max_);
 | |
|   int_col_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, int_col_encoder->enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(INT32_MAX, int_col_encoder->enc_ctx_.meta_.null_replaced_value());
 | |
| 
 | |
|   // <6> write 1000 row, and monotonic increase
 | |
|   encoder.reuse();
 | |
|   row_cnt = 1000;
 | |
|   for (int64_t i = 0; i < row_cnt; ++i) {
 | |
|     ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i, row));
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   e = encoder.encoders_[0];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INTEGER);
 | |
|   ASSERT_EQ(0, (int64_t)e->ctx_->integer_min_);
 | |
|   ASSERT_EQ(999, (int64_t)e->ctx_->integer_max_);
 | |
| 
 | |
|   int_col_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(e);
 | |
|   ASSERT_EQ(2, int_col_encoder->enc_ctx_.meta_.get_uint_width_size());
 | |
|   ASSERT_EQ(false, int_col_encoder->enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
 | |
|   ASSERT_EQ(false, int_col_encoder->enc_ctx_.meta_.is_use_base());
 | |
| 
 | |
|   //<7> write all null
 | |
|   encoder.reuse();
 | |
|   row_cnt = 1000;
 | |
|   row.storage_datums_[0].set_null();
 | |
|   for (int64_t i = 0; i < row_cnt; ++i) {
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   e = encoder.encoders_[0];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INTEGER);
 | |
|   int_col_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(e);
 | |
|   ASSERT_EQ(1, int_col_encoder->enc_ctx_.meta_.get_uint_width_size());
 | |
|   ASSERT_EQ(true, int_col_encoder->enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(0, int_col_encoder->enc_ctx_.meta_.null_replaced_value());
 | |
|   ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
 | |
|   ASSERT_EQ(false, int_col_encoder->enc_ctx_.meta_.is_use_base());
 | |
| 
 | |
|   reuse();
 | |
| }
 | |
| 
 | |
| TEST_F(TestCSEncoder, test_big_integer_encoder)
 | |
| {
 | |
|   const int64_t rowkey_cnt = 1;
 | |
|   const int64_t col_cnt = 2;
 | |
|   ObObjType col_types[col_cnt] = {ObIntType, ObUInt64Type};
 | |
|   ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
 | |
| 
 | |
|   ObMicroBlockCSEncoder encoder;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
 | |
|   ObDatumRow row;
 | |
|   ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));
 | |
| 
 | |
|   int64_t row_cnt = 200;
 | |
|   for (int64_t i = 0; i < row_cnt; ++i) {
 | |
|     row.storage_datums_[0].set_int(INT64_MIN + 1000 + i);
 | |
|     row.storage_datums_[1].set_uint(UINT64_MAX - 1000 + i );
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   row.storage_datums_[0].set_int(INT64_MIN);
 | |
|   row.storage_datums_[1].set_uint(UINT64_MAX);
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   row.storage_datums_[0].set_int(INT64_MAX);
 | |
|   row.storage_datums_[1].set_uint(UINT64_MAX - UINT32_MAX);
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
| 
 | |
|   char *buf = nullptr;
 | |
|   int64_t buf_size = 0;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   ObIColumnCSEncoder *e = encoder.encoders_[0];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INTEGER);
 | |
|   ASSERT_EQ(INT64_MIN, (int64_t)e->ctx_->integer_min_);
 | |
|   ASSERT_EQ(INT64_MAX, (int64_t)e->ctx_->integer_max_);
 | |
| 
 | |
| 
 | |
|   ObIntegerColumnEncoder *int_col_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(e);
 | |
|   ASSERT_EQ(8, int_col_encoder->enc_ctx_.meta_.get_uint_width_size());
 | |
|   ASSERT_EQ(true, int_col_encoder->enc_ctx_.meta_.is_use_base());
 | |
|   ASSERT_EQ(INT64_MIN, int_col_encoder->enc_ctx_.meta_.base_value());
 | |
| 
 | |
|   e = encoder.encoders_[1];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INTEGER);
 | |
|   ASSERT_EQ(UINT64_MAX - UINT32_MAX, e->ctx_->integer_min_);
 | |
|   ASSERT_EQ(UINT64_MAX, e->ctx_->integer_max_);
 | |
| 
 | |
|   int_col_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(e);
 | |
|   ASSERT_EQ(8, int_col_encoder->enc_ctx_.meta_.get_uint_width_size());
 | |
|   ASSERT_EQ(false, int_col_encoder->enc_ctx_.meta_.is_use_base());
 | |
| 
 | |
|   reuse();
 | |
| }
 | |
| 
 | |
| 
 | |
| TEST_F(TestCSEncoder, test_string_encoder)
 | |
| {
 | |
|   const int64_t rowkey_cnt = 1;
 | |
|   const int64_t col_cnt = 2;
 | |
|   ObObjType col_types[col_cnt] = {ObIntType, ObVarcharType};
 | |
|   ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
 | |
|   ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
 | |
|   ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STRING;
 | |
| 
 | |
|   // <1> 100 fixed len string and has one null
 | |
|   int64_t row_cnt = 100;
 | |
|   ObMicroBlockCSEncoder encoder;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
 | |
|   encoder.is_all_column_force_raw_ = true;
 | |
|   // Generate data and encode
 | |
|   ObDatumRow row;
 | |
|   ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));
 | |
| 
 | |
|   int64_t seed = 0;
 | |
|   for (int64_t i = 0; i < row_cnt; ++i) {
 | |
|     ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i, row));
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   for (int64_t i = 0; i < col_cnt; i++) {
 | |
|     row.storage_datums_[i].set_null();
 | |
|   }
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
| 
 | |
|   char *buf = nullptr;
 | |
|   int64_t buf_size = 0;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   ObIColumnCSEncoder *e = encoder.encoders_[1];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
 | |
|   ASSERT_EQ(64, e->ctx_->fix_data_size_);
 | |
|   ASSERT_EQ(64 * row_cnt, e->ctx_->var_data_size_);
 | |
|   ASSERT_EQ(64 * row_cnt, e->ctx_->dict_var_data_size_);
 | |
|   ASSERT_EQ(true, e->get_column_header().has_null_bitmap());
 | |
| 
 | |
|   ASSERT_EQ((8 + 64 + 8) * row_cnt + 16, encoder.ctx_.estimate_block_size_);
 | |
|   ASSERT_EQ(buf_size, encoder.ctx_.real_block_size_ + encoder.all_headers_size_);
 | |
|   LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));
 | |
| 
 | |
|   ObStringColumnEncoder *str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, str_col_encoder->enc_ctx_.meta_.is_fixed_len_string());
 | |
|   ASSERT_EQ(64, str_col_encoder->enc_ctx_.meta_.get_fixed_string_len());
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
 | |
|   ASSERT_EQ(true, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);
 | |
| 
 | |
|   // <2> var length string and has null and has no zero length string
 | |
|   row_cnt = 100;
 | |
|   encoder.reuse();
 | |
|   int64_t sum_len = 0;
 | |
|   for (int64_t i = 0; i < row_cnt; i++) {
 | |
|     char *varchar_data = static_cast<char *>(allocator_.alloc(i + 1));
 | |
|     ASSERT_TRUE(nullptr != varchar_data);
 | |
|     MEMSET(varchar_data, 0xf, i + 1);
 | |
|     row.storage_datums_[0].set_int(i);
 | |
|     row.storage_datums_[1].set_string(varchar_data, i + 1);
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|     sum_len += i + 1;
 | |
|   }
 | |
|   row.storage_datums_[0].set_null();
 | |
|   row.storage_datums_[1].set_null();
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
| 
 | |
|   e = encoder.encoders_[1];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
 | |
|   ASSERT_EQ(-1, e->ctx_->fix_data_size_);
 | |
|   ASSERT_EQ(sum_len, e->ctx_->var_data_size_);
 | |
|   ASSERT_EQ(sum_len, e->ctx_->dict_var_data_size_);
 | |
|   ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
 | |
|   LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));
 | |
| 
 | |
|   str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.get_fixed_string_len());
 | |
|   ASSERT_EQ(true, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);
 | |
| 
 | |
|   // <3> var length string and has null and has zero length string
 | |
|   encoder.reuse();
 | |
|   for (int64_t i = 0; i < ObCSEncodingUtil::ENCODING_ROW_COUNT_THRESHOLD - 2; i++) {
 | |
|     char *varchar_data = static_cast<char *>(allocator_.alloc(i + 1));
 | |
|     ASSERT_TRUE(nullptr != varchar_data);
 | |
|     MEMSET(varchar_data, 0xf, i + 1);
 | |
|     row.storage_datums_[0].set_int(i);
 | |
|     row.storage_datums_[1].set_string(varchar_data, i);
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   row.storage_datums_[0].set_null();
 | |
|   row.storage_datums_[1].set_null();
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   e = encoder.encoders_[1];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
 | |
|   ASSERT_EQ(-1, e->ctx_->fix_data_size_);
 | |
|   ASSERT_EQ(true, e->get_column_header().has_null_bitmap());
 | |
|   LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));
 | |
| 
 | |
|   str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_fixed_len_string());
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);
 | |
| 
 | |
|   // <4> write all null
 | |
|   row_cnt = 100;
 | |
|   encoder.reuse();
 | |
|   for (int64_t i = 0; i < row_cnt; i++) {
 | |
|     row.storage_datums_[0].set_int(i);
 | |
|     row.storage_datums_[1].set_null();
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   e = encoder.encoders_[1];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
 | |
|   ASSERT_EQ(-1, e->ctx_->fix_data_size_);
 | |
|   ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
 | |
|   LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));
 | |
| 
 | |
|   str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_fixed_len_string());
 | |
|   ASSERT_EQ(true, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);
 | |
| 
 | |
|   // <4> write all zero length datum
 | |
|   row_cnt = 100;
 | |
|   encoder.reuse();
 | |
|   for (int64_t i = 0; i < row_cnt; i++) {
 | |
|     row.storage_datums_[0].set_int(i);
 | |
|     row.storage_datums_[1].set_string(nullptr, 0);
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   e = encoder.encoders_[1];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
 | |
|   ASSERT_EQ(0, e->ctx_->fix_data_size_);
 | |
|   ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
 | |
|   LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));
 | |
| 
 | |
|   str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, str_col_encoder->enc_ctx_.meta_.is_fixed_len_string());
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);
 | |
| 
 | |
|   // <5> write all zero length datum and null
 | |
|   row_cnt = 200;
 | |
|   encoder.reuse();
 | |
|   for (int64_t i = 0; i < row_cnt - 100; i++) {
 | |
|     row.storage_datums_[0].set_int(i);
 | |
|     row.storage_datums_[1].set_string(nullptr, 0);
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   for (int64_t i = row_cnt - 100; i < row_cnt; i++) {
 | |
|     row.storage_datums_[0].set_int(i);
 | |
|     row.storage_datums_[1].set_null();
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
|   e = encoder.encoders_[1];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
 | |
|   ASSERT_EQ(0, e->ctx_->fix_data_size_);
 | |
|   ASSERT_EQ(true, e->get_column_header().has_null_bitmap());
 | |
|   LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));
 | |
| 
 | |
|   str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, str_col_encoder->enc_ctx_.meta_.is_fixed_len_string());
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
 | |
|   ASSERT_EQ(false, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);
 | |
| 
 | |
|   reuse();
 | |
| }
 | |
| 
 | |
| TEST_F(TestCSEncoder, test_dict_encoder)
 | |
| {
 | |
|   const int64_t rowkey_cnt = 1;
 | |
|   const int64_t col_cnt = 5;
 | |
|   ObObjType col_types[col_cnt] = {ObIntType, ObVarcharType, ObUInt64Type, ObTextType, ObHexStringType};
 | |
|   ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
 | |
|   ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT;
 | |
|   ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STR_DICT;
 | |
|   // <1> integer dict: has null and has negative value(-50 - 49)
 | |
|   //     string dict:  has null and has zero length value
 | |
|   int64_t row_cnt = 1000;
 | |
|   const int64_t distinct_cnt = 100;
 | |
|   ObMicroBlockCSEncoder encoder;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
 | |
|   ObDatumRow row;
 | |
|   ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));
 | |
|   char *varchar_data = static_cast<char *>(allocator_.alloc(row_cnt + 1));
 | |
|   MEMSET(varchar_data, 0xf, row_cnt + 1);
 | |
|   for (int64_t i = 0; i < row_cnt; i++) {
 | |
|     ASSERT_TRUE(nullptr != varchar_data);
 | |
|     row.storage_datums_[0].set_int((i % distinct_cnt) - distinct_cnt / 2);
 | |
|     row.storage_datums_[1].set_string(varchar_data, i % distinct_cnt);
 | |
|     row.storage_datums_[2].set_null();  // all null integer
 | |
|     row.storage_datums_[3].set_null(); // all null string
 | |
|     row.storage_datums_[4].set_string(nullptr, 0); // all zero length string
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   row.storage_datums_[0].set_null();
 | |
|   row.storage_datums_[1].set_null();
 | |
|   row.storage_datums_[2].set_null();
 | |
|   row.storage_datums_[3].set_null();
 | |
|   row.storage_datums_[4].set_string(nullptr, 0);
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   char *buf = nullptr;
 | |
|   int64_t buf_size = 0;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
| 
 | |
|   ObIColumnCSEncoder *e = encoder.encoders_[0];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INT_DICT);
 | |
|   ASSERT_EQ(-50, (int64_t)e->ctx_->integer_min_);
 | |
|   ASSERT_EQ(49, (int64_t)e->ctx_->integer_max_);
 | |
|   ASSERT_EQ(true, e->ctx_->need_sort_);
 | |
|   ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
 | |
| 
 | |
|   ObIntDictColumnEncoder *dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(e);
 | |
|   ASSERT_EQ(1, dict_encoder->integer_dict_enc_ctx_.meta_.get_uint_width_size());
 | |
|   ASSERT_EQ(false, dict_encoder->integer_dict_enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(true, dict_encoder->integer_dict_enc_ctx_.info_.is_monotonic_inc_);
 | |
|   ASSERT_EQ(distinct_cnt, dict_encoder->max_ref_);
 | |
|   ASSERT_EQ(distinct_cnt, dict_encoder->dict_encoding_meta_.distinct_val_cnt_);
 | |
| 
 | |
|   ASSERT_EQ(1, dict_encoder->ref_enc_ctx_.meta_.get_uint_width_size()); // [0, distinct_cnt]
 | |
|   ASSERT_EQ(false, dict_encoder->ref_enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(2, dict_encoder->stream_offsets_.count());
 | |
| 
 | |
|   e = encoder.encoders_[1];
 | |
|   ASSERT_EQ(true, e->ctx_->need_sort_);
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STR_DICT);
 | |
|   ASSERT_EQ(-1, e->ctx_->fix_data_size_);
 | |
|   ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
 | |
|   LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));
 | |
|   ObStrDictColumnEncoder *str_dict_encoder = reinterpret_cast<ObStrDictColumnEncoder *>(e);
 | |
|   ASSERT_EQ(false, str_dict_encoder->string_dict_enc_ctx_.meta_.is_fixed_len_string());
 | |
|   ASSERT_EQ(false, str_dict_encoder->string_dict_enc_ctx_.meta_.is_use_zero_len_as_null());
 | |
|   ASSERT_EQ(false, str_dict_encoder->string_dict_enc_ctx_.info_.raw_encoding_str_offset_);
 | |
|   ASSERT_EQ(distinct_cnt, str_dict_encoder->max_ref_);
 | |
|   ASSERT_EQ(distinct_cnt, str_dict_encoder->dict_encoding_meta_.distinct_val_cnt_);
 | |
| 
 | |
|   ASSERT_EQ(1, str_dict_encoder->ref_enc_ctx_.meta_.get_uint_width_size()); // [0, distinct_cnt]
 | |
|   ASSERT_EQ(false, str_dict_encoder->ref_enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(3, str_dict_encoder->stream_offsets_.count());
 | |
| 
 | |
|   e = encoder.encoders_[2];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INT_DICT);
 | |
|   ASSERT_EQ(true, e->ctx_->need_sort_);
 | |
|   dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(e);
 | |
|   ASSERT_EQ(0, dict_encoder->dict_encoding_meta_.distinct_val_cnt_);
 | |
|   ASSERT_EQ(0, dict_encoder->stream_offsets_.count());
 | |
| 
 | |
|   e = encoder.encoders_[3]; // ObTextType
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STR_DICT);
 | |
|   ASSERT_EQ(true, e->ctx_->need_sort_);
 | |
|   str_dict_encoder = reinterpret_cast<ObStrDictColumnEncoder *>(e);
 | |
|   ASSERT_EQ(0, dict_encoder->dict_encoding_meta_.distinct_val_cnt_);
 | |
|   ASSERT_EQ(0, dict_encoder->stream_offsets_.count());
 | |
| 
 | |
|   e = encoder.encoders_[4]; // all zero length string will choose string encoding
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
 | |
|   ASSERT_EQ(0, e->ctx_->fix_data_size_);
 | |
|   ObStringColumnEncoder *str_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
 | |
|   ASSERT_EQ(1, str_encoder->stream_offsets_.count());
 | |
|   reuse();
 | |
| }
 | |
| 
 | |
| TEST_F(TestCSEncoder, test_dict_const_ref_encoder)
 | |
| {
 | |
|   const int64_t rowkey_cnt = 1;
 | |
|   const int64_t col_cnt = 5;
 | |
|   ObObjType col_types[col_cnt] = {ObIntType, ObVarcharType, ObUInt64Type, ObVarcharType, ObIntType};
 | |
|   ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
 | |
|   int64_t row_cnt = 1000;
 | |
|   ObMicroBlockCSEncoder encoder;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
 | |
|   ObDatumRow row;
 | |
|   ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));
 | |
|   char *varchar_data = static_cast<char *>(allocator_.alloc(row_cnt + 1));
 | |
|   MEMSET(varchar_data, 'a', row_cnt + 1);
 | |
|   int64_t execption_cnt = 64;
 | |
|   for (int64_t i = 0; i < row_cnt - execption_cnt; i++) {
 | |
|     ASSERT_TRUE(nullptr != varchar_data);
 | |
|     row.storage_datums_[0].set_int(1000);
 | |
|     row.storage_datums_[1].set_string(varchar_data, 10);
 | |
|     row.storage_datums_[2].set_int(i % 2 + 1000); // not const dict ref
 | |
|     row.storage_datums_[3].set_null();
 | |
|     row.storage_datums_[4].set_int(1000);
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   for (int64_t i = 0; i < execption_cnt; i++) {
 | |
|     row.storage_datums_[0].set_int(i);
 | |
|     row.storage_datums_[1].set_string(varchar_data, 20);
 | |
|     row.storage_datums_[2].set_null();
 | |
|     row.storage_datums_[3].set_string(varchar_data, 100 + i % 2);
 | |
|     row.storage_datums_[4].set_null();
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   char *buf = nullptr;
 | |
|   int64_t buf_size = 0;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
| 
 | |
|   ObIColumnCSEncoder *e = encoder.encoders_[0];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INT_DICT);
 | |
|   ObIntDictColumnEncoder *dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, dict_encoder->dict_encoding_meta_.is_const_encoding_ref());
 | |
|   ASSERT_EQ(execption_cnt, dict_encoder->const_list_header_->dict_ref_); // const value is the last one in the sorted dict
 | |
|   ASSERT_EQ(execption_cnt, dict_encoder->ref_exception_cnt_);
 | |
|   ASSERT_EQ(2 + 2 * execption_cnt, dict_encoder->dict_encoding_meta_.ref_row_cnt_);
 | |
| 
 | |
|   e = encoder.encoders_[1];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STR_DICT);
 | |
|   ObStrDictColumnEncoder *str_dict_encoder = reinterpret_cast<ObStrDictColumnEncoder *>(e);
 | |
|   ASSERT_EQ(1, str_dict_encoder->max_ref_);
 | |
|   ASSERT_EQ(true, str_dict_encoder->dict_encoding_meta_.is_const_encoding_ref());
 | |
|   ASSERT_EQ(0, str_dict_encoder->const_list_header_->dict_ref_);
 | |
|   ASSERT_EQ(execption_cnt, str_dict_encoder->ref_exception_cnt_);
 | |
|   ASSERT_EQ(2 + 2 * execption_cnt, str_dict_encoder->dict_encoding_meta_.ref_row_cnt_);
 | |
| 
 | |
|   e = encoder.encoders_[2];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INT_DICT);
 | |
|   ASSERT_EQ(true, e->ctx_->need_sort_);
 | |
|   dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(e);
 | |
|   ASSERT_EQ(false, dict_encoder->dict_encoding_meta_.is_const_encoding_ref());
 | |
| 
 | |
|   e = encoder.encoders_[3];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STR_DICT);
 | |
|   str_dict_encoder = reinterpret_cast<ObStrDictColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, str_dict_encoder->dict_encoding_meta_.is_const_encoding_ref());
 | |
|   ASSERT_EQ(2, str_dict_encoder->max_ref_);
 | |
|   ASSERT_EQ(2, str_dict_encoder->const_list_header_->dict_ref_);
 | |
|   ASSERT_EQ(execption_cnt, str_dict_encoder->ref_exception_cnt_);
 | |
|   ASSERT_EQ(2 + 2 * execption_cnt, str_dict_encoder->dict_encoding_meta_.ref_row_cnt_);
 | |
| 
 | |
|   e = encoder.encoders_[4];
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INT_DICT);
 | |
|   dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, dict_encoder->dict_encoding_meta_.is_const_encoding_ref());
 | |
|   ASSERT_EQ(1, dict_encoder->max_ref_);
 | |
|   ASSERT_EQ(0, dict_encoder->const_list_header_->dict_ref_);
 | |
|   ASSERT_EQ(execption_cnt, dict_encoder->ref_exception_cnt_);
 | |
|   ASSERT_EQ(2 + 2 * execption_cnt, dict_encoder->dict_encoding_meta_.ref_row_cnt_);
 | |
| 
 | |
|   reuse();
 | |
| }
 | |
| 
 | |
| TEST_F(TestCSEncoder, test_decimal_int_encoder)
 | |
| {
 | |
|   const int64_t rowkey_cnt = 1;
 | |
|   const int64_t col_cnt = 6;
 | |
|   ObObjType col_types[col_cnt] = {ObDecimalIntType, ObDecimalIntType, ObDecimalIntType, ObDecimalIntType, ObDecimalIntType, ObDecimalIntType};
 | |
|   int64_t precision_arr[col_cnt] = {MAX_PRECISION_DECIMAL_INT_32,  // INTEGER
 | |
|                                     MAX_PRECISION_DECIMAL_INT_128, // INTEGER
 | |
|                                     OB_MAX_DECIMAL_PRECISION,      // INT_DICT
 | |
|                                     MAX_PRECISION_DECIMAL_INT_128, // STRING
 | |
|                                     MAX_PRECISION_DECIMAL_INT_128, // STRING
 | |
|                                     OB_MAX_DECIMAL_PRECISION};     // STR_DICT
 | |
|   ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt, ObCompressorType::ZSTD_1_3_8_COMPRESSOR, precision_arr));
 | |
|   ctx_.column_encodings_[4] = ObCSColumnHeader::Type::STRING; // specified, otherwise it will hit STR_DICT
 | |
|   int64_t row_cnt = 1000;
 | |
|   const int64_t distinct_cnt = 100;
 | |
|   ObMicroBlockCSEncoder encoder;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
 | |
|   ObDatumRow row;
 | |
|   ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));
 | |
| 
 | |
|   for (int64_t i = 0; i < row_cnt; i++) {
 | |
|     int32_t m = 0;
 | |
|     int128_t j = 0;
 | |
|     int256_t k = 0;
 | |
|     if (i == row_cnt - 3) {
 | |
|       m = INT32_MAX;
 | |
|       row.storage_datums_[0].set_decimal_int(m);
 | |
|       j = INT64_MAX;
 | |
|       row.storage_datums_[1].set_decimal_int(j);
 | |
|       k = 0;
 | |
|       row.storage_datums_[2].set_decimal_int(k);
 | |
| 
 | |
|       j = (int128_t)INT64_MAX << 64;
 | |
|       row.storage_datums_[3].set_decimal_int(j);
 | |
|       row.storage_datums_[4].set_decimal_int(j);
 | |
|       k = (int256_t)((i % distinct_cnt) - distinct_cnt / 2) << 128;
 | |
|       row.storage_datums_[5].set_decimal_int(k);
 | |
|     } else if (i == row_cnt - 2) {
 | |
|       m = INT32_MIN;
 | |
|       row.storage_datums_[0].set_decimal_int(m);
 | |
|       j = INT64_MIN;
 | |
|       row.storage_datums_[1].set_decimal_int(j);
 | |
|       k = INT32_MAX;
 | |
|       row.storage_datums_[2].set_decimal_int(k);
 | |
| 
 | |
|       j = (int128_t)INT64_MIN << 64;
 | |
|       row.storage_datums_[3].set_decimal_int(j);
 | |
|       row.storage_datums_[4].set_decimal_int(j);
 | |
|       k = (int256_t)((i % distinct_cnt) - distinct_cnt / 2) << 128;
 | |
|       row.storage_datums_[5].set_decimal_int(k);
 | |
|     } else if (i == row_cnt - 1) {
 | |
|       row.storage_datums_[0].set_null();
 | |
|       row.storage_datums_[1].set_null();
 | |
|       row.storage_datums_[2].set_null();
 | |
|       row.storage_datums_[3].set_null();
 | |
|       row.storage_datums_[4].set_null();
 | |
|       row.storage_datums_[5].set_null();
 | |
|     } else {
 | |
|       m = i - row_cnt / 2;
 | |
|       row.storage_datums_[0].set_decimal_int(m); // -500 ~ 498
 | |
|       j = i + INT32_MAX;
 | |
|       row.storage_datums_[1].set_decimal_int(j);
 | |
|       k = (i % (distinct_cnt - 2)) - INT32_MAX;
 | |
|       row.storage_datums_[2].set_decimal_int(k);
 | |
| 
 | |
|       j = i;
 | |
|       row.storage_datums_[3].set_decimal_int(j);
 | |
|       row.storage_datums_[4].set_null();
 | |
|       k = (int256_t)((i % distinct_cnt) - distinct_cnt / 2) << 128;
 | |
|       row.storage_datums_[5].set_decimal_int(k);
 | |
|     }
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
 | |
|   }
 | |
|   char *buf = nullptr;
 | |
|   int64_t buf_size = 0;
 | |
|   ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
 | |
| 
 | |
|   ObIColumnCSEncoder *e = encoder.encoders_[0]; // int32_t
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INTEGER);
 | |
|   ASSERT_EQ(INT32_MIN, (int64_t)e->ctx_->integer_min_);
 | |
|   ASSERT_EQ(INT32_MAX, (int64_t)e->ctx_->integer_max_);
 | |
|   ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
 | |
|   ObIntegerColumnEncoder *int_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(e);
 | |
|   ASSERT_EQ(8, int_encoder->enc_ctx_.meta_.get_uint_width_size());
 | |
|   ASSERT_EQ(true, int_encoder->enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(true, int_encoder->enc_ctx_.meta_.is_use_base());
 | |
|   ASSERT_EQ((int64_t)(INT32_MIN) - 1, (int64_t)(int_encoder->enc_ctx_.meta_.null_replaced_value()));
 | |
| 
 | |
|   e = encoder.encoders_[1]; // int128_t
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INTEGER);
 | |
|   ASSERT_EQ(INT64_MIN, (int64_t)e->ctx_->integer_min_);
 | |
|   ASSERT_EQ(INT64_MAX, (int64_t)e->ctx_->integer_max_);
 | |
|   ASSERT_EQ(true, e->get_column_header().has_null_bitmap());
 | |
|   int_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(e);
 | |
|   ASSERT_EQ(8, int_encoder->enc_ctx_.meta_.get_uint_width_size());
 | |
|   ASSERT_EQ(false, int_encoder->enc_ctx_.meta_.is_use_null_replace_value());
 | |
|   ASSERT_EQ(true, int_encoder->enc_ctx_.meta_.is_use_base());
 | |
|   ASSERT_EQ(INT64_MIN, int_encoder->enc_ctx_.meta_.base_value());
 | |
| 
 | |
|   e = encoder.encoders_[2]; // int256_t
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INT_DICT);
 | |
|   ASSERT_EQ(true, e->ctx_->need_sort_);
 | |
|   ObIntDictColumnEncoder *dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, dict_encoder->integer_dict_enc_ctx_.info_.is_monotonic_inc_);
 | |
|   ASSERT_EQ(distinct_cnt, dict_encoder->max_ref_);
 | |
|   ASSERT_EQ(distinct_cnt, dict_encoder->dict_encoding_meta_.distinct_val_cnt_);
 | |
|   ASSERT_EQ(2, dict_encoder->stream_offsets_.count());
 | |
| 
 | |
|   e = encoder.encoders_[3]; // int128_t, use fix length has less storage cost
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
 | |
|   ASSERT_EQ(sizeof(int128_t), e->ctx_->fix_data_size_);
 | |
|   ASSERT_EQ(true, e->get_column_header().has_null_bitmap());
 | |
|   ObStringColumnEncoder *str_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
 | |
|   ASSERT_EQ(true, str_encoder->enc_ctx_.meta_.is_fixed_len_string());
 | |
|   ASSERT_EQ(false, str_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
 | |
| 
 | |
|   e = encoder.encoders_[4]; // int128_t, use var length has less storage cost
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
 | |
|   ASSERT_EQ(sizeof(int128_t), e->ctx_->fix_data_size_);
 | |
|   ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
 | |
|   str_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
 | |
|   ASSERT_EQ(false, str_encoder->enc_ctx_.meta_.is_fixed_len_string());
 | |
|   ASSERT_EQ(true, str_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
 | |
| 
 | |
|   e = encoder.encoders_[5]; // int256_t
 | |
|   ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STR_DICT);
 | |
|   ASSERT_EQ(sizeof(int256_t), e->ctx_->fix_data_size_);
 | |
|   ObStrDictColumnEncoder *str_dict_encoder = reinterpret_cast<ObStrDictColumnEncoder *>(e);
 | |
|   ASSERT_EQ(distinct_cnt, str_dict_encoder->max_ref_);
 | |
|   ASSERT_EQ(distinct_cnt, str_dict_encoder->dict_encoding_meta_.distinct_val_cnt_);
 | |
|   ASSERT_EQ(2, str_dict_encoder->stream_offsets_.count());
 | |
|   ASSERT_EQ(1, str_dict_encoder->ref_enc_ctx_.meta_.get_uint_width_size()); // [0, distinct_cnt]
 | |
| 
 | |
|   reuse();
 | |
| }
 | |
| 
 | |
| 
 | |
| }  // namespace blocksstable
 | |
| }  // namespace oceanbase
 | |
| 
 | |
| int main(int argc, char **argv)
 | |
| {
 | |
|   system("rm -f test_cs_encoder.log*");
 | |
|   OB_LOGGER.set_file_name("test_cs_encoder.log", true, false);
 | |
|   oceanbase::common::ObLogger::get_logger().set_log_level("DEBUG");
 | |
|   testing::InitGoogleTest(&argc, argv);
 | |
|   return RUN_ALL_TESTS();
 | |
| }
 |