424 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			424 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /**
 | |
|  * Copyright (c) 2021 OceanBase
 | |
|  * OceanBase CE is licensed under Mulan PubL v2.
 | |
|  * You can use this software according to the terms and conditions of the Mulan PubL v2.
 | |
|  * You may obtain a copy of Mulan PubL v2 at:
 | |
|  *          http://license.coscl.org.cn/MulanPubL-2.0
 | |
|  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | |
|  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | |
|  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | |
|  * See the Mulan PubL v2 for more details.
 | |
|  */
 | |
| 
 | |
| #define USING_LOG_PREFIX STORAGE
 | |
| 
 | |
#include <gtest/gtest.h>
#include <iostream>
#include <memory>
#include <random>
 | |
| #define protected public
 | |
| #define private public
 | |
| #include "storage/blocksstable/cs_encoding/ob_string_stream_encoder.h"
 | |
| #include "storage/blocksstable/cs_encoding/ob_string_stream_decoder.h"
 | |
| #include "storage/blocksstable/cs_encoding/ob_column_encoding_struct.h"
 | |
| #include "storage/blocksstable/cs_encoding/ob_cs_decoding_util.h"
 | |
| #include "lib/codec/ob_fast_delta.h"
 | |
| #include "lib/compress/ob_compress_util.h"
 | |
| 
 | |
| namespace oceanbase
 | |
| {
 | |
| namespace blocksstable
 | |
| {
 | |
| 
 | |
| class TestStringStream : public ::testing::Test
 | |
| {
 | |
| public:
 | |
|   virtual void SetUp() {}
 | |
|   virtual void TearDown() {}
 | |
| 
 | |
|   TestStringStream() : tenant_ctx_(500)
 | |
|   {
 | |
|     srand(time(NULL));
 | |
|     share::ObTenantEnv::set_tenant(&tenant_ctx_);
 | |
|   }
 | |
|   virtual ~TestStringStream() {}
 | |
| 
 | |
|   int64_t max_count = 64<<9;
 | |
| 
 | |
|   void randstr(char *str, const int64_t len)
 | |
|   {
 | |
|     int i;
 | |
|     for (i = 0; i < len; ++i)
 | |
|     {
 | |
|       switch ((rand() % 3)) {
 | |
|         case 1:
 | |
|           str[i] = 'A' + rand() % 26;
 | |
|           break;
 | |
|         case 2:
 | |
|           str[i] = 'a' + rand() % 26;
 | |
|           break;
 | |
|         default:
 | |
|           str[i] = '0' + rand() % 10;
 | |
|           break;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   void generate_datums(ObColDatums *datums, const int64_t size, bool has_null, bool is_fix_len,
 | |
|       bool has_empty_string, bool all_empty, int64_t &total_len)
 | |
|   {
 | |
|     total_len = 0;
 | |
|     for (int64_t i = 0; i < size; i++) {
 | |
|       ObDatum datum;
 | |
|       if (has_null && (i % 5 == 0)) {
 | |
|         datum.set_null();
 | |
|       } else {
 | |
|         int64_t len = i%333 + 1;
 | |
|         if (is_fix_len) {
 | |
|           len = 5;
 | |
|         }
 | |
|         if ((has_empty_string && (i % 7 == 0))
 | |
|             || all_empty) {
 | |
|           len = 0;
 | |
|         }
 | |
|         char *tmp = reinterpret_cast<char *>(allocator_.alloc(len));
 | |
|         datum.ptr_ = tmp;
 | |
|         datum.len_ = len;
 | |
|         randstr(tmp, len);
 | |
|       }
 | |
| 
 | |
|       ASSERT_EQ(OB_SUCCESS, datums->push_back(datum));
 | |
|       total_len += datum.len_;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   void generate_bitmap(char *bitmap, ObColDatums *datums)
 | |
|   {
 | |
|     for (int64_t i = 0; i < datums->count(); i++) {
 | |
|       if (datums->at(i).is_null()) {
 | |
|         bitmap[i/8] |= (1 << (7 - i%8));
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   void buid_raw_integer_stream_data(const ObStreamData &stream_data,
 | |
|                                     const int64_t count,
 | |
|                                     const ObCompressorType type,
 | |
|                                     ObIntegerStreamDecoderCtx &decode_ctx,
 | |
|                                     ObStreamData &raw_stream_data)
 | |
|   {
 | |
|     uint16_t stream_meta_len = 0;
 | |
|     ASSERT_EQ(OB_SUCCESS, ObIntegerStreamDecoder::build_decoder_ctx(
 | |
|         stream_data, count,type, decode_ctx, stream_meta_len));
 | |
|     ObStreamData stream_data2(stream_data.buf_ + stream_meta_len, stream_data.len_ - stream_meta_len);
 | |
|     const uint32_t width_size = decode_ctx.meta_.get_uint_width_size();
 | |
|     uint32_t array_buf_size = width_size * decode_ctx.count_;
 | |
|     char *array_buf = (char*)allocator_.alloc(array_buf_size);
 | |
|     ASSERT_EQ(OB_SUCCESS, ObIntegerStreamDecoder::transform_to_raw_array(stream_data2, decode_ctx, array_buf, allocator_));
 | |
|     raw_stream_data.set(array_buf, array_buf_size);
 | |
|   }
 | |
| 
 | |
|   void test_and_check_str_datums(int64_t size, const ObCompressorType type, bool use_zero_len_as_null, bool has_null, bool is_fix_len,
 | |
|       bool use_nullbitmap, bool has_empty_string, bool all_null, bool all_empty, bool half_null_half_empty, bool use_null_replaced_ref)
 | |
|   {
 | |
|     LOG_INFO("test_and_check_string_encoding", K(size), K(type), K(use_zero_len_as_null), K(has_null), K(is_fix_len),
 | |
|         K(use_nullbitmap), K(all_null), K(half_null_half_empty), K(use_null_replaced_ref));
 | |
|     ObArenaAllocator local_arena;
 | |
|     ObStringStreamEncoderCtx ctx;
 | |
|     ObCSEncodingOpt encoding_opt;
 | |
|     bool is_use_zero_len_as_null = use_zero_len_as_null;
 | |
|     int64_t fixed_len = -1;
 | |
|     if (is_fix_len) {
 | |
|       fixed_len = 5;
 | |
|     }
 | |
|     if (all_empty) {
 | |
|       fixed_len = 0;
 | |
|     }
 | |
|     if (!(use_nullbitmap || all_empty || has_empty_string || half_null_half_empty || use_null_replaced_ref)) {
 | |
|       is_use_zero_len_as_null = true;
 | |
|     }
 | |
|     common::ObCompressor *compressor = nullptr;
 | |
|     ASSERT_EQ(OB_SUCCESS, ObCompressorPool::get_instance().get_compressor(type, compressor));
 | |
| 
 | |
|     ObStringStreamEncoder encoder;
 | |
|     uint32_t *data = nullptr;
 | |
|     ObColDatums *datums = new ObColDatums(local_arena);
 | |
|     ASSERT_EQ(OB_SUCCESS, datums->resize(max_count));
 | |
|     datums->reuse();
 | |
|     if (half_null_half_empty) {
 | |
|       all_empty = true;
 | |
|     }
 | |
|     int64_t total_len = 0;
 | |
|     generate_datums(datums, size, has_null, is_fix_len, has_empty_string, all_empty, total_len);
 | |
|     if (all_null) {
 | |
|       for (int64_t i = 0; (i < datums->count()); i++) {
 | |
|         total_len -= datums->at(i).len_;
 | |
|         datums->at(i).set_null();
 | |
|       }
 | |
|     }
 | |
|     if (half_null_half_empty) {
 | |
|       for (int64_t i = 0; (i < datums->count()); i++) {
 | |
|         if (i%2 == 0) {
 | |
|           total_len -= datums->at(i).len_;
 | |
|           datums->at(i).set_null();
 | |
|         }
 | |
|       }
 | |
|     }
 | |
|     if (fixed_len >= 0) {
 | |
|       total_len = datums->count() * fixed_len;
 | |
|     }
 | |
|     ctx.build_string_stream_meta(fixed_len, is_use_zero_len_as_null, total_len);
 | |
|     ctx.build_string_stream_encoder_info(type, false, &encoding_opt, nullptr, -1, &allocator_);
 | |
|     int64_t bitmap_size = pad8(size);
 | |
|     char *bitmap = new char[bitmap_size];
 | |
|     memset(bitmap, 0, bitmap_size);
 | |
|     generate_bitmap(bitmap, datums);
 | |
| 
 | |
|     ObMicroBufferWriter writer;
 | |
|     ObMicroBufferWriter all_string_writer;
 | |
|     ASSERT_EQ(OB_SUCCESS, writer.init(OB_DEFAULT_MACRO_BLOCK_SIZE, OB_DEFAULT_MACRO_BLOCK_SIZE));
 | |
|     ASSERT_EQ(OB_SUCCESS, all_string_writer.init(OB_DEFAULT_MACRO_BLOCK_SIZE, OB_DEFAULT_MACRO_BLOCK_SIZE));
 | |
|     common::ObArray<uint32_t> offsets;
 | |
| 
 | |
|     ObColumnDatumIter iter(*datums);
 | |
|     ASSERT_EQ(OB_SUCCESS, encoder.encode(ctx, iter, writer, &all_string_writer, offsets));
 | |
| 
 | |
|     ObStreamData str_data;
 | |
|     ObStreamData raw_offset_data;
 | |
|     // 1. decode integer_stream header
 | |
|     ObIntegerStreamDecoderCtx offset_decoder_ctx;
 | |
|     uint16_t meta_len = 0;
 | |
|     if (!ctx.meta_.is_fixed_len_string()) {
 | |
|       const char *int_stream_start = writer.data() + offsets[0];
 | |
|       int64_t int_stream_len = offsets[1] - offsets[0];
 | |
|       ObStreamData data(int_stream_start, int_stream_len);
 | |
|       buid_raw_integer_stream_data(data, size, type, offset_decoder_ctx, raw_offset_data);
 | |
|     }
 | |
| 
 | |
|     // 2. build_decoding_ctx for string stream
 | |
|     ObStringStreamDecoderCtx str_decode_ctx;
 | |
|     const char *str_stream_start = writer.data();
 | |
|     int64_t str_stream_meta_len = offsets[0];
 | |
|     ObStreamData data2(str_stream_start, str_stream_meta_len);
 | |
|     uint16_t str_meta_len = 0;
 | |
|     ASSERT_EQ(OB_SUCCESS, ObStringStreamDecoder::build_decoder_ctx(data2, str_decode_ctx, str_meta_len));
 | |
|     str_data.set(all_string_writer.data(), all_string_writer.length());
 | |
| 
 | |
|     // 3. decode str
 | |
|     ObColDatums *datums2 = new ObColDatums(local_arena);
 | |
|     ASSERT_EQ(OB_SUCCESS, datums2->resize(max_count));
 | |
|     datums2->reuse();
 | |
|     for (int64_t i = 0; i < size; i++) {
 | |
|       ObDatum datum;
 | |
|       ASSERT_EQ(OB_SUCCESS, datums2->push_back(datum));
 | |
|     }
 | |
| 
 | |
|     // test batch decode
 | |
|     int64_t *row_ids = new int64_t[size];
 | |
|     for (int64_t i = 0; i < size; i++) {
 | |
|       row_ids[i] = i;
 | |
|     }
 | |
|     ObDatum *datums3 = new ObDatum[size];
 | |
|     char *datums2_buf = new char[size * sizeof(uint64_t)];
 | |
|     memset(datums2_buf, 0, size * sizeof(uint64_t));
 | |
|     for (int64_t i = 0; i < size; i++) {
 | |
|       datums3[i].ptr_ = (datums2_buf + i * sizeof(uint64_t));
 | |
|     }
 | |
| 
 | |
|     uint64_t *ref_arr = new uint64_t[size];
 | |
|     int64_t null_replaced_ref = size;
 | |
|     uint32_t ref_width_V = ObRefStoreWidthV::NOT_REF;
 | |
| 
 | |
|     ObBaseColumnDecoderCtx base_ctx;
 | |
|     base_ctx.allocator_ = &allocator_;
 | |
|     base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::HAS_NO_NULL;
 | |
|     base_ctx.null_desc_ = nullptr;
 | |
|     if (use_nullbitmap) {
 | |
|       base_ctx.null_bitmap_ = bitmap;
 | |
|       base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::HAS_NULL_BITMAP;
 | |
|     } else if (use_null_replaced_ref) {
 | |
|       ref_width_V = ObRefStoreWidthV::REF_IN_DATUMS;
 | |
|       base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::IS_NULL_REPLACED_REF;
 | |
|       base_ctx.null_replaced_ref_ = null_replaced_ref;
 | |
|       for (int64_t i = 0; i < size; i++) {
 | |
|         if (use_null_replaced_ref && datums->at(i).is_null()) {
 | |
|           datums3[i].pack_ = size;
 | |
|           ref_arr[i] = size;
 | |
|         } else {
 | |
|           datums3[i].pack_ = i;
 | |
|           ref_arr[i] = i;
 | |
|         }
 | |
|       }
 | |
|     } else if (fixed_len < 0 && is_use_zero_len_as_null) {
 | |
|       base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::IS_NULL_REPLACED;
 | |
|     }
 | |
| 
 | |
|     const uint8_t offset_width = str_decode_ctx.meta_.is_fixed_len_string() ?
 | |
|             FIX_STRING_OFFSET_WIDTH_V : offset_decoder_ctx.meta_.width_;
 | |
|     ConvertStringToDatumFunc convert_func = convert_string_to_datum_funcs
 | |
|         [offset_width]
 | |
|         [ref_width_V]
 | |
|         [base_ctx.null_flag_]
 | |
|         [false/*need_copy_V*/];
 | |
|     convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, nullptr, row_ids, size, datums3);
 | |
| 
 | |
|     for (int64_t i = 0; i < size; i++) {
 | |
|       if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
 | |
|         LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]));
 | |
|         ::abort();
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     // disorder batch decode
 | |
|     for (int64_t i = 0; i < size; i++) {
 | |
|       datums3[i].reset();
 | |
|     }
 | |
|     int64_t random_idx = ObTimeUtility::current_time()%size;
 | |
|     int64_t row_id = 0;
 | |
|     for (int64_t i = 0; i < size; i++) {
 | |
|       row_id = (i + random_idx) % size;
 | |
|       row_ids[i] = row_id;
 | |
|       if (use_null_replaced_ref && datums->at(row_id).is_null()) {
 | |
|         datums3[i].pack_ = size;
 | |
|       } else {
 | |
|         datums3[i].pack_ = row_id;
 | |
|       }
 | |
|       if (i%9 == 0) {
 | |
|         row_ids[i] = random_idx; //duplicate
 | |
|         if (use_null_replaced_ref && datums->at(random_idx).is_null()) {
 | |
|           datums3[i].pack_ = size;
 | |
|         } else {
 | |
|           datums3[i].pack_ = random_idx;
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     convert_func = convert_string_to_datum_funcs
 | |
|         [offset_width]
 | |
|         [ref_width_V]
 | |
|         [base_ctx.null_flag_]
 | |
|         [false/*need_copy_V*/];
 | |
|     convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, nullptr, row_ids, size, datums3);
 | |
| 
 | |
|     for (int64_t i = 0; i < size; i++) {
 | |
|       if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
 | |
|         LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]));
 | |
|         ::abort();
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     // test batch decode with ref arr
 | |
|     if (use_null_replaced_ref) {
 | |
|       convert_func = convert_string_to_datum_funcs
 | |
|           [offset_width]
 | |
|           [ObRefStoreWidthV::REF_8_BYTE]
 | |
|           [base_ctx.null_flag_]
 | |
|           [false/*need_copy_V*/];
 | |
|       convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, (char*)ref_arr, row_ids, size, datums3);
 | |
| 
 | |
|       for (int64_t i = 0; i < size; i++) {
 | |
|         if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
 | |
|           LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]), K(ref_arr[row_ids[i]]));
 | |
|           ::abort();
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     delete []ref_arr;
 | |
|     delete []row_ids;
 | |
|     row_ids = nullptr;
 | |
|     delete []datums3;
 | |
|     datums3 = nullptr;
 | |
|     delete datums2;
 | |
|     datums2 = nullptr;
 | |
|     delete datums;
 | |
|     datums = nullptr;
 | |
|   }
 | |
| 
 | |
| protected:
 | |
|   ObArenaAllocator allocator_;
 | |
|   share::ObTenantBase tenant_ctx_;
 | |
| };
 | |
| 
 | |
| TEST_F(TestStringStream, test_datums_encoding)
 | |
| {
 | |
|   for (int64_t j = 0; j < 13; j++) {
 | |
|     common::ObCompressorType compress_type = ObCompressorType::NONE_COMPRESSOR;
 | |
|     bool use_zero_len_as_null = 0;
 | |
|     bool has_null = false;
 | |
|     bool is_fix_len = false;
 | |
|     bool use_nullbitmap = false;
 | |
|     bool has_empty_string = false;
 | |
|     bool all_null = false;
 | |
|     bool all_empty = false;
 | |
|     bool half_null_half_empty = false;
 | |
|     bool use_null_replaced_ref = false;
 | |
|     if (0 == j) {
 | |
|       compress_type = NONE_COMPRESSOR;
 | |
|     } else if (1 == j) {
 | |
|       compress_type = LZ4_COMPRESSOR;
 | |
|     } else if (2 == j) {
 | |
|       compress_type = SNAPPY_COMPRESSOR;
 | |
|       has_null = true;
 | |
|       use_zero_len_as_null = true;
 | |
|     } else if (3 == j) {
 | |
|       compress_type = ZSTD_1_3_8_COMPRESSOR;
 | |
|       is_fix_len = true;
 | |
|     } else if (4 == j) {
 | |
|       compress_type = SNAPPY_COMPRESSOR;
 | |
|       is_fix_len = true;
 | |
|       use_nullbitmap = true;
 | |
|       has_null = true;
 | |
|     } else if (5 == j) {
 | |
|       compress_type = ZSTD_1_3_8_COMPRESSOR;
 | |
|       has_null = true;
 | |
|       use_nullbitmap = true;
 | |
|     } else if (6 == j) {
 | |
|       has_null = true;
 | |
|       use_nullbitmap = true;
 | |
|       has_empty_string = true;
 | |
|     } else if (7 == j) {
 | |
|       // all null, use null replace value
 | |
|       all_null = true;
 | |
|     } else if (8 == j) {
 | |
|       // all empty string
 | |
|       all_empty = true;
 | |
|     } else if (9 == j) {
 | |
|       // half null and half empty
 | |
|       half_null_half_empty = true;
 | |
|       use_nullbitmap = true;
 | |
|     } else if (10 == j) {
 | |
|       // null replace row id
 | |
|       compress_type = SNAPPY_COMPRESSOR;
 | |
|       use_null_replaced_ref = true;
 | |
|     } else if (11 == j) {
 | |
|       // null replace row id
 | |
|       compress_type = SNAPPY_COMPRESSOR;
 | |
|       use_null_replaced_ref = true;
 | |
|       has_null = true;
 | |
|     } else if (12 == j) {
 | |
|       // null replace row id
 | |
|       compress_type = SNAPPY_COMPRESSOR;
 | |
|       use_null_replaced_ref = true;
 | |
|       all_null = true;
 | |
|     }
 | |
| 
 | |
|     for (int64_t i = 1; i <= max_count; i=(i * (i + j + 1))) {
 | |
|       LOG_INFO("round", K(i), K(j));
 | |
|       test_and_check_str_datums(i, compress_type, use_zero_len_as_null, has_null, is_fix_len, use_nullbitmap,
 | |
|           has_empty_string, all_null, all_empty, half_null_half_empty, use_null_replaced_ref);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| } // end namespace blocksstable
 | |
| } // end namespace oceanbase
 | |
| 
 | |
| int main(int argc, char **argv)
 | |
| {
 | |
|   system("rm -f test_string_stream.log*");
 | |
|   OB_LOGGER.set_file_name("test_string_stream.log", true, false);
 | |
|   oceanbase::common::ObLogger::get_logger().set_log_level("INFO");
 | |
|   testing::InitGoogleTest(&argc, argv);
 | |
|   return RUN_ALL_TESTS();
 | |
| }
 | 
