424 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			424 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/**
 | 
						|
 * Copyright (c) 2021 OceanBase
 | 
						|
 * OceanBase CE is licensed under Mulan PubL v2.
 | 
						|
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 | 
						|
 * You may obtain a copy of Mulan PubL v2 at:
 | 
						|
 *          http://license.coscl.org.cn/MulanPubL-2.0
 | 
						|
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
						|
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
						|
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
						|
 * See the Mulan PubL v2 for more details.
 | 
						|
 */
 | 
						|
 | 
						|
#define USING_LOG_PREFIX STORAGE
 | 
						|
 | 
						|
#include <gtest/gtest.h>
 | 
						|
#include <iostream>
 | 
						|
#include <random>
 | 
						|
#define protected public
 | 
						|
#define private public
 | 
						|
#include "storage/blocksstable/cs_encoding/ob_string_stream_encoder.h"
 | 
						|
#include "storage/blocksstable/cs_encoding/ob_string_stream_decoder.h"
 | 
						|
#include "storage/blocksstable/cs_encoding/ob_column_encoding_struct.h"
 | 
						|
#include "storage/blocksstable/cs_encoding/ob_cs_decoding_util.h"
 | 
						|
#include "lib/codec/ob_fast_delta.h"
 | 
						|
#include "lib/compress/ob_compress_util.h"
 | 
						|
 | 
						|
namespace oceanbase
 | 
						|
{
 | 
						|
namespace blocksstable
 | 
						|
{
 | 
						|
 | 
						|
class TestStringStream : public ::testing::Test
 | 
						|
{
 | 
						|
public:
 | 
						|
  virtual void SetUp() {}
 | 
						|
  virtual void TearDown() {}
 | 
						|
 | 
						|
  TestStringStream() : tenant_ctx_(500)
 | 
						|
  {
 | 
						|
    srand(time(NULL));
 | 
						|
    share::ObTenantEnv::set_tenant(&tenant_ctx_);
 | 
						|
  }
 | 
						|
  virtual ~TestStringStream() {}
 | 
						|
 | 
						|
  int64_t max_count = 64<<9;
 | 
						|
 | 
						|
  void randstr(char *str, const int64_t len)
 | 
						|
  {
 | 
						|
    int i;
 | 
						|
    for (i = 0; i < len; ++i)
 | 
						|
    {
 | 
						|
      switch ((rand() % 3)) {
 | 
						|
        case 1:
 | 
						|
          str[i] = 'A' + rand() % 26;
 | 
						|
          break;
 | 
						|
        case 2:
 | 
						|
          str[i] = 'a' + rand() % 26;
 | 
						|
          break;
 | 
						|
        default:
 | 
						|
          str[i] = '0' + rand() % 10;
 | 
						|
          break;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  void generate_datums(ObColDatums *datums, const int64_t size, bool has_null, bool is_fix_len,
 | 
						|
      bool has_empty_string, bool all_empty, int64_t &total_len)
 | 
						|
  {
 | 
						|
    total_len = 0;
 | 
						|
    for (int64_t i = 0; i < size; i++) {
 | 
						|
      ObDatum datum;
 | 
						|
      if (has_null && (i % 5 == 0)) {
 | 
						|
        datum.set_null();
 | 
						|
      } else {
 | 
						|
        int64_t len = i%333 + 1;
 | 
						|
        if (is_fix_len) {
 | 
						|
          len = 5;
 | 
						|
        }
 | 
						|
        if ((has_empty_string && (i % 7 == 0))
 | 
						|
            || all_empty) {
 | 
						|
          len = 0;
 | 
						|
        }
 | 
						|
        char *tmp = reinterpret_cast<char *>(allocator_.alloc(len));
 | 
						|
        datum.ptr_ = tmp;
 | 
						|
        datum.len_ = len;
 | 
						|
        randstr(tmp, len);
 | 
						|
      }
 | 
						|
 | 
						|
      ASSERT_EQ(OB_SUCCESS, datums->push_back(datum));
 | 
						|
      total_len += datum.len_;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  void generate_bitmap(char *bitmap, ObColDatums *datums)
 | 
						|
  {
 | 
						|
    for (int64_t i = 0; i < datums->count(); i++) {
 | 
						|
      if (datums->at(i).is_null()) {
 | 
						|
        bitmap[i/8] |= (1 << (7 - i%8));
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  void buid_raw_integer_stream_data(const ObStreamData &stream_data,
 | 
						|
                                    const int64_t count,
 | 
						|
                                    const ObCompressorType type,
 | 
						|
                                    ObIntegerStreamDecoderCtx &decode_ctx,
 | 
						|
                                    ObStreamData &raw_stream_data)
 | 
						|
  {
 | 
						|
    uint16_t stream_meta_len = 0;
 | 
						|
    ASSERT_EQ(OB_SUCCESS, ObIntegerStreamDecoder::build_decoder_ctx(
 | 
						|
        stream_data, count,type, decode_ctx, stream_meta_len));
 | 
						|
    ObStreamData stream_data2(stream_data.buf_ + stream_meta_len, stream_data.len_ - stream_meta_len);
 | 
						|
    const uint32_t width_size = decode_ctx.meta_.get_uint_width_size();
 | 
						|
    uint32_t array_buf_size = width_size * decode_ctx.count_;
 | 
						|
    char *array_buf = (char*)allocator_.alloc(array_buf_size);
 | 
						|
    ASSERT_EQ(OB_SUCCESS, ObIntegerStreamDecoder::transform_to_raw_array(stream_data2, decode_ctx, array_buf, allocator_));
 | 
						|
    raw_stream_data.set(array_buf, array_buf_size);
 | 
						|
  }
 | 
						|
 | 
						|
  void test_and_check_str_datums(int64_t size, const ObCompressorType type, bool use_zero_len_as_null, bool has_null, bool is_fix_len,
 | 
						|
      bool use_nullbitmap, bool has_empty_string, bool all_null, bool all_empty, bool half_null_half_empty, bool use_null_replaced_ref)
 | 
						|
  {
 | 
						|
    LOG_INFO("test_and_check_string_encoding", K(size), K(type), K(use_zero_len_as_null), K(has_null), K(is_fix_len),
 | 
						|
        K(use_nullbitmap), K(all_null), K(half_null_half_empty), K(use_null_replaced_ref));
 | 
						|
    ObArenaAllocator local_arena;
 | 
						|
    ObStringStreamEncoderCtx ctx;
 | 
						|
    ObCSEncodingOpt encoding_opt;
 | 
						|
    bool is_use_zero_len_as_null = use_zero_len_as_null;
 | 
						|
    int64_t fixed_len = -1;
 | 
						|
    if (is_fix_len) {
 | 
						|
      fixed_len = 5;
 | 
						|
    }
 | 
						|
    if (all_empty) {
 | 
						|
      fixed_len = 0;
 | 
						|
    }
 | 
						|
    if (!(use_nullbitmap || all_empty || has_empty_string || half_null_half_empty || use_null_replaced_ref)) {
 | 
						|
      is_use_zero_len_as_null = true;
 | 
						|
    }
 | 
						|
    common::ObCompressor *compressor = nullptr;
 | 
						|
    ASSERT_EQ(OB_SUCCESS, ObCompressorPool::get_instance().get_compressor(type, compressor));
 | 
						|
 | 
						|
    ObStringStreamEncoder encoder;
 | 
						|
    uint32_t *data = nullptr;
 | 
						|
    ObColDatums *datums = new ObColDatums(local_arena);
 | 
						|
    ASSERT_EQ(OB_SUCCESS, datums->resize(max_count));
 | 
						|
    datums->reuse();
 | 
						|
    if (half_null_half_empty) {
 | 
						|
      all_empty = true;
 | 
						|
    }
 | 
						|
    int64_t total_len = 0;
 | 
						|
    generate_datums(datums, size, has_null, is_fix_len, has_empty_string, all_empty, total_len);
 | 
						|
    if (all_null) {
 | 
						|
      for (int64_t i = 0; (i < datums->count()); i++) {
 | 
						|
        total_len -= datums->at(i).len_;
 | 
						|
        datums->at(i).set_null();
 | 
						|
      }
 | 
						|
    }
 | 
						|
    if (half_null_half_empty) {
 | 
						|
      for (int64_t i = 0; (i < datums->count()); i++) {
 | 
						|
        if (i%2 == 0) {
 | 
						|
          total_len -= datums->at(i).len_;
 | 
						|
          datums->at(i).set_null();
 | 
						|
        }
 | 
						|
      }
 | 
						|
    }
 | 
						|
    if (fixed_len >= 0) {
 | 
						|
      total_len = datums->count() * fixed_len;
 | 
						|
    }
 | 
						|
    ctx.build_string_stream_meta(fixed_len, is_use_zero_len_as_null, total_len);
 | 
						|
    ctx.build_string_stream_encoder_info(type, false, &encoding_opt, nullptr, -1, &allocator_);
 | 
						|
    int64_t bitmap_size = pad8(size);
 | 
						|
    char *bitmap = new char[bitmap_size];
 | 
						|
    memset(bitmap, 0, bitmap_size);
 | 
						|
    generate_bitmap(bitmap, datums);
 | 
						|
 | 
						|
    ObMicroBufferWriter writer;
 | 
						|
    ObMicroBufferWriter all_string_writer;
 | 
						|
    ASSERT_EQ(OB_SUCCESS, writer.init(OB_DEFAULT_MACRO_BLOCK_SIZE, OB_DEFAULT_MACRO_BLOCK_SIZE));
 | 
						|
    ASSERT_EQ(OB_SUCCESS, all_string_writer.init(OB_DEFAULT_MACRO_BLOCK_SIZE, OB_DEFAULT_MACRO_BLOCK_SIZE));
 | 
						|
    common::ObArray<uint32_t> offsets;
 | 
						|
 | 
						|
    ObColumnDatumIter iter(*datums);
 | 
						|
    ASSERT_EQ(OB_SUCCESS, encoder.encode(ctx, iter, writer, &all_string_writer, offsets));
 | 
						|
 | 
						|
    ObStreamData str_data;
 | 
						|
    ObStreamData raw_offset_data;
 | 
						|
    // 1. decode integer_stream header
 | 
						|
    ObIntegerStreamDecoderCtx offset_decoder_ctx;
 | 
						|
    uint16_t meta_len = 0;
 | 
						|
    if (!ctx.meta_.is_fixed_len_string()) {
 | 
						|
      const char *int_stream_start = writer.data() + offsets[0];
 | 
						|
      int64_t int_stream_len = offsets[1] - offsets[0];
 | 
						|
      ObStreamData data(int_stream_start, int_stream_len);
 | 
						|
      buid_raw_integer_stream_data(data, size, type, offset_decoder_ctx, raw_offset_data);
 | 
						|
    }
 | 
						|
 | 
						|
    // 2. build_decoding_ctx for string stream
 | 
						|
    ObStringStreamDecoderCtx str_decode_ctx;
 | 
						|
    const char *str_stream_start = writer.data();
 | 
						|
    int64_t str_stream_meta_len = offsets[0];
 | 
						|
    ObStreamData data2(str_stream_start, str_stream_meta_len);
 | 
						|
    uint16_t str_meta_len = 0;
 | 
						|
    ASSERT_EQ(OB_SUCCESS, ObStringStreamDecoder::build_decoder_ctx(data2, str_decode_ctx, str_meta_len));
 | 
						|
    str_data.set(all_string_writer.data(), all_string_writer.length());
 | 
						|
 | 
						|
    // 3. decode str
 | 
						|
    ObColDatums *datums2 = new ObColDatums(local_arena);
 | 
						|
    ASSERT_EQ(OB_SUCCESS, datums2->resize(max_count));
 | 
						|
    datums2->reuse();
 | 
						|
    for (int64_t i = 0; i < size; i++) {
 | 
						|
      ObDatum datum;
 | 
						|
      ASSERT_EQ(OB_SUCCESS, datums2->push_back(datum));
 | 
						|
    }
 | 
						|
 | 
						|
    // test batch decode
 | 
						|
    int64_t *row_ids = new int64_t[size];
 | 
						|
    for (int64_t i = 0; i < size; i++) {
 | 
						|
      row_ids[i] = i;
 | 
						|
    }
 | 
						|
    ObDatum *datums3 = new ObDatum[size];
 | 
						|
    char *datums2_buf = new char[size * sizeof(uint64_t)];
 | 
						|
    memset(datums2_buf, 0, size * sizeof(uint64_t));
 | 
						|
    for (int64_t i = 0; i < size; i++) {
 | 
						|
      datums3[i].ptr_ = (datums2_buf + i * sizeof(uint64_t));
 | 
						|
    }
 | 
						|
 | 
						|
    uint64_t *ref_arr = new uint64_t[size];
 | 
						|
    int64_t null_replaced_ref = size;
 | 
						|
    uint32_t ref_width_V = ObRefStoreWidthV::NOT_REF;
 | 
						|
 | 
						|
    ObBaseColumnDecoderCtx base_ctx;
 | 
						|
    base_ctx.allocator_ = &allocator_;
 | 
						|
    base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::HAS_NO_NULL;
 | 
						|
    base_ctx.null_desc_ = nullptr;
 | 
						|
    if (use_nullbitmap) {
 | 
						|
      base_ctx.null_bitmap_ = bitmap;
 | 
						|
      base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::HAS_NULL_BITMAP;
 | 
						|
    } else if (use_null_replaced_ref) {
 | 
						|
      ref_width_V = ObRefStoreWidthV::REF_IN_DATUMS;
 | 
						|
      base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::IS_NULL_REPLACED_REF;
 | 
						|
      base_ctx.null_replaced_ref_ = null_replaced_ref;
 | 
						|
      for (int64_t i = 0; i < size; i++) {
 | 
						|
        if (use_null_replaced_ref && datums->at(i).is_null()) {
 | 
						|
          datums3[i].pack_ = size;
 | 
						|
          ref_arr[i] = size;
 | 
						|
        } else {
 | 
						|
          datums3[i].pack_ = i;
 | 
						|
          ref_arr[i] = i;
 | 
						|
        }
 | 
						|
      }
 | 
						|
    } else if (fixed_len < 0 && is_use_zero_len_as_null) {
 | 
						|
      base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::IS_NULL_REPLACED;
 | 
						|
    }
 | 
						|
 | 
						|
    const uint8_t offset_width = str_decode_ctx.meta_.is_fixed_len_string() ?
 | 
						|
            FIX_STRING_OFFSET_WIDTH_V : offset_decoder_ctx.meta_.width_;
 | 
						|
    ConvertStringToDatumFunc convert_func = convert_string_to_datum_funcs
 | 
						|
        [offset_width]
 | 
						|
        [ref_width_V]
 | 
						|
        [base_ctx.null_flag_]
 | 
						|
        [false/*need_copy_V*/];
 | 
						|
    convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, nullptr, row_ids, size, datums3);
 | 
						|
 | 
						|
    for (int64_t i = 0; i < size; i++) {
 | 
						|
      if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
 | 
						|
        LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]));
 | 
						|
        ::abort();
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    // disorder batch decode
 | 
						|
    for (int64_t i = 0; i < size; i++) {
 | 
						|
      datums3[i].reset();
 | 
						|
    }
 | 
						|
    int64_t random_idx = ObTimeUtility::current_time()%size;
 | 
						|
    int64_t row_id = 0;
 | 
						|
    for (int64_t i = 0; i < size; i++) {
 | 
						|
      row_id = (i + random_idx) % size;
 | 
						|
      row_ids[i] = row_id;
 | 
						|
      if (use_null_replaced_ref && datums->at(row_id).is_null()) {
 | 
						|
        datums3[i].pack_ = size;
 | 
						|
      } else {
 | 
						|
        datums3[i].pack_ = row_id;
 | 
						|
      }
 | 
						|
      if (i%9 == 0) {
 | 
						|
        row_ids[i] = random_idx; //duplicate
 | 
						|
        if (use_null_replaced_ref && datums->at(random_idx).is_null()) {
 | 
						|
          datums3[i].pack_ = size;
 | 
						|
        } else {
 | 
						|
          datums3[i].pack_ = random_idx;
 | 
						|
        }
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    convert_func = convert_string_to_datum_funcs
 | 
						|
        [offset_width]
 | 
						|
        [ref_width_V]
 | 
						|
        [base_ctx.null_flag_]
 | 
						|
        [false/*need_copy_V*/];
 | 
						|
    convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, nullptr, row_ids, size, datums3);
 | 
						|
 | 
						|
    for (int64_t i = 0; i < size; i++) {
 | 
						|
      if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
 | 
						|
        LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]));
 | 
						|
        ::abort();
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    // test batch decode with ref arr
 | 
						|
    if (use_null_replaced_ref) {
 | 
						|
      convert_func = convert_string_to_datum_funcs
 | 
						|
          [offset_width]
 | 
						|
          [ObRefStoreWidthV::REF_8_BYTE]
 | 
						|
          [base_ctx.null_flag_]
 | 
						|
          [false/*need_copy_V*/];
 | 
						|
      convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, (char*)ref_arr, row_ids, size, datums3);
 | 
						|
 | 
						|
      for (int64_t i = 0; i < size; i++) {
 | 
						|
        if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
 | 
						|
          LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]), K(ref_arr[row_ids[i]]));
 | 
						|
          ::abort();
 | 
						|
        }
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    delete []ref_arr;
 | 
						|
    delete []row_ids;
 | 
						|
    row_ids = nullptr;
 | 
						|
    delete []datums3;
 | 
						|
    datums3 = nullptr;
 | 
						|
    delete datums2;
 | 
						|
    datums2 = nullptr;
 | 
						|
    delete datums;
 | 
						|
    datums = nullptr;
 | 
						|
  }
 | 
						|
 | 
						|
protected:
 | 
						|
  ObArenaAllocator allocator_;
 | 
						|
  share::ObTenantBase tenant_ctx_;
 | 
						|
};
 | 
						|
 | 
						|
// Runs the encode/decode round trip for 13 scenarios (j), each with a growing
// sequence of row counts (i) that stops once i exceeds max_count.
TEST_F(TestStringStream, test_datums_encoding)
{
  const int64_t scenario_cnt = 13;
  for (int64_t j = 0; j < scenario_cnt; ++j) {
    // Defaults of a scenario; each case below only overrides what it needs.
    common::ObCompressorType compress_type = ObCompressorType::NONE_COMPRESSOR;
    bool use_zero_len_as_null = false;
    bool has_null = false;
    bool is_fix_len = false;
    bool use_nullbitmap = false;
    bool has_empty_string = false;
    bool all_null = false;
    bool all_empty = false;
    bool half_null_half_empty = false;
    bool use_null_replaced_ref = false;

    switch (j) {
      case 0:
        compress_type = NONE_COMPRESSOR;
        break;
      case 1:
        compress_type = LZ4_COMPRESSOR;
        break;
      case 2:
        compress_type = SNAPPY_COMPRESSOR;
        has_null = true;
        use_zero_len_as_null = true;
        break;
      case 3:
        compress_type = ZSTD_1_3_8_COMPRESSOR;
        is_fix_len = true;
        break;
      case 4:
        compress_type = SNAPPY_COMPRESSOR;
        is_fix_len = true;
        use_nullbitmap = true;
        has_null = true;
        break;
      case 5:
        compress_type = ZSTD_1_3_8_COMPRESSOR;
        has_null = true;
        use_nullbitmap = true;
        break;
      case 6:
        has_null = true;
        use_nullbitmap = true;
        has_empty_string = true;
        break;
      case 7:
        // all null, use null replace value
        all_null = true;
        break;
      case 8:
        // all empty string
        all_empty = true;
        break;
      case 9:
        // half null and half empty
        half_null_half_empty = true;
        use_nullbitmap = true;
        break;
      case 10:
        // null replace row id, no null rows
        compress_type = SNAPPY_COMPRESSOR;
        use_null_replaced_ref = true;
        break;
      case 11:
        // null replace row id, some null rows
        compress_type = SNAPPY_COMPRESSOR;
        use_null_replaced_ref = true;
        has_null = true;
        break;
      case 12:
        // null replace row id, only null rows
        compress_type = SNAPPY_COMPRESSOR;
        use_null_replaced_ref = true;
        all_null = true;
        break;
      default:
        break;
    }

    // The row count grows as i * (i + j + 1), so each scenario walks its own
    // sequence of sizes starting from 1.
    int64_t i = 1;
    while (i <= max_count) {
      LOG_INFO("round", K(i), K(j));
      test_and_check_str_datums(i, compress_type, use_zero_len_as_null, has_null, is_fix_len, use_nullbitmap,
          has_empty_string, all_null, all_empty, half_null_half_empty, use_null_replaced_ref);
      i = i * (i + j + 1);
    }
  }
}
 | 
						|
 | 
						|
} // end namespace blocksstable
 | 
						|
} // end namespace oceanbase
 | 
						|
 | 
						|
int main(int argc, char **argv)
 | 
						|
{
 | 
						|
  system("rm -f test_string_stream.log*");
 | 
						|
  OB_LOGGER.set_file_name("test_string_stream.log", true, false);
 | 
						|
  oceanbase::common::ObLogger::get_logger().set_log_level("INFO");
 | 
						|
  testing::InitGoogleTest(&argc, argv);
 | 
						|
  return RUN_ALL_TESTS();
 | 
						|
}
 |