424 lines
14 KiB
C++
424 lines
14 KiB
C++
/**
|
|
* Copyright (c) 2021 OceanBase
|
|
* OceanBase CE is licensed under Mulan PubL v2.
|
|
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
|
* You may obtain a copy of Mulan PubL v2 at:
|
|
* http://license.coscl.org.cn/MulanPubL-2.0
|
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
* See the Mulan PubL v2 for more details.
|
|
*/
|
|
|
|
#define USING_LOG_PREFIX STORAGE
|
|
|
|
#include <gtest/gtest.h>
#include <iostream>
#include <memory>
#include <random>
#include <vector>
|
|
#define protected public
|
|
#define private public
|
|
#include "storage/blocksstable/cs_encoding/ob_string_stream_encoder.h"
|
|
#include "storage/blocksstable/cs_encoding/ob_string_stream_decoder.h"
|
|
#include "storage/blocksstable/cs_encoding/ob_column_encoding_struct.h"
|
|
#include "storage/blocksstable/cs_encoding/ob_cs_decoding_util.h"
|
|
#include "lib/codec/ob_fast_delta.h"
|
|
#include "lib/compress/ob_compress_util.h"
|
|
|
|
namespace oceanbase
|
|
{
|
|
namespace blocksstable
|
|
{
|
|
|
|
class TestStringStream : public ::testing::Test
|
|
{
|
|
public:
|
|
virtual void SetUp() {}
|
|
virtual void TearDown() {}
|
|
|
|
TestStringStream() : tenant_ctx_(500)
|
|
{
|
|
srand(time(NULL));
|
|
share::ObTenantEnv::set_tenant(&tenant_ctx_);
|
|
}
|
|
virtual ~TestStringStream() {}
|
|
|
|
int64_t max_count = 64<<9;
|
|
|
|
// Fills str[0 .. len) with random ASCII letters and digits drawn from rand().
// For every byte one rand() call picks the class (upper case, lower case or
// digit) and a second one picks the character inside that class.
// The buffer is NOT NUL-terminated; nothing is written when len <= 0.
void randstr(char *str, const int64_t len)
{
  // The index is int64_t to match len; an int index would overflow for
  // buffers longer than INT_MAX.
  for (int64_t i = 0; i < len; ++i) {
    switch (rand() % 3) {
      case 1:
        str[i] = 'A' + rand() % 26;
        break;
      case 2:
        str[i] = 'a' + rand() % 26;
        break;
      default:
        str[i] = '0' + rand() % 10;
        break;
    }
  }
}
|
|
|
|
// Appends `size` generated datums to `datums` and returns, through
// `total_len`, the sum of len_ over everything that was appended.
//   has_null         - rows with i % 5 == 0 are set to NULL
//   is_fix_len       - non-null rows are 5 bytes long instead of (i % 333) + 1
//   has_empty_string - non-null rows with i % 7 == 0 get length 0
//   all_empty        - every non-null row gets length 0
// Payload bytes are allocated from allocator_ and filled by randstr().
// The checks are gtest ASSERT_* macros, so a failure returns from this helper
// only; the caller has to test for a fatal failure itself.
void generate_datums(ObColDatums *datums, const int64_t size, bool has_null, bool is_fix_len,
    bool has_empty_string, bool all_empty, int64_t &total_len)
{
  total_len = 0;
  for (int64_t i = 0; i < size; i++) {
    ObDatum datum;
    if (has_null && (i % 5 == 0)) {
      datum.set_null();
    } else {
      int64_t len = is_fix_len ? 5 : (i % 333 + 1);
      if (all_empty || (has_empty_string && (i % 7 == 0))) {
        len = 0;
      }
      char *tmp = static_cast<char *>(allocator_.alloc(len));
      // A failed allocation would otherwise make randstr() write through a
      // null pointer. A null result is tolerated only for zero-length rows.
      ASSERT_TRUE(0 == len || nullptr != tmp);
      datum.ptr_ = tmp;
      datum.len_ = len;
      randstr(tmp, len);
    }

    ASSERT_EQ(OB_SUCCESS, datums->push_back(datum));
    total_len += datum.len_;
  }
}
|
|
|
|
void generate_bitmap(char *bitmap, ObColDatums *datums)
|
|
{
|
|
for (int64_t i = 0; i < datums->count(); i++) {
|
|
if (datums->at(i).is_null()) {
|
|
bitmap[i/8] |= (1 << (7 - i%8));
|
|
}
|
|
}
|
|
}
|
|
|
|
void buid_raw_integer_stream_data(const ObStreamData &stream_data,
|
|
const int64_t count,
|
|
const ObCompressorType type,
|
|
ObIntegerStreamDecoderCtx &decode_ctx,
|
|
ObStreamData &raw_stream_data)
|
|
{
|
|
uint16_t stream_meta_len = 0;
|
|
ASSERT_EQ(OB_SUCCESS, ObIntegerStreamDecoder::build_decoder_ctx(
|
|
stream_data, count,type, decode_ctx, stream_meta_len));
|
|
ObStreamData stream_data2(stream_data.buf_ + stream_meta_len, stream_data.len_ - stream_meta_len);
|
|
const uint32_t width_size = decode_ctx.meta_.get_uint_width_size();
|
|
uint32_t array_buf_size = width_size * decode_ctx.count_;
|
|
char *array_buf = (char*)allocator_.alloc(array_buf_size);
|
|
ASSERT_EQ(OB_SUCCESS, ObIntegerStreamDecoder::transform_to_raw_array(stream_data2, decode_ctx, array_buf, allocator_));
|
|
raw_stream_data.set(array_buf, array_buf_size);
|
|
}
|
|
|
|
void test_and_check_str_datums(int64_t size, const ObCompressorType type, bool use_zero_len_as_null, bool has_null, bool is_fix_len,
|
|
bool use_nullbitmap, bool has_empty_string, bool all_null, bool all_empty, bool half_null_half_empty, bool use_null_replaced_ref)
|
|
{
|
|
LOG_INFO("test_and_check_string_encoding", K(size), K(type), K(use_zero_len_as_null), K(has_null), K(is_fix_len),
|
|
K(use_nullbitmap), K(all_null), K(half_null_half_empty), K(use_null_replaced_ref));
|
|
ObArenaAllocator local_arena;
|
|
ObStringStreamEncoderCtx ctx;
|
|
ObCSEncodingOpt encoding_opt;
|
|
bool is_use_zero_len_as_null = use_zero_len_as_null;
|
|
int64_t fixed_len = -1;
|
|
if (is_fix_len) {
|
|
fixed_len = 5;
|
|
}
|
|
if (all_empty) {
|
|
fixed_len = 0;
|
|
}
|
|
if (!(use_nullbitmap || all_empty || has_empty_string || half_null_half_empty || use_null_replaced_ref)) {
|
|
is_use_zero_len_as_null = true;
|
|
}
|
|
common::ObCompressor *compressor = nullptr;
|
|
ASSERT_EQ(OB_SUCCESS, ObCompressorPool::get_instance().get_compressor(type, compressor));
|
|
|
|
ObStringStreamEncoder encoder;
|
|
uint32_t *data = nullptr;
|
|
ObColDatums *datums = new ObColDatums(local_arena);
|
|
ASSERT_EQ(OB_SUCCESS, datums->resize(max_count));
|
|
datums->reuse();
|
|
if (half_null_half_empty) {
|
|
all_empty = true;
|
|
}
|
|
int64_t total_len = 0;
|
|
generate_datums(datums, size, has_null, is_fix_len, has_empty_string, all_empty, total_len);
|
|
if (all_null) {
|
|
for (int64_t i = 0; (i < datums->count()); i++) {
|
|
total_len -= datums->at(i).len_;
|
|
datums->at(i).set_null();
|
|
}
|
|
}
|
|
if (half_null_half_empty) {
|
|
for (int64_t i = 0; (i < datums->count()); i++) {
|
|
if (i%2 == 0) {
|
|
total_len -= datums->at(i).len_;
|
|
datums->at(i).set_null();
|
|
}
|
|
}
|
|
}
|
|
if (fixed_len >= 0) {
|
|
total_len = datums->count() * fixed_len;
|
|
}
|
|
ctx.build_string_stream_meta(fixed_len, is_use_zero_len_as_null, total_len);
|
|
ctx.build_string_stream_encoder_info(type, false, &encoding_opt, nullptr, -1, &allocator_);
|
|
int64_t bitmap_size = pad8(size);
|
|
char *bitmap = new char[bitmap_size];
|
|
memset(bitmap, 0, bitmap_size);
|
|
generate_bitmap(bitmap, datums);
|
|
|
|
ObMicroBufferWriter writer;
|
|
ObMicroBufferWriter all_string_writer;
|
|
ASSERT_EQ(OB_SUCCESS, writer.init(OB_DEFAULT_MACRO_BLOCK_SIZE, OB_DEFAULT_MACRO_BLOCK_SIZE));
|
|
ASSERT_EQ(OB_SUCCESS, all_string_writer.init(OB_DEFAULT_MACRO_BLOCK_SIZE, OB_DEFAULT_MACRO_BLOCK_SIZE));
|
|
common::ObArray<uint32_t> offsets;
|
|
|
|
ObColumnDatumIter iter(*datums);
|
|
ASSERT_EQ(OB_SUCCESS, encoder.encode(ctx, iter, writer, &all_string_writer, offsets));
|
|
|
|
ObStreamData str_data;
|
|
ObStreamData raw_offset_data;
|
|
// 1. decode integer_stream header
|
|
ObIntegerStreamDecoderCtx offset_decoder_ctx;
|
|
uint16_t meta_len = 0;
|
|
if (!ctx.meta_.is_fixed_len_string()) {
|
|
const char *int_stream_start = writer.data() + offsets[0];
|
|
int64_t int_stream_len = offsets[1] - offsets[0];
|
|
ObStreamData data(int_stream_start, int_stream_len);
|
|
buid_raw_integer_stream_data(data, size, type, offset_decoder_ctx, raw_offset_data);
|
|
}
|
|
|
|
// 2. build_decoding_ctx for string stream
|
|
ObStringStreamDecoderCtx str_decode_ctx;
|
|
const char *str_stream_start = writer.data();
|
|
int64_t str_stream_meta_len = offsets[0];
|
|
ObStreamData data2(str_stream_start, str_stream_meta_len);
|
|
uint16_t str_meta_len = 0;
|
|
ASSERT_EQ(OB_SUCCESS, ObStringStreamDecoder::build_decoder_ctx(data2, str_decode_ctx, str_meta_len));
|
|
str_data.set(all_string_writer.data(), all_string_writer.length());
|
|
|
|
// 3. decode str
|
|
ObColDatums *datums2 = new ObColDatums(local_arena);
|
|
ASSERT_EQ(OB_SUCCESS, datums2->resize(max_count));
|
|
datums2->reuse();
|
|
for (int64_t i = 0; i < size; i++) {
|
|
ObDatum datum;
|
|
ASSERT_EQ(OB_SUCCESS, datums2->push_back(datum));
|
|
}
|
|
|
|
// test batch decode
|
|
int64_t *row_ids = new int64_t[size];
|
|
for (int64_t i = 0; i < size; i++) {
|
|
row_ids[i] = i;
|
|
}
|
|
ObDatum *datums3 = new ObDatum[size];
|
|
char *datums2_buf = new char[size * sizeof(uint64_t)];
|
|
memset(datums2_buf, 0, size * sizeof(uint64_t));
|
|
for (int64_t i = 0; i < size; i++) {
|
|
datums3[i].ptr_ = (datums2_buf + i * sizeof(uint64_t));
|
|
}
|
|
|
|
uint64_t *ref_arr = new uint64_t[size];
|
|
int64_t null_replaced_ref = size;
|
|
uint32_t ref_width_V = ObRefStoreWidthV::NOT_REF;
|
|
|
|
ObBaseColumnDecoderCtx base_ctx;
|
|
base_ctx.allocator_ = &allocator_;
|
|
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::HAS_NO_NULL;
|
|
base_ctx.null_desc_ = nullptr;
|
|
if (use_nullbitmap) {
|
|
base_ctx.null_bitmap_ = bitmap;
|
|
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::HAS_NULL_BITMAP;
|
|
} else if (use_null_replaced_ref) {
|
|
ref_width_V = ObRefStoreWidthV::REF_IN_DATUMS;
|
|
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::IS_NULL_REPLACED_REF;
|
|
base_ctx.null_replaced_ref_ = null_replaced_ref;
|
|
for (int64_t i = 0; i < size; i++) {
|
|
if (use_null_replaced_ref && datums->at(i).is_null()) {
|
|
datums3[i].pack_ = size;
|
|
ref_arr[i] = size;
|
|
} else {
|
|
datums3[i].pack_ = i;
|
|
ref_arr[i] = i;
|
|
}
|
|
}
|
|
} else if (fixed_len < 0 && is_use_zero_len_as_null) {
|
|
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::IS_NULL_REPLACED;
|
|
}
|
|
|
|
const uint8_t offset_width = str_decode_ctx.meta_.is_fixed_len_string() ?
|
|
FIX_STRING_OFFSET_WIDTH_V : offset_decoder_ctx.meta_.width_;
|
|
ConvertStringToDatumFunc convert_func = convert_string_to_datum_funcs
|
|
[offset_width]
|
|
[ref_width_V]
|
|
[base_ctx.null_flag_]
|
|
[false/*need_copy_V*/];
|
|
convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, nullptr, row_ids, size, datums3);
|
|
|
|
for (int64_t i = 0; i < size; i++) {
|
|
if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
|
|
LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]));
|
|
::abort();
|
|
}
|
|
}
|
|
|
|
// disorder batch decode
|
|
for (int64_t i = 0; i < size; i++) {
|
|
datums3[i].reset();
|
|
}
|
|
int64_t random_idx = ObTimeUtility::current_time()%size;
|
|
int64_t row_id = 0;
|
|
for (int64_t i = 0; i < size; i++) {
|
|
row_id = (i + random_idx) % size;
|
|
row_ids[i] = row_id;
|
|
if (use_null_replaced_ref && datums->at(row_id).is_null()) {
|
|
datums3[i].pack_ = size;
|
|
} else {
|
|
datums3[i].pack_ = row_id;
|
|
}
|
|
if (i%9 == 0) {
|
|
row_ids[i] = random_idx; //duplicate
|
|
if (use_null_replaced_ref && datums->at(random_idx).is_null()) {
|
|
datums3[i].pack_ = size;
|
|
} else {
|
|
datums3[i].pack_ = random_idx;
|
|
}
|
|
}
|
|
}
|
|
|
|
convert_func = convert_string_to_datum_funcs
|
|
[offset_width]
|
|
[ref_width_V]
|
|
[base_ctx.null_flag_]
|
|
[false/*need_copy_V*/];
|
|
convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, nullptr, row_ids, size, datums3);
|
|
|
|
for (int64_t i = 0; i < size; i++) {
|
|
if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
|
|
LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]));
|
|
::abort();
|
|
}
|
|
}
|
|
|
|
// test batch decode with ref arr
|
|
if (use_null_replaced_ref) {
|
|
convert_func = convert_string_to_datum_funcs
|
|
[offset_width]
|
|
[ObRefStoreWidthV::REF_8_BYTE]
|
|
[base_ctx.null_flag_]
|
|
[false/*need_copy_V*/];
|
|
convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, (char*)ref_arr, row_ids, size, datums3);
|
|
|
|
for (int64_t i = 0; i < size; i++) {
|
|
if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
|
|
LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]), K(ref_arr[row_ids[i]]));
|
|
::abort();
|
|
}
|
|
}
|
|
}
|
|
|
|
delete []ref_arr;
|
|
delete []row_ids;
|
|
row_ids = nullptr;
|
|
delete []datums3;
|
|
datums3 = nullptr;
|
|
delete datums2;
|
|
datums2 = nullptr;
|
|
delete datums;
|
|
datums = nullptr;
|
|
}
|
|
|
|
protected:
|
|
ObArenaAllocator allocator_;
|
|
share::ObTenantBase tenant_ctx_;
|
|
};
|
|
|
|
// Runs test_and_check_str_datums() for 13 scenarios (j = 0..12), each a
// different combination of compressor and null/length flags, and for every
// scenario at a series of growing row counts bounded by max_count.
TEST_F(TestStringStream, test_datums_encoding)
{
  for (int64_t j = 0; j < 13; j++) {
    // Every scenario starts from "no compression, every flag off" and only
    // overrides what it needs.
    common::ObCompressorType compress_type = ObCompressorType::NONE_COMPRESSOR;
    bool use_zero_len_as_null = false;
    bool has_null = false;
    bool is_fix_len = false;
    bool use_nullbitmap = false;
    bool has_empty_string = false;
    bool all_null = false;
    bool all_empty = false;
    bool half_null_half_empty = false;
    bool use_null_replaced_ref = false;

    switch (j) {
      case 0:
        compress_type = NONE_COMPRESSOR;
        break;
      case 1:
        compress_type = LZ4_COMPRESSOR;
        break;
      case 2:
        compress_type = SNAPPY_COMPRESSOR;
        has_null = true;
        use_zero_len_as_null = true;
        break;
      case 3:
        compress_type = ZSTD_1_3_8_COMPRESSOR;
        is_fix_len = true;
        break;
      case 4:
        compress_type = SNAPPY_COMPRESSOR;
        is_fix_len = true;
        use_nullbitmap = true;
        has_null = true;
        break;
      case 5:
        compress_type = ZSTD_1_3_8_COMPRESSOR;
        has_null = true;
        use_nullbitmap = true;
        break;
      case 6:
        has_null = true;
        use_nullbitmap = true;
        has_empty_string = true;
        break;
      case 7:
        // all null, use null replace value
        all_null = true;
        break;
      case 8:
        // all empty string
        all_empty = true;
        break;
      case 9:
        // half null and half empty
        half_null_half_empty = true;
        use_nullbitmap = true;
        break;
      case 10:
        // null replace row id
        compress_type = SNAPPY_COMPRESSOR;
        use_null_replaced_ref = true;
        break;
      case 11:
        // null replace row id, with some null rows
        compress_type = SNAPPY_COMPRESSOR;
        use_null_replaced_ref = true;
        has_null = true;
        break;
      case 12:
        // null replace row id, every row null
        compress_type = SNAPPY_COMPRESSOR;
        use_null_replaced_ref = true;
        all_null = true;
        break;
      default:
        break;
    }

    // Row counts grow as i -> i * (i + j + 1), starting from a single row.
    int64_t i = 1;
    while (i <= max_count) {
      LOG_INFO("round", K(i), K(j));
      test_and_check_str_datums(i, compress_type, use_zero_len_as_null, has_null, is_fix_len, use_nullbitmap,
          has_empty_string, all_null, all_empty, half_null_half_empty, use_null_replaced_ref);
      i = i * (i + j + 1);
    }
  }
}
|
|
|
|
} // end namespace blocksstable
|
|
} // end namespace oceanbase
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
system("rm -f test_string_stream.log*");
|
|
OB_LOGGER.set_file_name("test_string_stream.log", true, false);
|
|
oceanbase::common::ObLogger::get_logger().set_log_level("INFO");
|
|
testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
}
|