Merge branch 'column_store'
Co-authored-by: wangt1xiuyi <13547954130@163.com> Co-authored-by: yangqise7en <877793735@qq.com> Co-authored-by: Zach41 <zach_41@163.com>
This commit is contained in:
10
unittest/storage/blocksstable/cs_encoding/CMakeLists.txt
Normal file
10
unittest/storage/blocksstable/cs_encoding/CMakeLists.txt
Normal file
@ -0,0 +1,10 @@
|
||||
# Register every cs_encoding unit test listed below through storage_unittest().
foreach(cs_encoding_test
    test_integer_stream
    test_string_stream
    test_cs_encoder
    test_cs_decoder
    test_integer_pd_filter
    test_int_dict_pd_filter
    test_string_pd_filter
    test_str_dict_pd_filter
    test_decimal_int_pd_filter
    test_perf_cmp_result)
  storage_unittest(${cs_encoding_test})
endforeach()
|
||||
@ -0,0 +1,406 @@
|
||||
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#ifndef OCEANBASE_CS_ENCODING_OB_CS_ENCODING_TEST_BASE_H_
|
||||
#define OCEANBASE_CS_ENCODING_OB_CS_ENCODING_TEST_BASE_H_
|
||||
#define USING_LOG_PREFIX STORAGE
|
||||
|
||||
#include "../ob_row_generate.h"
|
||||
#include "common/rowkey/ob_rowkey.h"
|
||||
#include "lib/string/ob_sql_string.h"
|
||||
#include "share/ob_cluster_version.h"
|
||||
#include "storage/blocksstable/ob_block_sstable_struct.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_column_encoding_struct.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_micro_block_cs_decoder.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_micro_block_cs_encoder.h"
|
||||
#include "storage/blocksstable/ob_decode_resource_pool.h"
|
||||
#include "storage/ob_i_store.h"
|
||||
#include <gtest/gtest.h>
|
||||
#include "sql/engine/basic/ob_pushdown_filter.h"
|
||||
#include "sql/engine/ob_exec_context.h"
|
||||
#include "unittest/storage/mock_ob_table_read_info.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
using namespace sql;
|
||||
namespace blocksstable
|
||||
{
|
||||
|
||||
using namespace common;
|
||||
using namespace storage;
|
||||
using namespace share::schema;
|
||||
class ObCSEncodingTestBase
|
||||
{
|
||||
public:
|
||||
ObCSEncodingTestBase(): tenant_ctx_(500)
|
||||
{
|
||||
decode_res_pool_ = new(allocator_.alloc(sizeof(ObDecodeResourcePool))) ObDecodeResourcePool;
|
||||
tenant_ctx_.set(decode_res_pool_);
|
||||
share::ObTenantEnv::set_tenant(&tenant_ctx_);
|
||||
decode_res_pool_->init();
|
||||
}
|
||||
virtual ~ObCSEncodingTestBase() {}
|
||||
int prepare(const ObObjType *col_types, const int64_t rowkey_cnt, const int64_t column_cnt,
|
||||
const ObCompressorType compressor_type = ObCompressorType::ZSTD_1_3_8_COMPRESSOR,
|
||||
const int64_t *precision_arr = nullptr);
|
||||
void reuse();
|
||||
|
||||
protected:
|
||||
int init_cs_decoder(const ObMicroBlockHeader *header,
|
||||
const ObMicroBlockDesc &desc,
|
||||
ObMicroBlockData &full_transformed_data,
|
||||
ObMicroBlockCSDecoder &decoder);
|
||||
int build_micro_block_desc(ObMicroBlockCSEncoder &encoder, ObMicroBlockDesc &desc, ObMicroBlockHeader* &header);
|
||||
int full_transform_check_row(const ObMicroBlockHeader *header,
|
||||
const ObMicroBlockDesc &desc,
|
||||
const ObDatumRow *row_arr,
|
||||
const int64_t row_cnt,
|
||||
const bool check_by_get = false);
|
||||
int part_transform_check_row(const ObMicroBlockHeader *header,
|
||||
const ObMicroBlockDesc &desc,
|
||||
const ObDatumRow *row_arr,
|
||||
const int64_t row_cnt,
|
||||
const bool check_by_get = false);
|
||||
int check_get_row_count(const ObMicroBlockHeader *header,
|
||||
const ObMicroBlockDesc &desc,
|
||||
const int64_t *expected_row_cnt_arr,
|
||||
const int64_t col_cnt,
|
||||
const bool contains_null);
|
||||
|
||||
protected:
|
||||
ObRowGenerate row_generate_;
|
||||
ObMicroBlockEncodingCtx ctx_;
|
||||
MockObTableReadInfo read_info_;
|
||||
ObArenaAllocator allocator_;
|
||||
common::ObArray<share::schema::ObColDesc> col_descs_;
|
||||
int64_t column_cnt_;
|
||||
share::ObTenantBase tenant_ctx_;
|
||||
ObDecodeResourcePool *decode_res_pool_;
|
||||
};
|
||||
|
||||
int ObCSEncodingTestBase::prepare(const ObObjType *col_types, const int64_t rowkey_cnt,
|
||||
const int64_t column_cnt, const ObCompressorType compressor_type, const int64_t *precision_arr)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
const int64_t tid = 200001;
|
||||
ObTableSchema table;
|
||||
ObColumnSchemaV2 col;
|
||||
table.reset();
|
||||
table.set_tenant_id(1);
|
||||
table.set_tablegroup_id(1);
|
||||
table.set_database_id(1);
|
||||
table.set_table_id(tid);
|
||||
table.set_table_name("test_cs_encoder_schema");
|
||||
table.set_rowkey_column_num(rowkey_cnt);
|
||||
table.set_max_column_id(column_cnt * 2);
|
||||
ctx_.column_encodings_ = static_cast<int64_t *>(allocator_.alloc(sizeof(int64_t) * column_cnt));
|
||||
ObSqlString str;
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < column_cnt; ++i) {
|
||||
col.reset();
|
||||
col.set_table_id(tid);
|
||||
col.set_column_id(i + OB_APP_MIN_COLUMN_ID);
|
||||
str.assign_fmt("test%ld", i);
|
||||
col.set_column_name(str.ptr());
|
||||
ObObjType type = col_types[i];
|
||||
col.set_data_type(type);
|
||||
|
||||
|
||||
if (ObDecimalIntType == type) {
|
||||
if (precision_arr == nullptr) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid precision_arr", K(ret));
|
||||
} else {
|
||||
col.set_data_precision(precision_arr[i]);
|
||||
col.set_data_scale(0);
|
||||
}
|
||||
}
|
||||
if (ObVarcharType == type || ObCharType == type || ObHexStringType == type
|
||||
|| ObNVarchar2Type == type || ObNCharType == type || ObTextType == type){
|
||||
col.set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI);
|
||||
if (ObCharType == type) {
|
||||
const int64_t max_char_length = lib::is_oracle_mode()
|
||||
? OB_MAX_ORACLE_CHAR_LENGTH_BYTE
|
||||
: OB_MAX_CHAR_LENGTH;
|
||||
col.set_data_length(max_char_length);
|
||||
}
|
||||
} else {
|
||||
col.set_collation_type(CS_TYPE_BINARY);
|
||||
}
|
||||
if (i < rowkey_cnt) {
|
||||
col.set_rowkey_position(i + 1);
|
||||
} else {
|
||||
col.set_rowkey_position(0);
|
||||
}
|
||||
if (OB_FAIL(table.add_column(col))) {
|
||||
LOG_WARN("fail to add column", K(ret));
|
||||
}
|
||||
ctx_.column_encodings_[i] = ObCSColumnHeader::Type::MAX_TYPE;
|
||||
}
|
||||
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_FAIL(row_generate_.init(table, &allocator_))) {
|
||||
LOG_WARN("fail to init row_generate_", K(ret));
|
||||
} else if (OB_FAIL(row_generate_.get_schema().get_column_ids(col_descs_))) {
|
||||
LOG_WARN("fail to get_column_ids", K(ret));
|
||||
}
|
||||
const ObColumnSchemaV2 *col_schema = nullptr;
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < column_cnt; ++i) {
|
||||
if (OB_ISNULL(col_schema = row_generate_.get_schema().get_column_schema(i + OB_APP_MIN_COLUMN_ID))) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("The column schema is NULL", K(ret), K(i));
|
||||
} else if (col_descs_.at(i).col_type_.is_decimal_int()) {
|
||||
col_descs_.at(i).col_type_.set_stored_precision(col_schema->get_accuracy().get_precision());
|
||||
col_descs_.at(i).col_type_.set_scale(col_schema->get_accuracy().get_scale());
|
||||
}
|
||||
}
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_FAIL(read_info_.init(allocator_, row_generate_.get_schema().get_column_count(),
|
||||
row_generate_.get_schema().get_rowkey_column_num(), lib::is_oracle_mode(),
|
||||
col_descs_, nullptr))) {
|
||||
LOG_WARN("fail to init read_info", K(ret));
|
||||
} else {
|
||||
ctx_.micro_block_size_ = 1L << 20; // 1MB, maximum micro block size;
|
||||
ctx_.macro_block_size_ = 2L << 20;
|
||||
ctx_.rowkey_column_cnt_ = rowkey_cnt;
|
||||
ctx_.column_cnt_ = column_cnt;
|
||||
ctx_.col_descs_ = &col_descs_;
|
||||
ctx_.major_working_cluster_version_ = cal_version(4, 1, 0, 0);
|
||||
ctx_.row_store_type_ = common::CS_ENCODING_ROW_STORE;
|
||||
ctx_.compressor_type_ = compressor_type;
|
||||
ctx_.need_calc_column_chksum_ = true;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void ObCSEncodingTestBase::reuse()
|
||||
{
|
||||
read_info_.reset();
|
||||
row_generate_.reset();
|
||||
allocator_.reuse();
|
||||
col_descs_.reuse();
|
||||
ctx_.column_encodings_ = nullptr;
|
||||
}
|
||||
|
||||
// Lets the encoder produce its micro block desc and then completes the block
// header in place: lengths are taken from the desc, the data checksum is computed
// over desc.buf_, and the header checksum is set last.
// On success `header` points at the (now writable) header referenced by desc.
int ObCSEncodingTestBase::build_micro_block_desc(ObMicroBlockCSEncoder &encoder, ObMicroBlockDesc &desc, ObMicroBlockHeader* &header)
{
  int ret = encoder.build_micro_block_desc(desc);
  if (OB_SUCCESS != ret) {
    LOG_WARN("fail to build_micro_block_desc", K(ret));
    return ret;
  }

  // desc.header_ is const; the test needs to patch it.
  ObMicroBlockHeader *mutable_header = const_cast<ObMicroBlockHeader *>(desc.header_);
  header = mutable_header;
  mutable_header->data_length_ = desc.buf_size_;
  mutable_header->data_zlength_ = desc.buf_size_;
  mutable_header->data_checksum_ = ob_crc64_sse42(0, desc.buf_, desc.buf_size_);
  mutable_header->original_length_ = desc.original_size_;
  mutable_header->set_header_checksum();
  return ret;
}
|
||||
|
||||
int ObCSEncodingTestBase::init_cs_decoder(const ObMicroBlockHeader *header,
|
||||
const ObMicroBlockDesc &desc,
|
||||
ObMicroBlockData &full_transformed_data,
|
||||
ObMicroBlockCSDecoder &decoder)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
int64_t pos = 0;
|
||||
char *buf = nullptr;
|
||||
int64_t buf_len = 0;
|
||||
ObCSMicroBlockTransformer tansformer;
|
||||
if (OB_FAIL(tansformer.init(header, desc.buf_, desc.buf_size_))) {
|
||||
LOG_WARN("fail to init tansformer", K(ret));
|
||||
} else if (OB_FAIL(tansformer.calc_full_transform_size(buf_len))) {
|
||||
LOG_WARN("fail to calc_full_transform_size", K(ret));
|
||||
} else if (OB_ISNULL(buf = static_cast<char *>(allocator_.alloc(buf_len)))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("fail to alloc", K(ret), K(buf_len));
|
||||
} else if (OB_FAIL(tansformer.full_transform(buf, buf_len, pos))) {
|
||||
LOG_WARN("fail to full transfrom", K(ret));
|
||||
} else if (buf_len != pos) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("pos is unexpected", K(ret), K(buf_len), K(pos));
|
||||
} else {
|
||||
full_transformed_data.buf_ = buf;
|
||||
full_transformed_data.size_ = buf_len;
|
||||
if (OB_FAIL(decoder.init(full_transformed_data, read_info_))) {
|
||||
LOG_WARN("fail to init decoder", K(ret));
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObCSEncodingTestBase::full_transform_check_row(const ObMicroBlockHeader *header,
|
||||
const ObMicroBlockDesc &desc,
|
||||
const ObDatumRow *row_arr,
|
||||
const int64_t row_cnt,
|
||||
const bool check_by_get)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObMicroBlockData full_transformed_data;
|
||||
ObMicroBlockCSDecoder decoder;
|
||||
if (OB_FAIL(init_cs_decoder(header, desc, full_transformed_data, decoder))) {
|
||||
LOG_WARN("fail to init cs_decoder", KR(ret));
|
||||
} else {
|
||||
ObDatumRow row;
|
||||
if (OB_FAIL(row.init(allocator_, ctx_.column_cnt_))) {
|
||||
LOG_WARN("fail to init row", K(ret), K(ctx_.column_cnt_));
|
||||
}
|
||||
|
||||
for (int32_t i = 0; OB_SUCC(ret) && i < row_cnt; ++i) {
|
||||
if (OB_FAIL(decoder.get_row(i, row))) {
|
||||
LOG_WARN("fail to get row", K(i));
|
||||
}
|
||||
for (int64_t j = 0; OB_SUCC(ret) && j < ctx_.column_cnt_; ++j) {
|
||||
if (!ObDatum::binary_equal(row_arr[i].storage_datums_[j], row.storage_datums_[j])) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_INFO("not equal row: ", K(ret), K(i), K(j), K(row_arr[i].storage_datums_[j]), K(row.storage_datums_[j]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret) && check_by_get) {
|
||||
ObCSEncodeBlockGetReader get_reader;
|
||||
ObDatumRowkey rowkey;
|
||||
for(int32_t i = 0; i < row_cnt; ++i) {
|
||||
rowkey.assign(row_arr[i].storage_datums_, ctx_.rowkey_column_cnt_);
|
||||
if (OB_FAIL(get_reader.get_row(full_transformed_data, rowkey, read_info_, row))) {
|
||||
LOG_WARN("fail to get row", K(ret), K(rowkey));
|
||||
}
|
||||
for (int64_t j = 0; OB_SUCC(ret) && j < ctx_.column_cnt_; ++j) {
|
||||
if (!ObDatum::binary_equal(row_arr[i].storage_datums_[j], row.storage_datums_[j])) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_INFO("not equal row: ", K(ret), K(i), K(j), K(row_arr[i].storage_datums_[j]), K(row.storage_datums_[j]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObCSEncodingTestBase::part_transform_check_row(const ObMicroBlockHeader *header,
|
||||
const ObMicroBlockDesc &desc,
|
||||
const ObDatumRow *row_arr,
|
||||
const int64_t row_cnt,
|
||||
const bool check_by_get)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
// test part transform
|
||||
ObMicroBlockCSDecoder decoder;
|
||||
const char *block_buf = desc.buf_ - header->header_size_;
|
||||
const int64_t block_buf_len = desc.buf_size_ + header->header_size_;
|
||||
ObMicroBlockData part_transformed_data(block_buf, block_buf_len);
|
||||
int32_t project_step = 1;
|
||||
for (int project_step = 1; OB_SUCC(ret) && project_step < ctx_.column_cnt_; project_step++) {
|
||||
ObDatumRow row;
|
||||
MockObTableReadInfo read_info;
|
||||
common::ObArray<int32_t> storage_cols_index;
|
||||
common::ObArray<share::schema::ObColDesc> col_descs;
|
||||
for (int store_id = 0; OB_SUCC(ret) && store_id < ctx_.column_cnt_; store_id += project_step) {
|
||||
if (OB_FAIL(storage_cols_index.push_back(store_id))) {
|
||||
LOG_WARN("fail to push_back", K(ret), K(store_id));
|
||||
} else if (OB_FAIL(col_descs.push_back(col_descs_.at(store_id)))) {
|
||||
LOG_WARN("fail to push_back", K(ret), K(store_id));
|
||||
}
|
||||
}
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_FAIL(read_info.init(allocator_, row_generate_.get_schema().get_column_count(),
|
||||
row_generate_.get_schema().get_rowkey_column_num(), lib::is_oracle_mode(),
|
||||
col_descs, &storage_cols_index))) {
|
||||
LOG_WARN("fail to init read_info", K(ret), K(col_descs), K(storage_cols_index));
|
||||
} else if (OB_FAIL(decoder.init(part_transformed_data, read_info))) {
|
||||
LOG_WARN("fail to init decoder", K(ret));
|
||||
} else if (OB_FAIL(row.init(allocator_, col_descs.count()))) {
|
||||
LOG_WARN("fail to init row", K(ret), K(col_descs.count()));
|
||||
}
|
||||
|
||||
LOG_INFO("part read", K(project_step), K(storage_cols_index), K(col_descs), K(read_info));
|
||||
|
||||
for (int32_t i = 0; OB_SUCC(ret) && i < row_cnt; ++i) {
|
||||
if (OB_FAIL(decoder.get_row(i, row))) {
|
||||
LOG_WARN("fail to get row", K(i));
|
||||
}
|
||||
for (int64_t j = 0; OB_SUCC(ret) && j < storage_cols_index.count(); ++j) {
|
||||
if (!ObDatum::binary_equal(row_arr[i].storage_datums_[storage_cols_index.at(j)], row.storage_datums_[j])) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_INFO("not equal row: ", K(ret), K(i), K(j), K(row_arr[i].storage_datums_[j]), K(row.storage_datums_[j]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret) && check_by_get) {
|
||||
ObCSEncodeBlockGetReader get_reader;
|
||||
ObDatumRowkey rowkey;
|
||||
for(int32_t i = 0; i < row_cnt; ++i) {
|
||||
rowkey.assign(row_arr[i].storage_datums_, ctx_.rowkey_column_cnt_);
|
||||
if (OB_FAIL(get_reader.get_row(part_transformed_data, rowkey, read_info, row))) {
|
||||
LOG_WARN("fail to get row", K(ret), K(rowkey));
|
||||
}
|
||||
for (int64_t j = 0; OB_SUCC(ret) && j < storage_cols_index.count(); ++j) {
|
||||
if (!ObDatum::binary_equal(row_arr[i].storage_datums_[storage_cols_index.at(j)], row.storage_datums_[j])) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_INFO("not equal row: ", K(ret), K(i), K(j), K(storage_cols_index.at(j)),
|
||||
K(row_arr[i].storage_datums_[storage_cols_index.at(j)]), K(row.storage_datums_[j]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObCSEncodingTestBase::check_get_row_count(const ObMicroBlockHeader *header,
|
||||
const ObMicroBlockDesc &desc,
|
||||
const int64_t *expected_row_cnt_arr,
|
||||
const int64_t col_cnt,
|
||||
const bool contains_null)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObMicroBlockData full_transformed_data;
|
||||
ObMicroBlockCSDecoder decoder;
|
||||
int64_t *row_ids = nullptr;
|
||||
if (OB_FAIL(init_cs_decoder(header, desc, full_transformed_data, decoder))) {
|
||||
LOG_WARN("fail to init cs_decoder", KR(ret));
|
||||
} else if (OB_ISNULL(row_ids = (int64_t*)allocator_.alloc(header->row_count_ * sizeof(int64_t)))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("fail to alloc", K(ret), KPC(header));
|
||||
} else {
|
||||
int64_t real_row_count = 0;
|
||||
for (int64_t i = 0; i < header->row_count_; i++) {
|
||||
row_ids[i] = i;
|
||||
}
|
||||
|
||||
uint64_t seed = ObTimeUtil::current_time();
|
||||
std::shuffle(row_ids, row_ids + header->row_count_, std::default_random_engine(seed)); // 随机打乱数组
|
||||
|
||||
for (int64_t col_idx = 0; OB_SUCC(ret) && col_idx < col_cnt; col_idx++) {
|
||||
if (OB_FAIL(decoder.get_row_count(col_idx, row_ids, header->row_count_, contains_null, real_row_count))) {
|
||||
if (expected_row_cnt_arr[col_idx] != real_row_count) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("row count mismatch", K(ret), K(contains_null),
|
||||
K(col_idx), K(expected_row_cnt_arr[col_idx]), K(real_row_count));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace blocksstable
|
||||
} // namespace oceanbase
|
||||
|
||||
#endif
|
||||
@ -0,0 +1,485 @@
|
||||
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#define USING_LOG_PREFIX STORAGE
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#define protected public
|
||||
#define private public
|
||||
#include "ob_cs_encoding_test_base.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_cs_encoding_util.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_micro_block_cs_decoder.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_micro_block_cs_encoder.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace blocksstable
|
||||
{
|
||||
|
||||
using namespace common;
|
||||
using namespace storage;
|
||||
using namespace share::schema;
|
||||
|
||||
using ::testing::Bool;
|
||||
using ::testing::Combine;
|
||||
|
||||
#define ENABLE_CASE_CHECK true
|
||||
|
||||
class ObPdFilterTestBase : public ObCSEncodingTestBase, public ::testing::Test
|
||||
{
|
||||
public:
|
||||
ObPdFilterTestBase()
|
||||
: enable_abnormal_filter_type_(false)
|
||||
{}
|
||||
virtual ~ObPdFilterTestBase() {}
|
||||
virtual void SetUp() {}
|
||||
virtual void TearDown()
|
||||
{
|
||||
reuse();
|
||||
enable_abnormal_filter_type_ = false;
|
||||
}
|
||||
|
||||
void set_obj_collation(ObObj &obj, const ObObjType &column_type);
|
||||
void set_obj_meta_collation(ObObjMeta &obj_meta, const ObObjType &column_type);
|
||||
void setup_obj(ObObj& obj, int64_t column_idx, int64_t seed);
|
||||
void setup_obj(ObObj& obj, int64_t column_idx);
|
||||
|
||||
template<typename T>
|
||||
int build_decimal_filter_ref(const int64_t ref_cnt, const T *ref_arr,
|
||||
const int64_t col_offset, ObArray<ObObj> &ref_objs);
|
||||
int build_integer_filter_ref(const int64_t ref_cnt, const int64_t *ref_arr, const int64_t col_offset,
|
||||
ObArray<ObObj> &ref_objs, const bool use_row_gen = false);
|
||||
int build_string_filter_ref(const char *str_buf, const int64_t str_len, const ObObjType &obj_type,
|
||||
ObArray<ObObj> &ref_objs);
|
||||
|
||||
int build_white_filter(sql::ObPushdownOperator &pd_operator,
|
||||
sql::PushdownFilterInfo &pd_filter_info,
|
||||
sql::ObPushdownWhiteFilterNode &pd_filter_node,
|
||||
ObWhiteFilterExecutor *&white_filter,
|
||||
ObBitmap *&res_bitmap,
|
||||
const ObWhiteFilterOperatorType &op_type,
|
||||
const int64_t row_cnt,
|
||||
const int64_t col_cnt);
|
||||
|
||||
int check_column_store_white_filter(const ObWhiteFilterOperatorType &op_type,
|
||||
const int64_t row_cnt,
|
||||
const int64_t col_cnt,
|
||||
const int64_t col_offset,
|
||||
const ObObjType &col_type,
|
||||
const ObIArray<ObObj> &ref_objs,
|
||||
ObMicroBlockCSDecoder &decoder,
|
||||
const int64_t res_count);
|
||||
|
||||
public:
|
||||
bool enable_abnormal_filter_type_;
|
||||
|
||||
};
|
||||
|
||||
// Convenience wrapper: configures the meta of `obj` for the given column type.
void ObPdFilterTestBase::set_obj_collation(ObObj &obj, const ObObjType &column_type)
{
  ObObjMeta &meta = obj.meta_;
  set_obj_meta_collation(meta, column_type);
}
|
||||
|
||||
// Configures obj_meta for column_type.
//  - string-like types (varchar, char, hex string, nvarchar2, nchar, text) get
//    CS_LEVEL_IMPLICIT / CS_TYPE_UTF8MB4_GENERAL_CI; of those only char and
//    varchar additionally go through set_char() / set_varchar()
//  - every other type is simply stored with set_type()
void ObPdFilterTestBase::set_obj_meta_collation(ObObjMeta &obj_meta, const ObObjType &column_type)
{
  const bool is_string_like = ObVarcharType == column_type
                              || ObCharType == column_type
                              || ObHexStringType == column_type
                              || ObNVarchar2Type == column_type
                              || ObNCharType == column_type
                              || ObTextType == column_type;
  if (!is_string_like) {
    obj_meta.set_type(column_type);
  } else {
    obj_meta.cs_level_ = CS_LEVEL_IMPLICIT;
    obj_meta.cs_type_ = CS_TYPE_UTF8MB4_GENERAL_CI;
    if (ObCharType == column_type) {
      obj_meta.set_char();
    } else if (ObVarcharType == column_type) {
      obj_meta.set_varchar();
    }
  }
}
|
||||
|
||||
// Builds `obj` for column `column_idx`: copies the column meta, lets
// row_generate_ produce the value for `seed`, then applies the collation rules.
void ObPdFilterTestBase::setup_obj(ObObj& obj, int64_t column_idx, int64_t seed)
{
  const auto &col_desc = row_generate_.column_list_.at(column_idx);
  const ObObjMeta column_meta = col_desc.col_type_;
  const ObObjType column_type = column_meta.get_type();
  obj.copy_meta_type(column_meta);
  row_generate_.set_obj(column_type, col_desc.col_id_, seed, obj, 0);
  set_obj_collation(obj, column_type);
}
|
||||
|
||||
// Copies the meta of column `column_idx` into `obj` (the value is left alone).
// With enable_abnormal_filter_type_ set, signed integer columns are widened to
// int and unsigned ones to uint64 first, so the filter type differs from the
// stored column type.
void ObPdFilterTestBase::setup_obj(ObObj& obj, int64_t column_idx)
{
  ObObjMeta column_meta = row_generate_.column_list_.at(column_idx).col_type_;
  if (enable_abnormal_filter_type_) {
    const ObObjTypeClass type_class = ob_obj_type_class(column_meta.get_type());
    if (ObObjTypeClass::ObIntTC == type_class) {
      column_meta.set_int();
    } else if (ObObjTypeClass::ObUIntTC == type_class) {
      column_meta.set_uint64();
    }
  }
  obj.copy_meta_type(column_meta);
  set_obj_collation(obj, column_meta.get_type());
}
|
||||
|
||||
int ObPdFilterTestBase::build_integer_filter_ref(
|
||||
const int64_t ref_cnt,
|
||||
const int64_t *ref_arr,
|
||||
const int64_t col_offset,
|
||||
ObArray<ObObj> &ref_objs,
|
||||
const bool use_row_gen)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (ref_cnt < 1 || OB_ISNULL(ref_arr)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid argument", KR(ret), K(ref_cnt));
|
||||
} else if (OB_FAIL(ref_objs.reserve(ref_cnt))) {
|
||||
LOG_WARN("fail to reserve", KR(ret), K(ref_cnt));
|
||||
} else {
|
||||
for (int64_t i = 0; OB_SUCC(ret) && (i < ref_cnt); ++i) {
|
||||
if (use_row_gen) {
|
||||
ObObj ref_obj;
|
||||
setup_obj(ref_obj, col_offset, ref_arr[i]/*actual is ref_seed_arr*/);
|
||||
if (OB_FAIL(ref_objs.push_back(ref_obj))) {
|
||||
LOG_WARN("fail to push back", KR(ret), K(i));
|
||||
}
|
||||
} else {
|
||||
ObObj ref_obj(ref_arr[i]);
|
||||
setup_obj(ref_obj, col_offset);
|
||||
if (OB_FAIL(ref_objs.push_back(ref_obj))) {
|
||||
LOG_WARN("fail to push back", KR(ret), K(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
int ObPdFilterTestBase::build_decimal_filter_ref(
|
||||
const int64_t ref_cnt,
|
||||
const T *ref_arr,
|
||||
const int64_t col_offset,
|
||||
ObArray<ObObj> &ref_objs)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (ref_cnt < 1 || OB_ISNULL(ref_arr)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid argument", KR(ret), K(ref_cnt));
|
||||
} else if (OB_FAIL(ref_objs.reserve(ref_cnt))) {
|
||||
LOG_WARN("fail to reserve", KR(ret), K(ref_cnt));
|
||||
} else {
|
||||
const int32_t int_bytes = sizeof(T);
|
||||
char *buf = nullptr;
|
||||
for (int64_t i = 0; OB_SUCC(ret) && (i < ref_cnt); ++i) {
|
||||
if (OB_ISNULL(buf = (char *)allocator_.alloc(int_bytes))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
STORAGE_LOG(WARN, "fail to alloc memory");
|
||||
} else {
|
||||
ObObj ref_obj;
|
||||
MEMCPY(buf, &ref_arr[i], int_bytes);
|
||||
ref_obj.set_decimal_int(int_bytes, 0, reinterpret_cast<ObDecimalInt *>(buf));
|
||||
if (OB_FAIL(ref_objs.push_back(ref_obj))) {
|
||||
LOG_WARN("fail to push back", KR(ret), K(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObPdFilterTestBase::build_string_filter_ref(
|
||||
const char *str_buf,
|
||||
const int64_t str_len,
|
||||
const ObObjType &obj_type,
|
||||
ObArray<ObObj> &ref_objs)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_ISNULL(str_buf) || OB_UNLIKELY(str_len < 0)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid argument", KR(ret), K(str_len));
|
||||
} else {
|
||||
ObObj ref_obj;
|
||||
ref_obj.meta_.set_char();
|
||||
set_obj_collation(ref_obj, obj_type);
|
||||
ref_obj.set_char_value(str_buf, str_len);
|
||||
if (OB_FAIL(ref_objs.push_back(ref_obj))) {
|
||||
LOG_WARN("fail to push back", KR(ret));
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Prepares everything needed to run a white filter over a micro block.
 *
 * Fills pd_filter_info with a datum buffer of col_cnt entries covering rows
 * [0, row_cnt), stores op_type in pd_filter_node, allocates the white filter
 * executor from allocator_ and initializes its result bitmap.
 *
 * @param pd_operator     pushdown operator handed to the executor
 * @param pd_filter_info  [out] filter info (datum buffer, start, count)
 * @param pd_filter_node  [in/out] filter node, receives op_type
 * @param white_filter    [out] newly allocated executor
 * @param res_bitmap      [out] bitmap created by the executor
 * @param op_type         white filter operator
 * @param row_cnt         number of rows, must be >= 1
 * @param col_cnt         number of columns, must be >= 1
 * @return OB_SUCCESS, OB_INVALID_ARGUMENT, OB_ALLOCATE_MEMORY_FAILED,
 *         OB_ERR_UNEXPECTED when no bitmap was produced, or the init_bitmap error.
 */
int ObPdFilterTestBase::build_white_filter(
    sql::ObPushdownOperator &pd_operator,
    sql::PushdownFilterInfo &pd_filter_info,
    sql::ObPushdownWhiteFilterNode &pd_filter_node,
    ObWhiteFilterExecutor *&white_filter,
    ObBitmap *&res_bitmap,
    const ObWhiteFilterOperatorType &op_type,
    const int64_t row_cnt,
    const int64_t col_cnt)
{
  int ret = OB_SUCCESS;
  void *storage_datum_buf = nullptr;
  if (row_cnt < 1 || col_cnt < 1) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", KR(ret), K(col_cnt), K(row_cnt));
  } else if (OB_ISNULL(storage_datum_buf = allocator_.alloc(sizeof(ObStorageDatum) * col_cnt))) {
    // Placement-new on a null buffer would be undefined behavior.
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("fail to allocate storage datum buf", KR(ret), K(col_cnt));
  } else {
    // build pd_filter_info
    pd_filter_info.datum_buf_ = new (storage_datum_buf) ObStorageDatum [col_cnt]();
    pd_filter_info.col_capacity_ = col_cnt;
    pd_filter_info.start_ = 0;
    pd_filter_info.count_ = row_cnt;

    // build white_filter_executor
    pd_filter_node.op_type_ = op_type;

    ObIAllocator* allocator_ptr = &allocator_;
    white_filter = OB_NEWx(ObWhiteFilterExecutor, allocator_ptr, allocator_, pd_filter_node, pd_operator);

    if (OB_ISNULL(white_filter)) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("fail to allocate white filter", KR(ret));
    } else if (OB_FAIL(white_filter->init_bitmap(row_cnt, res_bitmap))) {
      LOG_WARN("fail to init bitmap", KR(ret), K(row_cnt));
    } else if (OB_ISNULL(res_bitmap)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("result bitmap should not be null", KR(ret), K(row_cnt));
    }
  }
  return ret;
}
|
||||
|
||||
int ObPdFilterTestBase::check_column_store_white_filter(
|
||||
const ObWhiteFilterOperatorType &op_type,
|
||||
const int64_t row_cnt,
|
||||
const int64_t col_cnt,
|
||||
const int64_t col_offset,
|
||||
const ObObjType &col_type,
|
||||
const ObIArray<ObObj> &ref_objs,
|
||||
ObMicroBlockCSDecoder &decoder,
|
||||
const int64_t res_count)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
sql::ObPushdownWhiteFilterNode pd_filter_node(allocator_);
|
||||
sql::PushdownFilterInfo pd_filter_info;
|
||||
sql::ObExecContext exec_ctx(allocator_);
|
||||
sql::ObEvalCtx eval_ctx(exec_ctx);
|
||||
sql::ObPushdownExprSpec expr_spec(allocator_);
|
||||
sql::ObPushdownOperator pd_operator(eval_ctx, expr_spec);
|
||||
ObWhiteFilterExecutor *white_filter = nullptr;
|
||||
common::ObBitmap *res_bitmap = nullptr;
|
||||
|
||||
if (row_cnt < 1 || col_cnt < 1 || col_offset < 0) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid argument", KR(ret), K(row_cnt), K(col_cnt), K(col_offset), K(ref_objs.count()));
|
||||
} else if (OB_FAIL(build_white_filter(pd_operator, pd_filter_info, pd_filter_node, white_filter, res_bitmap,
|
||||
op_type, row_cnt, col_cnt))) {
|
||||
LOG_WARN("fail to build white filter", KR(ret));
|
||||
} else if (OB_ISNULL(white_filter) || OB_ISNULL(res_bitmap)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
} else if (ref_objs.count() < 1 && op_type != sql::WHITE_OP_NU && op_type != sql::WHITE_OP_NN) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid argument", KR(ret), K(op_type));
|
||||
} else {
|
||||
white_filter->col_offsets_.init(1);
|
||||
white_filter->col_params_.init(1);
|
||||
ObColumnParam col_param(allocator_);
|
||||
ObObjMeta col_meta;
|
||||
if (ref_objs.count() < 1) {
|
||||
set_obj_meta_collation(col_meta, col_type);
|
||||
col_param.set_meta_type(col_meta);
|
||||
} else {
|
||||
col_param.set_meta_type(ref_objs.at(0).meta_);
|
||||
}
|
||||
white_filter->col_params_.push_back(&col_param);
|
||||
white_filter->col_offsets_.push_back(col_offset);
|
||||
white_filter->n_cols_ = 1;
|
||||
|
||||
int arg_cnt = ref_objs.count() + 1;
|
||||
if (sql::WHITE_OP_NU == op_type || sql::WHITE_OP_NN == op_type) {
|
||||
arg_cnt = 2;
|
||||
}
|
||||
|
||||
void *expr_ptr = allocator_.alloc(sizeof(sql::ObExpr));
|
||||
MEMSET(expr_ptr, '\0', sizeof(sql::ObExpr));
|
||||
void *expr_ptr_arr = allocator_.alloc(sizeof(sql::ObExpr*) * arg_cnt);
|
||||
MEMSET(expr_ptr_arr, '\0', sizeof(sql::ObExpr*) * arg_cnt);
|
||||
void *expr_arr = allocator_.alloc(sizeof(sql::ObExpr) * arg_cnt);
|
||||
MEMSET(expr_arr, '\0', sizeof(sql::ObExpr) * arg_cnt);
|
||||
EXPECT_TRUE(OB_NOT_NULL(expr_ptr));
|
||||
EXPECT_TRUE(OB_NOT_NULL(expr_ptr_arr));
|
||||
EXPECT_TRUE(OB_NOT_NULL(expr_arr));
|
||||
|
||||
white_filter->filter_.expr_ = reinterpret_cast<sql::ObExpr *>(expr_ptr);
|
||||
white_filter->filter_.expr_->arg_cnt_ = arg_cnt;
|
||||
white_filter->filter_.expr_->args_ = reinterpret_cast<sql::ObExpr **>(expr_ptr_arr);
|
||||
|
||||
ObDatum datums[arg_cnt];
|
||||
white_filter->datum_params_.init(arg_cnt);
|
||||
const int64_t datum_buf_size = sizeof(int8_t) * 128 * arg_cnt;
|
||||
void *datum_buf = allocator_.alloc(datum_buf_size);
|
||||
MEMSET(datum_buf, '\0', datum_buf_size);
|
||||
EXPECT_TRUE(OB_NOT_NULL(datum_buf));
|
||||
|
||||
for (int64_t i = 0; OB_SUCC(ret) && (i < arg_cnt); ++i) {
|
||||
white_filter->filter_.expr_->args_[i] = reinterpret_cast<sql::ObExpr *>(expr_arr) + i;
|
||||
if (i < arg_cnt - 1) {
|
||||
if (sql::WHITE_OP_NU == op_type || sql::WHITE_OP_NN == op_type) {
|
||||
white_filter->filter_.expr_->args_[i]->obj_meta_.set_null();
|
||||
white_filter->filter_.expr_->args_[i]->datum_meta_.type_ = ObNullType;
|
||||
} else {
|
||||
white_filter->filter_.expr_->args_[i]->obj_meta_ = ref_objs.at(i).get_meta();
|
||||
white_filter->filter_.expr_->args_[i]->datum_meta_.type_ = ref_objs.at(i).get_meta().get_type();
|
||||
datums[i].ptr_ = reinterpret_cast<char *>(datum_buf) + i * 128;
|
||||
if (OB_FAIL(datums[i].from_obj(ref_objs.at(i)))) {
|
||||
LOG_WARN("fail to handle datum from obj", KR(ret), K(i), K(ref_objs.at(i)));
|
||||
} else if (OB_FAIL(white_filter->datum_params_.push_back(datums[i]))) {
|
||||
LOG_WARN("fail to push back", KR(ret), K(i), K(datums[i]));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
white_filter->filter_.expr_->args_[i]->type_ = T_REF_COLUMN;
|
||||
}
|
||||
}
|
||||
if (OB_UNLIKELY(2 > white_filter->filter_.expr_->arg_cnt_)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("Unexpected filter expr", K(ret), K(white_filter->filter_.expr_->arg_cnt_));
|
||||
} else {
|
||||
white_filter->cmp_func_ = get_datum_cmp_func(white_filter->filter_.expr_->args_[0]->obj_meta_, white_filter->filter_.expr_->args_[0]->obj_meta_);
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret) ) {
|
||||
if (sql::WHITE_OP_IN == white_filter->get_op_type()) {
|
||||
if (OB_FAIL(white_filter->init_obj_set())) {
|
||||
LOG_WARN("fail to init obj_set", KR(ret));
|
||||
}
|
||||
}
|
||||
|
||||
if (FAILEDx(decoder.filter_pushdown_filter(nullptr, *white_filter, pd_filter_info, *res_bitmap))) {
|
||||
LOG_WARN("fail to filter pushdown filter", KR(ret));
|
||||
}
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret)) {
|
||||
if (res_count != res_bitmap->popcnt()) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("result count not match", K(res_count), K(res_bitmap->popcnt()), K(ref_objs));
|
||||
// ::ob_abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Runs `round` white-filter checks whose reference values are integers.
//
// Expands to statements (not an expression) and uses ASSERT_EQ, so it must be expanded
// inside a void test body. Besides its parameters it relies on these names being visible
// at the expansion site: need_check, enable_check, ref_arr, col_offset, row_cnt, col_cnt,
// col_types and decoder.
//   flag    - bitwise-ANDed with enable_check; the result is stored in need_check and gates the run
//   op_type - forwarded to check_column_store_white_filter
//   round   - number of checks; round i reads ref_arr[ref_cnt * i, ref_cnt * (i + 1))
//   ref_cnt - reference values per round; when it is 0 ref_objs stays empty
//   res_arr - res_arr[i] is forwarded as the last argument of check_column_store_white_filter
//             (presumably the expected number of matching rows - confirm against its signature)
// Every macro parameter is parenthesized so that expression arguments keep their meaning.
#define integer_type_filter_normal_check(flag, op_type, round, ref_cnt, res_arr) \
  need_check = (flag) & enable_check; \
  if (need_check) { \
    int64_t tmp_ref_arr[(ref_cnt)]; \
    ObArray<ObObj> ref_objs; \
    for (int64_t i = 0; i < (round); ++i) { \
      ref_objs.reset(); \
      if ((ref_cnt) > 0) { \
        int64_t start_idx = (ref_cnt) * i; \
        for (int64_t j = start_idx; (j < (ref_cnt) * (i + 1)); ++j) { \
          tmp_ref_arr[j - start_idx] = ref_arr[j]; \
        } \
        ASSERT_EQ(OB_SUCCESS, build_integer_filter_ref((ref_cnt), tmp_ref_arr, col_offset, ref_objs, false)); \
      } \
      ASSERT_EQ(OB_SUCCESS, check_column_store_white_filter((op_type), row_cnt, col_cnt, \
          col_offset, col_types[col_offset], ref_objs, decoder, (res_arr)[i])) << "round: " << i << std::endl; \
    } \
  }
|
||||
|
||||
// Runs `round` white-filter checks whose reference values are taken from ref_arr and turned
// into filter objects by build_decimal_filter_ref.
//
// Expands to statements (not an expression) and uses ASSERT_EQ, so it must be expanded
// inside a void test body. Besides its parameters it relies on these names being visible
// at the expansion site: need_check, enable_check, ref_arr, col_offset, row_cnt, col_cnt,
// col_types and decoder. The scratch array takes its element type from *ref_arr.
//   flag    - bitwise-ANDed with enable_check; the result is stored in need_check and gates the run
//   op_type - forwarded to check_column_store_white_filter
//   round   - number of checks; round i reads ref_arr[ref_cnt * i, ref_cnt * (i + 1))
//   ref_cnt - reference values per round; when it is 0 ref_objs stays empty
//   res_arr - res_arr[i] is forwarded as the last argument of check_column_store_white_filter
//             (presumably the expected number of matching rows - confirm against its signature)
// Every macro parameter is parenthesized so that expression arguments keep their meaning.
#define decimal_type_filter_normal_check(flag, op_type, round, ref_cnt, res_arr) \
  need_check = (flag) & enable_check; \
  if (need_check) { \
    std::decay<decltype(*ref_arr)>::type tmp_ref_arr[(ref_cnt)]; \
    ObArray<ObObj> ref_objs; \
    for (int64_t i = 0; i < (round); ++i) { \
      ref_objs.reset(); \
      if ((ref_cnt) > 0) { \
        int64_t start_idx = (ref_cnt) * i; \
        for (int64_t j = start_idx; (j < (ref_cnt) * (i + 1)); ++j) { \
          tmp_ref_arr[j - start_idx] = ref_arr[j]; \
        } \
        ASSERT_EQ(OB_SUCCESS, build_decimal_filter_ref((ref_cnt), tmp_ref_arr, col_offset, ref_objs)); \
      } \
      ASSERT_EQ(OB_SUCCESS, check_column_store_white_filter((op_type), row_cnt, col_cnt, \
          col_offset, col_types[col_offset], ref_objs, decoder, (res_arr)[i])) << "round: " << i << std::endl; \
    } \
  }
|
||||
|
||||
|
||||
// Runs `round` white-filter checks whose reference values are built from integer seeds.
//
// Same shape as integer_type_filter_normal_check, except that the seeds come from
// ref_seed_arr and build_integer_filter_ref is called with `true` as its last argument
// (NOTE(review): that flag's meaning is not visible here - confirm against the helper).
//
// Expands to statements (not an expression) and uses ASSERT_EQ, so it must be expanded
// inside a void test body. Besides its parameters it relies on these names being visible
// at the expansion site: need_check, enable_check, ref_seed_arr, col_offset, row_cnt,
// col_cnt, col_types and decoder.
//   flag    - bitwise-ANDed with enable_check; the result is stored in need_check and gates the run
//   op_type - forwarded to check_column_store_white_filter
//   round   - number of checks; round i reads ref_seed_arr[ref_cnt * i, ref_cnt * (i + 1))
//   ref_cnt - seeds per round; when it is 0 ref_objs stays empty
//   res_arr - res_arr[i] is forwarded as the last argument of check_column_store_white_filter
//             (presumably the expected number of matching rows - confirm against its signature)
// Every macro parameter is parenthesized so that expression arguments keep their meaning.
#define raw_type_filter_normal_check(flag, op_type, round, ref_cnt, res_arr) \
  need_check = (flag) & enable_check; \
  if (need_check) { \
    int64_t tmp_ref_seed_arr[(ref_cnt)]; \
    ObArray<ObObj> ref_objs; \
    for (int64_t i = 0; i < (round); ++i) { \
      ref_objs.reset(); \
      if ((ref_cnt) > 0) { \
        int64_t start_idx = (ref_cnt) * i; \
        for (int64_t j = start_idx; (j < (ref_cnt) * (i + 1)); ++j) { \
          tmp_ref_seed_arr[j - start_idx] = ref_seed_arr[j]; \
        } \
        ASSERT_EQ(OB_SUCCESS, build_integer_filter_ref((ref_cnt), tmp_ref_seed_arr, col_offset, ref_objs, true)); \
      } \
      ASSERT_EQ(OB_SUCCESS, check_column_store_white_filter((op_type), row_cnt, col_cnt, \
          col_offset, col_types[col_offset], ref_objs, decoder, (res_arr)[i])) << "round: " << i << std::endl; \
    } \
  }
|
||||
|
||||
// Runs `round` white-filter checks whose reference values are strings.
//
// Each entry of ref_arr is a pair: `.first` indexes char_data_arr and `.second` is passed
// to build_string_filter_ref as its second argument (presumably the string length - confirm
// against the helper). One filter object is appended to ref_objs per pair.
//
// Expands to statements (not an expression) and uses ASSERT_EQ, so it must be expanded
// inside a void test body. Besides its parameters it relies on these names being visible
// at the expansion site: need_check, enable_check, ref_arr, char_data_arr, col_offset,
// row_cnt, col_cnt, col_types and decoder.
//   flag    - bitwise-ANDed with enable_check; the result is stored in need_check and gates the run
//   op_type - forwarded to check_column_store_white_filter
//   round   - number of checks; round i reads ref_arr[ref_cnt * i, ref_cnt * (i + 1))
//   ref_cnt - reference pairs per round; when it is 0 ref_objs stays empty
//   res_arr - res_arr[i] is forwarded as the last argument of check_column_store_white_filter
//             (presumably the expected number of matching rows - confirm against its signature)
// Every macro parameter is parenthesized so that expression arguments keep their meaning,
// and a failing round is reported in the assertion message.
#define string_type_filter_normal_check(flag, op_type, round, ref_cnt, res_arr) \
  need_check = (flag) & enable_check; \
  if (need_check) { \
    std::pair<int64_t, int64_t> tmp_ref_arr[(ref_cnt)]; \
    ObArray<ObObj> ref_objs; \
    for (int64_t i = 0; i < (round); ++i) { \
      ref_objs.reset(); \
      if ((ref_cnt) > 0) { \
        int64_t start_idx = (ref_cnt) * i; \
        for (int64_t j = start_idx; (j < (ref_cnt) * (i + 1)); ++j) { \
          const int64_t cur_idx = j - start_idx; \
          tmp_ref_arr[cur_idx] = ref_arr[j]; \
          ASSERT_EQ(OB_SUCCESS, build_string_filter_ref(char_data_arr[tmp_ref_arr[cur_idx].first], tmp_ref_arr[cur_idx].second, col_types[col_offset], ref_objs)); \
        } \
      } \
      ASSERT_EQ(OB_SUCCESS, check_column_store_white_filter((op_type), row_cnt, col_cnt, \
          col_offset, col_types[col_offset], ref_objs, decoder, (res_arr)[i])) << "round: " << i << std::endl; \
    } \
  }
|
||||
|
||||
|
||||
// Builds the micro block from `encoder`, verifies that full and part transform reproduce
// `row_arr`, then initializes a decoder for the push-down filter checks that follow.
//
// Expands to declarations and statements in the caller's scope, so it can be used once per
// scope and must sit inside a void test body (it uses ASSERT_EQ). It reads `encoder`,
// `row_arr` and `row_cnt` from the expansion site and leaves `micro_block_desc`, `header`,
// `full_transformed_data` and `decoder` declared for the code after it.
#define HANDLE_TRANSFORM()                                                                             \
  ObMicroBlockDesc micro_block_desc;                                                                   \
  ObMicroBlockHeader *header = nullptr;                                                                \
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));                    \
  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));   \
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));   \
  LOG_INFO(">>>>>>>>>>FINISH DECODER<<<<<<<<<<<");                                                     \
  LOG_INFO(">>>>>>>>>>START PD FILTER<<<<<<<<<<<");                                                    \
  ObMicroBlockData full_transformed_data;                                                              \
  ObMicroBlockCSDecoder decoder;                                                                       \
  ASSERT_EQ(OB_SUCCESS, init_cs_decoder(header, micro_block_desc, full_transformed_data, decoder));
|
||||
704
unittest/storage/blocksstable/cs_encoding/test_cs_decoder.cpp
Normal file
704
unittest/storage/blocksstable/cs_encoding/test_cs_decoder.cpp
Normal file
@ -0,0 +1,704 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#define USING_LOG_PREFIX STORAGE
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#define protected public
|
||||
#define private public
|
||||
#include "ob_cs_encoding_test_base.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_cs_encoding_util.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_micro_block_cs_decoder.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_micro_block_cs_encoder.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace blocksstable
|
||||
{
|
||||
|
||||
using namespace common;
|
||||
using namespace storage;
|
||||
using namespace share::schema;
|
||||
|
||||
using ::testing::Bool;
|
||||
using ::testing::Combine;
|
||||
|
||||
// Fixture for the column-store decoder tests. It is both an ObCSEncodingTestBase and a
// gtest value-parameterized fixture whose parameter is a pair of booleans; each TEST_P
// below decides what the two booleans mean.
class TestCSDecoder : public ObCSEncodingTestBase, public ::testing::TestWithParam<std::tuple<bool, bool>>
{
public:
  TestCSDecoder() {}
  ~TestCSDecoder() override {}

  // Nothing to prepare per test; each test calls prepare() itself.
  void SetUp() override {}
  // Calls the inherited reuse() after every test.
  void TearDown() override { reuse(); }
};
|
||||
|
||||
// Encodes three integer data sets with (ObIntType, ObUInt64Type, ObIntType) columns and, for
// each one, checks full transform, part transform and the per-column non-null row counts.
// Test parameters:
//   <0> has_null     - whether the last 20 rows of data sets <1> and <2> carry nulls
//   <1> is_force_raw - value assigned to encoder.is_all_column_force_raw_
TEST_P(TestCSDecoder, test_integer_decoder)
{
  const bool has_null = std::get<0>(GetParam());
  const bool is_force_raw = std::get<1>(GetParam());
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 3;
  ObObjType col_types[col_cnt] = {ObIntType, ObUInt64Type, ObIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INTEGER;

  const int64_t row_cnt = 120;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = is_force_raw;
  ObDatumRow row_arr[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }
  int64_t row_cnt_without_null[col_cnt] = {row_cnt, row_cnt, row_cnt};

  // <1> the first 100 rows are generated from seeds -50 to 49; the last 20 rows may be null
  for (int64_t i = 0; i < row_cnt - 20; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i - 50, row_arr[i]));
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }
  for (int64_t i = row_cnt - 20; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int(i);
    if (has_null) {
      row_arr[i].storage_datums_[1].set_null();
      row_arr[i].storage_datums_[2].set_null();
      row_cnt_without_null[1]--;
      row_cnt_without_null[2]--;
    } else {
      row_arr[i].storage_datums_[1].set_uint(UINT64_MAX - i);
      // column 2 is ObIntType, so it is filled through the signed setter like everywhere else
      row_arr[i].storage_datums_[2].set_int(INT64_MAX - i);
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }
  ObMicroBlockDesc micro_block_desc;
  ObMicroBlockHeader *header = nullptr;
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));
  LOG_INFO("finish build_micro_block_desc", K(micro_block_desc));
  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, check_get_row_count(header, micro_block_desc, row_cnt_without_null, col_cnt, false));

  // <2> big integer(inc/dec/dec)
  encoder.reuse();
  encoder.is_all_column_force_raw_ = is_force_raw;
  int64_t row_cnt_without_null_1[col_cnt] = {row_cnt, row_cnt, row_cnt};
  for (int64_t i = 0; i < row_cnt - 20; ++i) {
    row_arr[i].storage_datums_[0].set_int(INT64_MIN + i);
    row_arr[i].storage_datums_[1].set_uint(UINT64_MAX - i);
    row_arr[i].storage_datums_[2].set_int(INT64_MAX - i);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }
  for (int64_t i = row_cnt - 20; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int(INT32_MAX + i);
    if (has_null) {
      row_arr[i].storage_datums_[1].set_null();
      row_arr[i].storage_datums_[2].set_null();
      row_cnt_without_null_1[1]--;
      row_cnt_without_null_1[2]--;
    } else {
      row_arr[i].storage_datums_[1].set_uint(UINT64_MAX - i);
      row_arr[i].storage_datums_[2].set_int(INT64_MIN + 100 - i);
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));
  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, check_get_row_count(header, micro_block_desc, row_cnt_without_null_1, col_cnt, false));

  // <3> all null integer
  encoder.reuse();
  encoder.is_all_column_force_raw_ = is_force_raw;
  int64_t row_cnt_without_null_2[col_cnt] = {row_cnt, 0, 0};
  for (int64_t i = 0; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int(i);
    row_arr[i].storage_datums_[1].set_null();
    row_arr[i].storage_datums_[2].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));
  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, check_get_row_count(header, micro_block_desc, row_cnt_without_null_2, col_cnt, false));
}
|
||||
|
||||
// Encodes two data sets with (ObInt32Type, ObVarcharType, ObCharType) columns and, for each
// one, checks full transform, part transform and the per-column non-null row counts.
// Test parameters:
//   <0> has_null     - selects where nulls are placed (see the two data sets below)
//   <1> is_force_raw - value assigned to encoder.is_all_column_force_raw_
TEST_P(TestCSDecoder, test_string_decoder)
{
  const bool has_null = std::get<0>(GetParam());
  const bool is_force_raw = std::get<1>(GetParam());
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 3;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObVarcharType, ObCharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STRING;

  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = is_force_raw;
  const int64_t row_cnt = 120;
  ObDatumRow row_arr[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }
  int64_t row_cnt_without_null[col_cnt] = {row_cnt, row_cnt, row_cnt};

  // <1> var string + fixed string; the last 20 rows are null when has_null is set
  char *char_data = static_cast<char *>(allocator_.alloc(1024));
  ASSERT_TRUE(nullptr != char_data);
  MEMSET(char_data, 0xbc, 1024);
  for (int64_t i = 0; i < row_cnt - 20; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    row_arr[i].storage_datums_[1].set_string(char_data, i % 100);
    row_arr[i].storage_datums_[2].set_string(char_data, 100);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }
  for (int64_t i = row_cnt - 20; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    if (has_null) {
      row_arr[i].storage_datums_[1].set_null();
      row_arr[i].storage_datums_[2].set_null();
      row_cnt_without_null[1]--;
      row_cnt_without_null[2]--;
    } else {
      row_arr[i].storage_datums_[1].set_string(char_data, i % 100);
      row_arr[i].storage_datums_[2].set_string(char_data, 100);
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }
  ObMicroBlockDesc micro_block_desc;
  ObMicroBlockHeader *header = nullptr;
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));
  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, check_get_row_count(header, micro_block_desc, row_cnt_without_null, col_cnt, false));

  // <2> all zero length + all null
  int64_t row_cnt_without_null_1[col_cnt] = {row_cnt, row_cnt, row_cnt};
  encoder.reuse();
  encoder.is_all_column_force_raw_ = is_force_raw;
  for (int64_t i = 0; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    if (has_null) {
      row_arr[i].storage_datums_[1].set_string(char_data, 0);
      row_arr[i].storage_datums_[2].set_null();
      row_cnt_without_null_1[2]--;
    } else {
      // mix zero_length_string and null in one column
      if (i < row_cnt / 2) {
        row_arr[i].storage_datums_[1].set_string(char_data, 0);
        row_arr[i].storage_datums_[2].set_null();
        row_cnt_without_null_1[2]--;
      } else {
        row_arr[i].storage_datums_[1].set_null();
        row_arr[i].storage_datums_[2].set_string(char_data, 0);
        row_cnt_without_null_1[1]--;
      }
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));

  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, row_arr, row_cnt, true));
  // Verify against the counts computed for THIS data set; the counts of data set <1>
  // describe a different null layout.
  ASSERT_EQ(OB_SUCCESS, check_get_row_count(header, micro_block_desc, row_cnt_without_null_1, col_cnt, false));
}
|
||||
|
||||
// Encodes one data set with (ObInt32Type, ObIntType, ObVarcharType, ObCharType) columns, all
// configured as dictionary encodings, then checks full transform, part transform and the
// per-column non-null row counts.
// Test parameters:
//   <0> has_null     - whether columns 1 and 3 are null in the last 20 rows
//   <1> is_force_raw - value assigned to encoder.is_all_column_force_raw_
TEST_P(TestCSDecoder, test_dict_decoder)
{
  const bool has_null = std::get<0>(GetParam());
  const bool is_force_raw = std::get<1>(GetParam());
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 4;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObIntType, ObVarcharType, ObCharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[2] = ObCSColumnHeader::Type::STR_DICT; // var string
  ctx_.column_encodings_[3] = ObCSColumnHeader::Type::STR_DICT; // fixed length string

  const int64_t row_cnt = 120;
  int64_t expected_non_null[col_cnt] = {row_cnt, row_cnt, row_cnt, row_cnt};
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = is_force_raw;
  ObDatumRow rows[row_cnt];
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ASSERT_EQ(OB_SUCCESS, rows[idx].init(allocator_, col_cnt));
  }
  char *payload = static_cast<char *>(allocator_.alloc(1024));
  ASSERT_TRUE(nullptr != payload);
  MEMSET(payload, 0xbc, 1024);

  const int64_t distinct_cnt = 20;
  const int64_t tail_begin = row_cnt - 20;  // rows from here on may hold nulls
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ObDatumRow &cur = rows[idx];
    cur.storage_datums_[0].set_int32(idx);
    cur.storage_datums_[2].set_string(payload, idx % distinct_cnt);
    if (has_null && idx >= tail_begin) {
      cur.storage_datums_[1].set_null();
      cur.storage_datums_[3].set_null();
      expected_non_null[1]--;
      expected_non_null[3]--;
    } else {
      cur.storage_datums_[1].set_int(idx % distinct_cnt + INT32_MAX);
      cur.storage_datums_[3].set_string(payload, 100);
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(cur));
  }

  ObMicroBlockDesc micro_block_desc;
  ObMicroBlockHeader *header = nullptr;
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));

  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, rows, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, rows, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, check_get_row_count(header, micro_block_desc, expected_non_null, col_cnt, false));
}
|
||||
|
||||
// Encodes four data sets with (ObInt32Type, ObIntType, ObVarcharType, ObCharType) columns,
// all configured as dictionary encodings, where whole columns are null in different
// positions. Full and part transform are checked for every data set; the non-null row
// counts are checked for data sets <1> and <2> only.
TEST_F(TestCSDecoder, test_null_dict_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 4;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObIntType, ObVarcharType, ObCharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INT_DICT;
  ctx_.column_encodings_[2] = ObCSColumnHeader::Type::STR_DICT;
  ctx_.column_encodings_[3] = ObCSColumnHeader::Type::STR_DICT;

  const int64_t row_cnt = 100;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  ObDatumRow rows[row_cnt];
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ASSERT_EQ(OB_SUCCESS, rows[idx].init(allocator_, col_cnt));
  }

  // <1> all column is null
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ObDatumRow &cur = rows[idx];
    for (int64_t col = 0; col < col_cnt; ++col) {
      cur.storage_datums_[col].set_null();
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(cur));
  }
  int64_t all_null_counts[col_cnt] = {0, 0, 0, 0};
  ObMicroBlockDesc micro_block_desc;
  ObMicroBlockHeader *header = nullptr;
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));

  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, rows, row_cnt));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, rows, row_cnt));
  ASSERT_EQ(OB_SUCCESS, check_get_row_count(header, micro_block_desc, all_null_counts, col_cnt, false));

  // <2> middle column is null
  encoder.reuse();
  char *payload = static_cast<char *>(allocator_.alloc(1024));
  ASSERT_TRUE(nullptr != payload);
  MEMSET(payload, 0xbc, 1024);
  // ObInt32Type, ObIntType, ObVarcharType, ObCharType
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ObDatumRow &cur = rows[idx];
    cur.storage_datums_[0].set_int32(idx);
    cur.storage_datums_[1].set_null();
    cur.storage_datums_[2].set_null();
    cur.storage_datums_[3].set_string(payload, 100);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(cur));
  }
  int64_t middle_null_counts[col_cnt] = {row_cnt, 0, 0, row_cnt};
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));
  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, rows, row_cnt));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, rows, row_cnt));
  ASSERT_EQ(OB_SUCCESS, check_get_row_count(header, micro_block_desc, middle_null_counts, col_cnt, false));

  // <3> first and last column is null
  encoder.reuse();
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ObDatumRow &cur = rows[idx];
    cur.storage_datums_[0].set_null();
    cur.storage_datums_[1].set_int(idx - UINT32_MAX);
    cur.storage_datums_[2].set_string(payload, idx % 30);
    cur.storage_datums_[3].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(cur));
  }
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));
  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, rows, row_cnt));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, rows, row_cnt));

  // <4> last column is null
  encoder.reuse();
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ObDatumRow &cur = rows[idx];
    cur.storage_datums_[0].set_int32(INT32_MAX - 1000 + idx);
    cur.storage_datums_[1].set_int(idx - UINT32_MAX);
    cur.storage_datums_[2].set_string(payload, idx % 30);
    cur.storage_datums_[3].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(cur));
  }
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));
  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, rows, row_cnt));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, rows, row_cnt));
}
|
||||
|
||||
// Encodes 128 rows whose columns walk through the object types starting at ObTinyIntType,
// followed by seven explicitly chosen types, then checks full and part transform.
// Test parameters:
//   <0> specify_dict - when set, every odd column is given a dictionary column encoding
//   <1> is_force_raw - value assigned to encoder.is_all_column_force_raw_; it also switches
//                      the compressor from ZSTD_1_3_8_COMPRESSOR to NONE_COMPRESSOR
TEST_P(TestCSDecoder, test_all_object_type_decoder)
{
  const bool specify_dict = std::get<0>(GetParam());
  const bool is_force_raw = std::get<1>(GetParam());
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = ObExtendType - 1 + 7;
  ObObjType col_types[ObObjType::ObMaxType];
  for (int64_t col = 0; col < col_cnt; ++col) {
    ObObjType type = static_cast<ObObjType>(col + 1); // from ObTinyIntType
    // The last seven columns are pinned to specific types, counted from the end.
    switch (col_cnt - col) {
      case 1:
        type = ObURowIDType;
        break;
      case 2:
        type = ObIntervalYMType;
        break;
      case 3:
        type = ObIntervalDSType;
        break;
      case 4:
        type = ObTimestampTZType;
        break;
      case 5:
        type = ObTimestampLTZType;
        break;
      case 6:
        type = ObTimestampNanoType;
        break;
      case 7:
        type = ObRawType;
        break;
      default:
        if (ObExtendType == type || ObUnknownType == type) {
          type = ObVarcharType;
        }
        break;
    }
    col_types[col] = type;
  }
  const ObCompressorType compressor_type =
      is_force_raw ? ObCompressorType::NONE_COMPRESSOR : ObCompressorType::ZSTD_1_3_8_COMPRESSOR;
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt, compressor_type));
  ObMicroBlockCSEncoder encoder;
  if (specify_dict) {
    // only odd columns get an explicit dictionary encoding
    for (int64_t col = 1; col < col_cnt; col += 2) {
      const ObObjTypeStoreClass store_class = get_store_class_map()[ob_obj_type_class(col_types[col])];
      // specify dict column encoding
      ctx_.column_encodings_[col] = ObCSEncodingUtil::is_integer_store_class(store_class)
          ? ObCSColumnHeader::Type::INT_DICT
          : ObCSColumnHeader::Type::STR_DICT;
    }
  }
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = is_force_raw;

  const int64_t row_cnt = 128;
  ObDatumRow rows[row_cnt];
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ASSERT_EQ(OB_SUCCESS, rows[idx].init(allocator_, col_cnt));
  }
  const int64_t generated_cnt = row_cnt - 2;
  for (int64_t idx = 0; idx < generated_cnt; ++idx) {
    ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(idx, rows[idx]));
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(rows[idx]));
  }
  // the last two rows keep only the rowkey column; every other column is null
  for (int64_t idx = generated_cnt; idx < row_cnt; ++idx) {
    ObDatumRow &cur = rows[idx];
    for (int64_t col = 0; col < col_cnt; col++) {
      if (col >= rowkey_cnt) {
        cur.storage_datums_[col].set_null();
      } else {
        cur.storage_datums_[col].set_int32(idx);
      }
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(cur));
  }

  ObMicroBlockDesc micro_block_desc;
  ObMicroBlockHeader *header = nullptr;
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));
  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, rows, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, rows, row_cnt, true));
}
|
||||
|
||||
// For every integer stream encoding type from 1 up to (excluding) MAX_TYPE, registers that
// type as the previous encoding of all five columns, encodes the same rows, and checks full
// transform, part transform and the per-column non-null row counts.
// Test parameters:
//   <0> has_null     - whether columns 1..4 are null in the last 10 rows
//   <1> row_cnt_flag - together with has_null selects the row count (20 / 100 / 200 / 1200)
TEST_P(TestCSDecoder, test_decoder_with_all_stream_encoding_types)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 5;
  ObObjType col_types[col_cnt] = {ObIntType, ObVarcharType, ObVarcharType, ObIntType, ObIntType};
  const bool has_null = std::get<0>(GetParam());
  const bool row_cnt_flag = std::get<1>(GetParam());
  const int64_t row_cnt = row_cnt_flag ? (has_null ? 20 : 100) : (has_null ? 200 : 1200);

  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STRING;
  ctx_.column_encodings_[2] = ObCSColumnHeader::Type::STR_DICT;
  ctx_.column_encodings_[3] = ObCSColumnHeader::Type::INTEGER;
  ctx_.column_encodings_[4] = ObCSColumnHeader::Type::INT_DICT;

  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  void *rows_buf = allocator_.alloc(sizeof(ObDatumRow) * row_cnt);
  ASSERT_TRUE(nullptr != rows_buf);
  ObDatumRow *rows = new(rows_buf) ObDatumRow[row_cnt];
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ASSERT_EQ(OB_SUCCESS, rows[idx].init(allocator_, col_cnt));
  }
  char *varchar_data = static_cast<char *>(allocator_.alloc(row_cnt + 1));
  ASSERT_TRUE(varchar_data != nullptr);
  MEMSET(varchar_data, 0xf, row_cnt + 1);

  const int64_t repeat_cnt = 3;
  const int64_t tail_begin = row_cnt - 10;  // rows from here on may hold nulls
  int64_t expected_non_null[col_cnt] = {row_cnt, row_cnt, row_cnt, row_cnt, row_cnt};
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ObDatumRow &cur = rows[idx];
    cur.storage_datums_[0].set_int(idx);
    if (has_null && idx >= tail_begin) {
      for (int64_t col = 1; col < col_cnt; ++col) {
        cur.storage_datums_[col].set_null();
        expected_non_null[col]--;
      }
    } else {
      cur.storage_datums_[1].set_string(varchar_data, idx / repeat_cnt);
      cur.storage_datums_[2].set_string(varchar_data, idx / repeat_cnt);
      cur.storage_datums_[3].set_int(idx / repeat_cnt - 100);
      cur.storage_datums_[4].set_int(idx / repeat_cnt - 100);
    }
  }

  ObIntegerStream::EncodingType stream_types[ObCSColumnHeader::MAX_INT_STREAM_COUNT_OF_COLUMN];
  for (int type = 1; type < ObIntegerStream::EncodingType::MAX_TYPE; type++) {
    stream_types[0] = {ObIntegerStream::EncodingType(type)};
    stream_types[1] = {ObIntegerStream::EncodingType(type)};
    LOG_INFO("test stream ecoding type", K(type));
    ASSERT_EQ(OB_SUCCESS, encoder.ctx_.previous_cs_encoding_.update_column_encoding_types(
        0, ObColumnEncodingIdentifier(ObCSColumnHeader::Type::INTEGER, 1, 0), stream_types, true));
    ASSERT_EQ(OB_SUCCESS, encoder.ctx_.previous_cs_encoding_.update_column_encoding_types(
        1, ObColumnEncodingIdentifier(ObCSColumnHeader::Type::STRING, 1, 0), stream_types, true));
    ASSERT_EQ(OB_SUCCESS, encoder.ctx_.previous_cs_encoding_.update_column_encoding_types(
        2, ObColumnEncodingIdentifier(ObCSColumnHeader::Type::STR_DICT, 2, 0), stream_types, true)); // string dict
    ASSERT_EQ(OB_SUCCESS, encoder.ctx_.previous_cs_encoding_.update_column_encoding_types(
        3, ObColumnEncodingIdentifier(ObCSColumnHeader::Type::INTEGER, 1, 0), stream_types, true));
    ASSERT_EQ(OB_SUCCESS, encoder.ctx_.previous_cs_encoding_.update_column_encoding_types(
        4, ObColumnEncodingIdentifier(ObCSColumnHeader::Type::INT_DICT, 2, 0), stream_types, true)); // integer dict

    for (int64_t idx = 0; idx < row_cnt; ++idx) {
      ASSERT_EQ(OB_SUCCESS, encoder.append_row(rows[idx]));
    }
    ObMicroBlockDesc micro_block_desc;
    ObMicroBlockHeader *header = nullptr;
    ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));

    ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header, micro_block_desc, rows, row_cnt, true));
    ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header, micro_block_desc, rows, row_cnt, true));
    ASSERT_EQ(OB_SUCCESS, check_get_row_count(header, micro_block_desc, expected_non_null, col_cnt, false));

    // make the encoder ready for the next stream encoding type
    encoder.reuse();
  }
}
|
||||
|
||||
/**
 * Encodes 1000 rows into one micro block and decodes them again.
 *
 * The first (row_cnt - exception_cnt) rows carry one repeated value (or NULL) in
 * columns 1..4, while the last exception_cnt rows carry different values. The block
 * is then checked through full_transform_check_row, part_transform_check_row and
 * check_get_row_count.
 */
TEST_F(TestCSDecoder, test_dict_const_ref_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 5;
  ObObjType col_types[col_cnt] = {ObIntType, ObVarcharType, ObUInt64Type, ObVarcharType, ObIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  int64_t row_cnt = 1000;
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STR_DICT;
  ctx_.column_encodings_[2] = ObCSColumnHeader::Type::INT_DICT;
  ctx_.column_encodings_[3] = ObCSColumnHeader::Type::STR_DICT;
  ctx_.column_encodings_[4] = ObCSColumnHeader::Type::INT_DICT;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));

  // Rows are constructed in place inside a buffer obtained from the fixture allocator.
  void *row_arr_buf = allocator_.alloc(sizeof(ObDatumRow) * row_cnt);
  ASSERT_TRUE(nullptr != row_arr_buf);
  ObDatumRow *row_arr = new(row_arr_buf) ObDatumRow[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }

  // Shared payload for every string datum. The allocation must be verified before it
  // is written to, so the null check precedes MEMSET and is done exactly once.
  char *varchar_data = static_cast<char *>(allocator_.alloc(row_cnt + 1));
  ASSERT_TRUE(nullptr != varchar_data);
  MEMSET(varchar_data, 'a', row_cnt + 1);

  int64_t exception_cnt = 90;
  // Leading rows: identical values in columns 1, 2 and 4, NULL in column 3.
  for (int64_t i = 0; i < row_cnt - exception_cnt; i++) {
    row_arr[i].storage_datums_[0].set_int(i);
    row_arr[i].storage_datums_[1].set_string(varchar_data, 10);
    row_arr[i].storage_datums_[2].set_uint(UINT64_MAX);
    row_arr[i].storage_datums_[3].set_null();
    row_arr[i].storage_datums_[4].set_int(-9999);
  }
  // Trailing "exception" rows: different values, NULL in column 4.
  for (int64_t i = row_cnt - exception_cnt; i < row_cnt; i++) {
    row_arr[i].storage_datums_[0].set_int(i);
    row_arr[i].storage_datums_[1].set_string(varchar_data, 20);
    row_arr[i].storage_datums_[2].set_uint(i);
    row_arr[i].storage_datums_[3].set_string(varchar_data, 100 + i % 2);
    row_arr[i].storage_datums_[4].set_null();
  }
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  ObMicroBlockDesc micro_block_desc;
  ObMicroBlockHeader *header = nullptr;
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));

  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header,
      micro_block_desc, row_arr, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header,
      micro_block_desc, row_arr, row_cnt, true));

  // NOTE(review): the loops above leave column 3 non-NULL in exception_cnt rows and
  // column 4 non-NULL in (row_cnt - exception_cnt) rows, which is the reverse of the
  // last two entries below. The values are kept as they were; confirm against what
  // check_get_row_count actually compares.
  int64_t row_cnt_without_null[5] = {row_cnt, row_cnt, row_cnt, row_cnt - exception_cnt, exception_cnt};
  ASSERT_EQ(OB_SUCCESS, check_get_row_count(header, micro_block_desc, row_cnt_without_null, 5, false));
}
|
||||
|
||||
/**
 * Round-trips seven ObDecimalIntType columns of differing precision through the
 * encoder and the transform checks.
 *
 * The last three rows carry boundary values (and, for the very last row, NULLs in
 * every column except the first); all other rows carry values derived from the row
 * index. Columns 3, 5 and 6 have their encoding type set explicitly.
 */
TEST_F(TestCSDecoder, test_decimal_int_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 7;
  ObObjType col_types[col_cnt] = {ObDecimalIntType, ObDecimalIntType, ObDecimalIntType,
                                  ObDecimalIntType, ObDecimalIntType, ObDecimalIntType,
                                  ObDecimalIntType};
  int64_t precision_arr[col_cnt] = {MAX_PRECISION_DECIMAL_INT_64,
                                    MAX_PRECISION_DECIMAL_INT_32,
                                    MAX_PRECISION_DECIMAL_INT_128,
                                    OB_MAX_DECIMAL_PRECISION,
                                    MAX_PRECISION_DECIMAL_INT_128,
                                    MAX_PRECISION_DECIMAL_INT_128,
                                    OB_MAX_DECIMAL_PRECISION};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt, ObCompressorType::ZSTD_1_3_8_COMPRESSOR, precision_arr));
  ctx_.column_encodings_[3] = ObCSColumnHeader::Type::INT_DICT;
  ctx_.column_encodings_[5] = ObCSColumnHeader::Type::STRING;
  ctx_.column_encodings_[6] = ObCSColumnHeader::Type::STR_DICT;
  int64_t row_cnt = 1000;
  const int64_t distinct_cnt = 100;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));

  // Rows are constructed in place inside a buffer obtained from the fixture allocator.
  void *row_arr_buf = allocator_.alloc(sizeof(ObDatumRow) * row_cnt);
  ASSERT_TRUE(nullptr != row_arr_buf);
  ObDatumRow *row_arr = new(row_arr_buf) ObDatumRow[row_cnt];
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ASSERT_EQ(OB_SUCCESS, row_arr[idx].init(allocator_, col_cnt));
  }

  for (int64_t i = 0; i < row_cnt; i++) {
    ObDatumRow &cur_row = row_arr[i];
    // One scratch value per integer width handed to set_decimal_int.
    int64_t dec64 = 0;
    int32_t dec32 = 0;
    int128_t dec128 = 0;
    int256_t dec256 = 0;
    // Cycles through distinct_cnt values centred on zero: [-50, 49].
    const int64_t dict_seed = (i % distinct_cnt) - distinct_cnt / 2;

    // Dispatch on the distance from the end: the final three rows are special.
    switch (row_cnt - i) {
      case 3: {
        // Third-to-last row: upper boundary values.
        dec64 = 0;
        cur_row.storage_datums_[0].set_decimal_int(dec64);
        dec32 = INT32_MAX;
        cur_row.storage_datums_[1].set_decimal_int(dec32);
        dec128 = INT64_MAX;
        cur_row.storage_datums_[2].set_decimal_int(dec128);
        dec256 = dict_seed;
        cur_row.storage_datums_[3].set_decimal_int(dec256);

        dec128 = (int128_t)INT64_MAX << 64;
        cur_row.storage_datums_[4].set_decimal_int(dec128);
        cur_row.storage_datums_[5].set_decimal_int(dec128);
        dec256 = (int256_t)(dict_seed) << 128;
        cur_row.storage_datums_[6].set_decimal_int(dec256);
        break;
      }
      case 2: {
        // Second-to-last row: lower boundary values.
        dec64 = INT32_MAX;
        cur_row.storage_datums_[0].set_decimal_int(dec64);
        dec32 = INT32_MIN;
        cur_row.storage_datums_[1].set_decimal_int(dec32);
        dec128 = INT64_MIN;
        cur_row.storage_datums_[2].set_decimal_int(dec128);
        dec256 = dict_seed;
        cur_row.storage_datums_[3].set_decimal_int(dec256);

        dec128 = (int128_t)INT64_MIN << 64;
        cur_row.storage_datums_[4].set_decimal_int(dec128);
        cur_row.storage_datums_[5].set_decimal_int(dec128);
        dec256 = (int256_t)(dict_seed) << 128;
        cur_row.storage_datums_[6].set_decimal_int(dec256);
        break;
      }
      case 1: {
        // Last row: INT64_MAX in the first column, NULL everywhere else.
        dec64 = INT64_MAX;
        cur_row.storage_datums_[0].set_decimal_int(dec64);
        for (int64_t col = 1; col < col_cnt; ++col) {
          cur_row.storage_datums_[col].set_null();
        }
        break;
      }
      default: {
        // Ordinary rows (i in [0, row_cnt - 4]): values derived from the row index.
        dec64 = i - INT64_MAX;
        cur_row.storage_datums_[0].set_decimal_int(dec64);
        dec32 = i - row_cnt / 2;
        cur_row.storage_datums_[1].set_decimal_int(dec32); // -500 ~ 496
        dec128 = i + INT32_MAX;
        cur_row.storage_datums_[2].set_decimal_int(dec128);
        dec256 = dict_seed;
        cur_row.storage_datums_[3].set_decimal_int(dec256);

        dec128 = i;
        cur_row.storage_datums_[4].set_decimal_int(dec128);
        cur_row.storage_datums_[5].set_null();
        dec256 = (int256_t)(dict_seed) << 128;
        cur_row.storage_datums_[6].set_decimal_int(dec256);
        break;
      }
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(cur_row));
  }

  ObMicroBlockDesc micro_block_desc;
  ObMicroBlockHeader *header = nullptr;
  ASSERT_EQ(OB_SUCCESS, build_micro_block_desc(encoder, micro_block_desc, header));

  ASSERT_EQ(OB_SUCCESS, full_transform_check_row(header,
      micro_block_desc, row_arr, row_cnt, true));
  ASSERT_EQ(OB_SUCCESS, part_transform_check_row(header,
      micro_block_desc, row_arr, row_cnt, true));

  // Column 0 is never NULL, column 5 is non-NULL only in two of the boundary rows,
  // and every other column is NULL in the last row only.
  int64_t row_cnt_without_null[col_cnt] = {row_cnt, row_cnt - 1, row_cnt - 1, row_cnt - 1, row_cnt - 1, 2, row_cnt - 1};
  ASSERT_EQ(OB_SUCCESS, check_get_row_count(header, micro_block_desc, row_cnt_without_null, col_cnt, false));
}
|
||||
|
||||
// Instantiates the TestCSDecoder suite once for every combination of two boolean
// parameters (four instantiations, prefixed "TestDecoder"). What each boolean
// selects is decided by the fixture, which is defined outside this part of the file.
INSTANTIATE_TEST_CASE_P(TestDecoder, TestCSDecoder, Combine(Bool(), Bool()));
|
||||
|
||||
} // namespace blocksstable
|
||||
} // namespace oceanbase
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -f test_cs_decoder.log*");
|
||||
OB_LOGGER.set_file_name("test_cs_decoder.log", true, false);
|
||||
oceanbase::common::ObLogger::get_logger().set_log_level("DEBUG");
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
770
unittest/storage/blocksstable/cs_encoding/test_cs_encoder.cpp
Normal file
770
unittest/storage/blocksstable/cs_encoding/test_cs_encoder.cpp
Normal file
@ -0,0 +1,770 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#define USING_LOG_PREFIX STORAGE
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#define protected public
|
||||
#define private public
|
||||
#include "ob_cs_encoding_test_base.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_cs_encoding_util.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_micro_block_cs_encoder.h"
|
||||
#include "lib/wide_integer/ob_wide_integer.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace blocksstable
|
||||
{
|
||||
|
||||
using namespace common;
|
||||
using namespace storage;
|
||||
using namespace share::schema;
|
||||
|
||||
class TestCSEncoder : public ObCSEncodingTestBase, public ::testing::Test
|
||||
{
|
||||
public:
|
||||
TestCSEncoder() {}
|
||||
virtual ~TestCSEncoder() {}
|
||||
|
||||
virtual void SetUp() {}
|
||||
virtual void TearDown() {}
|
||||
};
|
||||
|
||||
/**
 * Drives a single ObInt32Type column through the INTEGER column encoder in seven
 * scenarios, reusing one ObMicroBlockCSEncoder, and inspects the encoder's
 * statistics (min/max) and the resulting stream meta (width, base, null handling).
 */
TEST_F(TestCSEncoder, test_integer_encoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 1;
  ObObjType col_types[col_cnt] = {ObInt32Type};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;

  int64_t row_cnt = 100;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = true;
  // The same row object is refilled for every appended row.
  ObDatumRow row;
  ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));

  // Scenario 1: 100 rows generated from the seeds -50 .. 49.
  int64_t seed_val = 0;
  for (seed_val = -50; seed_val < row_cnt - 50; ++seed_val) {
    ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed_val, row));
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }

  char *block_buf = nullptr;
  int64_t block_size = 0;
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(block_buf, block_size));
  ObIColumnCSEncoder *col_enc = encoder.encoders_[0];
  ASSERT_EQ(col_enc->get_type(), ObCSColumnHeader::Type::INTEGER);
  ASSERT_EQ(-50, (int64_t)col_enc->ctx_->integer_min_);
  ASSERT_EQ(49, (int64_t)col_enc->ctx_->integer_max_);

  ASSERT_EQ(sizeof(uint64_t) * row_cnt, encoder.ctx_.estimate_block_size_);
  ASSERT_EQ(block_size, encoder.ctx_.real_block_size_ + encoder.all_headers_size_);
  LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));

  ObIntegerColumnEncoder *int_enc = reinterpret_cast<ObIntegerColumnEncoder *>(col_enc);
  ASSERT_EQ(true, int_enc->enc_ctx_.meta_.is_raw_encoding());
  ASSERT_EQ(1, int_enc->enc_ctx_.meta_.get_uint_width_size());
  ASSERT_EQ(false, int_enc->enc_ctx_.meta_.is_use_null_replace_value());
  ASSERT_EQ(true, int_enc->enc_ctx_.meta_.is_use_base()); // a base is used with raw encoding too
  ASSERT_EQ(-50, int_enc->enc_ctx_.meta_.base_value_);

  // Scenario 2: three rows, INT32_MIN / NULL / -1.
  // The row count is below ObCSEncodingUtil::ENCODING_ROW_COUNT_THRESHOLD,
  // so the encoder is expected to force raw encoding.
  encoder.reuse();
  seed_val = INT32_MIN;
  ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed_val, row));
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));

  row.storage_datums_[0].set_null();
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));

  seed_val = -1;
  ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed_val, row));
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));

  ASSERT_EQ(OB_SUCCESS, encoder.build_block(block_buf, block_size));
  col_enc = encoder.encoders_[0];
  ASSERT_EQ(col_enc->get_type(), ObCSColumnHeader::Type::INTEGER);
  ASSERT_EQ(INT32_MIN, (int64_t)col_enc->ctx_->integer_min_);
  ASSERT_EQ(-1, (int64_t)col_enc->ctx_->integer_max_);
  // estimate_block_size_ is not reset by reuse(), so it covers 100 + 3 rows here.
  ASSERT_EQ(sizeof(uint64_t) * 103, encoder.ctx_.estimate_block_size_);

  int_enc = reinterpret_cast<ObIntegerColumnEncoder *>(col_enc);
  ASSERT_EQ(true, int_enc->enc_ctx_.meta_.is_raw_encoding());
  ASSERT_EQ(4, int_enc->enc_ctx_.meta_.get_uint_width_size());
  ASSERT_EQ(true, int_enc->enc_ctx_.meta_.is_use_null_replace_value());
  ASSERT_EQ(true, int_enc->enc_ctx_.meta_.is_use_base()); // a base is used with raw encoding too
  ASSERT_EQ(INT32_MIN, int_enc->enc_ctx_.meta_.base_value());
  ASSERT_EQ(0, int_enc->enc_ctx_.meta_.null_replaced_value_);

  // Scenario 3: INT32_MIN / INT32_MAX / NULL followed by 200 rows seeded -100 .. 99.
  encoder.reuse();
  seed_val = INT32_MIN;
  ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed_val, row));
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  seed_val = INT32_MAX;
  ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed_val, row));
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  row.storage_datums_[0].set_null();
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));

  row_cnt = 200;
  for (seed_val = -100; seed_val < row_cnt - 100; ++seed_val) {
    ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed_val, row));
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }

  ASSERT_EQ(OB_SUCCESS, encoder.build_block(block_buf, block_size));
  col_enc = encoder.encoders_[0];
  ASSERT_EQ(col_enc->get_type(), ObCSColumnHeader::Type::INTEGER);
  ASSERT_EQ(INT32_MIN, (int64_t)col_enc->ctx_->integer_min_);
  ASSERT_EQ(INT32_MAX, (int64_t)col_enc->ctx_->integer_max_);
  // estimate_block_size_ is not reset by reuse(): 100 + 3 + 3 + 200 rows so far.
  ASSERT_EQ(sizeof(uint64_t) * 306, encoder.ctx_.estimate_block_size_);

  int_enc = reinterpret_cast<ObIntegerColumnEncoder *>(col_enc);
  ASSERT_EQ(4, int_enc->enc_ctx_.meta_.get_uint_width_size());
  ASSERT_EQ(false, int_enc->enc_ctx_.meta_.is_use_null_replace_value());
  ASSERT_EQ(true, int_enc->enc_ctx_.meta_.is_use_base());
  ASSERT_EQ(INT32_MIN, int_enc->enc_ctx_.meta_.base_value());
  ASSERT_EQ(true, col_enc->get_column_header().has_null_bitmap());

  // Scenario 4: three rows, 0 / NULL / INT32_MAX.
  // The row count is below ObCSEncodingUtil::ENCODING_ROW_COUNT_THRESHOLD,
  // so the encoder is expected to force raw encoding.
  encoder.reuse();
  ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(0, row));
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  row.storage_datums_[0].set_null();
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(INT32_MAX, row));
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(block_buf, block_size));
  col_enc = encoder.encoders_[0];
  ASSERT_EQ(col_enc->get_type(), ObCSColumnHeader::Type::INTEGER);
  ASSERT_EQ(0, (int64_t)col_enc->ctx_->integer_min_);
  ASSERT_EQ(INT32_MAX, (int64_t)col_enc->ctx_->integer_max_);
  int_enc = reinterpret_cast<ObIntegerColumnEncoder *>(col_enc);
  ASSERT_EQ(true, int_enc->enc_ctx_.meta_.is_use_null_replace_value());
  ASSERT_EQ(-1, int_enc->enc_ctx_.meta_.null_replaced_value_);

  // Scenario 5: three rows, 0 / NULL / INT32_MAX - 1.
  // The row count is below ObCSEncodingUtil::ENCODING_ROW_COUNT_THRESHOLD,
  // so the encoder is expected to force raw encoding.
  encoder.reuse();
  ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(0, row));
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  row.storage_datums_[0].set_null();
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(INT32_MAX - 1, row));
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(block_buf, block_size));
  col_enc = encoder.encoders_[0];
  ASSERT_EQ(col_enc->get_type(), ObCSColumnHeader::Type::INTEGER);
  ASSERT_EQ(0, (int64_t)col_enc->ctx_->integer_min_);
  ASSERT_EQ(INT32_MAX - 1, (int64_t)col_enc->ctx_->integer_max_);
  int_enc = reinterpret_cast<ObIntegerColumnEncoder *>(col_enc);
  ASSERT_EQ(true, int_enc->enc_ctx_.meta_.is_use_null_replace_value());
  ASSERT_EQ(INT32_MAX, int_enc->enc_ctx_.meta_.null_replaced_value());

  // Scenario 6: 1000 rows with monotonically increasing seeds 0 .. 999.
  encoder.reuse();
  row_cnt = 1000;
  for (int64_t row_idx = 0; row_idx < row_cnt; ++row_idx) {
    ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(row_idx, row));
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(block_buf, block_size));
  col_enc = encoder.encoders_[0];
  ASSERT_EQ(col_enc->get_type(), ObCSColumnHeader::Type::INTEGER);
  ASSERT_EQ(0, (int64_t)col_enc->ctx_->integer_min_);
  ASSERT_EQ(999, (int64_t)col_enc->ctx_->integer_max_);

  int_enc = reinterpret_cast<ObIntegerColumnEncoder *>(col_enc);
  ASSERT_EQ(2, int_enc->enc_ctx_.meta_.get_uint_width_size());
  ASSERT_EQ(false, int_enc->enc_ctx_.meta_.is_use_null_replace_value());
  ASSERT_EQ(false, col_enc->get_column_header().has_null_bitmap());
  ASSERT_EQ(false, int_enc->enc_ctx_.meta_.is_use_base());

  // Scenario 7: 1000 rows that are all NULL.
  encoder.reuse();
  row_cnt = 1000;
  row.storage_datums_[0].set_null();
  for (int64_t row_idx = 0; row_idx < row_cnt; ++row_idx) {
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(block_buf, block_size));
  col_enc = encoder.encoders_[0];
  ASSERT_EQ(col_enc->get_type(), ObCSColumnHeader::Type::INTEGER);
  int_enc = reinterpret_cast<ObIntegerColumnEncoder *>(col_enc);
  ASSERT_EQ(1, int_enc->enc_ctx_.meta_.get_uint_width_size());
  ASSERT_EQ(true, int_enc->enc_ctx_.meta_.is_use_null_replace_value());
  ASSERT_EQ(0, int_enc->enc_ctx_.meta_.null_replaced_value());
  ASSERT_EQ(false, col_enc->get_column_header().has_null_bitmap());
  ASSERT_EQ(false, int_enc->enc_ctx_.meta_.is_use_base());

  reuse();
}
|
||||
|
||||
/**
 * Encodes one signed (ObIntType) and one unsigned (ObUInt64Type) column whose values
 * sit at the extremes of the 64-bit range, then checks the min/max statistics, the
 * 8-byte stream width and whether a base value is used for each column.
 */
TEST_F(TestCSEncoder, test_big_integer_encoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  ObObjType col_types[col_cnt] = {ObIntType, ObUInt64Type};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));

  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  ObDatumRow row;
  ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));

  // 200 rows close to INT64_MIN (signed column) and close to UINT64_MAX (unsigned column).
  int64_t row_cnt = 200;
  for (int64_t row_idx = 0; row_idx < row_cnt; ++row_idx) {
    row.storage_datums_[0].set_int(INT64_MIN + 1000 + row_idx);
    row.storage_datums_[1].set_uint(UINT64_MAX - 1000 + row_idx );
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  // Two extra rows that stretch both columns to their extreme values.
  row.storage_datums_[0].set_int(INT64_MIN);
  row.storage_datums_[1].set_uint(UINT64_MAX);
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  row.storage_datums_[0].set_int(INT64_MAX);
  row.storage_datums_[1].set_uint(UINT64_MAX - UINT32_MAX);
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));

  char *block_buf = nullptr;
  int64_t block_size = 0;
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(block_buf, block_size));

  // Signed column: spans the whole int64 range and uses INT64_MIN as the base.
  ObIColumnCSEncoder *signed_enc = encoder.encoders_[0];
  ASSERT_EQ(signed_enc->get_type(), ObCSColumnHeader::Type::INTEGER);
  ASSERT_EQ(INT64_MIN, (int64_t)signed_enc->ctx_->integer_min_);
  ASSERT_EQ(INT64_MAX, (int64_t)signed_enc->ctx_->integer_max_);

  ObIntegerColumnEncoder *signed_int_enc = reinterpret_cast<ObIntegerColumnEncoder *>(signed_enc);
  ASSERT_EQ(8, signed_int_enc->enc_ctx_.meta_.get_uint_width_size());
  ASSERT_EQ(true, signed_int_enc->enc_ctx_.meta_.is_use_base());
  ASSERT_EQ(INT64_MIN, signed_int_enc->enc_ctx_.meta_.base_value());

  // Unsigned column: 8-byte width as well, but no base value is used.
  ObIColumnCSEncoder *unsigned_enc = encoder.encoders_[1];
  ASSERT_EQ(unsigned_enc->get_type(), ObCSColumnHeader::Type::INTEGER);
  ASSERT_EQ(UINT64_MAX - UINT32_MAX, unsigned_enc->ctx_->integer_min_);
  ASSERT_EQ(UINT64_MAX, unsigned_enc->ctx_->integer_max_);

  ObIntegerColumnEncoder *unsigned_int_enc = reinterpret_cast<ObIntegerColumnEncoder *>(unsigned_enc);
  ASSERT_EQ(8, unsigned_int_enc->enc_ctx_.meta_.get_uint_width_size());
  ASSERT_EQ(false, unsigned_int_enc->enc_ctx_.meta_.is_use_base());

  reuse();
}
|
||||
|
||||
|
||||
/**
 * Drives an ObVarcharType column through the STRING column encoder in six scenarios
 * (fixed length, variable length, zero-length values, NULLs and mixtures of them),
 * reusing one ObMicroBlockCSEncoder, and inspects the collected size statistics, the
 * null-bitmap flag of the column header and the string stream meta.
 */
TEST_F(TestCSEncoder, test_string_encoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  ObObjType col_types[col_cnt] = {ObIntType, ObVarcharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STRING;

  // <1> 100 generated rows (the assertions below expect 64-byte strings) plus one all-NULL row
  int64_t row_cnt = 100;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = true;
  // The same row object is refilled for every appended row.
  ObDatumRow row;
  ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));

  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i, row));
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  for (int64_t i = 0; i < col_cnt; i++) {
    row.storage_datums_[i].set_null();
  }
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));

  char *buf = nullptr;
  int64_t buf_size = 0;
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
  ObIColumnCSEncoder *e = encoder.encoders_[1];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
  ASSERT_EQ(64, e->ctx_->fix_data_size_);
  ASSERT_EQ(64 * row_cnt, e->ctx_->var_data_size_);
  ASSERT_EQ(64 * row_cnt, e->ctx_->dict_var_data_size_);
  ASSERT_EQ(true, e->get_column_header().has_null_bitmap());

  ASSERT_EQ((8 + 64 + 8) * row_cnt + 16, encoder.ctx_.estimate_block_size_);
  ASSERT_EQ(buf_size, encoder.ctx_.real_block_size_ + encoder.all_headers_size_);
  LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));

  ObStringColumnEncoder *str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
  ASSERT_EQ(true, str_col_encoder->enc_ctx_.meta_.is_fixed_len_string());
  ASSERT_EQ(64, str_col_encoder->enc_ctx_.meta_.get_fixed_string_len());
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
  ASSERT_EQ(true, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);

  // <2> variable-length strings (lengths 1 .. 100) plus one NULL row, no zero-length string
  row_cnt = 100;
  encoder.reuse();
  int64_t sum_len = 0;
  for (int64_t i = 0; i < row_cnt; i++) {
    char *varchar_data = static_cast<char *>(allocator_.alloc(i + 1));
    ASSERT_TRUE(nullptr != varchar_data);
    MEMSET(varchar_data, 0xf, i + 1);
    row.storage_datums_[0].set_int(i);
    row.storage_datums_[1].set_string(varchar_data, i + 1);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
    sum_len += i + 1;
  }
  row.storage_datums_[0].set_null();
  row.storage_datums_[1].set_null();
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));

  e = encoder.encoders_[1];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
  ASSERT_EQ(-1, e->ctx_->fix_data_size_);
  ASSERT_EQ(sum_len, e->ctx_->var_data_size_);
  ASSERT_EQ(sum_len, e->ctx_->dict_var_data_size_);
  ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
  LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));

  str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
  // The stream must not be flagged as fixed-length, and its recorded fixed length must
  // be zero. The flag is checked explicitly, as the other scenarios do; the length is
  // compared with 0 rather than with a boolean.
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_fixed_len_string());
  ASSERT_EQ(0, str_col_encoder->enc_ctx_.meta_.get_fixed_string_len());
  ASSERT_EQ(true, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);

  // <3> variable-length strings including a zero-length one (i == 0) plus one NULL row
  encoder.reuse();
  for (int64_t i = 0; i < ObCSEncodingUtil::ENCODING_ROW_COUNT_THRESHOLD - 2; i++) {
    // One extra byte is allocated so that the i == 0 row still gets a valid buffer.
    char *varchar_data = static_cast<char *>(allocator_.alloc(i + 1));
    ASSERT_TRUE(nullptr != varchar_data);
    MEMSET(varchar_data, 0xf, i + 1);
    row.storage_datums_[0].set_int(i);
    row.storage_datums_[1].set_string(varchar_data, i);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  row.storage_datums_[0].set_null();
  row.storage_datums_[1].set_null();
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
  e = encoder.encoders_[1];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
  ASSERT_EQ(-1, e->ctx_->fix_data_size_);
  ASSERT_EQ(true, e->get_column_header().has_null_bitmap());
  LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));

  str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_fixed_len_string());
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);

  // <4> every string datum is NULL
  row_cnt = 100;
  encoder.reuse();
  for (int64_t i = 0; i < row_cnt; i++) {
    row.storage_datums_[0].set_int(i);
    row.storage_datums_[1].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
  e = encoder.encoders_[1];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
  ASSERT_EQ(-1, e->ctx_->fix_data_size_);
  ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
  LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));

  str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_fixed_len_string());
  ASSERT_EQ(true, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);

  // <5> every string datum is a zero-length string
  row_cnt = 100;
  encoder.reuse();
  for (int64_t i = 0; i < row_cnt; i++) {
    row.storage_datums_[0].set_int(i);
    row.storage_datums_[1].set_string(nullptr, 0);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
  e = encoder.encoders_[1];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
  ASSERT_EQ(0, e->ctx_->fix_data_size_);
  ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
  LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));

  str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
  ASSERT_EQ(true, str_col_encoder->enc_ctx_.meta_.is_fixed_len_string());
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);

  // <6> 100 zero-length strings followed by 100 NULLs
  row_cnt = 200;
  encoder.reuse();
  for (int64_t i = 0; i < row_cnt - 100; i++) {
    row.storage_datums_[0].set_int(i);
    row.storage_datums_[1].set_string(nullptr, 0);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  for (int64_t i = row_cnt - 100; i < row_cnt; i++) {
    row.storage_datums_[0].set_int(i);
    row.storage_datums_[1].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));
  e = encoder.encoders_[1];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
  ASSERT_EQ(0, e->ctx_->fix_data_size_);
  ASSERT_EQ(true, e->get_column_header().has_null_bitmap());
  LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));

  str_col_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
  ASSERT_EQ(true, str_col_encoder->enc_ctx_.meta_.is_fixed_len_string());
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
  ASSERT_EQ(false, str_col_encoder->enc_ctx_.info_.raw_encoding_str_offset_);

  reuse();
}
|
||||
|
||||
// Exercises dictionary encoding on a 5-column micro block. 1000 rows are appended in a
// loop, followed by one extra row whose columns 0-3 are null:
//   col 0 (ObIntType, INT_DICT requested):     (i % 100) - 50, i.e. [-50, 49], plus one null
//   col 1 (ObVarcharType, STR_DICT requested): strings of length i % 100 (zero length included), plus one null
//   col 2 (ObUInt64Type):                      all null
//   col 3 (ObTextType):                        all null
//   col 4 (ObHexStringType):                   all zero-length strings
// After build_block() the chosen encoder type and its metadata are checked per column.
TEST_F(TestCSEncoder, test_dict_encoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 5;
  ObObjType col_types[col_cnt] = {ObIntType, ObVarcharType, ObUInt64Type, ObTextType, ObHexStringType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STR_DICT;
  // <1> integer dict: has null and has negative value(-50 - 49)
  //     string dict: has null and has zero length value
  int64_t row_cnt = 1000;
  const int64_t distinct_cnt = 100;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  ObDatumRow row;
  ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));
  char *varchar_data = static_cast<char *>(allocator_.alloc(row_cnt + 1));
  // Validate the allocation before the buffer is written to.
  ASSERT_TRUE(nullptr != varchar_data);
  MEMSET(varchar_data, 0xf, row_cnt + 1);
  for (int64_t i = 0; i < row_cnt; i++) {
    row.storage_datums_[0].set_int((i % distinct_cnt) - distinct_cnt / 2);
    row.storage_datums_[1].set_string(varchar_data, i % distinct_cnt);
    row.storage_datums_[2].set_null(); // all null integer
    row.storage_datums_[3].set_null(); // all null string
    row.storage_datums_[4].set_string(nullptr, 0); // all zero length string
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  // One trailing row that adds a null to the two dictionary columns.
  row.storage_datums_[0].set_null();
  row.storage_datums_[1].set_null();
  row.storage_datums_[2].set_null();
  row.storage_datums_[3].set_null();
  row.storage_datums_[4].set_string(nullptr, 0);
  ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  char *buf = nullptr;
  int64_t buf_size = 0;
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));

  // col 0: integer dictionary with 100 distinct values and a null
  ObIColumnCSEncoder *e = encoder.encoders_[0];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INT_DICT);
  ASSERT_EQ(-50, (int64_t)e->ctx_->integer_min_);
  ASSERT_EQ(49, (int64_t)e->ctx_->integer_max_);
  ASSERT_EQ(true, e->ctx_->need_sort_);
  ASSERT_EQ(false, e->get_column_header().has_null_bitmap());

  ObIntDictColumnEncoder *dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(e);
  ASSERT_EQ(1, dict_encoder->integer_dict_enc_ctx_.meta_.get_uint_width_size());
  ASSERT_EQ(false, dict_encoder->integer_dict_enc_ctx_.meta_.is_use_null_replace_value());
  ASSERT_EQ(true, dict_encoder->integer_dict_enc_ctx_.info_.is_monotonic_inc_);
  ASSERT_EQ(distinct_cnt, dict_encoder->max_ref_);
  ASSERT_EQ(distinct_cnt, dict_encoder->dict_encoding_meta_.distinct_val_cnt_);

  ASSERT_EQ(1, dict_encoder->ref_enc_ctx_.meta_.get_uint_width_size()); // [0, distinct_cnt]
  ASSERT_EQ(false, dict_encoder->ref_enc_ctx_.meta_.is_use_null_replace_value());
  ASSERT_EQ(2, dict_encoder->stream_offsets_.count());

  // col 1: string dictionary with variable-length values and a null
  e = encoder.encoders_[1];
  ASSERT_EQ(true, e->ctx_->need_sort_);
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STR_DICT);
  ASSERT_EQ(-1, e->ctx_->fix_data_size_);
  ASSERT_EQ(false, e->get_column_header().has_null_bitmap());
  LOG_INFO("print ObMicroBlockEncodingCtx", K_(ctx));
  ObStrDictColumnEncoder *str_dict_encoder = reinterpret_cast<ObStrDictColumnEncoder *>(e);
  ASSERT_EQ(false, str_dict_encoder->string_dict_enc_ctx_.meta_.is_fixed_len_string());
  ASSERT_EQ(false, str_dict_encoder->string_dict_enc_ctx_.meta_.is_use_zero_len_as_null());
  ASSERT_EQ(false, str_dict_encoder->string_dict_enc_ctx_.info_.raw_encoding_str_offset_);
  ASSERT_EQ(distinct_cnt, str_dict_encoder->max_ref_);
  ASSERT_EQ(distinct_cnt, str_dict_encoder->dict_encoding_meta_.distinct_val_cnt_);

  ASSERT_EQ(1, str_dict_encoder->ref_enc_ctx_.meta_.get_uint_width_size()); // [0, distinct_cnt]
  ASSERT_EQ(false, str_dict_encoder->ref_enc_ctx_.meta_.is_use_null_replace_value());
  ASSERT_EQ(3, str_dict_encoder->stream_offsets_.count());

  // col 2: all-null integer column, expected to produce an empty dictionary
  e = encoder.encoders_[2];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INT_DICT);
  ASSERT_EQ(true, e->ctx_->need_sort_);
  dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(e);
  ASSERT_EQ(0, dict_encoder->dict_encoding_meta_.distinct_val_cnt_);
  ASSERT_EQ(0, dict_encoder->stream_offsets_.count());

  // col 3: all-null text column; check this column's own encoder rather than
  // the integer dict encoder left over from col 2
  e = encoder.encoders_[3]; // ObTextType
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STR_DICT);
  ASSERT_EQ(true, e->ctx_->need_sort_);
  str_dict_encoder = reinterpret_cast<ObStrDictColumnEncoder *>(e);
  ASSERT_EQ(0, str_dict_encoder->dict_encoding_meta_.distinct_val_cnt_);
  ASSERT_EQ(0, str_dict_encoder->stream_offsets_.count());

  e = encoder.encoders_[4]; // all zero length string will choose string encoding
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STRING);
  ASSERT_EQ(0, e->ctx_->fix_data_size_);
  ObStringColumnEncoder *str_encoder = reinterpret_cast<ObStringColumnEncoder *>(e);
  ASSERT_EQ(1, str_encoder->stream_offsets_.count());
  reuse();
}
|
||||
|
||||
// Checks when dictionary encoders switch to the const-reference representation.
// The first 936 rows repeat one value per column; the last 64 rows are the exceptions:
//   col 0 (ObIntType):     1000 repeated, then 0..63
//   col 1 (ObVarcharType): 10-byte string repeated, then a 20-byte string
//   col 2 (ObUInt64Type):  alternating 1000 / 1001, then null (expected NOT const ref)
//   col 3 (ObVarcharType): null repeated, then strings of length 100 / 101
//   col 4 (ObIntType):     1000 repeated, then null
TEST_F(TestCSEncoder, test_dict_const_ref_encoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 5;
  ObObjType col_types[col_cnt] = {ObIntType, ObVarcharType, ObUInt64Type, ObVarcharType, ObIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  int64_t row_cnt = 1000;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  ObDatumRow row;
  ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));
  char *varchar_data = static_cast<char *>(allocator_.alloc(row_cnt + 1));
  // Validate the allocation before the buffer is written to.
  ASSERT_TRUE(nullptr != varchar_data);
  MEMSET(varchar_data, 'a', row_cnt + 1);
  int64_t exception_cnt = 64;
  // Rows carrying the repeated ("const") value of each column.
  for (int64_t i = 0; i < row_cnt - exception_cnt; i++) {
    row.storage_datums_[0].set_int(1000);
    row.storage_datums_[1].set_string(varchar_data, 10);
    row.storage_datums_[2].set_int(i % 2 + 1000); // not const dict ref
    row.storage_datums_[3].set_null();
    row.storage_datums_[4].set_int(1000);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  // Rows that differ from the repeated value.
  for (int64_t i = 0; i < exception_cnt; i++) {
    row.storage_datums_[0].set_int(i);
    row.storage_datums_[1].set_string(varchar_data, 20);
    row.storage_datums_[2].set_null();
    row.storage_datums_[3].set_string(varchar_data, 100 + i % 2);
    row.storage_datums_[4].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }
  char *buf = nullptr;
  int64_t buf_size = 0;
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(buf, buf_size));

  ObIColumnCSEncoder *e = encoder.encoders_[0];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INT_DICT);
  ObIntDictColumnEncoder *dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(e);
  ASSERT_EQ(true, dict_encoder->dict_encoding_meta_.is_const_encoding_ref());
  ASSERT_EQ(exception_cnt, dict_encoder->const_list_header_->dict_ref_); // const value is the last one in the sorted dict
  ASSERT_EQ(exception_cnt, dict_encoder->ref_exception_cnt_);
  ASSERT_EQ(2 + 2 * exception_cnt, dict_encoder->dict_encoding_meta_.ref_row_cnt_);

  e = encoder.encoders_[1];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STR_DICT);
  ObStrDictColumnEncoder *str_dict_encoder = reinterpret_cast<ObStrDictColumnEncoder *>(e);
  ASSERT_EQ(1, str_dict_encoder->max_ref_);
  ASSERT_EQ(true, str_dict_encoder->dict_encoding_meta_.is_const_encoding_ref());
  ASSERT_EQ(0, str_dict_encoder->const_list_header_->dict_ref_);
  ASSERT_EQ(exception_cnt, str_dict_encoder->ref_exception_cnt_);
  ASSERT_EQ(2 + 2 * exception_cnt, str_dict_encoder->dict_encoding_meta_.ref_row_cnt_);

  e = encoder.encoders_[2];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INT_DICT);
  ASSERT_EQ(true, e->ctx_->need_sort_);
  dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(e);
  ASSERT_EQ(false, dict_encoder->dict_encoding_meta_.is_const_encoding_ref());

  e = encoder.encoders_[3];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::STR_DICT);
  str_dict_encoder = reinterpret_cast<ObStrDictColumnEncoder *>(e);
  ASSERT_EQ(true, str_dict_encoder->dict_encoding_meta_.is_const_encoding_ref());
  ASSERT_EQ(2, str_dict_encoder->max_ref_);
  ASSERT_EQ(2, str_dict_encoder->const_list_header_->dict_ref_);
  ASSERT_EQ(exception_cnt, str_dict_encoder->ref_exception_cnt_);
  ASSERT_EQ(2 + 2 * exception_cnt, str_dict_encoder->dict_encoding_meta_.ref_row_cnt_);

  e = encoder.encoders_[4];
  ASSERT_EQ(e->get_type(), ObCSColumnHeader::Type::INT_DICT);
  dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(e);
  ASSERT_EQ(true, dict_encoder->dict_encoding_meta_.is_const_encoding_ref());
  ASSERT_EQ(1, dict_encoder->max_ref_);
  ASSERT_EQ(0, dict_encoder->const_list_header_->dict_ref_);
  ASSERT_EQ(exception_cnt, dict_encoder->ref_exception_cnt_);
  ASSERT_EQ(2 + 2 * exception_cnt, dict_encoder->dict_encoding_meta_.ref_row_cnt_);

  reuse();
}
|
||||
|
||||
// Builds a 1000-row micro block of six decimal-int columns and checks which column
// encoder each one ends up with, plus selected encoder metadata.
// Rows 0..996 hold "ordinary" values, row 997 holds large positive values,
// row 998 holds large negative values and row 999 is null in every column.
TEST_F(TestCSEncoder, test_decimal_int_encoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 6;
  ObObjType col_types[col_cnt] = {ObDecimalIntType, ObDecimalIntType, ObDecimalIntType,
                                  ObDecimalIntType, ObDecimalIntType, ObDecimalIntType};
  int64_t precision_arr[col_cnt] = {MAX_PRECISION_DECIMAL_INT_32,  // INTEGER
                                    MAX_PRECISION_DECIMAL_INT_128, // INTEGER
                                    OB_MAX_DECIMAL_PRECISION,      // INT_DICT
                                    MAX_PRECISION_DECIMAL_INT_128, // STRING
                                    MAX_PRECISION_DECIMAL_INT_128, // STRING
                                    OB_MAX_DECIMAL_PRECISION};     // STR_DICT
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt, ObCompressorType::ZSTD_1_3_8_COMPRESSOR, precision_arr));
  ctx_.column_encodings_[4] = ObCSColumnHeader::Type::STRING; // specified, otherwise it will hit STR_DICT
  int64_t row_cnt = 1000;
  const int64_t distinct_cnt = 100;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  ObDatumRow row;
  ASSERT_EQ(OB_SUCCESS, row.init(allocator_, col_cnt));

  // Indexes of the three special rows at the end of the block.
  const int64_t max_row_idx = row_cnt - 3;
  const int64_t min_row_idx = row_cnt - 2;
  const int64_t null_row_idx = row_cnt - 1;
  auto &datums = row.storage_datums_;

  for (int64_t row_idx = 0; row_idx < row_cnt; ++row_idx) {
    int32_t val32 = 0;
    int128_t val128 = 0;
    int256_t val256 = 0;
    if (max_row_idx == row_idx) {
      val32 = INT32_MAX;
      datums[0].set_decimal_int(val32);
      val128 = INT64_MAX;
      datums[1].set_decimal_int(val128);
      val256 = 0;
      datums[2].set_decimal_int(val256);

      val128 = static_cast<int128_t>(INT64_MAX) << 64;
      datums[3].set_decimal_int(val128);
      datums[4].set_decimal_int(val128);
      val256 = static_cast<int256_t>((row_idx % distinct_cnt) - distinct_cnt / 2) << 128;
      datums[5].set_decimal_int(val256);
    } else if (min_row_idx == row_idx) {
      val32 = INT32_MIN;
      datums[0].set_decimal_int(val32);
      val128 = INT64_MIN;
      datums[1].set_decimal_int(val128);
      val256 = INT32_MAX;
      datums[2].set_decimal_int(val256);

      val128 = static_cast<int128_t>(INT64_MIN) << 64;
      datums[3].set_decimal_int(val128);
      datums[4].set_decimal_int(val128);
      val256 = static_cast<int256_t>((row_idx % distinct_cnt) - distinct_cnt / 2) << 128;
      datums[5].set_decimal_int(val256);
    } else if (null_row_idx == row_idx) {
      for (int64_t col_idx = 0; col_idx < col_cnt; ++col_idx) {
        datums[col_idx].set_null();
      }
    } else {
      // Ordinary rows, row_idx in [0, 996].
      val32 = row_idx - row_cnt / 2;
      datums[0].set_decimal_int(val32); // -500 ~ 496
      val128 = row_idx + INT32_MAX;
      datums[1].set_decimal_int(val128);
      val256 = (row_idx % (distinct_cnt - 2)) - INT32_MAX;
      datums[2].set_decimal_int(val256);

      val128 = row_idx;
      datums[3].set_decimal_int(val128);
      datums[4].set_null();
      val256 = static_cast<int256_t>((row_idx % distinct_cnt) - distinct_cnt / 2) << 128;
      datums[5].set_decimal_int(val256);
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row));
  }

  char *block_buf = nullptr;
  int64_t block_size = 0;
  ASSERT_EQ(OB_SUCCESS, encoder.build_block(block_buf, block_size));

  { // col 0: int32_t values -> INTEGER encoding
    ObIColumnCSEncoder *col_encoder = encoder.encoders_[0];
    ASSERT_EQ(col_encoder->get_type(), ObCSColumnHeader::Type::INTEGER);
    ASSERT_EQ(INT32_MIN, static_cast<int64_t>(col_encoder->ctx_->integer_min_));
    ASSERT_EQ(INT32_MAX, static_cast<int64_t>(col_encoder->ctx_->integer_max_));
    ASSERT_EQ(false, col_encoder->get_column_header().has_null_bitmap());
    ObIntegerColumnEncoder *integer_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(col_encoder);
    ASSERT_EQ(8, integer_encoder->enc_ctx_.meta_.get_uint_width_size());
    ASSERT_EQ(true, integer_encoder->enc_ctx_.meta_.is_use_null_replace_value());
    ASSERT_EQ(true, integer_encoder->enc_ctx_.meta_.is_use_base());
    ASSERT_EQ(static_cast<int64_t>(INT32_MIN) - 1,
              static_cast<int64_t>(integer_encoder->enc_ctx_.meta_.null_replaced_value()));
  }

  { // col 1: int128_t values spanning the int64 range -> INTEGER encoding
    ObIColumnCSEncoder *col_encoder = encoder.encoders_[1];
    ASSERT_EQ(col_encoder->get_type(), ObCSColumnHeader::Type::INTEGER);
    ASSERT_EQ(INT64_MIN, static_cast<int64_t>(col_encoder->ctx_->integer_min_));
    ASSERT_EQ(INT64_MAX, static_cast<int64_t>(col_encoder->ctx_->integer_max_));
    ASSERT_EQ(true, col_encoder->get_column_header().has_null_bitmap());
    ObIntegerColumnEncoder *integer_encoder = reinterpret_cast<ObIntegerColumnEncoder *>(col_encoder);
    ASSERT_EQ(8, integer_encoder->enc_ctx_.meta_.get_uint_width_size());
    ASSERT_EQ(false, integer_encoder->enc_ctx_.meta_.is_use_null_replace_value());
    ASSERT_EQ(true, integer_encoder->enc_ctx_.meta_.is_use_base());
    ASSERT_EQ(INT64_MIN, integer_encoder->enc_ctx_.meta_.base_value());
  }

  { // col 2: int256_t values -> INT_DICT encoding
    ObIColumnCSEncoder *col_encoder = encoder.encoders_[2];
    ASSERT_EQ(col_encoder->get_type(), ObCSColumnHeader::Type::INT_DICT);
    ASSERT_EQ(true, col_encoder->ctx_->need_sort_);
    ObIntDictColumnEncoder *int_dict_encoder = reinterpret_cast<ObIntDictColumnEncoder *>(col_encoder);
    ASSERT_EQ(true, int_dict_encoder->integer_dict_enc_ctx_.info_.is_monotonic_inc_);
    ASSERT_EQ(distinct_cnt, int_dict_encoder->max_ref_);
    ASSERT_EQ(distinct_cnt, int_dict_encoder->dict_encoding_meta_.distinct_val_cnt_);
    ASSERT_EQ(2, int_dict_encoder->stream_offsets_.count());
  }

  { // col 3: int128_t, use fix length has less storage cost
    ObIColumnCSEncoder *col_encoder = encoder.encoders_[3];
    ASSERT_EQ(col_encoder->get_type(), ObCSColumnHeader::Type::STRING);
    ASSERT_EQ(sizeof(int128_t), col_encoder->ctx_->fix_data_size_);
    ASSERT_EQ(true, col_encoder->get_column_header().has_null_bitmap());
    ObStringColumnEncoder *string_encoder = reinterpret_cast<ObStringColumnEncoder *>(col_encoder);
    ASSERT_EQ(true, string_encoder->enc_ctx_.meta_.is_fixed_len_string());
    ASSERT_EQ(false, string_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
  }

  { // col 4: int128_t, use var length has less storage cost
    ObIColumnCSEncoder *col_encoder = encoder.encoders_[4];
    ASSERT_EQ(col_encoder->get_type(), ObCSColumnHeader::Type::STRING);
    ASSERT_EQ(sizeof(int128_t), col_encoder->ctx_->fix_data_size_);
    ASSERT_EQ(false, col_encoder->get_column_header().has_null_bitmap());
    ObStringColumnEncoder *string_encoder = reinterpret_cast<ObStringColumnEncoder *>(col_encoder);
    ASSERT_EQ(false, string_encoder->enc_ctx_.meta_.is_fixed_len_string());
    ASSERT_EQ(true, string_encoder->enc_ctx_.meta_.is_use_zero_len_as_null());
  }

  { // col 5: int256_t values -> STR_DICT encoding
    ObIColumnCSEncoder *col_encoder = encoder.encoders_[5];
    ASSERT_EQ(col_encoder->get_type(), ObCSColumnHeader::Type::STR_DICT);
    ASSERT_EQ(sizeof(int256_t), col_encoder->ctx_->fix_data_size_);
    ObStrDictColumnEncoder *string_dict_encoder = reinterpret_cast<ObStrDictColumnEncoder *>(col_encoder);
    ASSERT_EQ(distinct_cnt, string_dict_encoder->max_ref_);
    ASSERT_EQ(distinct_cnt, string_dict_encoder->dict_encoding_meta_.distinct_val_cnt_);
    ASSERT_EQ(2, string_dict_encoder->stream_offsets_.count());
    ASSERT_EQ(1, string_dict_encoder->ref_enc_ctx_.meta_.get_uint_width_size()); // [0, distinct_cnt]
  }

  reuse();
}
|
||||
|
||||
|
||||
} // namespace blocksstable
|
||||
} // namespace oceanbase
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -f test_cs_encoder.log*");
|
||||
OB_LOGGER.set_file_name("test_cs_encoder.log", true, false);
|
||||
oceanbase::common::ObLogger::get_logger().set_log_level("DEBUG");
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@ -0,0 +1,597 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_pd_filter_test_base.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace blocksstable
|
||||
{
|
||||
|
||||
class TestDecimalIntPdFilter : public ObPdFilterTestBase
|
||||
{
|
||||
};
|
||||
|
||||
TEST_F(TestDecimalIntPdFilter, test_decimal_int_decoder)
|
||||
{
|
||||
const int64_t rowkey_cnt = 1;
|
||||
const int64_t col_cnt = 7;
|
||||
const bool enable_check = ENABLE_CASE_CHECK;
|
||||
ObObjType col_types[col_cnt] = {ObDecimalIntType, ObDecimalIntType, ObDecimalIntType, ObDecimalIntType, ObDecimalIntType, ObDecimalIntType, ObDecimalIntType};
|
||||
int64_t precision_arr[col_cnt] = {MAX_PRECISION_DECIMAL_INT_64,
|
||||
MAX_PRECISION_DECIMAL_INT_32,
|
||||
MAX_PRECISION_DECIMAL_INT_128,
|
||||
OB_MAX_DECIMAL_PRECISION,
|
||||
MAX_PRECISION_DECIMAL_INT_128,
|
||||
MAX_PRECISION_DECIMAL_INT_128,
|
||||
OB_MAX_DECIMAL_PRECISION};
|
||||
ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt, ObCompressorType::ZSTD_1_3_8_COMPRESSOR, precision_arr));
|
||||
ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
|
||||
ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INTEGER;
|
||||
ctx_.column_encodings_[2] = ObCSColumnHeader::Type::INTEGER;
|
||||
ctx_.column_encodings_[3] = ObCSColumnHeader::Type::INT_DICT;
|
||||
ctx_.column_encodings_[4] = ObCSColumnHeader::Type::STRING;
|
||||
ctx_.column_encodings_[5] = ObCSColumnHeader::Type::STRING;
|
||||
ctx_.column_encodings_[6] = ObCSColumnHeader::Type::STR_DICT;
|
||||
int64_t row_cnt = 1000;
|
||||
const int64_t distinct_cnt = 100;
|
||||
ObMicroBlockCSEncoder encoder;
|
||||
ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
|
||||
|
||||
void *row_arr_buf = allocator_.alloc(sizeof(ObDatumRow) * row_cnt);
|
||||
ASSERT_TRUE(nullptr != row_arr_buf);
|
||||
ObDatumRow *row_arr = new(row_arr_buf) ObDatumRow[row_cnt];
|
||||
for (int64_t i = 0; i < row_cnt; ++i) {
|
||||
ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
|
||||
}
|
||||
|
||||
for (int64_t i = 0; i < row_cnt; i++) {
|
||||
int64_t p = 0;
|
||||
int32_t m = 0;
|
||||
int128_t j = 0;
|
||||
int256_t k = 0;
|
||||
if (i == row_cnt - 3) {
|
||||
p = 0;
|
||||
row_arr[i].storage_datums_[0].set_decimal_int(p);
|
||||
m = INT32_MAX;
|
||||
row_arr[i].storage_datums_[1].set_decimal_int(m);
|
||||
j = INT64_MAX;
|
||||
row_arr[i].storage_datums_[2].set_decimal_int(j);
|
||||
k = (i % distinct_cnt) - distinct_cnt / 2;
|
||||
row_arr[i].storage_datums_[3].set_decimal_int(k);
|
||||
|
||||
j = (int128_t)INT64_MAX << 64;
|
||||
row_arr[i].storage_datums_[4].set_decimal_int(j);
|
||||
row_arr[i].storage_datums_[5].set_decimal_int(j);
|
||||
k = (int256_t)((i % distinct_cnt) - distinct_cnt / 2) << 128;
|
||||
row_arr[i].storage_datums_[6].set_decimal_int(k);
|
||||
} else if (i == row_cnt - 2) {
|
||||
p = INT32_MAX;
|
||||
row_arr[i].storage_datums_[0].set_decimal_int(p);
|
||||
m = INT32_MIN;
|
||||
row_arr[i].storage_datums_[1].set_decimal_int(m);
|
||||
j = INT64_MIN;
|
||||
row_arr[i].storage_datums_[2].set_decimal_int(j);
|
||||
k = (i % distinct_cnt) - distinct_cnt / 2;
|
||||
row_arr[i].storage_datums_[3].set_decimal_int(k);
|
||||
|
||||
j = (int128_t)INT64_MIN << 64;
|
||||
row_arr[i].storage_datums_[4].set_decimal_int(j);
|
||||
row_arr[i].storage_datums_[5].set_decimal_int(j);
|
||||
k = (int256_t)((i % distinct_cnt) - distinct_cnt / 2) << 128;
|
||||
row_arr[i].storage_datums_[6].set_decimal_int(k);
|
||||
} else if (i == row_cnt - 1) {
|
||||
p = INT64_MAX;
|
||||
row_arr[i].storage_datums_[0].set_decimal_int(p);
|
||||
row_arr[i].storage_datums_[1].set_null();
|
||||
row_arr[i].storage_datums_[2].set_null();
|
||||
row_arr[i].storage_datums_[3].set_null();
|
||||
row_arr[i].storage_datums_[4].set_null();
|
||||
row_arr[i].storage_datums_[5].set_null();
|
||||
row_arr[i].storage_datums_[6].set_null();
|
||||
} else { // [0 ~ 996]
|
||||
p = i + INT64_MIN;
|
||||
row_arr[i].storage_datums_[0].set_decimal_int(p);
|
||||
m = i - 500;
|
||||
row_arr[i].storage_datums_[1].set_decimal_int(m); // -500 ~ 496
|
||||
j = i + INT32_MAX;
|
||||
row_arr[i].storage_datums_[2].set_decimal_int(j);
|
||||
k = (i % distinct_cnt) - distinct_cnt / 2;
|
||||
row_arr[i].storage_datums_[3].set_decimal_int(k);
|
||||
|
||||
j = i;
|
||||
row_arr[i].storage_datums_[4].set_decimal_int(j);
|
||||
row_arr[i].storage_datums_[5].set_null();
|
||||
k = (int256_t)((i % distinct_cnt) - distinct_cnt / 2) << 128;
|
||||
row_arr[i].storage_datums_[6].set_decimal_int(k);
|
||||
}
|
||||
ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
|
||||
}
|
||||
|
||||
HANDLE_TRANSFORM();
|
||||
int64_t col_offset = 0; // INTEGER: [0-996] + INT64_MIN, 0, INT32_MAX, INT64_MAX
|
||||
bool need_check = true;
|
||||
// check NU/NN
|
||||
{
|
||||
int32_t ref_arr[1];
|
||||
int64_t res_arr_nu[1] = {0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
|
||||
int64_t res_arr_nn[1] = {1000};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
|
||||
}
|
||||
// check EQ/NE
|
||||
{
|
||||
int128_t ref_arr[5] = {INT64_MIN, 0, INT32_MIN, INT32_MAX, INT64_MAX};
|
||||
int64_t res_arr_eq[5] = {1, 1, 0, 1, 1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
|
||||
int64_t res_arr_ne[5] = {999, 999, 1000, 999, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
|
||||
}
|
||||
// check LT/LE/GT/GE
|
||||
{
|
||||
int32_t ref_arr[5] = {INT32_MIN, 0, INT32_MAX};
|
||||
int64_t res_arr_lt[5] = {997, 997, 998};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 3, 1, res_arr_lt);
|
||||
int64_t res_arr_le[5] = {997, 998, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 3, 1, res_arr_le);
|
||||
}
|
||||
{
|
||||
int128_t ref_arr[5] = {int128_t(INT64_MIN) << 64, INT32_MIN, 0, INT32_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_gt[5] = {1000, 3, 2, 1, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 5, 1, res_arr_gt);
|
||||
int64_t res_arr_ge[5] = {1000, 3, 3, 2, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 5, 1, res_arr_ge);
|
||||
}
|
||||
// check IN/BT
|
||||
{
|
||||
int32_t ref_arr[3] = {INT32_MIN, 0, INT32_MAX};
|
||||
int64_t res_arr[1] = {2};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 3, res_arr);
|
||||
}
|
||||
{
|
||||
int128_t ref_arr[10] = {int128_t(INT64_MIN) << 64, INT64_MIN, INT64_MIN, INT32_MIN,
|
||||
INT32_MIN, 0, 0, INT32_MAX, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr[5] = {1, 997, 1, 2, 1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
|
||||
}
|
||||
|
||||
|
||||
col_offset = 1; // INTEGER: INT32_MIN, INT32_MAX, null, [-500 ~ 496]
|
||||
// check NU/NN
|
||||
{
|
||||
int64_t ref_arr[1];
|
||||
int64_t res_arr_nu[1] = {1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
|
||||
int64_t res_arr_nn[1] = {999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
|
||||
}
|
||||
// check EQ/NE
|
||||
{
|
||||
int128_t ref_arr[5] = {INT64_MIN, 0, INT32_MIN, INT32_MAX, INT64_MAX};
|
||||
int64_t res_arr_eq[5] = {0, 1, 1, 1, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
|
||||
int64_t res_arr_ne[5] = {999, 998, 998, 998, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
|
||||
}
|
||||
|
||||
// check LT/LE/GT/GE
|
||||
{
|
||||
int128_t ref_arr[5] = {int128_t(INT64_MIN) << 64, INT32_MIN, 0, INT32_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_lt[5] = {0, 0, 501, 998, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 5, 1, res_arr_lt);
|
||||
int64_t res_arr_le[5] = {0, 1, 502, 999, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 5, 1, res_arr_le);
|
||||
}
|
||||
{
|
||||
int128_t ref_arr[5] = {int128_t(INT64_MIN) << 64, INT32_MIN, 0, INT32_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_gt[5] = {999, 998, 497, 0, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 5, 1, res_arr_gt);
|
||||
int64_t res_arr_ge[5] = {999, 999, 498, 1, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 5, 1, res_arr_ge);
|
||||
}
|
||||
|
||||
// check IN/BT
|
||||
{
|
||||
int128_t ref_arr[5] = {int128_t(INT64_MIN) << 64, INT32_MIN, 0, INT32_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr[1] = {3};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
|
||||
}
|
||||
{
|
||||
int128_t ref_arr[10] = {int128_t(INT64_MIN) << 64, INT64_MIN, INT64_MIN, INT32_MIN,
|
||||
INT32_MIN, 0, 0, INT32_MAX, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr[5] = {0, 1, 502, 498, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
|
||||
}
|
||||
|
||||
|
||||
col_offset = 2; // INTEGER: INT64_MIN, INT64_MAX, null, [0-996] + INT32_MAX
|
||||
// check NU/NN
|
||||
{
|
||||
int64_t ref_arr[1];
|
||||
int64_t res_arr_nu[1] = {1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
|
||||
int64_t res_arr_nn[1] = {999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
|
||||
}
|
||||
// check EQ/NE
|
||||
{
|
||||
int128_t ref_arr[5] = {INT64_MIN, 0, INT32_MIN, INT32_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_eq[5] = {1, 0, 0, 1, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
|
||||
int64_t res_arr_ne[5] = {998, 999, 999, 998, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
|
||||
}
|
||||
|
||||
// check LT/LE/GT/GE
|
||||
{
|
||||
int256_t ref_arr[5] = {int256_t(INT64_MIN) << 192, INT32_MIN, 0, INT32_MAX, INT64_MAX};
|
||||
int64_t res_arr_lt[5] = {0, 1, 1, 1, 998};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 5, 1, res_arr_lt);
|
||||
int64_t res_arr_le[5] = {0, 1, 1, 2, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 5, 1, res_arr_le);
|
||||
}
|
||||
{
|
||||
int256_t ref_arr[5] = {INT64_MIN, INT32_MIN, 0, INT32_MAX, int256_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_gt[5] = {998, 998, 998, 997, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 5, 1, res_arr_gt);
|
||||
int64_t res_arr_ge[5] = {999, 998, 998, 998, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 5, 1, res_arr_ge);
|
||||
}
|
||||
|
||||
// check IN/BT
|
||||
{
|
||||
int128_t ref_arr[5] = {int128_t(INT64_MIN) << 64, INT32_MIN, 0, INT32_MAX, INT64_MAX};
|
||||
int64_t res_arr[1] = {2};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
|
||||
}
|
||||
{
|
||||
int256_t ref_arr[10] = {int256_t(INT64_MIN) << 192, INT64_MIN, INT64_MIN, INT32_MIN,
|
||||
INT32_MIN, 0, 0, INT64_MAX, INT64_MAX, int256_t(INT64_MAX) << 192};
|
||||
int64_t res_arr[5] = {1, 1, 0, 998, 1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
|
||||
}
|
||||
|
||||
col_offset = 3; // INT_DICT: null, [-50-46]@10, (47,48,49)@9
|
||||
// check NU/NN
|
||||
{
|
||||
int128_t ref_arr[1];
|
||||
int64_t res_arr_nu[1] = {1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
|
||||
int64_t res_arr_nn[1] = {999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
|
||||
}
|
||||
// check EQ/NE
|
||||
{
|
||||
int128_t ref_arr[5] = {INT64_MIN, -50, 0, 49, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_eq[5] = {0, 10, 10, 9, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
|
||||
int64_t res_arr_ne[5] = {999, 989, 989, 990, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
|
||||
}
|
||||
|
||||
// check LT/LE/GT/GE
|
||||
{
|
||||
int512_t ref_arr[5] = {int128_t(INT64_MIN) << 64, -50, 0, 49, INT64_MAX};
|
||||
int64_t res_arr_lt[5] = {0, 0, 500, 990, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 5, 1, res_arr_lt);
|
||||
int64_t res_arr_le[5] = {0, 10, 510, 999, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 5, 1, res_arr_le);
|
||||
}
|
||||
{
|
||||
int512_t ref_arr[5] = {INT64_MIN, -50, 0, 49, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_gt[5] = {999, 989, 489, 0, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 5, 1, res_arr_gt);
|
||||
int64_t res_arr_ge[5] = {999, 999, 499, 9, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 5, 1, res_arr_ge);
|
||||
}
|
||||
|
||||
// check IN/BT
|
||||
{
|
||||
int128_t ref_arr[5] = {int128_t(INT64_MIN) << 64, -50, 0, 49, INT64_MAX};
|
||||
int64_t res_arr[1] = {29};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
|
||||
}
|
||||
{
|
||||
int128_t ref_arr[10] = {int128_t(INT64_MIN) << 64, INT64_MIN, INT64_MIN, -50,
|
||||
-50, 0, 0, INT64_MAX, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr[5] = {0, 10, 510, 499, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
|
||||
}
|
||||
|
||||
col_offset = 4; // STRING: (int128_t)INT64_MIN << 64, [0-996], (int128_t)INT64_MAX << 64, null
|
||||
// check NU/NN
|
||||
{
|
||||
int128_t ref_arr[1];
|
||||
int64_t res_arr_nu[1] = {1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
|
||||
int64_t res_arr_nn[1] = {999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
|
||||
}
|
||||
// check EQ/NE
|
||||
{
|
||||
int128_t ref_arr[5] = {(int128_t)INT64_MIN << 64, INT64_MIN, 0, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_eq[5] = {1, 0, 1, 0, 1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
|
||||
int64_t res_arr_ne[5] = {998, 999, 998, 999, 998};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
|
||||
}
|
||||
|
||||
// check LT/LE/GT/GE
|
||||
{
|
||||
int128_t ref_arr[5] = {(int128_t)INT64_MIN << 64, INT64_MIN, 0, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_lt[5] = {0, 1, 1, 998, 998};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 5, 1, res_arr_lt);
|
||||
int64_t res_arr_le[5] = {1, 1, 2, 998, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 5, 1, res_arr_le);
|
||||
}
|
||||
{
|
||||
int128_t ref_arr[5] = {(int128_t)INT64_MIN << 64, INT64_MIN, 0, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_gt[5] = {998, 998, 997, 1, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 5, 1, res_arr_gt);
|
||||
int64_t res_arr_ge[5] = {999, 998, 998, 1, 1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 5, 1, res_arr_ge);
|
||||
}
|
||||
|
||||
// check IN/BT
|
||||
{
|
||||
int128_t ref_arr[5] = {(int128_t)INT64_MIN << 64, INT64_MIN, 0, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr[1] = {3};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
|
||||
}
|
||||
{
|
||||
int128_t ref_arr[10] = {int128_t(INT64_MIN) << 64, INT64_MIN, INT64_MIN, 0,
|
||||
0, INT32_MAX, INT32_MAX, INT64_MAX, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr[5] = {1, 1, 997, 0, 1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
|
||||
}
|
||||
|
||||
col_offset = 5; // STRING: (int128_t)INT64_MIN << 64, (int128_t)INT64_MAX << 64, null@998
|
||||
// check NU/NN
|
||||
{
|
||||
int128_t ref_arr[1];
|
||||
int64_t res_arr_nu[1] = {998};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
|
||||
int64_t res_arr_nn[1] = {2};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
|
||||
}
|
||||
// check EQ/NE
|
||||
{
|
||||
int512_t ref_arr[5] = {(int128_t)INT64_MIN << 64, INT64_MIN, 0, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_eq[5] = {1, 0, 0, 0, 1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
|
||||
int64_t res_arr_ne[5] = {1, 2, 2, 2, 1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
|
||||
}
|
||||
|
||||
// check LT/LE/GT/GE
|
||||
{
|
||||
int128_t ref_arr[5] = {(int128_t)INT64_MIN << 64, INT64_MIN, 0, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_lt[5] = {0, 1, 1, 1, 1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 5, 1, res_arr_lt);
|
||||
int64_t res_arr_le[5] = {1, 1, 1, 1, 2};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 5, 1, res_arr_le);
|
||||
}
|
||||
{
|
||||
int512_t ref_arr[5] = {(int128_t)INT64_MIN << 64, INT64_MIN, 0, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr_gt[5] = {1, 1, 1, 1, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 5, 1, res_arr_gt);
|
||||
int64_t res_arr_ge[5] = {2, 1, 1, 1, 1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 5, 1, res_arr_ge);
|
||||
}
|
||||
|
||||
// check IN/BT
|
||||
{
|
||||
int128_t ref_arr[5] = {(int128_t)INT64_MIN << 64, INT64_MIN, 0, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr[1] = {2};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
|
||||
}
|
||||
{
|
||||
int128_t ref_arr[10] = {int128_t(INT64_MIN) << 64, INT64_MIN, INT64_MIN, 0,
|
||||
0, INT32_MAX, INT32_MAX, INT64_MAX, INT64_MAX, int128_t(INT64_MAX) << 64};
|
||||
int64_t res_arr[5] = {1, 0, 0, 0, 1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
|
||||
}
|
||||
|
||||
col_offset = 6; // k = (int256_t)[-50-48]@10 << 128, 49<<128@9, null
|
||||
// check NU/NN
|
||||
{
|
||||
int128_t ref_arr[1];
|
||||
int64_t res_arr_nu[1] = {1};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
|
||||
int64_t res_arr_nn[1] = {999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
|
||||
}
|
||||
// check EQ/NE
|
||||
{
|
||||
int512_t ref_arr[5] = {(int512_t)INT64_MIN << 128, (int512_t)-50 << 128 , 0, (int512_t)49 << 128 , int512_t(INT64_MAX) << 128};
|
||||
int64_t res_arr_eq[5] = {0, 10, 10, 9, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
|
||||
int64_t res_arr_ne[5] = {999, 989, 989, 990, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
|
||||
}
|
||||
|
||||
// check LT/LE/GT/GE
|
||||
{
|
||||
int512_t ref_arr[5] = {(int512_t)INT64_MIN << 128, (int512_t)-50 << 128 , 0, (int512_t)49 << 128 , int512_t(INT64_MAX) << 128};
|
||||
int64_t res_arr_lt[5] = {0, 0, 500, 990, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 5, 1, res_arr_lt);
|
||||
int64_t res_arr_le[5] = {0, 10, 510, 999, 999};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 5, 1, res_arr_le);
|
||||
}
|
||||
{
|
||||
int512_t ref_arr[5] = {(int512_t)INT64_MIN << 128, (int512_t)-50 << 128 , 0, (int512_t)49 << 128 , int512_t(INT64_MAX) << 128};
|
||||
int64_t res_arr_gt[5] = {999, 989, 489, 0, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 5, 1, res_arr_gt);
|
||||
int64_t res_arr_ge[5] = {999, 999, 499, 9, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 5, 1, res_arr_ge);
|
||||
}
|
||||
// check IN/BT
|
||||
// if filter precision is different with col precision, the hash is different even if the integer value is equal
|
||||
/*
|
||||
{
|
||||
int512_t ref_arr[5] = {(int512_t)INT64_MIN << 128, (int512_t)-50 << 128 , 0, (int512_t)49 << 128 , int512_t(INT64_MAX) << 128};
|
||||
int64_t res_arr[1] = {29};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
|
||||
}
|
||||
*/
|
||||
|
||||
{
|
||||
int512_t ref_arr[10] = {int512_t(INT64_MIN) << 256, (int512_t)INT64_MIN << 128, (int512_t)INT64_MIN << 128, 0,
|
||||
0, (int512_t)INT64_MAX << 64, (int512_t)INT64_MAX << 64, (int512_t)INT64_MAX << 128,
|
||||
int512_t(INT64_MAX) << 128, int512_t(INT64_MAX) << 256};
|
||||
int64_t res_arr[5] = {0, 510, 10, 489, 0};
|
||||
decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Runs the decimal-int filter checks against a block whose columns are dominated by one repeated value.
// Row layout (1000 rows, column 0 always holds the row index):
//   rows [0, 950)    : col1 = INT32_MAX, col2 = NULL,      col3 = (int256_t)INT64_MAX << 128
//   rows [950, 990)  : col1 = row index, col2 = INT64_MIN, col3 = 0
//   rows [990, 1000) : col1 = NULL,      col2 = NULL,      col3 = NULL
// NOTE: ref_arr / col_offset / need_check look unused here; presumably the project macros
// (HANDLE_TRANSFORM, decimal_type_filter_normal_check) pick them up by name - keep the names unchanged.
TEST_F(TestDecimalIntPdFilter, test_decimal_int_const_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 4;
  const bool enable_check = ENABLE_CASE_CHECK;
  ObObjType col_types[col_cnt] = {ObDecimalIntType, ObDecimalIntType, ObDecimalIntType, ObDecimalIntType};
  int64_t precision_arr[col_cnt] = {MAX_PRECISION_DECIMAL_INT_64,
                                    MAX_PRECISION_DECIMAL_INT_32,
                                    MAX_PRECISION_DECIMAL_INT_128,
                                    OB_MAX_DECIMAL_PRECISION};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt, ObCompressorType::ZSTD_1_3_8_COMPRESSOR, precision_arr));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INT_DICT;
  ctx_.column_encodings_[2] = ObCSColumnHeader::Type::INT_DICT;
  ctx_.column_encodings_[3] = ObCSColumnHeader::Type::STR_DICT;
  int64_t row_cnt = 1000;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));

  // The rows live in allocator-owned memory and are constructed in place.
  void *row_arr_buf = allocator_.alloc(sizeof(ObDatumRow) * row_cnt);
  ASSERT_TRUE(row_arr_buf != nullptr);
  ObDatumRow *row_arr = new(row_arr_buf) ObDatumRow[row_cnt];
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ASSERT_EQ(OB_SUCCESS, row_arr[idx].init(allocator_, col_cnt));
  }

  // The integer width of each value is selected by the type of the variable handed to set_decimal_int().
  int32_t m = 0;
  int128_t j = 0;
  int256_t k = 0;
  const int64_t repeated_rows_end = row_cnt - 50;
  const int64_t distinct_rows_end = row_cnt - 10;

  // Part 1: the repeated value in col1/col3, NULL in col2.
  m = INT32_MAX;
  k = (int256_t)INT64_MAX << 128;
  for (int64_t i = 0; i < repeated_rows_end; ++i) {
    row_arr[i].storage_datums_[0].set_decimal_int(i);
    row_arr[i].storage_datums_[1].set_decimal_int(m);
    row_arr[i].storage_datums_[2].set_null();
    row_arr[i].storage_datums_[3].set_decimal_int(k);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  // Part 2: one distinct value per row in col1, INT64_MIN in col2, zero in col3.
  j = INT64_MIN;
  k = 0;
  for (int64_t i = repeated_rows_end; i < distinct_rows_end; ++i) {
    m = i;
    row_arr[i].storage_datums_[0].set_decimal_int(i);
    row_arr[i].storage_datums_[1].set_decimal_int(m);
    row_arr[i].storage_datums_[2].set_decimal_int(j);
    row_arr[i].storage_datums_[3].set_decimal_int(k);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  // Part 3: NULL in every non-key column.
  for (int64_t i = distinct_rows_end; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_decimal_int(i);
    row_arr[i].storage_datums_[1].set_null();
    row_arr[i].storage_datums_[2].set_null();
    row_arr[i].storage_datums_[3].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  HANDLE_TRANSFORM();

  int64_t col_offset = 1; // INT_DICT: INT32_MAX@950, [950-989]@40 (one row each), null@10
  bool need_check = true;
  // check NU/NN
  {
    int64_t ref_arr[1];
    int64_t res_arr_nu[1] = {10};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {990};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }
  // check EQ/NE and LT/LE/GT/GE: all six operators are probed with the same reference values
  {
    int128_t ref_arr[5] = {INT32_MAX, 950, 960, 989, 990};

    int64_t res_arr_eq[5] = {950, 1, 1, 1, 0};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
    int64_t res_arr_ne[5] = {40, 989, 989, 989, 990};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);

    int64_t res_arr_lt[5] = {40, 0, 10, 39, 40};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 5, 1, res_arr_lt);
    int64_t res_arr_le[5] = {990, 1, 11, 40, 40};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 5, 1, res_arr_le);

    int64_t res_arr_gt[5] = {0, 989, 979, 950, 950};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 5, 1, res_arr_gt);
    int64_t res_arr_ge[5] = {950, 990, 980, 951, 950};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 5, 1, res_arr_ge);
  }

  col_offset = 2; // INT_DICT: null@960 INT64_MIN@40
  // check NU/NN
  {
    int64_t ref_arr[1];
    int64_t res_arr_nu[1] = {960};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {40};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }
  // check EQ/NE
  {
    int128_t ref_arr[1] = {INT64_MIN};
    int64_t res_arr_eq[1] = {40};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 1, 1, res_arr_eq);
    int64_t res_arr_ne[1] = {0};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 1, 1, res_arr_ne);
  }

  col_offset = 3; // STR_DICT: (int256_t)INT64_MAX << 128@950, null@10, 0@40
  // check NU/NN
  {
    int64_t ref_arr[1];
    int64_t res_arr_nu[1] = {10};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {990};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }
  // check EQ/NE
  {
    int256_t ref_arr[2] = {(int256_t)INT64_MAX << 128, 0};
    int64_t res_arr_eq[2] = {950, 40};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 2, 1, res_arr_eq);
    int64_t res_arr_ne[2] = {40, 950};
    decimal_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 2, 1, res_arr_ne);
  }
}
|
||||
|
||||
} // namespace blocksstable
|
||||
} // namespace oceanbase
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -f test_decimal_int_pd_filter.log*");
|
||||
OB_LOGGER.set_file_name("test_decimal_int_pd_filter.log", true, false);
|
||||
oceanbase::common::ObLogger::get_logger().set_log_level("DEBUG");
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
1992
unittest/storage/blocksstable/cs_encoding/test_decoder_filter_perf.h
Normal file
1992
unittest/storage/blocksstable/cs_encoding/test_decoder_filter_perf.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,552 @@
|
||||
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
|
||||
#include "ob_pd_filter_test_base.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace blocksstable
|
||||
{
|
||||
|
||||
class TestIntDictPdFilter : public ObPdFilterTestBase
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
// Checks filter results on an INT_DICT encoded ObIntType column (column 1).
// The case runs twice: round 0 has no NULLs, round 1 appends 20 NULL rows after the data rows.
// The 100 data rows hold (i % 20 + INT32_MAX), so each of the 20 distinct values occurs 5 times.
// NOTE: ref_arr / col_offset / need_check look unused here; presumably the project macros
// (HANDLE_TRANSFORM, integer_type_filter_normal_check) pick them up by name - keep the names unchanged.
TEST_F(TestIntDictPdFilter, test_int_dict_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  // Compile-time upper bound of row_cnt over both rounds (100 data rows + 20 NULL rows).
  // It keeps row_arr a fixed-size array; sizing it by the runtime row_cnt would make it a
  // variable length array, which is a compiler extension rather than standard C++.
  const int64_t ROW_ARR_CAPACITY = 120;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INT_DICT; // integer dict

  for (int8_t flag = 0; flag <= 1; ++flag) {
    const bool has_null = (0 != flag);
    const int64_t null_cnt = has_null ? 20 : 0;
    const int64_t row_cnt = 100 + null_cnt;
    ASSERT_LE(row_cnt, ROW_ARR_CAPACITY);

    ObMicroBlockCSEncoder encoder;
    ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
    // Only the first row_cnt elements are initialized and used.
    ObDatumRow row_arr[ROW_ARR_CAPACITY];
    for (int64_t i = 0; i < row_cnt; ++i) {
      ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
    }

    const int64_t distinct_cnt = 20;
    for (int64_t i = 0; i < row_cnt; ++i) {
      row_arr[i].storage_datums_[0].set_int32(i);
      if (i < 100) {
        row_arr[i].storage_datums_[1].set_int(i % distinct_cnt + INT32_MAX);
      } else {
        // rows beyond the first 100 only exist in the has_null round
        row_arr[i].storage_datums_[1].set_null();
      }
      ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
    }

    HANDLE_TRANSFORM();

    const int64_t col_offset = 1;
    const int64_t TMP_PARAM = 10000;
    const int64_t round_cnt = 100 / distinct_cnt; // occurrences of each distinct value
    bool need_check = true;

    // check NU/NN
    {
      int64_t ref_arr[1];
      int64_t res_arr_nu[1] = {null_cnt}; // null_cnt is already 0 in the round without NULLs
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
      int64_t res_arr_nn[1] = {100};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
    }

    // check EQ/NE
    {
      int64_t ref_arr[4] = {-100, 2 % distinct_cnt + INT32_MAX,
                            19 % distinct_cnt + INT32_MAX, 500 % TMP_PARAM + INT32_MAX};
      int64_t res_arr_eq[4] = {0, round_cnt, round_cnt, 0};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 4, 1, res_arr_eq);
      int64_t res_arr_ne[4] = {100, 100 - round_cnt, 100 - round_cnt, 100};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 4, 1, res_arr_ne);
    }

    // check LT/LE/GT/GE
    {
      int64_t ref_arr[4] = {-100, 1 % distinct_cnt + INT32_MAX,
                            6 % distinct_cnt + INT32_MAX, 500 % TMP_PARAM + INT32_MAX};
      int64_t res_arr_lt[4] = {0, round_cnt, 6 * round_cnt, 20 * round_cnt};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 4, 1, res_arr_lt);
      int64_t res_arr_le[4] = {0, 2 * round_cnt, 7 * round_cnt, 20 * round_cnt};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 4, 1, res_arr_le);
    }
    {
      int64_t ref_arr[4] = {-100, 17 % distinct_cnt + INT32_MAX,
                            19 % distinct_cnt + INT32_MAX, 500 % TMP_PARAM + INT32_MAX};
      int64_t res_arr_gt[4] = {20 * round_cnt, 2 * round_cnt, 0, 0};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 4, 1, res_arr_gt);
      int64_t res_arr_ge[4] = {20 * round_cnt, 3 * round_cnt, round_cnt, 0};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 4, 1, res_arr_ge);
    }

    // check IN/BT
    {
      int64_t ref_arr[4] = {-100, 1 % distinct_cnt + INT32_MAX,
                            6 % distinct_cnt + INT32_MAX, 500 % TMP_PARAM + INT32_MAX};
      int64_t res_arr[1] = {round_cnt * 2};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 4, res_arr);
    }
    {
      int64_t ref_arr[10] = {-100, -90, -55, 2 % distinct_cnt + INT32_MAX, 1 % distinct_cnt + INT32_MAX,
                             4 % distinct_cnt + INT32_MAX, 18 % distinct_cnt + INT32_MAX, 500 % TMP_PARAM + INT32_MAX,
                             1000 % TMP_PARAM + INT32_MAX, 2000 % TMP_PARAM + INT32_MAX};
      int64_t res_arr[5] = {0, 3 * round_cnt, 4 * round_cnt, 2 * round_cnt, 0};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
    }

    LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");

    encoder.reuse();
  }
}
|
||||
|
||||
// Checks NU/NN and GT/GE filters on an INT_DICT column of positive values 100..199,
// with the encoder's is_all_column_force_raw_ flag switched on.
// The case runs twice: round 0 has no NULLs, round 1 appends 20 NULL rows after the data rows.
// NOTE: ref_arr / col_offset / need_check look unused here; presumably the project macros
// (HANDLE_TRANSFORM, integer_type_filter_normal_check) pick them up by name - keep the names unchanged.
TEST_F(TestIntDictPdFilter, test_positive_int_dict_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  const bool is_force_raw = true;
  // Compile-time upper bound of row_cnt over both rounds (100 data rows + 20 NULL rows).
  // It keeps row_arr a fixed-size array; sizing it by the runtime row_cnt would make it a
  // variable length array, which is a compiler extension rather than standard C++.
  const int64_t ROW_ARR_CAPACITY = 120;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INT_DICT; // integer dict

  for (int8_t flag = 0; flag <= 1; ++flag) {
    const bool has_null = (0 != flag);
    const int64_t null_cnt = has_null ? 20 : 0;
    const int64_t row_cnt = 100 + null_cnt;
    ASSERT_LE(row_cnt, ROW_ARR_CAPACITY);

    ObMicroBlockCSEncoder encoder;
    ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
    encoder.is_all_column_force_raw_ = is_force_raw;
    // Only the first row_cnt elements are initialized and used.
    ObDatumRow row_arr[ROW_ARR_CAPACITY];
    for (int64_t i = 0; i < row_cnt; ++i) {
      ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
    }

    for (int64_t i = 0; i < row_cnt; ++i) {
      row_arr[i].storage_datums_[0].set_int32(i);
      if (i < 100) {
        row_arr[i].storage_datums_[1].set_int(100 + i);
      } else {
        // rows beyond the first 100 only exist in the has_null round
        row_arr[i].storage_datums_[1].set_null();
      }
      ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
    }

    HANDLE_TRANSFORM();

    const int64_t col_offset = 1;
    bool need_check = true;

    // check NU/NN
    {
      int64_t ref_arr[1];
      int64_t res_arr_nu[1] = {null_cnt}; // null_cnt is already 0 in the round without NULLs
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
      int64_t res_arr_nn[1] = {100};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
    }

    // check GT/GE (reference values below, inside and near the middle of the stored range)
    {
      int64_t ref_arr[3] = {-1, 50, 150};
      int64_t res_arr_gt[3] = {100, 100, 49};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 3, 1, res_arr_gt);
      int64_t res_arr_ge[3] = {100, 100, 50};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 3, 1, res_arr_ge);
    }
  }
}
|
||||
|
||||
// Checks filters on an INT_DICT column that is almost constant.
// Column 1 layout (120 rows): 30 x 115, then 100, 200, 300, 400 (one row each), then one NULL.
// NOTE: ref_arr / col_offset / need_check look unused here; presumably the project macros
// (HANDLE_TRANSFORM, integer_type_filter_normal_check) pick them up by name - keep the names unchanged.
TEST_F(TestIntDictPdFilter, test_int_dict_const_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INT_DICT; // integer dict

  const int64_t row_cnt = 120;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  ObDatumRow row_arr[row_cnt];
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ASSERT_EQ(OB_SUCCESS, row_arr[idx].init(allocator_, col_cnt));
  }

  // Rows before tail_rows_begin carry the constant 30; the last five rows are the exceptions.
  const int64_t tail_rows_begin = row_cnt - 5;
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    row_arr[idx].storage_datums_[0].set_int32(idx);
    if (idx < tail_rows_begin) {
      row_arr[idx].storage_datums_[1].set_int(30);
    } else if (row_cnt - 1 == idx) {
      row_arr[idx].storage_datums_[1].set_null();
    } else {
      row_arr[idx].storage_datums_[1].set_int((idx - row_cnt + 6) * 100);
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[idx]));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // check NU/NN
  {
    int64_t ref_arr[1];
    int64_t res_arr_nu[1] = {1};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {119};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }

  // check EQ/NE
  {
    int64_t ref_arr[4] = {-100, 30, 100, 101};
    int64_t res_arr_eq[4] = {0, 115, 1, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 4, 1, res_arr_eq);
    int64_t res_arr_ne[4] = {119, 4, 118, 119};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 4, 1, res_arr_ne);
  }

  // check LT/LE/GT/GE
  {
    int64_t ref_arr[4] = {-100, 30, 31, 300};
    int64_t res_arr_lt[4] = {0, 0, 115, 117};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 4, 1, res_arr_lt);
    int64_t res_arr_le[4] = {0, 115, 115, 118};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 4, 1, res_arr_le);
  }
  {
    int64_t ref_arr[4] = {30, 100, 400, 500};
    int64_t res_arr_gt[4] = {4, 3, 0, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 4, 1, res_arr_gt);
    int64_t res_arr_ge[4] = {119, 4, 1, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 4, 1, res_arr_ge);
  }

  // check IN/BT
  {
    int64_t ref_arr[4] = {-100, 30, 105, 300};
    int64_t res_arr[1] = {116};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 4, res_arr);
  }
  {
    // five [low, high] pairs
    int64_t ref_arr[10] = {-100, -50, -10, 40, -1, 100, 31, 105, 50, 500};
    int64_t res_arr[5] = {0, 115, 116, 1, 4};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
  }
  LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");
}
|
||||
|
||||
// Checks filters on an INT_DICT column where NULL is the dominant value.
// Column 1 layout (120 rows): NULL x 115, then the values 115..119 (one row each).
// NOTE: ref_arr / col_offset / need_check look unused here; presumably the project macros
// (HANDLE_TRANSFORM, integer_type_filter_normal_check) pick them up by name - keep the names unchanged.
TEST_F(TestIntDictPdFilter, test_int_dict_null_const_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INT_DICT; // integer dict

  const int64_t row_cnt = 120;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  ObDatumRow row_arr[row_cnt];
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ASSERT_EQ(OB_SUCCESS, row_arr[idx].init(allocator_, col_cnt));
  }

  // Only the last five rows carry a value (their own row index); everything before is NULL.
  const int64_t tail_rows_begin = row_cnt - 5;
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    row_arr[idx].storage_datums_[0].set_int32(idx);
    if (idx < tail_rows_begin) {
      row_arr[idx].storage_datums_[1].set_null();
    } else {
      row_arr[idx].storage_datums_[1].set_int(idx);
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[idx]));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // check NU/NN
  {
    int64_t ref_arr[1];
    int64_t res_arr_nu[1] = {115};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {5};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }

  // check EQ/NE
  {
    int64_t ref_arr[4] = {-100, 30, 115, 200};
    int64_t res_arr_eq[4] = {0, 0, 1, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 4, 1, res_arr_eq);
    int64_t res_arr_ne[4] = {5, 5, 4, 5};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 4, 1, res_arr_ne);
  }

  // check LT/LE
  {
    int64_t ref_arr[4] = {-100, 30, 115, 119};
    int64_t res_arr_lt[4] = {0, 0, 0, 4};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 4, 1, res_arr_lt);
    int64_t res_arr_le[4] = {0, 0, 1, 5};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 4, 1, res_arr_le);
  }

  LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");
}
|
||||
|
||||
// Checks filters on an almost-constant INT_DICT smallint column without NULLs, using filter
// values that may lie outside the smallint range (see enable_abnormal_filter_type_ below).
// Column 1 layout (1200 rows): 0 x 1100, then 1, 11, 21, ..., 91 (10 rows each).
// NOTE: ref_arr / col_offset / need_check look unused here; presumably the project macros
// (HANDLE_TRANSFORM, integer_type_filter_normal_check) pick them up by name - keep the names unchanged.
TEST_F(TestIntDictPdFilter, test_int_dict_const_without_null_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  const bool is_force_raw = false;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObSmallIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INT_DICT; // integer dict

  const int64_t row_cnt = 1200;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = is_force_raw;
  ObDatumRow row_arr[row_cnt];
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ASSERT_EQ(OB_SUCCESS, row_arr[idx].init(allocator_, col_cnt));
  }

  // Leading run of the constant value 0.
  const int64_t zero_rows_cnt = row_cnt - 100;
  for (int64_t idx = 0; idx < zero_rows_cnt; ++idx) {
    row_arr[idx].storage_datums_[0].set_int32(idx);
    row_arr[idx].storage_datums_[1].set_int(0);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[idx]));
  }
  // Last 100 rows: ten groups of ten rows; group g holds the value 1 + g * 10.
  for (int64_t idx = zero_rows_cnt; idx < row_cnt; ++idx) {
    const int64_t group = (idx - zero_rows_cnt) / 10;
    row_arr[idx].storage_datums_[0].set_int32(idx);
    row_arr[idx].storage_datums_[1].set_int(1 + group * 10);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[idx]));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // NOTICE:
  // This case uses 'abnormal' filter values: although the column type is smallint, some of the
  // values are larger than INT16_MAX or less than INT16_MIN, to check the correctness of the filter.
  enable_abnormal_filter_type_ = true;

  // check NU/NN
  {
    int64_t ref_arr[1];
    int64_t res_arr_nu[1] = {0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {1200};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }

  // check EQ/NE
  {
    int64_t ref_arr[4] = {0, 1, 2, 32768};
    int64_t res_arr_eq[4] = {1100, 10, 0, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 4, 1, res_arr_eq);
    int64_t res_arr_ne[4] = {100, 1190, 1200, 1200};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 4, 1, res_arr_ne);
  }

  // check LT/LE/GT/GE
  {
    int64_t ref_arr[4] = {-32769, -1, 1, 90};
    int64_t res_arr_lt[4] = {0, 0, 1100, 1190};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 4, 1, res_arr_lt);
    int64_t res_arr_le[4] = {0, 0, 1110, 1190};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 4, 1, res_arr_le);
  }
  {
    int64_t ref_arr[4] = {-32769, -1, 10, 32768};
    int64_t res_arr_gt[4] = {1200, 1200, 90, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 4, 1, res_arr_gt);
    int64_t res_arr_ge[4] = {1200, 1200, 90, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 4, 1, res_arr_ge);
  }

  // check IN/BT
  {
    int64_t ref_arr[4] = {-32769, 1, 91, 32768};
    int64_t res_arr[1] = {20};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 4, res_arr);
  }
  {
    // five [low, high] pairs
    int64_t ref_arr[10] = {-32769, -32768, -32769, 0, -1, 10, 0, 21, 1, 32768};
    int64_t res_arr[5] = {0, 1100, 1110, 1130, 100};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
  }

  LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");
}
|
||||
|
||||
// Checks NU/NN and EQ filters on an INT_DICT column in which every one of the 120 rows is NULL.
// NOTE: ref_arr / col_offset / need_check look unused here; presumably the project macros
// (HANDLE_TRANSFORM, integer_type_filter_normal_check) pick them up by name - keep the names unchanged.
TEST_F(TestIntDictPdFilter, test_all_null_int_dict_const_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  const bool is_force_raw = false;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INT_DICT; // integer dict

  const int64_t row_cnt = 120;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = is_force_raw;
  ObDatumRow row_arr[row_cnt];
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    ASSERT_EQ(OB_SUCCESS, row_arr[idx].init(allocator_, col_cnt));
  }

  // Column 0 holds the row index, column 1 is NULL in every row.
  for (int64_t idx = 0; idx < row_cnt; ++idx) {
    row_arr[idx].storage_datums_[0].set_int32(idx);
    row_arr[idx].storage_datums_[1].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[idx]));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // Actually, it won't use const encoding.

  // check NU/NN
  {
    int64_t ref_arr[1];
    int64_t res_arr_nu[1] = {120};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }
  // check EQ
  {
    int64_t ref_arr[2] = {-100, 30};
    int64_t res_arr_eq[2] = {0, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 2, 1, res_arr_eq);
  }

  LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");
}
|
||||
|
||||
// Regression test for the fix of an exceed-range compare filter problem (the original issue reference is missing here).
// Actually, int dict encoding does not trigger this problem, because int dicts are always sorted
// and take other paths; only integer encoding may trigger this problem.
|
||||
TEST_F(TestIntDictPdFilter, test_exceed_range_compare_filter)
|
||||
{
|
||||
const int64_t rowkey_cnt = 1;
|
||||
const int64_t col_cnt = 2;
|
||||
const bool enable_check = ENABLE_CASE_CHECK;
|
||||
ObObjType col_types[col_cnt] = {ObInt32Type, ObIntType};
|
||||
ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
|
||||
ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT;
|
||||
ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INT_DICT;
|
||||
|
||||
const int64_t row_cnt = 2;
|
||||
ObMicroBlockCSEncoder encoder;
|
||||
ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
|
||||
encoder.has_lob_out_row_ = true;
|
||||
ObDatumRow row_arr[row_cnt];
|
||||
for (int64_t i = 0; i < row_cnt; ++i) {
|
||||
ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
|
||||
}
|
||||
row_arr[0].storage_datums_[0].set_int32(0);
|
||||
row_arr[0].storage_datums_[1].set_int(-10000000);
|
||||
ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[0]));
|
||||
row_arr[1].storage_datums_[0].set_int32(1);
|
||||
row_arr[1].storage_datums_[1].set_int(-10000001);
|
||||
ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[1]));
|
||||
|
||||
HANDLE_TRANSFORM();
|
||||
|
||||
const int64_t col_offset = 1;
|
||||
bool need_check = true;
|
||||
|
||||
// check EQ NE
|
||||
{
|
||||
int64_t ref_arr[5] = {-10000002, -10000001, -10000000, -1, 10000000};
|
||||
int64_t res_arr_eq[5] = {0, 1, 1, 0, 0};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
|
||||
int64_t res_arr_ne[5] = {2, 1, 1, 2, 2};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
|
||||
}
|
||||
|
||||
// check LT/LE/GT/GE
|
||||
{
|
||||
int64_t ref_arr[5] = {-10000002, -10000001, -10000000, -1, 10000000};
|
||||
int64_t res_arr_lt[5] = {0, 0, 1, 2, 2};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 5, 1, res_arr_lt);
|
||||
int64_t res_arr_le[5] = {0, 1, 2, 2, 2};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 5, 1, res_arr_le);
|
||||
}
|
||||
{
|
||||
int64_t ref_arr[5] = {-10000002, -10000001, -10000000, -1, 10000000};
|
||||
int64_t res_arr_gt[5] = {2, 1, 0, 0, 0};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 5, 1, res_arr_gt);
|
||||
int64_t res_arr_ge[5] = {2, 2, 1, 0, 0};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 5, 1, res_arr_ge);
|
||||
}
|
||||
LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -f test_int_dict_pd_filter.log*");
|
||||
OB_LOGGER.set_file_name("test_int_dict_pd_filter.log", true, false);
|
||||
oceanbase::common::ObLogger::get_logger().set_log_level("DEBUG");
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@ -0,0 +1,567 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
#include "ob_pd_filter_test_base.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace blocksstable
|
||||
{
|
||||
class TestIntegerPdFilter : public ObPdFilterTestBase
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
// Runs the white filter operators (NU/NN, EQ/NE, LE/LT/GE/GT, IN, BT) against
// an INTEGER-encoded ObIntType column holding -50..49, first without and then
// with 20 trailing null rows.
TEST_F(TestIntegerPdFilter, test_integer_decoder_filter)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  for (int64_t col_idx = 0; col_idx < col_cnt; ++col_idx) {
    ctx_.column_encodings_[col_idx] = ObCSColumnHeader::Type::INTEGER;
  }

  // flag == 0: no null rows; flag == 1: null rows appended after the values.
  for (int8_t flag = 0; flag <= 1; ++flag) {
    bool has_null = flag;
    const int64_t null_cnt = has_null ? 20 : 0;
    const int64_t row_cnt = 100 + null_cnt;
    ObMicroBlockCSEncoder encoder;
    ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));

    ObDatumRow row_arr[row_cnt];
    for (int64_t i = 0; i < row_cnt; ++i) {
      ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
    }

    for (int64_t i = 0; i < row_cnt; ++i) {
      ObDatumRow &cur_row = row_arr[i];
      cur_row.storage_datums_[0].set_int32(i);
      if (i >= 100) {
        // Rows past the first 100 only exist in the has_null round.
        cur_row.storage_datums_[1].set_null();
      } else {
        cur_row.storage_datums_[1].set_int(i - 50);
      }
      ASSERT_EQ(OB_SUCCESS, encoder.append_row(cur_row));
    }

    HANDLE_TRANSFORM();

    const int64_t col_offset = 1;
    bool need_check = true;

    // NU / NN: null rows versus the 100 valued rows.
    {
      int64_t ref_arr[1];
      const int64_t nu_cnt = has_null ? null_cnt : 0;
      int64_t res_arr_nu[1] = {nu_cnt};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);

      int64_t res_arr_nn[1] = {100};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
    }

    // EQ / NE: -55 and 55 lie outside the stored range.
    {
      int64_t ref_arr[4] = {-55, -50, 40, 55};
      int64_t res_arr[4] = {0, 1, 1, 0};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 4, 1, res_arr);
    }
    {
      int64_t ref_arr[4] = {-55, -50, 40, 55};
      int64_t res_arr[4] = {100, 99, 99, 100};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 4, 1, res_arr);
    }

    // LE / LT / GE / GT
    {
      int64_t ref_arr[4] = {-55, -50, -40, 55};
      int64_t res_arr_le[4] = {0, 1, 11, 100};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 4, 1, res_arr_le);
      int64_t res_arr_lt[4] = {0, 0, 10, 100};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 4, 1, res_arr_lt);
    }
    {
      int64_t ref_arr[4] = {-55, 40, 49, 55};
      int64_t res_arr_ge[4] = {100, 10, 1, 0};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 4, 1, res_arr_ge);
      int64_t res_arr_gt[4] = {100, 9, 0, 0};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 4, 1, res_arr_gt);
    }

    // IN: one filter with five candidates, three of which are stored.
    {
      int64_t ref_arr[5] = {-55, -27, 0, 10, 100};
      int64_t res_arr[1] = {3};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
    }
    // BT: five filters, each taking two consecutive entries of ref_arr.
    {
      int64_t ref_arr[10] = {-100, -90, -55, -47, -4, 4, 47, 55, 90, 100};
      int64_t res_arr[5] = {0, 4, 9, 3, 0};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
    }
    LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");
  }
}
|
||||
|
||||
// Runs NU/NN, EQ/NE and LT/LE/GT/GE filters against an INTEGER-encoded
// ObUSmallIntType column holding 100..199, first without and then with 20
// trailing null rows. The encoder's is_all_column_force_raw_ flag is set.
TEST_F(TestIntegerPdFilter, test_integer_decoder_uint_type)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  // The former function-scope `has_null` constant was removed: it was never
  // read and was shadowed by the per-round `has_null` declared in the loop.
  const bool is_force_raw = true;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObUSmallIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INTEGER;

  // flag == 0: no null rows; flag == 1: null rows appended after the values.
  for (int8_t flag = 0; flag <= 1; ++flag) {
    bool has_null = flag;
    const int64_t null_cnt = has_null ? 20 : 0;
    const int64_t row_cnt = 100 + null_cnt;

    ObMicroBlockCSEncoder encoder;
    ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
    encoder.is_all_column_force_raw_ = is_force_raw;
    ObDatumRow row_arr[row_cnt];
    for (int64_t i = 0; i < row_cnt; ++i) {
      ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
    }

    for (int64_t i = 0; i < row_cnt; ++i) {
      row_arr[i].storage_datums_[0].set_int32(i);
      if (i < 100) {
        row_arr[i].storage_datums_[1].set_uint(100 + i);
      } else {
        // Rows past the first 100 only exist in the has_null round.
        row_arr[i].storage_datums_[1].set_null();
      }
      ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
    }

    HANDLE_TRANSFORM();

    const int64_t col_offset = 1;
    bool need_check = true;

    // NU / NN: null rows versus the 100 valued rows.
    {
      int64_t ref_arr[1];
      const int64_t nu_cnt = has_null ? null_cnt : 0;
      int64_t res_arr_nu[1] = {nu_cnt};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
      int64_t res_arr_nn[1] = {100};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
    }

    // EQ / NE: 219 and UINT32_MAX are larger than every stored value.
    {
      uint64_t ref_arr[4] = {100, 199, 219, UINT32_MAX};
      int64_t res_arr_eq[4] = {1, 1, 0, 0};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 4, 1, res_arr_eq);
      int64_t res_arr_ne[4] = {99, 99, 100, 100};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 4, 1, res_arr_ne);
    }

    // GT / GE
    {
      int64_t ref_arr[4] = {100, 199, 219, UINT32_MAX};
      int64_t res_arr_gt[4] = {99, 0, 0, 0};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 4, 1, res_arr_gt);
      int64_t res_arr_ge[4] = {100, 1, 0, 0};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 4, 1, res_arr_ge);
    }
    // LT / LE
    {
      int64_t ref_arr[4] = {100, 199, 219, UINT32_MAX};
      int64_t res_arr_lt[4] = {0, 99, 100, 100};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 4, 1, res_arr_lt);
      int64_t res_arr_le[4] = {1, 100, 100, 100};
      integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 4, 1, res_arr_le);
    }
  }
}
|
||||
|
||||
// Runs NU/NN and EQ/NE filters against an INTEGER-encoded ObTinyIntType column
// that holds every value in -128..127 once, followed by a single null row
// (UINT8_MAX + 2 rows in total).
TEST_F(TestIntegerPdFilter, test_integer_decoder_nullbitmap_type)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  const bool has_null = true;
  const bool is_force_raw = true;
  enable_abnormal_filter_type_ = true;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObTinyIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  for (int64_t col_idx = 0; col_idx < col_cnt; ++col_idx) {
    ctx_.column_encodings_[col_idx] = ObCSColumnHeader::Type::INTEGER;
  }

  const int64_t row_cnt = UINT8_MAX + 2;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = is_force_raw;
  ObDatumRow row_arr[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }

  for (int64_t i = 0; i < row_cnt; ++i) {
    ObDatumRow &cur_row = row_arr[i];
    cur_row.storage_datums_[0].set_int32(i);
    if (i >= row_cnt - 1) {
      // Only the very last row is null.
      cur_row.storage_datums_[1].set_null();
    } else {
      cur_row.storage_datums_[1].set_int(i - 128);
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(cur_row));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // NU / NN: one null row, row_cnt - 1 valued rows.
  {
    int64_t ref_arr[1];
    int64_t res_arr_nu[1] = {1};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {row_cnt - 1};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }

  // EQ / NE: INT32_MIN and INT32_MAX lie outside the stored -128..127 range.
  {
    int64_t ref_arr[6] = {INT32_MIN, -128, -1, 1, 127, INT32_MAX};
    int64_t res_arr_eq[6] = {0, 1, 1, 1, 1, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 6, 1, res_arr_eq);
    int64_t res_arr_ne[6] = {row_cnt-1, row_cnt-2, row_cnt-2, row_cnt-2, row_cnt-2, row_cnt-1};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 6, 1, res_arr_ne);
  }

  LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");
}
|
||||
|
||||
// Runs the white filter operators against an ObFloatType column whose first 100
// rows come from row_generate_ with seeds -49..50, first without and then with
// 20 trailing null rows. Reference values are given as seeds (ref_seed_arr) and
// checked through raw_type_filter_normal_check.
TEST_F(TestIntegerPdFilter, test_integer_decoder_float_type)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = false;
  const bool is_force_raw = true;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObFloatType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  for (int64_t col_idx = 0; col_idx < col_cnt; ++col_idx) {
    ctx_.column_encodings_[col_idx] = ObCSColumnHeader::Type::INTEGER;
  }

  // flag == 0: no null rows; flag == 1: null rows appended after the values.
  for (int8_t flag = 0; flag <= 1; ++flag) {
    bool has_null = flag;
    const int64_t null_cnt = has_null ? 20 : 0;
    const int64_t row_cnt = 100 + null_cnt;
    ObMicroBlockCSEncoder encoder;
    ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
    encoder.is_all_column_force_raw_ = is_force_raw;

    ObDatumRow row_arr[row_cnt];
    for (int64_t i = 0; i < row_cnt; ++i) {
      ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
    }

    for (int64_t i = 0; i < row_cnt; ++i) {
      ObDatumRow &cur_row = row_arr[i];
      if (i >= 100) {
        // Null rows are filled by hand instead of through the row generator.
        cur_row.storage_datums_[0].set_int32(i);
        cur_row.storage_datums_[1].set_null();
      } else {
        ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i-49, cur_row));
      }
      ASSERT_EQ(OB_SUCCESS, encoder.append_row(cur_row));
    }

    HANDLE_TRANSFORM();

    const int64_t col_offset = 1;
    bool need_check = true;

    // NU / NN: null rows versus the 100 generated rows.
    {
      int64_t ref_seed_arr[1];
      const int64_t nu_cnt = has_null ? null_cnt : 0;
      int64_t res_arr_nu[1] = {nu_cnt};
      raw_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
      int64_t res_arr_nn[1] = {100};
      raw_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
    }

    // EQ / NE: seeds -100 and 100 were never used to generate a row.
    {
      int64_t ref_seed_arr[5] = {-100, -49, 1, 50, 100};
      int64_t res_arr_eq[5] = {0, 1, 1, 1, 0};
      raw_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
      int64_t res_arr_ne[5] = {100, 99, 99, 99, 100};
      raw_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
    }

    // GT / GE
    {
      int64_t ref_seed_arr[5] = {-100, -49, 1, 50, 100};
      int64_t res_arr_gt[5] = {100, 99, 49, 0, 0};
      raw_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 5, 1, res_arr_gt);
      int64_t res_arr_ge[5] = {100, 100, 50, 1, 0};
      raw_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 5, 1, res_arr_ge);
    }
    // LT / LE
    {
      int64_t ref_seed_arr[5] = {-100, -49, 1, 50, 100};
      int64_t res_arr_lt[5] = {0, 0, 50, 99, 100};
      raw_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 5, 1, res_arr_lt);
      int64_t res_arr_le[5] = {0, 1, 51, 100, 100};
      raw_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 5, 1, res_arr_le);
    }

    // IN: one filter with five candidate seeds, three of which were generated.
    {
      int64_t ref_seed_arr[5] = {-100, -49, 1, 50, 100};
      int64_t res_arr[1] = {3};
      raw_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
    }
    // BT: five filters, each taking two consecutive entries of ref_seed_arr.
    {
      int64_t ref_seed_arr[10] = {-100, -50, -49, -10, -1, 10, 20, 70, 100, 200};
      int64_t res_arr[5] = {0, 40, 12, 31, 0};
      raw_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
    }
    LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");

    encoder.reuse();
  }
}
|
||||
|
||||
// Runs the white filter operators against an INTEGER-encoded ObSmallIntType
// column (900 rows holding -500..399 followed by 100 null rows) using reference
// values that include INT64_MIN / INT64_MAX, far outside the column's range.
TEST_F(TestIntegerPdFilter, test_integer_abnormal_filter)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  enable_abnormal_filter_type_ = true;
  const bool enable_check = ENABLE_CASE_CHECK;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObSmallIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  for (int64_t col_idx = 0; col_idx < col_cnt; ++col_idx) {
    ctx_.column_encodings_[col_idx] = ObCSColumnHeader::Type::INTEGER;
  }

  const int64_t row_cnt = 1000;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  ObDatumRow row_arr[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }

  for (int64_t i = 0; i < row_cnt; ++i) {
    ObDatumRow &cur_row = row_arr[i];
    cur_row.storage_datums_[0].set_int32(i);
    if (i < 900) {
      cur_row.storage_datums_[1].set_int(i - 500);
    } else {
      // The last 100 rows are null.
      cur_row.storage_datums_[1].set_null();
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(cur_row));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // NU / NN: 100 null rows, 900 valued rows.
  {
    int64_t ref_arr[1];
    int64_t res_arr_nu[1] = {100};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {900};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }

  // EQ / NE
  {
    int64_t ref_arr[4] = {INT64_MIN, -1, 1, INT64_MAX};
    int64_t res_arr_eq[4] = {0, 1, 1, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 4, 1, res_arr_eq);
    int64_t res_arr_ne[4] = {900, 899, 899, 900};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 4, 1, res_arr_ne);
  }

  // LT / LE
  {
    int64_t ref_arr[4] = {INT64_MIN, -100, 100, INT64_MAX};
    int64_t res_arr_lt[4] = {0, 400, 600, 900};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 4, 1, res_arr_lt);
    int64_t res_arr_le[4] = {0, 401, 601, 900};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 4, 1, res_arr_le);
  }
  // GT / GE
  {
    int64_t ref_arr[4] = {INT64_MIN, -100, 100, INT64_MAX};
    int64_t res_arr_gt[4] = {900, 499, 299, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 4, 1, res_arr_gt);
    int64_t res_arr_ge[4] = {900, 500, 300, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 4, 1, res_arr_ge);
  }

  // IN: one filter with five candidates, three of which are stored.
  {
    int64_t ref_arr[5] = {INT64_MIN, -500, -1, 100, INT64_MAX};
    int64_t res_arr[1] = {3};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
  }
  // BT: five filters, each taking two consecutive entries of ref_arr.
  {
    int64_t ref_arr[10] = {INT64_MIN, -501, -500, -100, -1, 11, 10, 100, 101, INT64_MAX};
    int64_t res_arr[5] = {0, 401, 13, 91, 299};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 5, 2, res_arr);
  }

  LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");
}
|
||||
|
||||
// Runs the white filter operators against an INTEGER-encoded ObIntType column
// in which all 120 rows are null: NU must select every row, and every other
// operator must select none.
TEST_F(TestIntegerPdFilter, test_all_null_integer_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  const bool is_force_raw = false;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObIntType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER; // integer
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INTEGER; // integer

  const int64_t row_cnt = 120;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = is_force_raw;
  ObDatumRow row_arr[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }

  // Column 0 is the row index; column 1 is null in every row.
  for (int64_t i = 0; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    row_arr[i].storage_datums_[1].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // check NU/NN
  {
    int64_t ref_arr[1];
    int64_t res_arr_nu[1] = {120};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }
  // check EQ
  {
    int64_t ref_arr[2] = {-100, 30};
    int64_t res_arr_eq[2] = {0, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 2, 1, res_arr_eq);
  }

  // check LT/LE/GT/GE
  {
    int64_t ref_arr[3] = {0, INT32_MIN, INT64_MAX};
    int64_t res_arr_lt[3] = {0, 0, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 3, 1, res_arr_lt);
    // The LE check used to pass a count of 4 (with a 4-element result array)
    // although ref_arr holds only 3 references. Judging by the other calls in
    // this file, the count is the number of filters, each consuming one
    // reference here, so the 4th filter read past the end of ref_arr.
    int64_t res_arr_le[3] = {0, 0, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 3, 1, res_arr_le);
  }
  {
    // NOTE(review): INT32_MIN appears twice; the last entry may have been meant
    // to be a large positive bound as in the LT/LE block above — confirm.
    int64_t ref_arr[3] = {0, INT32_MIN, INT32_MIN};
    int64_t res_arr_gt[3] = {0, 0, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 3, 1, res_arr_gt);
    int64_t res_arr_ge[3] = {0, 0, 0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 3, 1, res_arr_ge);
  }

  // check IN/BT
  {
    int64_t ref_arr[4] = {0, 1, 2 ,3};
    int64_t res_arr[1] = {0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 4, res_arr);
  }
  {
    int64_t ref_arr[2] = {INT32_MIN, INT32_MAX};
    int64_t res_arr[1] = {0};
    integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 1, 2, res_arr);
  }

  LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");
}
|
||||
|
||||
//
|
||||
TEST_F(TestIntegerPdFilter, test_exceed_range_compare_filter)
|
||||
{
|
||||
const int64_t rowkey_cnt = 1;
|
||||
const int64_t col_cnt = 2;
|
||||
const bool enable_check = ENABLE_CASE_CHECK;
|
||||
ObObjType col_types[col_cnt] = {ObInt32Type, ObIntType};
|
||||
ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
|
||||
ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER; // integer
|
||||
ctx_.column_encodings_[1] = ObCSColumnHeader::Type::INTEGER; // integer
|
||||
|
||||
const int64_t row_cnt = 2;
|
||||
ObMicroBlockCSEncoder encoder;
|
||||
ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
|
||||
ObDatumRow row_arr[row_cnt];
|
||||
for (int64_t i = 0; i < row_cnt; ++i) {
|
||||
ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
|
||||
}
|
||||
row_arr[0].storage_datums_[0].set_int32(0);
|
||||
row_arr[0].storage_datums_[1].set_int(-10000000);
|
||||
ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[0]));
|
||||
row_arr[1].storage_datums_[0].set_int32(1);
|
||||
row_arr[1].storage_datums_[1].set_null();
|
||||
ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[1]));
|
||||
|
||||
HANDLE_TRANSFORM();
|
||||
|
||||
const int64_t col_offset = 1;
|
||||
bool need_check = true;
|
||||
|
||||
// check EQ NE
|
||||
{
|
||||
int64_t ref_arr[4] = {-10000001, -10000000, -1, 10000000};
|
||||
int64_t res_arr_eq[4] = {0, 1, 0, 0};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 4, 1, res_arr_eq);
|
||||
int64_t res_arr_ne[4] = {1, 0, 1, 1};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 4, 1, res_arr_ne);
|
||||
}
|
||||
|
||||
// check LT/LE/GT/GE
|
||||
{
|
||||
int64_t ref_arr[4] = {-10000001, -10000000, -1, 10000000};
|
||||
int64_t res_arr_lt[4] = {0, 0, 1, 1};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 4, 1, res_arr_lt);
|
||||
int64_t res_arr_le[4] = {0, 1, 1, 1};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 4, 1, res_arr_le);
|
||||
}
|
||||
{
|
||||
int64_t ref_arr[4] = {-10000001, -10000000, -1, 10000000};
|
||||
int64_t res_arr_gt[4] = {1, 0, 0, 0};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 4, 1, res_arr_gt);
|
||||
int64_t res_arr_ge[4] = {1, 1, 0, 0};
|
||||
integer_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 4, 1, res_arr_ge);
|
||||
}
|
||||
LOG_INFO(">>>>>>>>>>FINISH PD FILTER<<<<<<<<<<<");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -f test_integer_pd_filter.log*");
|
||||
OB_LOGGER.set_file_name("test_integer_pd_filter.log", true, false);
|
||||
oceanbase::common::ObLogger::get_logger().set_log_level("DEBUG");
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@ -0,0 +1,833 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#define USING_LOG_PREFIX STORAGE
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#define protected public
|
||||
#define private public
|
||||
#include "storage/blocksstable/cs_encoding/ob_integer_stream_encoder.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_integer_stream_decoder.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_column_encoding_struct.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_cs_decoding_util.h"
|
||||
#include "lib/codec/ob_fast_delta.h"
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace blocksstable
|
||||
{
|
||||
|
||||
class TestIntegerStream : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
virtual void SetUp() {}
|
||||
virtual void TearDown() {}
|
||||
|
||||
TestIntegerStream() : tenant_ctx_(500) {
|
||||
share::ObTenantEnv::set_tenant(&tenant_ctx_);
|
||||
}
|
||||
virtual ~TestIntegerStream() {}
|
||||
|
||||
// Shape of the sequence produced by generate_data().
enum Monotonicity
{
  RANDOM = 0,            // every element drawn independently
  STRICT_INCREMENT = 1,  // first value plus i times a fixed step
  STRICT_DECREMENT = 2,  // first value minus i times a fixed step
  EQUAL = 3,             // every element equals the first value
};
|
||||
|
||||
template<class T>
|
||||
void generate_data(T *&int_arr, const int64_t size, T min, T max, Monotonicity mon) {
|
||||
std::random_device rd;
|
||||
std::mt19937_64 rng(rd());
|
||||
std::uniform_int_distribution<T> distribution(min, max);
|
||||
int_arr = reinterpret_cast<T *>(allocator_.alloc(sizeof(T) * size));
|
||||
|
||||
T first_value = distribution(rng);
|
||||
T second_value = distribution(rng);
|
||||
T delta = ((second_value > 0) ? second_value : (0 - second_value));
|
||||
delta = delta%3 + 1;
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
if (RANDOM == mon) {
|
||||
int_arr[i] = distribution(rng);
|
||||
} else if (STRICT_INCREMENT == mon) {
|
||||
int_arr[i] = first_value + i * delta;
|
||||
} else if (STRICT_DECREMENT == mon) {
|
||||
int_arr[i] = first_value - i * delta;
|
||||
} else if (EQUAL == mon) {
|
||||
int_arr[i] = first_value;
|
||||
}
|
||||
|
||||
LOG_TRACE("generate_data", K(i), K(int_arr[i]), K(min), K(max));
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void generate_datums(ObColDatums *datums, const int64_t size, bool has_null, T min, T max, Monotonicity mon)
|
||||
{
|
||||
uint64_t *datusms_content_ptr = reinterpret_cast<uint64_t *>(allocator_.alloc(sizeof(uint64_t) * size));
|
||||
memset(datusms_content_ptr, 0, sizeof(uint64_t) * size);
|
||||
T *data = nullptr;
|
||||
generate_data<T>(data, size, min, max, mon);
|
||||
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
ObDatum datum;
|
||||
datum.uint_ = (datusms_content_ptr + i);
|
||||
*(datusms_content_ptr + i) = data[i];
|
||||
|
||||
if (has_null && (i % 5 == 0)) {
|
||||
datum.set_null();
|
||||
} else {
|
||||
datum.pack_ = sizeof(T);
|
||||
}
|
||||
LOG_DEBUG("generate_datum", K(datum), K(i));
|
||||
ASSERT_EQ(OB_SUCCESS, datums->push_back(datum));
|
||||
}
|
||||
}
|
||||
|
||||
void generate_bitmap(char *bitmap, ObColDatums *datums)
|
||||
{
|
||||
for (int64_t i = 0; i < datums->count(); i++) {
|
||||
if (datums->at(i).is_null()) {
|
||||
bitmap[i/8] |= (1 << (7 - i%8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void buid_raw_integer_stream_data(const ObStreamData &stream_data,
|
||||
const int64_t count,
|
||||
const ObCompressorType type,
|
||||
ObIntegerStreamDecoderCtx &decode_ctx,
|
||||
ObStreamData &raw_stream_data)
|
||||
{
|
||||
uint16_t stream_meta_len = 0;
|
||||
ASSERT_EQ(OB_SUCCESS, ObIntegerStreamDecoder::build_decoder_ctx(
|
||||
stream_data, count,type, decode_ctx, stream_meta_len));
|
||||
ObStreamData stream_data2(stream_data.buf_ + stream_meta_len, stream_data.len_ - stream_meta_len);
|
||||
const uint32_t width_size = decode_ctx.meta_.get_uint_width_size();
|
||||
uint32_t array_buf_size = width_size * decode_ctx.count_;
|
||||
char *array_buf = (char*)allocator_.alloc(array_buf_size);
|
||||
ASSERT_EQ(OB_SUCCESS, ObIntegerStreamDecoder::transform_to_raw_array(stream_data2, decode_ctx, array_buf, allocator_));
|
||||
raw_stream_data.set(array_buf, array_buf_size);
|
||||
}
|
||||
|
||||
template<int32_t WIDTH_TAG>
|
||||
static void do_decode_raw_array(const ObStreamData &data,
|
||||
const ObIntegerStreamDecoderCtx &ctx,
|
||||
const int64_t *row_ids,
|
||||
const int64_t row_count,
|
||||
char *out_buf)
|
||||
{
|
||||
typedef typename ObCSEncodingStoreTypeInference<WIDTH_TAG>::Type StoreIntType;
|
||||
const StoreIntType *orig_arr = reinterpret_cast<const StoreIntType *>(data.buf_);
|
||||
StoreIntType *int_arr = reinterpret_cast<StoreIntType *>(out_buf);
|
||||
const uint64_t base = ctx.meta_.is_use_base() * ctx.meta_.base_value();
|
||||
for (int64_t i = 0; i < row_count; i++) {
|
||||
int_arr[i] = orig_arr[row_ids[i]] + base;
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_raw_array(const ObStreamData &data,
|
||||
const ObIntegerStreamDecoderCtx &ctx,
|
||||
const int64_t *row_ids,
|
||||
const int64_t row_count,
|
||||
char *out_buf)
|
||||
{
|
||||
const uint32_t width = ctx.meta_.width_;
|
||||
switch(width) {
|
||||
case ObIntegerStream::UW_1_BYTE :
|
||||
do_decode_raw_array<ObIntegerStream::UW_1_BYTE>(data, ctx, row_ids, row_count, out_buf);
|
||||
break;
|
||||
case ObIntegerStream::UW_2_BYTE :
|
||||
do_decode_raw_array<ObIntegerStream::UW_2_BYTE>(data, ctx, row_ids, row_count, out_buf);
|
||||
break;
|
||||
case ObIntegerStream::UW_4_BYTE :
|
||||
do_decode_raw_array<ObIntegerStream::UW_4_BYTE>(data, ctx, row_ids, row_count, out_buf);
|
||||
break;
|
||||
case ObIntegerStream::UW_8_BYTE :
|
||||
do_decode_raw_array<ObIntegerStream::UW_8_BYTE>(data, ctx, row_ids, row_count, out_buf);
|
||||
break;
|
||||
default :
|
||||
ob_abort();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class T>
|
||||
void test_and_check_int_encoding(
|
||||
int64_t size,
|
||||
const ObIntegerStream::EncodingType type,
|
||||
uint8_t attribute,
|
||||
T min,
|
||||
T max)
|
||||
{
|
||||
LOG_INFO("test_and_check_int_encoding", K(size), K(type), K(attribute));
|
||||
ObIntegerStreamEncoderCtx ctx;
|
||||
ObCSEncodingOpt encoding_opt;
|
||||
ObArenaAllocator alloctor;
|
||||
const ObCompressorType compress_type = ObCompressorType::ZSTD_1_3_8_COMPRESSOR;
|
||||
|
||||
ctx.meta_.set_4_byte_width();
|
||||
ctx.meta_.type_ = type;
|
||||
ctx.meta_.attr_ = attribute;
|
||||
if (ctx.meta_.is_use_base()) {
|
||||
ctx.meta_.set_base_value(min);
|
||||
}
|
||||
ctx.build_stream_encoder_info(false, false, &encoding_opt, nullptr, -1, compress_type, &alloctor);
|
||||
|
||||
ObIntegerStreamEncoder encoder;
|
||||
uint32_t *data = nullptr;
|
||||
generate_data<T>(data, size, min, max, RANDOM);
|
||||
uint32_t *orig_data = new uint32_t[size];
|
||||
memcpy(orig_data, data, size * sizeof(uint32_t));
|
||||
ObMicroBufferWriter writer;
|
||||
ASSERT_EQ(OB_SUCCESS, writer.init(OB_DEFAULT_MACRO_BLOCK_SIZE, OB_DEFAULT_MACRO_BLOCK_SIZE));
|
||||
ASSERT_EQ(OB_SUCCESS, encoder.encode(ctx, data, size, writer));
|
||||
|
||||
const char *stream_start = writer.data();
|
||||
int64_t stream_len = writer.length();
|
||||
ObStreamData stream_data(stream_start, stream_len);
|
||||
ObIntegerStreamDecoderCtx decode_ctx;
|
||||
ObStreamData raw_stream_data;
|
||||
buid_raw_integer_stream_data(stream_data, size, compress_type, decode_ctx, raw_stream_data);
|
||||
|
||||
const uint32_t width_size = decode_ctx.meta_.get_uint_width_size();
|
||||
uint32_t array_buf_size = width_size * decode_ctx.count_;
|
||||
char *dst_array_buf = (char*)allocator_.alloc(array_buf_size);
|
||||
|
||||
// test batch decode
|
||||
int64_t row_id_count = size;
|
||||
int64_t *row_ids = new int64_t[row_id_count];
|
||||
for (int64_t i = 0; i < row_id_count; i++) {
|
||||
row_ids[i] = i;
|
||||
}
|
||||
|
||||
decode_raw_array(raw_stream_data, decode_ctx, row_ids, row_id_count, dst_array_buf);
|
||||
for (int64_t i = 0; i < row_id_count; i++) {
|
||||
ASSERT_EQ(orig_data[i], *(uint32_t*)(dst_array_buf + i * width_size));
|
||||
}
|
||||
|
||||
// test disorder batch decode
|
||||
int64_t random_idx = ObTimeUtility::current_time()%size;
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
row_ids[i] = (i + random_idx) % size;
|
||||
if (i%7 == 0) {
|
||||
row_ids[i] = random_idx; //duplicate
|
||||
}
|
||||
}
|
||||
decode_raw_array(raw_stream_data, decode_ctx, row_ids, row_id_count, dst_array_buf);
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
if (orig_data[row_ids[i]] != *(uint32_t*)(dst_array_buf + i * width_size)) {
|
||||
LOG_INFO("missmatch", K(i), K(random_idx), K(row_ids[i]),
|
||||
K(*(uint32_t*)(dst_array_buf + i * width_size)), K(orig_data[row_ids[i]]));
|
||||
::abort();
|
||||
}
|
||||
}
|
||||
delete[] orig_data;
|
||||
orig_data = nullptr;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void test_and_check_int_datums_all_encoding_type(
|
||||
uint8_t attribute,
|
||||
bool has_null,
|
||||
T min,
|
||||
T max,
|
||||
T null_replace_value,
|
||||
ObIntegerStream::UintWidth actual_uint_width,
|
||||
bool use_nullbitmap,
|
||||
int64_t loop,
|
||||
Monotonicity mon,
|
||||
bool use_null_replace_ref)
|
||||
{
|
||||
ObIntegerStream::EncodingType type = ObIntegerStream::RAW;
|
||||
for (int64_t m = 1; m < ObIntegerStream::EncodingType::MAX_TYPE; m++) {
|
||||
int64_t random = ObTimeUtility::current_time()%13 + 1 + 1;
|
||||
for (int64_t i = 1; i < 100000; i=i * random) {
|
||||
type = (ObIntegerStream::EncodingType)(m);
|
||||
LOG_INFO("round", K(i), K(m), K(type), K(loop), K(mon), K(min), K(max));
|
||||
test_and_check_int_datums<T>(i, type, attribute, has_null, min, max,
|
||||
null_replace_value, actual_uint_width,
|
||||
use_nullbitmap, loop, mon, use_null_replace_ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void test_and_check_int_datums(
|
||||
int64_t size,
|
||||
const ObIntegerStream::EncodingType type,
|
||||
uint8_t attribute,
|
||||
bool has_null,
|
||||
T min,
|
||||
T max,
|
||||
T null_replace_value,
|
||||
ObIntegerStream::UintWidth actual_uint_width,
|
||||
bool use_nullbitmap,
|
||||
int64_t loop,
|
||||
Monotonicity mon,
|
||||
bool use_null_replace_ref)
|
||||
{
|
||||
LOG_INFO("test_and_check_datums_encoding", K(size), K(type), K(attribute), K(null_replace_value),
|
||||
K(loop), K(sizeof(T)), K(min), K(max), K(mon), K(use_null_replace_ref), K(actual_uint_width));
|
||||
ObIntegerStreamEncoderCtx ctx;
|
||||
ObCSEncodingOpt encoding_opt;
|
||||
ObArenaAllocator alloctor;
|
||||
const ObCompressorType compress_type = ObCompressorType::ZSTD_1_3_8_COMPRESSOR;
|
||||
ctx.meta_.width_ = actual_uint_width;
|
||||
ctx.meta_.type_ = type;
|
||||
ctx.meta_.attr_ = attribute;
|
||||
if (ctx.meta_.is_use_base()) {
|
||||
if (ctx.meta_.is_use_null_replace_value()) {
|
||||
if (null_replace_value < min) {
|
||||
ctx.meta_.set_base_value(null_replace_value);
|
||||
} else {
|
||||
ctx.meta_.set_base_value(min);
|
||||
}
|
||||
} else {
|
||||
ctx.meta_.set_base_value(min);
|
||||
}
|
||||
}
|
||||
if (ctx.meta_.is_use_null_replace_value()) {
|
||||
ctx.meta_.set_null_replaced_value(null_replace_value);
|
||||
}
|
||||
ctx.build_stream_encoder_info(has_null, false, &encoding_opt, nullptr, -1, compress_type, &alloctor);
|
||||
|
||||
ObIntegerStreamEncoder encoder;
|
||||
ObColDatums *datums = new ObColDatums();
|
||||
datums->reserve(1 << 20);
|
||||
generate_datums<T>(datums, size, has_null, min, max, mon);
|
||||
int64_t bitmap_size = pad8(size);
|
||||
char *bitmap = new char[bitmap_size];
|
||||
memset(bitmap, 0, bitmap_size);
|
||||
generate_bitmap(bitmap, datums);
|
||||
|
||||
ObMicroBufferWriter writer;
|
||||
ASSERT_EQ(OB_SUCCESS, writer.init(OB_DEFAULT_MACRO_BLOCK_SIZE, OB_DEFAULT_MACRO_BLOCK_SIZE));
|
||||
|
||||
ObColumnDatumIter iter(*datums);
|
||||
ASSERT_EQ(OB_SUCCESS, encoder.encode(ctx, iter, writer));
|
||||
|
||||
const char *stream_start = writer.data();
|
||||
int64_t stream_len = writer.length();
|
||||
ObStreamData data(stream_start, stream_len);
|
||||
|
||||
ObIntegerStreamDecoderCtx decode_ctx;
|
||||
ObStreamData raw_stream_data;
|
||||
buid_raw_integer_stream_data(data, size, compress_type, decode_ctx, raw_stream_data);
|
||||
|
||||
ObDatum *datums2 = new ObDatum[size];
|
||||
char *datums2_buf = new char[size * sizeof(uint64_t)];
|
||||
memset(datums2_buf, 0, size * sizeof(uint64_t));
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
datums2[i].ptr_ = (datums2_buf + i * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
// decode batch
|
||||
int64_t *row_ids = new int64_t[size];
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
row_ids[i] = i;
|
||||
}
|
||||
ObBaseColumnDecoderCtx base_ctx;
|
||||
base_ctx.allocator_ = &allocator_;
|
||||
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::HAS_NO_NULL;
|
||||
base_ctx.null_desc_ = nullptr;
|
||||
if (use_nullbitmap) {
|
||||
base_ctx.null_bitmap_ = bitmap;
|
||||
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::HAS_NULL_BITMAP;
|
||||
}
|
||||
if (ctx.meta_.is_use_null_replace_value()) {
|
||||
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::IS_NULL_REPLACED;
|
||||
base_ctx.null_replaced_value_ = ctx.meta_.null_replaced_value();
|
||||
}
|
||||
|
||||
uint32_t ref_width_V = ObRefStoreWidthV::NOT_REF;
|
||||
int64_t null_replaced_ref = size;
|
||||
if (use_null_replace_ref) {
|
||||
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::IS_NULL_REPLACED_REF;
|
||||
base_ctx.null_replaced_ref_ = null_replaced_ref;
|
||||
ref_width_V = ObRefStoreWidthV::REF_IN_DATUMS;
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
// store row_id into datum.pack_
|
||||
if (datums->at(i).is_null()) {
|
||||
datums2[i].pack_ = null_replaced_ref;
|
||||
} else {
|
||||
datums2[i].pack_ = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ConvertUnitToDatumFunc convert_func = convert_uint_to_datum_funcs
|
||||
[decode_ctx.meta_.width_]
|
||||
[ref_width_V]
|
||||
[get_width_tag_map()[sizeof(T)]] /*datum_width_V*/
|
||||
[base_ctx.null_flag_]
|
||||
[decode_ctx.meta_.is_decimal_int()];
|
||||
convert_func(base_ctx, raw_stream_data.buf_, decode_ctx, nullptr, row_ids, size, datums2);
|
||||
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums2[i])) {
|
||||
LOG_INFO("missmatch", K(datums->at(row_ids[i])), K(datums2[i]), K(i));
|
||||
::abort();
|
||||
}
|
||||
}
|
||||
|
||||
// disorder batch
|
||||
uint32_t *ref_arr = new uint32_t[size];
|
||||
memset(datums2_buf, 0, size * sizeof(uint64_t));
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
datums2[i].reset();
|
||||
datums2[i].ptr_ = (datums2_buf + i * sizeof(uint64_t));
|
||||
}
|
||||
int64_t random_idx = ObTimeUtility::current_time() % size;
|
||||
int64_t row_id = 0;
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
ref_arr[i] = i;
|
||||
row_id = (i + random_idx) % size;
|
||||
row_ids[i] = row_id;
|
||||
if (use_null_replace_ref && datums->at(row_id).is_null()) {
|
||||
datums2[i].pack_ = size;
|
||||
} else {
|
||||
datums2[i].pack_ = row_id;
|
||||
}
|
||||
if (i%7 == 0) {
|
||||
row_ids[i] = random_idx; //duplicate
|
||||
if (use_null_replace_ref && datums->at(random_idx).is_null()) {
|
||||
datums2[i].pack_ = size;
|
||||
} else {
|
||||
datums2[i].pack_ = random_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
convert_func = convert_uint_to_datum_funcs
|
||||
[decode_ctx.meta_.width_]
|
||||
[ref_width_V]
|
||||
[get_width_tag_map()[sizeof(T)]] /*datum_width_V*/
|
||||
[base_ctx.null_flag_]
|
||||
[decode_ctx.meta_.is_decimal_int()];
|
||||
convert_func(base_ctx, raw_stream_data.buf_, decode_ctx, nullptr, row_ids, size, datums2);
|
||||
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
bool is_equal = false;
|
||||
ObDatum *tmp_datum = &datums->at(row_ids[i]);
|
||||
is_equal = ObDatum::binary_equal(*tmp_datum, datums2[i]);
|
||||
if (!is_equal) {
|
||||
for (int64_t j = 0; j < size; j++) {
|
||||
LOG_INFO("compare", K(datums->at(j)), K(datums2[j]), K(j), K(row_ids[j]));
|
||||
}
|
||||
LOG_INFO("missmatch", KPC(tmp_datum), K(datums2[i]), K(i), K(random_idx), K(row_ids[i]), K(size));
|
||||
::abort();
|
||||
}
|
||||
}
|
||||
|
||||
// disorder batch with using ref array
|
||||
memset(datums2_buf, 0, size * sizeof(uint64_t));
|
||||
if (use_null_replace_ref) {
|
||||
convert_func = convert_uint_to_datum_funcs
|
||||
[decode_ctx.meta_.width_]
|
||||
[ObRefStoreWidthV::REF_4_BYTE]
|
||||
[get_width_tag_map()[sizeof(T)]] /*datum_width_V*/
|
||||
[base_ctx.null_flag_]
|
||||
[decode_ctx.meta_.is_decimal_int()];
|
||||
convert_func(base_ctx, raw_stream_data.buf_, decode_ctx, (char*)ref_arr, row_ids, size, datums2);
|
||||
}
|
||||
|
||||
|
||||
delete []ref_arr;
|
||||
delete []row_ids;
|
||||
row_ids = nullptr;
|
||||
delete []datums2_buf;
|
||||
datums2_buf = nullptr;
|
||||
delete []datums2;
|
||||
datums2 = nullptr;
|
||||
delete datums;
|
||||
datums = nullptr;
|
||||
}
|
||||
|
||||
protected:
|
||||
ObArenaAllocator allocator_;
|
||||
share::ObTenantBase tenant_ctx_;
|
||||
};
|
||||
|
||||
// Round-trips uint32 arrays of growing size (1, 2, 6, 42, 1806) through the
// RAW and SIMD_FIXEDPFOR encodings, once without attributes and once with
// USE_BASE.
TEST_F(TestIntegerStream, test_uint32_encoding)
{
  for (int64_t j = 0; j < 2; j++) {
    const uint8_t attribute = (0 == j) ? ObIntegerStream::Attribute::USE_NONE
                                       : ObIntegerStream::Attribute::USE_BASE;
    int64_t i = 1;
    while (i < 1000000) {
      LOG_INFO("round", K(j), K(i));
      test_and_check_int_encoding<uint32_t>(i, ObIntegerStream::RAW, attribute, 2, UINT32_MAX);
      test_and_check_int_encoding<uint32_t>(i, ObIntegerStream::SIMD_FIXEDPFOR, attribute, 2, UINT32_MAX);
      i = i * (i + 1);
    }
  }
}
|
||||
|
||||
// Declares the default parameter set shared by the datum test cases as local
// variables of the enclosing scope. Each case overrides only what it needs
// before calling test_and_check_int_datums_all_encoding_type().
// The int_* bounds are unsigned here; cases assign negative values to them and
// pass them on as int64_t.
#define DECLEAR_PARAM                                           \
  ObIntegerStream::EncodingType type = ObIntegerStream::RAW;    \
  ObIntegerStream::UintWidth width = ObIntegerStream::UW_1_BYTE; \
  Monotonicity mon = RANDOM;                                    \
  uint8_t attribute = 0;                                        \
  bool has_null = false;                                        \
  bool use_nullbitmap = false;                                  \
  bool null_replaced_rowid = false;                             \
  uint64_t null_replaced_value = 1;                             \
  uint64_t min = 2;                                             \
  uint64_t max = UINT64_MAX;                                    \
  uint64_t int_min = 2;                                         \
  uint64_t int_max = INT64_MAX;
|
||||
|
||||
// Exercises uint64 datums at 8-byte width in four configurations: no
// attribute, USE_BASE, strictly increasing input, and nulls stored through a
// replacement value.
TEST_F(TestIntegerStream, test_datums_encoding)
{
  for (int64_t j = 0; j < 4; j++) {
    DECLEAR_PARAM;

    switch (j) {
      case 0:
        attribute = ObIntegerStream::Attribute::USE_NONE;
        break;
      case 1:
        attribute = ObIntegerStream::Attribute::USE_BASE;
        break;
      case 2:
        mon = STRICT_INCREMENT;
        break;
      case 3:
        attribute = ObIntegerStream::Attribute::REPLACE_NULL_VALUE;
        has_null = true;
        break;
      default:
        break;
    }

    test_and_check_int_datums_all_encoding_type<uint64_t>(
        attribute, has_null, min, max, null_replaced_value, ObIntegerStream::UW_8_BYTE,
        use_nullbitmap, j, mon, null_replaced_rowid);
  }
}
|
||||
|
||||
// Signed ranges at every store width, always with USE_BASE.
// Rounds 0-3 store nulls through a replacement value just outside the data
// range; rounds 4-7 cover the full signed range and decode with the null bitmap.
TEST_F(TestIntegerStream, test_negative_int_datums)
{
  for (int64_t j = 0; j < 8; j++) {
    DECLEAR_PARAM;

    attribute |= ObIntegerStream::Attribute::USE_BASE;
    if (j < 4) {
      attribute |= ObIntegerStream::Attribute::REPLACE_NULL_VALUE;
      has_null = true;
    } else {
      use_nullbitmap = true;
    }

    switch (j) {
      case 0:
        int_min = INT64_MIN + 1;
        int_max = INT64_MAX - 1;
        null_replaced_value = INT64_MIN;
        width = ObIntegerStream::UW_8_BYTE;
        break;
      case 1:
        int_min = INT32_MIN;
        int_max = INT32_MAX - 1;
        null_replaced_value = INT32_MAX;
        width = ObIntegerStream::UW_4_BYTE;
        break;
      case 2:
        int_min = INT16_MIN + 1;
        int_max = INT16_MAX - 1;
        null_replaced_value = INT16_MIN;
        width = ObIntegerStream::UW_2_BYTE;
        break;
      case 3:
        int_min = INT8_MIN;
        int_max = INT8_MAX - 1;
        null_replaced_value = INT8_MAX;
        width = ObIntegerStream::UW_1_BYTE;
        break;
      case 4:
        int_min = INT64_MIN;
        int_max = INT64_MAX;
        width = ObIntegerStream::UW_8_BYTE;
        break;
      case 5:
        int_min = INT32_MIN;
        int_max = INT32_MAX;
        width = ObIntegerStream::UW_4_BYTE;
        break;
      case 6:
        int_min = INT16_MIN;
        int_max = INT16_MAX;
        width = ObIntegerStream::UW_2_BYTE;
        break;
      case 7:
        int_min = INT8_MIN;
        int_max = INT8_MAX;
        width = ObIntegerStream::UW_1_BYTE;
        break;
      default:
        break;
    }

    test_and_check_int_datums_all_encoding_type<int64_t>(
        attribute, has_null, int_min, int_max, null_replaced_value, width, use_nullbitmap, j, mon, null_replaced_rowid);
  }
}
|
||||
|
||||
|
||||
// All values are zero. Round 0 has no nulls, round 1 has nulls described by
// the null bitmap, round 2 has nulls stored as the replacement value 2.
// Every round is repeated for all four store widths.
TEST_F(TestIntegerStream, test_all_zero)
{
  const ObIntegerStream::UintWidth all_widths[] = {
    ObIntegerStream::UW_1_BYTE,
    ObIntegerStream::UW_2_BYTE,
    ObIntegerStream::UW_4_BYTE,
    ObIntegerStream::UW_8_BYTE
  };

  for (int64_t j = 0; j < 3; j++) {
    DECLEAR_PARAM;
    int_min = 0;
    int_max = 0;

    switch (j) {
      case 0:
        has_null = false;
        use_nullbitmap = false;
        attribute = 0;
        break;
      case 1:
        has_null = true;
        use_nullbitmap = true;
        break;
      case 2:
        has_null = true;
        use_nullbitmap = false;
        attribute |= ObIntegerStream::Attribute::REPLACE_NULL_VALUE;
        null_replaced_value = 2;
        break;
      default:
        break;
    }

    for (const ObIntegerStream::UintWidth cur_width : all_widths) {
      width = cur_width;
      test_and_check_int_datums_all_encoding_type<int64_t>(
          attribute, has_null, int_min, int_max, null_replaced_value, width, use_nullbitmap, j, mon, null_replaced_rowid);
    }
  }
}
|
||||
|
||||
|
||||
// Full signed range of each store width with USE_BASE, first without nulls and
// then with nulls described by the null bitmap.
TEST_F(TestIntegerStream, test_all_min_max)
{
  struct SignedRange
  {
    int64_t lower_;
    int64_t upper_;
    ObIntegerStream::UintWidth width_;
  };
  const SignedRange ranges[] = {
    {INT8_MIN, INT8_MAX, ObIntegerStream::UW_1_BYTE},
    {INT16_MIN, INT16_MAX, ObIntegerStream::UW_2_BYTE},
    {INT32_MIN, INT32_MAX, ObIntegerStream::UW_4_BYTE},
    {INT64_MIN, INT64_MAX, ObIntegerStream::UW_8_BYTE}
  };

  for (int64_t j = 0; j < 2; j++) {
    DECLEAR_PARAM;

    const bool with_null = (1 == j);
    has_null = with_null;
    use_nullbitmap = with_null;
    attribute |= ObIntegerStream::Attribute::USE_BASE;

    for (const SignedRange &range : ranges) {
      int_min = range.lower_;
      int_max = range.upper_;
      width = range.width_;
      test_and_check_int_datums_all_encoding_type<int64_t>(
          attribute, has_null, int_min, int_max, null_replaced_value, width, use_nullbitmap, j, mon, null_replaced_rowid);
    }
  }
}
|
||||
|
||||
|
||||
// Delta-oriented corner cases.
// NOTE: the loop starts at j = 1, so only the strictly decreasing case that
// begins just above UINT32_MAX is executed; the j == 0 case (strictly
// increasing from INT32_MIN + 2 at 4-byte width) is currently never run.
TEST_F(TestIntegerStream, test_negative_inc_delta)
{
  for (int64_t j = 1; j < 2; j++) {
    DECLEAR_PARAM;

    switch (j) {
      case 0:
        mon = STRICT_INCREMENT;
        has_null = false;
        int_min = INT32_MIN + 2;
        int_max = 0;
        width = ObIntegerStream::UW_4_BYTE;
        test_and_check_int_datums_all_encoding_type<int64_t>(
            attribute, has_null, int_min, int_max, null_replaced_value, width, use_nullbitmap, j, mon, null_replaced_rowid);
        break;
      case 1:
        mon = STRICT_DECREMENT;
        int_min = UINT32_MAX + 1LL;
        int_max = UINT32_MAX + 1LL; // first value > UINT32_MAX
        width = ObIntegerStream::UW_8_BYTE;
        test_and_check_int_datums_all_encoding_type<int64_t>(
            attribute, has_null, int_min, int_max, null_replaced_value, width, use_nullbitmap, j, mon, null_replaced_rowid);
        break;
      default:
        break;
    }
  }
}
|
||||
|
||||
// Monotonic inputs (increasing, decreasing, constant) with different lower
// bounds, all without nulls at 8-byte width.
// Rounds 9-11 repeat the parameters of rounds 6-8.
TEST_F(TestIntegerStream, test_delta)
{
  struct DeltaCase
  {
    Monotonicity mon_;
    int64_t lower_;
  };
  const DeltaCase delta_cases[] = {
    {STRICT_INCREMENT, 5},
    {STRICT_INCREMENT, INT32_MIN},
    {STRICT_INCREMENT, -5},
    {STRICT_DECREMENT, INT32_MAX},
    {STRICT_DECREMENT, 5},
    {STRICT_DECREMENT, -5},
    {EQUAL, 5},
    {EQUAL, -5},
    {EQUAL, 0},
    {EQUAL, 5},
    {EQUAL, -5},
    {EQUAL, 0}
  };
  const int64_t case_cnt = sizeof(delta_cases) / sizeof(delta_cases[0]);

  for (int64_t j = 0; j < case_cnt; j++) {
    DECLEAR_PARAM;
    has_null = false;
    mon = delta_cases[j].mon_;
    int_min = delta_cases[j].lower_;
    width = ObIntegerStream::UW_8_BYTE;

    test_and_check_int_datums_all_encoding_type<int64_t>(
        attribute, has_null, int_min, int_max, null_replaced_value, width, use_nullbitmap, j, mon, null_replaced_rowid);
  }
}
|
||||
|
||||
// All values are zero while nulls are stored as -1, with USE_BASE enabled, at
// 1-byte width.
TEST_F(TestIntegerStream, test_all_0_and_replace_value_negative)
{
  const int64_t j = 0; // single round; forwarded as the loop tag
  DECLEAR_PARAM;

  has_null = true;
  null_replaced_value = -1;
  int_min = 0;
  int_max = 0;
  attribute |= ObIntegerStream::Attribute::REPLACE_NULL_VALUE;
  attribute |= ObIntegerStream::Attribute::USE_BASE;
  width = ObIntegerStream::UW_1_BYTE;

  test_and_check_int_datums_all_encoding_type<int64_t>(
      attribute, has_null, int_min, int_max, null_replaced_value, width, use_nullbitmap, j, mon, null_replaced_rowid);
}
|
||||
|
||||
// Values in [-100, 0] with nulls stored as -101, USE_BASE enabled, at 1-byte
// width.
TEST_F(TestIntegerStream, test_raw_negative_with_null)
{
  const int64_t j = 0; // single round; forwarded as the loop tag
  DECLEAR_PARAM;

  has_null = true;
  null_replaced_value = -101;
  int_min = -100;
  int_max = int_min + 100;
  width = ObIntegerStream::UW_1_BYTE;
  attribute |= ObIntegerStream::Attribute::REPLACE_NULL_VALUE;
  attribute |= ObIntegerStream::Attribute::USE_BASE;

  test_and_check_int_datums_all_encoding_type<int64_t>(
      attribute, has_null, int_min, int_max, null_replaced_value, width, use_nullbitmap, j, mon, null_replaced_rowid);
}
|
||||
|
||||
// Values in [-1000, 0] with nulls, decoded through the "null replaced ref"
// path, USE_BASE enabled, at 2-byte width.
TEST_F(TestIntegerStream, test_null_replaced_rowid)
{
  DECLEAR_PARAM;

  has_null = true;
  null_replaced_rowid = true;
  null_replaced_value = -1001;
  attribute |= ObIntegerStream::Attribute::USE_BASE;
  width = ObIntegerStream::UW_2_BYTE;
  int_min = -1000;
  int_max = int_min + 1000;

  test_and_check_int_datums_all_encoding_type<int64_t>(
      attribute, has_null, int_min, int_max, null_replaced_value, width, use_nullbitmap, 1, mon, null_replaced_rowid);
}
|
||||
|
||||
// Inputs with very few distinct values, one round per store width.
TEST_F(TestIntegerStream, test_rle)
{
  for (int64_t j = 0; j < 4; j++) {
    DECLEAR_PARAM;

    switch (j) {
      case 0: // just one unique value
        int_min = 10;
        int_max = 10;
        width = ObIntegerStream::UW_1_BYTE;
        break;
      case 1: // just two unique value
        int_min = 10000;
        int_max = 10001;
        width = ObIntegerStream::UW_2_BYTE;
        break;
      case 2: // just two unique value
        int_min = UINT32_MAX - 1;
        int_max = UINT32_MAX;
        width = ObIntegerStream::UW_4_BYTE;
        break;
      case 3: // just three unique value
        int_min = INT64_MAX - 2;
        int_max = INT64_MAX;
        width = ObIntegerStream::UW_8_BYTE;
        break;
      default:
        break;
    }

    test_and_check_int_datums_all_encoding_type<int64_t>(
        attribute, has_null, int_min, int_max, null_replaced_value, width, use_nullbitmap, 1, mon, null_replaced_rowid);
  }
}
|
||||
|
||||
} // end namespace blocksstable
|
||||
} // end namespace oceanbase
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -f test_integer_stream.log*");
|
||||
OB_LOGGER.set_file_name("test_integer_stream.log", true, false);
|
||||
oceanbase::common::ObLogger::get_logger().set_log_level("INFO");
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@ -0,0 +1,477 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#define USING_LOG_PREFIX STORAGE
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#define protected public
|
||||
#define private public
|
||||
#include "test_decoder_filter_perf.h"
|
||||
#include "storage/compaction/ob_compaction_memory_pool.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
|
||||
namespace storage
|
||||
{
|
||||
|
||||
// just for test, skip to use
|
||||
int ObCompactionBufferWriter::ensure_space(const int64_t size)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
use_mem_pool_ = false;
|
||||
|
||||
if (size <= 0) {
|
||||
// do nothing
|
||||
} else if (nullptr == data_) { // first alloc
|
||||
if (OB_UNLIKELY(!block_.empty())) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
} else {
|
||||
data_ = (char *) share::mtl_malloc(size, label_);
|
||||
pos_ = 0;
|
||||
capacity_ = size;
|
||||
block_.buffer_ = data_;
|
||||
block_.buffer_size_ = size;
|
||||
block_.type_ = ObCompactionBufferBlock::MTL_PIECE_TYPE;
|
||||
}
|
||||
} else if (capacity_ < size) {
|
||||
char *new_data = (char *) share::mtl_malloc(size, label_);
|
||||
MEMCPY(new_data, data_, pos_);
|
||||
data_ = new_data;
|
||||
capacity_ = size;
|
||||
block_.buffer_ = data_;
|
||||
block_.buffer_size_ = size;
|
||||
block_.type_ = ObCompactionBufferBlock::MTL_PIECE_TYPE;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Test-only replacement: resetting the writer does nothing in this build.
void ObCompactionBufferWriter::reset()
{
  // intentionally empty
}
|
||||
}
|
||||
|
||||
namespace blocksstable
|
||||
{
|
||||
|
||||
using namespace common;
|
||||
using namespace storage;
|
||||
using namespace share::schema;
|
||||
|
||||
class TestPerfCmpResult : public TestDecoderFilterPerf
|
||||
{
|
||||
public:
|
||||
TestPerfCmpResult() {}
|
||||
virtual ~TestPerfCmpResult() {}
|
||||
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
};
|
||||
|
||||
void TestPerfCmpResult::SetUp()
|
||||
{
|
||||
TestDecoderFilterPerf::SetUp();
|
||||
}
|
||||
|
||||
void TestPerfCmpResult::TearDown()
|
||||
{
|
||||
TestDecoderFilterPerf::TearDown();
|
||||
}
|
||||
|
||||
// Named constants for the perf comparison cases.
// NOTE(review): their uses are outside this excerpt; presumably they are
// passed as arguments by the test cases of this file.
#define NEED_EXECUTE_CASE true

#define EXE_ROUND      1
#define IS_BIT_PACKING false
#define WITH_NULL      true
#define WITHOUT_NULL   false
#define WITH_WARM      true
|
||||
|
||||
// Runs every pushdown filter scenario (eq/ne/nu/nn, comparison, IN, BETWEEN)
// 100 times, each time for the column-store and then the non-column-store
// variant followed by basic_reuse(), and finally prints the perf report.
// The round counter lives inside the for statement, so the expansion does not
// declare a variable in the caller's scope and cannot clash with a local `i`
// or with another loop macro expanded in the same test body.
#define TEST_FILTER_DECODE_ALL_OP()                                                        \
  for (int64_t filter_round_ = 100; filter_round_ > 0; --filter_round_) {                  \
    basic_filter_pushdown_eqne_nunn_op_test(true/*is_column_store*/);                      \
    basic_filter_pushdown_eqne_nunn_op_test(false/*is_column_store*/);                     \
    basic_reuse();                                                                         \
    basic_filter_pushdown_comp_op_test(true/*is_column_store*/);                           \
    basic_filter_pushdown_comp_op_test(false/*is_column_store*/);                          \
    basic_reuse();                                                                         \
    basic_filter_pushdown_in_op_test(true/*is_column_store*/);                             \
    basic_filter_pushdown_in_op_test(false/*is_column_store*/);                            \
    basic_reuse();                                                                         \
    basic_filter_pushdown_bt_op_test(true/*is_column_store*/);                             \
    basic_filter_pushdown_bt_op_test(false/*is_column_store*/);                            \
    basic_reuse();                                                                         \
  }                                                                                        \
  perf_ctx_.print_report(true/*print total*/, true/*print detail*/);
|
||||
|
||||
// Runs every pushdown filter scenario once, non-column-store variant first and
// column-store variant second, calling basic_reuse() after each run.
#define TEST_EXECUTE_CS_PAX_ALL_OP()                                   \
  basic_filter_pushdown_eqne_nunn_op_test(false/*is_column_store*/);   \
  basic_reuse();                                                       \
  basic_filter_pushdown_eqne_nunn_op_test(true/*is_column_store*/);    \
  basic_reuse();                                                       \
  basic_filter_pushdown_comp_op_test(false/*is_column_store*/);        \
  basic_reuse();                                                       \
  basic_filter_pushdown_comp_op_test(true/*is_column_store*/);         \
  basic_reuse();                                                       \
  basic_filter_pushdown_in_op_test(false/*is_column_store*/);          \
  basic_reuse();                                                       \
  basic_filter_pushdown_in_op_test(true/*is_column_store*/);           \
  basic_reuse();                                                       \
  basic_filter_pushdown_bt_op_test(false/*is_column_store*/);          \
  basic_reuse();                                                       \
  basic_filter_pushdown_bt_op_test(true/*is_column_store*/);           \
  basic_reuse();
|
||||
|
||||
// Same scenarios as TEST_EXECUTE_CS_PAX_ALL_OP but in the opposite order:
// column-store variant first, non-column-store variant second, with
// basic_reuse() after each run.
#define TEST_EXECUTE_CS_PAX_ALL_OP_TR()                                \
  basic_filter_pushdown_eqne_nunn_op_test(true/*is_column_store*/);    \
  basic_reuse();                                                       \
  basic_filter_pushdown_eqne_nunn_op_test(false/*is_column_store*/);   \
  basic_reuse();                                                       \
  basic_filter_pushdown_comp_op_test(true/*is_column_store*/);         \
  basic_reuse();                                                       \
  basic_filter_pushdown_comp_op_test(false/*is_column_store*/);        \
  basic_reuse();                                                       \
  basic_filter_pushdown_in_op_test(true/*is_column_store*/);           \
  basic_reuse();                                                       \
  basic_filter_pushdown_in_op_test(false/*is_column_store*/);          \
  basic_reuse();                                                       \
  basic_filter_pushdown_bt_op_test(true/*is_column_store*/);           \
  basic_reuse();                                                       \
  basic_filter_pushdown_bt_op_test(false/*is_column_store*/);          \
  basic_reuse();
|
||||
|
||||
// Runs all scenarios once in each order and prints the perf report.
// When with_warm is true, one extra pass is executed first and perf_ctx_ is
// reused afterwards, before the two reported passes start.
#define TEST_FILTER_DECODE_ALL_OP_ONCE(with_warm)                        \
  if (with_warm) {                                                       \
    TEST_EXECUTE_CS_PAX_ALL_OP()                                         \
    perf_ctx_.reuse();                                                   \
  }                                                                      \
  TEST_EXECUTE_CS_PAX_ALL_OP()                                           \
  TEST_EXECUTE_CS_PAX_ALL_OP_TR()                                        \
  perf_ctx_.print_report(true/*print total*/, true/*print detail*/);
|
||||
|
||||
// Runs every pushdown filter scenario once for the column-store variant only,
// calling basic_reuse() after each one, and then prints the perf report.
// The round counter lives inside the for statement, so the expansion does not
// declare a variable in the caller's scope and can be combined with the other
// loop macros in one test body.
#define TEST_CS_FILTER_DECODE_ALL_OP()                                                     \
  for (int64_t cs_filter_round_ = 1; cs_filter_round_ > 0; --cs_filter_round_) {           \
    basic_filter_pushdown_eqne_nunn_op_test(true/*is_column_store*/);                      \
    basic_reuse();                                                                         \
    basic_filter_pushdown_comp_op_test(true/*is_column_store*/);                           \
    basic_reuse();                                                                         \
    basic_filter_pushdown_in_op_test(true/*is_column_store*/);                             \
    basic_reuse();                                                                         \
    basic_filter_pushdown_bt_op_test(true/*is_column_store*/);                             \
    basic_reuse();                                                                         \
  }                                                                                        \
  perf_ctx_.print_report(true/*print total*/, true/*print detail*/);
|
||||
|
||||
// Runs every pushdown filter scenario once for the non-column-store variant
// only, calling basic_reuse() after each one, and then prints the perf report.
// The round counter lives inside the for statement, so the expansion does not
// declare a variable in the caller's scope and can be combined with the other
// loop macros in one test body.
#define TEST_PAX_FILTER_DECODE_ALL_OP()                                                    \
  for (int64_t pax_filter_round_ = 1; pax_filter_round_ > 0; --pax_filter_round_) {        \
    basic_filter_pushdown_eqne_nunn_op_test(false/*is_column_store*/);                     \
    basic_reuse();                                                                         \
    basic_filter_pushdown_comp_op_test(false/*is_column_store*/);                          \
    basic_reuse();                                                                         \
    basic_filter_pushdown_in_op_test(false/*is_column_store*/);                            \
    basic_reuse();                                                                         \
    basic_filter_pushdown_bt_op_test(false/*is_column_store*/);                            \
    basic_reuse();                                                                         \
  }                                                                                        \
  perf_ctx_.print_report(true/*print total*/, true/*print detail*/);
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_raw_decoder_010)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(true/*is_raw_*/, false/*need_compress*/, true/*need_decode*/,
|
||||
// false/*need_cs_full_transform*/));
|
||||
// TEST_FILTER_DECODE_ALL_OP();
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_raw_decoder_011)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(true/*is_raw_*/, false/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/));
|
||||
// TEST_FILTER_DECODE_ALL_OP();
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_raw_decoder_111)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(true/*is_raw_*/, true/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/));
|
||||
// TEST_FILTER_DECODE_ALL_OP();
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_raw_decoder_1110)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(true/*is_raw_*/, true/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/, false/*is_bit_packing*/));
|
||||
// TEST_FILTER_DECODE_ALL_OP();
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_raw_decoder_1111)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(true/*is_raw_*/, true/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/, true/*is_bit_packing*/));
|
||||
// TEST_FILTER_DECODE_ALL_OP();
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_dict_decoder_010)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(false/*is_raw_*/, false/*need_compress*/, true/*need_decode*/,
|
||||
// false/*need_cs_full_transform*/));
|
||||
// TEST_FILTER_DECODE_ALL_OP();
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_dict_decoder_011)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(false/*is_raw_*/, false/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/));
|
||||
// TEST_FILTER_DECODE_ALL_OP();
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_dict_decoder_111)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(false/*is_raw_*/, true/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/));
|
||||
// TEST_FILTER_DECODE_ALL_OP();
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_dict_decoder_1110)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(false/*is_raw_*/, true/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/, false/*is_bit_packing*/));
|
||||
// TEST_FILTER_DECODE_ALL_OP();
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_dict_decoder_1111)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(false/*is_raw_*/, true/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/, true/*is_bit_packing*/));
|
||||
// TEST_FILTER_DECODE_ALL_OP();
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_raw_get_rows_with_null)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(true/*is_raw_*/, true/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/, false/*is_bit_packing*/, false/*check_all_type*/, 1/*round*/));
|
||||
// basic_filter_pushdown_eqne_nunn_op_test(true/*is_column_store*/);
|
||||
// basic_reuse();
|
||||
// basic_filter_pushdown_eqne_nunn_op_test(false/*is_column_store*/);
|
||||
// basic_reuse();
|
||||
// perf_ctx_.print_report(true/*print total*/, true/*print detail*/);
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_raw_get_rows_without_null)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(true/*is_raw_*/, true/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/, false/*is_bit_packing*/, false/*check_all_type*/, 50/*round*/));
|
||||
// basic_filter_pushdown_bt_op_test(true/*is_column_store*/);
|
||||
// basic_reuse();
|
||||
// basic_filter_pushdown_bt_op_test(false/*is_column_store*/);
|
||||
// basic_reuse();
|
||||
// perf_ctx_.print_report(true/*print total*/, true/*print detail*/);
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_dict_get_rows_with_null)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(false/*is_raw_*/, true/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/, false/*is_bit_packing*/, false/*check_all_type*/, 50/*round*/));
|
||||
// basic_filter_pushdown_eqne_nunn_op_test(true/*is_column_store*/);
|
||||
// basic_reuse();
|
||||
// basic_filter_pushdown_eqne_nunn_op_test(false/*is_column_store*/);
|
||||
// basic_reuse();
|
||||
// perf_ctx_.print_report(true/*print total*/, true/*print detail*/);
|
||||
// }
|
||||
|
||||
// TEST_F(TestPerfCmpResult, test_dict_get_rows_without_null)
|
||||
// {
|
||||
// ASSERT_EQ(OB_SUCCESS, prepare(false/*is_raw_*/, true/*need_compress*/, true/*need_decode*/,
|
||||
// true/*need_cs_full_transform*/, false/*is_bit_packing*/, false/*check_all_type*/, 50/*round*/));
|
||||
// basic_filter_pushdown_bt_op_test(true/*is_column_store*/);
|
||||
// basic_reuse();
|
||||
// basic_filter_pushdown_bt_op_test(false/*is_column_store*/);
|
||||
// basic_reuse();
|
||||
// perf_ctx_.print_report(true/*print total*/, true/*print detail*/);
|
||||
// }
|
||||
|
||||
// Perf case: prepare() with is_raw_ = true, highest_data_pct = 5 and WITHOUT_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_raw_get_rows_f5)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(true/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    5/*highest_data_pct*/,
                    WITHOUT_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = true, highest_data_pct = 40 and WITHOUT_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_raw_get_rows_f40)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(true/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    40/*highest_data_pct*/,
                    WITHOUT_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = true, highest_data_pct = 95 and WITHOUT_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_raw_get_rows_f95)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(true/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    95/*highest_data_pct*/,
                    WITHOUT_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = true, highest_data_pct = 100 and WITHOUT_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_raw_get_rows_f100)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(true/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    100/*highest_data_pct*/,
                    WITHOUT_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = true, highest_data_pct = 5 and WITH_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_raw_get_rows_t5)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(true/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    5/*highest_data_pct*/,
                    WITH_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = true, highest_data_pct = 40 and WITH_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_raw_get_rows_t40)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(true/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    40/*highest_data_pct*/,
                    WITH_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = true, highest_data_pct = 95 and WITH_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_raw_get_rows_t95)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(true/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    95/*highest_data_pct*/,
                    WITH_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = true, highest_data_pct = 100 and WITH_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_raw_get_rows_t100)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(true/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    100/*highest_data_pct*/,
                    WITH_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = false, highest_data_pct = 5 and WITHOUT_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_dict_get_rows_f5)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(false/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    5/*highest_data_pct*/,
                    WITHOUT_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = false, highest_data_pct = 40 and WITHOUT_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_dict_get_rows_f40)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(false/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    40/*highest_data_pct*/,
                    WITHOUT_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = false, highest_data_pct = 95 and WITHOUT_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_dict_get_rows_f95)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(false/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    95/*highest_data_pct*/,
                    WITHOUT_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = false, highest_data_pct = 100 and WITHOUT_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_dict_get_rows_f100)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(false/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    100/*highest_data_pct*/,
                    WITHOUT_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = false, highest_data_pct = 5 and WITH_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_dict_get_rows_t5)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(false/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    5/*highest_data_pct*/,
                    WITH_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = false, highest_data_pct = 40 and WITH_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_dict_get_rows_t40)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(false/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    40/*highest_data_pct*/,
                    WITH_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = false, highest_data_pct = 95 and WITH_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_dict_get_rows_t95)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(false/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    95/*highest_data_pct*/,
                    WITH_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
// Perf case: prepare() with is_raw_ = false, highest_data_pct = 100 and WITH_NULL, then
// TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM). Does nothing unless NEED_EXECUTE_CASE holds.
TEST_F(TestPerfCmpResult, test_general_dict_get_rows_t100)
{
  if (!(NEED_EXECUTE_CASE)) {
    return;
  }
  ASSERT_EQ(OB_SUCCESS,
            prepare(false/*is_raw_*/,
                    true/*need_compress*/,
                    true/*need_decode*/,
                    true/*need_cs_full_transform*/,
                    IS_BIT_PACKING,
                    false/*check_all_type*/,
                    EXE_ROUND/*round*/,
                    100/*highest_data_pct*/,
                    WITH_NULL));
  TEST_FILTER_DECODE_ALL_OP_ONCE(WITH_WARM);
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -f test_perf_cmp_result.log*");
|
||||
OB_LOGGER.set_file_name("test_perf_cmp_result.log", true, false);
|
||||
oceanbase::common::ObLogger::get_logger().set_log_level("INFO");
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
std::string filter = testing::GTEST_FLAG(filter);
|
||||
if (filter != "") {
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@ -0,0 +1,524 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_pd_filter_test_base.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace blocksstable
|
||||
{
|
||||
|
||||
class TestStrDictPdFilter : public ObPdFilterTestBase
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
// Checks white-filter results on a STR_DICT-encoded ObCharType column.
// Data: 4 groups of 25 rows, each group storing the first 100 bytes of a buffer filled with
// one character ('a'..'d'); the second pass (flag == 1) appends 20 rows whose string is NULL.
// NOTE(review): enable_check, need_check, col_offset and ref_arr are never passed explicitly;
// presumably HANDLE_TRANSFORM / string_type_filter_normal_check pick them up by name from this
// scope (ob_pd_filter_test_base.h) — confirm before renaming or removing any local.
// NOTE(review): ref_arr entries look like {index into char_data_arr, string length} — confirm.
TEST_F(TestStrDictPdFilter, test_fixed_string_dict_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObCharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STR_DICT; // fixed length string

  // flag == 0: no NULL rows; flag == 1: 20 trailing NULL rows.
  for (int8_t flag = 0; flag <= 1; ++flag) {
    bool has_null = flag;
    const int64_t null_cnt = has_null ? 20 : 0;
    const int64_t row_cnt = 100 + null_cnt;
    ObMicroBlockCSEncoder encoder;
    ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
    ObDatumRow row_arr[row_cnt];
    for (int64_t i = 0; i < row_cnt; ++i) {
      ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
    }
    const int64_t char_data_arr_cnt = 4;
    const int64_t each_type_cnt = 25;
    char char_type_arr[char_data_arr_cnt] = {'a', 'b', 'c', 'd'};
    char **char_data_arr = static_cast<char **>(allocator_.alloc(sizeof(char *) * char_data_arr_cnt));
    // The pointer table is written through below, so fail here if the allocation failed.
    ASSERT_TRUE(nullptr != char_data_arr);
    for (int64_t i = 0; i < char_data_arr_cnt; ++i) {
      char_data_arr[i] = static_cast<char *>(allocator_.alloc(1024));
      ASSERT_TRUE(nullptr != char_data_arr[i]);
      MEMSET(char_data_arr[i], char_type_arr[i], 1024);
    }

    // Rows [25 * idx, 25 * (idx + 1)) all carry the 100-byte string of character idx.
    for (int64_t idx = 0; idx < char_data_arr_cnt; ++idx) {
      for (int64_t i = each_type_cnt * idx; i < each_type_cnt * (idx + 1); ++i) {
        row_arr[i].storage_datums_[0].set_int32(i);
        row_arr[i].storage_datums_[1].set_string(char_data_arr[idx], 100);
        ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
      }
    }
    // Trailing NULL rows (none when null_cnt == 0).
    for (int64_t i = row_cnt - null_cnt; i < row_cnt; ++i) {
      row_arr[i].storage_datums_[0].set_int32(i);
      row_arr[i].storage_datums_[1].set_null();
      ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
    }

    HANDLE_TRANSFORM();

    const int64_t col_offset = 1;
    bool need_check = true;

    // check NU/NN
    {
      std::pair<int64_t, int64_t> ref_arr[1] = {{0, 5}};
      const int64_t nu_cnt = has_null ? null_cnt : 0;
      int64_t res_arr_nu[1] = {nu_cnt};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
      int64_t res_arr_nn[1] = {100};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
    }

    // check EQ/NE
    {
      std::pair<int64_t, int64_t> ref_arr[3] = {{0, 5}, {1, 30}, {2, 100}};
      int64_t res_arr_eq[3] = {0, 0, each_type_cnt};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 3, 1, res_arr_eq);
      int64_t res_arr_ne[3] = {100, 100, 100 - each_type_cnt};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 3, 1, res_arr_ne);
    }

    // check LT/LE/GT/GE
    {
      std::pair<int64_t, int64_t> ref_arr[3] = {{0, 100}, {1, 100}, {2, 100}};
      int64_t res_arr_lt[3] = {0, each_type_cnt, 2 * each_type_cnt};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 3, 1, res_arr_lt);
      int64_t res_arr_le[3] = {each_type_cnt, 2 * each_type_cnt, 3 * each_type_cnt};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 3, 1, res_arr_le);
    }
    {
      std::pair<int64_t, int64_t> ref_arr[3] = {{3, 100}, {2, 100}, {1, 100}};
      int64_t res_arr_gt[3] = {0, each_type_cnt, 2 * each_type_cnt};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 3, 1, res_arr_gt);
      int64_t res_arr_ge[3] = {each_type_cnt, each_type_cnt * 2, 3 * each_type_cnt};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 3, 1, res_arr_ge);
    }

    // check IN/BT
    {
      std::pair<int64_t, int64_t> ref_arr[5] = {{0, 5}, {0, 100}, {1, 40}, {2, 100}, {3, 20}};
      int64_t res_arr[1] = {each_type_cnt * 2};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
    }
    {
      std::pair<int64_t, int64_t> ref_arr[6] = {{0, 100}, {1, 100}, {0, 100}, {2, 100}, {0, 100}, {3, 100}};
      int64_t res_arr[3] = {each_type_cnt * 2, each_type_cnt * 3, each_type_cnt * 4};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 3, 2, res_arr);
    }
  }
}
|
||||
|
||||
// Checks EQ/NE on a STR_DICT ObVarcharType column (column 1) and GE/GT on a STR_DICT
// ObCharType column (column 2), with is_all_column_force_raw_ set on the encoder.
// Data: 4 groups of 25 rows; group idx stores idx * 10 bytes of its character in column 1
// (so group 0 is an empty string) and 100 bytes in column 2; the last 20 rows are NULL in both.
// NOTE(review): enable_check, need_check, col_offset and ref_arr are never passed explicitly;
// presumably HANDLE_TRANSFORM / string_type_filter_normal_check pick them up by name from this
// scope (ob_pd_filter_test_base.h) — confirm before renaming or removing any local.
// NOTE(review): ref_arr entries look like {index into char_data_arr, string length} — confirm.
TEST_F(TestStrDictPdFilter, test_var_string_dict_decoder_filter)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 3;
  const bool enable_check = ENABLE_CASE_CHECK;
  const bool is_force_raw = true;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObVarcharType, ObCharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STR_DICT; // var string
  ctx_.column_encodings_[2] = ObCSColumnHeader::Type::STR_DICT; // fixed length string

  const int64_t row_cnt = 120;
  const int64_t delta = 20;  // number of trailing NULL rows
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = is_force_raw;
  ObDatumRow row_arr[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }
  const int64_t char_data_arr_cnt = 4;
  const int64_t each_type_cnt = (row_cnt - delta) / char_data_arr_cnt;
  // One slot more than char_data_arr_cnt is declared/allocated; only the first four are filled.
  char char_type_arr[char_data_arr_cnt + 1] = {'a', 'b', 'c', 'd', 'e'};
  char **char_data_arr = static_cast<char **>(allocator_.alloc(sizeof(char *) * (char_data_arr_cnt + 1)));
  // The pointer table is written through below, so fail here if the allocation failed.
  ASSERT_TRUE(nullptr != char_data_arr);
  for (int64_t i = 0; i < char_data_arr_cnt; ++i) {
    char_data_arr[i] = static_cast<char *>(allocator_.alloc(1024));
    ASSERT_TRUE(nullptr != char_data_arr[i]);
    MEMSET(char_data_arr[i], char_type_arr[i], 1024);
  }

  for (int64_t idx = 0; idx < char_data_arr_cnt; ++idx) {
    for (int64_t i = each_type_cnt * idx; i < each_type_cnt * (idx + 1); ++i) {
      row_arr[i].storage_datums_[0].set_int32(i);
      row_arr[i].storage_datums_[1].set_string(char_data_arr[idx], idx * 10);
      row_arr[i].storage_datums_[2].set_string(char_data_arr[idx], 100);
      ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
    }
  }

  for (int64_t i = row_cnt - delta; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    row_arr[i].storage_datums_[1].set_null();
    row_arr[i].storage_datums_[2].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  HANDLE_TRANSFORM();

  int64_t col_offset = 1;
  bool need_check = true;

  // check EQ/NE
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 0}, {1, 10}, {2, 10}};
    int64_t res_arr_eq[3] = {25, 25, 0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 3, 1, res_arr_eq);
    int64_t res_arr_ne[3] = {75, 75, 100};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 3, 1, res_arr_ne);
  }

  // check GE/GT
  {
    col_offset = 2;  // switch to the fixed-length string column
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 100}, {1, 100}, {2, 100}};
    int64_t res_arr_ge[3] = {100, 75, 50};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 3, 1, res_arr_ge);
    int64_t res_arr_gt[3] = {75, 50, 25};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 3, 1, res_arr_gt);
  }
}
|
||||
|
||||
|
||||
// Checks white-filter results on a STR_DICT ObCharType column dominated by one value,
// with is_all_column_force_raw_ set on the encoder.
// Data: rows 0..96 store 100 bytes of 'a'; rows 97, 98, 99 store 100 bytes of 'b', 'c', 'd'.
// NOTE(review): enable_check, need_check, col_offset and ref_arr are never passed explicitly;
// presumably HANDLE_TRANSFORM / string_type_filter_normal_check pick them up by name from this
// scope (ob_pd_filter_test_base.h) — confirm before renaming or removing any local.
// NOTE(review): ref_arr entries look like {index into char_data_arr, string length} — confirm.
TEST_F(TestStrDictPdFilter, test_fixed_string_dict_const_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  const bool is_force_raw = true;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObCharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STR_DICT; // fixed length string

  const int64_t row_cnt = 100;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = is_force_raw;
  ObDatumRow row_arr[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }
  const int64_t char_data_arr_cnt = 4;
  const int64_t each_type_cnt = 25;
  char char_type_arr[char_data_arr_cnt] = {'a', 'b', 'c', 'd'};
  char **char_data_arr = static_cast<char **>(allocator_.alloc(sizeof(char *) * char_data_arr_cnt));
  // The pointer table is written through below, so fail here if the allocation failed.
  ASSERT_TRUE(nullptr != char_data_arr);
  for (int64_t i = 0; i < char_data_arr_cnt; ++i) {
    char_data_arr[i] = static_cast<char *>(allocator_.alloc(1024));
    ASSERT_TRUE(nullptr != char_data_arr[i]);
    MEMSET(char_data_arr[i], char_type_arr[i], 1024);
  }

  // 97 rows with the same value ...
  for (int64_t i = 0; i < 97; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    row_arr[i].storage_datums_[1].set_string(char_data_arr[0], 100);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  // ... followed by three rows using char_data_arr[1], [2] and [3] (i - 96 for i = 97..99).
  for (int64_t i = 97; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    row_arr[i].storage_datums_[1].set_string(char_data_arr[i-96], 100);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // check NU/NN
  {
    std::pair<int64_t, int64_t> ref_arr[1] = {{0, 5}};
    int64_t res_arr_nu[1] = {0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {100};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }

  // check EQ/NE
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 5}, {0, 100}, {1, 100}};
    int64_t res_arr_eq[3] = {0, 97, 1};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 3, 1, res_arr_eq);
    int64_t res_arr_ne[3] = {100, 3, 99};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 3, 1, res_arr_ne);
  }

  // check LT/LE/GT/GE
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 100}, {0, 200}, {1, 200}};
    int64_t res_arr_lt[3] = {0, 97, 98};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 3, 1, res_arr_lt);
    int64_t res_arr_le[3] = {97, 97, 98};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 3, 1, res_arr_le);
  }
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 50}, {0, 100}, {1, 100}};
    int64_t res_arr_gt[3] = {100, 3, 2};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 3, 1, res_arr_gt);
    int64_t res_arr_ge[3] = {100, 100, 3};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 3, 1, res_arr_ge);
  }
}
|
||||
|
||||
// Checks white-filter results on a STR_DICT ObVarcharType column dominated by one value,
// with is_all_column_force_raw_ left false.
// Data: rows 0..114 store 50 bytes of 'a'; rows 115..118 store 50 bytes of 'b', 'c', 'd', 'e';
// row 119 is NULL.
// NOTE(review): enable_check, need_check, col_offset and ref_arr are never passed explicitly;
// presumably HANDLE_TRANSFORM / string_type_filter_normal_check pick them up by name from this
// scope (ob_pd_filter_test_base.h) — confirm before renaming or removing any local.
// NOTE(review): ref_arr entries look like {index into char_data_arr, string length} — confirm.
TEST_F(TestStrDictPdFilter, test_var_string_dict_const_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  const bool has_null = true;
  const bool is_force_raw = false;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObVarcharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STR_DICT; // var string

  const int64_t row_cnt = 120;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  encoder.is_all_column_force_raw_ = is_force_raw;
  ObDatumRow row_arr[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }
  const int64_t char_data_arr_cnt = 5;
  char char_type_arr[char_data_arr_cnt] = {'a', 'b', 'c', 'd', 'e'};
  char **char_data_arr = static_cast<char **>(allocator_.alloc(sizeof(char *) * char_data_arr_cnt));
  // The pointer table is written through below, so fail here if the allocation failed.
  ASSERT_TRUE(nullptr != char_data_arr);
  for (int64_t i = 0; i < char_data_arr_cnt; ++i) {
    char_data_arr[i] = static_cast<char *>(allocator_.alloc(512));
    ASSERT_TRUE(nullptr != char_data_arr[i]);
    MEMSET(char_data_arr[i], char_type_arr[i], 512);
  }

  // All but the last five rows share one value.
  for (int64_t i = 0; i < row_cnt - 5; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    row_arr[i].storage_datums_[1].set_string(char_data_arr[0], 50);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  // Last five rows: four distinct values, then one NULL in the final row.
  for (int64_t i = row_cnt - 5; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    if (has_null && (i == row_cnt - 1)) {
      row_arr[i].storage_datums_[1].set_null();
    } else {
      // Maps i = row_cnt-5 .. row_cnt-2 to indexes 1 .. 4.
      const int64_t cur_char_idx = (i - row_cnt + 6) % 5;
      row_arr[i].storage_datums_[1].set_string(char_data_arr[cur_char_idx], 50);
    }
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // check NU/NN
  {
    std::pair<int64_t, int64_t> ref_arr[1] = {{0, 5}};
    int64_t res_arr_nu[1] = {1};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {119};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }

  // check EQ/NE
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 50}, {1, 50}, {0, 100}};
    int64_t res_arr_eq[3] = {115, 1, 0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 3, 1, res_arr_eq);
    int64_t res_arr_ne[3] = {4, 118, 119};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 3, 1, res_arr_ne);
  }

  // check LT/LE/GT/GE
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 50}, {0, 51}, {2, 50}};
    int64_t res_arr_lt[3] = {0, 115, 116};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 3, 1, res_arr_lt);
    int64_t res_arr_le[3] = {115, 115, 117};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 3, 1, res_arr_le);
  }
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 49}, {0, 50}, {2, 50}};
    int64_t res_arr_gt[3] = {119, 4, 2};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 3, 1, res_arr_gt);
    int64_t res_arr_ge[3] = {119, 119, 3};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 3, 1, res_arr_ge);
  }

  // check IN/BT
  {
    std::pair<int64_t, int64_t> ref_arr[5] = {{0, 50}, {0, 100}, {1, 40}, {2, 100}, {3, 50}};
    int64_t res_arr[1] = {116};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
  }
  {
    std::pair<int64_t, int64_t> ref_arr[6] = {{0, 10}, {0, 49}, {0, 10}, {1, 50}, {1, 50}, {4, 50}};
    int64_t res_arr[3] = {0, 116, 4};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 3, 2, res_arr);
  }
}
|
||||
|
||||
// Checks white-filter results on a STR_DICT ObVarcharType column where every one of the
// 120 rows stores the same value: 50 bytes of 'a'. There are no NULL rows.
// NOTE(review): enable_check, need_check, col_offset and ref_arr are never passed explicitly;
// presumably HANDLE_TRANSFORM / string_type_filter_normal_check pick them up by name from this
// scope (ob_pd_filter_test_base.h) — confirm before renaming or removing any local.
// NOTE(review): ref_arr entries look like {index into char_data_arr, string length} — confirm.
TEST_F(TestStrDictPdFilter, test_string_dict_all_const_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObVarcharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STR_DICT; // var string

  const int64_t row_cnt = 120;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  ObDatumRow row_arr[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }
  const int64_t char_data_arr_cnt = 5;
  char char_type_arr[char_data_arr_cnt] = {'a', 'b', 'c', 'd', 'e'};
  char **char_data_arr = static_cast<char **>(allocator_.alloc(sizeof(char *) * char_data_arr_cnt));
  // The pointer table is written through below, so fail here if the allocation failed.
  ASSERT_TRUE(nullptr != char_data_arr);
  for (int64_t i = 0; i < char_data_arr_cnt; ++i) {
    char_data_arr[i] = static_cast<char *>(allocator_.alloc(512));
    ASSERT_TRUE(nullptr != char_data_arr[i]);
    MEMSET(char_data_arr[i], char_type_arr[i], 512);
  }

  // Every row carries the same string value.
  for (int64_t i = 0; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    row_arr[i].storage_datums_[1].set_string(char_data_arr[0], 50);
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // check NU/NN
  {
    std::pair<int64_t, int64_t> ref_arr[1] = {{0, 5}};
    int64_t res_arr_nu[1] = {0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {120};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }

  // check EQ/NE
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 50}, {1, 50}, {0, 100}};
    int64_t res_arr_eq[3] = {120, 0, 0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 3, 1, res_arr_eq);
    int64_t res_arr_ne[3] = {0, 120, 120};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 3, 1, res_arr_ne);
  }

  // check LT/LE/GT/GE
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 50}, {0, 51}, {2, 50}};
    int64_t res_arr_lt[3] = {0, 120, 120};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 3, 1, res_arr_lt);
    int64_t res_arr_le[3] = {120, 120, 120};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 3, 1, res_arr_le);
  }
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 49}, {0, 50}, {2, 50}};
    int64_t res_arr_gt[3] = {120, 0, 0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 3, 1, res_arr_gt);
    int64_t res_arr_ge[3] = {120, 120, 0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 3, 1, res_arr_ge);
  }

  // check IN/BT
  {
    std::pair<int64_t, int64_t> ref_arr[5] = {{0, 50}, {0, 100}, {1, 40}, {2, 100}, {3, 50}};
    int64_t res_arr[1] = {120};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
  }
  {
    std::pair<int64_t, int64_t> ref_arr[6] = {{0, 10}, {0, 49}, {0, 10}, {1, 50}, {1, 50}, {4, 50}};
    int64_t res_arr[3] = {0, 120, 0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 3, 2, res_arr);
  }
}
|
||||
|
||||
// Encodes 120 rows whose second column (varchar, STR_DICT encoding) is NULL in
// every row, then runs the NU/NN and EQ/NE white filters against that column.
// NOTE(review): HANDLE_TRANSFORM and string_type_filter_normal_check are defined
// outside this file chunk; they appear to read locals such as ref_arr,
// char_data_arr, col_offset and need_check by name, so those names must stay
// unchanged — confirm against the test base header.
TEST_F(TestStrDictPdFilter, test_string_dict_all_null_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObVarcharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STR_DICT; // var string

  const int64_t row_cnt = 120;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  ObDatumRow row_arr[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }

  // Reference strings: five 512-byte buffers, each filled with a single letter.
  const int64_t char_data_arr_cnt = 5;
  char char_type_arr[char_data_arr_cnt] = {'a', 'b', 'c', 'd', 'e'};
  char **char_data_arr = static_cast<char **>(allocator_.alloc(sizeof(char *) * char_data_arr_cnt));
  // The pointer table itself must be valid before it is indexed below.
  ASSERT_TRUE(nullptr != char_data_arr);
  for (int64_t i = 0; i < char_data_arr_cnt; ++i) {
    char_data_arr[i] = static_cast<char *>(allocator_.alloc(512));
    ASSERT_TRUE(nullptr != char_data_arr[i]);
    MEMSET(char_data_arr[i], char_type_arr[i], 512);
  }

  // Every row carries its index in column 0 and NULL in column 1.
  for (int64_t i = 0; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    row_arr[i].storage_datums_[1].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // check NU/NN
  {
    std::pair<int64_t, int64_t> ref_arr[1] = {{0, 5}};
    int64_t res_arr_nu[1] = {120};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }

  // check EQ/NE
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 50}, {1, 50}, {0, 100}};
    int64_t res_arr_eq[3] = {0, 0, 0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 3, 1, res_arr_eq);
    int64_t res_arr_ne[3] = {0, 0, 0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 3, 1, res_arr_ne);
  }
}
|
||||
|
||||
} // namespace blocksstable
|
||||
} // namespace oceanbase
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -f test_str_dict_pd_filter.log*");
|
||||
OB_LOGGER.set_file_name("test_str_dict_pd_filter.log", true, false);
|
||||
oceanbase::common::ObLogger::get_logger().set_log_level("DEBUG");
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@ -0,0 +1,360 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_pd_filter_test_base.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace blocksstable
|
||||
{
|
||||
|
||||
class TestStringPdFilter : public ObPdFilterTestBase
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
// Runs the white filters (NU/NN, EQ/NE, LT/LE, GT/GE, IN, BT) against a varchar
// column stored with STRING encoding, once without and once with 20 trailing
// NULL rows. The 100 non-NULL rows are four groups of 25 ('a'..'d'); row k of a
// group holds its letter repeated (k + 1) times.
// NOTE(review): HANDLE_TRANSFORM and string_type_filter_normal_check are defined
// outside this file chunk; they appear to read locals such as ref_arr,
// char_data_arr, col_offset and need_check by name, so those names must stay
// unchanged — confirm against the test base header.
TEST_F(TestStringPdFilter, test_string_decoder_filter_varchar)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  const bool is_force_raw = true;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObVarcharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STRING;

  for (int8_t flag = 0; flag <= 1; ++flag) {
    bool has_null = flag;
    const int64_t null_cnt = has_null ? 20 : 0;
    const int64_t row_cnt = 100 + null_cnt;

    ObMicroBlockCSEncoder encoder;
    ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
    encoder.is_all_column_force_raw_ = is_force_raw;
    ObDatumRow row_arr[row_cnt];
    for (int64_t i = 0; i < row_cnt; ++i) {
      ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
    }

    // Five 1024-byte single-letter buffers; only the first four feed the rows,
    // the fifth ('e') is available as a reference value.
    const int64_t char_data_arr_cnt = 4;
    const int64_t each_type_cnt = 25;
    char char_type_arr[char_data_arr_cnt + 1] = {'a', 'b', 'c', 'd', 'e'};
    char **char_data_arr = static_cast<char **>(allocator_.alloc(sizeof(char *) * (char_data_arr_cnt + 1)));
    // The pointer table itself must be valid before it is indexed below.
    ASSERT_TRUE(nullptr != char_data_arr);
    for (int64_t i = 0; i < char_data_arr_cnt + 1; ++i) {
      char_data_arr[i] = static_cast<char *>(allocator_.alloc(1024));
      ASSERT_TRUE(nullptr != char_data_arr[i]);
      MEMSET(char_data_arr[i], char_type_arr[i], 1024);
    }

    for (int64_t idx = 0; idx < char_data_arr_cnt; ++idx) {
      for (int64_t i = each_type_cnt * idx; i < each_type_cnt * (idx + 1); ++i) {
        row_arr[i].storage_datums_[0].set_int32(i);
        row_arr[i].storage_datums_[1].set_string(char_data_arr[idx], i % each_type_cnt + 1);
        ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
      }
    }
    // NULL rows follow the non-NULL ones (row_cnt - null_cnt == 100).
    for (int64_t i = row_cnt - null_cnt; i < row_cnt; ++i) {
      row_arr[i].storage_datums_[0].set_int32(i);
      row_arr[i].storage_datums_[1].set_null();
      ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
    }

    HANDLE_TRANSFORM();

    const int64_t col_offset = 1;
    bool need_check = true;
    // check NU/NN
    {
      std::pair<int64_t, int64_t> ref_arr[1];
      const int64_t nu_cnt = has_null ? null_cnt : 0;
      int64_t res_arr_nu[1] = {nu_cnt};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
      int64_t res_arr_nn[1] = {100};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
    }

    // check EQ/NE
    {
      std::pair<int64_t, int64_t> ref_arr[5] = {{0, 1}, {0, 10}, {1, 2}, {1, 100}, {3, 10}};
      int64_t res_arr_eq[5] = {1, 1, 1, 0, 1};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
      int64_t res_arr_ne[5] = {99, 99, 99, 100, 99};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
    }

    // check LT/LE/GT/GE
    {
      std::pair<int64_t, int64_t> ref_arr[5] = {{0, 1}, {0, 10}, {1, 2}, {1, 10}, {3, 10}};
      int64_t res_arr_lt[5] = {0, 9, 26, 34, 84};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 5, 1, res_arr_lt);
      int64_t res_arr_le[5] = {1, 10, 27, 35, 85};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 5, 1, res_arr_le);
    }
    {
      std::pair<int64_t, int64_t> ref_arr[3] = {{3, each_type_cnt}, {2, each_type_cnt}, {1, 10}};
      int64_t res_arr_gt[3] = {0, each_type_cnt, 15 + each_type_cnt * 2};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 3, 1, res_arr_gt);
      int64_t res_arr_ge[3] = {1, 1 + each_type_cnt, 16 + each_type_cnt * 2};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 3, 1, res_arr_ge);
    }

    // check IN/BT
    {
      std::pair<int64_t, int64_t> ref_arr[5] = {{0, 5}, {1, 1}, {1, 40}, {2, 100}, {3, 20}};
      int64_t res_arr[1] = {3};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
    }
    {
      std::pair<int64_t, int64_t> ref_arr[8] = {{0, 1}, {0, 10}, {0, 1}, {1, 10}, {1, 10}, {3, 1}, {3, 10}, {4, 20}};
      int64_t res_arr[4] = {10, each_type_cnt + 10, each_type_cnt + 17, 16};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 4, 2, res_arr);
    }
  }
}
|
||||
|
||||
// Runs the white filters (NU/NN, EQ/NE, LT/LE, GT/GE, IN, BT) against a char
// column stored with STRING encoding, once without and once with 20 trailing
// NULL rows. The 100 non-NULL rows are four groups of 25 ('a'..'d'); every row
// holds its group's letter repeated 100 times.
// NOTE(review): HANDLE_TRANSFORM and string_type_filter_normal_check are defined
// outside this file chunk; they appear to read locals such as ref_arr,
// char_data_arr, col_offset and need_check by name, so those names must stay
// unchanged — confirm against the test base header.
TEST_F(TestStringPdFilter, test_string_decoder_filter_char)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  const bool is_force_raw = true;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObCharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STRING;

  for (int8_t flag = 0; flag <= 1; ++flag) {
    bool has_null = flag;
    const int64_t null_cnt = has_null ? 20 : 0;
    const int64_t row_cnt = 100 + null_cnt;

    ObMicroBlockCSEncoder encoder;
    ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
    encoder.is_all_column_force_raw_ = is_force_raw;
    ObDatumRow row_arr[row_cnt];
    for (int64_t i = 0; i < row_cnt; ++i) {
      ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
    }

    // Five 1024-byte single-letter buffers; only the first four feed the rows,
    // the fifth ('e') is available as a reference value.
    const int64_t char_data_arr_cnt = 4;
    const int64_t each_type_cnt = 25;
    char char_type_arr[char_data_arr_cnt + 1] = {'a', 'b', 'c', 'd', 'e'};
    char **char_data_arr = static_cast<char **>(allocator_.alloc(sizeof(char *) * (char_data_arr_cnt + 1)));
    // The pointer table itself must be valid before it is indexed below.
    ASSERT_TRUE(nullptr != char_data_arr);
    for (int64_t i = 0; i < char_data_arr_cnt + 1; ++i) {
      char_data_arr[i] = static_cast<char *>(allocator_.alloc(1024));
      ASSERT_TRUE(nullptr != char_data_arr[i]);
      MEMSET(char_data_arr[i], char_type_arr[i], 1024);
    }

    for (int64_t idx = 0; idx < char_data_arr_cnt; ++idx) {
      for (int64_t i = each_type_cnt * idx; i < each_type_cnt * (idx + 1); ++i) {
        row_arr[i].storage_datums_[0].set_int32(i);
        row_arr[i].storage_datums_[1].set_string(char_data_arr[idx], 100);
        ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
      }
    }
    // NULL rows follow the non-NULL ones (row_cnt - null_cnt == 100).
    for (int64_t i = row_cnt - null_cnt; i < row_cnt; ++i) {
      row_arr[i].storage_datums_[0].set_int32(i);
      row_arr[i].storage_datums_[1].set_null();
      ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
    }

    HANDLE_TRANSFORM();

    const int64_t col_offset = 1;
    bool need_check = true;
    // check NU/NN
    {
      std::pair<int64_t, int64_t> ref_arr[1];
      const int64_t nu_cnt = has_null ? null_cnt : 0;
      int64_t res_arr_nu[1] = {nu_cnt};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
      int64_t res_arr_nn[1] = {100};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
    }

    // check EQ/NE
    {
      std::pair<int64_t, int64_t> ref_arr[5] = {{0, 1}, {1, 100}, {2, 2}, {3, 100}, {4, 10}};
      int64_t res_arr_eq[5] = {0, 25, 0, 25, 0};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
      int64_t res_arr_ne[5] = {100, 75, 100, 75, 100};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
    }

    // check LT/LE/GT/GE
    {
      std::pair<int64_t, int64_t> ref_arr[5] = {{0, 1}, {0, 100}, {1, 100}, {1, 200}, {3, 100}};
      int64_t res_arr_lt[5] = {0, 0, 25, 50, 75};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LT, 5, 1, res_arr_lt);
      int64_t res_arr_le[5] = {0, 25, 50, 50, 100};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_LE, 5, 1, res_arr_le);
    }
    {
      std::pair<int64_t, int64_t> ref_arr[5] = {{0, 1}, {0, 100}, {1, 100}, {1, 200}, {3, 100}};
      int64_t res_arr_gt[5] = {100, 75, 50, 50, 0};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GT, 5, 1, res_arr_gt);
      int64_t res_arr_ge[5] = {100, 100, 75, 50, 25};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_GE, 5, 1, res_arr_ge);
    }

    // check IN/BT
    {
      std::pair<int64_t, int64_t> ref_arr[5] = {{0, 5}, {1, 100}, {1, 200}, {2, 100}, {3, 20}};
      int64_t res_arr[1] = {50};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_IN, 1, 5, res_arr);
    }
    {
      std::pair<int64_t, int64_t> ref_arr[8] = {{0, 1}, {0, 10}, {0, 10}, {1, 100}, {1, 10}, {2, 200}, {3, 100}, {4, 20}};
      int64_t res_arr[4] = {0, 50, 50, 25};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_BT, 4, 2, res_arr);
    }
  }
}
|
||||
|
||||
// Runs the NU/NN and EQ/NE white filters against a char column stored with
// STRING encoding where every non-NULL value has the same length (100 bytes),
// once without and once with 200 trailing NULL rows. The 1000 non-NULL rows are
// four groups of 250 ('a'..'d').
// NOTE(review): HANDLE_TRANSFORM and string_type_filter_normal_check are defined
// outside this file chunk; they appear to read locals such as ref_arr,
// char_data_arr, col_offset and need_check by name, so those names must stay
// unchanged — confirm against the test base header.
TEST_F(TestStringPdFilter, test_fixed_string_decoder_filter)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  const bool is_force_raw = true;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObCharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INTEGER;
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STRING;

  for (int8_t flag = 0; flag <= 1; ++flag) {
    bool has_null = flag;
    const int64_t null_cnt = has_null ? 200 : 0;
    const int64_t row_cnt = 1000 + null_cnt;
    ObMicroBlockCSEncoder encoder;
    ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
    encoder.is_all_column_force_raw_ = is_force_raw;
    ObDatumRow row_arr[row_cnt];
    for (int64_t i = 0; i < row_cnt; ++i) {
      ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
    }

    // Five 1024-byte single-letter buffers; only the first four feed the rows.
    const int64_t char_data_arr_cnt = 4;
    const int64_t each_type_cnt = 250;
    char char_type_arr[char_data_arr_cnt + 1] = {'a', 'b', 'c', 'd', 'e'};
    char **char_data_arr = static_cast<char **>(allocator_.alloc(sizeof(char *) * (char_data_arr_cnt + 1)));
    // The pointer table itself must be valid before it is indexed below.
    ASSERT_TRUE(nullptr != char_data_arr);
    for (int64_t i = 0; i < char_data_arr_cnt + 1; ++i) {
      char_data_arr[i] = static_cast<char *>(allocator_.alloc(1024));
      ASSERT_TRUE(nullptr != char_data_arr[i]);
      MEMSET(char_data_arr[i], char_type_arr[i], 1024);
    }

    for (int64_t idx = 0; idx < char_data_arr_cnt; ++idx) {
      for (int64_t i = each_type_cnt * idx; i < each_type_cnt * (idx + 1); ++i) {
        row_arr[i].storage_datums_[0].set_int32(i);
        row_arr[i].storage_datums_[1].set_string(char_data_arr[idx], 100);
        ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
      }
    }
    // NULL rows follow the non-NULL ones.
    for (int64_t i = row_cnt - null_cnt; i < row_cnt; ++i) {
      row_arr[i].storage_datums_[0].set_int32(i);
      row_arr[i].storage_datums_[1].set_null();
      ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
    }

    HANDLE_TRANSFORM();

    const int64_t col_offset = 1;
    bool need_check = true;
    // check NU/NN
    {
      std::pair<int64_t, int64_t> ref_arr[1];
      const int64_t nu_cnt = has_null ? null_cnt : 0;
      int64_t res_arr_nu[1] = {nu_cnt};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
      int64_t res_arr_nn[1] = {1000};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
    }

    // check EQ/NE
    {
      std::pair<int64_t, int64_t> ref_arr[5] = {{0, 100}, {0, 10}, {1, 100}, {1, 10}, {3, 100}};
      int64_t res_arr_eq[5] = {250, 0, 250, 0, 250};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 5, 1, res_arr_eq);
      int64_t res_arr_ne[5] = {750, 1000, 750, 1000, 750};
      string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 5, 1, res_arr_ne);
    }
  }
}
|
||||
|
||||
// Encodes 120 rows whose second column (varchar, STRING encoding) is NULL in
// every row, then runs the NU/NN and EQ/NE white filters against that column.
// NOTE(review): HANDLE_TRANSFORM and string_type_filter_normal_check are defined
// outside this file chunk; they appear to read locals such as ref_arr,
// char_data_arr, col_offset and need_check by name, so those names must stay
// unchanged — confirm against the test base header.
TEST_F(TestStringPdFilter, test_string_all_null_decoder)
{
  const int64_t rowkey_cnt = 1;
  const int64_t col_cnt = 2;
  const bool enable_check = ENABLE_CASE_CHECK;
  ObObjType col_types[col_cnt] = {ObInt32Type, ObVarcharType};
  ASSERT_EQ(OB_SUCCESS, prepare(col_types, rowkey_cnt, col_cnt));
  ctx_.column_encodings_[0] = ObCSColumnHeader::Type::INT_DICT; // integer dict
  ctx_.column_encodings_[1] = ObCSColumnHeader::Type::STRING; // var string

  const int64_t row_cnt = 120;
  ObMicroBlockCSEncoder encoder;
  ASSERT_EQ(OB_SUCCESS, encoder.init(ctx_));
  ObDatumRow row_arr[row_cnt];
  for (int64_t i = 0; i < row_cnt; ++i) {
    ASSERT_EQ(OB_SUCCESS, row_arr[i].init(allocator_, col_cnt));
  }

  // Reference strings: five 512-byte buffers, each filled with a single letter.
  const int64_t char_data_arr_cnt = 5;
  char char_type_arr[char_data_arr_cnt] = {'a', 'b', 'c', 'd', 'e'};
  char **char_data_arr = static_cast<char **>(allocator_.alloc(sizeof(char *) * char_data_arr_cnt));
  // The pointer table itself must be valid before it is indexed below.
  ASSERT_TRUE(nullptr != char_data_arr);
  for (int64_t i = 0; i < char_data_arr_cnt; ++i) {
    char_data_arr[i] = static_cast<char *>(allocator_.alloc(512));
    ASSERT_TRUE(nullptr != char_data_arr[i]);
    MEMSET(char_data_arr[i], char_type_arr[i], 512);
  }

  // Every row carries its index in column 0 and NULL in column 1.
  for (int64_t i = 0; i < row_cnt; ++i) {
    row_arr[i].storage_datums_[0].set_int32(i);
    row_arr[i].storage_datums_[1].set_null();
    ASSERT_EQ(OB_SUCCESS, encoder.append_row(row_arr[i]));
  }

  HANDLE_TRANSFORM();

  const int64_t col_offset = 1;
  bool need_check = true;

  // check NU/NN
  {
    std::pair<int64_t, int64_t> ref_arr[1] = {{0, 5}};
    int64_t res_arr_nu[1] = {120};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NU, 1, 0, res_arr_nu);
    int64_t res_arr_nn[1] = {0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NN, 1, 0, res_arr_nn);
  }

  // check EQ/NE
  {
    std::pair<int64_t, int64_t> ref_arr[3] = {{0, 50}, {1, 50}, {0, 100}};
    int64_t res_arr_eq[3] = {0, 0, 0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_EQ, 3, 1, res_arr_eq);
    int64_t res_arr_ne[3] = {0, 0, 0};
    string_type_filter_normal_check(true, ObWhiteFilterOperatorType::WHITE_OP_NE, 3, 1, res_arr_ne);
  }
}
|
||||
|
||||
} // namespace blocksstable
|
||||
} // namespace oceanbase
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -f test_string_pd_filter.log*");
|
||||
OB_LOGGER.set_file_name("test_string_pd_filter.log", true, false);
|
||||
oceanbase::common::ObLogger::get_logger().set_log_level("DEBUG");
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
422
unittest/storage/blocksstable/cs_encoding/test_string_stream.cpp
Normal file
422
unittest/storage/blocksstable/cs_encoding/test_string_stream.cpp
Normal file
@ -0,0 +1,422 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#define USING_LOG_PREFIX STORAGE
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
#define protected public
|
||||
#define private public
|
||||
#include "storage/blocksstable/cs_encoding/ob_string_stream_encoder.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_string_stream_decoder.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_column_encoding_struct.h"
|
||||
#include "storage/blocksstable/cs_encoding/ob_cs_decoding_util.h"
|
||||
#include "lib/codec/ob_fast_delta.h"
|
||||
#include "lib/compress/ob_compress_util.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace blocksstable
|
||||
{
|
||||
|
||||
class TestStringStream : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
virtual void SetUp() {}
|
||||
virtual void TearDown() {}
|
||||
|
||||
TestStringStream() : tenant_ctx_(500)
|
||||
{
|
||||
srand(time(NULL));
|
||||
share::ObTenantEnv::set_tenant(&tenant_ctx_);
|
||||
}
|
||||
virtual ~TestStringStream() {}
|
||||
|
||||
int64_t max_count = 64<<9;
|
||||
|
||||
void randstr(char *str, const int64_t len)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < len; ++i)
|
||||
{
|
||||
switch ((rand() % 3)) {
|
||||
case 1:
|
||||
str[i] = 'A' + rand() % 26;
|
||||
break;
|
||||
case 2:
|
||||
str[i] = 'a' + rand() % 26;
|
||||
break;
|
||||
default:
|
||||
str[i] = '0' + rand() % 10;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void generate_datums(ObColDatums *datums, const int64_t size, bool has_null, bool is_fix_len,
|
||||
bool has_empty_string, bool all_empty, int64_t &total_len)
|
||||
{
|
||||
total_len = 0;
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
ObDatum datum;
|
||||
if (has_null && (i % 5 == 0)) {
|
||||
datum.set_null();
|
||||
} else {
|
||||
int64_t len = i%333 + 1;
|
||||
if (is_fix_len) {
|
||||
len = 5;
|
||||
}
|
||||
if ((has_empty_string && (i % 7 == 0))
|
||||
|| all_empty) {
|
||||
len = 0;
|
||||
}
|
||||
char *tmp = reinterpret_cast<char *>(allocator_.alloc(len));
|
||||
datum.ptr_ = tmp;
|
||||
datum.len_ = len;
|
||||
randstr(tmp, len);
|
||||
}
|
||||
|
||||
ASSERT_EQ(OB_SUCCESS, datums->push_back(datum));
|
||||
total_len += datum.len_;
|
||||
}
|
||||
}
|
||||
|
||||
void generate_bitmap(char *bitmap, ObColDatums *datums)
|
||||
{
|
||||
for (int64_t i = 0; i < datums->count(); i++) {
|
||||
if (datums->at(i).is_null()) {
|
||||
bitmap[i/8] |= (1 << (7 - i%8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void buid_raw_integer_stream_data(const ObStreamData &stream_data,
|
||||
const int64_t count,
|
||||
const ObCompressorType type,
|
||||
ObIntegerStreamDecoderCtx &decode_ctx,
|
||||
ObStreamData &raw_stream_data)
|
||||
{
|
||||
uint16_t stream_meta_len = 0;
|
||||
ASSERT_EQ(OB_SUCCESS, ObIntegerStreamDecoder::build_decoder_ctx(
|
||||
stream_data, count,type, decode_ctx, stream_meta_len));
|
||||
ObStreamData stream_data2(stream_data.buf_ + stream_meta_len, stream_data.len_ - stream_meta_len);
|
||||
const uint32_t width_size = decode_ctx.meta_.get_uint_width_size();
|
||||
uint32_t array_buf_size = width_size * decode_ctx.count_;
|
||||
char *array_buf = (char*)allocator_.alloc(array_buf_size);
|
||||
ASSERT_EQ(OB_SUCCESS, ObIntegerStreamDecoder::transform_to_raw_array(stream_data2, decode_ctx, array_buf, allocator_));
|
||||
raw_stream_data.set(array_buf, array_buf_size);
|
||||
}
|
||||
|
||||
void test_and_check_str_datums(int64_t size, const ObCompressorType type, bool use_zero_len_as_null, bool has_null, bool is_fix_len,
|
||||
bool use_nullbitmap, bool has_empty_string, bool all_null, bool all_empty, bool half_null_half_empty, bool use_null_replaced_ref)
|
||||
{
|
||||
LOG_INFO("test_and_check_string_encoding", K(size), K(type), K(use_zero_len_as_null), K(has_null), K(is_fix_len),
|
||||
K(use_nullbitmap), K(all_null), K(half_null_half_empty), K(use_null_replaced_ref));
|
||||
ObStringStreamEncoderCtx ctx;
|
||||
ObCSEncodingOpt encoding_opt;
|
||||
bool is_use_zero_len_as_null = use_zero_len_as_null;
|
||||
int64_t fixed_len = -1;
|
||||
if (is_fix_len) {
|
||||
fixed_len = 5;
|
||||
}
|
||||
if (all_empty) {
|
||||
fixed_len = 0;
|
||||
}
|
||||
if (!(use_nullbitmap || all_empty || has_empty_string || half_null_half_empty || use_null_replaced_ref)) {
|
||||
is_use_zero_len_as_null = true;
|
||||
}
|
||||
common::ObCompressor *compressor = nullptr;
|
||||
ASSERT_EQ(OB_SUCCESS, ObCompressorPool::get_instance().get_compressor(type, compressor));
|
||||
|
||||
ObStringStreamEncoder encoder;
|
||||
uint32_t *data = nullptr;
|
||||
ObColDatums *datums = new ObColDatums();
|
||||
ASSERT_EQ(OB_SUCCESS, datums->resize(max_count));
|
||||
datums->reuse();
|
||||
if (half_null_half_empty) {
|
||||
all_empty = true;
|
||||
}
|
||||
int64_t total_len = 0;
|
||||
generate_datums(datums, size, has_null, is_fix_len, has_empty_string, all_empty, total_len);
|
||||
if (all_null) {
|
||||
for (int64_t i = 0; (i < datums->count()); i++) {
|
||||
total_len -= datums->at(i).len_;
|
||||
datums->at(i).set_null();
|
||||
}
|
||||
}
|
||||
if (half_null_half_empty) {
|
||||
for (int64_t i = 0; (i < datums->count()); i++) {
|
||||
if (i%2 == 0) {
|
||||
total_len -= datums->at(i).len_;
|
||||
datums->at(i).set_null();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fixed_len >= 0) {
|
||||
total_len = datums->count() * fixed_len;
|
||||
}
|
||||
ctx.build_string_stream_meta(fixed_len, is_use_zero_len_as_null, total_len);
|
||||
ctx.build_string_stream_encoder_info(type, false, &encoding_opt, nullptr, -1, &allocator_);
|
||||
int64_t bitmap_size = pad8(size);
|
||||
char *bitmap = new char[bitmap_size];
|
||||
memset(bitmap, 0, bitmap_size);
|
||||
generate_bitmap(bitmap, datums);
|
||||
|
||||
ObMicroBufferWriter writer;
|
||||
ObMicroBufferWriter all_string_writer;
|
||||
ASSERT_EQ(OB_SUCCESS, writer.init(OB_DEFAULT_MACRO_BLOCK_SIZE, OB_DEFAULT_MACRO_BLOCK_SIZE));
|
||||
ASSERT_EQ(OB_SUCCESS, all_string_writer.init(OB_DEFAULT_MACRO_BLOCK_SIZE, OB_DEFAULT_MACRO_BLOCK_SIZE));
|
||||
common::ObArray<uint32_t> offsets;
|
||||
|
||||
ObColumnDatumIter iter(*datums);
|
||||
ASSERT_EQ(OB_SUCCESS, encoder.encode(ctx, iter, writer, &all_string_writer, offsets));
|
||||
|
||||
ObStreamData str_data;
|
||||
ObStreamData raw_offset_data;
|
||||
// 1. decode integer_stream header
|
||||
ObIntegerStreamDecoderCtx offset_decoder_ctx;
|
||||
uint16_t meta_len = 0;
|
||||
if (!ctx.meta_.is_fixed_len_string()) {
|
||||
const char *int_stream_start = writer.data() + offsets[0];
|
||||
int64_t int_stream_len = offsets[1] - offsets[0];
|
||||
ObStreamData data(int_stream_start, int_stream_len);
|
||||
buid_raw_integer_stream_data(data, size, type, offset_decoder_ctx, raw_offset_data);
|
||||
}
|
||||
|
||||
// 2. build_decoding_ctx for string stream
|
||||
ObStringStreamDecoderCtx str_decode_ctx;
|
||||
const char *str_stream_start = writer.data();
|
||||
int64_t str_stream_meta_len = offsets[0];
|
||||
ObStreamData data2(str_stream_start, str_stream_meta_len);
|
||||
uint16_t str_meta_len = 0;
|
||||
ASSERT_EQ(OB_SUCCESS, ObStringStreamDecoder::build_decoder_ctx(data2, str_decode_ctx, str_meta_len));
|
||||
str_data.set(all_string_writer.data(), all_string_writer.length());
|
||||
|
||||
// 3. decode str
|
||||
ObColDatums *datums2 = new ObColDatums();
|
||||
ASSERT_EQ(OB_SUCCESS, datums2->resize(max_count));
|
||||
datums2->reuse();
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
ObDatum datum;
|
||||
ASSERT_EQ(OB_SUCCESS, datums2->push_back(datum));
|
||||
}
|
||||
|
||||
// test batch decode
|
||||
int64_t *row_ids = new int64_t[size];
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
row_ids[i] = i;
|
||||
}
|
||||
ObDatum *datums3 = new ObDatum[size];
|
||||
char *datums2_buf = new char[size * sizeof(uint64_t)];
|
||||
memset(datums2_buf, 0, size * sizeof(uint64_t));
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
datums3[i].ptr_ = (datums2_buf + i * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
uint64_t *ref_arr = new uint64_t[size];
|
||||
int64_t null_replaced_ref = size;
|
||||
uint32_t ref_width_V = ObRefStoreWidthV::NOT_REF;
|
||||
|
||||
ObBaseColumnDecoderCtx base_ctx;
|
||||
base_ctx.allocator_ = &allocator_;
|
||||
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::HAS_NO_NULL;
|
||||
base_ctx.null_desc_ = nullptr;
|
||||
if (use_nullbitmap) {
|
||||
base_ctx.null_bitmap_ = bitmap;
|
||||
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::HAS_NULL_BITMAP;
|
||||
} else if (use_null_replaced_ref) {
|
||||
ref_width_V = ObRefStoreWidthV::REF_IN_DATUMS;
|
||||
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::IS_NULL_REPLACED_REF;
|
||||
base_ctx.null_replaced_ref_ = null_replaced_ref;
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
if (use_null_replaced_ref && datums->at(i).is_null()) {
|
||||
datums3[i].pack_ = size;
|
||||
ref_arr[i] = size;
|
||||
} else {
|
||||
datums3[i].pack_ = i;
|
||||
ref_arr[i] = i;
|
||||
}
|
||||
}
|
||||
} else if (fixed_len < 0 && is_use_zero_len_as_null) {
|
||||
base_ctx.null_flag_ = ObBaseColumnDecoderCtx::ObNullFlag::IS_NULL_REPLACED;
|
||||
}
|
||||
|
||||
const uint8_t offset_width = str_decode_ctx.meta_.is_fixed_len_string() ?
|
||||
FIX_STRING_OFFSET_WIDTH_V : offset_decoder_ctx.meta_.width_;
|
||||
ConvertStringToDatumFunc convert_func = convert_string_to_datum_funcs
|
||||
[offset_width]
|
||||
[ref_width_V]
|
||||
[base_ctx.null_flag_]
|
||||
[false/*need_copy_V*/];
|
||||
convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, nullptr, row_ids, size, datums3);
|
||||
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
|
||||
LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]));
|
||||
::abort();
|
||||
}
|
||||
}
|
||||
|
||||
// disorder batch decode
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
datums3[i].reset();
|
||||
}
|
||||
int64_t random_idx = ObTimeUtility::current_time()%size;
|
||||
int64_t row_id = 0;
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
row_id = (i + random_idx) % size;
|
||||
row_ids[i] = row_id;
|
||||
if (use_null_replaced_ref && datums->at(row_id).is_null()) {
|
||||
datums3[i].pack_ = size;
|
||||
} else {
|
||||
datums3[i].pack_ = row_id;
|
||||
}
|
||||
if (i%9 == 0) {
|
||||
row_ids[i] = random_idx; //duplicate
|
||||
if (use_null_replaced_ref && datums->at(random_idx).is_null()) {
|
||||
datums3[i].pack_ = size;
|
||||
} else {
|
||||
datums3[i].pack_ = random_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
convert_func = convert_string_to_datum_funcs
|
||||
[offset_width]
|
||||
[ref_width_V]
|
||||
[base_ctx.null_flag_]
|
||||
[false/*need_copy_V*/];
|
||||
convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, nullptr, row_ids, size, datums3);
|
||||
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
|
||||
LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]));
|
||||
::abort();
|
||||
}
|
||||
}
|
||||
|
||||
// test batch decode with ref arr
|
||||
if (use_null_replaced_ref) {
|
||||
convert_func = convert_string_to_datum_funcs
|
||||
[offset_width]
|
||||
[ObRefStoreWidthV::REF_8_BYTE]
|
||||
[base_ctx.null_flag_]
|
||||
[false/*need_copy_V*/];
|
||||
convert_func(base_ctx, str_data.buf_, str_decode_ctx, raw_offset_data.buf_, (char*)ref_arr, row_ids, size, datums3);
|
||||
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
if (!ObDatum::binary_equal(datums->at(row_ids[i]), datums3[i])) {
|
||||
LOG_INFO("not equal", K(datums->at(row_ids[i])), K(datums3[i]), K(i), K(row_ids[i]), K(ref_arr[row_ids[i]]));
|
||||
::abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete []ref_arr;
|
||||
delete []row_ids;
|
||||
row_ids = nullptr;
|
||||
delete []datums3;
|
||||
datums3 = nullptr;
|
||||
delete datums2;
|
||||
datums2 = nullptr;
|
||||
delete datums;
|
||||
datums = nullptr;
|
||||
}
|
||||
|
||||
protected:
|
||||
ObArenaAllocator allocator_;
|
||||
share::ObTenantBase tenant_ctx_;
|
||||
};
|
||||
|
||||
// Drives test_and_check_str_datums through 13 option combinations (j = 0..12).
// Every combination starts from the same defaults (no compressor, all flags
// off) and overrides only the options listed in its case below. For each
// combination the datum count i starts at 1 and is advanced to
// i * (i + j + 1) for as long as it does not exceed max_count.
TEST_F(TestStringStream, test_datums_encoding)
{
  for (int64_t j = 0; j < 13; j++) {
    // Defaults shared by every combination.
    common::ObCompressorType compress_type = ObCompressorType::NONE_COMPRESSOR;
    bool use_zero_len_as_null = false;
    bool has_null = false;
    bool is_fix_len = false;
    bool use_nullbitmap = false;
    bool has_empty_string = false;
    bool all_null = false;
    bool all_empty = false;
    bool half_null_half_empty = false;
    bool use_null_replaced_ref = false;

    switch (j) {
      case 0:
        compress_type = NONE_COMPRESSOR;
        break;
      case 1:
        compress_type = LZ4_COMPRESSOR;
        break;
      case 2:
        compress_type = SNAPPY_COMPRESSOR;
        has_null = true;
        use_zero_len_as_null = true;
        break;
      case 3:
        compress_type = ZSTD_1_3_8_COMPRESSOR;
        is_fix_len = true;
        break;
      case 4:
        compress_type = SNAPPY_COMPRESSOR;
        is_fix_len = true;
        use_nullbitmap = true;
        has_null = true;
        break;
      case 5:
        compress_type = ZSTD_1_3_8_COMPRESSOR;
        has_null = true;
        use_nullbitmap = true;
        break;
      case 6:
        has_null = true;
        use_nullbitmap = true;
        has_empty_string = true;
        break;
      case 7:
        // all null, use null replace value
        all_null = true;
        break;
      case 8:
        // all empty string
        all_empty = true;
        break;
      case 9:
        // half null and half empty
        half_null_half_empty = true;
        use_nullbitmap = true;
        break;
      case 10:
        // null replace row id, no nulls requested
        compress_type = SNAPPY_COMPRESSOR;
        use_null_replaced_ref = true;
        break;
      case 11:
        // null replace row id, with nulls
        compress_type = SNAPPY_COMPRESSOR;
        use_null_replaced_ref = true;
        has_null = true;
        break;
      case 12:
        // null replace row id, all rows null
        compress_type = SNAPPY_COMPRESSOR;
        use_null_replaced_ref = true;
        all_null = true;
        break;
      default:
        break;
    }

    // The step depends on j, so each combination visits a different series of sizes.
    int64_t i = 1;
    while (i <= max_count) {
      LOG_INFO("round", K(i), K(j));
      test_and_check_str_datums(i,
                                compress_type,
                                use_zero_len_as_null,
                                has_null,
                                is_fix_len,
                                use_nullbitmap,
                                has_empty_string,
                                all_null,
                                all_empty,
                                half_null_half_empty,
                                use_null_replaced_ref);
      i = i * (i + j + 1);
    }
  }
}
|
||||
|
||||
} // end namespace blocksstable
|
||||
} // end namespace oceanbase
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
system("rm -f test_string_stream.log*");
|
||||
OB_LOGGER.set_file_name("test_string_stream.log", true, false);
|
||||
oceanbase::common::ObLogger::get_logger().set_log_level("INFO");
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
Reference in New Issue
Block a user