Merge branch 'column_store'

Co-authored-by: wangt1xiuyi <13547954130@163.com>
Co-authored-by: yangqise7en <877793735@qq.com>
Co-authored-by: Zach41 <zach_41@163.com>
This commit is contained in:
chaser-ch
2023-10-31 15:39:20 +00:00
committed by ob-robot
parent 4057fbc4ae
commit 566e920620
1375 changed files with 239147 additions and 56014 deletions

View File

@ -21,6 +21,7 @@
#include "lib/string/ob_sql_string.h"
#include "../ob_row_generate.h"
#include "common/rowkey/ob_rowkey.h"
#include "share/ob_storage_format.h"
namespace oceanbase
{
@ -34,8 +35,17 @@ using namespace share::schema;
class TestMicroBlockDecoder : public ::testing::Test
{
public:
static const int64_t ROWKEY_CNT = 2;
static const int64_t COLUMN_CNT = ObExtendType - 1 + 4;
TestMicroBlockDecoder(): tenant_ctx_(500)
{
decode_res_pool_ = new(allocator_.alloc(sizeof(ObDecodeResourcePool))) ObDecodeResourcePool();
tenant_ctx_.set(decode_res_pool_);
share::ObTenantEnv::set_tenant(&tenant_ctx_);
encoder_.data_buffer_.allocator_.set_tenant_id(500);
encoder_.row_buf_holder_.allocator_.set_tenant_id(500);
decode_res_pool_->init();
}
static const int64_t ROWKEY_CNT = 1;
static const int64_t COLUMN_CNT = ObExtendType - 1 + 7;
static const int64_t ROW_CNT = 64;
virtual void SetUp();
@ -46,23 +56,64 @@ protected:
ObMicroBlockEncodingCtx ctx_;
common::ObArray<share::schema::ObColDesc> col_descs_;
ObMicroBlockEncoder encoder_;
ObTableReadInfo read_info_;
ObArenaAllocator allocator_;
ObObjType *col_obj_types_;
share::ObTenantBase tenant_ctx_;
ObDecodeResourcePool *decode_res_pool_;
int64_t extra_rowkey_cnt_;
int64_t column_cnt_;
int64_t full_column_cnt_;
int64_t rowkey_cnt_;
};
void TestMicroBlockDecoder::SetUp()
{
const int64_t tid = combine_id(1, 50001);
if (OB_NOT_NULL(col_obj_types_)) {
allocator_.free(col_obj_types_);
}
column_cnt_ = ObExtendType - 1 + 7;
rowkey_cnt_ = 2;
col_obj_types_ = reinterpret_cast<ObObjType *>(allocator_.alloc(sizeof(ObObjType) * column_cnt_));
for (int64_t i = 0; i < column_cnt_; ++i) {
ObObjType type = static_cast<ObObjType>(i + 1);
if (column_cnt_ - 1 == i) {
type = ObURowIDType;
} else if (column_cnt_ - 2 == i) {
type = ObIntervalYMType;
} else if (column_cnt_ - 3 == i) {
type = ObIntervalDSType;
} else if (column_cnt_ - 4 == i) {
type = ObTimestampTZType;
} else if (column_cnt_ - 5 == i) {
type = ObTimestampLTZType;
} else if (column_cnt_ - 6 == i) {
type = ObTimestampNanoType;
} else if (column_cnt_ - 7 == i) {
type = ObRawType;
} else if (type == ObExtendType || type == ObUnknownType) {
type = ObVarcharType;
}
col_obj_types_[i] = type;
}
extra_rowkey_cnt_ = ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt();
full_column_cnt_ = column_cnt_ + extra_rowkey_cnt_;
const int64_t tid = 200001;
ObTableSchema table;
ObColumnSchemaV2 col;
table.reset();
table.set_tenant_id(1);
table.set_tablegroup_id(combine_id(1, 1));
table.set_database_id(combine_id(1, 1));
table.set_tablegroup_id(1);
table.set_database_id(1);
table.set_table_id(tid);
table.set_table_name("test_micro_decoder_schema");
table.set_rowkey_column_num(ROWKEY_CNT);
table.set_max_column_id(COLUMN_CNT * 2);
table.set_table_name("test_micro_block_decoder_schema");
table.set_rowkey_column_num(rowkey_cnt_);
table.set_max_column_id(column_cnt_ * 2);
table.set_block_size(2 * 1024);
table.set_compress_func_name("none");
table.set_row_store_type(ENCODING_ROW_STORE);
table.set_storage_format_version(OB_STORAGE_FORMAT_VERSION_V4);
ObSqlString str;
for (int64_t i = 0; i < COLUMN_CNT; ++i) {
@ -71,14 +122,20 @@ void TestMicroBlockDecoder::SetUp()
col.set_column_id(i + OB_APP_MIN_COLUMN_ID);
str.assign_fmt("test%ld", i);
col.set_column_name(str.ptr());
ObObjType type = static_cast<ObObjType>(i + 1); // 0 is ObNullType
if (COLUMN_CNT - 1 == i) { // test urowid for last column
type = ObURowIDType;
} else if (type >= ObExtendType) {
type = ObVarcharType;
}
ObObjType type = col_obj_types_[i]; // 0 is ObNullType
col.set_data_type(type);
col.set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI);
if (ObVarcharType == type || ObCharType == type || ObHexStringType == type
|| ObNVarchar2Type == type || ObNCharType == type || ObTextType == type){
col.set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI);
if (ObCharType == type) {
const int64_t max_char_length = lib::is_oracle_mode()
? OB_MAX_ORACLE_CHAR_LENGTH_BYTE
: OB_MAX_CHAR_LENGTH;
col.set_data_length(max_char_length);
}
} else {
col.set_collation_type(CS_TYPE_BINARY);
}
if (type == ObIntType) {
col.set_rowkey_position(1);
} else if (type == ObUInt64Type) {
@ -88,123 +145,49 @@ void TestMicroBlockDecoder::SetUp()
}
ASSERT_EQ(OB_SUCCESS, table.add_column(col));
}
ASSERT_EQ(OB_SUCCESS, row_generate_.init(table, true/*multi_version*/));
ASSERT_EQ(OB_SUCCESS, table.get_multi_version_column_descs(col_descs_));
ASSERT_EQ(OB_SUCCESS, row_generate_.init(table));
ASSERT_EQ(OB_SUCCESS, row_generate_.get_schema().get_column_ids(col_descs_));
ASSERT_EQ(OB_SUCCESS, read_info_.init(allocator_,
row_generate_.get_schema().get_schema_version(),
row_generate_.get_schema().get_rowkey_column_num(),
lib::is_oracle_mode(),
col_descs_));
ctx_.micro_block_size_ = 64L << 10;
ctx_.micro_block_size_ = 64L << 11;
ctx_.macro_block_size_ = 2L << 20;
ctx_.rowkey_column_cnt_ = ROWKEY_CNT;
ctx_.column_cnt_ = COLUMN_CNT;
ctx_.rowkey_column_cnt_ = rowkey_cnt_ + extra_rowkey_cnt_;
ctx_.column_cnt_ = column_cnt_ + extra_rowkey_cnt_;
ctx_.col_descs_ = &col_descs_;
ctx_.row_store_type_ = common::ENCODING_ROW_STORE;
ctx_.compressor_type_ = common::ObCompressorType::NONE_COMPRESSOR;
ASSERT_EQ(OB_SUCCESS, encoder_.init(ctx_));
}
ObStoreRow row;
oceanbase::common::ObObj objs[COLUMN_CNT];
row.row_val_.cells_ = objs;
row.row_val_.count_ = COLUMN_CNT;
TEST_F(TestMicroBlockDecoder, decode_test)
{
ObDatumRow row;
ASSERT_EQ(OB_SUCCESS, row.init(allocator_, full_column_cnt_));
int64_t seeds[ROW_CNT];
for (int64_t i = 0; i < ROW_CNT; ++i) {
ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(row));
LOG_DEBUG("row", K(row));
seeds[i] = 10000 + i;
}
for (int64_t i = 0; i < ROW_CNT; ++i) {
ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seeds[i], row));
LOG_INFO("generate row", K(i), K(row));
ASSERT_EQ(OB_SUCCESS, encoder_.append_row(row)) << "i: " << i << std::endl;
}
char *buf = NULL;
int64_t size = 0;
ASSERT_EQ(OB_SUCCESS, encoder_.build_block(buf, size));
}
TEST_F(TestMicroBlockDecoder, perf)
{
int64_t i_size = sizeof(ObIEncodeBlockReader);
int64_t r_size = sizeof(ObEncodeBlockGetReader);
LOG_INFO("class size", K(i_size), K(r_size));
const int64_t count = 100;
int64_t i = 0;
int64_t start = 0;
int64_t duration = 0;
int ret = common::OB_SUCCESS;
ObMicroBlockData data(encoder_.get_data().data(), encoder_.get_data().pos());
start = common::ObTimeUtility::current_time();
for (i = 0; i < count; ++i) {
ObEncodeBlockGetReader reader;
UNUSED(reader);
}
duration = common::ObTimeUtility::current_time() - start;
LOG_INFO("perf ctr", K(ret), K(i), K(duration));
ObEncodeBlockGetReader reader2;
start = common::ObTimeUtility::current_time();
for (i = 0; OB_SUCC(ret) && i < count; ++i) {
reader2.reuse();
ret = reader2.init_by_read_info(data, read_info_);
}
duration = common::ObTimeUtility::current_time() - start;
LOG_INFO("perf init", K(ret), K(i), K(duration));
start = common::ObTimeUtility::current_time();
for (i = 0; OB_SUCC(ret) && i < count; ++i) {
ObEncodeBlockGetReader reader3;
ret = reader3.init_by_read_info(data, read_info_);
}
duration = common::ObTimeUtility::current_time() - start;
LOG_INFO("perf all", K(ret), K(i), K(duration));
}
TEST_F(TestMicroBlockDecoder, read)
{
ObMicroBlockDecoder decoder;
ObMicroBlockData data(encoder_.get_data().data(), encoder_.get_data().pos());
ASSERT_EQ(OB_SUCCESS, decoder.init(data, read_info_)) << "buffer size: " << data.get_buf_size() << std::endl;
ObMicroBlockDecoder batch_decoder;
ObMicroBlockData batch_data(encoder_.get_data().data(), encoder_.get_data().pos());
ASSERT_EQ(OB_SUCCESS, batch_decoder.init(batch_data, read_info_));
int64_t iter = 0;
ObStoreRow *rows = new ObStoreRow[ObIMicroBlockReader::OB_MAX_BATCH_ROW_COUNT];
char *obj_buf = new char[common::OB_ROW_MAX_COLUMNS_COUNT * sizeof(ObObj) * ObIMicroBlockReader::OB_MAX_BATCH_ROW_COUNT];
for (int64_t i = 0; i < ObIMicroBlockReader::OB_MAX_BATCH_ROW_COUNT; ++i) {
rows[i].row_val_.cells_ = reinterpret_cast<ObObj *>(obj_buf) + i * common::OB_ROW_MAX_COLUMNS_COUNT;
rows[i].row_val_.count_ = common::OB_ROW_MAX_COLUMNS_COUNT;
}
int64_t batch_count = 0;
int64_t batch_num = ROW_CNT;
ASSERT_EQ(OB_SUCCESS, batch_decoder.get_rows(iter, iter + batch_num,
ObIMicroBlockReader::OB_MAX_BATCH_ROW_COUNT, rows, batch_count));
ASSERT_LE(batch_count, batch_num);
LOG_INFO("batch get rows", K(batch_count), K(batch_num));
ObStoreRow single_row;
char *single_row_buf = new char[common::OB_ROW_MAX_COLUMNS_COUNT * sizeof(ObObj)];
for (int64_t i = 0; i < batch_count; ++i) {
const ObStoreRow *cur_row = rows + i;
LOG_INFO("read row", K(i), K(*cur_row));
bool exist = false;
ASSERT_EQ(OB_SUCCESS, row_generate_.check_one_row(*cur_row, exist));
ASSERT_TRUE(exist) << "\n index: " << i;
single_row.row_val_.cells_ = reinterpret_cast<ObObj *>(single_row_buf);
single_row.row_val_.count_ = common::OB_ROW_MAX_COLUMNS_COUNT;
int64_t tmp_iter = i;
ASSERT_EQ(OB_SUCCESS, decoder.get_row(tmp_iter, single_row));
LOG_INFO("read row", K(i), K(single_row));
ASSERT_EQ(OB_SUCCESS, row_generate_.check_one_row(single_row, exist));
ASSERT_TRUE(exist) << "\n index: " << i;
ASSERT_EQ(cur_row->row_val_.count_, single_row.row_val_.count_);
for (int64_t j = 0; j < single_row.row_val_.count_; ++j) {
ASSERT_TRUE(cur_row->row_val_.cells_[j] == single_row.row_val_.cells_[j])
<< "\n i: " << i << " j: " << j
<< "\n writer: "<< to_cstring(single_row.row_val_.cells_[j])
<< "\n reader: " << to_cstring(cur_row->row_val_.cells_[j]);
ObMicroBlockData data(encoder_.data_buffer_.data(), encoder_.data_buffer_.length());
LOG_INFO("col_descs before init decoder : ",K(col_descs_));
ASSERT_EQ(OB_SUCCESS, decoder.init(data, nullptr)) << "buffer size: " << data.get_buf_size() << std::endl;
ObDatumRow single_row;
ASSERT_EQ(OB_SUCCESS, single_row.init(allocator_, full_column_cnt_));
for (int64_t i = 0; i < ROW_CNT; ++i) {
ASSERT_EQ(OB_SUCCESS, decoder.get_row(i, single_row));
ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seeds[i], row));
LOG_INFO("Current row: ", K(i));
for (int64_t j = 0; j < full_column_cnt_; ++j) {
LOG_INFO("current col: ", K(j), K(single_row.storage_datums_[j]), K(row.storage_datums_[j]));
ASSERT_TRUE(ObDatum::binary_equal(single_row.storage_datums_[j], row.storage_datums_[j]));
}
}
}
@ -214,6 +197,8 @@ TEST_F(TestMicroBlockDecoder, read)
int main(int argc, char **argv)
{
system("rm -f test_micro_block_decoder.log*");
OB_LOGGER.set_file_name("test_micro_block_decoder.log", true, false);
oceanbase::common::ObLogger::get_logger().set_log_level("INFO");
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();