[FEAT MERGE] impl vectorization 2.0

Co-authored-by: Naynahs <cfzy002@126.com>
Co-authored-by: hwx65 <1780011298@qq.com>
Co-authored-by: oceanoverflow <oceanoverflow@gmail.com>
This commit is contained in:
obdev
2023-12-22 03:43:19 +00:00
committed by ob-robot
parent 1178245448
commit b6773084c6
592 changed files with 358124 additions and 303288 deletions

View File

@ -37,17 +37,19 @@ public:
virtual void TearDown() {}
void init_schema(const int64_t col_count, const ObObjType *col_obj_types);
void init_skip_index_meta(const int64_t idx_col_count, const int64_t *min_max_col_idxs);
// Appends one SK_IDX_SUM meta to full_agg_metas_ for each column index in sum_col_idxs.
void init_sum_meta(const int64_t idx_col_count, const int64_t *sum_col_idxs);
void generate_row_by_seed(const int64_t seed, ObDatumRow &datum_row);
void reset_min_max_row();
void update_min_max_row(const ObDatumRow &row);
// Accumulates the non-null numeric (non-bit) columns of row into sum_res, indexed by
// column id; data is per-column scratch space for the converted addend.
void update_sum_row(const ObDatumRow &row, ObObj *sum_res, ObObj *data);
// Checks each aggregated sum datum (one per entry of full_agg_metas_) against sum_res;
// nop columns and non-summable column types are expected to be nop in agg_row.
void validate_sum_agg_row(const ObDatumRow &agg_row, const ObObj *sum_res, int64_t nop_col_cnt= 0, int64_t *nop_col_idxs = nullptr);
void validate_agg_row(const ObDatumRow &row, int64_t nop_col_cnt = 0, int64_t *nop_col_idxs = nullptr, ObSkipIndexColType *nop_col_types = nullptr);
void set_nop_cols(ObDatumRow &row, int64_t nop_col_cnt = 0, int64_t *nop_col_idxs = nullptr, ObSkipIndexColType *nop_col_types = nullptr);
bool is_col_in_nop_col_arr(const int64_t col_idx, const int64_t nop_col_cnt, int64_t *nop_col_idxs, int64_t &index);
void serialize_agg_row(const ObDatumRow &agg_row, const char *&row_buf, int64_t &row_size);
void get_cmp_func(const ObColDesc &col_desc, ObStorageDatumCmpFunc &cmp_func);
private:
ObArenaAllocator allocator_;
ObRowGenerate row_generate_;
ObArray<ObColDesc> col_descs_;
@ -121,6 +123,19 @@ void TestIndexBlockAggregator::init_skip_index_meta(
}
}
// Registers a SUM skip-index meta for every requested column.
//
// @param idx_col_count  number of entries in sum_col_idxs
// @param sum_col_idxs   column indexes to aggregate with SK_IDX_SUM
//
// Each meta is appended to full_agg_metas_; a failed push_back aborts the
// helper through a fatal gtest assertion.
void TestIndexBlockAggregator::init_sum_meta(
    const int64_t idx_col_count, const int64_t *sum_col_idxs)
{
  for (int64_t i = 0; i < idx_col_count; ++i) {
    ObSkipIndexColMeta meta;
    meta.col_idx_ = sum_col_idxs[i];
    meta.col_type_ = SK_IDX_SUM;
    ASSERT_EQ(OB_SUCCESS, full_agg_metas_.push_back(meta));
  }
}
void TestIndexBlockAggregator::generate_row_by_seed(const int64_t seed, ObDatumRow &datum_row)
{
// if (0 == seed) {
@ -181,6 +196,21 @@ void TestIndexBlockAggregator::update_min_max_row(const ObDatumRow &row)
}
}
void TestIndexBlockAggregator::update_sum_row(const ObDatumRow &row, ObObj *sum_res, ObObj *data)
{
for (int64_t col_id = 0; col_id < row.get_column_count(); ++col_id) {
const ObObjMeta col_type = col_descs_[col_id].col_type_;
if (!col_type.is_numeric_type()|| col_type.get_type_class() == ObObjTypeClass::ObBitTC || row.storage_datums_[col_id].is_null()) {
} else if (sum_res[col_id].is_null()) {
row.storage_datums_[col_id].to_obj(sum_res[col_id], col_type);
} else {
row.storage_datums_[col_id].to_obj(data[col_id], col_type);
ASSERT_EQ(OB_SUCCESS, sql::ObExprAdd::calc(sum_res[col_id], data[col_id], sum_res[col_id],
&allocator_, col_type.get_scale()));
}
}
}
void TestIndexBlockAggregator::validate_agg_row(
const ObDatumRow &datum_row, int64_t nop_col_cnt, int64_t *nop_col_idxs, ObSkipIndexColType *nop_col_types)
{
@ -223,6 +253,54 @@ void TestIndexBlockAggregator::validate_agg_row(
}
}
void TestIndexBlockAggregator::validate_sum_agg_row(const ObDatumRow &agg_row, const ObObj *sum_res,
int64_t nop_col_cnt, int64_t *nop_col_idxs)
{
for (int64_t i = 0; i < full_agg_metas_.count(); ++i) {
ObSkipIndexColMeta idx_meta = full_agg_metas_.at(i);
const int64_t col_idx = idx_meta.col_idx_;
const ObObjMeta col_type = col_descs_[col_idx].col_type_;
int64_t index = 0;
bool is_nop_column = is_col_in_nop_col_arr(col_idx, nop_col_cnt, nop_col_idxs, index);
if (is_nop_column || !col_type.is_numeric_type()|| col_type.get_type_class() == ObObjTypeClass::ObBitTC) {
ASSERT_TRUE(agg_row.storage_datums_[i].is_nop());
} else {
const ObObjTypeClass obj_tc = col_type.get_type_class();
switch (obj_tc) {
case ObObjTypeClass::ObIntTC:
case ObObjTypeClass::ObUIntTC:
case ObObjTypeClass::ObDecimalIntTC:
case ObObjTypeClass::ObNumberTC: {
int cmp = 0;
ObObj agg;
agg.set_number(agg_row.storage_datums_[i].get_number());
ASSERT_EQ(0, sum_res[i].compare(agg, cmp));
break;
}
case ObObjTypeClass::ObFloatTC: {
ObObj agg_obj;
agg_row.storage_datums_[i].to_obj(agg_obj, col_type);
int cmp = 0;
ASSERT_EQ(0, sum_res[col_idx].compare(agg_obj, cmp));
break;
}
case ObObjTypeClass::ObDoubleTC: {
ObObj agg_obj;
agg_row.storage_datums_[i].to_obj(agg_obj, col_type);
int cmp = 0;
ASSERT_EQ(0, sum_res[col_idx].compare(agg_obj, cmp));
break;
}
default: {
int ret = OB_ERR_UNEXPECTED;
STORAGE_LOG(WARN, "unexpect type", K(obj_tc));
break;
}
}
}
}
}
void TestIndexBlockAggregator::set_nop_cols(
ObDatumRow &row, int64_t nop_col_cnt, int64_t *nop_col_idxs, ObSkipIndexColType *nop_col_types)
{
@ -431,6 +509,116 @@ TEST_F(TestIndexBlockAggregator, basic_aggregate)
}
// Exercises SUM skip-index aggregation over int / float / double / char columns.
//
// Part 1 (7 rounds): feeds random rows into a data aggregator, re-evaluates the
// serialized aggregate through a second data-level aggregator and an index-level
// aggregator, and validates all three results against sums tracked by
// update_sum_row().
// Part 2: same flow, but with one column forced to nop in the data aggregate
// before it is evaluated by the index aggregator.
TEST_F(TestIndexBlockAggregator, test_sum)
{
  static const int64_t test_column_cnt = 4;
  const int64_t test_row_cnt = 10;
  const int64_t extra_rowkey_cnt = ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt();
  ObObjType col_obj_types[test_column_cnt];
  col_obj_types[0] = ObIntType;
  col_obj_types[1] = ObFloatType;
  col_obj_types[2] = ObDoubleType;
  col_obj_types[3] = ObCharType;
  init_schema(test_column_cnt, col_obj_types);

  // Aggregated column indexes skip over the extra multi-version rowkey columns.
  int64_t sum_col_idxs[test_column_cnt];
  for (int64_t i = 0; i < test_column_cnt; ++i) {
    const int64_t agg_col_idx = i < rowkey_count_ ? i : i + extra_rowkey_cnt;
    sum_col_idxs[i] = agg_col_idx;
  }

  // Expected sums and scratch addends, both indexed by full column id.
  ObObj data[test_column_cnt + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt()];
  ObObj sum_res[test_column_cnt + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt()];
  const int64_t full_col_cnt = test_column_cnt + extra_rowkey_cnt;
  init_sum_meta(test_column_cnt, sum_col_idxs);

  ObSkipIndexAggregator data_aggregator;
  ObSkipIndexAggregator reuse_data_aggregator;
  ObSkipIndexAggregator index_aggregator;
  ObDatumRow data_agg_result;
  ObDatumRow reuse_data_agg_result;
  ObDatumRow index_agg_result;
  ASSERT_EQ(OB_SUCCESS, data_agg_result.init(full_agg_metas_.count()));
  ASSERT_EQ(OB_SUCCESS, reuse_data_agg_result.init(full_agg_metas_.count()));
  ASSERT_EQ(OB_SUCCESS, index_agg_result.init(full_agg_metas_.count()));

  for (int64_t test_round = 0; test_round < 7; ++test_round) {
    // The aggregators start from scratch every round, so the expected sums
    // must start from scratch as well.
    for (int64_t col_id = 0; col_id < full_col_cnt; ++col_id) {
      sum_res[col_id].set_null();
    }
    data_agg_result.reuse();
    reuse_data_agg_result.reuse();
    index_agg_result.reuse();
    ASSERT_EQ(OB_SUCCESS, data_aggregator.init(full_agg_metas_, col_descs_, true, data_agg_result, allocator_));
    ASSERT_EQ(OB_SUCCESS, reuse_data_aggregator.init(full_agg_metas_, col_descs_, true, reuse_data_agg_result, allocator_));
    ASSERT_EQ(OB_SUCCESS, index_aggregator.init(full_agg_metas_, col_descs_, false, index_agg_result, allocator_));

    const ObDatumRow *data_agg_row = nullptr;
    const ObDatumRow *reuse_data_agg_row = nullptr;
    const ObDatumRow *index_agg_row = nullptr;

    ObDatumRow generate_row;
    ASSERT_EQ(OB_SUCCESS, generate_row.init(full_column_count_));
    for (int64_t i = 0; i < test_row_cnt; ++i) {
      const int64_t seed = random() % test_row_cnt;
      generate_row_by_seed(seed, generate_row);
      update_sum_row(generate_row, sum_res, data);
      ASSERT_EQ(OB_SUCCESS, data_aggregator.eval(generate_row));
      ASSERT_EQ(OB_SUCCESS, data_aggregator.get_aggregated_row(data_agg_row));
      ASSERT_TRUE(nullptr != data_agg_row);

      // Round-trip the cumulative data aggregate through its serialized form.
      const char *row_buf = nullptr;
      int64_t row_size = 0;
      serialize_agg_row(*data_agg_row, row_buf, row_size);
      ASSERT_TRUE(nullptr != row_buf);
      ASSERT_EQ(OB_SUCCESS, reuse_data_aggregator.eval(row_buf, row_size, i));
      ASSERT_EQ(OB_SUCCESS, reuse_data_aggregator.get_aggregated_row(reuse_data_agg_row));
      ASSERT_TRUE(nullptr != reuse_data_agg_row);

      // NOTE(review): `i / 2` is zero only for i in {0, 1}; if the intent was to
      // alternate between the two eval() overloads this should be `i % 2`.
      if (0 == i / 2) {
        ASSERT_EQ(OB_SUCCESS, index_aggregator.eval(*data_agg_row));
      } else {
        ASSERT_EQ(OB_SUCCESS, index_aggregator.eval(row_buf, row_size, i));
      }
      ASSERT_EQ(OB_SUCCESS, index_aggregator.get_aggregated_row(index_agg_row));
      ASSERT_TRUE(nullptr != index_agg_row);

      validate_sum_agg_row(*data_agg_row, sum_res);
      validate_sum_agg_row(*reuse_data_agg_row, sum_res);
      validate_sum_agg_row(*index_agg_row, sum_res);
      // These two only ever see the cumulative data aggregate, so they are
      // cleared after every row.
      reuse_data_aggregator.reuse();
      index_aggregator.reuse();
    }
    data_aggregator.reset();
    reuse_data_aggregator.reset();
    index_aggregator.reset();
  }

  // test nop agg
  const int64_t nop_col_cnt = 1;
  int64_t nop_col_idxs[nop_col_cnt] = {3};
  for (int64_t col_id = 0; col_id < full_col_cnt; ++col_id) {
    sum_res[col_id].set_null();
  }
  data_agg_result.reuse();
  index_agg_result.reuse();
  ASSERT_EQ(OB_SUCCESS, data_aggregator.init(full_agg_metas_, col_descs_, true, data_agg_result, allocator_));
  ASSERT_EQ(OB_SUCCESS, index_aggregator.init(full_agg_metas_, col_descs_, false, index_agg_result, allocator_));

  const ObDatumRow *data_agg_row = nullptr;
  const ObDatumRow *index_agg_row = nullptr;

  ObDatumRow generate_row;
  ASSERT_EQ(OB_SUCCESS, generate_row.init(full_column_count_));
  for (int64_t i = 0; i < test_row_cnt; ++i) {
    const int64_t seed = random() % test_row_cnt;
    generate_row_by_seed(seed, generate_row);
    update_sum_row(generate_row, sum_res, data);
    ASSERT_EQ(OB_SUCCESS, data_aggregator.eval(generate_row));
    ASSERT_EQ(OB_SUCCESS, data_aggregator.get_aggregated_row(data_agg_row));
    ASSERT_TRUE(nullptr != data_agg_row);
    // Force the nop columns on the data aggregate before the index level sees it.
    set_nop_cols(*const_cast<ObDatumRow *>(data_agg_row), nop_col_cnt, nop_col_idxs);

    ASSERT_EQ(OB_SUCCESS, index_aggregator.eval(*data_agg_row));
    ASSERT_EQ(OB_SUCCESS, index_aggregator.get_aggregated_row(index_agg_row));
    ASSERT_TRUE(nullptr != index_agg_row);

    validate_sum_agg_row(*data_agg_row, sum_res, nop_col_cnt, nop_col_idxs);
    validate_sum_agg_row(*index_agg_row, sum_res, nop_col_cnt, nop_col_idxs);
    index_aggregator.reuse();
  }
}
}
}