Files
doris/be/test/vec/exec/vgeneric_iterators_test.cpp
wangbo d17ed5e27a [vectorization](storage)support seq column in storage layer (#8186)
[vectorization](storage)support seq column in storage layer (#8186)
2022-02-23 12:23:31 +08:00

316 lines
10 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "vec/olap/vgeneric_iterators.h"

#include <gtest/gtest.h>

#include <cstring>
#include <vector>

#include "olap/olap_common.h"
#include "olap/row_block2.h"
#include "olap/schema.h"
#include "util/slice.h"
namespace doris {
namespace vectorized {
class VGenericIteratorsTest : public testing::Test {
public:
VGenericIteratorsTest() {}
virtual ~VGenericIteratorsTest() {}
};
// Builds the three-column schema (small int, int, big int) shared by the
// tests in this file. Every column is created with `true` as its third
// constructor argument (presumably the nullable flag - confirm against
// TabletColumn).
Schema create_schema() {
    std::vector<TabletColumn> columns;
    // c1: small int
    columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_SMALLINT, true);
    // c2: int
    columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_INT, true);
    // c3: big int
    columns.emplace_back(OLAP_FIELD_AGGREGATION_SUM, OLAP_FIELD_TYPE_BIGINT, true);
    // NOTE(review): the second argument looks like the number of key columns
    // (c1 and c2) - confirm against the Schema constructor.
    return Schema(columns, 2);
}
// Appends one freshly created column to `block` for every column described by
// `schema`. Columns whose descriptor reports is_nullable() get their data type
// wrapped in DataTypeNullable first.
//
// Uses gtest ASSERT_* macros, so a null descriptor or a missing data type
// records a fatal failure and returns from this helper early.
static void create_block(Schema& schema, vectorized::Block& block) {
    for (const auto& field : schema.columns()) {
        ASSERT_TRUE(field);

        auto type = Schema::get_data_type_ptr(field->type());
        ASSERT_NE(type, nullptr);
        if (field->is_nullable()) {
            type = std::make_shared<vectorized::DataTypeNullable>(std::move(type));
        }

        vectorized::ColumnWithTypeAndName entry(type->create_column(), type, field->name());
        block.insert(entry);
    }
}
// Reads a 10-row auto-increment iterator with a single next_batch() call and
// checks that row i holds i, i + 1 and i + 2 in columns 0, 1 and 2.
TEST(VGenericIteratorsTest, AutoIncrement) {
    auto schema = create_schema();

    auto iter = vectorized::new_auto_increment_iterator(schema, 10);
    StorageReadOptions opts;
    auto init_status = iter->init(opts);
    ASSERT_TRUE(init_status.ok());

    vectorized::Block block;
    create_block(schema, block);
    auto read_status = iter->next_batch(&block);
    ASSERT_TRUE(read_status.ok());
    ASSERT_EQ(block.rows(), 10);

    auto first_col = block.get_by_position(0).column;
    auto second_col = block.get_by_position(1).column;
    auto third_col = block.get_by_position(2).column;

    const size_t total_rows = block.rows();
    int expected = 0;
    for (size_t row = 0; row < total_rows; ++row, ++expected) {
        ASSERT_EQ(expected, (*first_col)[row].get<int>());
        ASSERT_EQ(expected + 1, (*second_col)[row].get<int>());
        ASSERT_EQ(expected + 2, (*third_col)[row].get<int>());
    }

    delete iter;
}
// Chains three auto-increment iterators (100, 200 and 300 rows) through a
// union iterator and checks that their rows come back one input after the
// other, 600 rows in total.
TEST(VGenericIteratorsTest, Union) {
    auto schema = create_schema();
    std::vector<RowwiseIterator*> inputs;
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 100));
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 200));
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 300));

    auto iter = vectorized::new_union_iterator(
            inputs, MemTracker::CreateTracker(-1, "VUnionIterator", nullptr, false));
    StorageReadOptions opts;
    auto st = iter->init(opts);
    ASSERT_TRUE(st.ok());

    vectorized::Block block;
    create_block(schema, block);
    // Keep reading into the same block until the iterator stops returning OK;
    // the only acceptable terminating status is end-of-file.
    do {
        st = iter->next_batch(&block);
    } while (st.ok());
    ASSERT_TRUE(st.is_end_of_file());
    ASSERT_EQ(block.rows(), 600u);

    auto c0 = block.get_by_position(0).column;
    auto c1 = block.get_by_position(1).column;
    auto c2 = block.get_by_position(2).column;

    const size_t rows = block.rows();
    for (size_t i = 0; i < rows; ++i) {
        // Rows [0, 100) come from the first input, [100, 300) from the second
        // and [300, 600) from the third; each input starts counting at 0.
        // The expected value is kept as int so it is compared against
        // get<int>() without mixing signed and unsigned operands.
        int expected = static_cast<int>(i);
        if (i >= 300) {
            expected -= 300;
        } else if (i >= 100) {
            expected -= 100;
        }
        ASSERT_EQ(expected, (*c0)[i].get<int>());
        ASSERT_EQ(expected + 1, (*c1)[i].get<int>());
        ASSERT_EQ(expected + 2, (*c2)[i].get<int>());
    }

    delete iter;
}
// Merges three auto-increment iterators (100, 200 and 300 rows) and checks
// that the 600 output rows are ordered by value, with equal values from
// different inputs placed next to each other.
TEST(VGenericIteratorsTest, Merge) {
    auto schema = create_schema();
    std::vector<RowwiseIterator*> inputs;
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 100));
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 200));
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 300));

    // NOTE(review): the trailing -1 is presumably "no sequence column" -
    // confirm against new_merge_iterator.
    auto iter = vectorized::new_merge_iterator(
            inputs, MemTracker::CreateTracker(-1, "VMergeIterator", nullptr, false), -1);
    StorageReadOptions opts;
    auto st = iter->init(opts);
    ASSERT_TRUE(st.ok());

    vectorized::Block block;
    create_block(schema, block);
    // Keep reading into the same block until the iterator stops returning OK;
    // the only acceptable terminating status is end-of-file.
    do {
        st = iter->next_batch(&block);
    } while (st.ok());
    ASSERT_TRUE(st.is_end_of_file());
    ASSERT_EQ(block.rows(), 600u);

    auto c0 = block.get_by_position(0).column;
    auto c1 = block.get_by_position(1).column;
    auto c2 = block.get_by_position(2).column;

    const size_t rows = block.rows();
    for (size_t i = 0; i < rows; ++i) {
        // 100 * 3, 200 * 2, 300:
        //   values [0, 100)   exist in all three inputs -> output rows [0, 300)
        //   values [100, 200) exist in two inputs       -> output rows [300, 500)
        //   values [200, 300) exist in one input        -> output rows [500, 600)
        // Kept as int so the comparison with get<int>() does not mix signed
        // and unsigned operands.
        const int row = static_cast<int>(i);
        int expected = 0;
        if (row < 300) {
            expected = row / 3;
        } else if (row < 500) {
            expected = (row - 300) / 2 + 100;
        } else {
            expected = row - 300;
        }
        ASSERT_EQ(expected, (*c0)[i].get<int>());
        ASSERT_EQ(expected + 1, (*c1)[i].get<int>());
        ASSERT_EQ(expected + 2, (*c2)[i].get<int>());
    }

    delete iter;
}
// only used for Seq Column UT
//
// Test-only RowwiseIterator producing synthetic rows: every column receives
// the constant 1 except the column at `seq_col_idx`, which receives a counter
// that starts at `seq_col_rows_returned` and grows by one per generated row.
class SeqColumnUtIterator : public RowwiseIterator {
public:
    // Will generate num_rows rows in total
    //
    // schema:                stored by reference, so it must outlive this iterator.
    // num_rows:              value the internal row counter runs up to.
    // rows_returned:         starting value of that counter; the iterator
    //                        emits (num_rows - rows_returned) rows.
    // seq_col_idx:           index of the column that receives the sequence value.
    // seq_col_rows_returned: sequence value written into the first emitted row.
    //
    // The last two arguments are stored as int; the conversion is spelled out
    // here instead of happening silently.
    SeqColumnUtIterator(const Schema& schema, size_t num_rows, size_t rows_returned,
                        size_t seq_col_idx, size_t seq_col_rows_returned)
            : _schema(schema),
              _num_rows(num_rows),
              _rows_returned(rows_returned),
              _seq_col_idx(static_cast<int>(seq_col_idx)),
              _seq_col_rows_returned(static_cast<int>(seq_col_rows_returned)) {}

    ~SeqColumnUtIterator() override {}

    // NOTE: Currently, this function will ignore StorageReadOptions
    Status init(const StorageReadOptions& /*opts*/) override { return Status::OK(); }

    // Appends every remaining row to `block` in a single call.
    // Returns OK when at least one row was appended, EndOfFile otherwise.
    Status next_batch(vectorized::Block* block) override {
        const int num_columns = static_cast<int>(_schema.num_columns());
        int rows_appended = 0;
        while (_rows_returned < _num_rows) {
            for (int j = 0; j < num_columns; ++j) {
                vectorized::ColumnWithTypeAndName& vc = block->get_by_position(j);
                // The block exposes its columns as const; the test writes into
                // them in place, so the constness is removed explicitly.
                auto& column = const_cast<vectorized::IColumn&>(*vc.column);

                const int value = (j == _seq_col_idx) ? _seq_col_rows_returned : 1;

                // Zero-filled staging buffer. Values are copied in with
                // memcpy rather than written through a casted pointer, which
                // would violate aliasing/alignment rules for a char array.
                char data[16] = {};
                size_t data_len = 0;
                switch (_schema.column(j)->type()) {
                case OLAP_FIELD_TYPE_SMALLINT:
                    data_len = store_as<int16_t>(data, value);
                    break;
                case OLAP_FIELD_TYPE_INT:
                    data_len = store_as<int32_t>(data, value);
                    break;
                case OLAP_FIELD_TYPE_BIGINT:
                    data_len = store_as<int64_t>(data, value);
                    break;
                case OLAP_FIELD_TYPE_FLOAT:
                    data_len = store_as<float>(data, value);
                    break;
                case OLAP_FIELD_TYPE_DOUBLE:
                    data_len = store_as<double>(data, value);
                    break;
                default:
                    // Other types are not populated: the zeroed buffer is
                    // handed over with a length of 0.
                    break;
                }
                column.insert_data(data, data_len);
            }
            ++_rows_returned;
            ++_seq_col_rows_returned;
            ++rows_appended;
        }
        if (rows_appended > 0) {
            return Status::OK();
        }
        return Status::EndOfFile("End of SeqColumnUtIterator");
    }

    const Schema& schema() const override { return _schema; }

    const Schema& _schema;
    size_t _num_rows;
    size_t _rows_returned;
    int _seq_col_idx = -1;
    int _seq_col_rows_returned = -1;

private:
    // Converts `value` to T, copies its bytes to the front of `dst` and
    // returns the number of bytes written (sizeof(T)).
    template <typename T>
    static size_t store_as(char* dst, int value) {
        const T typed = static_cast<T>(value);
        std::memcpy(dst, &typed, sizeof(T));
        return sizeof(T);
    }
};
// Merges ten single-row inputs whose key columns are identical and whose
// sequence column differs, then checks the order of the sequence values in
// the merged output.
TEST(VGenericIteratorsTest, MergeWithSeqColumn) {
    auto schema = create_schema();
    std::vector<RowwiseIterator*> inputs;

    const int seq_column_id = 2;
    const int seg_iter_num = 10;
    const int num_rows = 1;
    const int rows_begin = 0;
    // Every input yields exactly one row. The key columns are the same in all
    // inputs; only the seq column differs (input i carries seq value i).
    // Inputs are added in ascending seq order and the merged output is
    // expected in descending seq order.
    // NOTE(review): the original comment stated that only the row with the
    // largest seq id is kept per key, yet this test expects all
    // `seg_iter_num` rows back from the merge iterator - confirm where (if
    // anywhere) the de-duplication is meant to happen.
    for (int i = 0; i < seg_iter_num; i++) {
        const int seq_id_in_every_file = i;
        inputs.push_back(new SeqColumnUtIterator(schema, num_rows, rows_begin, seq_column_id,
                                                 seq_id_in_every_file));
    }

    auto iter = vectorized::new_merge_iterator(
            inputs, MemTracker::CreateTracker(-1, "VMergeIterator", nullptr, false),
            seq_column_id);
    StorageReadOptions opts;
    auto st = iter->init(opts);
    ASSERT_TRUE(st.ok());

    vectorized::Block block;
    create_block(schema, block);
    // Keep reading into the same block until the iterator stops returning OK;
    // the only acceptable terminating status is end-of-file.
    do {
        st = iter->next_batch(&block);
    } while (st.ok());
    ASSERT_TRUE(st.is_end_of_file());
    ASSERT_EQ(block.rows(), static_cast<size_t>(seg_iter_num));

    auto seq_col = block.get_by_position(seq_column_id).column;
    for (int i = 0; i < seg_iter_num; i++) {
        const int expected_value = seg_iter_num - i - 1; // in Descending
        const int actual_value = (*seq_col)[i].get<int>();
        ASSERT_EQ(expected_value, actual_value);
    }

    delete iter;
}
} // namespace vectorized
} // namespace doris
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}