[FEAT MERGE] impl vectorization 2.0

Co-authored-by: Naynahs <cfzy002@126.com>
Co-authored-by: hwx65 <1780011298@qq.com>
Co-authored-by: oceanoverflow <oceanoverflow@gmail.com>
obdev authored 2023-12-22 03:43:19 +00:00, committed by ob-robot
parent 1178245448
commit b6773084c6
592 changed files with 358124 additions and 303288 deletions

View File

@@ -7,3 +7,8 @@
#aggr_unittest(test_merge_groupby)
#aggr_unittest(test_scalar_aggregate)
#aggr_unittest(test_merge_distinct)
function(aggr_unittest2 case)
  sql_unittest(${ARGV})
  target_sources(${case} PRIVATE ../test_op_engine.cpp ../ob_fake_table_scan_vec_op.cpp)
endfunction()
aggr_unittest2(test_hash_groupby2)
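Design note: aggr_unittest2 forwards its arguments to sql_unittest and then compiles the shared harness sources (test_op_engine.cpp plus the fake vectorized table-scan operator) into the case target, so a vectorized aggregate test is registered with the single call shown above for test_hash_groupby2.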

View File

@@ -0,0 +1,8 @@
digit_data_format=4
string_data_format=4
data_range_level=0
skips_probability=10
nulls_probability=30
round=10
batch_size=256
output_result_to_file=1
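For orientation, this .cfg is plain key=value text that the harness loads at startup (main() below calls ObTestOpConfig::get_instance().init()). A minimal standalone reader for the format might look like the following sketch; load_cfg and everything around it is hypothetical, not the harness's actual parser:

// Hypothetical sketch: read the key=value .cfg format shown above into a map.
// The real harness loads these knobs in ObTestOpConfig::init(); this is not that code.
#include <fstream>
#include <map>
#include <string>

std::map<std::string, int> load_cfg(const std::string &path)
{
  std::map<std::string, int> kv;
  std::ifstream in(path);
  std::string line;
  while (std::getline(in, line)) {
    if (line.empty() || line[0] == '#') {
      continue;  // skip blank lines and comments
    }
    const std::string::size_type eq = line.find('=');
    if (eq == std::string::npos) {
      continue;  // ignore malformed lines
    }
    kv[line.substr(0, eq)] = std::stoi(line.substr(eq + 1));
  }
  return kv;  // e.g. kv["batch_size"] == 256 for the file above
}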

View File

@@ -0,0 +1,135 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
// #define USING_LOG_PREFIX SQL_ENGINE
#define USING_LOG_PREFIX COMMON
#include <cstring>   // strcpy
#include <fstream>   // std::ifstream in the commented-out your_own_test below
#include <iterator>
#include <string>
#include <vector>
#include <gtest/gtest.h>
#include "../test_op_engine.h"
#include "../ob_test_config.h"
using namespace ::oceanbase::sql;
namespace test
{
class TestHashGroupByVec : public TestOpEngine
{
public:
  TestHashGroupByVec();
  virtual ~TestHashGroupByVec();
  virtual void SetUp();
  virtual void TearDown();

private:
  // disallow copy
  DISALLOW_COPY_AND_ASSIGN(TestHashGroupByVec);

protected:
  // function members

protected:
  // data members
};
TestHashGroupByVec::TestHashGroupByVec()
{
  std::string schema_filename = ObTestOpConfig::get_instance().test_filename_prefix_ + ".schema";
  strcpy(schema_file_path_, schema_filename.c_str());
}

TestHashGroupByVec::~TestHashGroupByVec()
{}

void TestHashGroupByVec::SetUp()
{
  TestOpEngine::SetUp();
}

void TestHashGroupByVec::TearDown()
{
  destroy();
}
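// basic_test feeds every query in <test_filename_prefix_>.test through the
// harness; judging from the companion sweep script, basic_random_test is
// expected to return 0 when the vectorization 2.0 operator and the original
// operator agree on the output (the comparison itself lives in TestOpEngine).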
TEST_F(TestHashGroupByVec, basic_test)
{
  std::string test_file_path = ObTestOpConfig::get_instance().test_filename_prefix_ + ".test";
  int ret = basic_random_test(test_file_path);
  EXPECT_EQ(ret, 0);
}
// TEST_F(TestHashGroupByVec, basic_test2)
// {
//   int ret = OB_SUCCESS;
//   std::string test_file_path = ObTestOpConfig::get_instance().test_filename_prefix_ + ".test";
//   if (OB_FAIL(basic_random_test_output_to_file(test_file_path, true))) {
//     LOG_ERROR("Some error occurred while running the vectorization 2.0 operator", K(ret));
//   } else if (OB_FAIL(basic_random_test_output_to_file(test_file_path, false))) {
//     LOG_ERROR("Some error occurred while running the original operator", K(ret));
//   }
//   EXPECT_EQ(ret, 0);
// }

// TEST_F(TestHashGroupByVec, your_own_test)
// {
//   int ret = OB_SUCCESS;
//   std::string test_file_path = ObTestOpConfig::get_instance().test_filename_prefix_ + ".test";
//   std::ifstream if_tests(test_file_path);
//   if (if_tests.is_open() == false) { return; }
//   std::string line;
//   while (std::getline(if_tests, line)) {
//     // handle query
//     if (line.size() <= 0) continue;
//     if (line.at(0) == '#') continue;
//     ObOperator *root = NULL;
//     ObExecutor executor;
//     if (OB_FAIL(get_tested_op_from_string(line, false, root, executor))) {
//       LOG_WARN("generate tested op fail, sql: ", K(line.data()));
//     } else {
//       int round = 1;
//       const int64_t max_row_cnt = 256;
//       const ObBatchRows *child_brs = nullptr;
//       LOG_INFO("============== Final output ===============", K(round));
//       while (!root->brs_.end_) {
//         if (OB_FAIL(root->get_next_batch(max_row_cnt, child_brs))) {
//           LOG_ERROR("root op fail to get_next_batch data", K(ret));
//           break;
//         }
//       }
//     }
//   }
// }
} // namespace test
int main(int argc, char **argv)
{
  ObTestOpConfig::get_instance().test_filename_prefix_ = "test_hash_groupby2";
  for (int i = 1; i < argc; i++) {
    if (strcmp(argv[i], "-bg") == 0) {
      ObTestOpConfig::get_instance().test_filename_prefix_ += "_bg";
      ObTestOpConfig::get_instance().run_in_background_ = true;
    }
  }
  ObTestOpConfig::get_instance().init();

  // remove logs from previous runs so each run's log is self-contained
  system(("rm -f " + ObTestOpConfig::get_instance().test_filename_prefix_ + ".log").data());
  system(("rm -f " + ObTestOpConfig::get_instance().test_filename_prefix_ + ".log.*").data());

  oceanbase::common::ObClockGenerator::init();
  observer::ObReqTimeGuard req_timeinfo_guard;
  OB_LOGGER.set_log_level("INFO");
  OB_LOGGER.set_file_name((ObTestOpConfig::get_instance().test_filename_prefix_ + ".log").data(), true);
  init_sql_factories();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
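Usage note: running the binary directly exercises the foreground flow; passing -bg switches the filename prefix to test_hash_groupby2_bg and sets run_in_background_, which is how the sweep script further below invokes it once per generated .cfg.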

View File

@@ -0,0 +1,5 @@
#create database opt;
#use opt;
create table t1(c1 int, c2 int);
create table t2(c1 int, c2 int);
create table t3(c1 int, c2 int, c3 double, c4 char(20), c5 varchar(40));

View File

@@ -0,0 +1,98 @@
#!/bin/bash
# data format
# enum VectorFormat: uint8_t
#{
# VEC_INVALID = 0,
# VEC_FIXED,
# VEC_DISCRETE,
# VEC_CONTINUOUS,
# VEC_UNIFORM,
# VEC_UNIFORM_CONST,
# VEC_MAX_FORMAT
#};
#
# batch size
batch_size_round=(1 50 150 256)
# 4 rounds test cases
round_array=(10 100 1000 10000)
# 3 data range test cases
data_range_level_array=(0 1 2)
# 3 skips_probability test cases
skips_probability_array=(0 30 80)
# 3 nulls_probability test cases
nulls_probability_array=(0 30 80)
# 6 combined data format test cases
# 4 = VEC_UNIFORM, 1 = VEC_FIXED
fix_data_format_array=("fix_data_format=4" "fix_data_format=1")
# 4 = VEC_UNIFORM, 2 = VEC_DISCRETE, 3 = VEC_CONTINUOUS
#string_data_format_array=("string_data_format=4" "string_data_format=2" "string_data_format=3")
string_data_format_array=("string_data_format=4")
test_file_prefix="./test_hash_groupby2_bg"
cfg_file="./test_hash_groupby2_bg.cfg"
origin_result_file="./origin_result_bg.data"
vec_result_file="./vec_result_bg.data"
test_case_round=1
for batch_size in ${batch_size_round[@]}
do
  for round in ${round_array[@]}
  do
    for data_range_level in ${data_range_level_array[@]}
    do
      for skips_probability in ${skips_probability_array[@]}
      do
        for nulls_probability in ${nulls_probability_array[@]}
        do
          for fix_data_format in ${fix_data_format_array[@]}
          do
            for string_data_format in ${string_data_format_array[@]}
            do
              > ${cfg_file}
              echo "batch_size="${batch_size} >> ${cfg_file}
              echo "output_result_to_file=1" >> ${cfg_file}
              echo "round="${round} >> ${cfg_file}
              echo "data_range_level="${data_range_level} >> ${cfg_file}
              echo "skips_probability="${skips_probability} >> ${cfg_file}
              echo "nulls_probability="${nulls_probability} >> ${cfg_file}
              echo ${fix_data_format} >> ${cfg_file}
              echo ${string_data_format} >> ${cfg_file}
              echo "###################"
              echo "Test Case Round: "${test_case_round}
              echo "{"
              echo "round: "$round
              echo "data_range_level: "${data_range_level}
              echo "skips_probability: "${skips_probability}
              echo "nulls_probability: "${nulls_probability}
              echo "fix_data_format: "${fix_data_format}
              echo "string_data_format: "${string_data_format}
              echo "}"
              echo "###################"
              ./test_hash_groupby2_bg -bg
              sort $origin_result_file -o $origin_result_file
              sort $vec_result_file -o $vec_result_file
              diff $origin_result_file $vec_result_file > /dev/null
              if [ $? -eq 0 ]; then
                echo "Both result files are the same!"
              else
                echo "Got incorrect result! Exiting!"
                exit 1
              fi
              test_case_round=$((test_case_round+1))
            done
          done
        done
      done
    done
  done
done
echo "Done"

View File

@@ -0,0 +1,8 @@
#select c2, sum(c1), min(c1), max(c1) from t1 group by c2;
#select/*+USE_HASH_AGGREGATION*/ c2, count(c1), sum(c1 + 1), min(c1*2), max(c1 - 1) from t1 group by c2;
#select /*+leading(t1, t2) USE_HASH(t1, t2)*/* from t1, t2 where t1.c1 = t2.c1;
#select /*+leading(t1, t2) USE_HASH(t1, t2)*/* from t1, t2 where t1.c2 = t2.c2;
#select /*+ use_hash(a b)*/ * from t1 a left outer join t2 b on a.c1=b.c1 order by a.c1, a.c2;
#select /*+ use_hash(a b)*/ * from t1 a right outer join t2 b on a.c1=b.c1;
#select /*+ use_hash(a b)*/ * from t1 a full outer join t2 b on a.c1=b.c1;
#select * from t1 order by c1, c2;