[FEAT MERGE] impl vectorization 2.0

Co-authored-by: Naynahs <cfzy002@126.com>
Co-authored-by: hwx65 <1780011298@qq.com>
Co-authored-by: oceanoverflow <oceanoverflow@gmail.com>
This commit is contained in:
obdev
2023-12-22 03:43:19 +00:00
committed by ob-robot
parent 1178245448
commit b6773084c6
592 changed files with 358124 additions and 303288 deletions

View File

@ -1,6 +1,7 @@
#sql_unittest(test_exec_context)
sql_unittest(test_physical_plan)
sql_unittest(test_sql_fixed_array)
sql_unittest(test_bit_vector)
add_subdirectory(aggregate)
add_subdirectory(dml)

View File

@ -7,3 +7,8 @@
#aggr_unittest(test_merge_groupby)
#aggr_unittest(test_scalar_aggregate)
#aggr_unittest(test_merge_distinct)
# Declares one aggregate unit test: forwards every argument to sql_unittest()
# and then adds the shared operator-engine test sources to the target `case`.
# NOTE(review): presumably sql_unittest() creates a target named after its
# first argument - confirm against its definition.
function(aggr_unittest2 case)
sql_unittest(${ARGV})
# Paths are written relative to this directory (one level below the shared sources).
target_sources(${case} PRIVATE ../test_op_engine.cpp ../ob_fake_table_scan_vec_op.cpp)
endfunction()
# Register the hash group-by test with the helper above.
aggr_unittest2(test_hash_groupby2)

View File

@ -0,0 +1,8 @@
digit_data_format=4
string_data_format=4
data_range_level=0
skips_probability=10
nulls_probability=30
round=10
batch_size=256
output_result_to_file=1

View File

@ -0,0 +1,135 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
// #define USING_LOG_PREFIX SQL_ENGINE
#define USING_LOG_PREFIX COMMON
#include <iterator>
#include <gtest/gtest.h>
#include "../test_op_engine.h"
#include "../ob_test_config.h"
#include <vector>
#include <string>
using namespace ::oceanbase::sql;
namespace test
{
// Fixture for the hash group-by tests in this file. It adds no state of its
// own; everything it uses comes from TestOpEngine.
class TestHashGroupByVec : public TestOpEngine
{
public:
  TestHashGroupByVec();
  virtual ~TestHashGroupByVec();

  // gtest hooks executed around every TEST_F that uses this fixture.
  virtual void SetUp();
  virtual void TearDown();

private:
  // The fixture must not be copied or assigned.
  DISALLOW_COPY_AND_ASSIGN(TestHashGroupByVec);
};
// Stores "<test_filename_prefix_>.schema" in schema_file_path_.
// NOTE(review): schema_file_path_ is not declared in this file (presumably a
// TestOpEngine member) and strcpy() performs no bounds check - confirm the
// destination is large enough for the configured prefix.
TestHashGroupByVec::TestHashGroupByVec()
{
  const std::string schema_path = ObTestOpConfig::get_instance().test_filename_prefix_ + ".schema";
  strcpy(schema_file_path_, schema_path.c_str());
}
// Nothing to release here; the body is intentionally empty.
TestHashGroupByVec::~TestHashGroupByVec() = default;
void TestHashGroupByVec::SetUp()
{
TestOpEngine::SetUp();
}
// gtest hook executed after each test of this fixture; it only calls destroy().
// NOTE(review): destroy() is not defined in this file - presumably inherited
// from TestOpEngine; confirm. TestOpEngine::TearDown() is not invoked here.
void TestHashGroupByVec::TearDown()
{
destroy();
}
// Feeds "<test_filename_prefix_>.test" to basic_random_test() and expects it
// to report 0.
TEST_F(TestHashGroupByVec, basic_test)
{
  std::string query_file = ObTestOpConfig::get_instance().test_filename_prefix_;
  query_file += ".test";
  const int rc = basic_random_test(query_file);
  EXPECT_EQ(rc, 0);
}
// TEST_F(TestHashGroupByVec, basic_test2)
// {
// int ret = OB_SUCCESS;
// std::string test_file_path = ObTestOpConfig::get_instance().test_filename_prefix_ + ".test";
// if(OB_FAIL(basic_random_test_output_to_file(test_file_path, true))) {
// LOG_ERROR("Some error occur in running vectorization 2.0 operator", K(ret));
// } else if (OB_FAIL(basic_random_test_output_to_file(test_file_path, false))) {
// LOG_ERROR("Some error occur in running original operator", K(ret));
// }
// EXPECT_EQ(ret, 0);
// }
// TEST_F(TestHashGroupByVec, your_own_test)
// {
// std::string test_file_path = ObTestOpConfig::get_instance().test_filename_prefix_ + ".test";
// std::ifstream if_tests(test_file_path);
// if (if_tests.is_open() == false) { return; }
// std::string line;
// while (std::getline(if_tests, line)) {
// // handle query
// if (line.size() <= 0) continue;
// if (line.at(0) == '#') continue;
// ObOperator *root = NULL;
// ObExecutor exector;
// if (OB_FAIL(get_tested_op_from_string(line, false, root, exector))) {
// LOG_WARN("generate tested op fail, sql: ", K(line.data()));
// } else {
// int round = 1;
// const int64_t max_row_cnt = 256;
// const ObBatchRows *child_brs = nullptr;
// LOG_INFO("============== Final output ===============", K(round));
// while (!root->brs_.end_) {
// if (OB_FAIL(root->get_next_batch(max_row_cnt, child_brs))) {
// LOG_ERROR("root op fail to get_next_batch data", K(original_root));
// break;
// }
// }
// }
// }
// }
} // namespace test
int main(int argc, char **argv)
{
ObTestOpConfig::get_instance().test_filename_prefix_ = "test_hash_groupby2";
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-bg") == 0) {
ObTestOpConfig::get_instance().test_filename_prefix_ += "_bg";
ObTestOpConfig::get_instance().run_in_background_ = true;
}
}
ObTestOpConfig::get_instance().init();
system(("rm -f " + ObTestOpConfig::get_instance().test_filename_prefix_ + ".log").data());
system(("rm -f " + ObTestOpConfig::get_instance().test_filename_prefix_ + ".log.*").data());
oceanbase::common::ObClockGenerator::init();
observer::ObReqTimeGuard req_timeinfo_guard;
OB_LOGGER.set_log_level("INFO");
OB_LOGGER.set_file_name((ObTestOpConfig::get_instance().test_filename_prefix_ + ".log").data(), true);
init_sql_factories();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@ -0,0 +1,5 @@
#create database opt;
#use opt;
create table t1(c1 int, c2 int);
create table t2(c1 int, c2 int);
create table t3(c1 int, c2 int, c3 double, c4 char(20), c5 varchar(40));

View File

@ -0,0 +1,98 @@
#!/bin/bash
# Parameter sweep for the background hash group-by test binary.
#
# This script uses arrays, so it must run under bash (not a plain POSIX sh).
#
# For every combination of the settings below it rewrites the test's .cfg file,
# runs the test binary with -bg, sorts both result files and diffs them.
# The sweep stops with a non-zero exit status on the first failure.
#
# data format
# enum VectorFormat: uint8_t
#{
#  VEC_INVALID = 0,
#  VEC_FIXED,
#  VEC_DISCRETE,
#  VEC_CONTINUOUS,
#  VEC_UNIFORM,
#  VEC_UNIFORM_CONST,
#  VEC_MAX_FORMAT
#};
#

# 4 batch sizes
batch_size_round=(1 50 150 256)
# 4 round counts
round_array=(10 100 1000 10000)
# 3 data range levels
data_range_level_array=(0 1 2)
# 3 skip probabilities
skips_probability_array=(0 30 80)
# 3 null probabilities
nulls_probability_array=(0 30 80)
# Fixed-length column formats: VEC_UNIFORM, VEC_FIXED.
# The key must be "digit_data_format": that is the name the test config reads.
digit_data_format_array=("digit_data_format=4" "digit_data_format=1")
# String column formats: VEC_UNIFORM (VEC_DISCRETE / VEC_CONTINUOUS currently disabled)
#string_data_format_array=("string_data_format=4" "string_data_format=2" "string_data_format=3")
string_data_format_array=("string_data_format=4")

test_file_prefix="./test_hash_groupby2_bg"
cfg_file="${test_file_prefix}.cfg"
# NOTE(review): the test config derives "<prefix>_origin_result.data" and
# "<prefix>_vec_result.data" as output names - confirm that the two paths
# below are really the files the binary writes.
origin_result_file="./origin_result_bg.data"
vec_result_file="./vec_result_bg.data"

test_case_round=1
for batch_size in "${batch_size_round[@]}"
do
  for round in "${round_array[@]}"
  do
    for data_range_level in "${data_range_level_array[@]}"
    do
      for skips_probability in "${skips_probability_array[@]}"
      do
        for nulls_probability in "${nulls_probability_array[@]}"
        do
          for digit_data_format in "${digit_data_format_array[@]}"
          do
            for string_data_format in "${string_data_format_array[@]}"
            do
              # Rewrite the config file from scratch for this combination.
              {
                echo "batch_size=${batch_size}"
                echo "output_result_to_file=1"
                echo "round=${round}"
                echo "data_range_level=${data_range_level}"
                echo "skips_probability=${skips_probability}"
                echo "nulls_probability=${nulls_probability}"
                echo "${digit_data_format}"
                echo "${string_data_format}"
              } > "${cfg_file}"

              echo "###################"
              echo "Test Case Round: ${test_case_round}"
              echo "{"
              echo "batch_size: ${batch_size}"
              echo "round: ${round}"
              echo "data_range_level: ${data_range_level}"
              echo "skips_probability: ${skips_probability}"
              echo "nulls_probability: ${nulls_probability}"
              echo "digit_data_format: ${digit_data_format}"
              echo "string_data_format: ${string_data_format}"
              echo "}"
              echo "###################"

              # A failed run may leave result files from the previous
              # combination behind, which would compare equal by accident.
              if ! "${test_file_prefix}" -bg; then
                echo "Test binary failed! Exit!"
                exit 1
              fi

              # Row order is not part of the comparison, so sort both files first.
              sort "${origin_result_file}" -o "${origin_result_file}"
              sort "${vec_result_file}" -o "${vec_result_file}"
              if diff "${origin_result_file}" "${vec_result_file}" > /dev/null; then
                echo "Both result file are the same!"
              else
                echo "Get Incorrect Result! Exit!"
                exit 1
              fi
              test_case_round=$((test_case_round+1))
            done
          done
        done
      done
    done
  done
done
echo "Done"

View File

@ -0,0 +1,8 @@
#select c2, sum(c1), min(c1), max(c1) from t1 group by c2;
#select/*+USE_HASH_AGGREGATION*/ c2, count(c1), sum(c1 + 1), min(c1*2), max(c1 - 1) from t1 group by c2;
#select /*+leading(t1, t2) USE_HASH(t1, t2)*/* from t1, t2 where t1.c1 = t2.c1;
#select /*+leading(t1, t2) USE_HASH(t1, t2)*/* from t1, t2 where t1.c2 = t2.c2;
#select /*+ use_hash(t1 t2)*/ * from t1 a left outer join t2 b on a.c1=b.c1 order by a.c1, a.c2;
#select /*+ use_hash(t1 t2)*/ * from t1 a right outer join t2 b on t1.c1=t2.c1;
#select /*+ use_hash(t1 t2)*/ * from t1 a full outer join t2 b on t1.c1=t2.c1;
#select * from t1 order by c1, c2;

View File

@ -0,0 +1,212 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include <random>
#include <chrono>
#include <vector>
#include <unordered_map>
#include <type_traits>
#include "src/sql/engine/expr/ob_expr.h"
#include "src/sql/engine/ob_bit_vector.h"
#include "ob_test_config.h"
namespace oceanbase
{
namespace sql
{
class ObDataGenerator
{
public:
static ObDataGenerator &get_instance()
{
static ObDataGenerator data_generator;
return data_generator;
}
template <typename T>
int generate_data(const int64_t op_id, const int expr_i, const int expr_count, const int round, const int batch_size,
const int len, bool &is_duplicate)
{
int ret = OB_SUCCESS;
LOG_DEBUG("[DG]Start generate data for: ", K(op_id), K(round), K(expr_i), K(expr_count));
// if it's a exist generated round data and corresponding expr_i have data
if (op_2_round_2_temp_store_.count(op_id) != 0 && op_2_round_2_temp_store_[op_id].count(round) != 0
&& expr_i < op_2_round_2_temp_store_[op_id][round].size()) {
return ret;
}
// generate new random data
std::string generate_data;
std::vector<TempDataStore> &temp_store = op_2_round_2_temp_store_[op_id][round];
temp_store.push_back(TempDataStore());
for (int i = 0; i < batch_size; i++) {
if (std::is_same<T, int>::value) {
int random_data;
// if (expr_i == expr_count - 1) {
// random_data = 1;
// } else {
// random_data = u_int32_dis_(e_);
// }
random_data = u_int32_dis_->operator()(e_);
temp_store[expr_i].temp_int32_vector_.push_back(random_data);
generate_data += std::to_string(random_data) + " ";
} else if (std::is_same<T, int64_t>::value) {
int64_t random_data;
random_data = u_int64_dis_->operator()(e_);
temp_store[expr_i].temp_int64_vector_.push_back(random_data);
generate_data += std::to_string(random_data) + " ";
} else if (std::is_same<T, double>::value) {
double random_data;
random_data = u_r_dis_->operator()(e_);
temp_store[expr_i].temp_double_vector_.push_back(random_data);
generate_data += std::to_string(random_data) + " ";
} else if (std::is_same<T, std::string>::value) {
std::string random_data;
random_data = str_rand(len);
temp_store[expr_i].temp_string_vector_.push_back(random_data);
generate_data += random_data + " ";
} else {
LOG_INFO("Can not generate random value so far for: ");
assert(false);
}
}
LOG_DEBUG("Generate data: ", K(generate_data.data()));
set_random_null(op_id, expr_i, expr_count, round, batch_size);
return ret;
}
void set_random_null(const int64_t op_id, const int expr_i, const int expr_count, const int round,
const int batch_size)
{
std::string generate_data_nulls;
bool is_null;
for (int i = 0; i < batch_size; i++) {
is_null = zero_one_rand_by_probability(ObTestOpConfig::get_instance().nulls_probability_);
generate_data_nulls += std::to_string(!is_null) + " ";
op_2_round_2_temp_store_[op_id][round][expr_i].null_.push_back(is_null);
}
LOG_INFO("nulls : ", K(generate_data_nulls.data()));
}
void reset_temp_store(const uint64_t op_id, const int round)
{
if (op_2_round_2_temp_store_.count(op_id)) { op_2_round_2_temp_store_[op_id].erase(round); }
}
// Todo: replace this struct with ObTempColumnStore when br finish that
struct TempDataStore
{
bool empty_{true};
// temp store
std::vector<int> temp_int32_vector_;
std::vector<int64_t> temp_int64_vector_;
std::vector<double> temp_double_vector_;
std::vector<std::string> temp_string_vector_;
std::vector<bool> null_;
};
using TempDataStores = std::vector<TempDataStore>;
void register_op(const ObOperator *op)
{
LOG_INFO("id is ", K(op->get_spec().get_id()));
intereseting_op_count_++;
}
private:
ObDataGenerator()
{
e_ = std::default_random_engine(std::chrono::system_clock::now().time_since_epoch().count());
switch (ObTestOpConfig::get_instance().data_range_level_) {
case 0:
u_int32_dis_ = new std::uniform_int_distribution<int32_t>(-100, 100);
u_int64_dis_ = new std::uniform_int_distribution<int64_t>(-100, 100);
u_r_dis_ = new std::uniform_real_distribution<double>(-100, 100);
break;
case 1:
u_int32_dis_ = new std::uniform_int_distribution<int32_t>(-10000, 10000);
u_int64_dis_ = new std::uniform_int_distribution<int64_t>(-50000, 50000);
u_r_dis_ = new std::uniform_real_distribution<double>(-10000, 10000);
break;
case 2:
u_int32_dis_ = new std::uniform_int_distribution<int32_t>(INT32_MIN, INT32_MAX);
u_int64_dis_ = new std::uniform_int_distribution<int64_t>(INT64_MIN, INT64_MAX);
u_r_dis_ = new std::uniform_real_distribution<double>(-100000, 100000);
break;
default:
u_int32_dis_ = new std::uniform_int_distribution<int32_t>(-100, 100);
u_int64_dis_ = new std::uniform_int_distribution<int64_t>(-100, 100);
u_r_dis_ = new std::uniform_real_distribution<double>(-100, 100);
break;
}
}
~ObDataGenerator()
{
delete u_int32_dis_;
delete u_int64_dis_;
delete u_r_dis_;
}
std::string str_rand(int length)
{
std::uniform_int_distribution<int> u_l(0, length);
std::uniform_int_distribution<int> u_data(0, 10000);
int final_len = u_l(e_);
char tmp;
std::string buffer;
for (int i = 0; i < final_len; i++) {
tmp = u_data(e_) % 36;
if (tmp < 10) {
tmp += '0';
} else {
tmp -= 10;
tmp += 'A';
}
buffer += tmp;
}
return buffer;
}
// probability means the probability of generating result of 1
int zero_one_rand_by_probability(int probability)
{
OB_ASSERT(probability >= 0 && probability <= 100);
std::uniform_int_distribution<int> u_l(0, 100);
if (u_l(e_) < probability) { return 1; }
return 0;
}
private:
std::uniform_int_distribution<int32_t> *u_int32_dis_{nullptr};
std::uniform_int_distribution<int64_t> *u_int64_dis_{nullptr};
std::uniform_real_distribution<double> *u_r_dis_{nullptr};
std::default_random_engine e_;
int last_round_{-1};
int intereseting_op_count_{0};
bool inited_{false};
// temp store
std::unordered_map<int, std::unordered_map<int, TempDataStores>> op_2_round_2_temp_store_;
std::unordered_map<int, std::unordered_map<int, std::vector<bool>>> op_2_round_2_skips_;
};
} // end namespace sql
} // end namespace oceanbase

View File

@ -269,7 +269,7 @@ TEST_F(ObMonitoringDumpTest, test_get_next_row)
ObExecContext ctx;
ObTableScanFake table_scan;
ObMonitoringDump root(ctx.get_allocator());
root.set_flags(ObMonitorHint::OB_MONITOR_STAT | ObMonitorHint::OB_MONITOR_TRACING);
root.set_flags(ObAllocOpHint::OB_MONITOR_STAT | ObAllocOpHint::OB_MONITOR_TRACING);
ObPhysicalPlan physical_plan;
int64_t op_size = 2;
ObSQLSessionInfo origin_session;

View File

@ -0,0 +1,299 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX COMMON
#include "deps/oblib/src/common/object/ob_obj_type.h"
#include "test_op_engine.h"
#include "ob_fake_table_scan_vec_op.h"
#include "data_generator.h"
namespace oceanbase
{
namespace sql
{
// Registers this operator with the data generator and, when the test config
// has a "round" entry, sets the round at which the scan reports its end.
// Always returns OB_SUCCESS.
int ObFakeTableScanVecOp::inner_open()
{
  ObDataGenerator::get_instance().register_op(this);

  // get_config() leaves the string untouched when the key is missing, so an
  // empty string means "keep the default max_round_".
  std::string round_cfg;
  ObTestOpConfig::get_instance().get_config("round", round_cfg);
  if (round_cfg.empty()) {
    // nothing configured
  } else {
    max_round_ = current_round_ + std::stoi(round_cfg);
  }
  return OB_SUCCESS;
}
int ObFakeTableScanVecOp::inner_get_next_batch(const int64_t max_row_cnt)
{
int ret = OB_SUCCESS;
clear_evaluated_flag();
uint64_t op_id = get_spec().get_id();
int64_t generate_random_value = 0;
bool is_duplicate = false;
const ObPushdownExprSpec &pd_expr_spec =
reinterpret_cast<const ObTableScanSpec *>(&spec_)->tsc_ctdef_.scan_ctdef_.pd_expr_spec_;
for (int j = 0; j < pd_expr_spec.access_exprs_.count(); j++) {
ObExpr *expr = pd_expr_spec.access_exprs_.at(j);
OB_FAIL(fill_random_data_into_expr_datum_frame(j, pd_expr_spec.access_exprs_.count(), expr,
ObTestOpConfig::get_instance().batch_size_, is_duplicate));
}
// random set skip
set_random_skip(current_round_, ObTestOpConfig::get_instance().batch_size_);
if (is_duplicate) { ObDataGenerator::get_instance().reset_temp_store(op_id, current_round_); }
current_round_++;
brs_.size_ = ObTestOpConfig::get_instance().batch_size_;
if (current_round_ == max_round_) { brs_.end_ = true; }
// print generate data
LOG_INFO("[DG] data generated by DataGenerator in ", K(current_round_ - 1));
if (op_id_2_output_streams_.count(op_id) == 0) {
std::string output_file_name = "generate_data_" + std::to_string(op_id) + ".data";
op_id_2_output_streams_[op_id].open(output_file_name.data(), std::ios::out | std::ios::trunc);
}
test::TestOpEngine::print_to_file(&brs_, this, pd_expr_spec.access_exprs_, false, &op_id_2_output_streams_[op_id]);
return ret;
}
int ObFakeTableScanVecOp::fill_random_data_into_expr_datum_frame(int expr_i, int expr_count, const ObExpr *expr,
const int output_row_count, bool &is_duplicate)
{
int ret = OB_SUCCESS;
LOG_DEBUG("generate_random_value for expr: ", K(*expr));
ObIVector *i_vec = NULL;
ObDatum *datums = NULL;
if (expr->enable_rich_format()) {
// vectorization 2.0 new operator
i_vec = expr->get_vector(eval_ctx_);
if (expr->is_fixed_length_data_) {
// for VEC_FIXED and VEC_UNIFORM
// we must first call init_vector because we need to use i_vec->set_xxx() which need use vector's meta data
OB_ASSERT(ObTestOpConfig::get_instance().digit_data_format_ == VEC_UNIFORM
|| ObTestOpConfig::get_instance().digit_data_format_ == VEC_FIXED);
expr->init_vector(eval_ctx_, ObTestOpConfig::get_instance().digit_data_format_, output_row_count, true);
} else {
// while for VEC_DISCRETE and VEC_CONTINUOUS it doesn't matter
OB_ASSERT(ObTestOpConfig::get_instance().string_data_format_ == VEC_UNIFORM
|| ObTestOpConfig::get_instance().string_data_format_ == VEC_DISCRETE
|| ObTestOpConfig::get_instance().string_data_format_ == VEC_CONTINUOUS);
expr->init_vector(eval_ctx_, ObTestOpConfig::get_instance().string_data_format_, output_row_count, true);
}
} else {
// vectorization 1.0 old operator
datums = expr->locate_datums_for_update(eval_ctx_, output_row_count);
}
// generate random data
get_random_data(expr_i, expr_count, expr, current_round_, output_row_count, expr->max_length_, is_duplicate);
int vec_continuous_offset = 0; // only used in VEC_CONTINUOUS
std::string vec_continuous_data; // use to store data of VEC_CONTINUOUS temporarily
ObDataGenerator::TempDataStore &data_store =
ObDataGenerator::get_instance().op_2_round_2_temp_store_[get_spec().get_id()][current_round_][expr_i];
for (int row = 0; row < output_row_count; row++) {
bool is_null = data_store.null_[row];
switch (expr->datum_meta_.get_type()) {
// why expr->datum_meta_.get_type() == ObInt32Type while expr->res_buf_len_ == 8 ?????
case ObInt32Type: {
int data = data_store.temp_int32_vector_[row];
if (i_vec != NULL) {
i_vec->set_int(row, static_cast<int64_t>(data));
if (is_null) { i_vec->set_null(row); }
} else {
datums[row].set_int32(data);
if (is_null) { datums[row].set_null(); }
}
break;
}
case ObIntType: {
int64_t data = data_store.temp_int64_vector_[row];
if (i_vec != NULL) {
i_vec->set_int(row, data);
if (is_null) { i_vec->set_null(row); }
} else {
datums[row].set_int(data);
if (is_null) { datums[row].set_null(); }
}
break;
}
case ObDoubleType: {
double data = data_store.temp_double_vector_[row];
if (i_vec != NULL) {
i_vec->set_double(row, data);
if (is_null) { i_vec->set_null(row); }
} else {
datums[row].set_double(data);
if (is_null) { datums[row].set_null(); }
}
break;
}
case ObNumberType: {
break;
}
case ObDecimalIntType: {
// have bug
// if decimal(20, 10)
// expr->res_buf_len_ = 16 is *not* equal to datums[x].len_ == 8
// where datums[x].len_ is set?
// i_vec->set_int(row,
// ObDataGenerator::get_instance().round_2_temp_store_[round_].first[expr_i].temp_int64_vector_[row]);
break;
}
case ObVarcharType:
case ObCharType: {
std::string tmp_str = data_store.temp_string_vector_[row];
int str_len = static_cast<int>(tmp_str.size());
if (i_vec != NULL) {
// vectorization 1.0 new operator
if (expr->get_format(eval_ctx_) == VEC_UNIFORM) {
if (is_null) {
i_vec->set_null(row);
} else {
i_vec->set_payload(row, tmp_str.data(), str_len);
}
} else {
if (is_null) { i_vec->set_null(row); }
if (expr->get_format(eval_ctx_) == VEC_DISCRETE) {
char **ptrs = expr->get_discrete_vector_ptrs(eval_ctx_);
int32_t *lens = expr->get_discrete_vector_lens(eval_ctx_);
ptrs[row] = expr->get_res_buf(eval_ctx_) + row * (expr->res_buf_len_);
lens[row] = str_len;
i_vec->set_payload(row, tmp_str.data(), str_len);
} else {
// VEC_CONTINUOUS
// offset[0] == 0 which is set in cg phase
vec_continuous_offset += str_len;
uint32_t *offset = expr->get_continuous_vector_offsets(eval_ctx_);
offset[row + 1] = vec_continuous_offset;
vec_continuous_data += tmp_str; // temporarily store data here
}
}
} else {
// vectorization 1.0 old operator
if (is_null) {
datums[row].set_null();
} else {
datums[row].set_string(tmp_str.data(), str_len);
}
}
break;
}
default: LOG_INFO("Can not generate random value so far for: ", K(expr->datum_meta_.get_type()));
}
}
if (expr->enable_rich_format() && expr->get_format(eval_ctx_) == VEC_CONTINUOUS && !vec_continuous_data.empty()) {
ObDynReserveBuf *drb = reinterpret_cast<ObDynReserveBuf *>(expr->get_continuous_vector_data(eval_ctx_));
// Todo: replace below logic when shengle handle continuous format memory
// in ObExpr::get_str_res_mem()
char *mem = NULL;
const int64_t alloc_size = next_pow2(vec_continuous_data.size());
if (OB_UNLIKELY(alloc_size > UINT32_MAX)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(vec_continuous_data.size()), K(alloc_size), K(ret));
} else if (OB_ISNULL(mem = static_cast<char *>(eval_ctx_.alloc_expr_res(alloc_size)))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate memory failed", K(ret), K(ret));
} else {
// When extend memory, the old memory can not free, because the old memory may
// still be referenced. see: ob_datum_cast.cpp::common_copy_string
if (0 == drb->len_) { drb->magic_ = ObDynReserveBuf::MAGIC_NUM; }
drb->len_ = alloc_size;
drb->mem_ = mem;
MEMCPY(drb->mem_, vec_continuous_data.data(), vec_continuous_data.size());
ObContinuousBase *cont_vec = static_cast<ObContinuousBase *>(expr->get_vector(eval_ctx_));
cont_vec->set_data(drb->mem_);
}
LOG_DEBUG("extend expr result memory", K(ret), K(vec_continuous_data.size()), K(alloc_size), KP(this), KP(mem));
}
return ret;
}
int ObFakeTableScanVecOp::get_random_data(int expr_i, int expr_count, const ObExpr *expr, const int round,
const int batch_size, const int len, bool &is_duplicate)
{
int ret = OB_SUCCESS;
uint64_t op_id = get_spec().get_id();
switch (expr->datum_meta_.get_type()) {
// why expr->datum_meta_.get_type() == ObInt32Type while expr->res_buf_len_ == 8 ?????
case ObInt32Type: {
ObDataGenerator::get_instance().generate_data<int>(op_id, expr_i, expr_count, round, batch_size, len, is_duplicate);
break;
}
case ObIntType: {
ObDataGenerator::get_instance().generate_data<int64_t>(op_id, expr_i, expr_count, round, batch_size, len,
is_duplicate);
break;
}
case ObDoubleType: {
ObDataGenerator::get_instance().generate_data<double>(op_id, expr_i, expr_count, round, batch_size, len,
is_duplicate);
break;
}
case ObNumberType: {
break;
}
case ObDecimalIntType: {
// have bug
// if decimal(20, 10)
// expr->res_buf_len_ = 16 is *not* equal to datums[x].len_ == 8
// where datums[x].len_ is set?
ObDataGenerator::get_instance().generate_data<int64_t>(op_id, expr_i, expr_count, round, batch_size, len,
is_duplicate);
break;
}
case ObVarcharType:
case ObCharType: {
ObDataGenerator::get_instance().generate_data<std::string>(op_id, expr_i, expr_count, round, batch_size, len,
is_duplicate);
break;
}
default: LOG_INFO("Can not generate random value so far for: ", K(expr->datum_meta_.get_type()));
}
return ret;
}
void ObFakeTableScanVecOp::set_random_skip(const int round, const int batch_size)
{
std::string generate_data_skips;
uint64_t op_id = get_spec().get_id();
if (ObDataGenerator::get_instance().op_2_round_2_skips_[op_id].count(round) == 0) {
std::uniform_int_distribution<int> u_i(0, 1);
bool if_skip;
for (int i = 0; i < batch_size; i++) {
if_skip =
ObDataGenerator::get_instance().zero_one_rand_by_probability(ObTestOpConfig::get_instance().skips_probability_);
generate_data_skips += std::to_string(!if_skip) + " ";
ObDataGenerator::get_instance().op_2_round_2_skips_[op_id][round].push_back(if_skip);
}
}
for (int i = 0; i < batch_size; i++) {
if (ObDataGenerator::get_instance().op_2_round_2_skips_[op_id][round][i] == true) { brs_.skip_->set(i); }
}
LOG_INFO("skips : ", K(generate_data_skips.data()));
return;
}
} // namespace sql
} // namespace oceanbase

View File

@ -0,0 +1,55 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include <random>
#include <chrono>
#include <fstream>
#include "../test_sql_utils.h"
#include "sql/engine/table/ob_table_scan_op.h"
#include "sql/engine/ob_operator.h"
#include "sql/engine/ob_operator_reg.h"
namespace oceanbase
{
namespace sql
{
// Table-scan operator for tests: its batches are filled with random data from
// the data generator rather than read from storage.
class ObFakeTableScanVecOp : public ObTableScanOp
{
  friend class ObDASScanOp;
  friend class ObGlobalIndexLookupOpImpl;

public:
  static constexpr int64_t CHECK_STATUS_ROWS_INTERVAL = 1 << 13;

  ObFakeTableScanVecOp(ObExecContext &exec_ctx, const ObOpSpec &spec, ObOpInput *input)
    : ObTableScanOp(exec_ctx, spec, input)
  {}
  ~ObFakeTableScanVecOp() = default;

  // Operator hooks overridden from the base class.
  int inner_open() override;
  int inner_get_next_batch(const int64_t max_row_cnt) override;

  // Fills the result storage of one access expression with random values.
  int fill_random_data_into_expr_datum_frame(int expr_i, int expr_count, const ObExpr *expr,
                                             const int output_row_count, bool &is_duplicate);
  // Requests random values matching the type of expr from the data generator.
  int get_random_data(int expr_i, int expr_count, const ObExpr *expr, const int round, const int batch_size,
                      const int len, bool &is_duplicate);
  // Sets random skip bits on the current batch.
  void set_random_skip(const int round, const int batch_size);

public:
  // Rounds are counted from 1; the batch that moves current_round_ to
  // max_round_ is reported as the last one.
  int max_round_{2};
  int current_round_{1};
  // io
  // One dump file per operator id.
  std::unordered_map<uint64_t, std::ofstream> op_id_2_output_streams_;
};
} // end namespace sql
} // end namespace oceanbase

View File

@ -0,0 +1,154 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include <string>
#include <random>
#include <chrono>
#include <vector>
#include <unordered_map>
#include <type_traits>
#include <fstream>
#include "lib/ob_define.h"
// class ObOperator;
namespace oceanbase
{
namespace sql
{
// Process-wide settings for the operator tests.
//
// test_filename_prefix_ has to be set before init(); init() then reads
// "<prefix>.cfg", a text file with one "key=value" pair per line. Empty lines
// and lines starting with '#' are ignored. The known keys are copied into the
// typed members below; keys that are absent keep their defaults.
class ObTestOpConfig
{
public:
  static ObTestOpConfig &get_instance()
  {
    static ObTestOpConfig test_op_config;
    return test_op_config;
  }

  // There is exactly one configuration object; it must not be copied.
  ObTestOpConfig(const ObTestOpConfig &) = delete;
  ObTestOpConfig &operator=(const ObTestOpConfig &) = delete;

  // Derives the result file names from the prefix, loads "<prefix>.cfg" and
  // applies its values. A missing file trips OB_ASSERT.
  void init()
  {
    std::string test_config_file = test_filename_prefix_ + ".cfg";
    test_filename_origin_output_file_ = test_filename_prefix_ + "_origin_result.data";
    test_filename_vec_output_file_ = test_filename_prefix_ + "_vec_result.data";
    std::ifstream if_tests_config(test_config_file);
    OB_ASSERT(if_tests_config.is_open() == true);
    std::string line;
    while (std::getline(if_tests_config, line)) {
      if (line.empty() || line.at(0) == '#') { continue; }
      std::vector<std::string> out;
      const char delim = '=';
      tokenize(line, delim, out);
      OB_ASSERT(out.size() == 2);
      // Keep going safely if the assertion above is compiled out.
      if (out.size() != 2) { continue; }
      configs_map_[out[0]] = out[1];
    }
    set_configs();
    LOG_INFO("Testing config is: ", K(*this));
  }

  // Splits str at every run of delim and appends the non-empty pieces to out.
  void tokenize(std::string const &str, const char delim, std::vector<std::string> &out)
  {
    size_t start;
    size_t end = 0;
    while ((start = str.find_first_not_of(delim, end)) != std::string::npos) {
      end = str.find(delim, start);
      out.push_back(str.substr(start, end - start));
    }
  }

  // Looks up the raw text stored for key.
  // @return OB_SUCCESS and the text in value, or OB_INVALID_ARGUMENT (value
  //         untouched) when the key is unknown
  int get_config(const std::string &key, std::string &value)
  {
    int ret = OB_SUCCESS;
    const std::unordered_map<std::string, std::string>::const_iterator it = configs_map_.find(key);
    if (it == configs_map_.end()) {
      ret = OB_INVALID_ARGUMENT;
    } else {
      value = it->second;
    }
    return ret;
  }

  // Copies every known key from the raw map into its typed member.
  void set_configs()
  {
    int digit_data_format = static_cast<int>(digit_data_format_);
    int string_data_format = static_cast<int>(string_data_format_);
    int output_result_to_file = output_result_to_file_ ? 1 : 0;
    load_int_config("digit_data_format", digit_data_format);
    load_int_config("string_data_format", string_data_format);
    load_int_config("data_range_level", data_range_level_);
    load_int_config("nulls_probability", nulls_probability_);
    load_int_config("skips_probability", skips_probability_);
    load_int_config("round", round_);
    load_int_config("batch_size", batch_size_);
    load_int_config("output_result_to_file", output_result_to_file);
    digit_data_format_ = static_cast<VectorFormat>(digit_data_format);
    string_data_format_ = static_cast<VectorFormat>(string_data_format);
    output_result_to_file_ = (0 != output_result_to_file);
  }

  int64_t to_string(char *buf, const int64_t buf_len) const
  {
    int64_t pos = 0;
    J_KV(K(digit_data_format_), K(string_data_format_), K(data_range_level_), K(nulls_probability_),
         K(skips_probability_), K(round_), K(batch_size_), K(output_result_to_file_));
    return pos;
  }

private:
  ObTestOpConfig()
  {}
  ~ObTestOpConfig()
  {}

  // Parses the text stored under key with std::stoi into target; a missing or
  // empty entry leaves target unchanged.
  void load_int_config(const char *key, int &target)
  {
    std::string text;
    if (OB_SUCCESS == get_config(key, text) && !text.empty()) { target = std::stoi(text); }
  }

public:
  // The settings are read and written directly by the test drivers and by the
  // fake operator, so they have to be accessible from outside this class.
  std::unordered_map<std::string, std::string> configs_map_;
  bool run_in_background_{false};
  std::string test_filename_prefix_;
  std::string test_filename_origin_output_file_;
  std::string test_filename_vec_output_file_;
  bool output_result_to_file_{false};
  // data format
  VectorFormat digit_data_format_{VEC_UNIFORM};
  VectorFormat string_data_format_{VEC_UNIFORM};
  // distribution and data range
  int data_range_level_{0};
  // nulls and skips
  int nulls_probability_{0};
  int skips_probability_{0};
  // round and batch_size
  int round_{10};
  int batch_size_{256};
};
} // end namespace sql
} // end namespace oceanbase

View File

@ -0,0 +1,120 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include <gtest/gtest.h>
#include <cstring>
#include "lib/allocator/page_arena.h"
#include "common/object/ob_object.h"
#include "lib/container/ob_se_array.h"
#include "src/sql/engine/ob_bit_vector.h"
namespace oceanbase
{
namespace sql
{
class ObTestBitVector: public ::testing::Test
{
public:
ObTestBitVector() {}
~ObTestBitVector() {}
virtual void SetUp() {}
virtual void TearDown() {}
private:
DISALLOW_COPY_AND_ASSIGN(ObTestBitVector);
};
// Checks that dest_bit_vector holds exactly the pattern "all ones in
// [start, end) except a zero at `middle`", with zeros before `start` and in the
// 100 bits following `end`.
// Side effect: every bit that is verified as set is cleared again, so the
// vector is all-zero over the inspected area when the function returns.
void expect_range(ObBitVector *dest_bit_vector, int64_t start, int64_t middle, int64_t end) {
  // Expects every bit in [lo, hi) to be 0; leaves the bits untouched.
  const auto expect_zero = [dest_bit_vector](int64_t lo, int64_t hi) {
    for (int64_t pos = lo; pos < hi; ++pos) {
      EXPECT_EQ(0, dest_bit_vector->at(pos));
    }
  };
  // Expects every bit in [lo, hi) to be 1 and resets each one after checking it.
  const auto expect_one_then_clear = [dest_bit_vector](int64_t lo, int64_t hi) {
    for (int64_t pos = lo; pos < hi; ++pos) {
      EXPECT_EQ(1, dest_bit_vector->at(pos));
      dest_bit_vector->unset(pos);
    }
  };

  expect_zero(0, start);
  expect_one_then_clear(start, middle);
  EXPECT_EQ(0, dest_bit_vector->at(middle));
  expect_one_then_clear(middle + 1, end);
  expect_zero(end, end + 100);
}
// Exercises the ranged ObBitVector operations (set_all, deep_copy, bit_or,
// is_all_true) on [start, end) and verifies each result with expect_range().
// The statement order matters: expect_range() clears the bits it verifies, so
// every operation starts from a destination that is zero over the range, and
// the function leaves dest cleared over [start, end) for the next call.
void test_range(ObBitVector *dest_bit_vector, ObBitVector *src_bit_vector, int64_t start,
int64_t end)
{
// Source: bits [0, 2000) all set (the hole at `middle` is punched further down).
for (int i = 0; i < 2000; i++) {
src_bit_vector->set(i);
}
int64_t middle = (start + end) / 2;
// 1) set_all over the range, then punch a hole at `middle`.
dest_bit_vector->set_all(start, end);
dest_bit_vector->unset(middle);
expect_range(dest_bit_vector, start, middle, end);
// 2) deep_copy of the range from a source that has the same hole.
src_bit_vector->unset(middle);
dest_bit_vector->deep_copy(*src_bit_vector, start, end);
expect_range(dest_bit_vector, start, middle, end);
// 3) bit_or of the range into the (now cleared) destination.
dest_bit_vector->bit_or(*src_bit_vector, start, end);
expect_range(dest_bit_vector, start, middle, end);
// Restore the source so later calls start from an all-ones source again.
src_bit_vector->set(middle);
// 4) is_all_true: true on exactly [start, end), false once the range is
// widened by one bit on either side (those neighbours are 0).
for (int64_t i = start; i < end; i++) {
dest_bit_vector->set(i);
}
EXPECT_EQ(1, dest_bit_vector->is_all_true(start, end));
if (start > 0) {
EXPECT_EQ(0, dest_bit_vector->is_all_true(start - 1, end));
}
EXPECT_EQ(0, dest_bit_vector->is_all_true(start, end + 1));
// Leave the destination cleared for the next range.
for (int64_t i = start; i < end; i++) {
dest_bit_vector->unset(i);
}
}
// Runs test_range() over a list of ranges chosen to straddle 64-bit word
// boundaries (63/64, 127/128), plus empty ranges and ranges starting at bit 0.
// Both bit vectors are placed into zeroed 1024-byte stack buffers.
TEST(ObTestBitVector, bit_or_range)
{
  char src_buf[1024];
  char dest_buf[1024];
  MEMSET(src_buf, 0, 1024);
  MEMSET(dest_buf, 0, 1024);
  ObBitVector *src_bit_vector = new (src_buf) ObBitVector;
  ObBitVector *dest_bit_vector = new (dest_buf) ObBitVector;

  // {start, end} pairs, executed in this order.
  static const int64_t kRanges[][2] = {
    {13, 40}, {13, 63}, {13, 64}, {13, 127}, {13, 128}, {13, 258},
    {0, 50},  {0, 100}, {0, 63},  {0, 64},   {0, 0},
    {64, 64}, {64, 127},
  };
  for (const auto &range : kRanges) {
    test_range(dest_bit_vector, src_bit_vector, range[0], range[1]);
  }
}
}
}
// Test binary entry point: lets GoogleTest consume its command-line flags and
// returns the aggregated result of all registered tests.
int main(int argc, char **argv)
{
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

View File

@ -1,24 +1,26 @@
create database opt;
use opt;
#create database opt;
#use opt;
#create table t1(c1 int primary key, c2 int) partition by hash (c1) partitions 5
create table t2(c1 int primary key, c2 int, c3 varchar(32)) partition by hash (c1) partitions 3
create table t3(c1 int primary key, c2 int, c3 varchar(32)) partition by hash (c1) partitions 2
#create table t2(c1 int primary key, c2 int, c3 varchar(32)) partition by hash (c1) partitions 3
#create table t3(c1 int primary key, c2 int, c3 varchar(32)) partition by hash (c1) partitions 2
#create index idx_t1_c2 on t1(c2) LOCAL
create table t4(c1 int, c2 int, c3 int, primary key(c1, c2))
create index idx_t4_c2_c3 on t4(c2, c3)
create index idx_t4_c3 on t4(c3)
create index idx_t4_c2 on t4(c2)
create table t5(c1 int, c2 int, c3 int, primary key(c2,c3)) partition by key(c2, c3) partitions 3
create index idx_t5_c2 on t5(c2) LOCAL
create index idx_t5_c3 on t5(c3) LOCAL
create table t6(c1 int primary key, c2 tinyint) partition by key(c1) partitions 3
create table t7(c1 int primary key, c2 int)
create table t8(c1 int primary key, c2 int)
create table t2_no_part(c1 int primary key, c2 int, c3 int);
create table t9(c1 int primary key, c2 int not null default 1, c3 int default 2);
create table t10(c1 int, c2 int, c3 int, primary key(c2,c1)) partition by hash (c2) partitions 2;
create table t11(c1 int, c2 int, c3 int, primary key(c2,c1)) partition by hash (c2) partitions 2;
create table t12 (a bigint primary key, b char(20), c bigint);
create table t13 (b char(20), c bigint, a bigint primary key);
create table te1(c1 int primary key);
create table tab1(id int, parent_id int);
#create table t4(c1 int, c2 int, c3 int, primary key(c1, c2))
#create index idx_t4_c2_c3 on t4(c2, c3)
#create index idx_t4_c3 on t4(c3)
#create index idx_t4_c2 on t4(c2)
#create table t5(c1 int, c2 int, c3 int, primary key(c2,c3)) partition by key(c2, c3) partitions 3
#create index idx_t5_c2 on t5(c2) LOCAL
#create index idx_t5_c3 on t5(c3) LOCAL
#create table t6(c1 int primary key, c2 tinyint) partition by key(c1) partitions 3
create table t7(c1 int, c2 int)
#create table t8(c1 int primary key, c2 int)
#create table t2_no_part(c1 int primary key, c2 int, c3 int);
#create table t9(c1 int primary key, c2 int not null default 1, c3 int default 2);
#create table t10(c1 int, c2 int, c3 int, primary key(c2,c1)) partition by hash (c2) partitions 2;
#create table t11(c1 int, c2 int, c3 int, primary key(c2,c1)) partition by hash (c2) partitions 2;
#create table t12 (a bigint primary key, b char(20), c bigint);
#create table t13 (b char(20), c bigint, a bigint primary key);
#create table te1(c1 int primary key);
#create table tab1(id int, parent_id int);
#create table t1(c1 int, c2 double, c3 decimal(20,10), c4 char(20), c5 varchar(40));
create table t1(c1 int, c2 double, c4 char(20), c5 varchar(40));

View File

@ -3,3 +3,10 @@
#insert into te1 values (0)
#update te1 set c1 = 1 where c1 = 2
#delete from te1 where c1 = 1
#select avg(c1) from t7 group by c2;
#select c2, sum(c1), min(c1), max(c1) from t7 group by c2;
select/*+USE_HASH_AGGREGATION*/ c2, count(c1), sum(c1 + 1), min(c1*2), max(c1 - 1) from t7 group by c2;
#select c2 from t7 order by c2;
#select /*+blocking('all')*/* from t7;
#select /*+blocking('all')*/* from t1;
#select distinct(c2) from t7;

View File

@ -0,0 +1,810 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
// #define USING_LOG_PREFIX SQL_ENGINE
#define USING_LOG_PREFIX COMMON
#include <stdlib.h>
#include <sys/wait.h>
#include <iterator>
#include <gtest/gtest.h>
#include "test_op_engine.h"
#include "src/observer/omt/ob_tenant_config_mgr.h"
#include "sql/test_sql_utils.h"
#include "lib/container/ob_array.h"
#include "sql/ob_sql_init.h"
#include "sql/plan_cache/ob_cache_object_factory.h"
#include "observer/ob_req_time_service.h"
#include "ob_fake_table_scan_vec_op.h"
#include "share/ob_simple_mem_limit_getter.h"
#include "src/share/ob_local_device.h"
#include "src/share/ob_device_manager.h"
#include "src/storage/blocksstable/ob_storage_cache_suite.h"
#include "ob_test_config.h"
#include <vector>
#include <string>
using namespace oceanbase::sql;
namespace test
{
// Initializes the tenant base with sys_tenant_id_ and builds the execution
// context used for the vectorization 2.0 run on its own allocator
// (vec_2_alloc_), then attaches the shared SQL context to it.
TestOpEngine::TestOpEngine() : tbase_{sys_tenant_id_}, vec_2_exec_ctx_(vec_2_alloc_)
{
vec_2_exec_ctx_.set_sql_ctx(&sql_ctx_);
}
// Releases the global storage/IO singletons through destory() (that spelling
// is the actual name of the cleanup method defined in this file).
TestOpEngine::~TestOpEngine()
{
destory();
}
// gtest per-test setup. Runs the base-class setup, then prepares everything the
// operator tests need: the vectorization 2.0 execution context, the local I/O
// stack, the mock location service, the tenant environment and the two result
// output streams. Any failing ASSERT aborts the setup at that point.
void TestOpEngine::SetUp()
{
TestOptimizerUtils::SetUp();
addr_.set_ip_addr("1.1.1.1", 8888);
vec_2_exec_ctx_.set_my_session(&session_info_);
vec_2_exec_ctx_.create_physical_plan_ctx();
// Bring up the I/O stack; the configured filename prefix names the scratch data directory.
ASSERT_EQ(prepare_io(ObTestOpConfig::get_instance().test_filename_prefix_), OB_SUCCESS);
// init mock location service, used in optimizer compute table property
GCTX.location_service_ = &mock_location_service_;
// init MTL, used in ObTableScanOp::ObTableScanOp constructor
static ObDataAccessService instance;
tbase_.inner_set(&instance);
ASSERT_EQ(tbase_.init(), 0);
ObTenantEnv::set_tenant(&tbase_);
// Result files for the original and the vectorization 2.0 run; both are truncated on open.
out_origin_result_stream_.open(ObTestOpConfig::get_instance().test_filename_origin_output_file_, std::ios::out | std::ios::trunc);
out_vec_result_stream_.open(ObTestOpConfig::get_instance().test_filename_vec_output_file_, std::ios::out | std::ios::trunc);
}
// gtest per-test teardown.
// NOTE(review): this calls destroy(), while the cleanup method defined in this
// file is spelled destory() (and is what the destructor calls). destroy() is
// presumably inherited from a base class -- confirm which one is intended here.
void TestOpEngine::TearDown()
{
destroy();
}
// Stops and destroys the process-wide singletons used by the operator tests:
// the server block manager (stop, wait, destroy), the store cache, the IO
// manager, the KV global cache, the cluster version and the tmp file manager.
// The IO device itself is not destroyed here (that call is commented out).
void TestOpEngine::destory()
{
OB_SERVER_BLOCK_MGR.stop();
OB_SERVER_BLOCK_MGR.wait();
OB_SERVER_BLOCK_MGR.destroy();
OB_STORE_CACHE.destroy();
ObIOManager::get_instance().destroy();
ObKVGlobalCache::get_instance().destroy();
ObClusterVersion::get_instance().destroy();
ObTmpFileManager::get_instance().destroy();
// THE_IO_DEVICE->destroy();
}
// Looks up the local-file IO device in the device manager.
// Returns the device, or NULL when the lookup fails (the failure is only logged).
common::ObIODevice *TestOpEngine::get_device_inner()
{
  int ret = OB_SUCCESS;
  // for the local and nfs, storage_prefix and storage info are same
  common::ObString local_prefix(OB_LOCAL_PREFIX);
  common::ObIODevice *io_device = NULL;
  ret = common::ObDeviceManager::get_instance().get_device(local_prefix, local_prefix, io_device);
  if (OB_SUCCESS != ret) {
    LOG_WARN("get_device_inner", K(ret));
  }
  return io_device;
}
// Adapted from mittest/mtlenv/mock_tenant_module_env.h and
// unittest/storage/blocksstable/ob_data_file_prepare.h, with some refinements.
// Call prepare_io() before testing operators that need to dump intermediate data.
//
// Builds the scratch directory layout "<cwd>/data_<test_data_name_suffix>/{sstable,slog,clog}/",
// removes any previous data directory, then initializes in order: the local IO
// device, the IO manager (device channel, start, sys-tenant IO config), the
// server block manager, and the store cache. The tmp-file manager is
// initialized at the end regardless of the outcome of the steps above.
//
// @param test_data_name_suffix  suffix used to name the per-test data directory
// @return OB_SUCCESS, or the first error hit in the chain above
int TestOpEngine::prepare_io(const string & test_data_name_suffix)
{
int ret = OB_SUCCESS;
ObIODOpt iod_opt_array[5];
ObIODOpts iod_opts;
iod_opts.opts_ = iod_opt_array;
int64_t macro_block_count = 5 * 1024;
// NOTE(review): macro_block_size is not used anywhere in this function.
int64_t macro_block_size = 64 * 1024;
char cur_dir[OB_MAX_FILE_NAME_LENGTH];
char test_data_name[OB_MAX_FILE_NAME_LENGTH];
char data_dir[OB_MAX_FILE_NAME_LENGTH];
char file_dir[OB_MAX_FILE_NAME_LENGTH];
char clog_dir[OB_MAX_FILE_NAME_LENGTH];
char slog_dir[OB_MAX_FILE_NAME_LENGTH];
// Compose all paths relative to the current working directory.
if (NULL == getcwd(cur_dir, OB_MAX_FILE_NAME_LENGTH)) {
ret = OB_BUF_NOT_ENOUGH;
STORAGE_LOG(WARN, "cannot get cur dir", K(ret));
} else if (OB_FAIL(databuff_printf(test_data_name, OB_MAX_FILE_NAME_LENGTH, "%s", test_data_name_suffix.data()))) {
STORAGE_LOG(WARN, "failed to gen test name", K(ret));
} else if (OB_FAIL(databuff_printf(data_dir, OB_MAX_FILE_NAME_LENGTH, "%s/data_%s", cur_dir, test_data_name))) {
STORAGE_LOG(WARN, "failed to gen data dir", K(ret));
} else if (OB_FAIL(databuff_printf(file_dir, OB_MAX_FILE_NAME_LENGTH, "%s/sstable/", data_dir))) {
STORAGE_LOG(WARN, "failed to databuff printf", K(ret));
} else if (OB_FAIL(databuff_printf(slog_dir, OB_MAX_FILE_NAME_LENGTH, "%s/slog/", data_dir))) {
STORAGE_LOG(WARN, "failed to gen slog dir", K(ret));
} else if (OB_FAIL(databuff_printf(clog_dir, OB_MAX_FILE_NAME_LENGTH, "%s/clog/", data_dir))) {
STORAGE_LOG(WARN, "failed to gen clog dir", K(ret));
}
// NOTE(review): storage_env_ keeps pointers to the local arrays above and is
// filled in even when path generation failed; slog_dir is generated but never
// stored. Confirm storage_env_ is not read after this function returns.
storage_env_.data_dir_ = data_dir;
storage_env_.sstable_dir_ = file_dir;
storage_env_.clog_dir_ = clog_dir;
storage_env_.default_block_size_ = common::OB_DEFAULT_MACRO_BLOCK_SIZE;
storage_env_.data_disk_size_ = macro_block_count * common::OB_DEFAULT_MACRO_BLOCK_SIZE;
storage_env_.data_disk_percentage_ = 0;
storage_env_.log_disk_size_ = 20 * 1024 * 1024 * 1024ll;
// NOTE(review): get_device_inner() returns NULL on failure and the result is
// not checked before THE_IO_DEVICE->init() below.
share::ObLocalDevice *local_device = static_cast<share::ObLocalDevice *>(get_device_inner());
THE_IO_DEVICE = local_device;
iod_opt_array[0].set("data_dir", storage_env_.data_dir_);
iod_opt_array[1].set("sstable_dir", storage_env_.sstable_dir_);
iod_opt_array[2].set("block_size", storage_env_.default_block_size_);
iod_opt_array[3].set("datafile_disk_percentage", storage_env_.data_disk_percentage_);
iod_opt_array[4].set("datafile_size", storage_env_.data_disk_size_);
iod_opts.opt_cnt_ = 5;
ObTenantIOConfig io_config = ObTenantIOConfig::default_instance();
const int64_t async_io_thread_count = 8;
const int64_t sync_io_thread_count = 2;
const int64_t max_io_depth = 256;
// NOTE(review): bucket_num and block_size are not used anywhere in this function.
const int64_t bucket_num = 1024L;
const int64_t block_size = common::OB_MALLOC_BIG_BLOCK_SIZE;
char cmd[OB_MAX_FILE_NAME_LENGTH];
if (OB_FAIL(ret)) {
// do nothing
} else if (OB_FAIL(databuff_printf(cmd, OB_MAX_FILE_NAME_LENGTH, "rm -rf %s", data_dir))) {
LOG_WARN("failed to gen cmd", K(ret));
// Wipe any data directory left over from a previous run.
// NOTE(review): data_dir is interpolated into a shell command unquoted.
} else if (0 != system(cmd)) {
ret = OB_ERR_SYS;
LOG_WARN("failed to exec cmd", K(ret), K(cmd), K(errno), KERRMSG);
} else if (OB_FAIL(THE_IO_DEVICE->init(iod_opts))) {
LOG_WARN("fail to init io device", K(ret), K_(storage_env));
} else if (OB_FAIL(ObIOManager::get_instance().init())) {
LOG_WARN("fail to init io manager", K(ret));
} else if (OB_FAIL(ObIOManager::get_instance().add_device_channel(THE_IO_DEVICE, async_io_thread_count,
sync_io_thread_count, max_io_depth))) {
LOG_WARN("add device channel failed", K(ret));
} else if (OB_FAIL(ObIOManager::get_instance().start())) {
LOG_WARN("fail to start io manager", K(ret));
} else if (OB_FAIL(ObIOManager::get_instance().add_tenant_io_manager(OB_SYS_TENANT_ID, io_config))) {
LOG_WARN("add tenant io config failed", K(ret));
} else if (OB_FAIL(OB_SERVER_BLOCK_MGR.init(THE_IO_DEVICE, storage_env_.default_block_size_))) {
STORAGE_LOG(WARN, "init block manager fail", K(ret));
} else if (OB_FAIL(FileDirectoryUtils::create_full_path(file_dir))) {
STORAGE_LOG(WARN, "failed to create file dir", K(ret), K(file_dir));
} else if (OB_FAIL(OB_SERVER_BLOCK_MGR.start(0 /*reserver_size*/))) {
STORAGE_LOG(WARN, "Fail to start server block mgr", K(ret));
} else if (OB_FAIL(OB_SERVER_BLOCK_MGR.first_mark_device())) {
STORAGE_LOG(WARN, "Fail to start first mark device", K(ret));
} else if (OB_FAIL(OB_STORE_CACHE.init(10, 1, 1, 1, 1, 10000, 10))) {
LOG_WARN("fail to init OB_STORE_CACHE, ", K(ret));
} else {
}
// NOTE(review): runs even when an earlier step failed, and its result is ignored.
FILE_MANAGER_INSTANCE_V2.init();
return ret;
}
// Runs the query rewrite (transformer) phase on a resolved statement.
//
// @param stmt      [in,out] resolved statement. For an EXPLAIN statement the
//                  wrapped query is the one that gets transformed. On success
//                  stmt is replaced by the transformed DML statement.
// @param phy_plan  physical plan stored in the transformer context; the caller
//                  in this file passes NULL.
// @return OB_SUCCESS; OB_ERR_UNEXPECTED when stmt is NULL or there is no DML
//         statement to transform; otherwise the schema checker / transformer error.
int TestOpEngine::do_rewrite(ObStmt *&stmt, ObPhysicalPlan *phy_plan)
{
  int ret = OB_SUCCESS;
  ObSchemaChecker schema_checker;
  if (NULL == stmt) {
    // A failed resolve leaves stmt NULL; do not dereference it below.
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("stmt is null", K(ret));
  } else if (OB_FAIL(schema_checker.init(sql_schema_guard_))) {
    LOG_WARN("fail to init schema_checker", K(ret));
  } else {
    ObTransformerCtx transformer_ctx;
    transformer_ctx.allocator_ = &allocator_;
    transformer_ctx.session_info_ = &session_info_;
    transformer_ctx.schema_checker_ = &schema_checker;
    transformer_ctx.exec_ctx_ = &exec_ctx_;
    transformer_ctx.expr_factory_ = &expr_factory_;
    transformer_ctx.stmt_factory_ = &stmt_factory_;
    // ctx.stat_mgr_ = &stat_manager_;
    transformer_ctx.sql_schema_guard_ = &sql_schema_guard_;
    transformer_ctx.self_addr_ = &addr_;
    transformer_ctx.merged_version_ = OB_MERGED_VERSION_INIT;
    transformer_ctx.phy_plan_ = phy_plan;
    ObTransformerImpl transformer(&transformer_ctx);
    ObDMLStmt *sql_stmt = dynamic_cast<ObDMLStmt *>(stmt);
    if (stmt->is_explain_stmt()) { sql_stmt = static_cast<ObExplainStmt *>(stmt)->get_explain_query_stmt(); }
    if (NULL == sql_stmt) {
      // Neither a DML statement nor an EXPLAIN wrapping one: nothing to transform.
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("no DML statement to transform", K(ret));
    } else if (OB_FAIL(transformer.transform(sql_stmt))) {
      LOG_WARN("Failed to transform statement", K(ret));
    } else {
      stmt = sql_stmt;
    }
  }
  return ret;
}
// Runs the optimizer on a (rewritten) DML statement and produces a logical plan.
//
// The optimizer context is placement-constructed in memory taken from
// `allocator`; it is not destroyed explicitly.
//
// @param stmt       statement to optimize; must be an ObDMLStmt
// @param plan       [out] resulting logical plan
// @param distr      unused
// @param allocator  arena that provides the memory for the optimizer context
// @param exec_ctx   execution context handed to the optimizer; its SQL context
//                   gets session_info_ attached
// @return OB_SUCCESS; OB_ERR_UNEXPECTED for a non-DML statement or a missing
//         query/SQL context; OB_ALLOCATE_MEMORY_FAILED when the arena is
//         exhausted; otherwise the error from push_back() or the optimizer.
int TestOpEngine::do_optimize(ObStmt *stmt, ObLogPlan *&plan, ObPhyPlanType distr, ObArenaAllocator &allocator,
                              ObExecContext &exec_ctx)
{
  UNUSED(distr);
  int ret = OB_SUCCESS;
  ObDMLStmt *dml_stmt = dynamic_cast<ObDMLStmt *>(stmt);
  void *ctx_ptr = NULL;
  if (NULL == dml_stmt) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("stmt is not a DML statement", K(ret));
  } else if (NULL == dml_stmt->get_query_ctx() || NULL == exec_ctx.get_sql_ctx()) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("query ctx or sql ctx is null", K(ret));
  } else if (NULL == (ctx_ptr = allocator.alloc(sizeof(ObOptimizerContext)))) {
    // Placement new on a NULL buffer would be undefined behaviour.
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("failed to allocate optimizer context", K(ret));
  } else {
    exec_ctx.get_sql_ctx()->session_info_ = &session_info_;
    ObOptimizerContext *opt_ctx = new (ctx_ptr) ObOptimizerContext(
        &session_info_, &exec_ctx,
        // schema_mgr_, // schema manager
        &sql_schema_guard_,
        //&stat_manager_, // statistics manager
        NULL, // statistics manager
        static_cast<ObIAllocator &>(allocator_), &param_store_, addr_, NULL,
        dml_stmt->get_query_ctx()->get_global_hint(), expr_factory_, dml_stmt, false,
        stmt_factory_.get_query_ctx());
    opt_ctx->set_opt_stat_manager(&opt_stat_manager_);
    opt_ctx->disable_batch_rpc();
    opt_ctx->set_local_server_addr("1.1.1.1", 8888);
    opt_ctx->set_use_default_stat();
    ObTableLocation table_location;
    ObOptimizer optimizer(*opt_ctx);
    // Check the push_back result instead of letting optimize() overwrite it.
    if (OB_FAIL(opt_ctx->get_table_location_list().push_back(table_location))) {
      LOG_WARN("failed to push back table location", K(ret));
    } else if (OB_FAIL(optimizer.optimize(*dml_stmt, plan))) {
      LOG_WARN("failed to optimize", K(ret), "SQL", *dml_stmt);
    }
  }
  return ret;
}
// Generates the physical plan for a logical plan: applies the configured batch
// size to both the optimizer context and the physical plan, then generates the
// expressions followed by the operators for the current cluster version.
// Returns OB_SUCCESS or the first code generation error.
int TestOpEngine::do_code_generate(const ObLogPlan &log_plan, ObCodeGenerator &code_gen, ObPhysicalPlan &phy_plan)
{
  int ret = OB_SUCCESS;
  const uint64_t cluster_version = CLUSTER_CURRENT_VERSION;
  const auto configured_batch_size = ObTestOpConfig::get_instance().batch_size_;
  // WARN: may have bug here
  log_plan.get_optimizer_context().set_batch_size(configured_batch_size);
  phy_plan.set_batch_size(configured_batch_size);

  ret = code_gen.generate_exprs(log_plan, phy_plan, cluster_version);
  if (OB_SUCCESS != ret) {
    LOG_WARN("fail to get all raw exprs", K(ret));
  } else {
    ret = code_gen.generate_operators(log_plan, phy_plan, cluster_version);
    if (OB_SUCCESS != ret) {
      LOG_WARN("fail to generate plan", K(ret));
    }
  }
  return ret;
}
// Placeholder hook: ignores the plan and always reports success.
int TestOpEngine::test_phy_plan(ObPhysicalPlan &plan)
{
  UNUSED(plan);
  return OB_SUCCESS;
}
// Recursively walks the operator tree and replaces every PHY_TABLE_SCAN
// operator with a heap-allocated ObFakeTableScanVecOp built from the same exec
// context, spec and input. Child slots are rewritten in place; for the first
// two children the left_/right_ shortcuts and the child's parent_ link are
// refreshed as well.
// Returns the (possibly replaced) operator that should take root's place.
ObOperator *TestOpEngine::subtitude_table_scan_to_fake(ObOperator *root)
{
  for (uint32 idx = 0; idx < root->get_child_cnt(); ++idx) {
    root->children_[idx] = subtitude_table_scan_to_fake(root->children_[idx]);
    switch (idx) {
      case 0:
        root->left_ = root->children_[idx];
        root->left_->parent_ = root;
        break;
      case 1:
        root->right_ = root->children_[idx];
        root->right_->parent_ = root;
        break;
      default:
        break;
    }
  }
  if (PHY_TABLE_SCAN != root->get_spec().get_type()) {
    return root;
  }
  return new oceanbase::sql::ObFakeTableScanVecOp(root->get_exec_ctx(), root->get_spec(), root->get_input());
}
// Builds and opens the operator tree for one SQL text.
//
// Pipeline: resolve -> rewrite -> optimize -> (log the explain plan) ->
// generate the physical plan -> execute/open and hand back the root operator.
// Each step runs only if the previous one succeeded; printing the explain plan
// is best-effort and never fails the call.
//
// @param sql       SQL text to compile
// @param vector_2  true: use the vectorization 2.0 allocator / exec context and
//                  enable rich format; false: use the original ones
// @param op        [out] opened root operator
// @param executor  executor used to run the plan
// @return OB_SUCCESS or the first error encountered
int TestOpEngine::get_tested_op_from_string(const std::string &sql, bool vector_2, ObOperator *&op,
                                            ObExecutor &executor)
{
  int ret = OB_SUCCESS;
  ObStmt *stmt = NULL;
  ObLogPlan *log_plan = NULL;
  ObPhysicalPlan *phy_plan = NULL;
  ObArenaAllocator *p_alloc = NULL;
  ObExecContext *p_exec_ctx = NULL;
  if (vector_2) {
    p_alloc = &vec_2_alloc_;
    p_exec_ctx = &vec_2_exec_ctx_;
  } else {
    p_alloc = &allocator_;
    p_exec_ctx = &exec_ctx_;
  }
  // 1.resolve
  // 2.rewrite
  // 3.optimize
  do_resolve(sql.c_str(), stmt, true, JSON_FORMAT, OB_SUCCESS, false);
  if (NULL == stmt) {
    ret = OB_ERR_UNEXPECTED;
    LOG_ERROR("resolve failed, stmt is null", K(ret));
  } else if (OB_FAIL(do_rewrite(stmt, phy_plan))) {
    // NOTE: phy_plan is still NULL here; it is only allocated further down.
    LOG_ERROR("rewrite failed", K(ret));
  } else if (OB_FAIL(do_optimize(stmt, log_plan, OB_PHY_PLAN_LOCAL, *p_alloc, *p_exec_ctx))) {
    LOG_ERROR("optimize failed", K(ret));
  } else if (NULL == log_plan) {
    ret = OB_ERR_UNEXPECTED;
    LOG_ERROR("log_plan is null");
  } else if (OB_FAIL(ObCacheObjectFactory::alloc(phy_plan))) {
    LOG_ERROR("fail to allocate mem to phy_plan", K(ret));
  } else if (NULL == phy_plan) {
    ret = OB_ERR_UNEXPECTED;
    LOG_ERROR("phy_plan is null");
  } else {
    // display explain plan (best effort: a failure here must not affect ret)
    oceanbase::sql::ObExplainDisplayOpt option;
    option.with_tree_line_ = true;
    ObSqlPlan sql_plan(log_plan->get_allocator());
    ObSEArray<common::ObString, 64> plan_strs;
    const int display_ret = sql_plan.print_sql_plan(log_plan, EXPLAIN_EXTENDED_NOADDR, option, plan_strs);
    if (OB_SUCCESS != display_ret) {
      LOG_WARN("failed to store sql plan", K(display_ret));
    } else if (plan_strs.count() > 0) {
      LOG_INFO("Generate Logical plan:");
      // "%.*s" limits the output to length() bytes; "%*s" would only set a
      // minimum field width and keep reading until a terminating NUL.
      _OB_LOG(INFO, "%.*s", plan_strs.at(0).length(), plan_strs.at(0).ptr());
    }
    // 4.generate physical plan
    if (OB_FAIL(generate_physical_plan(log_plan, *phy_plan, *p_exec_ctx, vector_2))) {
      LOG_ERROR("generate physical plan failed", K(ret));
    // 5.open and get runtime op
    // ObFakeTableScanOp will replace bottom ObTableScanOp here
    } else if (OB_FAIL(open_and_get_op(*p_exec_ctx, executor, *phy_plan, op))) {
      LOG_ERROR("open operators failed", K(ret));
    }
  }
  return ret;
}
// Renders a decimal-int datum as text. The integer width is derived from the
// expression's precision; an unexpected width is logged and yields an empty string.
std::string TestOpEngine::get_decimal_result_from_datum(ObExpr *expr, const ObDatum &datum)
{
  int ret = OB_SUCCESS;
  switch (get_decimalint_type(expr->datum_meta_.precision_)) {
    case common::DECIMAL_INT_32:
      return std::to_string(datum.get_decimal_int32());
    case common::DECIMAL_INT_64:
      return std::to_string(datum.get_decimal_int64());
    case common::DECIMAL_INT_128:
      return std::to_string(datum.get_decimal_int128());
    case common::DECIMAL_INT_256:
      return std::to_string(datum.get_decimal_int256());
    case common::DECIMAL_INT_512:
      return std::to_string(datum.get_decimal_int512());
    default:
      LOG_WARN("unexpected precision", K(expr->datum_meta_));
      return std::string();
  }
}
// Renders a decimal-int value stored in a raw vector payload as text. The
// payload is reinterpreted as the integer width implied by the expression's
// precision; an unexpected width is logged and yields an empty string.
std::string TestOpEngine::get_decimal_result_from_payload(ObExpr *expr, const char *payload)
{
  int ret = OB_SUCCESS;
  switch (get_decimalint_type(expr->datum_meta_.precision_)) {
    case common::DECIMAL_INT_32:
      return std::to_string(*reinterpret_cast<const int32_t *>(payload));
    case common::DECIMAL_INT_64:
      return std::to_string(*reinterpret_cast<const int64_t *>(payload));
    case common::DECIMAL_INT_128:
      return std::to_string(*reinterpret_cast<const int128_t *>(payload));
    case common::DECIMAL_INT_256:
      return std::to_string(*reinterpret_cast<const int256_t *>(payload));
    case common::DECIMAL_INT_512:
      return std::to_string(*reinterpret_cast<const int512_t *>(payload));
    default:
      LOG_WARN("unexpected precision", K(expr->datum_meta_));
      return std::string();
  }
}
// Renders the value of `expr` at batch row `row` as text.
//
// The value is read from the expression's vector when the expression uses rich
// format and the operator is not PHY_EXPR_VALUES; otherwise it is read from the
// batch datums. NULL values are rendered as "null". Supported types: int32,
// int, double, number, decimal int, varchar and char; any other type is only
// logged and yields an empty string.
//
// @param op        operator that produced the row (only its spec type is inspected)
// @param expr      expression whose value is rendered
// @param eval_ctx  evaluation context used to locate the vector / datums
// @param row       row index inside the current batch
std::string TestOpEngine::get_data_by_datum_type(const ObOperator *op, ObExpr *expr, ObEvalCtx &eval_ctx, int row)
{
std::string result_str;
ObIVector *i_vector = NULL;
ObDatum *datums = NULL;
// Exactly one of i_vector / datums is set; the branches below pick the source by testing i_vector.
if (expr->enable_rich_format() && op->spec_.get_type() != PHY_EXPR_VALUES) {
i_vector = expr->get_vector(eval_ctx);
if (i_vector->is_null(row)) { result_str = "null"; }
} else {
datums = expr->locate_batch_datums(eval_ctx);
if (datums[row].is_null()) { result_str = "null"; }
}
// result_str is still empty here unless the value is NULL.
if (result_str != "null") {
switch (expr->datum_meta_.get_type()) {
// Open question from the original author: why is expr->datum_meta_.get_type() == ObInt32Type
// while expr->res_buf_len_ == 8?
case ObInt32Type:
if (i_vector != NULL) {
result_str = std::to_string(i_vector->get_int32(row));
// str_result = std::to_string(i_vector->get_int(row));
} else {
result_str = std::to_string(datums[row].get_int32());
}
break;
case ObIntType: {
if (i_vector != NULL) {
result_str = std::to_string(i_vector->get_int(row));
} else {
result_str = std::to_string(datums[row].get_int());
}
break;
}
case ObDoubleType: {
if (i_vector != NULL) {
result_str = std::to_string(i_vector->get_double(row));
} else {
result_str = std::to_string(datums[row].get_double());
}
break;
}
case ObNumberType: {
// result type of avg()
// NOTE(review): the text buffer is a variable-length array sized by ob_num.get_length(),
// and the same value is used as the length of the resulting string. Confirm that
// get_length() is the formatted text length and not the number of internal digits.
if (i_vector != NULL) {
ObNumber ob_num(i_vector->get_number(row));
char buf[ob_num.get_length()];
ob_num.to_string(buf, ob_num.get_length());
result_str = std::string(buf, ob_num.get_length());
} else {
ObNumber ob_num(datums[row].get_number());
char buf[ob_num.get_length()];
ob_num.to_string(buf, ob_num.get_length());
result_str = std::string(buf, ob_num.get_length());
}
break;
}
case ObDecimalIntType: {
// result type of sum()
if (i_vector != NULL) {
// TODO: replace this when i_vector have get_decimalXXX() api
result_str = get_decimal_result_from_payload(expr, i_vector->get_payload(row));
} else {
result_str = get_decimal_result_from_datum(expr, datums[row]);
}
break;
}
case ObVarcharType:
case ObCharType: {
// Strings are copied with an explicit length (payload/ptr plus length).
if (i_vector != NULL) {
result_str = std::string(i_vector->get_payload(row), i_vector->get_length(row));
} else {
// str_result = std::to_string(datums[row].get_int32());
result_str = std::string(datums[row].ptr_, datums[row].len_);
}
break;
}
default: LOG_INFO("Can not display value so far for: ", K(expr->datum_meta_.get_type()));
}
}
return result_str;
}
// Generates the physical plan for `log_plan` into `phy_plan`.
//
// Rich (vector 2.0) format is switched on or off both on the session attached
// to the optimizer context and on the physical plan before code generation. On
// success the plan is registered with the physical plan context of `exec_ctx`.
// Returns OB_ERR_UNEXPECTED when the exec context has no physical plan context,
// otherwise the result of code generation.
int TestOpEngine::generate_physical_plan(ObLogPlan *log_plan, ObPhysicalPlan &phy_plan, ObExecContext &exec_ctx,
                                         bool enable_rich_format)
{
  int ret = OB_SUCCESS;
  // begin generate phsical plan
  ObPhysicalPlanCtx *plan_ctx = exec_ctx.get_physical_plan_ctx();
  if (NULL != plan_ctx) {
    /*
    bool ObStaticEngineExprCG::enable_rich_format() const {
    //TODO shengle change the version
    return cur_cluster_version_ >= CLUSTER_VERSION_4_1_0_0
    && op_cg_ctx_.session_->enable_rich_format();
    }
    */
    // So we need set here to support rich_format
    ObCodeGenerator code_gen(false /*use_jit*/, CLUSTER_VERSION_4_3_0_0, &(plan_ctx->get_datum_param_store()));
    log_plan->get_optimizer_context().get_session_info()->sys_vars_cache_.set_enable_rich_vector_format(enable_rich_format);
    phy_plan.set_use_rich_format(enable_rich_format);
    ret = do_code_generate(*log_plan, code_gen, phy_plan);
    if (OB_SUCCESS == ret) {
      plan_ctx->set_phy_plan(&phy_plan);
    } else {
      LOG_ERROR("Can not generate physical plan ", K(ret));
    }
  } else {
    ret = OB_ERR_UNEXPECTED;
    LOG_ERROR("pctx is null");
  }
  return ret;
}
// Executes the physical plan far enough to obtain an opened operator tree.
//
// Initializes the executor and the per-operator / per-expression contexts, runs
// execute_plan(), swaps the table scan operators for fake ones, and opens the
// execute result.
//
// @param exec_ctx  execution context to run in
// @param ob_exe    executor, initialized here with phy_plan
// @param phy_plan  physical plan to execute
// @param root      [out] root operator of the opened tree; only set on success
// @return OB_SUCCESS or the first error encountered
int TestOpEngine::open_and_get_op(ObExecContext &exec_ctx, ObExecutor &ob_exe, ObPhysicalPlan &phy_plan,
ObOperator *&root)
{
int ret = OB_SUCCESS;
ObTaskExecutorCtx &task_exec_ctx = exec_ctx.get_task_exec_ctx();
ObExecuteResult &exe_result = task_exec_ctx.get_execute_result();
// ObExecutor ob_exe;
if (OB_FAIL(ob_exe.init(&phy_plan))) {
LOG_WARN("fail init exec ObExecutor", K(ret));
} else if (OB_FAIL(exec_ctx.init_phy_op(phy_plan.get_phy_operator_size()))) {
LOG_WARN("fail init exec phy op ctx", K(ret));
} else if (OB_FAIL(exec_ctx.init_expr_op(phy_plan.get_expr_operator_size()))) {
LOG_WARN("fail init exec expr op ctx", K(ret));
} else if (OB_FAIL(ob_exe.execute_plan(exec_ctx))) {
LOG_ERROR("execute plan fail ", K(ret));
} else {
// ob_exe.execute_plan(exec_ctx) uses an ObArenaAllocator to allocate the original ObTableScanOp,
// while subtitude_table_scan_to_fake() simply creates each ObFakeTableScanVecOp with plain `new`.
exe_result.static_engine_root_ = subtitude_table_scan_to_fake(exe_result.static_engine_root_);
if (OB_FAIL(exe_result.open(exec_ctx))) {
LOG_ERROR("open plan fail ", K(ret));
} else {
root = exe_result.static_engine_root_;
}
}
return ret;
}
int TestOpEngine::print_and_cmp_final_output(const ObBatchRows *brs, ObOperator *root, bool is_comparing)
{
int ret = OB_SUCCESS;
std::string output_line;
for (int i = 0; i < brs->size_; i++) {
if (!brs->skip_->exist(i)) {
for (int j = 0; j < root->get_spec().output_.count(); j++) {
ObExpr *output_expr = root->get_spec().output_.at(j);
string result_str = get_data_by_datum_type(root, output_expr, root->eval_ctx_, i);
output_line += result_str + " ";
// compare whether is the same output
// cmp output results
if (is_comparing) {
if (result_str.compare(temp_cmp_data_[i][j]) != 0) {
// different
ret = OB_ERR_UNEXPECTED;
LOG_WARN("Got an different output result in :", K(i), K(j));
LOG_WARN("Original op output value is :", K(ObString(temp_cmp_data_[i][j].data())));
LOG_WARN(" New op output value is :", K(ObString(result_str.data())));
}
} else {
temp_cmp_data_[i].push_back(result_str);
}
}
LOG_INFO(output_line.data());
output_line.clear();
}
}
return ret;
}
//
int TestOpEngine::print_to_file(const ObBatchRows *brs, ObOperator *root, const ExprFixedArray &exprs, bool is_result,
std::ofstream *out_data_stream)
{
int ret = OB_SUCCESS;
std::string output_line;
for (int i = 0; i < brs->size_; i++) {
if (!brs->skip_->exist(i)) {
for (int j = 0; j < exprs.count(); j++) {
ObExpr *expr = exprs.at(j);
string result_str = get_data_by_datum_type(root, expr, root->eval_ctx_, i);
output_line += result_str;
if (j != exprs.count() - 1) { output_line += ", "; }
}
if (is_result) {
*out_data_stream << output_line << std::endl;
} else {
*out_data_stream << "insert into " << reinterpret_cast<ObFakeTableScanVecOp *>(root)->get_tsc_spec().table_name_
<< " values(" << output_line << ");" << std::endl;
}
output_line.clear();
}
}
return ret;
}
int TestOpEngine::basic_random_test(const std::string &test_file)
{
// run tests
std::ifstream if_tests(test_file);
if (if_tests.is_open() != true) { return -1; }
int ret = OB_SUCCESS;
std::string line;
while (std::getline(if_tests, line)) {
// handle query
if (line.size() <= 0) continue;
if (line.at(0) == '#') continue;
if (ObTestOpConfig::get_instance().output_result_to_file_) {
system(("rm -f " + ObTestOpConfig::get_instance().test_filename_prefix_ + ".log").data());
system(("rm -f " + ObTestOpConfig::get_instance().test_filename_prefix_ + ".log.*").data());
system(("cat /dev/null > " + ObTestOpConfig::get_instance().test_filename_origin_output_file_).c_str());
system(("cat /dev/null > " + ObTestOpConfig::get_instance().test_filename_vec_output_file_).c_str());
}
ret = OB_SUCCESS;
ObOperator *original_root = NULL;
ObOperator *vec_2_root = NULL;
ObExecutor original_exector;
ObExecutor vec_2_exector;
if (OB_FAIL(get_tested_op_from_string(line, false, original_root, original_exector))) {
LOG_WARN("generate original tested op fail, sql: ", K(line.data()));
} else if (OB_FAIL(get_tested_op_from_string(line, true, vec_2_root, vec_2_exector))) {
LOG_WARN("generate vectorization 2.0 tested op fail, sql: ", K(line.data()));
} else {
// begin get_next_batch()
// 5.compare two op outputs
int round = 1;
const int64_t max_row_cnt = 256;
const ObBatchRows *original_child_brs = nullptr;
const ObBatchRows *vec_2_child_brs = nullptr;
LOG_INFO("============== Final output ===============", K(round));
while (!original_root->brs_.end_ || !vec_2_root->brs_.end_) {
if (OB_FAIL(original_root->get_next_batch(max_row_cnt, original_child_brs))) {
LOG_ERROR("root op fail to get_next_batch data", K(original_root));
break;
}
temp_cmp_data_.resize(original_child_brs->size_);
LOG_INFO("============== Original ===============", K(round));
if (ObTestOpConfig::get_instance().output_result_to_file_) {
if (OB_FAIL(print_to_file(original_child_brs, original_root, original_root->spec_.output_, true,
&out_origin_result_stream_))) {
// break, other error log has already print in inner function
break;
}
} else {
if (OB_FAIL(print_and_cmp_final_output(original_child_brs, original_root, false))) {
// break, other error log has already print in inner function
break;
}
}
if (OB_FAIL(vec_2_root->get_next_batch(max_row_cnt, vec_2_child_brs))) {
LOG_ERROR("root op fail to get_next_batch data", K(vec_2_root));
break;
}
LOG_INFO("============== Vectorization 2.0 ===============", K(round));
if (ObTestOpConfig::get_instance().output_result_to_file_) {
if (OB_FAIL(
print_to_file(vec_2_child_brs, vec_2_root, vec_2_root->spec_.output_, true, &out_vec_result_stream_))) {
// break, other error log has already print in inner function
break;
}
} else {
if (OB_FAIL(print_and_cmp_final_output(vec_2_child_brs, vec_2_root, true))) {
// break, other error log has already print in inner function
break;
}
}
if (!ObTestOpConfig::get_instance().output_result_to_file_) {
if (original_child_brs->size_ != vec_2_child_brs->size_) {
LOG_ERROR("Two operator output different [batch size] in", K(round), K(original_child_brs->size_),
K(vec_2_child_brs->size_));
ret = OB_ERR_UNEXPECTED;
break;
}
if (MEMCMP(original_child_brs->skip_->data_, vec_2_child_brs->skip_->data_,
original_child_brs->skip_->memory_size(original_child_brs->size_))
!= 0) {
LOG_ERROR("Two operator output different [skip size] in", K(round), K(original_child_brs->size_),
K(vec_2_child_brs->size_));
ret = OB_ERR_UNEXPECTED;
break;
}
}
round++;
temp_cmp_data_.clear();
}
//if output to file, compare data in file at last
if (ObTestOpConfig::get_instance().output_result_to_file_) {
if (original_root->get_spec().get_type() == PHY_HASH_JOIN && original_root->get_spec().get_type() == PHY_VEC_HASH_JOIN) {
system(("sort " + ObTestOpConfig::get_instance().test_filename_origin_output_file_ + " -o "
+ ObTestOpConfig::get_instance().test_filename_origin_output_file_)
.c_str());
system(("sort " + ObTestOpConfig::get_instance().test_filename_vec_output_file_ + " -o "
+ ObTestOpConfig::get_instance().test_filename_vec_output_file_)
.c_str());
}
int system_ret = system(("diff " + ObTestOpConfig::get_instance().test_filename_origin_output_file_ + " " + ObTestOpConfig::get_instance().test_filename_vec_output_file_ + " > /dev/null").c_str());
LOG_ERROR("CODE: ", K(system_ret));
LOG_ERROR("CODE: ", K(WEXITSTATUS(system_ret)));
if (WEXITSTATUS(system_ret) != 0) {
LOG_ERROR("Two operator output different!");
// Preserve the site
uint64_t ms = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
std::fstream error_output_stream;
std::string error_file_name = "mismatch." + std::to_string(ms);
LOG_ERROR("error_file_name: ", K(error_file_name.data()));
error_output_stream.open(error_file_name, std::fstream::in | std::fstream::out | std::fstream::app);
error_output_stream << line << std::endl;
system(("cat " + ObTestOpConfig::get_instance().test_filename_prefix_ + ".cfg" + " >> " + error_file_name)
.c_str());
system(("mv " + ObTestOpConfig::get_instance().test_filename_origin_output_file_ + " "
+ ObTestOpConfig::get_instance().test_filename_origin_output_file_ + std::to_string(ms))
.c_str());
system(("mv " + ObTestOpConfig::get_instance().test_filename_vec_output_file_ + " "
+ ObTestOpConfig::get_instance().test_filename_vec_output_file_ + std::to_string(ms))
.c_str());
}
}
}
exec_ctx_.~ObExecContext();
new (&exec_ctx_) ObExecContext(allocator_);
exec_ctx_.set_sql_ctx(&sql_ctx_);
exec_ctx_.set_my_session(&session_info_);
exec_ctx_.create_physical_plan_ctx();
vec_2_exec_ctx_.~ObExecContext();
new (&vec_2_exec_ctx_) ObExecContext(allocator_);
vec_2_exec_ctx_.set_sql_ctx(&sql_ctx_);
vec_2_exec_ctx_.set_my_session(&session_info_);
vec_2_exec_ctx_.create_physical_plan_ctx();
}
EXPECT_EQ(ret, 0);
if (ret == OB_SUCCESS) {
LOG_INFO(" ======================= ");
LOG_INFO("Test Pass!");
LOG_INFO("All new operator output is equal to the original one.");
LOG_INFO(" ======================= ");
} else {
LOG_INFO(" ======================= ");
LOG_INFO("Test Fail!");
LOG_INFO(" ======================= ");
}
return ret;
}
int TestOpEngine::basic_random_test_output_to_file(const std::string &test_file, bool vector_2)
{
// run tests
std::ifstream if_tests(test_file);
if (if_tests.is_open() != true) { return -1; }
int ret = OB_SUCCESS;
std::string line;
while (std::getline(if_tests, line)) {
// handle query
if (line.size() <= 0) continue;
if (line.at(0) == '#') continue;
ObOperator *root = NULL;
ObExecutor exector;
if (OB_FAIL(get_tested_op_from_string(line, vector_2, root, exector))) {
LOG_WARN("generate tested op fail, sql: ", K(line.data()));
} else {
int round = 1;
const int64_t max_row_cnt = 256;
const ObBatchRows *child_brs = nullptr;
LOG_INFO("============== Final output ===============", K(round));
while (!root->brs_.end_) {
if (OB_FAIL(root->get_next_batch(max_row_cnt, child_brs))) {
LOG_ERROR("root op fail to get_next_batch data", K(root));
break;
}
if (OB_FAIL(print_to_file(child_brs, root, root->spec_.output_, true,
vector_2 ? &out_vec_result_stream_ : &out_origin_result_stream_))) {
// break, other error log has already print in inner function
break;
}
round++;
}
}
exec_ctx_.~ObExecContext();
new (&exec_ctx_) ObExecContext(allocator_);
exec_ctx_.set_sql_ctx(&sql_ctx_);
exec_ctx_.set_my_session(&session_info_);
exec_ctx_.create_physical_plan_ctx();
}
EXPECT_EQ(ret, 0);
return ret;
}
} // namespace test

View File

@ -0,0 +1,115 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
// #define USING_LOG_PREFIX SQL_ENGINE
#include <gtest/gtest.h>
#include <fstream>
#include "../optimizer/test_optimizer_utils.h"
#include "sql/code_generator/ob_code_generator.h"
#include "storage/blocksstable/ob_block_sstable_struct.h"
namespace test
{
class MockLocationService : public share::ObLocationService
{
public:
MockLocationService()
{}
virtual ~MockLocationService()
{}
virtual int nonblock_get(const uint64_t tenant_id, const ObTabletID &tablet_id, ObLSID &ls_id)
{
ls_id = ObLSID::SYS_LS_ID;
return OB_SUCCESS;
}
virtual int nonblock_get(const int64_t cluster_id, const uint64_t tenant_id, const ObLSID &ls_id,
ObLSLocation &location)
{
int ret = OB_SUCCESS;
ObAddr add;
ObReplicaProperty relica_pro;
ObLSRestoreStatus ls_restore_sta;
add.set_ip_addr("1.1.1.1", 8888);
ObLSReplicaLocation ls_replica_loc;
if (OB_FAIL(location.init(1, 1, ls_id, 1))) {
} else if (OB_FAIL(ls_replica_loc.init(add, LEADER, 100, REPLICA_TYPE_FULL, relica_pro, ls_restore_sta, 100))) {
} else {
location.add_replica_location(ls_replica_loc);
}
return ret;
}
};
class TestOpEngine : public TestOptimizerUtils
{
public:
TestOpEngine();
virtual ~TestOpEngine();
virtual void SetUp();
virtual void TearDown();
virtual void destory();
int basic_random_test(const std::string &line);
int basic_random_test_output_to_file(const std::string &test_file, bool vector_2);
private:
// disallow copy
DISALLOW_COPY_AND_ASSIGN(TestOpEngine);
protected:
// function members
common::ObIODevice *get_device_inner();
int prepare_io(const std::string & test_data_name_suffix);
int do_optimize(ObStmt *stmt, ObLogPlan *&plan, ObPhyPlanType distr, ObArenaAllocator &allocator,
ObExecContext &exec_ctx);
int do_code_generate(const ObLogPlan &log_plan, ObCodeGenerator &code_gen, ObPhysicalPlan &phy_plan);
int do_rewrite(ObStmt *&stmt, ObPhysicalPlan *phy_plan);
int test_phy_plan(ObPhysicalPlan &plan);
ObOperator *subtitude_table_scan_to_fake(ObOperator *root);
int get_tested_op_from_string(const std::string &sql, bool vector_2, ObOperator *&op, ObExecutor &executor);
int generate_physical_plan(ObLogPlan *log_plan, ObPhysicalPlan &phy_plan, ObExecContext &exec_ctx,
bool enable_rich_format);
int open_and_get_op(ObExecContext &exec_ctx, ObExecutor &ob_exe, ObPhysicalPlan &phy_plan, ObOperator *&root);
int print_and_cmp_final_output(const ObBatchRows *brs, ObOperator *root, bool is_comparing);
static std::string get_decimal_result_from_datum(ObExpr *expr, const ObDatum &datum);
static std::string get_decimal_result_from_payload(ObExpr *expr, const char *payload);
static std::string get_data_by_datum_type(const ObOperator *op, ObExpr *expr, ObEvalCtx &eval_ctx, int row);
static int print_to_file(const ObBatchRows *brs, ObOperator *root, const ExprFixedArray &exprs, bool is_result,
std::ofstream *out_data_stream);
protected:
// data members
std::vector<std::vector<std::string>> temp_cmp_data_;
ObTenantBase tbase_;
MockLocationService mock_location_service_;
//
std::string test_config_file_;
std::string env_dir_;
blocksstable::ObStorageEnv storage_env_;
std::ofstream out_origin_result_stream_;
std::ofstream out_vec_result_stream_;
ParamStore param_store_;
ObAddr addr_;
ObArenaAllocator vec_2_alloc_;
ObExecContext vec_2_exec_ctx_; // vec_2_exec_ctx_ for vectorization 2.0, there is a exec_ctx_ in father class which is
// used in vectorization 1.0
};
} // namespace test