[FEAT MERGE] impl vectorization 2.0

Co-authored-by: Naynahs <cfzy002@126.com>
Co-authored-by: hwx65 <1780011298@qq.com>
Co-authored-by: oceanoverflow <oceanoverflow@gmail.com>
obdev authored 2023-12-22 03:43:19 +00:00, committed by ob-robot
parent 1178245448
commit b6773084c6
592 changed files with 358124 additions and 303288 deletions

View File

@@ -7,3 +7,8 @@
#aggr_unittest(test_merge_groupby)
#aggr_unittest(test_scalar_aggregate)
#aggr_unittest(test_merge_distinct)
function(aggr_unittest2 case)
  sql_unittest(${ARGV})
  target_sources(${case} PRIVATE ../test_op_engine.cpp ../ob_fake_table_scan_vec_op.cpp)
endfunction()
aggr_unittest2(test_hash_groupby2)
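Design note: aggr_unittest2 forwards its arguments to sql_unittest and then compiles the shared harness sources (test_op_engine.cpp plus the fake vectorized table-scan operator) into the case target, so a vectorized aggregate test is registered with the single call shown above for test_hash_groupby2.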

View File

@@ -0,0 +1,8 @@
digit_data_format=4
string_data_format=4
data_range_level=0
skips_probability=10
nulls_probability=30
round=10
batch_size=256
output_result_to_file=1
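For orientation, this .cfg is plain key=value text that the harness loads at startup (main() below calls ObTestOpConfig::get_instance().init()). A minimal standalone reader for the format might look like the following sketch; load_cfg and everything around it is hypothetical, not the harness's actual parser:

// Hypothetical sketch: read the key=value .cfg format shown above into a map.
// The real harness loads these knobs in ObTestOpConfig::init(); this is not that code.
#include <fstream>
#include <map>
#include <string>

std::map<std::string, int> load_cfg(const std::string &path)
{
  std::map<std::string, int> kv;
  std::ifstream in(path);
  std::string line;
  while (std::getline(in, line)) {
    if (line.empty() || line[0] == '#') {
      continue;  // skip blank lines and comments
    }
    const std::string::size_type eq = line.find('=');
    if (eq == std::string::npos) {
      continue;  // ignore malformed lines
    }
    kv[line.substr(0, eq)] = std::stoi(line.substr(eq + 1));
  }
  return kv;  // e.g. kv["batch_size"] == 256 for the file above
}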

View File

@@ -0,0 +1,135 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
// #define USING_LOG_PREFIX SQL_ENGINE
#define USING_LOG_PREFIX COMMON
#include <cstring>   // strcpy
#include <fstream>   // std::ifstream in the commented-out your_own_test below
#include <iterator>
#include <string>
#include <vector>
#include <gtest/gtest.h>
#include "../test_op_engine.h"
#include "../ob_test_config.h"
using namespace ::oceanbase::sql;
namespace test
{
class TestHashGroupByVec : public TestOpEngine
{
public:
  TestHashGroupByVec();
  virtual ~TestHashGroupByVec();
  virtual void SetUp();
  virtual void TearDown();

private:
  // disallow copy
  DISALLOW_COPY_AND_ASSIGN(TestHashGroupByVec);

protected:
  // function members

protected:
  // data members
};
TestHashGroupByVec::TestHashGroupByVec()
{
  std::string schema_filename = ObTestOpConfig::get_instance().test_filename_prefix_ + ".schema";
  strcpy(schema_file_path_, schema_filename.c_str());
}

TestHashGroupByVec::~TestHashGroupByVec()
{}

void TestHashGroupByVec::SetUp()
{
  TestOpEngine::SetUp();
}

void TestHashGroupByVec::TearDown()
{
  destroy();
}
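// basic_test feeds every query in <test_filename_prefix_>.test through the
// harness; judging from the companion sweep script, basic_random_test is
// expected to return 0 when the vectorization 2.0 operator and the original
// operator agree on the output (the comparison itself lives in TestOpEngine).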
TEST_F(TestHashGroupByVec, basic_test)
{
  std::string test_file_path = ObTestOpConfig::get_instance().test_filename_prefix_ + ".test";
  int ret = basic_random_test(test_file_path);
  EXPECT_EQ(ret, 0);
}
// TEST_F(TestHashGroupByVec, basic_test2)
// {
//   int ret = OB_SUCCESS;
//   std::string test_file_path = ObTestOpConfig::get_instance().test_filename_prefix_ + ".test";
//   if (OB_FAIL(basic_random_test_output_to_file(test_file_path, true))) {
//     LOG_ERROR("Some error occurred while running the vectorization 2.0 operator", K(ret));
//   } else if (OB_FAIL(basic_random_test_output_to_file(test_file_path, false))) {
//     LOG_ERROR("Some error occurred while running the original operator", K(ret));
//   }
//   EXPECT_EQ(ret, 0);
// }

// TEST_F(TestHashGroupByVec, your_own_test)
// {
//   int ret = OB_SUCCESS;
//   std::string test_file_path = ObTestOpConfig::get_instance().test_filename_prefix_ + ".test";
//   std::ifstream if_tests(test_file_path);
//   if (if_tests.is_open() == false) { return; }
//   std::string line;
//   while (std::getline(if_tests, line)) {
//     // handle query
//     if (line.size() <= 0) continue;
//     if (line.at(0) == '#') continue;
//     ObOperator *root = NULL;
//     ObExecutor executor;
//     if (OB_FAIL(get_tested_op_from_string(line, false, root, executor))) {
//       LOG_WARN("generate tested op fail, sql: ", K(line.data()));
//     } else {
//       int round = 1;
//       const int64_t max_row_cnt = 256;
//       const ObBatchRows *child_brs = nullptr;
//       LOG_INFO("============== Final output ===============", K(round));
//       while (!root->brs_.end_) {
//         if (OB_FAIL(root->get_next_batch(max_row_cnt, child_brs))) {
//           LOG_ERROR("root op fail to get_next_batch data", K(ret));
//           break;
//         }
//       }
//     }
//   }
// }
} // namespace test
int main(int argc, char **argv)
{
  ObTestOpConfig::get_instance().test_filename_prefix_ = "test_hash_groupby2";
  for (int i = 1; i < argc; i++) {
    if (strcmp(argv[i], "-bg") == 0) {
      ObTestOpConfig::get_instance().test_filename_prefix_ += "_bg";
      ObTestOpConfig::get_instance().run_in_background_ = true;
    }
  }
  ObTestOpConfig::get_instance().init();

  // remove logs from previous runs so each run's log is self-contained
  system(("rm -f " + ObTestOpConfig::get_instance().test_filename_prefix_ + ".log").data());
  system(("rm -f " + ObTestOpConfig::get_instance().test_filename_prefix_ + ".log.*").data());

  oceanbase::common::ObClockGenerator::init();
  observer::ObReqTimeGuard req_timeinfo_guard;
  OB_LOGGER.set_log_level("INFO");
  OB_LOGGER.set_file_name((ObTestOpConfig::get_instance().test_filename_prefix_ + ".log").data(), true);
  init_sql_factories();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
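Usage note: running the binary directly exercises the foreground flow; passing -bg switches the filename prefix to test_hash_groupby2_bg and sets run_in_background_, which is how the sweep script further below invokes it once per generated .cfg.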

View File

@@ -0,0 +1,5 @@
#create database opt;
#use opt;
create table t1(c1 int, c2 int);
create table t2(c1 int, c2 int);
create table t3(c1 int, c2 int, c3 double, c4 char(20), c5 varchar(40));

View File

@@ -0,0 +1,98 @@
#!/bin/bash
# data format
# enum VectorFormat: uint8_t
#{
# VEC_INVALID = 0,
# VEC_FIXED,
# VEC_DISCRETE,
# VEC_CONTINUOUS,
# VEC_UNIFORM,
# VEC_UNIFORM_CONST,
# VEC_MAX_FORMAT
#};
#
# batch size
batch_size_round=(1 50 150 256)
# 4 rounds test cases
round_array=(10 100 1000 10000)
# 3 data range test cases
data_range_level_array=(0 1 2)
# 3 skips_probability test cases
skips_probability_array=(0 30 80)
# 3 nulls_probability test cases
nulls_probability_array=(0 30 80)
# 6 combined data format test cases
# 4 = VEC_UNIFORM, 1 = VEC_FIXED
fix_data_format_array=("fix_data_format=4" "fix_data_format=1")
# 4 = VEC_UNIFORM, 2 = VEC_DISCRETE, 3 = VEC_CONTINUOUS
#string_data_format_array=("string_data_format=4" "string_data_format=2" "string_data_format=3")
string_data_format_array=("string_data_format=4")
test_file_prefix="./test_hash_groupby2_bg"
cfg_file="./test_hash_groupby2_bg.cfg"
origin_result_file="./origin_result_bg.data"
vec_result_file="./vec_result_bg.data"
test_case_round=1
for batch_size in ${batch_size_round[@]}
do
  for round in ${round_array[@]}
  do
    for data_range_level in ${data_range_level_array[@]}
    do
      for skips_probability in ${skips_probability_array[@]}
      do
        for nulls_probability in ${nulls_probability_array[@]}
        do
          for fix_data_format in ${fix_data_format_array[@]}
          do
            for string_data_format in ${string_data_format_array[@]}
            do
              > ${cfg_file}
              echo "batch_size="${batch_size} >> ${cfg_file}
              echo "output_result_to_file=1" >> ${cfg_file}
              echo "round="${round} >> ${cfg_file}
              echo "data_range_level="${data_range_level} >> ${cfg_file}
              echo "skips_probability="${skips_probability} >> ${cfg_file}
              echo "nulls_probability="${nulls_probability} >> ${cfg_file}
              echo ${fix_data_format} >> ${cfg_file}
              echo ${string_data_format} >> ${cfg_file}
              echo "###################"
              echo "Test Case Round: "${test_case_round}
              echo "{"
              echo "round: "$round
              echo "data_range_level: "${data_range_level}
              echo "skips_probability: "${skips_probability}
              echo "nulls_probability: "${nulls_probability}
              echo "fix_data_format: "${fix_data_format}
              echo "string_data_format: "${string_data_format}
              echo "}"
              echo "###################"
              ./test_hash_groupby2_bg -bg
              sort $origin_result_file -o $origin_result_file
              sort $vec_result_file -o $vec_result_file
              diff $origin_result_file $vec_result_file > /dev/null
              if [ $? -eq 0 ]; then
                echo "Both result files are the same!"
              else
                echo "Got incorrect result! Exiting!"
                exit 1
              fi
              test_case_round=$((test_case_round+1))
            done
          done
        done
      done
    done
  done
done
echo "Done"

View File

@@ -0,0 +1,8 @@
#select c2, sum(c1), min(c1), max(c1) from t1 group by c2;
#select/*+USE_HASH_AGGREGATION*/ c2, count(c1), sum(c1 + 1), min(c1*2), max(c1 - 1) from t1 group by c2;
#select /*+leading(t1, t2) USE_HASH(t1, t2)*/* from t1, t2 where t1.c1 = t2.c1;
#select /*+leading(t1, t2) USE_HASH(t1, t2)*/* from t1, t2 where t1.c2 = t2.c2;
#select /*+ use_hash(a b)*/ * from t1 a left outer join t2 b on a.c1=b.c1 order by a.c1, a.c2;
#select /*+ use_hash(a b)*/ * from t1 a right outer join t2 b on a.c1=b.c1;
#select /*+ use_hash(a b)*/ * from t1 a full outer join t2 b on a.c1=b.c1;
#select * from t1 order by c1, c2;