[Function][Vectorized] Support least/greatest functions (#8107)

Co-authored-by: lihaopeng <lihaopeng@baidu.com>
2022-02-18 11:57:07 +08:00
parent 68b24d608f
commit bcde1f265a
8 changed files with 260 additions and 39 deletions
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@ -148,6 +148,7 @@ set(VEC_FILES
  functions/function_grouping.cpp
  functions/function_rpc.cpp
  functions/function_convert_tz.cpp
+  functions/least_greast.cpp
  olap/vgeneric_iterators.cpp
  olap/vcollect_iterator.cpp
  olap/block_reader.cpp
--- a/be/src/vec/core/accurate_comparison.h
+++ b/be/src/vec/core/accurate_comparison.h
@ -27,6 +27,7 @@
 #include "util/binary_cast.hpp"

 #include "vec/common/nan_utils.h"
+#include "vec/common/string_ref.h"
 #include "vec/common/uint128.h"
 #include "vec/core/types.h"
 #include "vec/runtime/vdatetime_value.h"
@ -487,8 +488,8 @@ struct EqualsOp {
 };

 template <>
-struct EqualsOp<VecDateTimeValue, VecDateTimeValue> {
-    static UInt8 apply(const Int64& a, const Int64& b) {
+struct EqualsOp<DecimalV2Value, DecimalV2Value> {
+    static UInt8 apply(const Int128& a, const Int128& b) {
        return a == b;
    }
 };
@ -500,8 +501,8 @@ struct NotEqualsOp {
 };

 template <>
-struct NotEqualsOp<VecDateTimeValue, VecDateTimeValue> {
-    static UInt8 apply(const Int64& a, const Int64& b) {
+struct NotEqualsOp<DecimalV2Value, DecimalV2Value> {
+    static UInt8 apply(const Int128& a, const Int128& b) {
        return a != b;
    }
 };
@ -516,9 +517,16 @@ struct LessOp {
 };

 template <>
-struct LessOp<VecDateTimeValue, VecDateTimeValue> {
-    static UInt8 apply(Int64 a, Int64 b) {
-        return binary_cast<Int64, VecDateTimeValue>(a) < binary_cast<Int64, VecDateTimeValue>(b);
+struct LessOp<DecimalV2Value, DecimalV2Value> {
+    static UInt8 apply(Int128 a, Int128 b) {
+        return binary_cast<Int128, DecimalV2Value>(a) < binary_cast<Int128, DecimalV2Value>(b);
+    }
+};
+
+template <>
+struct LessOp<StringRef, StringRef> {
+    static UInt8 apply(StringRef a, StringRef b) {
+        return a < b;
    }
 };

@ -529,9 +537,16 @@ struct GreaterOp {
 };

 template <>
-struct GreaterOp<VecDateTimeValue, VecDateTimeValue> {
-    static UInt8 apply(Int64 a, Int64 b) {
-        return binary_cast<Int64, VecDateTimeValue>(a) > binary_cast<Int64, VecDateTimeValue>(b);
+struct GreaterOp<DecimalV2Value, DecimalV2Value> {
+    static UInt8 apply(Int128 a, Int128 b) {
+        return binary_cast<Int128, DecimalV2Value>(a) > binary_cast<Int128, DecimalV2Value>(b);
+    }
+};
+
+template <>
+struct GreaterOp<StringRef, StringRef> {
+    static UInt8 apply(StringRef a, StringRef b) {
+        return a > b;
    }
 };

@ -545,9 +560,9 @@ struct LessOrEqualsOp {
 };

 template <>
-struct LessOrEqualsOp<VecDateTimeValue, VecDateTimeValue> {
-    static UInt8 apply(Int64 a, Int64 b) {
-        return binary_cast<Int64, VecDateTimeValue>(a) <= binary_cast<Int64, VecDateTimeValue>(b);
+struct LessOrEqualsOp<DecimalV2Value, DecimalV2Value> {
+    static UInt8 apply(Int128 a, Int128 b) {
+        return binary_cast<Int128, DecimalV2Value>(a) <= binary_cast<Int128, DecimalV2Value>(b);
    }
 };

@ -558,9 +573,9 @@ struct GreaterOrEqualsOp {
 };

 template <>
-struct GreaterOrEqualsOp<VecDateTimeValue, VecDateTimeValue> {
-    static UInt8 apply(Int64 a, Int64 b) {
-        return binary_cast<Int64, VecDateTimeValue>(a) >= binary_cast<Int64, VecDateTimeValue>(b);
+struct GreaterOrEqualsOp<DecimalV2Value, DecimalV2Value> {
+    static UInt8 apply(Int128 a, Int128 b) {
+        return binary_cast<Int128, DecimalV2Value>(a) >= binary_cast<Int128, DecimalV2Value>(b);
    }
 };

--- a/be/src/vec/functions/function_coalesce.cpp
+++ b/be/src/vec/functions/function_coalesce.cpp
@ -17,7 +17,6 @@

 #include "udf/udf.h"
 #include "vec/data_types/get_least_supertype.h"
-#include "vec/functions/function_helpers.h"
 #include "vec/functions/simple_function_factory.h"
 #include "vec/utils/template_helpers.hpp"
 #include "vec/utils/util.hpp"
--- a/be/src/vec/functions/function_multi_same_args.h
+++ b/be/src/vec/functions/function_multi_same_args.h
@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "udf/udf.h"
+#include "vec/data_types/get_least_supertype.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/utils/template_helpers.hpp"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+
+template <typename Impl>  // Impl supplies `name`, get_return_type_impl() and execute().
+class FunctionMultiSameArgs : public IFunction {  // Variadic IFunction adapter that forwards typing and execution to Impl.
+public:
+    static constexpr auto name = Impl::name;  // Function name, taken from Impl.
+
+    static FunctionPtr create() { return std::make_shared<FunctionMultiSameArgs>(); }  // Factory: new shared instance.
+
+    String get_name() const override { return name; }
+
+    bool use_default_implementation_for_constants() const override { return true; }  // Returns true; the effect is defined by IFunction, outside this file.
+
+    bool use_default_implementation_for_nulls() const override { return true; }  // Returns true; the effect is defined by IFunction, outside this file.
+
+    bool is_variadic() const override { return true; }  // Accepts a variable number of arguments.
+
+    size_t get_number_of_arguments() const override { return 0; }  // Reports 0; presumably ignored for variadic functions - confirm against IFunction.
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return Impl::get_return_type_impl(arguments);  // The return type is decided entirely by Impl.
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {  // `context` is not used in this body.
+        DCHECK_GE(arguments.size(), 1);  // At least one argument is expected.
+        block.replace_by_position(result, Impl::execute(block, arguments, input_rows_count));  // Store Impl's column at position `result`.
+        return Status::OK();  // Always reports success.
+    }
+};
+};
--- a/be/src/vec/functions/least_greast.cpp
+++ b/be/src/vec/functions/least_greast.cpp
@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "udf/udf.h"
+#include "vec/core/accurate_comparison.h"
+#include "vec/data_types/get_least_supertype.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/functions/function_multi_same_args.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/utils/template_helpers.hpp"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+
+template <template <typename, typename> class Op, typename Impl>  // Op: binary comparison functor template; Impl: supplies `name`.
+struct CompareMultiImpl {  // Row-wise selection across the argument columns, driven by Op.
+    static constexpr auto name = Impl::name;  // Function name, taken from Impl.
+
+    static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
+        return arguments[0];  // Result type is the first argument's type.
+    }
+
+    template <typename ColumnType>  // ColumnType: concrete column class both columns are cast to.
+    static void insert_result_data(MutableColumnPtr& result_column, ColumnPtr& argument_column,
+                              const size_t input_rows_count) {  // Folds argument_column into result_column in place.
+        auto* __restrict result_raw_data =
+                reinterpret_cast<ColumnType*>(result_column.get())->get_data().data();
+        auto* __restrict column_raw_data =
+                reinterpret_cast<const ColumnType*>(argument_column.get())->get_data().data();
+
+        if constexpr (std::is_same_v<ColumnType, ColumnDecimal128>) {  // Decimal128: compare through Op<DecimalV2Value, DecimalV2Value>.
+            for (size_t i = 0; i < input_rows_count; ++i) {  // Take the argument value when Op(argument, current) holds.
+                result_raw_data[i] =
+                        Op<DecimalV2Value, DecimalV2Value>::apply(column_raw_data[i], result_raw_data[i]) ? column_raw_data[i] :
+                        result_raw_data[i];
+            }
+        } else {  // All other column types: compare using the element type itself.
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                using type = std::decay_t<decltype(result_raw_data[0])>;  // Element type of the result buffer.
+                result_raw_data[i] =
+                        Op<type, type>::apply(column_raw_data[i], result_raw_data[i]) ? column_raw_data[i] :
+                        result_raw_data[i];
+            }
+        }
+    }
+
+    static ColumnPtr execute(Block& block, const ColumnNumbers& arguments, size_t input_rows_count) {  // Returns the per-row winner across all arguments.
+        if (arguments.size() == 1) return block.get_by_position(arguments.back()).column;  // Single argument: returned unchanged.
+
+        const auto& data_type = block.get_by_position(arguments.back()).type;  // NOTE(review): last argument's type, while the return type uses arguments[0]; presumably identical - confirm.
+        MutableColumnPtr result_column = data_type->create_column();
+
+        Columns args;  // Argument columns with constants expanded to full columns.
+        for (int i = 0; i < arguments.size(); ++i) {
+            args.emplace_back(block.get_by_position(arguments[i]).column->convert_to_full_column_if_const());
+        }
+        // String columns do not yet support writing at an arbitrary row position, so the result is
+        // built in one of two ways: row-by-row appends for strings, in-place updates for all other types.
+        bool is_string_result = result_column->is_column_string();
+        if (is_string_result) {
+            result_column->reserve(input_rows_count);  // Rows are appended one by one below.
+        } else {
+            result_column->insert_range_from(  // Seed the result with the first argument's rows.
+                    *(args[0]), 0, input_rows_count);
+        }
+
+        if (is_string_result) {
+            const auto& column_string = reinterpret_cast<const ColumnString&>(*args[0]);
+            auto& column_res = reinterpret_cast<ColumnString&>(*result_column);
+
+            for (int i = 0; i < input_rows_count; ++i) {  // Per row: fold arguments 1..N-1 over argument 0.
+                auto str_data = column_string.get_data_at(i);
+                for (int j = 1; j < arguments.size(); ++j) {
+                    auto temp_data =
+                            reinterpret_cast<const ColumnString&>(*args[j]).get_data_at(i);
+                    str_data = Op<StringRef, StringRef>::apply(temp_data, str_data) ? temp_data : str_data;
+                }
+                column_res.insert_data(str_data.data, str_data.size);  // Append the winning string for this row.
+            }
+
+        } else {
+            WhichDataType which(data_type);  // NOTE(review): if no DISPATCH branch below matches, the result stays a copy of args[0].
+        #define DISPATCH(TYPE, COLUMN_TYPE)                                                               \
+            if (which.idx == TypeIndex::TYPE) {                                                           \
+                for (int i = 1; i < arguments.size(); ++i) {                                              \
+                    insert_result_data<COLUMN_TYPE>(result_column, args[i], input_rows_count);            \
+                }                                                                                         \
+            }
+            NUMERIC_TYPE_TO_COLUMN_TYPE(DISPATCH)  // Macro defined elsewhere; presumably applies DISPATCH per numeric type - confirm.
+            DISPATCH(Decimal128, ColumnDecimal<Decimal128>)
+            TIME_TYPE_TO_COLUMN_TYPE(DISPATCH)  // Macro defined elsewhere; presumably applies DISPATCH per date/time type - confirm.
+        #undef DISPATCH
+        }
+
+        return result_column;
+    }
+};
+
+struct LeastName {  // Name tag passed to CompareMultiImpl for the "least" function.
+    static constexpr auto name = "least";
+};
+struct GreastName {  // Name tag passed to CompareMultiImpl for the "greatest" function.
+    static constexpr auto name = "greatest";
+};
+using FunctionLeast = FunctionMultiSameArgs<CompareMultiImpl<LessOp, LeastName>>;  // "least": fold with LessOp.
+using FunctionGreaest = FunctionMultiSameArgs<CompareMultiImpl<GreaterOp, GreastName>>;  // "greatest": fold with GreaterOp.
+
+
+void register_function_least_greast(SimpleFunctionFactory& factory) {  // Registers FunctionLeast and FunctionGreaest with `factory`.
+    factory.register_function<FunctionLeast>();
+    factory.register_function<FunctionGreaest>();
+}
+};
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@ -69,6 +69,7 @@ void register_function_coalesce(SimpleFunctionFactory& factory);
 void register_function_grouping(SimpleFunctionFactory& factory);
 void register_function_datetime_floor_ceil(SimpleFunctionFactory& factory);
 void register_function_convert_tz(SimpleFunctionFactory& factory);
+void register_function_least_greast(SimpleFunctionFactory& factory);

 class SimpleFunctionFactory {
    using Creator = std::function<FunctionBuilderPtr()>;
@ -187,6 +188,7 @@ public:
            register_function_grouping(instance);
            register_function_datetime_floor_ceil(instance);
            register_function_convert_tz(instance);
+            register_function_least_greast(instance);
        });
        return instance;
    }
--- a/be/test/vec/function/function_math_test.cpp
+++ b/be/test/vec/function/function_math_test.cpp
@ -379,6 +379,28 @@ TEST(MathFunctionTest, round_test) {
    }
 }

+TEST(MathFunctionTest, least_test) {  // Checks "least" over two Int32 arguments, including a NULL row.
+    std::string func_name = "least";
+
+    InputTypeSet input_types = {TypeIndex::Int32, TypeIndex::Int32};
+
+    DataSet data_set = {{{3, 2}, 2}, {{3, 3}, 3}, {{Null(), -2}, Null()},  // Each entry: {arguments}, expected; the NULL-argument row expects NULL.
+                            {{193, -2}, -2},   {{193, -1}, -1}};
+
+    check_function<DataTypeInt32, true>(func_name, input_types, data_set);  // `true` presumably marks a nullable result - confirm against check_function.
+}
+
+TEST(MathFunctionTest, greatest_test) {  // Checks "greatest" over two Int32 arguments, including a NULL row.
+    std::string func_name = "greatest";
+
+    InputTypeSet input_types = {TypeIndex::Int32, TypeIndex::Int32};
+
+    DataSet data_set = {{{3, 2}, 3}, {{3, 3}, 3}, {{Null(), -2}, Null()},  // Each entry: {arguments}, expected; the NULL-argument row expects NULL.
+                            {{193, -2}, 193},   {{193, -1}, 193}};
+
+    check_function<DataTypeInt32, true>(func_name, input_types, data_set);  // `true` presumably marks a nullable result - confirm against check_function.
+}
+
 TEST(MathFunctionTest, bin_test) {
    std::string func_name = "bin";

--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@ -687,71 +687,71 @@ visible_functions = [

    [['least'], 'TINYINT', ['TINYINT', '...'],
            '_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_10TinyIntValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['least'], 'SMALLINT', ['SMALLINT', '...'],
            '_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_11SmallIntValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['least'], 'INT', ['INT', '...'],
            '_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_6IntValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['least'], 'BIGINT', ['BIGINT', '...'],
            '_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9BigIntValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['least'], 'LARGEINT', ['LARGEINT', '...'],
            '_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_11LargeIntValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['least'], 'FLOAT', ['FLOAT', '...'],
            '_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_8FloatValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['least'], 'DOUBLE', ['DOUBLE', '...'],
            '_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9DoubleValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['least'], 'DATETIME', ['DATETIME', '...'],
            '_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_11DateTimeValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['least'], 'DECIMALV2', ['DECIMALV2', '...'],
            '_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_12DecimalV2ValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['least'], 'VARCHAR', ['VARCHAR', '...'],
            '_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['least'], 'STRING', ['STRING', '...'],
            '_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],

    [['greatest'], 'TINYINT', ['TINYINT', '...'],
            '_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_10TinyIntValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['greatest'], 'SMALLINT', ['SMALLINT', '...'],
            '_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_11SmallIntValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['greatest'], 'INT', ['INT', '...'],
            '_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_6IntValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['greatest'], 'BIGINT', ['BIGINT', '...'],
            '_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9BigIntValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['greatest'], 'LARGEINT', ['LARGEINT', '...'],
            '_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_11LargeIntValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['greatest'], 'FLOAT', ['FLOAT', '...'],
            '_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_8FloatValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['greatest'], 'DOUBLE', ['DOUBLE', '...'],
            '_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9DoubleValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['greatest'], 'DECIMALV2', ['DECIMALV2', '...'],
            '_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_12DecimalV2ValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['greatest'], 'DATETIME', ['DATETIME', '...'],
            '_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_11DateTimeValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['greatest'], 'VARCHAR', ['VARCHAR', '...'],
            '_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],
    [['greatest'], 'STRING', ['STRING', '...'],
            '_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
-            '', '', '', ''],
+            '', '', 'vec', ''],

    # Conditional Functions
    # Some of these have empty symbols because the BE special-cases them based on the