[Enhancement] [Vectorized] Refactor and optimize BinaryOperation (#9087)

2022-05-07 10:55:15 +08:00
parent 2ccaa6338c
commit 98bfeaf560
15 changed files with 685 additions and 985 deletions
--- a/be/src/vec/data_types/data_type_decimal.h
+++ b/be/src/vec/data_types/data_type_decimal.h
@ -106,10 +106,12 @@ public:
        }

        // Now, Doris only support precision:27, scale: 9
-        DCHECK(precision_ == 27);
-        DCHECK(scale_ == 9);
+        DCHECK(precision == 27);
+        DCHECK(scale == 9);
    }

+    DataTypeDecimal(const DataTypeDecimal& rhs) : precision(rhs.precision), scale(rhs.scale) {} // copy constructor: initializes precision and scale from rhs
+
    const char* get_family_name() const override { return "Decimal"; }
    std::string do_get_name() const override;
    TypeIndex get_type_id() const override { return TypeId<T>::value; }
--- a/be/src/vec/data_types/number_traits.h
+++ b/be/src/vec/data_types/number_traits.h
@ -22,6 +22,8 @@

 #include <type_traits>

+#include "vec/columns/column_decimal.h"
+#include "vec/columns/column_vector.h"
 #include "vec/common/uint128.h"
 #include "vec/core/types.h"

@ -155,7 +157,8 @@ struct ResultOfSubtraction {
    */
 template <typename A, typename B>
 struct ResultOfFloatingPointDivision {
-    using Type = Float64;
+    using Type = std::conditional_t<IsDecimalNumber<A>, A, // a decimal left operand becomes the result type (A is checked before B)
+                                    std::conditional_t<IsDecimalNumber<B>, B, Float64>>; // else a decimal right operand; Float64 when neither is decimal
 };

 /** For integer division, we get a number with the same number of bits as in divisible.
@ -171,13 +174,8 @@ struct ResultOfIntegerDivision {
 template <typename A, typename B>
 struct ResultOfModulo {
    using Type = typename Construct<std::is_signed_v<A> || std::is_signed_v<B>,
-                                    std::is_floating_point_v<A>, max(sizeof(A), sizeof(B))>::Type;
-};
-
-template <typename A>
-struct ResultOfNegate {
-    using Type = typename Construct<true, std::is_floating_point_v<A>,
-                                    std::is_signed_v<A> ? sizeof(A) : next_size(sizeof(A))>::Type;
+                                    std::is_floating_point_v<A> || std::is_floating_point_v<B>, // second Construct argument now considers B as well as A (presumably the floating-point flag - confirm against Construct)
+                                    max(sizeof(A), sizeof(B))>::Type; // third Construct argument: max of the two operand sizes
 };

 template <typename A>
@ -200,76 +198,15 @@ struct ResultOfBitNot {
    using Type = typename Construct<std::is_signed_v<A>, false, sizeof(A)>::Type;
 };

-/** Type casting for `if` function:
-  * UInt<x>,  UInt<y>   ->  UInt<max(x,y)>
-  * Int<x>,   Int<y>    ->   Int<max(x,y)>
-  * Float<x>, Float<y>  -> Float<max(x, y)>
-  * UInt<x>,  Int<y>    ->   Int<max(x*2, y)>
-  * Float<x>, [U]Int<y> -> Float<max(x, y*2)>
-  * Decimal<x>, Decimal<y> -> Decimal<max(x,y)>
-  * UUID, UUID          -> UUID
-  * UInt64 ,  Int<x>    -> Error
-  * Float<x>, [U]Int64  -> Error
-  */
 template <typename A, typename B>
-struct ResultOfIf {
-    static constexpr bool has_float = std::is_floating_point_v<A> || std::is_floating_point_v<B>;
-    static constexpr bool has_integer = std::is_integral_v<A> || std::is_integral_v<B>;
-    static constexpr bool has_signed = std::is_signed_v<A> || std::is_signed_v<B>;
-    static constexpr bool has_unsigned = !std::is_signed_v<A> || !std::is_signed_v<B>;
-
-    static constexpr size_t max_size_of_unsigned_integer =
-            max(std::is_signed_v<A> ? 0 : sizeof(A), std::is_signed_v<B> ? 0 : sizeof(B));
-    static constexpr size_t max_size_of_signed_integer =
-            max(std::is_signed_v<A> ? sizeof(A) : 0, std::is_signed_v<B> ? sizeof(B) : 0);
-    static constexpr size_t max_size_of_integer =
-            max(std::is_integral_v<A> ? sizeof(A) : 0, std::is_integral_v<B> ? sizeof(B) : 0);
-    static constexpr size_t max_size_of_float = max(std::is_floating_point_v<A> ? sizeof(A) : 0,
-                                                    std::is_floating_point_v<B> ? sizeof(B) : 0);
-
-    using ConstructedType =
-            typename Construct<has_signed, has_float,
-                               ((has_float && has_integer &&
-                                 max_size_of_integer >= max_size_of_float) ||
-                                (has_signed && has_unsigned &&
-                                 max_size_of_unsigned_integer >= max_size_of_signed_integer))
-                                       ? max(sizeof(A), sizeof(B)) * 2
-                                       : max(sizeof(A), sizeof(B))>::Type;
-
-    using ConstructedWithUUID =
-            std::conditional_t<std::is_same_v<A, UInt128> && std::is_same_v<B, UInt128>, A,
-                               ConstructedType>;
-
-    using Type = std::conditional_t<
-            !IsDecimalNumber<A> && !IsDecimalNumber<B>, ConstructedWithUUID,
-            std::conditional_t<IsDecimalNumber<A> && IsDecimalNumber<B>,
-                               std::conditional_t<(sizeof(A) > sizeof(B)), A, B>, Error>>;
+struct BinaryOperatorTraits { // type aliases shared by the binary operator implementations for operand types A and B
+    using ColumnVectorA = std::conditional_t<IsDecimalNumber<A>, ColumnDecimal<A>, ColumnVector<A>>; // ColumnDecimal<A> for decimal A, ColumnVector<A> otherwise
+    using ColumnVectorB = std::conditional_t<IsDecimalNumber<B>, ColumnDecimal<B>, ColumnVector<B>>; // same selection for B
+    using ArrayA = typename ColumnVectorA::Container; // container type of the column chosen for A
+    using ArrayB = typename ColumnVectorB::Container; // container type of the column chosen for B
+    using ArrayNull = PaddedPODArray<UInt8>; // one UInt8 per entry; used as the null_map parameter type by the Impl structs
 };

-/** Before applying operator `%` and bitwise operations, operands are casted to whole numbers. */
-template <typename A>
-struct ToInteger {
-    using Type = typename Construct<std::is_signed_v<A>, false,
-                                    std::is_floating_point_v<A> ? 8 : sizeof(A)>::Type;
-};
-
-// CLICKHOUSE-29. The same depth, different signs
-// NOTE: This case is applied for 64-bit integers only (for backward compatibility), but could be used for any-bit integers
-template <typename A, typename B>
-constexpr bool LeastGreatestSpecialCase = std::is_integral_v<A>&& std::is_integral_v<B> &&
-                                          (8 == sizeof(A) && sizeof(A) == sizeof(B)) &&
-                                          (std::is_signed_v<A> ^ std::is_signed_v<B>);
-
-template <typename A, typename B>
-using ResultOfLeast = std::conditional_t<LeastGreatestSpecialCase<A, B>,
-                                         typename Construct<true, false, sizeof(A)>::Type,
-                                         typename ResultOfIf<A, B>::Type>;
-
-template <typename A, typename B>
-using ResultOfGreatest = std::conditional_t<LeastGreatestSpecialCase<A, B>,
-                                            typename Construct<false, false, sizeof(A)>::Type,
-                                            typename ResultOfIf<A, B>::Type>;
-
 } // namespace NumberTraits

 } // namespace doris::vectorized
--- a/be/src/vec/functions/divide.cpp
+++ b/be/src/vec/functions/divide.cpp
@ -18,7 +18,7 @@
 // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/divide.cpp
 // and modified by Doris

-#include "vec/functions/function_binary_arithmetic_to_null_type.h"
+#include "vec/functions/function_binary_arithmetic.h"
 #include "vec/functions/simple_function_factory.h"

 namespace doris::vectorized {
@ -28,26 +28,42 @@ static const DecimalV2Value one(1, 0);
 template <typename A, typename B>
 struct DivideFloatingImpl {
    using ResultType = typename NumberTraits::ResultOfFloatingPointDivision<A, B>::Type;
+    using Traits = NumberTraits::BinaryOperatorTraits<A, B>; // container aliases for operand types A and B
+
    static const constexpr bool allow_decimal = true;

+    template <typename Result = ResultType>
+    static void apply(const typename Traits::ArrayA& a, B b, // column-by-scalar form: every a[i] is divided by the single divisor b
+                      typename ColumnVector<Result>::Container& c,
+                      typename Traits::ArrayNull& null_map) {
+        size_t size = c.size(); // row count is taken from the output container
+        UInt8 is_null = b == 0; // 1 when the divisor is zero, otherwise 0
+        memset(null_map.data(), is_null, size); // stores that same flag for each of the `size` rows
+
+        if (!is_null) { // with a zero divisor, c is left unwritten
+            for (size_t i = 0; i < size; i++) {
+                c[i] = (double)a[i] / (double)b; // both operands are converted to double before dividing
+            }
+        }
+    }
+
    template <typename Result = DecimalV2Value>
-    static inline DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b, NullMap& null_map,
-                                       size_t index) {
-        null_map[index] = b.is_zero();
-        return a / (b.is_zero() ? one : b);
+    static inline DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b, UInt8& is_null) { // DecimalV2Value overload: reports a zero divisor through is_null
+        is_null = b.is_zero();
+        return a / (is_null ? one : b); // substitutes `one` for a zero divisor so the division is never by zero
    }

    template <typename Result = ResultType>
-    static inline Result apply(A a, B b, NullMap& null_map, size_t index) {
-        null_map[index] = b == 0;
-        return static_cast<Result>(a) / (b + (b == 0));
+    static inline Result apply(A a, B b, UInt8& is_null) { // element-wise overload: one (a, b) pair per call
+        is_null = b == 0;
+        return static_cast<Result>(a) / (b + is_null); // a zero divisor is bumped to 1 by adding the flag
    }
 };

 struct NameDivide {
    static constexpr auto name = "divide";
 };
-using FunctionDivide = FunctionBinaryArithmeticToNullType<DivideFloatingImpl, NameDivide>;
+using FunctionDivide = FunctionBinaryArithmetic<DivideFloatingImpl, NameDivide, true>; // NOTE(review): the third argument presumably selects the null-producing path that FunctionBinaryArithmeticToNullType used to provide - confirm in function_binary_arithmetic.h

 void register_function_divide(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionDivide>();
--- a/be/src/vec/functions/function.h
+++ b/be/src/vec/functions/function.h
@ -95,11 +95,6 @@ protected:
      */
    virtual ColumnNumbers get_arguments_that_are_always_constant() const { return {}; }

-    /** True if function can be called on default arguments (include Nullable's) and won't throw.
-      * Counterexample: modulo(0, 0)
-      */
-    virtual bool can_be_executed_on_default_arguments() const { return true; }
-
 private:
    Status default_implementation_for_nulls(FunctionContext* context, Block& block,
                                            const ColumnNumbers& args, size_t result,
@ -386,7 +381,6 @@ public:
    bool use_default_implementation_for_constants() const override { return false; }
    bool use_default_implementation_for_low_cardinality_columns() const override { return true; }
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {}; }
-    bool can_be_executed_on_default_arguments() const override { return true; }
    bool can_be_executed_on_low_cardinality_dictionary() const override {
        return is_deterministic_in_scope_of_query();
    }
@ -460,9 +454,6 @@ protected:
    ColumnNumbers get_arguments_that_are_always_constant() const final {
        return function->get_arguments_that_are_always_constant();
    }
-    bool can_be_executed_on_default_arguments() const override {
-        return function->can_be_executed_on_default_arguments();
-    }

 private:
    std::shared_ptr<IFunction> function;
--- a/be/src/vec/functions/function_binary_arithmetic.h
+++ b/be/src/vec/functions/function_binary_arithmetic.h
--- a/be/src/vec/functions/function_binary_arithmetic_to_null_type.h
+++ b/be/src/vec/functions/function_binary_arithmetic_to_null_type.h
@ -1,247 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "vec/functions/function_binary_arithmetic.h"
-
-namespace doris::vectorized {
-
-/**
- * Arithmetic operations: /, %
- * intDiv (integer division)
- */
-
-template <template <typename, typename> class Op, typename Name,
-          bool CanBeExecutedOnDefaultArguments = true>
-class FunctionBinaryArithmeticToNullType : public IFunction {
-    bool check_decimal_overflow = true;
-
-    template <typename F>
-    static bool cast_type(const IDataType* type, F&& f) {
-        return cast_type_to_either<DataTypeUInt8, DataTypeUInt16, DataTypeUInt32, DataTypeUInt64,
-                                   DataTypeInt8, DataTypeInt16, DataTypeInt32, DataTypeInt64,
-                                   DataTypeInt128, DataTypeFloat32, DataTypeFloat64,
-                                   DataTypeDecimal<Decimal32>, DataTypeDecimal<Decimal64>,
-                                   DataTypeDecimal<Decimal128>>(type, std::forward<F>(f));
-    }
-
-    template <typename F>
-    static bool cast_both_types(const IDataType* left, const IDataType* right, F&& f) {
-        return cast_type(left, [&](const auto& left_) {
-            return cast_type(right, [&](const auto& right_) { return f(left_, right_); });
-        });
-    }
-
-public:
-    static constexpr auto name = Name::name;
-    static FunctionPtr create() { return std::make_shared<FunctionBinaryArithmeticToNullType>(); }
-
-    FunctionBinaryArithmeticToNullType() {}
-    String get_name() const override { return name; }
-
-    size_t get_number_of_arguments() const override { return 2; }
-
-    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
-        DataTypePtr type_res;
-
-        const IDataType* first_type = arguments[0].get();
-        const IDataType* secord_type = arguments[1].get();
-        if (first_type->is_nullable()) {
-            first_type = static_cast<const DataTypeNullable*>(first_type)->get_nested_type().get();
-        }
-        if (secord_type->is_nullable()) {
-            secord_type =
-                    static_cast<const DataTypeNullable*>(secord_type)->get_nested_type().get();
-        }
-
-        bool valid =
-                cast_both_types(first_type, secord_type, [&](const auto& left, const auto& right) {
-                    using LeftDataType = std::decay_t<decltype(left)>;
-                    using RightDataType = std::decay_t<decltype(right)>;
-                    using ResultDataType =
-                            typename BinaryOperationTraits<Op, LeftDataType,
-                                                           RightDataType>::ResultDataType;
-                    if constexpr (!std::is_same_v<ResultDataType, InvalidType>) {
-                        if constexpr (IsDataTypeDecimal<LeftDataType> &&
-                                      IsDataTypeDecimal<RightDataType>) {
-                            constexpr bool is_multiply = false;
-                            constexpr bool is_division =
-                                    std::is_same_v<Op<UInt8, UInt8>,
-                                                   DivideFloatingImpl<UInt8, UInt8>> ||
-                                    std::is_same_v<Op<UInt8, UInt8>,
-                                                   DivideIntegralImpl<UInt8, UInt8>> ||
-                                    std::is_same_v<Op<UInt8, UInt8>,
-                                                   DivideIntegralOrZeroImpl<UInt8, UInt8>>;
-
-                            ResultDataType result_type =
-                                    decimal_result_type(left, right, is_multiply, is_division);
-                            type_res = std::make_shared<ResultDataType>(result_type.get_precision(),
-                                                                        result_type.get_scale());
-                        } else if constexpr (IsDataTypeDecimal<LeftDataType>)
-                            type_res = std::make_shared<LeftDataType>(left.get_precision(),
-                                                                      left.get_scale());
-                        else if constexpr (IsDataTypeDecimal<RightDataType>)
-                            type_res = std::make_shared<RightDataType>(right.get_precision(),
-                                                                       right.get_scale());
-                        else if constexpr (IsDataTypeDecimal<ResultDataType>)
-                            type_res = std::make_shared<ResultDataType>(27, 9);
-                        else
-                            type_res = std::make_shared<ResultDataType>();
-                        return true;
-                    }
-                    return false;
-                });
-        if (!valid) {
-            LOG(FATAL) << fmt::format("Illegal types {} and {} of arguments of function {}",
-                                      arguments[0]->get_name(), arguments[1]->get_name(),
-                                      get_name());
-        }
-
-        return make_nullable(type_res);
-    }
-
-    bool use_default_implementation_for_nulls() const override { return true; }
-
-    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
-                        size_t result, size_t input_rows_count) override {
-        auto* left_generic = block.get_by_position(arguments[0]).type.get();
-        auto* right_generic = block.get_by_position(arguments[1]).type.get();
-        if (left_generic->is_nullable()) {
-            left_generic =
-                    static_cast<const DataTypeNullable*>(left_generic)->get_nested_type().get();
-        }
-        if (right_generic->is_nullable()) {
-            right_generic =
-                    static_cast<const DataTypeNullable*>(right_generic)->get_nested_type().get();
-        }
-
-        bool valid = cast_both_types(
-                left_generic, right_generic, [&](const auto& left, const auto& right) {
-                    using LeftDataType = std::decay_t<decltype(left)>;
-                    using RightDataType = std::decay_t<decltype(right)>;
-                    using ResultDataType =
-                            typename BinaryOperationTraits<Op, LeftDataType,
-                                                           RightDataType>::ResultDataType;
-
-                    if constexpr (!std::is_same_v<ResultDataType, InvalidType>) {
-                        constexpr bool result_is_decimal =
-                                IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>;
-                        constexpr bool is_multiply = false;
-                        constexpr bool is_division =
-                                std::is_same_v<Op<UInt8, UInt8>,
-                                               DivideFloatingImpl<UInt8, UInt8>> ||
-                                std::is_same_v<Op<UInt8, UInt8>,
-                                               DivideIntegralImpl<UInt8, UInt8>> ||
-                                std::is_same_v<Op<UInt8, UInt8>,
-                                               DivideIntegralOrZeroImpl<UInt8, UInt8>>;
-
-                        using T0 = typename LeftDataType::FieldType;
-                        using T1 = typename RightDataType::FieldType;
-                        using ResultType = typename ResultDataType::FieldType;
-                        using ColVecT0 = std::conditional_t<IsDecimalNumber<T0>, ColumnDecimal<T0>,
-                                                            ColumnVector<T0>>;
-                        using ColVecT1 = std::conditional_t<IsDecimalNumber<T1>, ColumnDecimal<T1>,
-                                                            ColumnVector<T1>>;
-                        using ColVecResult = std::conditional_t<IsDecimalNumber<ResultType>,
-                                                                ColumnDecimal<ResultType>,
-                                                                ColumnVector<ResultType>>;
-
-                        /// Decimal operations need scale. Operations are on result type.
-                        using OpImpl = std::conditional_t<
-                                IsDataTypeDecimal<ResultDataType>,
-                                DecimalBinaryOperation<T0, T1, Op, ResultType>,
-                                BinaryOperationImpl<T0, T1, Op<T0, T1>, ResultType>>;
-
-                        auto null_map = ColumnUInt8::create(input_rows_count, 0);
-                        auto& null_map_data = null_map->get_data();
-                        size_t argument_size = arguments.size();
-                        ColumnPtr argument_columns[argument_size];
-
-                        for (size_t i = 0; i < argument_size; ++i) {
-                            argument_columns[i] =
-                                    block.get_by_position(arguments[i])
-                                            .column->convert_to_full_column_if_const();
-                        }
-
-                        auto col_left_raw = argument_columns[0].get();
-                        auto col_right_raw = argument_columns[1].get();
-
-                        typename ColVecResult::MutablePtr col_res = nullptr;
-                        if constexpr (result_is_decimal) {
-                            ResultDataType type =
-                                    decimal_result_type(left, right, is_multiply, is_division);
-                            col_res = ColVecResult::create(0, type.get_scale());
-                        } else {
-                            col_res = ColVecResult::create();
-                        }
-
-                        auto& vec_res = col_res->get_data();
-                        vec_res.resize(block.rows());
-
-                        if (auto col_left = check_and_get_column<ColVecT0>(col_left_raw)) {
-                            if constexpr (result_is_decimal) {
-                                ResultDataType type =
-                                        decimal_result_type(left, right, is_multiply, is_division);
-
-                                typename ResultDataType::FieldType scale_a =
-                                        type.scale_factor_for(left, is_multiply);
-                                typename ResultDataType::FieldType scale_b =
-                                        type.scale_factor_for(right, is_multiply || is_division);
-                                if constexpr (IsDataTypeDecimal<RightDataType> && is_division)
-                                    scale_a = right.get_scale_multiplier();
-                                if (auto col_right =
-                                            check_and_get_column<ColVecT1>(col_right_raw)) {
-                                    OpImpl::vector_vector(col_left->get_data(),
-                                                          col_right->get_data(), vec_res, scale_a,
-                                                          scale_b, check_decimal_overflow,
-                                                          null_map_data);
-                                }
-                            } else {
-                                if (auto col_right =
-                                            check_and_get_column<ColVecT1>(col_right_raw)) {
-                                    OpImpl::vector_vector(col_left->get_data(),
-                                                          col_right->get_data(), vec_res,
-                                                          null_map_data);
-                                }
-                            }
-                        } else {
-                            return false;
-                        }
-
-                        block.get_by_position(result).column =
-                                ColumnNullable::create(std::move(col_res), std::move(null_map));
-                        return true;
-                    } else {
-                        return false;
-                    }
-                });
-
-        if (!valid) {
-            return Status::RuntimeError(
-                    fmt::format("{}'s arguments do not match the expected data types", get_name()));
-        }
-
-        return Status::OK();
-    }
-
-    bool can_be_executed_on_default_arguments() const override {
-        return CanBeExecutedOnDefaultArguments;
-    }
-};
-
-} // namespace doris::vectorized
--- a/be/src/vec/functions/function_bit.cpp
+++ b/be/src/vec/functions/function_bit.cpp
@ -20,9 +20,9 @@

 #include "vec/data_types/number_traits.h"
 #include "vec/functions/function_binary_arithmetic.h"
+#include "vec/functions/function_totype.h"
 #include "vec/functions/function_unary_arithmetic.h"
 #include "vec/functions/simple_function_factory.h"
-#include "vec/functions/function_totype.h"

 namespace doris::vectorized {

@ -101,10 +101,10 @@ struct BitLengthImpl {
    }
 };

-using FunctionBitAnd = FunctionBinaryArithmetic<BitAndImpl, NameBitAnd>;
+using FunctionBitAnd = FunctionBinaryArithmetic<BitAndImpl, NameBitAnd, false>; // NOTE(review): FunctionBinaryArithmetic now takes an explicit third argument; false presumably means the result is not turned into a nullable column - confirm in function_binary_arithmetic.h
 using FunctionBitNot = FunctionUnaryArithmetic<BitNotImpl, NameBitNot, false>;
-using FunctionBitOr = FunctionBinaryArithmetic<BitOrImpl, NameBitOr>;
-using FunctionBitXor = FunctionBinaryArithmetic<BitXorImpl, NameBitXor>;
+using FunctionBitOr = FunctionBinaryArithmetic<BitOrImpl, NameBitOr, false>; // same third argument as FunctionBitAnd
+using FunctionBitXor = FunctionBinaryArithmetic<BitXorImpl, NameBitXor, false>; // same third argument as FunctionBitAnd
 using FunctionBitLength = FunctionUnaryToType<BitLengthImpl, NameBitLength>;

 void register_function_bit(SimpleFunctionFactory& factory) {
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@ -518,7 +518,6 @@ public:

    bool use_default_implementation_for_constants() const override { return true; }
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
-    bool can_be_executed_on_default_arguments() const override { return false; }

    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) override {
--- a/be/src/vec/functions/int_div.cpp
+++ b/be/src/vec/functions/int_div.cpp
@ -18,133 +18,17 @@
 // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/IntDiv.cpp
 // and modified by Doris

-#ifdef __SSE2__
-#define LIBDIVIDE_SSE2 1
-#endif
-
 #include "vec/functions/int_div.h"

-#include <libdivide.h>
-
 #include "vec/functions/function_binary_arithmetic.h"
-#include "vec/functions/function_binary_arithmetic_to_null_type.h"
 #include "vec/functions/simple_function_factory.h"

 namespace doris::vectorized {

-/// Optimizations for integer division by a constant.
-
-template <typename A, typename B>
-struct DivideIntegralByConstantImpl : BinaryOperationImplBase<A, B, DivideIntegralImpl<A, B>> {
-    using ResultType = typename DivideIntegralImpl<A, B>::ResultType;
-
-    static void vector_constant(const PaddedPODArray<A>& a, B b, PaddedPODArray<ResultType>& c) {
-        // TODO: Support return null in the furture
-        if (UNLIKELY(b == 0)) {
-            //            throw Exception("Division by zero", TStatusCode::VEC_ILLEGAL_DIVISION);
-            memset(c.data(), 0, sizeof(ResultType) * c.size());
-            return;
-        }
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wsign-compare"
-
-        if (UNLIKELY(std::is_signed_v<B> && b == -1)) {
-            size_t size = a.size();
-            for (size_t i = 0; i < size; ++i) c[i] = -c[i];
-            return;
-        }
-
-#pragma GCC diagnostic pop
-
-        libdivide::divider<A> divider(b);
-
-        size_t size = a.size();
-        const A* a_pos = a.data();
-        const A* a_end = a_pos + size;
-        ResultType* c_pos = c.data();
-
-#ifdef __SSE2__
-        static constexpr size_t values_per_sse_register = 16 / sizeof(A);
-        const A* a_end_sse = a_pos + size / values_per_sse_register * values_per_sse_register;
-
-        while (a_pos < a_end_sse) {
-            _mm_storeu_si128(reinterpret_cast<__m128i*>(c_pos),
-                             _mm_loadu_si128(reinterpret_cast<const __m128i*>(a_pos)) / divider);
-
-            a_pos += values_per_sse_register;
-            c_pos += values_per_sse_register;
-        }
-#endif
-
-        while (a_pos < a_end) {
-            *c_pos = *a_pos / divider;
-            ++a_pos;
-            ++c_pos;
-        }
-    }
-};
-
-/** Specializations are specified for dividing numbers of the type UInt64 and UInt32 by the numbers of the same sign.
-  * Can be expanded to all possible combinations, but more code is needed.
-  */
-
-template <>
-struct BinaryOperationImpl<UInt64, UInt8, DivideIntegralImpl<UInt64, UInt8>>
-        : DivideIntegralByConstantImpl<UInt64, UInt8> {};
-template <>
-struct BinaryOperationImpl<UInt64, UInt16, DivideIntegralImpl<UInt64, UInt16>>
-        : DivideIntegralByConstantImpl<UInt64, UInt16> {};
-template <>
-struct BinaryOperationImpl<UInt64, UInt32, DivideIntegralImpl<UInt64, UInt32>>
-        : DivideIntegralByConstantImpl<UInt64, UInt32> {};
-template <>
-struct BinaryOperationImpl<UInt64, UInt64, DivideIntegralImpl<UInt64, UInt64>>
-        : DivideIntegralByConstantImpl<UInt64, UInt64> {};
-
-template <>
-struct BinaryOperationImpl<UInt32, UInt8, DivideIntegralImpl<UInt32, UInt8>>
-        : DivideIntegralByConstantImpl<UInt32, UInt8> {};
-template <>
-struct BinaryOperationImpl<UInt32, UInt16, DivideIntegralImpl<UInt32, UInt16>>
-        : DivideIntegralByConstantImpl<UInt32, UInt16> {};
-template <>
-struct BinaryOperationImpl<UInt32, UInt32, DivideIntegralImpl<UInt32, UInt32>>
-        : DivideIntegralByConstantImpl<UInt32, UInt32> {};
-template <>
-struct BinaryOperationImpl<UInt32, UInt64, DivideIntegralImpl<UInt32, UInt64>>
-        : DivideIntegralByConstantImpl<UInt32, UInt64> {};
-
-template <>
-struct BinaryOperationImpl<Int64, Int8, DivideIntegralImpl<Int64, Int8>>
-        : DivideIntegralByConstantImpl<Int64, Int8> {};
-template <>
-struct BinaryOperationImpl<Int64, Int16, DivideIntegralImpl<Int64, Int16>>
-        : DivideIntegralByConstantImpl<Int64, Int16> {};
-template <>
-struct BinaryOperationImpl<Int64, Int32, DivideIntegralImpl<Int64, Int32>>
-        : DivideIntegralByConstantImpl<Int64, Int32> {};
-template <>
-struct BinaryOperationImpl<Int64, Int64, DivideIntegralImpl<Int64, Int64>>
-        : DivideIntegralByConstantImpl<Int64, Int64> {};
-
-template <>
-struct BinaryOperationImpl<Int32, Int8, DivideIntegralImpl<Int32, Int8>>
-        : DivideIntegralByConstantImpl<Int32, Int8> {};
-template <>
-struct BinaryOperationImpl<Int32, Int16, DivideIntegralImpl<Int32, Int16>>
-        : DivideIntegralByConstantImpl<Int32, Int16> {};
-template <>
-struct BinaryOperationImpl<Int32, Int32, DivideIntegralImpl<Int32, Int32>>
-        : DivideIntegralByConstantImpl<Int32, Int32> {};
-template <>
-struct BinaryOperationImpl<Int32, Int64, DivideIntegralImpl<Int32, Int64>>
-        : DivideIntegralByConstantImpl<Int32, Int64> {};
-
 struct NameIntDiv {
    static constexpr auto name = "int_divide";
 };
-using FunctionIntDiv = FunctionBinaryArithmeticToNullType<DivideIntegralImpl, NameIntDiv, false>;
+using FunctionIntDiv = FunctionBinaryArithmetic<DivideIntegralImpl, NameIntDiv, true>; // NOTE(review): the old `false` was CanBeExecutedOnDefaultArguments; the new `true` is FunctionBinaryArithmetic's third parameter and presumably has a different meaning (null-producing result) - confirm

 void register_function_int_div(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionIntDiv>();
--- a/be/src/vec/functions/int_div.h
+++ b/be/src/vec/functions/int_div.h
@ -20,19 +20,49 @@

 #pragma once

+#include <libdivide.h>
+
+#include <type_traits>
+
+#include "vec/columns/column_decimal.h"
 #include "vec/columns/column_nullable.h"
+#include "vec/core/types.h"
 #include "vec/data_types/number_traits.h"
+#include "vec/functions/function_binary_arithmetic.h"

 namespace doris::vectorized {

 template <typename A, typename B>
 struct DivideIntegralImpl {
+    // Integer division that reports a zero divisor through a null flag instead of
+    // performing the division by zero.
    using ResultType = typename NumberTraits::ResultOfIntegerDivision<A, B>::Type;
+    using Traits = NumberTraits::BinaryOperatorTraits<A, B>;

+    // Column-by-constant form: divides each element of `a` by the single divisor `b`
+    // and stores the quotients in `c`. Every `null_map` entry is set to 1 when `b` is
+    // zero (and `c` is left unwritten), otherwise to 0.
+    // NOTE(review): assumes `null_map` holds at least `c.size()` one-byte entries and
+    // that `a` is at least as long as `c` -- confirm against the caller.
    template <typename Result = ResultType>
-    static inline Result apply(A a, B b, NullMap& null_map, size_t index) {
-        null_map[index] = b == 0;
-        return a / (b + null_map[index]);
+    static void apply(const typename Traits::ArrayA& a, B b,
+                      typename ColumnVector<Result>::Container& c,
+                      typename Traits::ArrayNull& null_map) {
+        size_t size = c.size();
+        UInt8 is_null = b == 0;
+        memset(null_map.data(), is_null, size);
+
+        if (!is_null) {
+            if constexpr (!std::is_floating_point_v<A> && !std::is_same_v<A, Int128> &&
+                          !std::is_same_v<A, Int8> && !std::is_same_v<A, UInt8>) {
+                // The divider depends only on `b`, so build it once instead of
+                // repeating its precomputation for every row.
+                // NOTE(review): `b` is converted to `A` here -- confirm a divisor that
+                // does not fit in `A` cannot reach this branch.
+                const libdivide::divider<A> divider(b);
+                for (size_t i = 0; i < size; i++) {
+                    c[i] = a[i] / divider;
+                }
+            } else {
+                for (size_t i = 0; i < size; i++) {
+                    c[i] = a[i] / b;
+                }
+            }
+        }
+    }
+
+    // Element-wise form: sets `is_null` when `b` is zero and divides by
+    // `b + is_null`, i.e. by 1 in that case, so the division never sees a zero.
+    template <typename Result = ResultType>
+    static inline Result apply(A a, B b, UInt8& is_null) {
+        is_null = b == 0;
+        return a / (b + is_null);
    }
 };

--- a/be/src/vec/functions/math.cpp
+++ b/be/src/vec/functions/math.cpp
@ -15,10 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.

+#include "vec/core/types.h"
 #include "vec/data_types/number_traits.h"
 #include "vec/functions/function_const.h"
 #include "vec/functions/function_binary_arithmetic.h"
-#include "vec/functions/function_binary_arithmetic_to_null_type.h"
+#include "vec/functions/function_binary_arithmetic.h"
 #include "vec/functions/function_math_unary.h"
 #include "vec/functions/function_math_unary_to_null_type.h"
 #include "vec/functions/function_string.h"
@ -150,17 +151,39 @@ struct LogName {
 template <typename A, typename B>
 struct LogImpl {
+    // Two-argument logarithm: computes log(b) / log(a), i.e. the logarithm of `b`
+    // in base `a`, flagging the result as null when it is not defined.
    using ResultType = Float64;
+    using Traits = NumberTraits::BinaryOperatorTraits<A, B>;
+
    static const constexpr bool allow_decimal = false;
+    // A base closer to 1 than this is treated as exactly 1 (log(1) == 0 would make
+    // the quotient a division by zero).
+    static constexpr double EPSILON = 1e-9;
+
+    // Column-by-constant form: `a` is a column of bases, `b` the single argument.
+    // If `b <= 0` every `null_map` entry is set to 1 and `c` is left unwritten.
+    // Otherwise all entries are cleared, and rows whose base is <= 0 or within
+    // EPSILON of 1 are marked null individually (their `c[i]` is not written).
+    // NOTE(review): assumes `null_map` holds at least `c.size()` one-byte entries and
+    // that `a` is at least as long as `c` -- confirm against the caller.
+    template <typename Result = ResultType>
+    static void apply(const typename Traits::ArrayA& a, B b,
+                      typename ColumnVector<Result>::Container& c,
+                      typename Traits::ArrayNull& null_map) {
+        size_t size = c.size();
+        UInt8 is_null = b <= 0;
+        memset(null_map.data(), is_null, size);
+
+        if (!is_null) {
+            // log(b) is the same for every row; evaluate it once.
+            const Float64 log_b = std::log(static_cast<Float64>(b));
+            for (size_t i = 0; i < size; i++) {
+                if (a[i] <= 0 || std::fabs(a[i] - 1.0) < EPSILON) {
+                    null_map[i] = 1;
+                } else {
+                    c[i] = log_b / std::log(static_cast<Float64>(a[i]));
+                }
+            }
+        }
+    }

+    // Element-wise form: sets `is_null` when either operand is <= 0 or the base `a`
+    // is within EPSILON of 1; the quotient is still evaluated and returned.
    template <typename Result>
-    static inline Result apply(A a, B b, NullMap& null_map, size_t index) {
-        constexpr double EPSILON = 1e-9;
-        null_map[index] = a <= 0 || b <= 0 || std::fabs(a - 1.0) < EPSILON;
+    static inline Result apply(A a, B b, UInt8& is_null) {
+        is_null = a <= 0 || b <= 0 || std::fabs(a - 1.0) < EPSILON;
        return static_cast<Float64>(std::log(static_cast<Float64>(b)) /
                                    std::log(static_cast<Float64>(a)));
    }
 };
-using FunctionLog = FunctionBinaryArithmeticToNullType<LogImpl, LogName>;
+using FunctionLog = FunctionBinaryArithmetic<LogImpl, LogName, true>;

 struct CeilName {
    static constexpr auto name = "ceil";
@ -357,7 +380,7 @@ struct PowImpl {
 struct PowName {
    static constexpr auto name = "pow";
 };
-using FunctionPow = FunctionBinaryArithmetic<PowImpl, PowName>;
+using FunctionPow = FunctionBinaryArithmetic<PowImpl, PowName, false>;

 template <typename A, typename B>
 struct TruncateImpl {
@ -374,7 +397,7 @@ struct TruncateImpl {
 struct TruncateName {
    static constexpr auto name = "truncate";
 };
-using FunctionTruncate = FunctionBinaryArithmetic<TruncateImpl, TruncateName>;
+using FunctionTruncate = FunctionBinaryArithmetic<TruncateImpl, TruncateName, false>;

 /// round(double,int32)-->double
 /// key_str:roundFloat64Int32
@ -395,7 +418,7 @@ struct RoundTwoImpl {
                my_double_round(static_cast<Float64>(a), static_cast<Int32>(b), false, false));
    }
 };
-using FunctionRoundTwo = FunctionBinaryArithmetic<RoundTwoImpl, RoundName>;
+using FunctionRoundTwo = FunctionBinaryArithmetic<RoundTwoImpl, RoundName, false>;

 // TODO: Now math may cause one thread compile time too long, because the function in math
 // so mush. Split it to speed up compile time in the future
--- a/be/src/vec/functions/minus.cpp
+++ b/be/src/vec/functions/minus.cpp
@ -49,7 +49,7 @@ struct MinusImpl {
 struct NameMinus {
    static constexpr auto name = "subtract";
 };
-using FunctionMinus = FunctionBinaryArithmetic<MinusImpl, NameMinus>;
+using FunctionMinus = FunctionBinaryArithmetic<MinusImpl, NameMinus, false>;

 void register_function_minus(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionMinus>();
--- a/be/src/vec/functions/modulo.cpp
+++ b/be/src/vec/functions/modulo.cpp
@ -18,16 +18,12 @@
 // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Modulo.cpp
 // and modified by Doris

-#include "runtime/decimalv2_value.h"
-#ifdef __SSE2__
-#define LIBDIVIDE_SSE2 1
-#endif
-
 #include <libdivide.h>

 #include "common/status.h"
+#include "runtime/decimalv2_value.h"
+#include "vec/core/types.h"
 #include "vec/functions/function_binary_arithmetic.h"
-#include "vec/functions/function_binary_arithmetic_to_null_type.h"
 #include "vec/functions/simple_function_factory.h"

 namespace doris::vectorized {
@ -35,144 +31,90 @@ namespace doris::vectorized {
 template <typename A, typename B>
 struct ModuloImpl {
+    // Remainder (`a % b`, or `fmod` for floating-point results) that reports a zero
+    // divisor through a null flag instead of evaluating the remainder by zero.
    using ResultType = typename NumberTraits::ResultOfModulo<A, B>::Type;
+    using Traits = NumberTraits::BinaryOperatorTraits<A, B>;

+    // Column-by-constant form: stores the remainder of each element of `a` by the
+    // single divisor `b` in `c`. Every `null_map` entry is set to 1 when `b` is zero
+    // (and `c` is left unwritten), otherwise to 0.
+    // NOTE(review): assumes `null_map` holds at least `c.size()` one-byte entries and
+    // that `a` is at least as long as `c` -- confirm against the caller.
    template <typename Result = ResultType>
-    static inline Result apply(A a, B b, NullMap& null_map, size_t index) {
+    static void apply(const typename Traits::ArrayA& a, B b,
+                      typename ColumnVector<Result>::Container& c,
+                      typename Traits::ArrayNull& null_map) {
+        size_t size = c.size();
+        UInt8 is_null = b == 0;
+        memset(null_map.data(), is_null, sizeof(UInt8) * size);
+
+        if (is_null) {
+            return;
+        }
+
+        if constexpr (std::is_floating_point_v<ResultType>) {
+            for (size_t i = 0; i < size; i++) {
+                c[i] = std::fmod((double)a[i], (double)b);
+            }
+        } else {
+            if constexpr (std::is_signed_v<A> && std::is_signed_v<B>) {
+                // x % -1 is 0 for every x, but evaluating MIN % -1 overflows
+                // (undefined behaviour, typically a SIGFPE on x86), so write the
+                // answer directly instead of executing the remainder.
+                if (b == -1) {
+                    for (size_t i = 0; i < size; i++) {
+                        c[i] = 0;
+                    }
+                    return;
+                }
+            }
+            for (size_t i = 0; i < size; i++) {
+                c[i] = a[i] % b;
+            }
+        }
+    }
+
+    // Element-wise form: sets `is_null` when `b` is zero and bumps the divisor to 1
+    // in that case so the remainder below never sees a zero divisor.
+    template <typename Result = ResultType>
+    static inline Result apply(A a, B b, UInt8& is_null) {
+        is_null = b == 0;
+        b += is_null;
+
        if constexpr (std::is_floating_point_v<Result>) {
-            null_map[index] = b == 0;
            return std::fmod((double)a, (double)b);
        } else {
-            null_map[index] = b == 0;
-            return typename NumberTraits::ToInteger<A>::Type(a) %
-                   (typename NumberTraits::ToInteger<B>::Type(b) + (b == 0));
+            if constexpr (std::is_signed_v<A> && std::is_signed_v<B>) {
+                // x % -1 is always 0; answering directly avoids the MIN % -1
+                // overflow, which is undefined behaviour.
+                if (b == -1) {
+                    return 0;
+                }
+            }
+            return a % b;
        }
    }

+    // DecimalV2Value form: same null rule; a zero divisor is replaced by
+    // `b + DecimalV2Value(is_null)` before taking the remainder.
    template <typename Result = DecimalV2Value>
-    static inline DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b, NullMap& null_map,
-                                       size_t index) {
-        null_map[index] = b == DecimalV2Value(0);
-        return a % (b + DecimalV2Value(b == DecimalV2Value(0)));
+    static inline DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b, UInt8& is_null) {
+        is_null = b == DecimalV2Value(0);
+        return a % (b + DecimalV2Value(is_null));
    }
 };

 template <typename A, typename B>
 struct PModuloImpl {
+    // "pmod": computes (a % b + b) % b (with `fmod` for floating-point results) and
+    // reports a zero divisor through a null flag. Adding `b` before the second
+    // reduction moves a negative remainder into [0, b) when `b` is positive.
    using ResultType = typename NumberTraits::ResultOfModulo<A, B>::Type;
+    using Traits = NumberTraits::BinaryOperatorTraits<A, B>;

+    // Column-by-constant form: applies the formula to each element of `a` with the
+    // single divisor `b`, writing to `c`. Every `null_map` entry is set to 1 when `b`
+    // is zero (and `c` is left unwritten), otherwise to 0.
+    // NOTE(review): assumes `null_map` holds at least `c.size()` one-byte entries and
+    // that `a` is at least as long as `c` -- confirm against the caller.
    template <typename Result = ResultType>
-    static inline Result apply(A a, B b, NullMap& null_map, size_t index) {
+    static void apply(const typename Traits::ArrayA& a, B b,
+                      typename ColumnVector<Result>::Container& c,
+                      typename Traits::ArrayNull& null_map) {
+        size_t size = c.size();
+        UInt8 is_null = b == 0;
+        memset(null_map.data(), is_null, size);
+
+        if (is_null) {
+            return;
+        }
+
+        if constexpr (std::is_floating_point_v<ResultType>) {
+            // The divisor is constant; convert it to double once.
+            const double divisor = (double)b;
+            for (size_t i = 0; i < size; i++) {
+                c[i] = std::fmod(std::fmod((double)a[i], divisor) + divisor, divisor);
+            }
+        } else {
+            if constexpr (std::is_signed_v<A> && std::is_signed_v<B>) {
+                // With b == -1 the formula is 0 for every x, but evaluating
+                // MIN % -1 overflows (undefined behaviour, typically a SIGFPE on
+                // x86), so write the answer directly.
+                if (b == -1) {
+                    for (size_t i = 0; i < size; i++) {
+                        c[i] = 0;
+                    }
+                    return;
+                }
+            }
+            for (size_t i = 0; i < size; i++) {
+                c[i] = (a[i] % b + b) % b;
+            }
+        }
+    }
+
+    // Element-wise form: sets `is_null` when `b` is zero and bumps the divisor to 1
+    // in that case so the remainders below never see a zero divisor.
+    template <typename Result = ResultType>
+    static inline Result apply(A a, B b, UInt8& is_null) {
+        is_null = b == 0;
+        b += is_null;
+
        if constexpr (std::is_floating_point_v<Result>) {
-            null_map[index] = 0;
            return std::fmod(std::fmod((double)a, (double)b) + (double)b, (double)b);
        } else {
-            null_map[index] = b == 0;
-            return (typename NumberTraits::ToInteger<A>::Type(a) %
-                            (typename NumberTraits::ToInteger<B>::Type(b) + (b == 0)) +
-                    typename NumberTraits::ToInteger<B>::Type(b)) %
-                   (typename NumberTraits::ToInteger<B>::Type(b) + (b == 0));
+            if constexpr (std::is_signed_v<A> && std::is_signed_v<B>) {
+                // With b == -1 the result is always 0; answering directly avoids
+                // the MIN % -1 overflow, which is undefined behaviour.
+                if (b == -1) {
+                    return 0;
+                }
+            }
+            return (a % b + b) % b;
        }
    }

+    // DecimalV2Value form: same null rule; a zero divisor is bumped by
+    // `DecimalV2Value(is_null)` before the formula is applied.
    template <typename Result = DecimalV2Value>
-    static inline DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b, NullMap& null_map,
-                                       size_t index) {
-        null_map[index] = b == DecimalV2Value(0);
-        return (a % (b + DecimalV2Value(b == DecimalV2Value(0))) + b) %
-               (b + DecimalV2Value(b == DecimalV2Value(0)));
+    static inline DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b, UInt8& is_null) {
+        is_null = b == DecimalV2Value(0);
+        b += DecimalV2Value(is_null);
+        return (a % b + b) % b;
    }
 };

-template <typename A, typename B>
-struct ModuloByConstantImpl : BinaryOperationImplBase<A, B, ModuloImpl<A, B>> {
-    using ResultType = typename ModuloImpl<A, B>::ResultType;
-
-    static void vector_constant(const PaddedPODArray<A>& a, B b, PaddedPODArray<ResultType>& c) {
-        // TODO: Support return NULL in the future
-        if (UNLIKELY(b == 0)) {
-            //        throw Exception("Division by zero", TStatusCode::VEC_ILLEGAL_DIVISION);
-            memset(c.data(), 0, sizeof(ResultType) * c.size());
-            return;
-        }
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wsign-compare"
-
-        if (UNLIKELY((std::is_signed_v<B> && b == -1) || b == 1)) {
-            size_t size = a.size();
-            for (size_t i = 0; i < size; ++i) c[i] = 0;
-            return;
-        }
-
-#pragma GCC diagnostic pop
-
-        libdivide::divider<A> divider(b);
-
-        /// Here we failed to make the SSE variant from libdivide give an advantage.
-        size_t size = a.size();
-        for (size_t i = 0; i < size; ++i)
-            c[i] = a[i] -
-                   (a[i] / divider) *
-                           b; /// NOTE: perhaps, the division semantics with the remainder of negative numbers is not preserved.
-    }
-};
-
-/** Specializations are specified for dividing numbers of the type UInt64 and UInt32 by the numbers of the same sign.
-  * Can be expanded to all possible combinations, but more code is needed.
-  */
-
-template <>
-struct BinaryOperationImpl<UInt64, UInt8, ModuloImpl<UInt64, UInt8>>
-        : ModuloByConstantImpl<UInt64, UInt8> {};
-template <>
-struct BinaryOperationImpl<UInt64, UInt16, ModuloImpl<UInt64, UInt16>>
-        : ModuloByConstantImpl<UInt64, UInt16> {};
-template <>
-struct BinaryOperationImpl<UInt64, UInt32, ModuloImpl<UInt64, UInt32>>
-        : ModuloByConstantImpl<UInt64, UInt32> {};
-template <>
-struct BinaryOperationImpl<UInt64, UInt64, ModuloImpl<UInt64, UInt64>>
-        : ModuloByConstantImpl<UInt64, UInt64> {};
-
-template <>
-struct BinaryOperationImpl<UInt32, UInt8, ModuloImpl<UInt32, UInt8>>
-        : ModuloByConstantImpl<UInt32, UInt8> {};
-template <>
-struct BinaryOperationImpl<UInt32, UInt16, ModuloImpl<UInt32, UInt16>>
-        : ModuloByConstantImpl<UInt32, UInt16> {};
-template <>
-struct BinaryOperationImpl<UInt32, UInt32, ModuloImpl<UInt32, UInt32>>
-        : ModuloByConstantImpl<UInt32, UInt32> {};
-template <>
-struct BinaryOperationImpl<UInt32, UInt64, ModuloImpl<UInt32, UInt64>>
-        : ModuloByConstantImpl<UInt32, UInt64> {};
-
-template <>
-struct BinaryOperationImpl<Int64, Int8, ModuloImpl<Int64, Int8>>
-        : ModuloByConstantImpl<Int64, Int8> {};
-template <>
-struct BinaryOperationImpl<Int64, Int16, ModuloImpl<Int64, Int16>>
-        : ModuloByConstantImpl<Int64, Int16> {};
-template <>
-struct BinaryOperationImpl<Int64, Int32, ModuloImpl<Int64, Int32>>
-        : ModuloByConstantImpl<Int64, Int32> {};
-template <>
-struct BinaryOperationImpl<Int64, Int64, ModuloImpl<Int64, Int64>>
-        : ModuloByConstantImpl<Int64, Int64> {};
-
-template <>
-struct BinaryOperationImpl<Int32, Int8, ModuloImpl<Int32, Int8>>
-        : ModuloByConstantImpl<Int32, Int8> {};
-template <>
-struct BinaryOperationImpl<Int32, Int16, ModuloImpl<Int32, Int16>>
-        : ModuloByConstantImpl<Int32, Int16> {};
-template <>
-struct BinaryOperationImpl<Int32, Int32, ModuloImpl<Int32, Int32>>
-        : ModuloByConstantImpl<Int32, Int32> {};
-template <>
-struct BinaryOperationImpl<Int32, Int64, ModuloImpl<Int32, Int64>>
-        : ModuloByConstantImpl<Int32, Int64> {};
-
 struct NameModulo {
    static constexpr auto name = "mod";
 };
@ -180,8 +122,8 @@ struct NamePModulo {
    static constexpr auto name = "pmod";
 };

-using FunctionModulo = FunctionBinaryArithmeticToNullType<ModuloImpl, NameModulo, false>;
-using FunctionPModulo = FunctionBinaryArithmeticToNullType<PModuloImpl, NamePModulo, false>;
+using FunctionModulo = FunctionBinaryArithmetic<ModuloImpl, NameModulo, true>;
+using FunctionPModulo = FunctionBinaryArithmetic<PModuloImpl, NamePModulo, true>;

 void register_function_modulo(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionModulo>();
--- a/be/src/vec/functions/multiply.cpp
+++ b/be/src/vec/functions/multiply.cpp
@ -48,7 +48,7 @@ struct MultiplyImpl {
 struct NameMultiply {
    static constexpr auto name = "multiply";
 };
-using FunctionMultiply = FunctionBinaryArithmetic<MultiplyImpl, NameMultiply>;
+using FunctionMultiply = FunctionBinaryArithmetic<MultiplyImpl, NameMultiply, false>;

 void register_function_multiply(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionMultiply>();
--- a/be/src/vec/functions/plus.cpp
+++ b/be/src/vec/functions/plus.cpp
@ -50,7 +50,7 @@ struct PlusImpl {
 struct NamePlus {
    static constexpr auto name = "add";
 };
-using FunctionPlus = FunctionBinaryArithmetic<PlusImpl, NamePlus>;
+using FunctionPlus = FunctionBinaryArithmetic<PlusImpl, NamePlus, false>;

 void register_function_plus(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionPlus>();