[improvement](decimal) use new way for decimal arithmetic precision promotion (#27787)

* [DNM](decimal) use new way for decimal arithmetic precision promotion * [improvement](decimal) [DNM](decimal) use new way for decimal arithmetic precision promotion 1. [DNM](decimal) use new way for decimal arithmetic precision promotion 2. throw exception if it overflows for decimal arithmetics 3. throw exception if it overflows when casting among number types * fix compile error of gcc * improvement --------- Co-authored-by: morrySnow <morrysnow@126.com>
2023-12-05 12:54:40 +08:00
parent ca6949ee3e
commit 17016b9797
121 changed files with 17662 additions and 12466 deletions
--- a/be/src/vec/functions/function_binary_arithmetic.h
+++ b/be/src/vec/functions/function_binary_arithmetic.h
@ -22,6 +22,8 @@

 #include <type_traits>

+#include "common/exception.h"
+#include "common/status.h"
 #include "runtime/decimalv2_value.h"
 #include "udf/udf.h"
 #include "vec/columns/column_const.h"
@ -30,12 +32,14 @@
 #include "vec/columns/column_vector.h"
 #include "vec/common/arithmetic_overflow.h"
 #include "vec/core/types.h"
+#include "vec/core/wide_integer.h"
 #include "vec/data_types/data_type_decimal.h"
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_number.h"
 #include "vec/data_types/number_traits.h"
 #include "vec/functions/cast_type_to_either.h"
 #include "vec/functions/function.h"
+#include "vec/utils/template_helpers.hpp"

 namespace doris::vectorized {

@ -220,10 +224,12 @@ struct BinaryOperationImpl {
 /// +|- scale one of args (which scale factor is not 1). ScaleR = oneof(Scale1, Scale2);
 /// *   no agrs scale. ScaleR = Scale1 + Scale2;
 /// /   first arg scale. ScaleR = Scale1 (scale_a = DecimalType<B>::get_scale()).
-template <typename A, typename B, template <typename, typename> typename Operation,
-          typename ResultType, bool is_to_null_type, bool return_nullable_type,
-          bool check_overflow = true>
+template <typename LeftDataType, typename RightDataType,
+          template <typename, typename> typename Operation, typename ResultType,
+          bool is_to_null_type, bool check_overflow>
 struct DecimalBinaryOperation {
+    using A = typename LeftDataType::FieldType;
+    using B = typename RightDataType::FieldType;
    using OpTraits = OperationTraits<Operation, A, B>;

    using NativeResultType = typename NativeType<ResultType>::Type;
@ -232,16 +238,26 @@ struct DecimalBinaryOperation {
    using Traits = NumberTraits::BinaryOperatorTraits<A, B>;
    using ArrayC = typename ColumnDecimal<ResultType>::Container;

+private:
    static void vector_vector(const typename Traits::ArrayA::value_type* __restrict a,
                              const typename Traits::ArrayB::value_type* __restrict b,
-                              typename ArrayC::value_type* c, size_t size) {
+                              typename ArrayC::value_type* c, size_t size,
+                              const ResultType& max_result_number,
+                              const ResultType& scale_diff_multiplier) {
+        // TODO: handle overflow of decimalv2
        if constexpr (OpTraits::is_multiply && IsDecimalV2<A> && IsDecimalV2<B> &&
                      IsDecimalV2<ResultType>) {
            Op::vector_vector(a, b, c, size);
        } else {
-            for (size_t i = 0; i < size; i++) {
-                c[i] = typename ArrayC::value_type(apply(a[i], b[i]));
-            }
+            bool need_adjust_scale = scale_diff_multiplier.value > 1;
+            std::visit(
+                    [&](auto need_adjust_scale) {
+                        for (size_t i = 0; i < size; i++) {
+                            c[i] = typename ArrayC::value_type(apply<need_adjust_scale>(
+                                    a[i], b[i], max_result_number, scale_diff_multiplier));
+                        }
+                    },
+                    make_bool_variant(need_adjust_scale));
        }
    }

@ -280,18 +296,27 @@ struct DecimalBinaryOperation {
    }

    static void vector_constant(const typename Traits::ArrayA::value_type* __restrict a, B b,
-                                typename ArrayC::value_type* c, size_t size) {
+                                typename ArrayC::value_type* c, size_t size,
+                                const ResultType& max_result_number,
+                                const ResultType& scale_diff_multiplier) {
        if constexpr (OpTraits::is_division && IsDecimalNumber<B>) {
            for (size_t i = 0; i < size; ++i) {
-                c[i] = typename ArrayC::value_type(apply_scaled_div(a[i], b));
+                // code never executed????
+                c[i] = typename ArrayC::value_type(apply_scaled_div(a[i], b, a));
            }
            return;
        }

        /// default: use it if no return before
-        for (size_t i = 0; i < size; ++i) {
-            c[i] = typename ArrayC::value_type(apply(a[i], b));
-        }
+        bool need_adjust_scale = scale_diff_multiplier.value > 1;
+        std::visit(
+                [&](auto need_adjust_scale) {
+                    for (size_t i = 0; i < size; ++i) {
+                        c[i] = typename ArrayC::value_type(apply<need_adjust_scale>(
+                                a[i], b, max_result_number, scale_diff_multiplier));
+                    }
+                },
+                make_bool_variant(need_adjust_scale));
    }

    static void vector_constant(const typename Traits::ArrayA::value_type* __restrict a, B b,
@ -313,7 +338,9 @@ struct DecimalBinaryOperation {
    }

    static void constant_vector(A a, const typename Traits::ArrayB::value_type* __restrict b,
-                                typename ArrayC::value_type* c, size_t size) {
+                                typename ArrayC::value_type* c, size_t size,
+                                const ResultType& max_result_number,
+                                const ResultType& scale_diff_multiplier) {
        if constexpr (IsDecimalV2<A> || IsDecimalV2<B>) {
            DecimalV2Value da(a);
            for (size_t i = 0; i < size; ++i) {
@ -321,9 +348,15 @@ struct DecimalBinaryOperation {
                        Op::template apply(da, DecimalV2Value(b[i])).value());
            }
        } else {
-            for (size_t i = 0; i < size; ++i) {
-                c[i] = typename ArrayC::value_type(apply(a, b[i]));
-            }
+            bool need_adjust_scale = scale_diff_multiplier.value > 1;
+            std::visit(
+                    [&](auto need_adjust_scale) {
+                        for (size_t i = 0; i < size; ++i) {
+                            c[i] = typename ArrayC::value_type(apply<need_adjust_scale>(
+                                    a, b[i], max_result_number, scale_diff_multiplier));
+                        }
+                    },
+                    make_bool_variant(need_adjust_scale));
        }
    }

@ -345,7 +378,10 @@ struct DecimalBinaryOperation {
        }
    }

-    static ResultType constant_constant(A a, B b) { return ResultType(apply(a, b)); }
+    static ResultType constant_constant(A a, B b, const ResultType& max_result_number,
+                                        const ResultType& scale_diff_multiplier) {
+        return ResultType(apply<true>(a, b, max_result_number, scale_diff_multiplier));
+    }

    static ResultType constant_constant(A a, B b, UInt8& is_null) {
        if constexpr (OpTraits::is_division && IsDecimalNumber<B>) {
@ -364,26 +400,32 @@ struct DecimalBinaryOperation {
        }
    }

-    static ColumnPtr adapt_decimal_constant_constant(A a, B b, DataTypePtr res_data_type) {
+public:
+    static ColumnPtr adapt_decimal_constant_constant(A a, B b, const ResultType& max_result_number,
+                                                     const ResultType& scale_diff_multiplier,
+                                                     DataTypePtr res_data_type) {
        auto column_result = ColumnDecimal<ResultType>::create(
                1, assert_cast<const DataTypeDecimal<ResultType>&>(*res_data_type).get_scale());

-        if constexpr (return_nullable_type && !is_to_null_type &&
+        if constexpr (check_overflow && !is_to_null_type &&
                      ((!OpTraits::is_multiply && !OpTraits::is_plus_minus) || IsDecimalV2<A> ||
                       IsDecimalV2<B>)) {
            LOG(FATAL) << "Invalid function type!";
            return column_result;
-        } else if constexpr (return_nullable_type || is_to_null_type) {
+        } else if constexpr (is_to_null_type) {
            auto null_map = ColumnUInt8::create(1, 0);
            column_result->get_element(0) = constant_constant(a, b, null_map->get_element(0));
            return ColumnNullable::create(std::move(column_result), std::move(null_map));
        } else {
-            column_result->get_element(0) = constant_constant(a, b);
+            column_result->get_element(0) =
+                    constant_constant(a, b, max_result_number, scale_diff_multiplier);
            return column_result;
        }
    }

    static ColumnPtr adapt_decimal_vector_constant(ColumnPtr column_left, B b,
+                                                   const ResultType& max_result_number,
+                                                   const ResultType& scale_diff_multiplier,
                                                   DataTypePtr res_data_type) {
        auto column_left_ptr = check_and_get_column<typename Traits::ColumnVectorA>(column_left);
        auto column_result = ColumnDecimal<ResultType>::create(
@ -391,24 +433,26 @@ struct DecimalBinaryOperation {
                assert_cast<const DataTypeDecimal<ResultType>&>(*res_data_type).get_scale());
        DCHECK(column_left_ptr != nullptr);

-        if constexpr (return_nullable_type && !is_to_null_type &&
+        if constexpr (check_overflow && !is_to_null_type &&
                      ((!OpTraits::is_multiply && !OpTraits::is_plus_minus) || IsDecimalV2<A> ||
                       IsDecimalV2<B>)) {
            LOG(FATAL) << "Invalid function type!";
            return column_result;
-        } else if constexpr (return_nullable_type || is_to_null_type) {
+        } else if constexpr (is_to_null_type) {
            auto null_map = ColumnUInt8::create(column_left->size(), 0);
            vector_constant(column_left_ptr->get_data().data(), b, column_result->get_data().data(),
                            null_map->get_data(), column_left->size());
            return ColumnNullable::create(std::move(column_result), std::move(null_map));
        } else {
            vector_constant(column_left_ptr->get_data().data(), b, column_result->get_data().data(),
-                            column_left->size());
+                            column_left->size(), max_result_number, scale_diff_multiplier);
            return column_result;
        }
    }

    static ColumnPtr adapt_decimal_constant_vector(A a, ColumnPtr column_right,
+                                                   const ResultType& max_result_number,
+                                                   const ResultType& scale_diff_multiplier,
                                                   DataTypePtr res_data_type) {
        auto column_right_ptr = check_and_get_column<typename Traits::ColumnVectorB>(column_right);
        auto column_result = ColumnDecimal<ResultType>::create(
@ -416,12 +460,12 @@ struct DecimalBinaryOperation {
                assert_cast<const DataTypeDecimal<ResultType>&>(*res_data_type).get_scale());
        DCHECK(column_right_ptr != nullptr);

-        if constexpr (return_nullable_type && !is_to_null_type &&
+        if constexpr (check_overflow && !is_to_null_type &&
                      ((!OpTraits::is_multiply && !OpTraits::is_plus_minus) || IsDecimalV2<A> ||
                       IsDecimalV2<B>)) {
            LOG(FATAL) << "Invalid function type!";
            return column_result;
-        } else if constexpr (return_nullable_type || is_to_null_type) {
+        } else if constexpr (is_to_null_type) {
            auto null_map = ColumnUInt8::create(column_right->size(), 0);
            constant_vector(a, column_right_ptr->get_data().data(),
                            column_result->get_data().data(), null_map->get_data(),
@ -429,27 +473,30 @@ struct DecimalBinaryOperation {
            return ColumnNullable::create(std::move(column_result), std::move(null_map));
        } else {
            constant_vector(a, column_right_ptr->get_data().data(),
-                            column_result->get_data().data(), column_right->size());
+                            column_result->get_data().data(), column_right->size(),
+                            max_result_number, scale_diff_multiplier);
            return column_result;
        }
    }

    static ColumnPtr adapt_decimal_vector_vector(ColumnPtr column_left, ColumnPtr column_right,
+                                                 const ResultType& max_result_number,
+                                                 const ResultType& scale_diff_multiplier,
                                                 DataTypePtr res_data_type) {
        auto column_left_ptr = check_and_get_column<typename Traits::ColumnVectorA>(column_left);
        auto column_right_ptr = check_and_get_column<typename Traits::ColumnVectorB>(column_right);

-        auto column_result = ColumnDecimal<ResultType>::create(
-                column_left->size(),
-                assert_cast<const DataTypeDecimal<ResultType>&>(*res_data_type).get_scale());
+        const auto& type_result = assert_cast<const DataTypeDecimal<ResultType>&>(*res_data_type);
+        auto column_result =
+                ColumnDecimal<ResultType>::create(column_left->size(), type_result.get_scale());
        DCHECK(column_left_ptr != nullptr && column_right_ptr != nullptr);

-        if constexpr (return_nullable_type && !is_to_null_type &&
+        if constexpr (check_overflow && !is_to_null_type &&
                      ((!OpTraits::is_multiply && !OpTraits::is_plus_minus) || IsDecimalV2<A> ||
                       IsDecimalV2<B>)) {
            LOG(FATAL) << "Invalid function type!";
            return column_result;
-        } else if constexpr (return_nullable_type || is_to_null_type) {
+        } else if constexpr (is_to_null_type) {
            auto null_map = ColumnUInt8::create(column_result->size(), 0);
            vector_vector(column_left_ptr->get_data().data(), column_right_ptr->get_data().data(),
                          column_result->get_data().data(), null_map->get_data(),
@ -457,28 +504,84 @@ struct DecimalBinaryOperation {
            return ColumnNullable::create(std::move(column_result), std::move(null_map));
        } else {
            vector_vector(column_left_ptr->get_data().data(), column_right_ptr->get_data().data(),
-                          column_result->get_data().data(), column_left->size());
+                          column_result->get_data().data(), column_left->size(), max_result_number,
+                          scale_diff_multiplier);
            return column_result;
        }
    }

 private:
    /// there's implicit type conversion here
-    static ALWAYS_INLINE NativeResultType apply(NativeResultType a, NativeResultType b) {
+    template <bool need_adjust_scale>
+    static ALWAYS_INLINE NativeResultType apply(NativeResultType a, NativeResultType b,
+                                                const ResultType& max_result_number,
+                                                const ResultType& scale_diff_multiplier) {
+        // TODO: handle overflow of decimalv2
        if constexpr (IsDecimalV2<B> || IsDecimalV2<A>) {
            // Now, Doris only support decimal +-*/ decimal.
            // overflow in consider in operator
            return Op::template apply(DecimalV2Value(a), DecimalV2Value(b)).value();
        } else {
+            NativeResultType res;
            if constexpr (OpTraits::can_overflow && check_overflow) {
-                NativeResultType res;
                // TODO handle overflow gracefully
-                if (Op::template apply<NativeResultType>(a, b, res)) {
-                    res = type_limit<ResultType>::max().value;
+                if (UNLIKELY(Op::template apply<NativeResultType>(a, b, res))) {
+                    if constexpr (OpTraits::is_plus_minus) {
+                        throw Exception(ErrorCode::ARITHMETIC_OVERFLOW_ERRROR,
+                                        "Arithmetic overflow");
+                    }
+                    // multiply
+                    if constexpr (std::is_same_v<NativeResultType, __int128>) {
+                        wide::Int256 res256 = Op::template apply<wide::Int256>(a, b);
+                        if constexpr (OpTraits::is_multiply && need_adjust_scale) {
+                            if (res256 > 0) {
+                                res256 = (res256 + scale_diff_multiplier.value / 2) /
+                                         scale_diff_multiplier.value;
+
+                            } else {
+                                res256 = (res256 - scale_diff_multiplier.value / 2) /
+                                         scale_diff_multiplier.value;
+                            }
+                        }
+                        // check if final result is overflow
+                        if (res256 > wide::Int256(max_result_number.value) ||
+                            res256 < wide::Int256(-max_result_number.value)) {
+                            throw Exception(ErrorCode::ARITHMETIC_OVERFLOW_ERRROR,
+                                            "Arithmetic overflow");
+                        } else {
+                            res = res256;
+                        }
+                    } else {
+                        throw Exception(ErrorCode::ARITHMETIC_OVERFLOW_ERRROR,
+                                        "Arithmetic overflow");
+                    }
+                } else {
+                    // round to final result precision
+                    if constexpr (OpTraits::is_multiply && need_adjust_scale) {
+                        if (res >= 0) {
+                            res = (res + scale_diff_multiplier.value / 2) /
+                                  scale_diff_multiplier.value;
+                        } else {
+                            res = (res - scale_diff_multiplier.value / 2) /
+                                  scale_diff_multiplier.value;
+                        }
+                    }
+                    if (res > max_result_number.value || res < -max_result_number.value) {
+                        throw Exception(ErrorCode::ARITHMETIC_OVERFLOW_ERRROR,
+                                        "Arithmetic overflow");
+                    }
                }
                return res;
            } else {
-                return Op::template apply<NativeResultType>(a, b);
+                res = Op::template apply<NativeResultType>(a, b);
+                if constexpr (OpTraits::is_multiply && need_adjust_scale) {
+                    if (res >= 0) {
+                        res = (res + scale_diff_multiplier.value / 2) / scale_diff_multiplier.value;
+                    } else {
+                        res = (res - scale_diff_multiplier.value / 2) / scale_diff_multiplier.value;
+                    }
+                }
+                return res;
            }
        }
    }
@ -611,20 +714,20 @@ struct BinaryOperationTraits {

 template <typename LeftDataType, typename RightDataType, typename ExpectedResultDataType,
          template <typename, typename> class Operation, bool is_to_null_type,
-          bool return_nullable_type>
+          bool check_overflow_for_decimal>
 struct ConstOrVectorAdapter {
    static constexpr bool result_is_decimal =
            IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>;
-
    using ResultDataType = ExpectedResultDataType;
    using ResultType = typename ResultDataType::FieldType;
    using A = typename LeftDataType::FieldType;
    using B = typename RightDataType::FieldType;
+    using OpTraits = OperationTraits<Operation, A, B>;

    using OperationImpl = std::conditional_t<
            IsDataTypeDecimal<ResultDataType>,
-            DecimalBinaryOperation<A, B, Operation, ResultType, is_to_null_type,
-                                   return_nullable_type>,
+            DecimalBinaryOperation<LeftDataType, RightDataType, Operation, ResultType,
+                                   is_to_null_type, check_overflow_for_decimal>,
            BinaryOperationImpl<A, B, Operation<A, B>, is_to_null_type, ResultType>>;

    static ColumnPtr execute(ColumnPtr column_left, ColumnPtr column_right,
@ -646,6 +749,25 @@ struct ConstOrVectorAdapter {
    }

 private:
+    // for multiply, e1: {p1, s1}, e2: {p2, s2}, the original result precision
+    // is {p1 + p2, s1 + s2}, but if the precision or scale is overflow, FE will adjust
+    // the result precsion and scale to the values specified in type_result, so
+    // we need to adjust the multiply result accordingly.
+    static std::pair<ResultType, ResultType> get_max_and_multiplier(
+            const LeftDataType& type_left, const RightDataType& type_right,
+            const DataTypeDecimal<ResultType>& type_result) {
+        auto max_result_number =
+                DataTypeDecimal<ResultType>::get_max_digits_number(type_result.get_precision());
+
+        auto orig_result_scale = type_left.get_scale() + type_right.get_scale();
+        auto result_scale = type_result.get_scale();
+        DCHECK(orig_result_scale >= result_scale);
+        auto scale_diff_multiplier =
+                DataTypeDecimal<ResultType>::get_scale_multiplier(orig_result_scale - result_scale)
+                        .value;
+        return {ResultType(max_result_number), ResultType(scale_diff_multiplier)};
+    }
+
    static ColumnPtr constant_constant(ColumnPtr column_left, ColumnPtr column_right,
                                       const LeftDataType& type_left,
                                       const RightDataType& type_right, DataTypePtr res_data_type) {
@ -656,9 +778,14 @@ private:
        ColumnPtr column_result = nullptr;

        if constexpr (result_is_decimal) {
+            const auto& type_result =
+                    assert_cast<const DataTypeDecimal<ResultType>&>(*res_data_type);
+            auto max_and_multiplier = get_max_and_multiplier(type_left, type_right, type_result);
+
            column_result = OperationImpl::adapt_decimal_constant_constant(
                    column_left_ptr->template get_value<A>(),
-                    column_right_ptr->template get_value<B>(), res_data_type);
+                    column_right_ptr->template get_value<B>(), max_and_multiplier.first,
+                    max_and_multiplier.second, res_data_type);

        } else {
            column_result = OperationImpl::adapt_normal_constant_constant(
@ -676,9 +803,12 @@ private:
        DCHECK(column_right_ptr != nullptr);

        if constexpr (result_is_decimal) {
+            const auto& type_result =
+                    assert_cast<const DataTypeDecimal<ResultType>&>(*res_data_type);
+            auto max_and_multiplier = get_max_and_multiplier(type_left, type_right, type_result);
            return OperationImpl::adapt_decimal_vector_constant(
                    column_left->get_ptr(), column_right_ptr->template get_value<B>(),
-                    res_data_type);
+                    max_and_multiplier.first, max_and_multiplier.second, res_data_type);
        } else {
            return OperationImpl::adapt_normal_vector_constant(
                    column_left->get_ptr(), column_right_ptr->template get_value<B>());
@ -692,9 +822,12 @@ private:
        DCHECK(column_left_ptr != nullptr);

        if constexpr (result_is_decimal) {
+            const auto& type_result =
+                    assert_cast<const DataTypeDecimal<ResultType>&>(*res_data_type);
+            auto max_and_multiplier = get_max_and_multiplier(type_left, type_right, type_result);
            return OperationImpl::adapt_decimal_constant_vector(
                    column_left_ptr->template get_value<A>(), column_right->get_ptr(),
-                    res_data_type);
+                    max_and_multiplier.first, max_and_multiplier.second, res_data_type);
        } else {
            return OperationImpl::adapt_normal_constant_vector(
                    column_left_ptr->template get_value<A>(), column_right->get_ptr());
@ -705,8 +838,12 @@ private:
                                   const LeftDataType& type_left, const RightDataType& type_right,
                                   DataTypePtr res_data_type) {
        if constexpr (result_is_decimal) {
+            const auto& type_result =
+                    assert_cast<const DataTypeDecimal<ResultType>&>(*res_data_type);
+            auto max_and_multiplier = get_max_and_multiplier(type_left, type_right, type_result);
            return OperationImpl::adapt_decimal_vector_vector(
-                    column_left->get_ptr(), column_right->get_ptr(), res_data_type);
+                    column_left->get_ptr(), column_right->get_ptr(), max_and_multiplier.first,
+                    max_and_multiplier.second, res_data_type);
        } else {
            return OperationImpl::adapt_normal_vector_vector(column_left->get_ptr(),
                                                             column_right->get_ptr());
@ -818,12 +955,12 @@ public:
            right_generic =
                    static_cast<const DataTypeNullable*>(right_generic)->get_nested_type().get();
        }
-        bool result_is_nullable = context->check_overflow_for_decimal();
        if (result_generic->is_nullable()) {
            result_generic =
                    static_cast<const DataTypeNullable*>(result_generic)->get_nested_type().get();
        }

+        bool check_overflow_for_decimal = context->check_overflow_for_decimal();
        bool valid = cast_both_types(
                left_generic, right_generic, result_generic,
                [&](const auto& left, const auto& right, const auto& res) {
@ -840,7 +977,7 @@ public:
                                     ResultDataType>)&&(IsDataTypeDecimal<ExpectedResultDataType> ==
                                                        (IsDataTypeDecimal<LeftDataType> ||
                                                         IsDataTypeDecimal<RightDataType>))) {
-                        if (result_is_nullable) {
+                        if (check_overflow_for_decimal) {
                            auto column_result = ConstOrVectorAdapter<
                                    LeftDataType, RightDataType,
                                    std::conditional_t<IsDataTypeDecimal<ExpectedResultDataType>,