From 98bfeaf560f7ac04f2324499e4dc7cc3bbf0a437 Mon Sep 17 00:00:00 2001 From: Pxl <952130278@qq.com> Date: Sat, 7 May 2022 10:55:15 +0800 Subject: [PATCH] [Enhancement] [Vectorized] Refactor and optimize BinaryOperation (#9087) --- be/src/vec/data_types/data_type_decimal.h | 6 +- be/src/vec/data_types/number_traits.h | 87 +- be/src/vec/functions/divide.cpp | 34 +- be/src/vec/functions/function.h | 9 - .../functions/function_binary_arithmetic.h | 899 ++++++++++-------- .../function_binary_arithmetic_to_null_type.h | 247 ----- be/src/vec/functions/function_bit.cpp | 8 +- be/src/vec/functions/function_cast.h | 1 - be/src/vec/functions/int_div.cpp | 118 +-- be/src/vec/functions/int_div.h | 36 +- be/src/vec/functions/math.cpp | 39 +- be/src/vec/functions/minus.cpp | 2 +- be/src/vec/functions/modulo.cpp | 180 ++-- be/src/vec/functions/multiply.cpp | 2 +- be/src/vec/functions/plus.cpp | 2 +- 15 files changed, 685 insertions(+), 985 deletions(-) delete mode 100644 be/src/vec/functions/function_binary_arithmetic_to_null_type.h diff --git a/be/src/vec/data_types/data_type_decimal.h b/be/src/vec/data_types/data_type_decimal.h index d701ecee71..0613653324 100644 --- a/be/src/vec/data_types/data_type_decimal.h +++ b/be/src/vec/data_types/data_type_decimal.h @@ -106,10 +106,12 @@ public: } // Now, Doris only support precision:27, scale: 9 - DCHECK(precision_ == 27); - DCHECK(scale_ == 9); + DCHECK(precision == 27); + DCHECK(scale == 9); } + DataTypeDecimal(const DataTypeDecimal& rhs) : precision(rhs.precision), scale(rhs.scale) {} + const char* get_family_name() const override { return "Decimal"; } std::string do_get_name() const override; TypeIndex get_type_id() const override { return TypeId::value; } diff --git a/be/src/vec/data_types/number_traits.h b/be/src/vec/data_types/number_traits.h index 70830bfcd1..8b87e55d93 100644 --- a/be/src/vec/data_types/number_traits.h +++ b/be/src/vec/data_types/number_traits.h @@ -22,6 +22,8 @@ #include +#include "vec/columns/column_decimal.h" +#include "vec/columns/column_vector.h" #include "vec/common/uint128.h" #include "vec/core/types.h" @@ -155,7 +157,8 @@ struct ResultOfSubtraction { */ template struct ResultOfFloatingPointDivision { - using Type = Float64; + using Type = std::conditional_t, A, + std::conditional_t, B, Float64>>; }; /** For integer division, we get a number with the same number of bits as in divisible. @@ -171,13 +174,8 @@ struct ResultOfIntegerDivision { template struct ResultOfModulo { using Type = typename Construct || std::is_signed_v, - std::is_floating_point_v, max(sizeof(A), sizeof(B))>::Type; -}; - -template -struct ResultOfNegate { - using Type = typename Construct, - std::is_signed_v ? sizeof(A) : next_size(sizeof(A))>::Type; + std::is_floating_point_v || std::is_floating_point_v, + max(sizeof(A), sizeof(B))>::Type; }; template @@ -200,76 +198,15 @@ struct ResultOfBitNot { using Type = typename Construct, false, sizeof(A)>::Type; }; -/** Type casting for `if` function: - * UInt, UInt -> UInt - * Int, Int -> Int - * Float, Float -> Float - * UInt, Int -> Int - * Float, [U]Int -> Float - * Decimal, Decimal -> Decimal - * UUID, UUID -> UUID - * UInt64 , Int -> Error - * Float, [U]Int64 -> Error - */ template -struct ResultOfIf { - static constexpr bool has_float = std::is_floating_point_v || std::is_floating_point_v; - static constexpr bool has_integer = std::is_integral_v || std::is_integral_v; - static constexpr bool has_signed = std::is_signed_v || std::is_signed_v; - static constexpr bool has_unsigned = !std::is_signed_v || !std::is_signed_v; - - static constexpr size_t max_size_of_unsigned_integer = - max(std::is_signed_v ? 0 : sizeof(A), std::is_signed_v ? 0 : sizeof(B)); - static constexpr size_t max_size_of_signed_integer = - max(std::is_signed_v ? sizeof(A) : 0, std::is_signed_v ? sizeof(B) : 0); - static constexpr size_t max_size_of_integer = - max(std::is_integral_v ? sizeof(A) : 0, std::is_integral_v ? sizeof(B) : 0); - static constexpr size_t max_size_of_float = max(std::is_floating_point_v ? sizeof(A) : 0, - std::is_floating_point_v ? sizeof(B) : 0); - - using ConstructedType = - typename Construct= max_size_of_float) || - (has_signed && has_unsigned && - max_size_of_unsigned_integer >= max_size_of_signed_integer)) - ? max(sizeof(A), sizeof(B)) * 2 - : max(sizeof(A), sizeof(B))>::Type; - - using ConstructedWithUUID = - std::conditional_t && std::is_same_v, A, - ConstructedType>; - - using Type = std::conditional_t< - !IsDecimalNumber && !IsDecimalNumber, ConstructedWithUUID, - std::conditional_t && IsDecimalNumber, - std::conditional_t<(sizeof(A) > sizeof(B)), A, B>, Error>>; +struct BinaryOperatorTraits { + using ColumnVectorA = std::conditional_t, ColumnDecimal, ColumnVector>; + using ColumnVectorB = std::conditional_t, ColumnDecimal, ColumnVector>; + using ArrayA = typename ColumnVectorA::Container; + using ArrayB = typename ColumnVectorB::Container; + using ArrayNull = PaddedPODArray; }; -/** Before applying operator `%` and bitwise operations, operands are casted to whole numbers. */ -template -struct ToInteger { - using Type = typename Construct, false, - std::is_floating_point_v ? 8 : sizeof(A)>::Type; -}; - -// CLICKHOUSE-29. The same depth, different signs -// NOTE: This case is applied for 64-bit integers only (for backward compatibility), but could be used for any-bit integers -template -constexpr bool LeastGreatestSpecialCase = std::is_integral_v&& std::is_integral_v && - (8 == sizeof(A) && sizeof(A) == sizeof(B)) && - (std::is_signed_v ^ std::is_signed_v); - -template -using ResultOfLeast = std::conditional_t, - typename Construct::Type, - typename ResultOfIf::Type>; - -template -using ResultOfGreatest = std::conditional_t, - typename Construct::Type, - typename ResultOfIf::Type>; - } // namespace NumberTraits } // namespace doris::vectorized diff --git a/be/src/vec/functions/divide.cpp b/be/src/vec/functions/divide.cpp index b6d3a2b35e..a602cb8b33 100644 --- a/be/src/vec/functions/divide.cpp +++ b/be/src/vec/functions/divide.cpp @@ -18,7 +18,7 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/divide.cpp // and modified by Doris -#include "vec/functions/function_binary_arithmetic_to_null_type.h" +#include "vec/functions/function_binary_arithmetic.h" #include "vec/functions/simple_function_factory.h" namespace doris::vectorized { @@ -28,26 +28,42 @@ static const DecimalV2Value one(1, 0); template struct DivideFloatingImpl { using ResultType = typename NumberTraits::ResultOfFloatingPointDivision::Type; + using Traits = NumberTraits::BinaryOperatorTraits; + static const constexpr bool allow_decimal = true; + template + static void apply(const typename Traits::ArrayA& a, B b, + typename ColumnVector::Container& c, + typename Traits::ArrayNull& null_map) { + size_t size = c.size(); + UInt8 is_null = b == 0; + memset(null_map.data(), is_null, size); + + if (!is_null) { + for (size_t i = 0; i < size; i++) { + c[i] = (double)a[i] / (double)b; + } + } + } + template - static inline DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b, NullMap& null_map, - size_t index) { - null_map[index] = b.is_zero(); - return a / (b.is_zero() ? one : b); + static inline DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b, UInt8& is_null) { + is_null = b.is_zero(); + return a / (is_null ? one : b); } template - static inline Result apply(A a, B b, NullMap& null_map, size_t index) { - null_map[index] = b == 0; - return static_cast(a) / (b + (b == 0)); + static inline Result apply(A a, B b, UInt8& is_null) { + is_null = b == 0; + return static_cast(a) / (b + is_null); } }; struct NameDivide { static constexpr auto name = "divide"; }; -using FunctionDivide = FunctionBinaryArithmeticToNullType; +using FunctionDivide = FunctionBinaryArithmetic; void register_function_divide(SimpleFunctionFactory& factory) { factory.register_function(); diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index 0ea494a06b..802ef6fdf4 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -95,11 +95,6 @@ protected: */ virtual ColumnNumbers get_arguments_that_are_always_constant() const { return {}; } - /** True if function can be called on default arguments (include Nullable's) and won't throw. - * Counterexample: modulo(0, 0) - */ - virtual bool can_be_executed_on_default_arguments() const { return true; } - private: Status default_implementation_for_nulls(FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, @@ -386,7 +381,6 @@ public: bool use_default_implementation_for_constants() const override { return false; } bool use_default_implementation_for_low_cardinality_columns() const override { return true; } ColumnNumbers get_arguments_that_are_always_constant() const override { return {}; } - bool can_be_executed_on_default_arguments() const override { return true; } bool can_be_executed_on_low_cardinality_dictionary() const override { return is_deterministic_in_scope_of_query(); } @@ -460,9 +454,6 @@ protected: ColumnNumbers get_arguments_that_are_always_constant() const final { return function->get_arguments_that_are_always_constant(); } - bool can_be_executed_on_default_arguments() const override { - return function->can_be_executed_on_default_arguments(); - } private: std::shared_ptr function; diff --git a/be/src/vec/functions/function_binary_arithmetic.h b/be/src/vec/functions/function_binary_arithmetic.h index 744b55a09f..74b7df4260 100644 --- a/be/src/vec/functions/function_binary_arithmetic.h +++ b/be/src/vec/functions/function_binary_arithmetic.h @@ -20,66 +20,27 @@ #pragma once -#include "common/logging.h" +#include "runtime/tuple.h" #include "vec/columns/column_const.h" #include "vec/columns/column_decimal.h" +#include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" -#include "vec/common/assert_cast.h" -#include "vec/common/typeid_cast.h" -#include "vec/data_types/data_type.h" -#include "vec/data_types/data_type_decimal.h" -#include "vec/data_types/data_type_number.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_nullable.h" #include "vec/data_types/number_traits.h" #include "vec/functions/cast_type_to_either.h" #include "vec/functions/function.h" -#include "vec/functions/function_helpers.h" -#include "vec/functions/int_div.h" -#include "vec/utils/util.hpp" namespace doris::vectorized { -/** Arithmetic operations: +, -, *, - * Bitwise operations: |, &, ^, ~. - * Etc. - */ +// Arithmetic operations: +, -, *, |, &, ^, ~ +// need implement apply(a, b) -template -struct BinaryOperationImplBase { - using ResultType = ResultType_; +// Arithmetic operations (to null type): /, %, intDiv (integer division), log +// need implement apply(a, b, is_null), apply(array_a, b, null_map) +// apply(array_a, b, null_map) is only used on vector_constant - static void NO_INLINE vector_vector(const PaddedPODArray& a, const PaddedPODArray& b, - PaddedPODArray& c) { - size_t size = a.size(); - for (size_t i = 0; i < size; ++i) { - c[i] = Op::template apply(a[i], b[i]); - } - } - - static void NO_INLINE vector_vector(const PaddedPODArray& a, const PaddedPODArray& b, - PaddedPODArray& c, NullMap& null_map) { - size_t size = a.size(); - for (size_t i = 0; i < size; ++i) { - c[i] = Op::template apply(a[i], b[i], null_map, i); - } - } - - static void NO_INLINE vector_constant(const PaddedPODArray& a, B b, - PaddedPODArray& c) { - size_t size = a.size(); - for (size_t i = 0; i < size; ++i) c[i] = Op::template apply(a[i], b); - } - - static void NO_INLINE constant_vector(A a, const PaddedPODArray& b, - PaddedPODArray& c) { - size_t size = b.size(); - for (size_t i = 0; i < size; ++i) c[i] = Op::template apply(a, b[i]); - } - - static ResultType constant_constant(A a, B b) { return Op::template apply(a, b); } -}; - -template -struct BinaryOperationImpl : BinaryOperationImplBase {}; +// TODO: vector_constant optimization not work on decimal type now template struct PlusImpl; @@ -92,95 +53,177 @@ struct DivideFloatingImpl; template struct DivideIntegralImpl; template -struct DivideIntegralOrZeroImpl; -template -struct LeastBaseImpl; -template -struct GreatestBaseImpl; -template struct ModuloImpl; +template