Files
doris/be/src/vec/functions/math.cpp

443 lines
17 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "vec/data_types/number_traits.h"
#include "vec/functions/function_const.h"
#include "vec/functions/function_binary_arithmetic.h"
#include "vec/functions/function_binary_arithmetic_to_null_type.h"
#include "vec/functions/function_math_unary.h"
#include "vec/functions/function_math_unary_to_null_type.h"
#include "vec/functions/function_string.h"
#include "vec/functions/function_totype.h"
#include "vec/functions/function_unary_arithmetic.h"
#include "vec/functions/simple_function_factory.h"
namespace doris::vectorized {
const double log_10[] = {
1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, 1e010, 1e011, 1e012,
1e013, 1e014, 1e015, 1e016, 1e017, 1e018, 1e019, 1e020, 1e021, 1e022, 1e023, 1e024, 1e025,
1e026, 1e027, 1e028, 1e029, 1e030, 1e031, 1e032, 1e033, 1e034, 1e035, 1e036, 1e037, 1e038,
1e039, 1e040, 1e041, 1e042, 1e043, 1e044, 1e045, 1e046, 1e047, 1e048, 1e049, 1e050, 1e051,
1e052, 1e053, 1e054, 1e055, 1e056, 1e057, 1e058, 1e059, 1e060, 1e061, 1e062, 1e063, 1e064,
1e065, 1e066, 1e067, 1e068, 1e069, 1e070, 1e071, 1e072, 1e073, 1e074, 1e075, 1e076, 1e077,
1e078, 1e079, 1e080, 1e081, 1e082, 1e083, 1e084, 1e085, 1e086, 1e087, 1e088, 1e089, 1e090,
1e091, 1e092, 1e093, 1e094, 1e095, 1e096, 1e097, 1e098, 1e099, 1e100, 1e101, 1e102, 1e103,
1e104, 1e105, 1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116,
1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129,
1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141, 1e142,
1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, 1e151, 1e152, 1e153, 1e154, 1e155,
1e156, 1e157, 1e158, 1e159, 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168,
1e169, 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179, 1e180, 1e181,
1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194,
1e195, 1e196, 1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207,
1e208, 1e209, 1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220,
1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 1e230, 1e231, 1e232, 1e233,
1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246,
1e247, 1e248, 1e249, 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259,
1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, 1e270, 1e271, 1e272,
1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285,
1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298,
1e299, 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308};
#define ARRAY_ELEMENTS(A) ((uint64_t)(sizeof(A) / sizeof(A[0])))
double my_double_round(double value, int64_t dec, bool dec_unsigned, bool truncate) {
bool dec_negative = (dec < 0) && !dec_unsigned;
uint64_t abs_dec = dec_negative ? -dec : dec;
/*
tmp2 is here to avoid return the value with 80 bit precision
This will fix that the test round(0.1,1) = round(0.1,1) is true
Tagging with volatile is no guarantee, it may still be optimized away...
*/
volatile double tmp2 = 0.0;
double tmp =
(abs_dec < ARRAY_ELEMENTS(log_10) ? log_10[abs_dec] : std::pow(10.0, (double)abs_dec));
// Pre-compute these, to avoid optimizing away e.g. 'floor(v/tmp) * tmp'.
volatile double value_div_tmp = value / tmp;
volatile double value_mul_tmp = value * tmp;
if (dec_negative && std::isinf(tmp)) {
tmp2 = 0.0;
} else if (!dec_negative && std::isinf(value_mul_tmp)) {
tmp2 = value;
} else if (truncate) {
if (value >= 0.0) {
tmp2 = dec < 0 ? std::floor(value_div_tmp) * tmp : std::floor(value_mul_tmp) / tmp;
} else {
tmp2 = dec < 0 ? std::ceil(value_div_tmp) * tmp : std::ceil(value_mul_tmp) / tmp;
}
} else {
tmp2 = dec < 0 ? std::round(value_div_tmp) * tmp : std::round(value_mul_tmp) / tmp;
}
return tmp2;
}
struct AcosName {
static constexpr auto name = "acos";
};
using FunctionAcos = FunctionMathUnary<UnaryFunctionVectorized<AcosName, std::acos>>;
struct AsinName {
static constexpr auto name = "asin";
};
using FunctionAsin = FunctionMathUnary<UnaryFunctionVectorized<AsinName, std::asin>>;
struct AtanName {
static constexpr auto name = "atan";
};
using FunctionAtan = FunctionMathUnary<UnaryFunctionVectorized<AtanName, std::atan>>;
struct CosName {
static constexpr auto name = "cos";
};
using FunctionCos = FunctionMathUnary<UnaryFunctionVectorized<CosName, std::cos>>;
struct EImpl {
static constexpr auto name = "e";
static constexpr double value = 2.7182818284590452353602874713526624977572470;
};
using FunctionE = FunctionMathConstFloat64<EImpl>;
struct PiImpl {
static constexpr auto name = "pi";
static constexpr double value = 3.1415926535897932384626433832795028841971693;
};
using FunctionPi = FunctionMathConstFloat64<PiImpl>;
struct ExpName {
static constexpr auto name = "exp";
};
using FunctionExp = FunctionMathUnary<UnaryFunctionVectorized<ExpName, std::exp>>;
#define LOG_FUNCTION_IMPL(CLASS, NAME, FUNC) \
struct CLASS##Impl { \
using Type = DataTypeFloat64; \
using RetType = Float64; \
static constexpr auto name = #NAME; \
template <typename T, typename U> \
static void execute(const T* src, U* dst, UInt8& null_flag) { \
null_flag = src[0] <= 0; \
dst[0] = static_cast<U>(FUNC((double)src[0])); \
} \
}; \
using Function##CLASS = FunctionMathUnaryToNullType<CLASS##Impl>;
LOG_FUNCTION_IMPL(Log10, log10, std::log10);
LOG_FUNCTION_IMPL(Log2, log2, std::log2);
LOG_FUNCTION_IMPL(Ln, ln, std::log);
struct LogName {
static constexpr auto name = "log";
};
template <typename A, typename B>
struct LogImpl {
using ResultType = Float64;
static const constexpr bool allow_decimal = false;
template <typename Result>
static inline Result apply(A a, B b, NullMap& null_map, size_t index) {
constexpr double EPSILON = 1e-9;
null_map[index] = a <= 0 || b <= 0 || std::fabs(a - 1.0) < EPSILON;
return static_cast<Float64>(std::log(static_cast<Float64>(b)) /
std::log(static_cast<Float64>(a)));
}
};
using FunctionLog = FunctionBinaryArithmeticToNullType<LogImpl, LogName>;
struct CeilName {
static constexpr auto name = "ceil";
};
using FunctionCeil = FunctionMathUnary<UnaryFunctionVectorized<CeilName, std::ceil, DataTypeInt64>>;
template <typename A>
struct SignImpl {
using ResultType = Int8;
static inline ResultType apply(A a) {
if constexpr (IsDecimalNumber<A> || std::is_floating_point_v<A>)
return static_cast<ResultType>(a < A(0) ? -1 : a == A(0) ? 0 : 1);
else if constexpr (std::is_signed_v<A>)
return static_cast<ResultType>(a < 0 ? -1 : a == 0 ? 0 : 1);
else if constexpr (std::is_unsigned_v<A>)
return static_cast<ResultType>(a == 0 ? 0 : 1);
}
};
struct NameSign {
static constexpr auto name = "sign";
};
using FunctionSign = FunctionUnaryArithmetic<SignImpl, NameSign, false>;
template <typename A>
struct AbsImpl {
using ResultType =
std::conditional_t<IsDecimalNumber<A>, A, typename NumberTraits::ResultOfAbs<A>::Type>;
static inline ResultType apply(A a) {
if constexpr (IsDecimalNumber<A>)
return a < 0 ? A(-a) : a;
else if constexpr (std::is_integral_v<A> && std::is_signed_v<A>)
return a < 0 ? static_cast<ResultType>(~a) + 1 : a;
else if constexpr (std::is_integral_v<A> && std::is_unsigned_v<A>)
return static_cast<ResultType>(a);
else if constexpr (std::is_floating_point_v<A>)
return static_cast<ResultType>(std::abs(a));
}
};
struct NameAbs {
static constexpr auto name = "abs";
};
using FunctionAbs = FunctionUnaryArithmetic<AbsImpl, NameAbs, false>;
template <typename A>
struct NegativeImpl {
using ResultType = A;
static inline ResultType apply(A a) {
return -a;
}
};
struct NameNegative {
static constexpr auto name = "negative";
};
using FunctionNegative = FunctionUnaryArithmetic<NegativeImpl, NameNegative, false>;
template <typename A>
struct PositiveImpl {
using ResultType = A;
static inline ResultType apply(A a) { return static_cast<ResultType>(a); }
};
struct NamePositive {
static constexpr auto name = "positive";
};
using FunctionPositive = FunctionUnaryArithmetic<PositiveImpl, NamePositive, false>;
struct SinName {
static constexpr auto name = "sin";
};
using FunctionSin = FunctionMathUnary<UnaryFunctionVectorized<SinName, std::sin>>;
struct SqrtName {
static constexpr auto name = "sqrt";
};
using FunctionSqrt = FunctionMathUnary<UnaryFunctionVectorized<SqrtName, std::sqrt>>;
struct TanName {
static constexpr auto name = "tan";
};
using FunctionTan = FunctionMathUnary<UnaryFunctionVectorized<TanName, std::tan>>;
struct FloorName {
static constexpr auto name = "floor";
};
using FunctionFloor = FunctionMathUnary<UnaryFunctionVectorized<FloorName, std::floor, DataTypeInt64>>;
template <typename A>
struct RadiansImpl {
using ResultType = A;
static inline ResultType apply(A a) {
return static_cast<ResultType>(a * PiImpl::value / 180.0);
}
};
struct NameRadians {
static constexpr auto name = "radians";
};
using FunctionRadians = FunctionUnaryArithmetic<RadiansImpl, NameRadians, false>;
template <typename A>
struct DegreesImpl {
using ResultType = A;
static inline ResultType apply(A a) {
return static_cast<ResultType>(a * 180.0 / PiImpl::value);
}
};
struct NameDegrees {
static constexpr auto name = "degrees";
};
using FunctionDegrees = FunctionUnaryArithmetic<DegreesImpl, NameDegrees, false>;
struct NameBin {
static constexpr auto name = "bin";
};
struct BinImpl {
using ReturnType = DataTypeString;
static constexpr auto TYPE_INDEX = TypeIndex::Int64;
using Type = Int64;
using ReturnColumnType = ColumnString;
static std::string bin_impl(Int64 value) {
uint64_t n = static_cast<uint64_t>(value);
const size_t max_bits = sizeof(uint64_t) * 8;
char result[max_bits];
uint32_t index = max_bits;
do {
result[--index] = '0' + (n & 1);
} while (n >>= 1);
return std::string(result + index, max_bits - index);
}
static Status vector(const ColumnInt64::Container& data, ColumnString::Chars& res_data,
ColumnString::Offsets& res_offsets) {
res_offsets.resize(data.size());
size_t input_size = res_offsets.size();
for (size_t i = 0; i < input_size; ++i) {
StringOP::push_value_string(bin_impl(data[i]), i, res_data, res_offsets);
}
return Status::OK();
}
};
using FunctionBin = FunctionUnaryToType<BinImpl, NameBin>;
struct RoundName {
static constexpr auto name = "round";
};
/// round(double)-->int64
/// key_str:roundFloat64
template <typename Name>
struct RoundOneImpl {
using Type = DataTypeInt64;
static constexpr auto name = RoundName::name;
static constexpr auto rows_per_iteration = 1;
static constexpr bool always_returns_float64 = false;
static DataTypes get_variadic_argument_types() {
return {std::make_shared<vectorized::DataTypeFloat64>()};
}
template <typename T, typename U>
static void execute(const T* src, U* dst) {
dst[0] = static_cast<Int64>(std::round(static_cast<Float64>(src[0])));
}
};
using FunctionRoundOne = FunctionMathUnary<RoundOneImpl<RoundName>>;
template <typename A, typename B>
struct PowImpl {
using ResultType = double;
static const constexpr bool allow_decimal = false;
template <typename type>
static inline double apply(A a, B b) {
/// Next everywhere, static_cast - so that there is no wrong result in expressions of the form Int64 c = UInt32(a) * Int32(-1).
return std::pow((double)a, (double) b);
}
};
struct PowName {
static constexpr auto name = "pow";
};
using FunctionPow = FunctionBinaryArithmetic<PowImpl, PowName>;
template <typename A, typename B>
struct TruncateImpl {
using ResultType = double;
static const constexpr bool allow_decimal = false;
template <typename type>
static inline double apply(A a, B b) {
/// Next everywhere, static_cast - so that there is no wrong result in expressions of the form Int64 c = UInt32(a) * Int32(-1).
return static_cast<Float64>(my_double_round(
static_cast<Float64>(a), static_cast<Int32>(b), false, true));
}
};
struct TruncateName {
static constexpr auto name = "truncate";
};
using FunctionTruncate = FunctionBinaryArithmetic<TruncateImpl, TruncateName>;
/// round(double,int32)-->double
/// key_str:roundFloat64Int32
template <typename A, typename B>
struct RoundTwoImpl {
using ResultType = double;
static const constexpr bool allow_decimal = false;
static DataTypes get_variadic_argument_types() {
return {std::make_shared<vectorized::DataTypeFloat64>(),
std::make_shared<vectorized::DataTypeInt32>()};
}
template <typename type>
static inline double apply(A a, B b) {
/// Next everywhere, static_cast - so that there is no wrong result in expressions of the form Int64 c = UInt32(a) * Int32(-1).
return static_cast<Float64>(my_double_round(
static_cast<Float64>(a), static_cast<Int32>(b), false, false));
}
};
using FunctionRoundTwo = FunctionBinaryArithmetic<RoundTwoImpl, RoundName>;
// TODO: Now math may cause one thread compile time too long, because the function in math
// so mush. Split it to speed up compile time in the future
void register_function_math(SimpleFunctionFactory& factory) {
factory.register_function<FunctionAcos>();
factory.register_function<FunctionAsin>();
factory.register_function<FunctionAtan>();
factory.register_function<FunctionCos>();
factory.register_function<FunctionCeil>();
factory.register_alias("ceil", "dceil");
factory.register_alias("ceil", "ceiling");
factory.register_function<FunctionE>();
factory.register_function<FunctionLn>();
factory.register_alias("ln", "dlog1");
factory.register_function<FunctionLog>();
factory.register_function<FunctionLog2>();
factory.register_function<FunctionLog10>();
factory.register_alias("log10", "dlog10");
factory.register_function<FunctionPi>();
factory.register_function<FunctionSign>();
factory.register_function<FunctionAbs>();
factory.register_function<FunctionNegative>();
factory.register_function<FunctionPositive>();
factory.register_function<FunctionSin>();
factory.register_function<FunctionSqrt>();
factory.register_alias("sqrt", "dsqrt");
factory.register_function<FunctionTan>();
factory.register_function<FunctionFloor>();
factory.register_alias("floor", "dfloor");
factory.register_function<FunctionRoundOne>();
factory.register_function<FunctionRoundTwo>();
factory.register_function<FunctionPow>();
factory.register_alias("pow", "power");
factory.register_alias("pow", "dpow");
factory.register_alias("pow", "fpow");
factory.register_function<FunctionExp>();
factory.register_alias("exp", "dexp");
factory.register_function<FunctionTruncate>();
factory.register_function<FunctionRadians>();
factory.register_function<FunctionDegrees>();
factory.register_function<FunctionBin>();
}
} // namespace doris::vectorized