556 lines
19 KiB
C++
556 lines
19 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
// This file is copied from
|
|
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionRound.h
|
|
// and modified by Doris
|
|
|
|
#pragma once
|
|
|
|
#ifdef __SSE4_1__
|
|
#include <smmintrin.h>
|
|
#else
|
|
#include <fenv.h>
|
|
#endif
|
|
|
|
#include "vec/columns/column.h"
|
|
#include "vec/columns/column_decimal.h"
|
|
#include "vec/data_types/data_type_decimal.h"
|
|
#include "vec/data_types/data_type_number.h"
|
|
|
|
namespace doris::vectorized {
|
|
|
|
/// Describes how the requested number of digits is applied when rounding.
enum class ScaleMode {
    /// Keep N digits after the decimal point.
    Positive,
    /// Produce an integer whose last N digits are zero.
    Negative,
    /// Produce an integer.
    Zero,
};
|
|
|
|
/// Direction of rounding.
///
/// When SSE4.1 is available the enumerators are built from the _MM_FROUND_*
/// rounding-control constants; otherwise plain integer values are used.
enum class RoundingMode {
#ifdef __SSE4_1__
    Round = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    Floor = _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC,
    Ceil = _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC,
    Trunc = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
#else
    /// These values correspond to the SSE4.1 ones above, just in case.
    Round = 8,
    Floor = 9,
    Ceil = 10,
    Trunc = 11,
#endif
};
|
|
|
|
/// Rule used for values that lie exactly halfway between two candidates.
enum class TieBreakingMode {
    /// Banker's rounding for floating point numbers, round up otherwise.
    Auto,
    /// Always banker's rounding.
    Bankers,
};
|
|
|
|
template <typename T, RoundingMode rounding_mode, ScaleMode scale_mode,
|
|
TieBreakingMode tie_breaking_mode>
|
|
struct IntegerRoundingComputation {
|
|
static const size_t data_count = 1;
|
|
|
|
static size_t prepare(size_t scale) { return scale; }
|
|
|
|
/// Integer overflow is Ok.
|
|
static ALWAYS_INLINE T compute_impl(T x, T scale) {
|
|
switch (rounding_mode) {
|
|
case RoundingMode::Trunc: {
|
|
return x / scale * scale;
|
|
}
|
|
case RoundingMode::Floor: {
|
|
if (x < 0) {
|
|
x -= scale - 1;
|
|
}
|
|
return x / scale * scale;
|
|
}
|
|
case RoundingMode::Ceil: {
|
|
if (x >= 0) {
|
|
x += scale - 1;
|
|
}
|
|
return x / scale * scale;
|
|
}
|
|
case RoundingMode::Round: {
|
|
if (x < 0) {
|
|
x -= scale;
|
|
}
|
|
switch (tie_breaking_mode) {
|
|
case TieBreakingMode::Auto: {
|
|
x = (x + scale / 2) / scale * scale;
|
|
break;
|
|
}
|
|
case TieBreakingMode::Bankers: {
|
|
T quotient = (x + scale / 2) / scale;
|
|
if (quotient * scale == x + scale / 2) {
|
|
// round half to even
|
|
x = ((quotient + (x < 0)) & ~1) * scale;
|
|
} else {
|
|
// round the others as usual
|
|
x = quotient * scale;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
return x;
|
|
}
|
|
}
|
|
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
static ALWAYS_INLINE T compute(T x, T scale) {
|
|
switch (scale_mode) {
|
|
case ScaleMode::Zero:
|
|
case ScaleMode::Positive:
|
|
return x;
|
|
case ScaleMode::Negative:
|
|
return compute_impl(x, scale);
|
|
}
|
|
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
static ALWAYS_INLINE void compute(const T* __restrict in, size_t scale, T* __restrict out) {
|
|
if constexpr (sizeof(T) <= sizeof(scale) && scale_mode == ScaleMode::Negative) {
|
|
if (scale > size_t(std::numeric_limits<T>::max())) {
|
|
*out = 0;
|
|
return;
|
|
}
|
|
}
|
|
*out = compute(*in, scale);
|
|
}
|
|
};
|
|
|
|
template <typename T, RoundingMode rounding_mode, TieBreakingMode tie_breaking_mode>
|
|
class DecimalRoundingImpl {
|
|
private:
|
|
using NativeType = typename T::NativeType;
|
|
using Op = IntegerRoundingComputation<NativeType, rounding_mode, ScaleMode::Negative,
|
|
tie_breaking_mode>;
|
|
using Container = typename ColumnDecimal<T>::Container;
|
|
|
|
public:
|
|
static NO_INLINE void apply(const Container& in, UInt32 in_scale, Container& out,
|
|
Int16 scale_arg) {
|
|
scale_arg = in_scale - scale_arg;
|
|
if (scale_arg > 0) {
|
|
size_t scale = int_exp10(scale_arg);
|
|
|
|
const NativeType* __restrict p_in = reinterpret_cast<const NativeType*>(in.data());
|
|
const NativeType* end_in = reinterpret_cast<const NativeType*>(in.data()) + in.size();
|
|
NativeType* __restrict p_out = reinterpret_cast<NativeType*>(out.data());
|
|
|
|
while (p_in < end_in) {
|
|
Op::compute(p_in, scale, p_out);
|
|
++p_in;
|
|
++p_out;
|
|
}
|
|
} else {
|
|
memcpy(out.data(), in.data(), in.size() * sizeof(T));
|
|
}
|
|
}
|
|
};
|
|
|
|
#ifdef __SSE4_1__
|
|
|
|
template <typename T>
|
|
class BaseFloatRoundingComputation;
|
|
|
|
template <>
|
|
class BaseFloatRoundingComputation<Float32> {
|
|
public:
|
|
using ScalarType = Float32;
|
|
using VectorType = __m128;
|
|
static const size_t data_count = 4;
|
|
|
|
static VectorType load(const ScalarType* in) { return _mm_loadu_ps(in); }
|
|
static VectorType load1(const ScalarType in) { return _mm_load1_ps(&in); }
|
|
static void store(ScalarType* out, VectorType val) { _mm_storeu_ps(out, val); }
|
|
static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_ps(val, scale); }
|
|
static VectorType divide(VectorType val, VectorType scale) { return _mm_div_ps(val, scale); }
|
|
template <RoundingMode mode>
|
|
static VectorType apply(VectorType val) {
|
|
return _mm_round_ps(val, int(mode));
|
|
}
|
|
|
|
static VectorType prepare(size_t scale) { return load1(scale); }
|
|
};
|
|
|
|
template <>
|
|
class BaseFloatRoundingComputation<Float64> {
|
|
public:
|
|
using ScalarType = Float64;
|
|
using VectorType = __m128d;
|
|
static const size_t data_count = 2;
|
|
|
|
static VectorType load(const ScalarType* in) { return _mm_loadu_pd(in); }
|
|
static VectorType load1(const ScalarType in) { return _mm_load1_pd(&in); }
|
|
static void store(ScalarType* out, VectorType val) { _mm_storeu_pd(out, val); }
|
|
static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_pd(val, scale); }
|
|
static VectorType divide(VectorType val, VectorType scale) { return _mm_div_pd(val, scale); }
|
|
template <RoundingMode mode>
|
|
static VectorType apply(VectorType val) {
|
|
return _mm_round_pd(val, int(mode));
|
|
}
|
|
|
|
static VectorType prepare(size_t scale) { return load1(scale); }
|
|
};
|
|
|
|
#else
|
|
|
|
/// Scalar fallback used when SSE4.1 is not available (e.g. on ARM). Not vectorized.
|
|
|
|
/// Rounds a single float with the C math function matching `mode`.
/// RoundingMode::Round uses nearbyintf, which follows the current
/// floating-point rounding direction.
inline float roundWithMode(float x, RoundingMode mode) {
    if (mode == RoundingMode::Round) {
        return nearbyintf(x);
    }
    if (mode == RoundingMode::Floor) {
        return floorf(x);
    }
    if (mode == RoundingMode::Ceil) {
        return ceilf(x);
    }
    if (mode == RoundingMode::Trunc) {
        return truncf(x);
    }

    // Every enumerator is handled above.
    __builtin_unreachable();
}
|
|
|
|
/// Rounds a single double with the C math function matching `mode`.
/// RoundingMode::Round uses nearbyint, which follows the current
/// floating-point rounding direction.
inline double roundWithMode(double x, RoundingMode mode) {
    if (mode == RoundingMode::Round) {
        return nearbyint(x);
    }
    if (mode == RoundingMode::Floor) {
        return floor(x);
    }
    if (mode == RoundingMode::Ceil) {
        return ceil(x);
    }
    if (mode == RoundingMode::Trunc) {
        return trunc(x);
    }

    // Every enumerator is handled above.
    __builtin_unreachable();
}
|
|
|
|
template <typename T>
|
|
class BaseFloatRoundingComputation {
|
|
public:
|
|
using ScalarType = T;
|
|
using VectorType = T;
|
|
static const size_t data_count = 1;
|
|
|
|
static VectorType load(const ScalarType* in) { return *in; }
|
|
static VectorType load1(const ScalarType in) { return in; }
|
|
static VectorType store(ScalarType* out, ScalarType val) { return *out = val; }
|
|
static VectorType multiply(VectorType val, VectorType scale) { return val * scale; }
|
|
static VectorType divide(VectorType val, VectorType scale) { return val / scale; }
|
|
template <RoundingMode mode>
|
|
static VectorType apply(VectorType val) {
|
|
return roundWithMode(val, mode);
|
|
}
|
|
|
|
static VectorType prepare(size_t scale) { return load1(scale); }
|
|
};
|
|
|
|
#endif
|
|
|
|
/** Implementation of low-level round-off functions for floating-point values.
|
|
*/
|
|
template <typename T, RoundingMode rounding_mode, ScaleMode scale_mode>
|
|
class FloatRoundingComputation : public BaseFloatRoundingComputation<T> {
|
|
using Base = BaseFloatRoundingComputation<T>;
|
|
|
|
public:
|
|
static inline void compute(const T* __restrict in, const typename Base::VectorType& scale,
|
|
T* __restrict out) {
|
|
auto val = Base::load(in);
|
|
|
|
if (scale_mode == ScaleMode::Positive) {
|
|
val = Base::multiply(val, scale);
|
|
} else if (scale_mode == ScaleMode::Negative) {
|
|
val = Base::divide(val, scale);
|
|
}
|
|
|
|
val = Base::template apply<rounding_mode>(val);
|
|
|
|
if (scale_mode == ScaleMode::Positive) {
|
|
val = Base::divide(val, scale);
|
|
} else if (scale_mode == ScaleMode::Negative) {
|
|
val = Base::multiply(val, scale);
|
|
}
|
|
|
|
Base::store(out, val);
|
|
}
|
|
};
|
|
|
|
/** Implementing high-level rounding functions.
|
|
*/
|
|
template <typename T, RoundingMode rounding_mode, ScaleMode scale_mode>
|
|
struct FloatRoundingImpl {
|
|
private:
|
|
static_assert(!IsDecimalNumber<T>);
|
|
|
|
using Op = FloatRoundingComputation<T, rounding_mode, scale_mode>;
|
|
using Data = std::array<T, Op::data_count>;
|
|
using ColumnType = ColumnVector<T>;
|
|
using Container = typename ColumnType::Container;
|
|
|
|
public:
|
|
static NO_INLINE void apply(const Container& in, size_t scale, Container& out) {
|
|
auto mm_scale = Op::prepare(scale);
|
|
|
|
const size_t data_count = std::tuple_size<Data>();
|
|
|
|
const T* end_in = in.data() + in.size();
|
|
const T* limit = in.data() + in.size() / data_count * data_count;
|
|
|
|
const T* __restrict p_in = in.data();
|
|
T* __restrict p_out = out.data();
|
|
|
|
while (p_in < limit) {
|
|
Op::compute(p_in, mm_scale, p_out);
|
|
p_in += data_count;
|
|
p_out += data_count;
|
|
}
|
|
|
|
if (p_in < end_in) {
|
|
Data tmp_src {{}};
|
|
Data tmp_dst;
|
|
|
|
size_t tail_size_bytes = (end_in - p_in) * sizeof(*p_in);
|
|
|
|
memcpy(&tmp_src, p_in, tail_size_bytes);
|
|
Op::compute(reinterpret_cast<T*>(&tmp_src), mm_scale, reinterpret_cast<T*>(&tmp_dst));
|
|
memcpy(p_out, &tmp_dst, tail_size_bytes);
|
|
}
|
|
}
|
|
};
|
|
|
|
template <typename T, RoundingMode rounding_mode, ScaleMode scale_mode,
|
|
TieBreakingMode tie_breaking_mode>
|
|
struct IntegerRoundingImpl {
|
|
private:
|
|
using Op = IntegerRoundingComputation<T, rounding_mode, scale_mode, tie_breaking_mode>;
|
|
using Container = typename ColumnVector<T>::Container;
|
|
|
|
public:
|
|
template <size_t scale>
|
|
static NO_INLINE void applyImpl(const Container& in, Container& out) {
|
|
const T* end_in = in.data() + in.size();
|
|
|
|
const T* __restrict p_in = in.data();
|
|
T* __restrict p_out = out.data();
|
|
|
|
while (p_in < end_in) {
|
|
Op::compute(p_in, scale, p_out);
|
|
++p_in;
|
|
++p_out;
|
|
}
|
|
}
|
|
|
|
static NO_INLINE void apply(const Container& in, size_t scale, Container& out) {
|
|
/// Manual function cloning for compiler to generate integer division by constant.
|
|
switch (scale) {
|
|
case 1ULL:
|
|
return applyImpl<1ULL>(in, out);
|
|
case 10ULL:
|
|
return applyImpl<10ULL>(in, out);
|
|
case 100ULL:
|
|
return applyImpl<100ULL>(in, out);
|
|
case 1000ULL:
|
|
return applyImpl<1000ULL>(in, out);
|
|
case 10000ULL:
|
|
return applyImpl<10000ULL>(in, out);
|
|
case 100000ULL:
|
|
return applyImpl<100000ULL>(in, out);
|
|
case 1000000ULL:
|
|
return applyImpl<1000000ULL>(in, out);
|
|
case 10000000ULL:
|
|
return applyImpl<10000000ULL>(in, out);
|
|
case 100000000ULL:
|
|
return applyImpl<100000000ULL>(in, out);
|
|
case 1000000000ULL:
|
|
return applyImpl<1000000000ULL>(in, out);
|
|
case 10000000000ULL:
|
|
return applyImpl<10000000000ULL>(in, out);
|
|
case 100000000000ULL:
|
|
return applyImpl<100000000000ULL>(in, out);
|
|
case 1000000000000ULL:
|
|
return applyImpl<1000000000000ULL>(in, out);
|
|
case 10000000000000ULL:
|
|
return applyImpl<10000000000000ULL>(in, out);
|
|
case 100000000000000ULL:
|
|
return applyImpl<100000000000000ULL>(in, out);
|
|
case 1000000000000000ULL:
|
|
return applyImpl<1000000000000000ULL>(in, out);
|
|
case 10000000000000000ULL:
|
|
return applyImpl<10000000000000000ULL>(in, out);
|
|
case 100000000000000000ULL:
|
|
return applyImpl<100000000000000000ULL>(in, out);
|
|
case 1000000000000000000ULL:
|
|
return applyImpl<1000000000000000000ULL>(in, out);
|
|
case 10000000000000000000ULL:
|
|
return applyImpl<10000000000000000000ULL>(in, out);
|
|
default:
|
|
__builtin_unreachable();
|
|
}
|
|
}
|
|
};
|
|
|
|
/** Select the appropriate processing algorithm depending on the scale.
|
|
*/
|
|
template <typename T, RoundingMode rounding_mode, TieBreakingMode tie_breaking_mode>
|
|
struct Dispatcher {
|
|
template <ScaleMode scale_mode>
|
|
using FunctionRoundingImpl = std::conditional_t<
|
|
IsDecimalNumber<T>, DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>,
|
|
std::conditional_t<
|
|
std::is_floating_point_v<T>, FloatRoundingImpl<T, rounding_mode, scale_mode>,
|
|
IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>>;
|
|
|
|
static ColumnPtr apply(const IColumn* col_general, Int16 scale_arg) {
|
|
if constexpr (IsNumber<T>) {
|
|
const auto* const col = check_and_get_column<ColumnVector<T>>(col_general);
|
|
auto col_res = ColumnVector<T>::create();
|
|
|
|
typename ColumnVector<T>::Container& vec_res = col_res->get_data();
|
|
vec_res.resize(col->get_data().size());
|
|
|
|
if (!vec_res.empty()) {
|
|
if (scale_arg == 0) {
|
|
size_t scale = 1;
|
|
FunctionRoundingImpl<ScaleMode::Zero>::apply(col->get_data(), scale, vec_res);
|
|
} else if (scale_arg > 0) {
|
|
size_t scale = int_exp10(scale_arg);
|
|
FunctionRoundingImpl<ScaleMode::Positive>::apply(col->get_data(), scale,
|
|
vec_res);
|
|
} else {
|
|
size_t scale = int_exp10(-scale_arg);
|
|
FunctionRoundingImpl<ScaleMode::Negative>::apply(col->get_data(), scale,
|
|
vec_res);
|
|
}
|
|
}
|
|
|
|
return col_res;
|
|
} else if constexpr (IsDecimalNumber<T>) {
|
|
const auto* const decimal_col = check_and_get_column<ColumnDecimal<T>>(col_general);
|
|
const auto& vec_src = decimal_col->get_data();
|
|
|
|
auto col_res = ColumnDecimal<T>::create(vec_src.size(), decimal_col->get_scale());
|
|
auto& vec_res = col_res->get_data();
|
|
|
|
if (!vec_res.empty()) {
|
|
FunctionRoundingImpl<ScaleMode::Negative>::apply(
|
|
decimal_col->get_data(), decimal_col->get_scale(), vec_res, scale_arg);
|
|
}
|
|
|
|
return col_res;
|
|
} else {
|
|
__builtin_unreachable();
|
|
return nullptr;
|
|
}
|
|
}
|
|
};
|
|
|
|
template <typename Impl, RoundingMode rounding_mode, TieBreakingMode tie_breaking_mode>
|
|
class FunctionRounding : public IFunction {
|
|
public:
|
|
static constexpr auto name = Impl::name;
|
|
static FunctionPtr create() { return std::make_shared<FunctionRounding>(); }
|
|
|
|
String get_name() const override { return name; }
|
|
|
|
bool is_variadic() const override { return true; }
|
|
size_t get_number_of_arguments() const override { return 0; }
|
|
|
|
DataTypes get_variadic_argument_types_impl() const override {
|
|
return Impl::get_variadic_argument_types();
|
|
}
|
|
|
|
/// Get result types by argument types. If the function does not apply to these arguments, throw an exception.
|
|
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
|
|
if ((arguments.empty()) || (arguments.size() > 2)) {
|
|
LOG(FATAL) << "Number of arguments for function " + get_name() +
|
|
" doesn't match: should be 1 or 2. ";
|
|
}
|
|
|
|
return arguments[0];
|
|
}
|
|
|
|
static Status get_scale_arg(const ColumnWithTypeAndName& arguments, Int16* scale) {
|
|
const IColumn& scale_column = *arguments.column;
|
|
if (!is_column_const(scale_column)) {
|
|
return Status::InvalidArgument("2nd argument for function {} should be constant", name);
|
|
}
|
|
|
|
Field scale_field = assert_cast<const ColumnConst&>(scale_column).get_field();
|
|
|
|
Int64 scale64 = scale_field.get<Int64>();
|
|
if (scale64 > std::numeric_limits<Int16>::max() ||
|
|
scale64 < std::numeric_limits<Int16>::min()) {
|
|
return Status::InvalidArgument("Scale argument for function {} is too large: {}", name,
|
|
scale64);
|
|
}
|
|
|
|
*scale = scale64;
|
|
return Status::OK();
|
|
}
|
|
|
|
bool use_default_implementation_for_constants() const override { return true; }
|
|
ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
|
|
|
|
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
|
|
size_t result, size_t /*input_rows_count*/) override {
|
|
const ColumnWithTypeAndName& column = block.get_by_position(arguments[0]);
|
|
Int16 scale_arg = 0;
|
|
if (arguments.size() == 2) {
|
|
RETURN_IF_ERROR(get_scale_arg(block.get_by_position(arguments[1]), &scale_arg));
|
|
}
|
|
|
|
ColumnPtr res;
|
|
auto call = [&](const auto& types) -> bool {
|
|
using Types = std::decay_t<decltype(types)>;
|
|
using DataType = typename Types::LeftType;
|
|
|
|
if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>) {
|
|
using FieldType = typename DataType::FieldType;
|
|
res = Dispatcher<FieldType, rounding_mode, tie_breaking_mode>::apply(
|
|
column.column.get(), scale_arg);
|
|
return true;
|
|
}
|
|
return false;
|
|
};
|
|
|
|
#if !defined(__SSE4_1__)
|
|
/// In case of "nearbyint" function is used, we should ensure the expected rounding mode for the Banker's rounding.
|
|
/// Actually it is by default. But we will set it just in case.
|
|
|
|
if constexpr (rounding_mode == RoundingMode::Round) {
|
|
if (0 != fesetround(FE_TONEAREST)) {
|
|
return Status::InvalidArgument("Cannot set floating point rounding mode");
|
|
}
|
|
}
|
|
#endif
|
|
|
|
if (!call_on_index_and_data_type<void>(column.type->get_type_id(), call)) {
|
|
return Status::InvalidArgument("Invalid argument type {} for function {}",
|
|
column.type->get_name(), name);
|
|
}
|
|
|
|
block.replace_by_position(result, std::move(res));
|
|
return Status::OK();
|
|
}
|
|
};
|
|
|
|
} // namespace doris::vectorized
|