From efdd75f2864258a15548c1d1afad84e044995b1b Mon Sep 17 00:00:00 2001 From: Mryange <59914473+Mryange@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:53:17 +0800 Subject: [PATCH] =?UTF-8?q?[fix](function)=20stddev=20with=20DecimalV2=20t?= =?UTF-8?q?ype=20will=20result=20in=20an=20error=20(#=E2=80=A6=20(#39072)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …38731) https://github.com/apache/doris/pull/38731 The stddev function has a separate implementation for the DecimalV2 type, but there are issues with the implementation. Given that there is almost no existing data for DecimalV2, it will be removed here. For be, upgrading to this situation will result in an error directly. ``` SELECT STDDEV(data) FROM DECIMALV2_10_0_DATA; ERROR 1105 (HY000): errCode = 2, detailMessage = (127.0.0.1)[INTERNAL_ERROR]Agg Function stddev(decimal(10,0)) is not implemented ``` After removing DecimalV2, parameters of type DecimalV2 will be converted to double for calculations. ## Proposed changes Issue Number: close #xxx --- .../aggregate_function_stddev.cpp | 9 -- .../aggregate_function_stddev.h | 102 +----------------- .../expressions/functions/agg/Stddev.java | 5 +- .../expressions/functions/agg/StddevSamp.java | 5 +- .../expressions/functions/agg/Variance.java | 5 +- .../functions/agg/VarianceSamp.java | 5 +- 6 files changed, 8 insertions(+), 123 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_stddev.cpp b/be/src/vec/aggregate_functions/aggregate_function_stddev.cpp index 2c4fa002d5..46682fd1a4 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_stddev.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_stddev.cpp @@ -46,15 +46,6 @@ AggregateFunctionPtr create_function_single_value(const String& name, FOR_NUMERIC_TYPES(DISPATCH) #undef DISPATCH -#define DISPATCH(TYPE) \ - if (which.idx == TypeIndex::TYPE) \ - return creator_without_type::create>>, is_nullable>>( \ - custom_nullable ? remove_nullable(argument_types) : argument_types, \ - result_is_nullable); - FOR_DECIMAL_TYPES(DISPATCH) -#undef DISPATCH - LOG(WARNING) << fmt::format("create_function_single_value with unknowed type {}", argument_types[0]->get_name()); return nullptr; diff --git a/be/src/vec/aggregate_functions/aggregate_function_stddev.h b/be/src/vec/aggregate_functions/aggregate_function_stddev.h index f095afb41b..721cd92d0b 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_stddev.h +++ b/be/src/vec/aggregate_functions/aggregate_function_stddev.h @@ -32,7 +32,6 @@ #include "vec/columns/column.h" #include "vec/columns/column_nullable.h" #include "vec/common/assert_cast.h" -#include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_number.h" @@ -125,103 +124,6 @@ struct BaseData { int64_t count; }; -template -struct BaseDatadecimal { - BaseDatadecimal() : mean(0), m2(0), count(0) {} - virtual ~BaseDatadecimal() = default; - - void write(BufferWritable& buf) const { - write_binary(mean, buf); - write_binary(m2, buf); - write_binary(count, buf); - } - - void read(BufferReadable& buf) { - read_binary(mean, buf); - read_binary(m2, buf); - read_binary(count, buf); - } - - void reset() { - mean = DecimalV2Value(); - m2 = DecimalV2Value(); - count = {}; - } - - DecimalV2Value get_result(DecimalV2Value res) const { - if constexpr (is_stddev) { - return DecimalV2Value::sqrt(res); - } else { - return res; - } - } - - DecimalV2Value get_pop_result() const { - DecimalV2Value new_count = DecimalV2Value(); - if (count == 1) { - return new_count; - } - DecimalV2Value res = m2 / new_count.assign_from_double(count); - return get_result(res); - } - - DecimalV2Value get_samp_result() const { - DecimalV2Value new_count = DecimalV2Value(); - DecimalV2Value res = m2 / new_count.assign_from_double(count - 1); - return get_result(res); - } - - void merge(const BaseDatadecimal& rhs) { - if (rhs.count == 0) { - return; - } - DecimalV2Value new_count = DecimalV2Value(); - new_count.assign_from_double(count); - DecimalV2Value rhs_count = DecimalV2Value(); - rhs_count.assign_from_double(rhs.count); - - DecimalV2Value delta = mean - rhs.mean; - DecimalV2Value sum_count = new_count + rhs_count; - mean = rhs.mean + delta * (new_count / sum_count); - m2 = rhs.m2 + m2 + (delta * delta) * (rhs_count * new_count / sum_count); - count += rhs.count; - } - - void add(const IColumn* column, size_t row_num) { - const auto& sources = assert_cast&>(*column); - Field field = sources[row_num]; - auto decimal_field = field.template get>(); - int128_t value; - if (decimal_field.get_scale() > DecimalV2Value::SCALE) { - value = static_cast(decimal_field.get_value()) / - (decimal_field.get_scale_multiplier() / DecimalV2Value::ONE_BILLION); - } else { - value = static_cast(decimal_field.get_value()) * - (DecimalV2Value::ONE_BILLION / decimal_field.get_scale_multiplier()); - } - DecimalV2Value source_data = DecimalV2Value(value); - - DecimalV2Value new_count = DecimalV2Value(); - new_count.assign_from_double(count); - DecimalV2Value increase_count = DecimalV2Value(); - increase_count.assign_from_double(1 + count); - - DecimalV2Value delta = source_data - mean; - DecimalV2Value r = delta / increase_count; - mean += r; - m2 += new_count * delta * r; - count += 1; - } - - static DataTypePtr get_return_type() { - return std::make_shared>(27, 9); - } - - DecimalV2Value mean; - DecimalV2Value m2; - int64_t count; -}; - template struct PopData : Data { using ColVecResult = std::conditional_t, ColumnDecimal, @@ -236,6 +138,10 @@ struct PopData : Data { } }; +// For this series of functions, the Decimal type is not supported +// because the operations involve squaring, +// which can easily exceed the range of the Decimal type. + template struct StddevName : Data { static const char* name() { return "stddev"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Stddev.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Stddev.java index 6cbebbc0ec..1f73242194 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Stddev.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Stddev.java @@ -24,7 +24,6 @@ import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSi import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; -import org.apache.doris.nereids.types.DecimalV2Type; import org.apache.doris.nereids.types.DoubleType; import org.apache.doris.nereids.types.FloatType; import org.apache.doris.nereids.types.IntegerType; @@ -49,9 +48,7 @@ public class Stddev extends NullableAggregateFunction FunctionSignature.ret(DoubleType.INSTANCE).args(SmallIntType.INSTANCE), FunctionSignature.ret(DoubleType.INSTANCE).args(IntegerType.INSTANCE), FunctionSignature.ret(DoubleType.INSTANCE).args(BigIntType.INSTANCE), - FunctionSignature.ret(DoubleType.INSTANCE).args(FloatType.INSTANCE), - FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT).args(DecimalV2Type.SYSTEM_DEFAULT) - ); + FunctionSignature.ret(DoubleType.INSTANCE).args(FloatType.INSTANCE)); /** * constructor with 1 argument. diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/StddevSamp.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/StddevSamp.java index 971af51a57..b64a4e425b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/StddevSamp.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/StddevSamp.java @@ -25,7 +25,6 @@ import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSi import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; -import org.apache.doris.nereids.types.DecimalV2Type; import org.apache.doris.nereids.types.DoubleType; import org.apache.doris.nereids.types.FloatType; import org.apache.doris.nereids.types.IntegerType; @@ -50,9 +49,7 @@ public class StddevSamp extends AggregateFunction FunctionSignature.ret(DoubleType.INSTANCE).args(SmallIntType.INSTANCE), FunctionSignature.ret(DoubleType.INSTANCE).args(IntegerType.INSTANCE), FunctionSignature.ret(DoubleType.INSTANCE).args(BigIntType.INSTANCE), - FunctionSignature.ret(DoubleType.INSTANCE).args(FloatType.INSTANCE), - FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT).args(DecimalV2Type.SYSTEM_DEFAULT) - ); + FunctionSignature.ret(DoubleType.INSTANCE).args(FloatType.INSTANCE)); /** * constructor with 1 argument. diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Variance.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Variance.java index 79fbcfb764..f56bfa6f6b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Variance.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Variance.java @@ -24,7 +24,6 @@ import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSi import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; -import org.apache.doris.nereids.types.DecimalV2Type; import org.apache.doris.nereids.types.DoubleType; import org.apache.doris.nereids.types.FloatType; import org.apache.doris.nereids.types.IntegerType; @@ -49,9 +48,7 @@ public class Variance extends NullableAggregateFunction FunctionSignature.ret(DoubleType.INSTANCE).args(SmallIntType.INSTANCE), FunctionSignature.ret(DoubleType.INSTANCE).args(IntegerType.INSTANCE), FunctionSignature.ret(DoubleType.INSTANCE).args(BigIntType.INSTANCE), - FunctionSignature.ret(DoubleType.INSTANCE).args(FloatType.INSTANCE), - FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT).args(DecimalV2Type.SYSTEM_DEFAULT) - ); + FunctionSignature.ret(DoubleType.INSTANCE).args(FloatType.INSTANCE)); /** * constructor with 1 argument. diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/VarianceSamp.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/VarianceSamp.java index 8196be54ab..637646ed85 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/VarianceSamp.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/VarianceSamp.java @@ -24,7 +24,6 @@ import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSi import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; -import org.apache.doris.nereids.types.DecimalV2Type; import org.apache.doris.nereids.types.DoubleType; import org.apache.doris.nereids.types.FloatType; import org.apache.doris.nereids.types.IntegerType; @@ -49,9 +48,7 @@ public class VarianceSamp extends AggregateFunction FunctionSignature.ret(DoubleType.INSTANCE).args(SmallIntType.INSTANCE), FunctionSignature.ret(DoubleType.INSTANCE).args(IntegerType.INSTANCE), FunctionSignature.ret(DoubleType.INSTANCE).args(BigIntType.INSTANCE), - FunctionSignature.ret(DoubleType.INSTANCE).args(FloatType.INSTANCE), - FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT).args(DecimalV2Type.SYSTEM_DEFAULT) - ); + FunctionSignature.ret(DoubleType.INSTANCE).args(FloatType.INSTANCE)); /** * constructor with 1 argument.