From baadc14e60053be07e1c211068d181956ede5243 Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Mon, 27 Nov 2023 09:58:53 +0800 Subject: [PATCH] [Enhancement](function) support unix_timestamp with float (#26827) --------- Co-authored-by: YangWithU --- be/src/agent/be_exec_version_manager.h | 1 + be/src/vec/core/types.h | 4 + be/src/vec/functions/function_timestamp.cpp | 200 ++++++++++++++++-- be/src/vec/runtime/vdatetime_value.h | 2 + .../doris/analysis/FunctionCallExpr.java | 22 ++ .../DateTimeExtractAndTransform.java | 17 +- .../functions/scalar/UnixTimestamp.java | 45 +++- .../literal/DateTimeV2Literal.java | 8 + .../doris/nereids/util/TypeCoercionUtils.java | 5 + gensrc/script/doris_builtins_functions.py | 13 +- .../nereids_function_p0/scalar_function/U.out | 100 ++++----- .../nereids_p0/datatype/test_date_acquire.out | 10 + .../datetime_functions/test_date_function.out | 24 ++- .../datetime_functions/test_date_function.out | 19 +- .../jdbc/test_clickhouse_jdbc_catalog.groovy | 2 +- .../datatype/test_date_acquire.groovy | 18 +- .../datatype/test_date_implicit_cast.groovy | 86 ++++++++ .../test_date_function.groovy | 29 ++- .../test_date_function.groovy | 11 +- 19 files changed, 505 insertions(+), 111 deletions(-) create mode 100644 regression-test/data/nereids_p0/datatype/test_date_acquire.out create mode 100644 regression-test/suites/nereids_p0/datatype/test_date_implicit_cast.groovy diff --git a/be/src/agent/be_exec_version_manager.h b/be/src/agent/be_exec_version_manager.h index 03d6b2ec6a..91f3e2b6d2 100644 --- a/be/src/agent/be_exec_version_manager.h +++ b/be/src/agent/be_exec_version_manager.h @@ -62,6 +62,7 @@ private: * a. aggregation function do not serialize bitmap to string. * b. array contains/position/countequal function return nullable in less situations. * c. cleared old version of Version 2. + * d. unix_timestamp function support timestamp with float for datetimev2, and change nullable mode. 
*/ inline const int BeExecVersionManager::max_be_exec_version = 3; inline const int BeExecVersionManager::min_be_exec_version = 0; diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h index 9f3b64a9d1..768e656904 100644 --- a/be/src/vec/core/types.h +++ b/be/src/vec/core/types.h @@ -388,6 +388,10 @@ struct Decimal { return Decimal(binary_cast(decimal_value)); } + static Decimal from_int_frac(T integer, T fraction, int scale) { + return Decimal(integer * int_exp10(scale) + fraction); + } + template Decimal(const Decimal& x) { value = x; diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index 735b9067a7..801fa37ca0 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -17,24 +17,29 @@ #include #include +#include #include #include #include +#include #include #include +#include #include #include #include #include #include "common/status.h" +#include "runtime/decimalv2_value.h" #include "runtime/define_primitive_type.h" #include "runtime/runtime_state.h" #include "runtime/types.h" #include "udf/udf.h" #include "util/binary_cast.hpp" #include "util/datetype_cast.hpp" +#include "util/time.h" #include "util/time_lut.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" @@ -54,6 +59,7 @@ #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" +#include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" @@ -574,6 +580,103 @@ template struct UnixTimeStampDateImpl { static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } + static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { + if constexpr (std::is_same_v) { + if (arguments[0].type->is_nullable()) { + UInt32 scale = 
static_cast(arguments[0].type.get()) + ->get_nested_type() + ->get_scale(); + return make_nullable( + std::make_shared>(10 + scale, scale)); + } + UInt32 scale = arguments[0].type->get_scale(); + return std::make_shared>(10 + scale, scale); + } else { + if (arguments[0].type->is_nullable()) { + return make_nullable(std::make_shared()); + } + return std::make_shared(); + } + } + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + const ColumnPtr& col = block.get_by_position(arguments[0]).column; + DCHECK(!col->is_nullable()); + + if constexpr (std::is_same_v || + std::is_same_v) { + const auto* col_source = assert_cast(col.get()); + auto col_result = ColumnVector::create(); + auto& col_result_data = col_result->get_data(); + col_result->resize(input_rows_count); + + for (int i = 0; i < input_rows_count; i++) { + StringRef source = col_source->get_data_at(i); + const auto& ts_value = reinterpret_cast(*source.data); + int64_t timestamp {}; + ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj()); + col_result_data[i] = UnixTimeStampImpl::trim_timestamp(timestamp); + } + block.replace_by_position(result, std::move(col_result)); + } else if constexpr (std::is_same_v) { + const auto* col_source = assert_cast(col.get()); + auto col_result = ColumnVector::create(); + auto& col_result_data = col_result->get_data(); + col_result->resize(input_rows_count); + + for (int i = 0; i < input_rows_count; i++) { + StringRef source = col_source->get_data_at(i); + const auto& ts_value = + reinterpret_cast&>(*source.data); + int64_t timestamp {}; + const auto valid = + ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj()); + DCHECK(valid); + col_result_data[i] = UnixTimeStampImpl::trim_timestamp(timestamp); + } + block.replace_by_position(result, std::move(col_result)); + } else { // DatetimeV2 + const auto* col_source = assert_cast(col.get()); + UInt32 scale = 
block.get_by_position(arguments[0]).type->get_scale(); + auto col_result = ColumnDecimal::create(input_rows_count, scale); + auto& col_result_data = col_result->get_data(); + col_result->resize(input_rows_count); + + for (int i = 0; i < input_rows_count; i++) { + StringRef source = col_source->get_data_at(i); + const auto& ts_value = + reinterpret_cast&>(*source.data); + std::pair timestamp {}; + const auto valid = + ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj()); + DCHECK(valid); + + auto& [sec, ms] = timestamp; + sec = UnixTimeStampImpl::trim_timestamp(sec); + auto ms_str = std::to_string(ms).substr(0, scale); + if (ms_str.empty()) { + ms_str = "0"; + } + col_result_data[i] = Decimal64::from_int_frac(sec, std::stoll(ms_str), scale).value; + } + block.replace_by_position(result, std::move(col_result)); + } + + return Status::OK(); + } +}; + +template +struct UnixTimeStampDatetimeImpl : public UnixTimeStampDateImpl { + static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } +}; + +template +struct UnixTimeStampDateImplOld { + static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } + static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { RETURN_REAL_TYPE_FOR_DATEV2_FUNCTION(DataTypeInt32); } @@ -601,7 +704,7 @@ struct UnixTimeStampDateImpl { StringRef source = col_source->get_data_at(i); const VecDateTimeValue& ts_value = reinterpret_cast(*source.data); - int64_t timestamp; + int64_t timestamp {}; if (!ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj())) { null_map_data[i] = true; } else { @@ -626,7 +729,7 @@ struct UnixTimeStampDateImpl { StringRef source = col_source->get_data_at(i); const DateV2Value& ts_value = reinterpret_cast&>(*source.data); - int64_t timestamp; + int64_t timestamp {}; if (!ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj())) { null_map_data[i] = true; } else { @@ -642,7 +745,7 @@ struct UnixTimeStampDateImpl { StringRef 
source = col_source->get_data_at(i); const DateV2Value& ts_value = reinterpret_cast&>(*source.data); - int64_t timestamp; + int64_t timestamp {}; const auto valid = ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj()); DCHECK(valid); @@ -665,7 +768,7 @@ struct UnixTimeStampDateImpl { StringRef source = col_source->get_data_at(i); const DateV2Value& ts_value = reinterpret_cast&>(*source.data); - int64_t timestamp; + int64_t timestamp {}; if (!ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj())) { null_map_data[i] = true; } else { @@ -681,7 +784,7 @@ struct UnixTimeStampDateImpl { StringRef source = col_source->get_data_at(i); const DateV2Value& ts_value = reinterpret_cast&>(*source.data); - int64_t timestamp; + int64_t timestamp {}; const auto valid = ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj()); DCHECK(valid); @@ -696,15 +799,78 @@ struct UnixTimeStampDateImpl { }; template -struct UnixTimeStampDatetimeImpl : public UnixTimeStampDateImpl { +struct UnixTimeStampDatetimeImplOld : public UnixTimeStampDateImplOld { static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } }; +// This impl doesn't use default impl to deal null value. 
struct UnixTimeStampStrImpl { static DataTypes get_variadic_argument_types() { return {std::make_shared(), std::make_shared()}; } + static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { + return make_nullable(std::make_shared>(16, 6)); + } + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + ColumnPtr col_left = nullptr, col_right = nullptr; + bool source_const = false, format_const = false; + std::tie(col_left, source_const) = + unpack_if_const(block.get_by_position(arguments[0]).column); + std::tie(col_right, format_const) = + unpack_if_const(block.get_by_position(arguments[1]).column); + + auto col_result = ColumnDecimal::create(input_rows_count, 0); + auto null_map = ColumnVector::create(input_rows_count); + auto& col_result_data = col_result->get_data(); + auto& null_map_data = null_map->get_data(); + + check_set_nullable(col_left, null_map, source_const); + check_set_nullable(col_right, null_map, format_const); + + const auto* col_source = assert_cast(col_left.get()); + const auto* col_format = assert_cast(col_right.get()); + for (int i = 0; i < input_rows_count; i++) { + StringRef source = col_source->get_data_at(i); + StringRef fmt = col_format->get_data_at(i); + + DateV2Value ts_value; + if (!ts_value.from_date_format_str(fmt.data, fmt.size, source.data, source.size)) { + null_map_data[i] = true; + continue; + } + + std::pair timestamp {}; + if (!ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj())) { + null_map_data[i] = true; + } else { + null_map_data[i] = false; + + auto& [sec, ms] = timestamp; + sec = UnixTimeStampImpl::trim_timestamp(sec); + auto ms_str = std::to_string(ms).substr(0, 6); + if (ms_str.empty()) { + ms_str = "0"; + } + col_result_data[i] = Decimal64::from_int_frac(sec, std::stoll(ms_str), 6).value; + } + } + + block.replace_by_position( + result, ColumnNullable::create(std::move(col_result), 
std::move(null_map))); + + return Status::OK(); + } +}; + +struct UnixTimeStampStrImplOld { + static DataTypes get_variadic_argument_types() { + return {std::make_shared(), std::make_shared()}; + } + static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { return make_nullable(std::make_shared()); } @@ -739,7 +905,7 @@ struct UnixTimeStampStrImpl { continue; } - int64_t timestamp; + int64_t timestamp {}; if (!ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj())) { null_map_data[i] = true; } else { @@ -763,8 +929,6 @@ public: String get_name() const override { return name; } - bool use_default_implementation_for_nulls() const override { return false; } - size_t get_number_of_arguments() const override { return get_variadic_argument_types_impl().size(); } @@ -777,6 +941,10 @@ public: return Impl::get_variadic_argument_types(); } + bool use_default_implementation_for_nulls() const override { + return !static_cast(std::is_same_v); + } + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { return Impl::execute_impl(context, block, arguments, result, input_rows_count); @@ -1212,11 +1380,8 @@ void register_function_timestamp(SimpleFunctionFactory& factory) { factory.register_function>(); factory.register_function>>(); factory.register_function>>(); + factory.register_function>>(); factory.register_function>>(); - factory.register_function>>(); - factory.register_function>>(); - factory.register_function< - FunctionUnixTimestamp>>(); factory.register_function>(); factory.register_function>(); factory.register_function>(); @@ -1230,6 +1395,15 @@ void register_function_timestamp(SimpleFunctionFactory& factory) { factory.register_function>(); factory.register_function>(); factory.register_function>(); + + /// @TEMPORARY: for be_exec_version=3 + factory.register_alternative_function< + FunctionUnixTimestamp>>(); + 
factory.register_alternative_function< + FunctionUnixTimestamp>>(); + factory.register_alternative_function< + FunctionUnixTimestamp>>(); + factory.register_alternative_function>(); } } // namespace doris::vectorized diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 6c2b04bb9e..6fe7104ecd 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -31,6 +31,7 @@ #include #include #include +#include #include "util/hash_util.hpp" #include "util/time_lut.h" @@ -964,6 +965,7 @@ public: //it returns seconds of the value of date literal since '1970-01-01 00:00:00' UTC bool unix_timestamp(int64_t* timestamp, const std::string& timezone) const; bool unix_timestamp(int64_t* timestamp, const cctz::time_zone& ctz) const; + //the first arg is result of fixed point bool unix_timestamp(std::pair* timestamp, const std::string& timezone) const; bool unix_timestamp(std::pair* timestamp, const cctz::time_zone& ctz) const; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 3a6751ad77..d05de49ed9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -30,6 +30,7 @@ import org.apache.doris.catalog.Function; import org.apache.doris.catalog.FunctionSet; import org.apache.doris.catalog.FunctionUtil; import org.apache.doris.catalog.MapType; +import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarFunction; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.StructField; @@ -1681,6 +1682,27 @@ public class FunctionCallExpr extends Expr { children.set(1, new StringLiteral("%Y-%m-%d %H:%i:%s")); } } + + if (fnName.getFunction().equalsIgnoreCase("unix_timestamp") && children.size() == 1) { + if (getChild(0).type.isDatetimeV2()) { + 
ScalarType type = (ScalarType) getChild(0).type; + Preconditions.checkArgument(type.getScalarScale() <= 6, + "DatetimeV2's scale shouldn't exceed 6 but meet " + type.getScalarScale()); + fn.setReturnType( + ScalarType.createDecimalType(PrimitiveType.DECIMAL64, 10 + type.getScalarScale(), + type.getScalarScale())); + } else if (getChild(0).type.isStringType()) { + // use DATETIME to make scale adaptive + ScalarType type = ((ScalarType) (((StringLiteral) getChild(0)) + .uncheckedCastTo(ScalarType.DATETIME).type)); + if (type.isDatetimeV2()) { + int scale = ((ScalarType) (((StringLiteral) getChild(0)) + .uncheckedCastTo(ScalarType.DATETIME).type)).getScalarScale(); + fn.setReturnType( + ScalarType.createDecimalType(PrimitiveType.DECIMAL64, 10 + scale, scale)); + } + } + } } if (fnName.getFunction().equalsIgnoreCase("convert_to")) { if (children.size() < 2 || !getChild(1).isConstant()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java index e5195a102a..fa1e1ee2a7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java @@ -25,14 +25,17 @@ import org.apache.doris.nereids.trees.expressions.literal.DateLiteral; import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral; import org.apache.doris.nereids.trees.expressions.literal.DateTimeV2Literal; import org.apache.doris.nereids.trees.expressions.literal.DateV2Literal; +import org.apache.doris.nereids.trees.expressions.literal.DecimalV3Literal; import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral; import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; import 
org.apache.doris.nereids.trees.expressions.literal.SmallIntLiteral; import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral; import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral; +import org.apache.doris.nereids.types.DecimalV3Type; import org.apache.doris.nereids.types.VarcharType; import org.apache.doris.nereids.util.DateUtils; +import java.math.BigDecimal; import java.time.Duration; import java.time.LocalDate; import java.time.LocalDateTime; @@ -494,9 +497,19 @@ public class DateTimeExtractAndTransform { return new IntegerLiteral(getTimestamp(date.toJavaDateType())); } - @ExecFunction(name = "unix_timestamp", argTypes = {"DATETIMEV2"}, returnType = "INT") + /** + * date transformation function: unix_timestamp + */ + @ExecFunction(name = "unix_timestamp", argTypes = { "DATETIMEV2" }, returnType = "DECIMALV3") public static Expression unixTimestamp(DateTimeV2Literal date) { - return new IntegerLiteral(getTimestamp(date.toJavaDateType())); + if (date.getMicroSecond() == 0) { + return new DecimalV3Literal(DecimalV3Type.createDecimalV3TypeLooseCheck(10, 0), + new BigDecimal(getTimestamp(date.toJavaDateType()).toString())); + } + int scale = date.getDataType().getScale(); + String val = getTimestamp(date.toJavaDateType()).toString() + "." 
+ date.getMicrosecondString(); + return new DecimalV3Literal(DecimalV3Type.createDecimalV3TypeLooseCheck(10 + scale, scale), + new BigDecimal(val)); } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UnixTimestamp.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UnixTimestamp.java index bdeabb74c8..ab65e27fb1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UnixTimestamp.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UnixTimestamp.java @@ -21,12 +21,13 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.Nondeterministic; -import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.types.DateTimeType; import org.apache.doris.nereids.types.DateTimeV2Type; import org.apache.doris.nereids.types.DateType; import org.apache.doris.nereids.types.DateV2Type; +import org.apache.doris.nereids.types.DecimalV3Type; import org.apache.doris.nereids.types.IntegerType; import org.apache.doris.nereids.types.StringType; import org.apache.doris.nereids.types.VarcharType; @@ -40,16 +41,19 @@ import java.util.List; * ScalarFunction 'unix_timestamp'. This class is generated by GenerateFunction. 
*/ public class UnixTimestamp extends ScalarFunction - implements ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, Nondeterministic { + implements ExplicitlyCastableSignature, Nondeterministic { + // we got changes when computeSignature private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(IntegerType.INSTANCE).args(), - FunctionSignature.ret(IntegerType.INSTANCE).args(DateTimeV2Type.SYSTEM_DEFAULT), + FunctionSignature.ret(DecimalV3Type.createDecimalV3Type(16, 6)).args(DateTimeV2Type.SYSTEM_DEFAULT), FunctionSignature.ret(IntegerType.INSTANCE).args(DateV2Type.INSTANCE), FunctionSignature.ret(IntegerType.INSTANCE).args(DateTimeType.INSTANCE), FunctionSignature.ret(IntegerType.INSTANCE).args(DateType.INSTANCE), - FunctionSignature.ret(IntegerType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT), - FunctionSignature.ret(IntegerType.INSTANCE).args(StringType.INSTANCE, StringType.INSTANCE) + FunctionSignature.ret(DecimalV3Type.createDecimalV3Type(16, 6)).args(VarcharType.SYSTEM_DEFAULT, + VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(DecimalV3Type.createDecimalV3Type(16, 6)).args(StringType.INSTANCE, + StringType.INSTANCE) ); /** @@ -74,14 +78,41 @@ public class UnixTimestamp extends ScalarFunction } /** - * custom compute nullable. 
+ * [['unix_timestamp'], 'INT', [], 'ALWAYS_NOT_NULLABLE'], + * [['unix_timestamp'], 'INT', ['DATETIME'], 'DEPEND_ON_ARGUMENT'], + * [['unix_timestamp'], 'INT', ['DATE'], 'DEPEND_ON_ARGUMENT'], + * [['unix_timestamp'], 'DECIMAL64', ['DATETIMEV2'], 'DEPEND_ON_ARGUMENT'], + * [['unix_timestamp'], 'INT', ['DATEV2'], 'DEPEND_ON_ARGUMENT'], + * [['unix_timestamp'], 'INT', ['VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'], + * [['unix_timestamp'], 'INT', ['STRING', 'STRING'], 'ALWAYS_NULLABLE'], */ @Override public boolean nullable() { if (arity() == 0) { return false; } - return PropagateNullableOnDateLikeV2Args.super.nullable(); + if (arity() == 1) { + return child(0).nullable(); + } + if (arity() == 2 && child(0).getDataType().isStringLikeType() && child(1).getDataType().isStringLikeType()) { + return true; + } + return child(0).nullable() || child(1).nullable(); + } + + @Override + public FunctionSignature computeSignature(FunctionSignature signature) { + if (arity() != 1) { + return signature; + } + DataType argType0 = getArgumentType(0); + if (argType0.isDateTimeV2Type()) { + int scale = ((DateTimeV2Type) argType0).getScale(); + return signature.withReturnType(DecimalV3Type.createDecimalV3Type(10 + scale, scale)); + } else if (argType0.isStringLikeType()) { + return signature.withReturnType(DecimalV3Type.createDecimalV3Type(16, 6)); + } + return signature; } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java index 7471fc829f..069e53f141 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java @@ -106,6 +106,14 @@ public class DateTimeV2Literal extends DateTimeLiteral { (int) (microSecond / Math.pow(10, DateTimeV2Type.MAX_SCALE - 
getDataType().getScale()))); } + public String getMicrosecondString() { + if (microSecond == 0) { + return "0"; + } + return String.format("%0" + getDataType().getScale() + "d", + (int) (microSecond / Math.pow(10, DateTimeV2Type.MAX_SCALE - getDataType().getScale()))); + } + @Override public Expression plusYears(long years) { return fromJavaDateType( diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java index 5991137f11..3574051817 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java @@ -468,6 +468,11 @@ public class TypeCoercionUtils { return promoted; } } + // adapt scale when from string to datetimev2 with float + if (type.isStringLikeType() && dataType.isDateTimeV2Type()) { + return recordTypeCoercionForSubQuery(input, + DateTimeV2Type.forTypeFromString(((Literal) input).getStringValue())); + } } return recordTypeCoercionForSubQuery(input, dataType); } diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index b519f5c79e..d7eff94f80 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -863,12 +863,12 @@ visible_functions = { # Timestamp functions "Timestamp": [ [['unix_timestamp'], 'INT', [], 'ALWAYS_NOT_NULLABLE'], - [['unix_timestamp'], 'INT', ['DATETIME'], 'ALWAYS_NULLABLE'], - [['unix_timestamp'], 'INT', ['DATE'], 'ALWAYS_NULLABLE'], - [['unix_timestamp'], 'INT', ['DATETIMEV2'], ''], - [['unix_timestamp'], 'INT', ['DATEV2'], ''], - [['unix_timestamp'], 'INT', ['VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'], - [['unix_timestamp'], 'INT', ['STRING', 'STRING'], 'ALWAYS_NULLABLE'], + [['unix_timestamp'], 'INT', ['DATETIME'], 'DEPEND_ON_ARGUMENT'], + [['unix_timestamp'], 'INT', ['DATE'], 'DEPEND_ON_ARGUMENT'], + [['unix_timestamp'], 
'DECIMAL64', ['DATETIMEV2'], 'DEPEND_ON_ARGUMENT'], + [['unix_timestamp'], 'INT', ['DATEV2'], 'DEPEND_ON_ARGUMENT'], + [['unix_timestamp'], 'DECIMAL64', ['VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'], + [['unix_timestamp'], 'DECIMAL64', ['STRING', 'STRING'], 'ALWAYS_NULLABLE'], [['from_unixtime'], 'VARCHAR', ['BIGINT'], 'ALWAYS_NULLABLE'], [['from_unixtime'], 'VARCHAR', ['BIGINT', 'VARCHAR'], 'ALWAYS_NULLABLE'], [['from_unixtime'], 'VARCHAR', ['BIGINT', 'STRING'], 'ALWAYS_NULLABLE'], @@ -2095,7 +2095,6 @@ nondeterministic_functions = [ # The function belongs to @null_result_with_one_null_param_functions, # as long as one parameter is null, the function must return null. null_result_with_one_null_param_functions = [ - 'unix_timestamp', 'str_to_date', 'convert_tz', 'pi', diff --git a/regression-test/data/nereids_function_p0/scalar_function/U.out b/regression-test/data/nereids_function_p0/scalar_function/U.out index fa8adc5f85..36700e53a9 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/U.out +++ b/regression-test/data/nereids_function_p0/scalar_function/U.out @@ -174,62 +174,62 @@ 1331481600 -- !sql_unix_timestamp_Varchar_Varchar -- -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N -- !sql_unix_timestamp_Varchar_Varchar_notnull -- -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N -- !sql_unix_timestamp_String_String -- -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N -- !sql_unix_timestamp_String_String_notnull -- -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N -- !sql_upper_Varchar -- NULL diff --git a/regression-test/data/nereids_p0/datatype/test_date_acquire.out b/regression-test/data/nereids_p0/datatype/test_date_acquire.out new file mode 100644 index 0000000000..aa7b1098a7 --- /dev/null +++ b/regression-test/data/nereids_p0/datatype/test_date_acquire.out @@ -0,0 +1,10 @@ +-- This file is 
automatically generated. You should know what you did if you want to edit this +-- !sql -- +2019-03-21 15:10:55 1196389819 + +-- !sql -- +2019-03-21 07:10:55 1196418619 + +-- !sql -- +2019-03-21 11:10:55 1196404219 + diff --git a/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out b/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out index 96a8160f87..542b301ddd 100644 --- a/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out +++ b/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out @@ -342,18 +342,27 @@ February -- !sql -- 749027 --- !sql -- +-- !sql_ustamp1 -- 1196389819 --- !sql -- +-- !sql_ustamp2 -- 1196389819 --- !sql -- +-- !sql_ustamp3 -- 1196389819 --- !sql -- +-- !sql_ustamp4 -- 0 +-- !sql_ustamp5 -- +1196389819.123456 + +-- !sql_ustamp6 -- +1196389819.123 + +-- !sql_ustamp7 -- +1196389819.1235 + -- !sql -- 0 @@ -642,3 +651,10 @@ true -- !sql -- 2023-08-17T17:41:18 +-- !sql_dt_null_1 -- +1607702400.0000 1607702400 1607702400 1607702400 +1607746332.0000 1607746332 1607702400 1607702400 +1607746332.0000 1607746332 1607702400 1607702400 +1607746332.1230 1607746332 1607702400 1607702400 +1607746332.6667 1607746333 1607702400 1607702400 + diff --git a/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out b/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out index 75e07024c7..7ad5b93341 100644 --- a/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out +++ b/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out @@ -353,18 +353,27 @@ February -- !sql -- 749027 --- !sql -- +-- !sql_ustamp1 -- 1196389819 --- !sql -- +-- !sql_ustamp2 -- 1196389819 --- !sql -- +-- !sql_ustamp3 -- 1196389819 --- !sql -- +-- !sql_ustamp4 -- 0 +-- !sql_ustamp5 -- +1196389819.123456 + +-- !sql_ustamp6 -- 
+1196389819.123 + +-- !sql_ustamp7 -- +1196389819.1235 + -- !sql -- 0 @@ -666,5 +675,5 @@ true 2023-08-17T17:41:18 -- !sql -- -1694966400 1694966400 +1694966400.000000 1694966400.000000 diff --git a/regression-test/suites/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.groovy index 274bf7a701..8497fec18a 100644 --- a/regression-test/suites/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.groovy +++ b/regression-test/suites/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.groovy @@ -94,7 +94,7 @@ suite("test_clickhouse_jdbc_catalog", "p0,external,clickhouse,external_docker,ex order_qt_func_push2 """select * from ts where ts <= unix_timestamp(from_unixtime(ts,'yyyyMMdd'));""" explain { sql("select * from ts where ts <= unix_timestamp(from_unixtime(ts,'yyyy-MM-dd'));") - contains """QUERY: SELECT "id", "ts" FROM "doris_test"."ts" WHERE ("ts" <= toUnixTimestamp(FROM_UNIXTIME(ts, '%Y-%m-%d')))""" + contains """QUERY: SELECT "id", "ts" FROM "doris_test"."ts" WHERE (ts <= toUnixTimestamp(FROM_UNIXTIME(ts, '%Y-%m-%d')))""" } order_qt_dt_with_tz """ select * from dt_with_tz order by id; """ diff --git a/regression-test/suites/nereids_p0/datatype/test_date_acquire.groovy b/regression-test/suites/nereids_p0/datatype/test_date_acquire.groovy index 981b28eac4..b71c371191 100644 --- a/regression-test/suites/nereids_p0/datatype/test_date_acquire.groovy +++ b/regression-test/suites/nereids_p0/datatype/test_date_acquire.groovy @@ -26,22 +26,12 @@ suite("test_date_acquire") { sql "set enable_fold_constant_by_be=true" - test { - sql "select from_unixtime(1553152255), unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s')" - result([['2019-03-21 15:10:55', 1196389819]]) - } + sql "set time_zone='+08:00'" + qt_sql "select from_unixtime(1553152255), unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s')" sql "set time_zone='+00:00'" - - test { - sql "select 
from_unixtime(1553152255), unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s')" - result([['2019-03-21 07:10:55', 1196418619]]) - } + qt_sql "select from_unixtime(1553152255), unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s')" sql "set time_zone='+04:00'" - - test { - sql "select from_unixtime(1553152255), unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s')" - result([['2019-03-21 11:10:55', 1196404219]]) - } + qt_sql "select from_unixtime(1553152255), unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s')" } diff --git a/regression-test/suites/nereids_p0/datatype/test_date_implicit_cast.groovy b/regression-test/suites/nereids_p0/datatype/test_date_implicit_cast.groovy new file mode 100644 index 0000000000..6dafff848f --- /dev/null +++ b/regression-test/suites/nereids_p0/datatype/test_date_implicit_cast.groovy @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_date_implicit_cast") { + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + + def tbl = "test_date_implicit_cast" + def result = "" + def contain0 = false + def contain1 = false + + sql """ DROP TABLE IF EXISTS `d4nn` """ + sql """ CREATE TABLE IF NOT EXISTS `d4nn` ( + `k1` DATETIMEV2(4) NOT NULL + ) + DISTRIBUTED BY HASH(`k1`) BUCKETS 5 properties("replication_num" = "1") + """ + + result = sql " desc verbose select if(k1='2020-12-12', k1, '2020-12-12 12:12:12.123') from d4nn " + for (String value : result) { + if (value.contains("col=k1, colUniqueId=0, type=DATETIMEV2(4)")) { + contain0 = true; + } + if (value.contains("col=null, colUniqueId=null, type=DATETIMEV2(4)")) { + contain1 = true; + } + } + assertTrue(contain0 && contain1, "failed on 1") + + + result = sql " desc verbose select if(k1='2020-12-12', k1, cast('2020-12-12 12:12:12.123' as datetimev2(3))) from d4nn; " + for (String value : result) { + if (value.contains("col=k1, colUniqueId=0, type=DATETIMEV2(4)")) { + contain0 = true; + } + if (value.contains("col=null, colUniqueId=null, type=DATETIMEV2(4)")) { + contain1 = true; + } + } + assertTrue(contain0 && contain1, "failed on 2") + + result = sql " desc verbose select if(k1='2012-12-12 12:12:12.1235', k1, '2020-12-12 12:12:12.12345') from d4nn; " + for (String value : result) { + if (value.contains("col=k1, colUniqueId=0, type=DATETIMEV2(4)")) { + contain0 = true; + } + if (value.contains("col=null, colUniqueId=null, type=DATETIMEV2(5)")) { + contain1 = true; + } + } + assertTrue(contain0 && contain1, "failed on 3") + + + sql """ DROP TABLE IF EXISTS `d6` """ + sql """ CREATE TABLE IF NOT EXISTS `d6` ( + `k1` DATETIMEV2(6) NULL + ) + DISTRIBUTED BY HASH(`k1`) BUCKETS 5 properties("replication_num" = "1") + """ + + result = sql " desc verbose select if(k1='2020-12-12 12:12:12.12345', k1, '2020-12-12 12:12:12.33333') from d6; " + for (String value : result) { + if (value.contains("col=k1, 
colUniqueId=0, type=DATETIMEV2(6)")) { + contain0 = true; + } + if (value.contains("col=null, colUniqueId=null, type=DATETIMEV2(6)")) { + contain1 = true; + } + } + assertTrue(contain0 && contain1, "failed on 4") +} \ No newline at end of file diff --git a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy index 6e5506d577..447dda31cd 100644 --- a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy +++ b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy @@ -375,10 +375,13 @@ suite("test_date_function") { // UNIX_TIMESTAMP def unin_timestamp_str = """ select unix_timestamp() """ assertTrue(unin_timestamp_str[0].size() == 1) - qt_sql """ select unix_timestamp('2007-11-30 10:30:19') """ - qt_sql """ select unix_timestamp('2007-11-30 10:30-19', '%Y-%m-%d %H:%i-%s') """ - qt_sql """ select unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s') """ - qt_sql """ select unix_timestamp('1969-01-01 00:00:00') """ + qt_sql_ustamp1 """ select unix_timestamp('2007-11-30 10:30:19') """ + qt_sql_ustamp2 """ select unix_timestamp('2007-11-30 10:30-19', '%Y-%m-%d %H:%i-%s') """ + qt_sql_ustamp3 """ select unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s') """ + qt_sql_ustamp4 """ select unix_timestamp('1969-01-01 00:00:00') """ + qt_sql_ustamp5 """ select unix_timestamp('2007-11-30 10:30:19.123456') """ + qt_sql_ustamp6 """ select unix_timestamp(cast('2007-11-30 10:30:19.123456' as datetimev2(3))) """ + qt_sql_ustamp7 """ select unix_timestamp(cast('2007-11-30 10:30:19.123456' as datetimev2(4))) """ // UTC_TIMESTAMP def utc_timestamp_str = sql """ select utc_timestamp(),utc_timestamp() + 1 """ @@ -670,4 +673,22 @@ suite("test_date_function") { } qt_sql """ select date_add("2023-08-17T01:41:18Z", interval 8 hour) """ + + sql """ DROP TABLE IF 
EXISTS dt_null; """ + sql """ CREATE TABLE IF NOT EXISTS dt_null( + `k1` INT NOT NULL, + `dtv24` datetimev2(4) NOT NULL, + `dtv20n` datetimev2(0) NULL, + `dv2` datev2 NOT NULL, + `dv2n` datev2 NULL + ) + DISTRIBUTED BY HASH(`k1`) BUCKETS 5 + properties("replication_num" = "1"); """ + sql """ insert into dt_null values ('1', '2020-12-12', '2020-12-12', '2020-12-12', '2020-12-12'), + ('2', '2020-12-12 12:12:12', '2020-12-12 12:12:12', '2020-12-12 12:12:12', '2020-12-12 12:12:12'), + ('3', '2020-12-12 12:12:12.0', '2020-12-12 12:12:12.0', '2020-12-12 12:12:12.0', '2020-12-12 12:12:12.0'), + ('4', '2020-12-12 12:12:12.123', '2020-12-12 12:12:12.123', '2020-12-12 12:12:12.123', '2020-12-12 12:12:12.123'), + ('5', '2020-12-12 12:12:12.666666', '2020-12-12 12:12:12.666666', '2020-12-12 12:12:12.666666', '2020-12-12 12:12:12.666666'); """ + + qt_sql_dt_null_1 """ select unix_timestamp(dtv24), unix_timestamp(dtv20n),unix_timestamp(dv2),unix_timestamp(dv2n) from dt_null order by k1; """ } diff --git a/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy index 186123aeca..902883c5be 100644 --- a/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy @@ -377,10 +377,13 @@ suite("test_date_function") { // UNIX_TIMESTAMP def unin_timestamp_str = """ select unix_timestamp() """ assertTrue(unin_timestamp_str[0].size() == 1) - qt_sql """ select unix_timestamp('2007-11-30 10:30:19') """ - qt_sql """ select unix_timestamp('2007-11-30 10:30-19', '%Y-%m-%d %H:%i-%s') """ - qt_sql """ select unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s') """ - qt_sql """ select unix_timestamp('1969-01-01 00:00:00') """ + qt_sql_ustamp1 """ select unix_timestamp('2007-11-30 10:30:19') """ + qt_sql_ustamp2 """ select 
unix_timestamp('2007-11-30 10:30-19', '%Y-%m-%d %H:%i-%s') """ + qt_sql_ustamp3 """ select unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s') """ + qt_sql_ustamp4 """ select unix_timestamp('1969-01-01 00:00:00') """ + qt_sql_ustamp5 """ select unix_timestamp('2007-11-30 10:30:19.123456') """ + qt_sql_ustamp6 """ select unix_timestamp(cast('2007-11-30 10:30:19.123456' as datetimev2(3))) """ + qt_sql_ustamp7 """ select unix_timestamp(cast('2007-11-30 10:30:19.123456' as datetimev2(4))) """ // UTC_TIMESTAMP def utc_timestamp_str = sql """ select utc_timestamp(),utc_timestamp() + 1 """