// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. // This file is copied from // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/DateTimeTransforms.h // and modified by Doris #pragma once #include "common/status.h" #include "runtime/runtime_state.h" #include "udf/udf_internal.h" #include "util/binary_cast.hpp" #include "util/type_traits.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" #include "vec/core/block.h" #include "vec/core/column_numbers.h" #include "vec/core/types.h" #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_string.h" #include "vec/runtime/vdatetime_value.h" #include "vec/utils/util.hpp" namespace doris::vectorized { #define TIME_FUNCTION_IMPL(CLASS, UNIT, FUNCTION) \ template \ struct CLASS { \ using OpArgType = ArgType; \ static constexpr auto name = #UNIT; \ \ static inline auto execute(const ArgType& t) { \ const auto& date_time_value = (typename DateTraits::T&)(t); \ return date_time_value.FUNCTION; \ } \ \ static DataTypes get_variadic_argument_types() { \ return {std::make_shared::DateType>()}; \ } \ } #define TO_TIME_FUNCTION(CLASS, UNIT) TIME_FUNCTION_IMPL(CLASS, UNIT, UNIT()) 
// Functors whose function name and value accessor share the same identifier
// (e.g. ToYearImpl has name "year" and returns date_time_value.year()).
TO_TIME_FUNCTION(ToYearImpl, year);
TO_TIME_FUNCTION(ToQuarterImpl, quarter);
TO_TIME_FUNCTION(ToMonthImpl, month);
TO_TIME_FUNCTION(ToDayImpl, day);
TO_TIME_FUNCTION(ToHourImpl, hour);
TO_TIME_FUNCTION(ToMinuteImpl, minute);
TO_TIME_FUNCTION(ToSecondImpl, second);

// Functors whose function name differs from the accessor that is called.
// WeekOfYearImpl calls week() with a fixed mysql_week_mode(3).
TIME_FUNCTION_IMPL(WeekOfYearImpl, weekofyear, week(mysql_week_mode(3)));
TIME_FUNCTION_IMPL(DayOfYearImpl, dayofyear, day_of_year());
TIME_FUNCTION_IMPL(DayOfMonthImpl, dayofmonth, day());
TIME_FUNCTION_IMPL(DayOfWeekImpl, dayofweek, day_of_week());
TIME_FUNCTION_IMPL(WeekDayImpl, weekday, weekday());
// TODO: the method should be always not nullable
TIME_FUNCTION_IMPL(ToDaysImpl, to_days, daynr());

// TIME_FUNCTION_ONE_ARG_IMPL(CLASS, UNIT, FUNCTION)
//
// Stamps out a struct named CLASS with OpArgType, name (= #UNIT), execute(t)
// returning `date_time_value.FUNCTION`, and get_variadic_argument_types().
// The macro ends without a trailing ';' so each use site supplies its own.
//
// NOTE(review): the visible text of this macro is identical to
// TIME_FUNCTION_IMPL. If the two are meant to differ, the difference would be
// in the template parameter list, which is missing from this text (the
// `template` line has no parameters and `DateTraits` / `std::make_shared`
// have no arguments) - confirm against the upstream file.
#define TIME_FUNCTION_ONE_ARG_IMPL(CLASS, UNIT, FUNCTION)               \
    template                                                            \
    struct CLASS {                                                      \
        using OpArgType = ArgType;                                      \
        static constexpr auto name = #UNIT;                             \
                                                                        \
        static inline auto execute(const ArgType& t) {                  \
            const auto& date_time_value = (typename DateTraits::T&)(t); \
            return date_time_value.FUNCTION;                            \
        }                                                               \
                                                                        \
        static DataTypes get_variadic_argument_types() {                \
            return {std::make_shared::DateType>()};                     \
        }                                                               \
    }

// Both of these call the week accessor with a fixed mysql_week_mode(0).
TIME_FUNCTION_ONE_ARG_IMPL(ToWeekOneArgImpl, week, week(mysql_week_mode(0)));
TIME_FUNCTION_ONE_ARG_IMPL(ToYearWeekOneArgImpl, yearweek, year_week(mysql_week_mode(0)));

// Functor behind "to_date".
//
// execute() converts the raw argument into the value type T with binary_cast
// and then picks one of three compile-time branches:
//   1. return the value converted back, unchanged;
//   2. call dt.cast_to_date() first, then convert back;
//   3. convert back, shift right by TIME_PART_LENGTH and return it as UInt32.
//
// NOTE(review): the type arguments of `binary_cast` and of both
// `std::is_same_v` conditions are missing from this text, so which value type
// selects which branch cannot be read off here - confirm against upstream.
template
struct ToDateImpl {
    using OpArgType = ArgType;
    using T = typename DateTraits::T;
    static constexpr auto name = "to_date";

    static inline auto execute(const ArgType& t) {
        auto dt = binary_cast(t);
        if constexpr (std::is_same_v>) {
            return binary_cast(dt);
        } else if constexpr (std::is_same_v) {
            // Mutates the local copy only; the input `t` is const.
            dt.cast_to_date();
            return binary_cast(dt);
        } else {
            return (UInt32)(binary_cast(dt) >> TIME_PART_LENGTH);
        }
    }

    static DataTypes get_variadic_argument_types() {
        return {std::make_shared::DateType>()};
    }
};

// Functor behind "date": inherits everything from ToDateImpl and only
// overrides the advertised function name.
template
struct DateImpl : public ToDateImpl {
    static constexpr auto name = "date";
};

// TODO: This function look like no need do indeed copy here, we should optimize
// this function
//
// Functor behind "timestamp": execute() returns its argument unchanged.
template
struct TimeStampImpl {
    using OpArgType = ArgType;
    static constexpr auto name = "timestamp";

    static inline auto execute(const OpArgType& t) { return t; }

    static DataTypes get_variadic_argument_types() {
        return {std::make_shared::DateType>()};
    }
};

// Functor behind "dayname".
//
// execute() copies the C string returned by dt.day_name() (when non-null)
// into res_data starting at `offset`, advances `offset` by its strlen, and
// returns the updated offset. When day_name() is null nothing is written and
// the offset is returned unchanged.
//
// max_size (= MAX_DAY_NAME_LEN) is exposed for callers that pre-size the
// buffer; this function itself performs no bounds check or resize before the
// memcpy.
// NOTE(review): assumes the caller has already made res_data large enough at
// `offset` - verify at every call site.
template
struct DayNameImpl {
    using OpArgType = ArgType;
    static constexpr auto name = "dayname";
    static constexpr auto max_size = MAX_DAY_NAME_LEN;

    static inline auto execute(const typename DateTraits::T& dt, ColumnString::Chars& res_data,
                               size_t& offset) {
        const auto* day_name = dt.day_name();
        if (day_name != nullptr) {
            auto len = strlen(day_name);
            memcpy(&res_data[offset], day_name, len);
            offset += len;
        }
        return offset;
    }

    static DataTypes get_variadic_argument_types() {
        return {std::make_shared::DateType>()};
    }
};

// Functor behind "monthname".
//
// Same contract as DayNameImpl, but the text comes from dt.month_name() and
// max_size is MAX_MONTH_NAME_LEN. No bounds check or resize happens here
// before the memcpy.
// NOTE(review): assumes the caller has already made res_data large enough at
// `offset` - verify at every call site.
template
struct MonthNameImpl {
    using OpArgType = ArgType;
    static constexpr auto name = "monthname";
    static constexpr auto max_size = MAX_MONTH_NAME_LEN;

    static inline auto execute(const typename DateTraits::T& dt, ColumnString::Chars& res_data,
                               size_t& offset) {
        const auto* month_name = dt.month_name();
        if (month_name != nullptr) {
            auto len = strlen(month_name);
            memcpy(&res_data[offset], month_name, len);
            offset += len;
        }
        return offset;
    }

    static DataTypes get_variadic_argument_types() {
        return {std::make_shared::DateType>()};
    }
};

// Functor behind "date_format".
//
// execute() formats one value with `format` and appends the text to res_data.
// It returns a pair {offset, is_null}:
//   - {offset (unchanged), true}  when the format is longer than 128 bytes or
//                                 dt.to_format_string() reports failure;
//   - {offset (advanced),  false} on success, after appending strlen(buf)
//                                 bytes and adding that length to `offset`.
//
// NOTE(review): `buf` is a fixed 128-byte stack array and only the length of
// the *format* is checked. Whether to_format_string() limits the length of
// its *output* is not visible here - confirm, since an expanding format
// specifier could otherwise overrun the buffer.
// NOTE(review): the template parameter list is missing from this text; the
// body uses both `DateType` and `ArgType`, so presumably both are template
// parameters - confirm against upstream.
template
struct DateFormatImpl {
    using FromType = ArgType;

    static constexpr auto name = "date_format";

    static inline auto execute(const FromType& t, StringRef format, ColumnString::Chars& res_data,
                               size_t& offset) {
        // View the raw argument as the date/time value type.
        const auto& dt = (DateType&)t;
        if (format.size > 128) {
            return std::pair {offset, true};
        }
        char buf[128];
        if (!dt.to_format_string(format.data, format.size, buf)) {
            return std::pair {offset, true};
        }

        // The length is taken with strlen, so buf is treated as NUL-terminated.
        auto len = strlen(buf);
        res_data.insert(buf, buf + len);
        offset += len;
        return std::pair {offset, false};
    }

    // Two argument types are advertised: the DateTraits `DateType` and a
    // second one created by the trailing make_shared call.
    // NOTE(review): the type arguments of std::vector, std::dynamic_pointer_cast
    // and the second std::make_shared are missing from this text; given the
    // data_type_string.h include the second is presumably the string data
    // type - confirm.
    static DataTypes get_variadic_argument_types() {
        return std::vector {
                std::dynamic_pointer_cast(
                        std::make_shared::DateType>()),
                std::dynamic_pointer_cast(
                        std::make_shared())};
    }
};

// TODO: This function should be
// depend on arguments not always nullable
//
// Functor behind "from_unixtime".
//
// execute() builds a DateType from the integer `val` in `time_zone`, formats
// it with `format` and appends the text to res_data. It returns a pair
// {offset, is_null}:
//   - {offset (unchanged), true}  when the format is longer than 128 bytes,
//                                 `val` is negative, dt.from_unixtime() fails,
//                                 or dt.to_format_string() fails;
//   - {offset (advanced),  false} on success.
//
// NOTE(review): FromType is Int32, so `val > INT_MAX` looks like it can never
// be true if Int32 is a 32-bit signed integer - confirm whether the check is
// a leftover from a wider input type.
// NOTE(review): `buf` is a fixed 128-byte stack array and only the length of
// the *format* is checked; whether to_format_string() limits the length of
// its *output* is not visible here - confirm.
// NOTE(review): the template parameter list is missing from this text; the
// body uses `DateType`, so presumably that is the template parameter.
template
struct FromUnixTimeImpl {
    using FromType = Int32;

    static constexpr auto name = "from_unixtime";

    static inline auto execute(FromType val, StringRef format, ColumnString::Chars& res_data,
                               size_t& offset, const cctz::time_zone& time_zone) {
        DateType dt;
        if (format.size > 128 || val < 0 || val > INT_MAX || !dt.from_unixtime(val, time_zone)) {
            return std::pair {offset, true};
        }

        char buf[128];
        if (!dt.to_format_string(format.data, format.size, buf)) {
            return std::pair {offset, true};
        }

        // The length is taken with strlen, so buf is treated as NUL-terminated.
        auto len = strlen(buf);
        res_data.insert(buf, buf + len);
        offset += len;
        return std::pair {offset, false};
    }
};

// Column-level driver for string-producing functors that take one date/time
// argument and expose `max_size` plus `execute(value, res_data, offset)`
// (DayNameImpl and MonthNameImpl have that shape).
//
// Both overloads:
//   - resize res_data to len * Transform::max_size up front, so execute() can
//     write at `offset` without growing the buffer;
//   - store the offset returned by execute() in res_offsets[i].
// The first overload additionally fills null_map[i] with
// !is_valid_date(); the second only DCHECKs validity.
//
// `context` is not used in either body.
//
// NOTE(review): res_data keeps its len * max_size size after the loop; it is
// not shrunk to the final offset here. Confirm that consumers rely on
// res_offsets alone.
// NOTE(review): the loop index is `int` while `len` comes from ts.size();
// presumably a signed/unsigned comparison - confirm it cannot matter for the
// row counts used.
// NOTE(review): the template parameter list and the arguments of
// PaddedPODArray / reinterpret_cast / DateTraits are missing from this text.
template
struct TransformerToStringOneArgument {
    // Nullable variant: rows holding an invalid date are flagged in null_map.
    static void vector(FunctionContext* context, const PaddedPODArray& ts,
                       ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
                       NullMap& null_map) {
        const auto len = ts.size();
        res_data.resize(len * Transform::max_size);
        res_offsets.resize(len);
        null_map.resize(len);

        size_t offset = 0;
        for (int i = 0; i < len; ++i) {
            const auto& t = ts[i];
            // View the raw storage element as the date/time value type.
            const auto& date_time_value = reinterpret_cast::T&>(t);
            res_offsets[i] = Transform::execute(date_time_value, res_data, offset);
            null_map[i] = !date_time_value.is_valid_date();
        }
    }

    // Non-nullable variant: validity is only asserted via DCHECK.
    static void vector(FunctionContext* context, const PaddedPODArray& ts,
                       ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
        const auto len = ts.size();
        res_data.resize(len * Transform::max_size);
        res_offsets.resize(len);

        size_t offset = 0;
        for (int i = 0; i < len; ++i) {
            const auto& t = ts[i];
            const auto& date_time_value = reinterpret_cast::T&>(t);
            res_offsets[i] = Transform::execute(date_time_value, res_data, offset);
            DCHECK(date_time_value.is_valid_date());
        }
    }
};

// Column-level driver for string-producing functors that take a value column
// plus one constant format string and whose execute() returns a pair
// {new_offset, is_null} (DateFormatImpl and FromUnixTimeImpl have that shape).
//
// For every row it calls Transform::execute with the same format and records
// the returned offset in res_offsets[i] and the returned flag in null_map[i].
// When the `is_specialization_of_v` condition holds, the session time zone
// taken from `context` is passed as an extra argument.
//
// NOTE(review): the arguments of `is_specialization_of_v` are missing from
// this text; since only FromUnixTimeImpl::execute accepts a time zone, the
// check is presumably for that template - confirm.
// NOTE(review): the loop index is `int` while `len` comes from ts.size().
template
struct TransformerToStringTwoArgument {
    static void vector_constant(FunctionContext* context, const PaddedPODArray& ts,
                                const std::string& format, ColumnString::Chars& res_data,
                                ColumnString::Offsets& res_offsets, PaddedPODArray& null_map) {
        auto len = ts.size();
        res_offsets.resize(len);
        // Capacity hint only (reserve, not resize); execute() appends via insert.
        res_data.reserve(len * format.size() + len);
        null_map.resize_fill(len, false);

        size_t offset = 0;
        for (int i = 0; i < len; ++i) {
            const auto& t = ts[i];
            size_t new_offset;
            bool is_null;
            if constexpr (is_specialization_of_v) {
                std::tie(new_offset, is_null) = Transform::execute(
                        t, StringRef(format.c_str(), format.size()), res_data, offset,
                        context->impl()->state()->timezone_obj());
            } else {
                std::tie(new_offset, is_null) = Transform::execute(
                        t, StringRef(format.c_str(), format.size()), res_data, offset);
            }
            res_offsets[i] = new_offset;
            null_map[i] = is_null;
        }
    }
};

// Generic column-level driver for functors exposing execute(value) that map
// one input element to one output element.
//
// The first overload also fills null_map[i] with !is_valid_date() of the
// input viewed as the DateTraits value type; the second only DCHECKs it.
//
// NOTE(review): the template parameter list and the element types of the
// PaddedPODArray parameters are missing from this text.
template
struct Transformer {
    // Nullable variant.
    static void vector(const PaddedPODArray& vec_from, PaddedPODArray& vec_to, NullMap& null_map) {
        size_t size = vec_from.size();
        vec_to.resize(size);
        null_map.resize(size);

        for (size_t i = 0; i < size; ++i) {
            vec_to[i] = Transform::execute(vec_from[i]);
            null_map[i] = !((typename DateTraits::T&)(vec_from[i])).is_valid_date();
        }
    }

    // Non-nullable variant.
    static void vector(const PaddedPODArray& vec_from, PaddedPODArray& vec_to) {
        size_t size = vec_from.size();
        vec_to.resize(size);

        for (size_t i = 0; i < size; ++i) {
            vec_to[i] = Transform::execute(vec_from[i]);
            DCHECK(((typename DateTraits::T&)(vec_from[i])).is_valid_date());
        }
    }
};

// Specialization of Transformer whose body calls ToYearImpl::execute directly.
//
// Instead of calling is_valid_date() per row, the nullable overload derives
// the null flag from the computed year itself: a row is null when its result
// is greater than MAX_YEAR. Values and flags are produced in two separate
// loops over __restrict-qualified raw pointers.
// NOTE(review): presumably structured this way to keep both loops simple
// enough for auto-vectorization - confirm.
// NOTE(review): the specialization's argument list is mostly missing from
// this text (only a trailing '>' survives).
template
struct Transformer> {
    // Nullable variant.
    static void vector(const PaddedPODArray& vec_from, PaddedPODArray& vec_to, NullMap& null_map) {
        size_t size = vec_from.size();
        vec_to.resize(size);
        null_map.resize(size);

        auto* __restrict to_ptr = vec_to.data();
        auto* __restrict from_ptr = vec_from.data();
        auto* __restrict null_map_ptr = null_map.data();

        // Pass 1: compute the year for every row.
        for (size_t i = 0; i < size; ++i) {
            to_ptr[i] = ToYearImpl::execute(from_ptr[i]);
        }

        // Pass 2: flag rows whose year exceeds MAX_YEAR.
        for (size_t i = 0; i < size; ++i) {
            null_map_ptr[i] = to_ptr[i] > MAX_YEAR;
        }
    }

    // Non-nullable variant: no validity check at all.
    static void vector(const PaddedPODArray& vec_from, PaddedPODArray& vec_to) {
        size_t size = vec_from.size();
        vec_to.resize(size);

        auto* __restrict to_ptr = vec_to.data();
        auto* __restrict from_ptr = vec_from.data();

        for (size_t i = 0; i < size; ++i) {
            to_ptr[i] = ToYearImpl::execute(from_ptr[i]);
        }
    }
};

// Block-level entry point: applies Transform to the first argument column of
// `block` and stores the outcome at position `result`.
//
// Parameters:
//   block            - block holding the argument columns and the result slot.
//   arguments        - column positions; only arguments[0] is read.
//   result           - position whose column is replaced with the output.
//   input_rows_count - used as the initial size of the result null map.
//
// Returns Status::OK() on success, or Status::RuntimeError when the first
// argument column (after removing nullability) is not of the expected column
// type.
//
// NOTE(review): the template parameter list and the template arguments of
// Transformer, check_and_get_column, ColumnVector and assert_cast are missing
// from this text - confirm against upstream.
template
struct DateTimeTransformImpl {
    static Status execute(Block& block, const ColumnNumbers& arguments, size_t result,
                          size_t input_rows_count) {
        using Op = Transformer;

        // The declared result type decides which Op::vector overload runs.
        const auto is_nullable = block.get_by_position(result).type->is_nullable();

        // Work on the nested column; the argument's own null map is merged below.
        const ColumnPtr source_col = remove_nullable(block.get_by_position(arguments[0]).column);
        if (const auto* sources = check_and_get_column>(source_col.get())) {
            auto col_to = ColumnVector::create();
            if (is_nullable) {
                auto null_map = ColumnVector::create(input_rows_count);
                Op::vector(sources->get_data(), col_to->get_data(), null_map->get_data());
                // If the argument itself was nullable, fold its null map into
                // the one just computed.
                // NOTE(review): update_null_map's exact semantics are not
                // visible here; presumably a row ends up null if either map
                // marks it - confirm.
                if (const auto* nullable_col = check_and_get_column(
                            block.get_by_position(arguments[0]).column.get())) {
                    NullMap& result_null_map = assert_cast(*null_map).get_data();
                    const NullMap& src_null_map =
                            assert_cast(nullable_col->get_null_map_column()).get_data();

                    VectorizedUtils::update_null_map(result_null_map, src_null_map);
                }
                block.replace_by_position(
                        result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
            } else {
                Op::vector(sources->get_data(), col_to->get_data());
                block.replace_by_position(result, std::move(col_to));
            }
        } else {
            return Status::RuntimeError("Illegal column {} of first argument of function {}",
                                        block.get_by_position(arguments[0]).column->get_name(),
                                        Transform::name);
        }
        return Status::OK();
    }
};

} // namespace doris::vectorized