// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. // This file is copied from // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsConversion.h // and modified by Doris #pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // IWYU pragma: no_include #include "common/compiler_util.h" // IWYU pragma: keep #include "common/status.h" #include "runtime/runtime_state.h" #include "udf/udf.h" #include "util/jsonb_document.h" #include "util/jsonb_stream.h" #include "util/jsonb_writer.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" #include "vec/columns/column_array.h" #include "vec/columns/column_map.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/column_struct.h" #include "vec/columns/column_vector.h" #include "vec/columns/columns_common.h" #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" #include "vec/common/string_buffer.hpp" #include "vec/common/string_ref.h" #include "vec/core/block.h" #include "vec/core/call_on_type_index.h" #include "vec/core/column_numbers.h" 
#include "vec/core/column_with_type_and_name.h"
#include "vec/core/columns_with_type_and_name.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_date.h"
#include "vec/data_types/data_type_date_time.h"
#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_hll.h"
#include "vec/data_types/data_type_ipv4.h"
#include "vec/data_types/data_type_ipv6.h"
#include "vec/data_types/data_type_jsonb.h"
#include "vec/data_types/data_type_map.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/data_type_struct.h"
#include "vec/data_types/data_type_time.h"
#include "vec/data_types/data_type_time_v2.h"
#include "vec/functions/function.h"
#include "vec/functions/function_convert_tz.h"
#include "vec/functions/function_helpers.h"
#include "vec/io/reader_buffer.h"
#include "vec/runtime/vdatetime_value.h"

class DateLUTImpl;

namespace doris {
namespace vectorized {
template class ColumnDecimal;
} // namespace vectorized
} // namespace doris

namespace doris::vectorized {

/** Type conversion functions.
  * toType - conversion in "natural way";
  */

// Returns the scale carried by the scale argument of a toDecimal() call.
//
// The argument's data type is first checked against four accepted data types
// (NOTE(review): the concrete type arguments of the four
// check_and_get_data_type calls are not visible in this copy of the file).
// If none matches the process is aborted through LOG(FATAL).
// Otherwise row 0 of the argument column is read into a Field and the value
// stored in that Field is returned.
inline UInt32 extract_to_decimal_scale(const ColumnWithTypeAndName& named_column) {
    const auto* arg_type = named_column.type.get();
    bool ok = check_and_get_data_type(arg_type) || check_and_get_data_type(arg_type) ||
              check_and_get_data_type(arg_type) || check_and_get_data_type(arg_type);
    if (!ok) {
        LOG(FATAL) << fmt::format("Illegal type of toDecimal() scale {}",
                                  named_column.type->get_name());
    }

    // Only the first row is consulted.
    Field field;
    named_column.column->get(0, field);
    return field.get();
}

// Precision/scale pair handed to the decimal conversion routines as their
// `additions` argument.
struct PrecisionScaleArg {
    UInt32 precision;
    UInt32 scale;
};

/** Cast from string or number to Time.
  * In Doris, the underlying storage type of the Time class is Float64.
*/ struct TimeCast { // Cast from string // Some examples of conversions. // '300' -> 00:03:00 '20:23' -> 20:23:00 '20:23:24' -> 20:23:24 template static bool try_parse_time(char* s, size_t len, T& x, const cctz::time_zone& local_time_zone) { /// TODO: Maybe we can move Timecast to the io_helper. if (try_as_time(s, len, x, local_time_zone)) { return true; } else { if (VecDateTimeValue dv {}; dv.from_date_str(s, len, local_time_zone)) { // can be parse as a datetime x = dv.hour() * 3600 + dv.minute() * 60 + dv.second(); return true; } return false; } } template static bool try_as_time(char* s, size_t len, T& x, const cctz::time_zone& local_time_zone) { char* first_char = s; char* end_char = s + len; int hour = 0, minute = 0, second = 0; auto parse_from_str_to_int = [](char* begin, size_t len, auto& num) { StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; auto int_value = StringParser::string_to_unsigned_int( reinterpret_cast(begin), len, &parse_result); if (UNLIKELY(parse_result != StringParser::PARSE_SUCCESS)) { return false; } num = int_value; return true; }; if (char* first_colon {nullptr}; (first_colon = (char*)memchr(first_char, ':', len)) != nullptr) { if (char* second_colon {nullptr}; (second_colon = (char*)memchr(first_colon + 1, ':', end_char - first_colon - 1)) != nullptr) { // find two colon // parse hour if (!parse_from_str_to_int(first_char, first_colon - first_char, hour)) { // hour failed return false; } // parse minute if (!parse_from_str_to_int(first_colon + 1, second_colon - first_colon - 1, minute)) { return false; } // parse second if (!parse_from_str_to_int(second_colon + 1, end_char - second_colon - 1, second)) { return false; } } else { // find one colon // parse hour if (!parse_from_str_to_int(first_char, first_colon - first_char, hour)) { return false; } // parse minute if (!parse_from_str_to_int(first_colon + 1, end_char - first_colon - 1, minute)) { return false; } } } else { // no colon ,so try to parse as a number 
size_t from {}; if (!parse_from_str_to_int(first_char, len, from)) { return false; } return try_parse_time(from, x, local_time_zone); } // minute second must be < 60 if (minute >= 60 || second >= 60) { return false; } x = hour * 3600 + minute * 60 + second; return true; } // Cast from number template //requires {std::is_arithmetic_v && std::is_arithmetic_v} static bool try_parse_time(T from, S& x, const cctz::time_zone& local_time_zone) { int64 seconds = from / 100; int64 hour = 0, minute = 0, second = 0; second = from - 100 * seconds; from /= 100; seconds = from / 100; minute = from - 100 * seconds; hour = seconds; if (minute >= 60 || second >= 60) { return false; } x = hour * 3600 + minute * 60 + second; return true; } template static bool try_parse_time(__int128 from, S& x, const cctz::time_zone& local_time_zone) { from %= (int64)(1000000000000); int64 seconds = from / 100; int64 hour = 0, minute = 0, second = 0; second = from - 100 * seconds; from /= 100; seconds = from / 100; minute = from - 100 * seconds; hour = seconds; if (minute >= 60 || second >= 60) { return false; } x = hour * 3600 + minute * 60 + second; return true; } }; /** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment. 
* (Date is represented internally as number of days from some day; DateTime - as unix timestamp) */ template struct ConvertImpl { using FromFieldType = typename FromDataType::FieldType; using ToFieldType = typename ToDataType::FieldType; template static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t /*input_rows_count*/, bool check_overflow [[maybe_unused]] = false, Additions additions [[maybe_unused]] = Additions()) { const ColumnWithTypeAndName& named_from = block.get_by_position(arguments[0]); using ColVecFrom = std::conditional_t, ColumnDecimal, ColumnVector>; using ColVecTo = std::conditional_t, ColumnDecimal, ColumnVector>; if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) { if constexpr (!(IsDataTypeDecimalOrNumber || IsTimeType || IsTimeV2Type) || !IsDataTypeDecimalOrNumber) return Status::RuntimeError("Illegal column {} of first argument of function {}", named_from.column->get_name(), Name::name); } if (const ColVecFrom* col_from = check_and_get_column(named_from.column.get())) { typename ColVecTo::MutablePtr col_to = nullptr; if constexpr (IsDataTypeDecimal) { UInt32 scale = ((PrecisionScaleArg)additions).scale; ToDataType::check_type_scale(scale); col_to = ColVecTo::create(0, scale); } else { col_to = ColVecTo::create(); } const auto& vec_from = col_from->get_data(); auto& vec_to = col_to->get_data(); size_t size = vec_from.size(); vec_to.resize(size); if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) { ColumnUInt8::MutablePtr col_null_map_to = nullptr; UInt8* vec_null_map_to = nullptr; if (check_overflow) { col_null_map_to = ColumnUInt8::create(size, 0); vec_null_map_to = col_null_map_to->get_data().data(); } if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) { convert_decimal_cols( vec_from.data(), vec_to.data(), vec_from.get_scale(), vec_to.get_scale(), vec_from.size(), vec_null_map_to); } else { for (size_t i = 0; i < size; ++i) { if constexpr (IsDataTypeDecimal && 
IsDataTypeDecimal) { vec_to[i] = convert_decimals( vec_from[i], vec_from.get_scale(), vec_to.get_scale(), vec_null_map_to ? &vec_null_map_to[i] : vec_null_map_to); } else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) { vec_to[i] = convert_from_decimal( vec_from[i], vec_from.get_scale()); } else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) { vec_to[i] = convert_to_decimal( vec_from[i], vec_to.get_scale(), vec_null_map_to ? &vec_null_map_to[i] : vec_null_map_to); } else if constexpr (IsTimeType && IsDataTypeDecimal) { vec_to[i] = convert_to_decimal( reinterpret_cast(vec_from[i]) .to_int64(), vec_to.get_scale(), vec_null_map_to ? &vec_null_map_to[i] : vec_null_map_to); } else if constexpr (IsDateV2Type && IsDataTypeDecimal) { vec_to[i] = convert_to_decimal( reinterpret_cast&>( vec_from[i]) .to_date_int_val(), vec_to.get_scale(), vec_null_map_to ? &vec_null_map_to[i] : vec_null_map_to); } else if constexpr (IsDateTimeV2Type && IsDataTypeDecimal) { // TODO: should we consider the scale of datetimev2? vec_to[i] = convert_to_decimal( reinterpret_cast&>( vec_from[i]) .to_date_int_val(), vec_to.get_scale(), vec_null_map_to ? 
&vec_null_map_to[i] : vec_null_map_to); } } } if (check_overflow) { block.replace_by_position( result, ColumnNullable::create(std::move(col_to), std::move(col_null_map_to))); } else { block.replace_by_position(result, std::move(col_to)); } return Status::OK(); } else if constexpr (IsTimeType) { for (size_t i = 0; i < size; ++i) { if constexpr (IsTimeType) { vec_to[i] = static_cast(vec_from[i]); if constexpr (IsDateTimeType) { DataTypeDateTime::cast_to_date_time(vec_to[i]); } else { DataTypeDate::cast_to_date(vec_to[i]); } } else if constexpr (IsDateV2Type) { DataTypeDateV2::cast_from_date(vec_from[i], vec_to[i]); } else if constexpr (IsDateTimeV2Type) { DataTypeDateTimeV2::cast_from_date(vec_from[i], vec_to[i]); } else { vec_to[i] = reinterpret_cast(vec_from[i]).to_int64(); } } } else if constexpr (IsTimeV2Type) { for (size_t i = 0; i < size; ++i) { if constexpr (IsTimeV2Type) { if constexpr (IsDateTimeV2Type && IsDateV2Type) { DataTypeDateV2::cast_to_date_time_v2(vec_from[i], vec_to[i]); } else if constexpr (IsDateTimeV2Type && IsDateV2Type) { DataTypeDateTimeV2::cast_to_date_v2(vec_from[i], vec_to[i]); } else { UInt32 scale = additions; vec_to[i] = vec_from[i] / std::pow(10, 6 - scale); } } else if constexpr (IsTimeType) { if constexpr (IsDateTimeType && IsDateV2Type) { DataTypeDateV2::cast_to_date_time(vec_from[i], vec_to[i]); } else if constexpr (IsDateType && IsDateV2Type) { DataTypeDateV2::cast_to_date(vec_from[i], vec_to[i]); } else if constexpr (IsDateTimeType && IsDateTimeV2Type) { DataTypeDateTimeV2::cast_to_date_time(vec_from[i], vec_to[i]); } else if constexpr (IsDateType && IsDateTimeV2Type) { DataTypeDateTimeV2::cast_to_date(vec_from[i], vec_to[i]); } else { return Status::InvalidArgument("Wrong cast expression!"); } } else { if constexpr (IsDateTimeV2Type) { vec_to[i] = reinterpret_cast&>( vec_from[i]) .to_int64(); } else { vec_to[i] = reinterpret_cast&>( vec_from[i]) .to_int64(); } } } } else { if constexpr (IsDataTypeNumber && std::is_same_v) { // 
300 -> 00:03:00 360 will be parse failed , so value maybe null ColumnUInt8::MutablePtr col_null_map_to; ColumnUInt8::Container* vec_null_map_to = nullptr; col_null_map_to = ColumnUInt8::create(size); vec_null_map_to = &col_null_map_to->get_data(); for (size_t i = 0; i < size; ++i) { (*vec_null_map_to)[i] = !TimeCast::try_parse_time( vec_from[i], vec_to[i], context->state()->timezone_obj()); vec_to[i] *= (1000 * 1000); } block.get_by_position(result).column = ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); return Status::OK(); } else { for (size_t i = 0; i < size; ++i) { vec_to[i] = static_cast(vec_from[i]); } } } // TODO: support boolean cast more reasonable if constexpr (std::is_same_v) { for (int i = 0; i < size; ++i) { vec_to[i] = static_cast(vec_to[i]); } } block.replace_by_position(result, std::move(col_to)); } else { return Status::RuntimeError("Illegal column {} of first argument of function {}", named_from.column->get_name(), Name::name); } return Status::OK(); } }; /** If types are identical, just take reference to column. 
*/ template requires(!T::is_parametric) struct ConvertImpl { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t /*input_rows_count*/) { block.get_by_position(result).column = block.get_by_position(arguments[0]).column; return Status::OK(); } }; // using other type cast to Date/DateTime, unless String // Date/DateTime template struct ConvertImplToTimeType { using FromFieldType = typename FromDataType::FieldType; using ToFieldType = typename ToDataType::FieldType; static Status execute(Block& block, const ColumnNumbers& arguments, size_t result, size_t /*input_rows_count*/) { const ColumnWithTypeAndName& named_from = block.get_by_position(arguments[0]); using ColVecFrom = std::conditional_t, ColumnDecimal, ColumnVector>; using DateValueType = std::conditional_t< IsTimeV2Type, std::conditional_t, DateV2Value, DateV2Value>, VecDateTimeValue>; using ColVecTo = ColumnVector; if (const ColVecFrom* col_from = check_and_get_column(named_from.column.get())) { const auto& vec_from = col_from->get_data(); size_t size = vec_from.size(); // create nested column auto col_to = ColVecTo::create(size); auto& vec_to = col_to->get_data(); // create null column ColumnUInt8::MutablePtr col_null_map_to; col_null_map_to = ColumnUInt8::create(size); auto& vec_null_map_to = col_null_map_to->get_data(); for (size_t i = 0; i < size; ++i) { auto& date_value = reinterpret_cast(vec_to[i]); if constexpr (IsDecimalNumber) { // TODO: should we consider the scale of datetimev2? 
vec_null_map_to[i] = !date_value.from_date_int64( convert_from_decimal( vec_from[i], vec_from.get_scale())); } else { vec_null_map_to[i] = !date_value.from_date_int64(vec_from[i]); } // DateType of VecDateTimeValue should cast to date if constexpr (IsDateType) { date_value.cast_to_date(); } else if constexpr (IsDateTimeType) { date_value.to_datetime(); } } block.get_by_position(result).column = ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); } else { return Status::RuntimeError("Illegal column {} of first argument of function {}", named_from.column->get_name(), Name::name); } return Status::OK(); } }; // Generic conversion of any type to String. struct ConvertImplGenericToString { static Status execute(Block& block, const ColumnNumbers& arguments, size_t result) { const auto& col_with_type_and_name = block.get_by_position(arguments[0]); const IDataType& type = *col_with_type_and_name.type; const IColumn& col_from = *col_with_type_and_name.column; size_t size = col_from.size(); auto col_to = ColumnString::create(); col_to->reserve(size * 2); VectorBufferWriter write_buffer(*col_to.get()); for (size_t i = 0; i < size; ++i) { type.to_string(col_from, i, write_buffer); write_buffer.commit(); } block.replace_by_position(result, std::move(col_to)); return Status::OK(); } static Status execute2(FunctionContext* /*ctx*/, Block& block, const ColumnNumbers& arguments, const size_t result, size_t /*input_rows_count*/) { return execute(block, arguments, result); } }; //this is for data in compound type struct ConvertImplGenericFromString { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, const size_t result, size_t input_rows_count) { const auto& col_with_type_and_name = block.get_by_position(arguments[0]); const IColumn& col_from = *col_with_type_and_name.column; // result column must set type DCHECK(block.get_by_position(result).type != nullptr); auto data_type_to = block.get_by_position(result).type; if 
(const ColumnString* col_from_string = check_and_get_column(&col_from)) { auto col_to = data_type_to->create_column(); auto serde = data_type_to->get_serde(); size_t size = col_from.size(); col_to->reserve(size); ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size); ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data(); const bool is_complex = is_complex_type(data_type_to); DataTypeSerDe::FormatOptions format_options; format_options.converted_from_string = true; for (size_t i = 0; i < size; ++i) { const auto& val = col_from_string->get_data_at(i); // Note: here we should handle the null element if (val.size == 0) { col_to->insert_default(); // empty string('') is an invalid format for complex type, set null_map to 1 if (is_complex) { (*vec_null_map_to)[i] = 1; } continue; } Slice string_slice(val.data, val.size); Status st = serde->deserialize_one_cell_from_json(*col_to, string_slice, format_options); // if parsing failed, will return null (*vec_null_map_to)[i] = !st.ok(); if (!st.ok()) { col_to->insert_default(); } } block.get_by_position(result).column = ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); } else { return Status::RuntimeError( "Illegal column {} of first argument of conversion function from string", col_from.get_name()); } return Status::OK(); } }; // Generic conversion of number to jsonb. 
// Serialises every value of a numeric column as a single JSONB scalar and
// stores the resulting ColumnString at `result`.
//
// The JsonbWriter method is selected at compile time from the column type; an
// unsupported type ends in LOG(FATAL). A RuntimeError status is returned when
// the source column is not of the expected column class.
template struct ConvertImplNumberToJsonb {
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                          const size_t result, size_t input_rows_count) {
        const auto& col_with_type_and_name = block.get_by_position(arguments[0]);

        auto column_string = ColumnString::create();
        JsonbWriter writer;

        const auto* col = check_and_get_column(col_with_type_and_name.column.get());
        // check_and_get_column yields a null pointer for a column of another
        // class; report that like the other converters in this file instead of
        // dereferencing it.
        if (col == nullptr) {
            return Status::RuntimeError(
                    "Illegal column {} of first argument of conversion function to jsonb",
                    col_with_type_and_name.column->get_name());
        }
        const auto& data = col->get_data();

        for (size_t i = 0; i < input_rows_count; i++) {
            // The writer is reused; reset() starts a fresh document for each row.
            writer.reset();
            if constexpr (std::is_same_v) {
                writer.writeBool(data[i]);
            } else if constexpr (std::is_same_v) {
                writer.writeInt8(data[i]);
            } else if constexpr (std::is_same_v) {
                writer.writeInt16(data[i]);
            } else if constexpr (std::is_same_v) {
                writer.writeInt32(data[i]);
            } else if constexpr (std::is_same_v) {
                writer.writeInt64(data[i]);
            } else if constexpr (std::is_same_v) {
                writer.writeInt128(data[i]);
            } else if constexpr (std::is_same_v) {
                writer.writeDouble(data[i]);
            } else {
                LOG(FATAL) << "unsupported type ";
            }
            column_string->insert_data(writer.getOutput()->getBuffer(),
                                       writer.getOutput()->getSize());
        }

        block.replace_by_position(result, std::move(column_string));
        return Status::OK();
    }
};

// Generic conversion of any type to jsonb.
struct ConvertImplGenericToJsonb { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, const size_t result, size_t input_rows_count) { const auto& col_with_type_and_name = block.get_by_position(arguments[0]); const IDataType& type = *col_with_type_and_name.type; const IColumn& col_from = *col_with_type_and_name.column; auto column_string = ColumnString::create(); JsonbWriter writer; auto tmp_col = ColumnString::create(); for (size_t i = 0; i < input_rows_count; i++) { // convert to string tmp_col->clear(); VectorBufferWriter write_buffer(*tmp_col.get()); type.to_string(col_from, i, write_buffer); write_buffer.commit(); // write string to jsonb writer.reset(); writer.writeStartString(); auto str_ref = tmp_col->get_data_at(0); writer.writeString(str_ref.data, str_ref.size); writer.writeEndString(); column_string->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); } block.replace_by_position(result, std::move(column_string)); return Status::OK(); } }; template struct ConvertImplFromJsonb { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, const size_t result, size_t input_rows_count) { const auto& col_with_type_and_name = block.get_by_position(arguments[0]); const IColumn& col_from = *col_with_type_and_name.column; // result column must set type DCHECK(block.get_by_position(result).type != nullptr); auto data_type_to = block.get_by_position(result).type; if (const ColumnString* column_string = check_and_get_column(&col_from)) { auto null_map_col = ColumnUInt8::create(input_rows_count, 0); auto& null_map = null_map_col->get_data(); auto col_to = ColumnType::create(); //IColumn & col_to = *res; // size_t size = col_from.size(); col_to->reserve(input_rows_count); auto& res = col_to->get_data(); res.resize(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { const auto& val = column_string->get_data_at(i); // ReadBuffer 
read_buffer((char*)(val.data), val.size); // RETURN_IF_ERROR(data_type_to->from_string(read_buffer, col_to)); if (val.size == 0) { null_map[i] = 1; res[i] = 0; continue; } // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory JsonbDocument* doc = JsonbDocument::createDocument(val.data, val.size); if (UNLIKELY(!doc || !doc->getValue())) { null_map[i] = 1; res[i] = 0; continue; } // value is NOT necessary to be deleted since JsonbValue will not allocate memory JsonbValue* value = doc->getValue(); if (UNLIKELY(!value)) { null_map[i] = 1; res[i] = 0; continue; } if constexpr (type_index == TypeIndex::UInt8) { if (value->isTrue()) { res[i] = 1; } else if (value->isFalse()) { res[i] = 0; } else { null_map[i] = 1; res[i] = 0; } } else if constexpr (type_index == TypeIndex::Int8) { if (value->isInt8()) { res[i] = (int8_t)((const JsonbIntVal*)value)->val(); } else { null_map[i] = 1; res[i] = 0; } } else if constexpr (type_index == TypeIndex::Int16) { if (value->isInt8() || value->isInt16()) { res[i] = (int16_t)((const JsonbIntVal*)value)->val(); } else { null_map[i] = 1; res[i] = 0; } } else if constexpr (type_index == TypeIndex::Int32) { if (value->isInt8() || value->isInt16() || value->isInt32()) { res[i] = (int32_t)((const JsonbIntVal*)value)->val(); } else { null_map[i] = 1; res[i] = 0; } } else if constexpr (type_index == TypeIndex::Int64) { if (value->isInt8() || value->isInt16() || value->isInt32() || value->isInt64()) { res[i] = (int64_t)((const JsonbIntVal*)value)->val(); } else { null_map[i] = 1; res[i] = 0; } } else if constexpr (type_index == TypeIndex::Int128) { if (value->isInt8() || value->isInt16() || value->isInt32() || value->isInt64() || value->isInt128()) { res[i] = (int128_t)((const JsonbIntVal*)value)->val(); } else { null_map[i] = 1; res[i] = 0; } } else if constexpr (type_index == TypeIndex::Float64) { if (value->isDouble()) { res[i] = ((const JsonbDoubleVal*)value)->val(); } else if (value->isInt8() || value->isInt16() 
|| value->isInt32() || value->isInt64()) { res[i] = ((const JsonbIntVal*)value)->val(); } else { null_map[i] = 1; res[i] = 0; } } else { LOG(FATAL) << "unsupported type "; } } block.replace_by_position( result, ColumnNullable::create(std::move(col_to), std::move(null_map_col))); } else { return Status::RuntimeError( "Illegal column {} of first argument of conversion function from string", col_from.get_name()); } return Status::OK(); } }; template struct ConvertImpl { template static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t /*input_rows_count*/, bool check_overflow [[maybe_unused]] = false, Additions additions [[maybe_unused]] = Additions()) { return Status::RuntimeError("not support convert from string"); } }; struct NameToString { static constexpr auto name = "to_string"; }; struct NameToDecimal32 { static constexpr auto name = "toDecimal32"; }; struct NameToDecimal64 { static constexpr auto name = "toDecimal64"; }; struct NameToDecimal128 { static constexpr auto name = "toDecimal128"; }; struct NameToDecimal128I { static constexpr auto name = "toDecimal128I"; }; struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; }; struct NameToUInt8 { static constexpr auto name = "toUInt8"; }; struct NameToUInt16 { static constexpr auto name = "toUInt16"; }; struct NameToUInt32 { static constexpr auto name = "toUInt32"; }; struct NameToUInt64 { static constexpr auto name = "toUInt64"; }; struct NameToInt8 { static constexpr auto name = "toInt8"; }; struct NameToInt16 { static constexpr auto name = "toInt16"; }; struct NameToInt32 { static constexpr auto name = "toInt32"; }; struct NameToInt64 { static constexpr auto name = "toInt64"; }; struct NameToInt128 { static constexpr auto name = "toInt128"; }; struct NameToFloat32 { static constexpr auto name = "toFloat32"; }; struct NameToFloat64 { static constexpr auto name = "toFloat64"; }; struct NameToIPv4 { static constexpr auto name = "toIPv4"; 
}; struct NameToIPv6 { static constexpr auto name = "toIPv6"; }; struct NameToDate { static constexpr auto name = "toDate"; }; struct NameToDateTime { static constexpr auto name = "toDateTime"; }; template bool try_parse_impl(typename DataType::FieldType& x, ReadBuffer& rb, const cctz::time_zone& local_time_zone, Additions additions [[maybe_unused]] = Additions()) { if constexpr (IsDateTimeType) { return try_read_datetime_text(x, rb, local_time_zone); } if constexpr (IsDateType) { return try_read_date_text(x, rb, local_time_zone); } if constexpr (IsDateV2Type) { return try_read_date_v2_text(x, rb, local_time_zone); } if constexpr (IsDateTimeV2Type) { UInt32 scale = additions; return try_read_datetime_v2_text(x, rb, local_time_zone, scale); } if constexpr (IsIPv4Type) { return try_read_ipv4_text(x, rb); } if constexpr (IsIPv6Type) { return try_read_ipv6_text(x, rb); } if constexpr (std::is_same_v && std::is_same_v) { // cast from string to time(float64) auto len = rb.count(); auto s = rb.position(); rb.position() = rb.end(); // make is_all_read = true auto ret = TimeCast::try_parse_time(s, len, x, local_time_zone); x *= (1000 * 1000); return ret; } if constexpr (std::is_floating_point_v) { return try_read_float_text(x, rb); } // uint8_t now use as boolean in doris if constexpr (std::is_same_v) { return try_read_bool_text(x, rb); } if constexpr (std::is_integral_v) { return try_read_int_text(x, rb); } } template StringParser::ParseResult try_parse_decimal_impl(typename DataType::FieldType& x, ReadBuffer& rb, const cctz::time_zone& local_time_zone, Additions additions [[maybe_unused]] = Additions()) { if constexpr (IsDataTypeDecimalV2) { UInt32 scale = ((PrecisionScaleArg)additions).scale; UInt32 precision = ((PrecisionScaleArg)additions).precision; return try_read_decimal_text(x, rb, precision, scale); } if constexpr (std::is_same_v, DataType>) { UInt32 scale = ((PrecisionScaleArg)additions).scale; UInt32 precision = ((PrecisionScaleArg)additions).precision; return 
try_read_decimal_text(x, rb, precision, scale); } if constexpr (std::is_same_v, DataType>) { UInt32 scale = ((PrecisionScaleArg)additions).scale; UInt32 precision = ((PrecisionScaleArg)additions).precision; return try_read_decimal_text(x, rb, precision, scale); } if constexpr (IsDataTypeDecimal128I) { UInt32 scale = ((PrecisionScaleArg)additions).scale; UInt32 precision = ((PrecisionScaleArg)additions).precision; return try_read_decimal_text(x, rb, precision, scale); } if constexpr (IsDataTypeDecimal256) { UInt32 scale = ((PrecisionScaleArg)additions).scale; UInt32 precision = ((PrecisionScaleArg)additions).precision; return try_read_decimal_text(x, rb, precision, scale); } } /// Monotonicity. struct PositiveMonotonicity { static bool has() { return true; } static IFunction::Monotonicity get(const IDataType&, const Field&, const Field&) { return {true}; } }; struct UnknownMonotonicity { static bool has() { return false; } static IFunction::Monotonicity get(const IDataType&, const Field&, const Field&) { return {false}; } }; template struct ToNumberMonotonicity { static bool has() { return true; } static UInt64 divide_by_range_of_type(UInt64 x) { if constexpr (sizeof(T) < sizeof(UInt64)) return x >> (sizeof(T) * 8); else return 0; } static IFunction::Monotonicity get(const IDataType& type, const Field& left, const Field& right) { if (!type.is_value_represented_by_number()) return {}; /// If type is same, the conversion is always monotonic. /// (Enum has separate case, because it is different data type) if (check_and_get_data_type>( &type) /*|| check_and_get_data_type>(&type)*/) return {true, true, true}; /// Float cases. /// When converting to Float, the conversion is always monotonic. if (std::is_floating_point_v) return {true, true, true}; /// If converting from Float, for monotonicity, arguments must fit in range of result type. 
if (WhichDataType(type).is_float()) {
            // Floating-point source. A null bound means the range is unbounded, which is
            // never reported as monotonic. Otherwise both bounds have to lie inside the
            // numeric_limits range checked below.
            if (left.is_null() || right.is_null()) return {};

            Float64 left_float = left.get();
            Float64 right_float = right.get();

            if (left_float >= std::numeric_limits::min() &&
                left_float <= static_cast(std::numeric_limits::max()) &&
                right_float >= std::numeric_limits::min() &&
                right_float <= static_cast(std::numeric_limits::max()))
                return {true};

            return {};
        }

        /// Integer cases.

        const bool from_is_unsigned = type.is_value_represented_by_unsigned_integer();
        const bool to_is_unsigned = std::is_unsigned_v;

        const size_t size_of_from = type.get_size_of_value_in_memory();
        const size_t size_of_to = sizeof(T);

        // A null left bound is placed in the "first half" only for unsigned sources; a null
        // right bound only for signed sources. Non-null bounds are in the first half when >= 0.
        const bool left_in_first_half = left.is_null() ? from_is_unsigned : (left.get() >= 0);
        const bool right_in_first_half = right.is_null() ? !from_is_unsigned : (right.get() >= 0);

        /// Size of type is the same.
        if (size_of_from == size_of_to) {
            if (from_is_unsigned == to_is_unsigned) return {true, true, true};

            if (left_in_first_half == right_in_first_half) return {true};

            return {};
        }

        /// Size of type is expanded.
        if (size_of_from < size_of_to) {
            if (from_is_unsigned == to_is_unsigned) return {true, true, true};

            if (!to_is_unsigned) return {true, true, true};

            /// signed -> unsigned. If arguments from the same half, then function is monotonic.
            if (left_in_first_half == right_in_first_half) return {true};

            return {};
        }

        /// Size of type is shrunk.
        if (size_of_from > size_of_to) {
            /// Function cannot be monotonic on unbounded ranges.
            if (left.is_null() || right.is_null()) return {};

            if (from_is_unsigned == to_is_unsigned) {
                /// all bits other than that fits, must be same.
                if (divide_by_range_of_type(left.get()) == divide_by_range_of_type(right.get()))
                    return {true};

                return {};
            } else {
                /// When signedness is changed, it's also required for arguments to be from the same half.
                /// And they must be in the same half after converting to the result type.
                if (left_in_first_half == right_in_first_half &&
                    (T(left.get()) >= 0) == (T(right.get()) >= 0) &&
                    divide_by_range_of_type(left.get()) == divide_by_range_of_type(right.get()))
                    return {true};

                return {};
            }
        }

        // The three size comparisons above are exhaustive, so control should never get here.
        LOG(FATAL) << "__builtin_unreachable";
        __builtin_unreachable();
    }
};

/** The monotonicity for the `to_string` function is mainly determined for test purposes.
  * It is doubtful that anyone is looking to optimize queries with conditions `std::to_string(CounterID) = 34`.
  */
struct ToStringMonotonicity {
    /// Monotonicity information is always available for this policy.
    static bool has() { return true; }

    /// Decides whether the conversion is monotonic on the range [left, right].
    ///
    /// Returns `positive` (a Monotonicity constructed with (true, true)) only when both
    /// bounds are present, have the same integer field type, and
    ///  - are both zero, or
    ///  - have the same floor(log10(.)) value (and, for Int64, are both strictly positive).
    /// Every other combination yields a default-constructed Monotonicity.
    /// `type` is not consulted.
    static IFunction::Monotonicity get(const IDataType& type, const Field& left,
                                       const Field& right) {
        IFunction::Monotonicity positive(true, true);
        IFunction::Monotonicity not_monotonic;

        // An unbounded range is never reported as monotonic.
        if (left.is_null() || right.is_null()) return {};

        if (left.get_type() == Field::Types::UInt64 && right.get_type() == Field::Types::UInt64) {
            return (left.get() == 0 && right.get() == 0) ||
                                   (floor(log10(left.get())) == floor(log10(right.get())))
                           ? positive
                           : not_monotonic;
        }

        if (left.get_type() == Field::Types::Int64 && right.get_type() == Field::Types::Int64) {
            // Negative bounds are rejected here: both values must be > 0 (or both exactly 0).
            return (left.get() == 0 && right.get() == 0) ||
                                   (left.get() > 0 && right.get() > 0 &&
                                    floor(log10(left.get())) == floor(log10(right.get())))
                           ? positive
                           : not_monotonic;
        }

        return not_monotonic;
    }
};

/// IFunction implementation of a single conversion.
///
/// execute_impl() looks at the runtime type id of the first argument and forwards to the
/// matching ConvertImpl specialisation; monotonicity queries are delegated to the
/// `Monotonic` policy type.
template class FunctionConvert : public IFunction {
public:
    using Monotonic = MonotonicityImpl;

    static constexpr auto name = Name::name;

    static FunctionPtr create() { return std::make_shared(); }

    String get_name() const override { return name; }

    // Variadic: the declared argument count of 0 is therefore not a fixed arity.
    bool is_variadic() const override { return true; }
    size_t get_number_of_arguments() const override { return 0; }
    // Injectivity is decided at compile time by a type comparison.
    bool get_is_injective(const Block&) override { return std::is_same_v; }

    // This function should not be called for get DateType Ptr
    // using the FunctionCast::get_return_type_impl
    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
        return std::make_shared();
    }

    // Argument index 1 is declared as always constant.
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }

    /// Converts column `arguments[0]` of `block` and stores the outcome at position `result`.
    ///
    /// Returns a RuntimeError when no argument is supplied, when a decimal target is not
    /// given exactly two arguments, or when the source type id is not handled by the
    /// dispatcher; otherwise returns the status produced by the selected ConvertImpl.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) const override {
        if (!arguments.size()) {
            return Status::RuntimeError("Function {} expects at least 1 arguments", get_name());
        }

        const IDataType* from_type = block.get_by_position(arguments[0]).type.get();

        Status ret_status;
        /// Generic conversion of any type to String.
        if constexpr (std::is_same_v) {
            return ConvertImplGenericToString::execute(block, arguments, result);
        } else {
            // Invoked by the dispatcher with a type pair; always returns true so that the
            // dispatcher reports "handled" and the real outcome travels in ret_status.
            auto call = [&](const auto& types) -> bool {
                using Types = std::decay_t;
                using LeftDataType = typename Types::LeftType;
                using RightDataType = typename Types::RightType;

                // now, cast to decimal do not execute the code
                if constexpr (IsDataTypeDecimal) {
                    if (arguments.size() != 2) {
                        ret_status = Status::RuntimeError(
                                "Function {} expects 2 arguments for Decimal.", get_name());
                        return true;
                    }

                    // The scale is taken from the type of the result column.
                    const ColumnWithTypeAndName& scale_column = block.get_by_position(result);
                    ret_status = ConvertImpl::execute(
                            context, block, arguments, result, input_rows_count,
                            context->check_overflow_for_decimal(), scale_column.type->get_scale());
                } else if constexpr (IsDataTypeDateTimeV2) {
                    // Same as above: the scale comes from the result column's type.
                    const ColumnWithTypeAndName& scale_column = block.get_by_position(result);
                    ret_status = ConvertImpl::execute(
                            context, block, arguments, result, input_rows_count,
                            context->check_overflow_for_decimal(), scale_column.type->get_scale());
                } else {
                    ret_status = ConvertImpl::execute(
                            context, block, arguments, result, input_rows_count);
                }
                return true;
            };

            bool done = call_on_index_and_data_type(from_type->get_type_id(), call);
            if (!done) {
                // The dispatcher did not recognise the source type id.
                ret_status = Status::RuntimeError(
                        "Illegal type {} of argument of function {}",
                        block.get_by_position(arguments[0]).type->get_name(), get_name());
            }
            return ret_status;
        }
    }

    bool has_information_about_monotonicity() const override { return Monotonic::has(); }

    Monotonicity get_monotonicity_for_range(const IDataType& type, const Field& left,
                                            const Field& right) const override {
        return Monotonic::get(type, left, right);
    }
};

// Concrete conversion functions, one alias per target type.
using FunctionToUInt8 = FunctionConvert>;
using FunctionToUInt16 = FunctionConvert>;
using FunctionToUInt32 = FunctionConvert>;
using FunctionToUInt64 = FunctionConvert>;
using FunctionToInt8 = FunctionConvert>;
using FunctionToInt16 = FunctionConvert>;
using FunctionToInt32 = FunctionConvert>;
using FunctionToInt64 = FunctionConvert>;
using FunctionToInt128 = FunctionConvert>;
using FunctionToFloat32 = FunctionConvert>;
using FunctionToFloat64 = FunctionConvert>;

using FunctionToTimeV2 = FunctionConvert>;
using FunctionToString = FunctionConvert;
// The decimal targets use UnknownMonotonicity as their monotonicity policy.
using FunctionToDecimal32 = FunctionConvert, NameToDecimal32, UnknownMonotonicity>;
using FunctionToDecimal64 = FunctionConvert, NameToDecimal64, UnknownMonotonicity>;
using FunctionToDecimal128 = FunctionConvert, NameToDecimal128, UnknownMonotonicity>;
using FunctionToDecimal128I = FunctionConvert, NameToDecimal128I, UnknownMonotonicity>;
using FunctionToDecimal256 = FunctionConvert, NameToDecimal256, UnknownMonotonicity>;
using FunctionToIPv4 = FunctionConvert;
using FunctionToIPv6 = FunctionConvert;
using FunctionToDate = FunctionConvert;
using FunctionToDateTime = FunctionConvert;
using FunctionToDateV2 = FunctionConvert;
using FunctionToDateTimeV2 = FunctionConvert;

/// Trait mapping a target data type to its conversion function through the nested `Type`.
/// Only the explicit specialisations below are defined.
template struct FunctionTo;
template <> struct FunctionTo {
    using Type = FunctionToUInt8;
};
template <> struct FunctionTo {
    using Type = FunctionToUInt16;
};
template <> struct FunctionTo {
    using Type = FunctionToUInt32;
};
template <> struct FunctionTo {
    using Type = FunctionToUInt64;
};
template <> struct FunctionTo {
    using Type = FunctionToInt8;
};
template <> struct FunctionTo {
    using Type = FunctionToInt16;
};
template <> struct FunctionTo {
    using Type = FunctionToInt32;
};
template <> struct FunctionTo {
    using Type = FunctionToInt64;
};
template <> struct FunctionTo {
    using Type = FunctionToInt128;
};
template <> struct FunctionTo {
    using Type = FunctionToFloat32;
};
template <> struct FunctionTo {
    using Type = FunctionToFloat64;
};
template <> struct FunctionTo> {
    using Type = FunctionToDecimal32;
};
template <> struct FunctionTo> {
    using Type = FunctionToDecimal64;
};
template <> struct FunctionTo> {
    using Type = FunctionToDecimal128;
};
template <> struct FunctionTo> {
    using Type = FunctionToDecimal128I;
};
template <> struct FunctionTo> {
    using Type = FunctionToDecimal256;
};
template <> struct FunctionTo {
    using Type = FunctionToIPv4;
};
template <> struct FunctionTo {
    using Type = FunctionToIPv6;
};
template <> struct FunctionTo {
    using Type = FunctionToDate;
};
template <> struct FunctionTo {
    using Type = FunctionToDateTime;
};
template <> struct FunctionTo {
    using Type = FunctionToDateV2;
};
template <> struct FunctionTo {
    using Type = FunctionToDateTimeV2;
};
template <> struct FunctionTo {
    using Type = FunctionToTimeV2;
};

/// Prepared (executable) form of CAST: owns a wrapper callable and forwards execution to it.
class PreparedFunctionCast : public PreparedFunctionImpl {
public:
    using WrapperType = std::function;

    /// Takes ownership of `wrapper_function_`. `name_` is stored as a raw pointer and is
    /// not copied, so it has to outlive this object.
    explicit PreparedFunctionCast(WrapperType&& wrapper_function_, const char* name_)
            : wrapper_function(std::move(wrapper_function_)), name(name_) {}

    String get_name() const override { return name; }

protected:
    /// Calls the stored wrapper with the first argument plus every argument from index 2
    /// onwards; the argument at index 1 is not forwarded.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) const override {
        /// drop second argument, pass others
        ColumnNumbers new_arguments {arguments.front()};
        if (arguments.size() > 2)
            new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2),
                                 std::end(arguments));

        return wrapper_function(context, block, new_arguments, result, input_rows_count);
    }

    // The default null / low-cardinality handling is switched off for this function.
    bool use_default_implementation_for_nulls() const override { return false; }
    bool use_default_implementation_for_low_cardinality_columns() const override { return false; }
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }

private:
    WrapperType wrapper_function;
    const char* name;
};

/// Name tag carrying the function name "CAST".
struct NameCast {
    static constexpr auto name = "CAST";
};

/// Conversion of a string column into a typed column by parsing every row.
///
/// The produced column is always a ColumnNullable: a row is marked NULL when parsing fails
/// or when the parser did not consume the whole string.
template struct ConvertThroughParsing {
    static_assert(std::is_same_v,
                  "ConvertThroughParsing is only applicable for String or FixedString data types");

    using ToFieldType = typename ToDataType::FieldType;

    /// True when the buffer has been consumed completely.
    static bool is_all_read(ReadBuffer& in) { return in.eof(); }

    /// Parses column `arguments[0]` row by row and writes a nullable column to `result`.
    ///
    /// `check_overflow` is not used in this body. `additions` is read only for decimal
    /// targets, where it is converted to a PrecisionScaleArg.
    /// Returns RuntimeError when the source column is not a ColumnString, OK otherwise.
    template static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                          size_t result, size_t input_rows_count,
                          bool check_overflow [[maybe_unused]] = false,
                          Additions additions [[maybe_unused]] = Additions()) {
        using ColVecTo = std::conditional_t, ColumnDecimal, ColumnVector>;

        const IColumn* col_from = block.get_by_position(arguments[0]).column.get();
        const ColumnString* col_from_string = check_and_get_column(col_from);

        if (std::is_same_v && !col_from_string) {
            return Status::RuntimeError("Illegal column {} of first argument of function {}",
                                        col_from->get_name(), Name::name);
        }

        size_t size = input_rows_count;
        typename ColVecTo::MutablePtr col_to = nullptr;

        // Decimal columns are created with an explicit scale taken from `additions`.
        if constexpr (IsDataTypeDecimal) {
            UInt32 scale = ((PrecisionScaleArg)additions).scale;
            ToDataType::check_type_scale(scale);
            col_to = ColVecTo::create(size, scale);
        } else {
            col_to = ColVecTo::create(size);
        }

        typename ColVecTo::Container& vec_to = col_to->get_data();

        // Null map for the result, one entry per row.
        ColumnUInt8::MutablePtr col_null_map_to;
        ColumnUInt8::Container* vec_null_map_to [[maybe_unused]] = nullptr;
        col_null_map_to = ColumnUInt8::create(size);
        vec_null_map_to = &col_null_map_to->get_data();

        const ColumnString::Chars* chars = nullptr;
        const IColumn::Offsets* offsets = nullptr;
        // Never assigned a non-zero value in this body; only used by the non-string branch
        // of the two ternaries below.
        size_t fixed_string_size = 0;

        if constexpr (std::is_same_v) {
            chars = &col_from_string->get_chars();
            offsets = &col_from_string->get_offsets();
        }

        size_t current_offset = 0;
        for (size_t i = 0; i < size; ++i) {
            // Row i occupies chars[current_offset, next_offset).
            size_t next_offset = std::is_same_v ? (*offsets)[i]
                                                : (current_offset + fixed_string_size);
            size_t string_size = std::is_same_v ? next_offset - current_offset
                                                : fixed_string_size;

            ReadBuffer read_buffer(&(*chars)[current_offset], string_size);

            bool parsed;
            if constexpr (IsDataTypeDecimal) {
                ToDataType::check_type_precision((PrecisionScaleArg(additions).precision));
                StringParser::ParseResult res = try_parse_decimal_impl(
                        vec_to[i], read_buffer, context->state()->timezone_obj(),
                        PrecisionScaleArg(additions));
                // Overflow and underflow results are treated as successfully parsed.
                parsed = (res == StringParser::PARSE_SUCCESS ||
                          res == StringParser::PARSE_OVERFLOW ||
                          res == StringParser::PARSE_UNDERFLOW);
            } else if constexpr (IsDataTypeDateTimeV2) {
                // The scale is read from the declared type of the result column.
                auto type = check_and_get_data_type(
                        block.get_by_position(result).type.get());
                parsed = try_parse_impl(vec_to[i], read_buffer,
                                        context->state()->timezone_obj(), type->get_scale());
            } else {
                parsed = try_parse_impl(
                        vec_to[i], read_buffer, context->state()->timezone_obj());
            }
            // NULL when parsing failed or when unread input is left in the buffer.
            (*vec_null_map_to)[i] = !parsed || !is_all_read(read_buffer);
            current_offset = next_offset;
        }

        block.get_by_position(result).column =
                ColumnNullable::create(std::move(col_to), std::move(col_null_map_to));
        return Status::OK();
    }
};

// ConvertImpl specialisations that implement the conversion by parsing
// (they inherit everything from ConvertThroughParsing).
template struct ConvertImpl, Name> : ConvertThroughParsing, Name> {};
template struct ConvertImpl, Name> : ConvertThroughParsing, Name> {};
template struct ConvertImpl, Name> : ConvertThroughParsing, Name> {};
template struct ConvertImpl, Name> : ConvertThroughParsing, Name> {};
template struct ConvertImpl, Name> : ConvertThroughParsing, Name> {};
template struct ConvertImpl : ConvertThroughParsing {};
template struct ConvertImpl : ConvertThroughParsing {};

/// IFunction that converts through ConvertThroughParsing. Sources that fail the type
/// check in execute_impl() are rejected with a RuntimeError.
template class FunctionConvertFromString : public IFunction {
public:
    static constexpr auto name = Name::name;

    static FunctionPtr create() { return std::make_shared(); }
    String get_name() const override { return name; }

    bool is_variadic() const override { return true; }
    size_t get_number_of_arguments() const override { return 0; }

    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }

    // This function should not be called for get DateType Ptr
    // using the FunctionCast::get_return_type_impl
    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
        DataTypePtr res;
        // For decimal targets this logs at FATAL level and leaves `res` unset.
        if constexpr (IsDataTypeDecimal) {
            LOG(FATAL) << "Someting wrong with toDecimalNNOrZero() or toDecimalNNOrNull()";

        } else {
            res = std::make_shared();
        }

        return res;
    }

    /// Runs ConvertThroughParsing on `arguments[0]` when its type passes the check below;
    /// otherwise returns a RuntimeError naming the offending type.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) const override {
        const IDataType* from_type = block.get_by_position(arguments[0]).type.get();

        if (check_and_get_data_type(from_type)) {
            return ConvertThroughParsing::execute(
                    context, block, arguments, result, input_rows_count);
        }

        return Status::RuntimeError(
                "Illegal type {} of argument of function {} . Only String or FixedString "
                "argument is accepted for try-conversion function. For other arguments, use "
                "function without 'orZero' or 'orNull'.",
                block.get_by_position(arguments[0]).type->get_name(), get_name());
    }
};

/// IFunction that converts through ConvertImplToTimeType, dispatching on the source type id
/// with call_on_index_and_number_data_type.
template class FunctionConvertToTimeType : public IFunction {
public:
    static constexpr auto name = Name::name;
    static FunctionPtr create() { return std::make_shared(); }

    String get_name() const override { return name; }

    bool is_variadic() const override { return true; }
    size_t get_number_of_arguments() const override { return 0; }

    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }

    // This function should not be called for get DateType Ptr
    // using the FunctionCast::get_return_type_impl
    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
        return std::make_shared();
    }

    /// Returns the status of the selected conversion, or a RuntimeError when the source
    /// type id is not handled by the dispatcher.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) const override {
        Status ret_status = Status::OK();
        const IDataType* from_type = block.get_by_position(arguments[0]).type.get();
        // Always returns true; the conversion outcome is carried in ret_status.
        auto call = [&](const auto& types) -> bool {
            using Types = std::decay_t;
            using LeftDataType = typename Types::LeftType;
            using RightDataType = typename Types::RightType;

            ret_status = ConvertImplToTimeType::execute(
                    block, arguments, result, input_rows_count);
            return true;
        };
        bool done = call_on_index_and_number_data_type(from_type->get_type_id(), call);
        if (!done) {
            return Status::RuntimeError("Illegal type {} of argument of function {}",
                                        block.get_by_position(arguments[0]).type->get_name(),
                                        get_name());
        }

        return ret_status;
    }
};

/// Function base for CAST. prepare() composes a wrapper callable for the
/// (argument type 0 -> return type) pair out of the create_*_wrapper helpers and hands it
/// to a prepared function.
class FunctionCast final : public IFunctionBase {
public:
    using WrapperType = std::function;
    using ElementWrappers = std::vector;
    using MonotonicityForRange = std::function;

    /// `name_` is stored as a raw pointer and is not copied.
    // NOTE(review): monotonicity_for_range_ is an rvalue reference but is copied, not
    // moved, into the member — confirm whether std::move was intended.
    FunctionCast(const char* name_, MonotonicityForRange&& monotonicity_for_range_,
                 const DataTypes& argument_types_, const DataTypePtr& return_type_)
            : name(name_),
              monotonicity_for_range(monotonicity_for_range_),
              argument_types(argument_types_),
              return_type(return_type_) {}

    const DataTypes& get_argument_types() const override { return argument_types; }
    const DataTypePtr& get_return_type() const override { return return_type; }

    /// Builds the conversion wrapper from the first argument type to the return type.
    /// The sample block, argument numbers and result position are not used.
    PreparedFunctionPtr prepare(FunctionContext* context, const Block& /*sample_block*/,
                                const ColumnNumbers& /*arguments*/,
                                size_t /*result*/) const override {
        return std::make_shared(
                prepare_unpack_dictionaries(context, get_argument_types()[0], get_return_type()),
                name);
    }

    String get_name() const override { return name; }

    bool is_deterministic() const override { return true; }
    bool is_deterministic_in_scope_of_query() const override { return true; }

    // Monotonicity information exists exactly when a monotonicity callback was supplied.
    bool has_information_about_monotonicity() const override {
        return static_cast(monotonicity_for_range);
    }

    // Calls the callback unconditionally; see has_information_about_monotonicity().
    Monotonicity get_monotonicity_for_range(const IDataType& type, const Field& left,
                                            const Field& right) const override {
        return monotonicity_for_range(type, left, right);
    }

    bool is_use_default_implementation_for_constants() const override { return true; }

private:
    const char* name;
    MonotonicityForRange monotonicity_for_range;

    DataTypes argument_types;
    DataTypePtr return_type;

    /// Chooses the conversion function for a simple target type and wraps its execute().
    ///
    /// Selection order:
    ///  1. nullable result requested and the source passes the first type check
    ///     -> FunctionConvertFromString;
    ///  2. nullable result requested, the target is a Time/TimeV2 type and the source is
    ///     none of the four checked types -> FunctionConvertToTimeType;
    ///  3. otherwise -> FunctionTo::Type.
    /// The unnamed second parameter is only used for template deduction.
    template WrapperType create_wrapper(const DataTypePtr& from_type, const DataType* const,
                               bool requested_result_is_nullable) const {
        FunctionPtr function;

        if (requested_result_is_nullable && check_and_get_data_type(from_type.get())) {
            /// In case when converting to Nullable type, we apply different parsing rule,
            /// that will not throw an exception but return NULL in case of malformed input.
            function = FunctionConvertFromString::create();
        } else if (requested_result_is_nullable && (IsTimeType || IsTimeV2Type)&&!(
                           check_and_get_data_type(from_type.get()) ||
                           check_and_get_data_type(from_type.get()) ||
                           check_and_get_data_type(from_type.get()) ||
                           check_and_get_data_type(from_type.get()))) {
            function = FunctionConvertToTimeType::create();
        } else {
            function = FunctionTo::Type::create();
        }

        /// Check conversion using underlying function
        { function->get_return_type(ColumnsWithTypeAndName(1, {nullptr, from_type, ""})); }

        // The function object is captured by value so that it lives as long as the wrapper.
        return [function](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                          const size_t result, size_t input_rows_count) {
            return function->execute(context, block, arguments, result, input_rows_count);
        };
    }

    /// Wrapper for a String target: forwards to FunctionToString.
    WrapperType create_string_wrapper(const DataTypePtr& from_type) const {
        FunctionPtr function = FunctionToString::create();

        /// Check conversion using underlying function
        { function->get_return_type(ColumnsWithTypeAndName(1, {nullptr, from_type, ""})); }

        return [function](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                          const size_t result, size_t input_rows_count) {
            return function->execute(context, block, arguments, result, input_rows_count);
        };
    }

    /// Wrapper for a decimal target with the precision and scale of `to_type`.
    ///
    /// Source types outside the int / native uint / decimal / float / date(-time) /
    /// string families get the "unsupported" wrapper. Inside the returned wrapper a failed
    /// conversion status is rethrown as an Exception, and a source type id the dispatcher
    /// does not handle yields a RuntimeError.
    template WrapperType create_decimal_wrapper(const DataTypePtr& from_type,
                                       const DataTypeDecimal* to_type) const {
        using ToDataType = DataTypeDecimal;

        TypeIndex type_index = from_type->get_type_id();
        UInt32 precision = to_type->get_precision();
        UInt32 scale = to_type->get_scale();

        WhichDataType which(type_index);
        bool ok = which.is_int() || which.is_native_uint() || which.is_decimal() ||
                  which.is_float() || which.is_date_or_datetime() ||
                  which.is_date_v2_or_datetime_v2() || which.is_string_or_fixed_string();
        if (!ok) {
            return create_unsupport_wrapper(from_type->get_name(), to_type->get_name());
        }

        return [type_index, precision, scale](FunctionContext* context, Block& block,
                                              const ColumnNumbers& arguments, const size_t result,
                                              size_t input_rows_count) {
            auto res = call_on_index_and_data_type(
                    type_index, [&](const auto& types) -> bool {
                        using Types = std::decay_t;
                        using LeftDataType = typename Types::LeftType;
                        using RightDataType = typename Types::RightType;

                        auto state = ConvertImpl::execute(
                                context, block, arguments, result, input_rows_count,
                                context->check_overflow_for_decimal(),
                                PrecisionScaleArg {precision, scale});
                        // A failure cannot be returned through the bool result of this
                        // callback, so it is raised as an exception instead.
                        if (!state) {
                            throw Exception(state.code(), state.to_string());
                        }
                        return true;
                    });

            /// Additionally check if call_on_index_and_data_type wasn't called at all.
            if (!res) {
                auto to = DataTypeDecimal(precision, scale);
                return Status::RuntimeError("Conversion from {} to {} is not supported",
                                            getTypeName(type_index), to.get_name());
            }
            return Status::OK();
        };
    }

    /// Wrapper that makes the result column share the first argument's column.
    WrapperType create_identity_wrapper(const DataTypePtr&) const {
        return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                  const size_t result, size_t /*input_rows_count*/) {
            block.get_by_position(result).column = block.get_by_position(arguments.front()).column;
            return Status::OK();
        };
    }

    /// Wrapper used for a Nothing source: the result is a default-valued column of `to_type`
    /// resized to the number of input rows. The one-row template column is created once,
    /// here, and captured by the wrapper.
    WrapperType create_nothing_wrapper(const IDataType* to_type) const {
        ColumnPtr res = to_type->create_column_const_with_default_value(1);
        return [res](FunctionContext* context, Block& block, const ColumnNumbers&,
                     const size_t result, size_t input_rows_count) {
            /// Column of Nothing type is trivially convertible to any other column
            block.get_by_position(result).column =
                    res->clone_resized(input_rows_count)->convert_to_full_column_if_const();
            return Status::OK();
        };
    }

    /// Logs `error_msg` as a warning right away (when the wrapper is created) and returns
    /// a wrapper that ignores its inputs and always returns InvalidArgument(error_msg).
    WrapperType create_unsupport_wrapper(const String error_msg) const {
        LOG(WARNING) << error_msg;
        return [error_msg](FunctionContext* /*context*/, Block& /*block*/,
                           const ColumnNumbers& /*arguments*/, const size_t /*result*/,
                           size_t /*input_rows_count*/) {
            return Status::InvalidArgument(error_msg);
        };
    }

    /// Convenience overload that formats the standard "not supported" message.
    WrapperType create_unsupport_wrapper(const String from_type_name,
                                         const String to_type_name) const {
        const String error_msg = fmt::format("Conversion from {} to {} is not supported",
                                             from_type_name, to_type_name);
        return create_unsupport_wrapper(error_msg);
    }

    /// Wrapper for an HLL target. `context` and `to_type` are not used.
    WrapperType create_hll_wrapper(FunctionContext* context, const DataTypePtr& from_type_untyped,
                                   const DataTypeHLL& to_type) const {
        /// Conversion from String through parsing.
        if (check_and_get_data_type(from_type_untyped.get())) {
            return &ConvertImplGenericFromString::execute;
        }

        //TODO if from is not string, it must be HLL?
        const auto* from_type = check_and_get_data_type(from_type_untyped.get());

        if (!from_type) {
            return create_unsupport_wrapper(
                    "CAST AS HLL can only be performed between HLL, String "
                    "types");
        }

        // NOTE(review): this yields an empty wrapper. Presumably unreachable because
        // prepare_impl() returns the identity wrapper for equal types first — confirm.
        return nullptr;
    }

    /// Wrapper for a BITMAP target. `context` and `to_type` are not used.
    WrapperType create_bitmap_wrapper(FunctionContext* context,
                                      const DataTypePtr& from_type_untyped,
                                      const DataTypeBitMap& to_type) const {
        /// Conversion from String through parsing.
        if (check_and_get_data_type(from_type_untyped.get())) {
            return &ConvertImplGenericFromString::execute;
        }

        //TODO if from is not string, it must be BITMAP?
        const auto* from_type = check_and_get_data_type(from_type_untyped.get());

        if (!from_type) {
            return create_unsupport_wrapper(
                    "CAST AS BITMAP can only be performed between BITMAP, String "
                    "types");
        }

        // NOTE(review): this yields an empty wrapper. Presumably unreachable because
        // prepare_impl() returns the identity wrapper for equal types first — confirm.
        return nullptr;
    }

    /// Wrapper for an Array target.
    ///
    /// A source that passes the first type check is converted by generic parsing. An array
    /// source must have the same number of dimensions as the target unless its nested type
    /// is Nothing. The returned wrapper converts the nested data column with the wrapper
    /// prepared for the nested types and reuses the source offsets.
    WrapperType create_array_wrapper(FunctionContext* context, const DataTypePtr& from_type_untyped,
                                     const DataTypeArray& to_type) const {
        /// Conversion from String through parsing.
        if (check_and_get_data_type(from_type_untyped.get())) {
            return &ConvertImplGenericFromString::execute;
        }

        const auto* from_type = check_and_get_data_type(from_type_untyped.get());

        if (!from_type) {
            return create_unsupport_wrapper(
                    "CAST AS Array can only be performed between same-dimensional Array, String "
                    "types");
        }

        DataTypePtr from_nested_type = from_type->get_nested_type();

        /// In query SELECT CAST([] AS Array(Array(String))) from type is Array(Nothing)
        bool from_empty_array = is_nothing(from_nested_type);

        if (from_type->get_number_of_dimensions() != to_type.get_number_of_dimensions() &&
            !from_empty_array) {
            return create_unsupport_wrapper(
                    "CAST AS Array can only be performed between same-dimensional array types");
        }

        const DataTypePtr& to_nested_type = to_type.get_nested_type();

        /// Prepare nested type conversion
        const auto nested_function =
                prepare_unpack_dictionaries(context, from_nested_type, to_nested_type);

        return [nested_function, from_nested_type, to_nested_type](
                       FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                       const size_t result, size_t /*input_rows_count*/) -> Status {
            auto& from_column = block.get_by_position(arguments.front()).column;

            const ColumnArray* from_col_array =
                    check_and_get_column(from_column.get());

            if (from_col_array) {
                /// create columns for converting nested column containing original and result columns
                ColumnWithTypeAndName from_nested_column {from_col_array->get_data_ptr(),
                                                          from_nested_type, ""};

                /// convert nested column
                // The nested source and result columns are appended to `block` itself; the
                // indices are taken right before each insert.
                ColumnNumbers new_arguments {block.columns()};
                block.insert(from_nested_column);

                size_t nested_result = block.columns();
                block.insert({to_nested_type, ""});
                // The row count passed down is the size of the flattened nested column.
                RETURN_IF_ERROR(nested_function(context, block, new_arguments, nested_result,
                                                from_col_array->get_data_ptr()->size()));
                auto nested_result_column = block.get_by_position(nested_result).column;

                /// set converted nested column to result
                block.get_by_position(result).column = ColumnArray::create(
                        nested_result_column, from_col_array->get_offsets_ptr());
            } else {
                return Status::RuntimeError("Illegal column {} for function CAST AS Array",
                                            from_column->get_name());
            }
            return Status::OK();
        };
    }

    // check jsonb value type and get to_type value
    WrapperType create_jsonb_wrapper(const DataTypeJsonb& from_type,
                                     const DataTypePtr& to_type) const {
        // A target that passes this check is served by the generic to-string conversion.
        if (check_and_get_data_type(to_type.get())) {
            return &ConvertImplGenericToString::execute2;
        }

        // Only the numeric targets listed here are supported when casting from JSONB.
        switch (to_type->get_type_id()) {
        case TypeIndex::UInt8:
            return &ConvertImplFromJsonb::execute;
        case TypeIndex::Int8:
            return &ConvertImplFromJsonb::execute;
        case TypeIndex::Int16:
            return &ConvertImplFromJsonb::execute;
        case TypeIndex::Int32:
            return &ConvertImplFromJsonb::execute;
        case TypeIndex::Int64:
            return &ConvertImplFromJsonb::execute;
        case TypeIndex::Int128:
            return &ConvertImplFromJsonb::execute;
        case TypeIndex::Float64:
            return &ConvertImplFromJsonb::execute;
        default:
            return create_unsupport_wrapper(from_type.get_name(), to_type->get_name());
        }

        // Not reached: every switch branch above returns.
        return nullptr;
    }

    // create corresponding jsonb value with type to_type
    // use jsonb writer to create jsonb value
    WrapperType create_jsonb_wrapper(const DataTypePtr& from_type,
                                     const DataTypeJsonb& to_type) const {
        // Listed numeric sources use the number-to-jsonb conversion, String is parsed, and
        // every other source falls back to the generic to-jsonb conversion.
        switch (from_type->get_type_id()) {
        case TypeIndex::UInt8:
            return &ConvertImplNumberToJsonb::execute;
        case TypeIndex::Int8:
            return &ConvertImplNumberToJsonb::execute;
        case TypeIndex::Int16:
            return &ConvertImplNumberToJsonb::execute;
        case TypeIndex::Int32:
            return &ConvertImplNumberToJsonb::execute;
        case TypeIndex::Int64:
            return &ConvertImplNumberToJsonb::execute;
        case TypeIndex::Int128:
            return &ConvertImplNumberToJsonb::execute;
        case TypeIndex::Float64:
            return &ConvertImplNumberToJsonb::execute;
        case TypeIndex::String:
            return &ConvertImplGenericFromString::execute;
        default:
            return &ConvertImplGenericToJsonb::execute;
        }
    }

    //TODO(Amory) . Need support more cast for key , value for map
    /// Wrapper for a Map target: String sources are parsed; map sources have their key and
    /// value columns converted independently and are reassembled with the source offsets.
    WrapperType create_map_wrapper(FunctionContext* context, const DataTypePtr& from_type,
                                   const DataTypeMap& to_type) const {
        if (from_type->get_type_id() == TypeIndex::String) {
            return &ConvertImplGenericFromString::execute;
        }
        auto from = check_and_get_data_type(from_type.get());
        if (!from) {
            return create_unsupport_wrapper(
                    fmt::format("CAST AS Map can only be performed between Map types or from "
                                "String. from type: {}, to type: {}",
                                from_type->get_name(), to_type.get_name()));
        }
        // Index 0 holds the key type, index 1 the value type.
        DataTypes from_kv_types;
        DataTypes to_kv_types;
        from_kv_types.reserve(2);
        to_kv_types.reserve(2);
        from_kv_types.push_back(from->get_key_type());
        from_kv_types.push_back(from->get_value_type());
        to_kv_types.push_back(to_type.get_key_type());
        to_kv_types.push_back(to_type.get_value_type());

        auto kv_wrappers = get_element_wrappers(context, from_kv_types, to_kv_types);
        return [kv_wrappers, from_kv_types, to_kv_types](
                       FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                       const size_t result, size_t /*input_rows_count*/) -> Status {
            auto& from_column = block.get_by_position(arguments.front()).column;
            auto from_col_map = check_and_get_column(from_column.get());
            if (!from_col_map) {
                return Status::RuntimeError("Illegal column {} for function CAST AS MAP",
                                            from_column->get_name());
            }

            Columns converted_columns(2);
            ColumnsWithTypeAndName columnsWithTypeAndName(2);
            columnsWithTypeAndName[0] = {from_col_map->get_keys_ptr(), from_kv_types[0], ""};
            columnsWithTypeAndName[1] = {from_col_map->get_values_ptr(), from_kv_types[1], ""};

            // Convert keys (i == 0) and then values (i == 1); the temporary source and
            // result columns are appended to `block`.
            for (size_t i = 0; i < 2; ++i) {
                ColumnNumbers element_arguments {block.columns()};
                block.insert(columnsWithTypeAndName[i]);
                size_t element_result = block.columns();
                block.insert({to_kv_types[i], ""});
                RETURN_IF_ERROR(kv_wrappers[i](context, block, element_arguments, element_result,
                                               columnsWithTypeAndName[i].column->size()));
                converted_columns[i] = block.get_by_position(element_result).column;
            }

            block.get_by_position(result).column = ColumnMap::create(
                    converted_columns[0], converted_columns[1], from_col_map->get_offsets_ptr());
            return Status::OK();
        };
    }

    /// Prepares one conversion wrapper per (from, to) element type pair, in order.
    /// Both lists are expected to have the same length (checked with DCHECK only).
    ElementWrappers get_element_wrappers(FunctionContext* context,
                                         const DataTypes& from_element_types,
                                         const DataTypes& to_element_types) const {
        DCHECK(from_element_types.size() == to_element_types.size());
        ElementWrappers element_wrappers;
        element_wrappers.reserve(from_element_types.size());
        for (size_t i = 0; i < from_element_types.size(); ++i) {
            const DataTypePtr& from_element_type = from_element_types[i];
            const DataTypePtr& to_element_type = to_element_types[i];
            element_wrappers.push_back(
                    prepare_unpack_dictionaries(context, from_element_type, to_element_type));
        }
        return element_wrappers;
    }

    // check struct value type and get to_type value
    // TODO: need handle another type to cast struct
    /// Wrapper for a Struct target: String sources are parsed; struct sources must have the
    /// same number of elements as the target and are converted element by element.
    WrapperType create_struct_wrapper(FunctionContext* context, const DataTypePtr& from_type,
                                      const DataTypeStruct& to_type) const {
        // support CAST AS Struct from string
        if (from_type->get_type_id() == TypeIndex::String) {
            return &ConvertImplGenericFromString::execute;
        }

        // only support CAST AS Struct from struct or string types
        auto from = check_and_get_data_type(from_type.get());
        if (!from) {
            return create_unsupport_wrapper(
                    fmt::format("CAST AS Struct can only be performed between struct types or from "
                                "String. Left type: {}, right type: {}",
                                from_type->get_name(), to_type.get_name()));
        }

        const auto& from_element_types = from->get_elements();
        const auto& to_element_types = to_type.get_elements();
        // only support CAST AS Struct from struct type with same number of elements
        if (from_element_types.size() != to_element_types.size()) {
            return create_unsupport_wrapper(
                    fmt::format("CAST AS Struct can only be performed between struct types with "
                                "the same number of elements. Left type: {}, right type: {}",
                                from_type->get_name(), to_type.get_name()));
        }

        auto element_wrappers = get_element_wrappers(context, from_element_types, to_element_types);
        // The element type lists are captured by value, so the wrapper keeps its own copies.
        return [element_wrappers, from_element_types, to_element_types](
                       FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                       const size_t result, size_t /*input_rows_count*/) -> Status {
            auto& from_column = block.get_by_position(arguments.front()).column;
            auto from_col_struct = check_and_get_column(from_column.get());
            if (!from_col_struct) {
                return Status::RuntimeError("Illegal column {} for function CAST AS Struct",
                                            from_column->get_name());
            }

            size_t elements_num = to_element_types.size();
            Columns converted_columns(elements_num);
            for (size_t i = 0; i < elements_num; ++i) {
                ColumnWithTypeAndName from_element_column {from_col_struct->get_column_ptr(i),
                                                           from_element_types[i], ""};
                // Temporary source/result columns are appended to `block` for each element.
                ColumnNumbers element_arguments {block.columns()};
                block.insert(from_element_column);

                size_t element_result = block.columns();
                block.insert({to_element_types[i], ""});

                RETURN_IF_ERROR(element_wrappers[i](context, block, element_arguments,
                                                    element_result,
                                                    from_col_struct->get_column(i).size()));
                converted_columns[i] = block.get_by_position(element_result).column;
            }

            block.get_by_position(result).column = ColumnStruct::create(converted_columns);
            return Status::OK();
        };
    }

    /// Entry point for building a conversion wrapper between two (possibly nullable) types.
    ///
    /// A source that is only-null or a null literal is handled here: it is rejected when the
    /// target is not nullable, otherwise the result is filled with a default-valued column
    /// of the result type. All other cases go to prepare_remove_nullable().
    WrapperType prepare_unpack_dictionaries(FunctionContext* context, const DataTypePtr& from_type,
                                            const DataTypePtr& to_type) const {
        const auto& from_nested = from_type;
        const auto& to_nested = to_type;

        if (from_type->only_null() || from_type->is_null_literal()) {
            if (!to_nested->is_nullable()) {
                return create_unsupport_wrapper("Cannot convert NULL to a non-nullable type");
            }

            return [](FunctionContext* context, Block& block, const ColumnNumbers&,
                      const size_t result, size_t input_rows_count) {
                auto& res = block.get_by_position(result);
                res.column = res.type->create_column_const_with_default_value(input_rows_count)
                                     ->convert_to_full_column_if_const();
                return Status::OK();
            };
        }

        // The not-null check is never skipped on this path.
        constexpr bool skip_not_null_check = false;

        auto wrapper = prepare_remove_nullable(context, from_nested, to_nested, skip_not_null_check);

        return wrapper;
    }

    /// Strips Nullable from both types, prepares the nested conversion with prepare_impl()
    /// and adds the nullable handling that the (from, to) combination needs:
    ///  - nullable target: convert in a temporary block, then wrap the result in Nullable;
    ///  - nullable source, non-nullable target: fail when any input row is NULL (unless
    ///    `skip_not_null_check`), otherwise convert the nested column;
    ///  - neither nullable: the nested wrapper is returned unchanged.
    WrapperType prepare_remove_nullable(FunctionContext* context, const DataTypePtr& from_type,
                                        const DataTypePtr& to_type,
                                        bool skip_not_null_check) const {
        /// Determine whether pre-processing and/or post-processing must take place during conversion.
        bool source_is_nullable = from_type->is_nullable();
        bool result_is_nullable = to_type->is_nullable();

        auto wrapper = prepare_impl(context, remove_nullable(from_type), remove_nullable(to_type),
                                    result_is_nullable);

        if (result_is_nullable) {
            return [wrapper, source_is_nullable](FunctionContext* context, Block& block,
                                                 const ColumnNumbers& arguments,
                                                 const size_t result, size_t input_rows_count) {
                /// Create a temporary block on which to perform the operation.
                auto& res = block.get_by_position(result);
                const auto& ret_type = res.type;
                const auto& nullable_type = static_cast(*ret_type);
                const auto& nested_type = nullable_type.get_nested_type();

                Block tmp_block;
                size_t tmp_res_index = 0;
                if (source_is_nullable) {
                    // The temporary block holds the nested (non-nullable) argument columns;
                    // the source is then addressed as column 0 of that block.
                    auto [t_block, tmp_args] =
                            create_block_with_nested_columns(block, arguments, true);
                    tmp_block = std::move(t_block);
                    tmp_res_index = tmp_block.columns();
                    tmp_block.insert({nullptr, nested_type, ""});

                    /// Perform the requested conversion.
                    RETURN_IF_ERROR(
                            wrapper(context, tmp_block, {0}, tmp_res_index, input_rows_count));
                } else {
                    // Work on a copy of the block so the original argument numbers stay valid.
                    tmp_block = block;

                    tmp_res_index = block.columns();
                    tmp_block.insert({nullptr, nested_type, ""});

                    /// Perform the requested conversion.
                    RETURN_IF_ERROR(wrapper(context, tmp_block, arguments, tmp_res_index,
                                            input_rows_count));
                }

                // Note: here we should return the nullable result column
                const auto& tmp_res = tmp_block.get_by_position(tmp_res_index);
                res.column = wrap_in_nullable(tmp_res.column,
                                              Block({block.get_by_position(arguments[0]), tmp_res}),
                                              {0}, 1, input_rows_count);

                return Status::OK();
            };
        } else if (source_is_nullable) {
            /// Conversion from Nullable to non-Nullable.

            return [wrapper, skip_not_null_check](FunctionContext* context, Block& block,
                                                  const ColumnNumbers& arguments,
                                                  const size_t result, size_t input_rows_count) {
                auto [tmp_block, tmp_args, tmp_res] =
                        create_block_with_nested_columns(block, arguments, result);

                /// Check that all values are not-NULL.
                /// Check can be skipped in case if LowCardinality dictionary is transformed.
                /// In that case, correctness will be checked beforehand.
                if (!skip_not_null_check) {
                    const auto& col = block.get_by_position(arguments[0]).column;
                    const auto& nullable_col = assert_cast(*col);
                    const auto& null_map = nullable_col.get_null_map_data();

                    // Any non-zero byte in the null map marks a NULL row.
                    if (!memory_is_zero(null_map.data(), null_map.size())) {
                        return Status::RuntimeError(
                                "Cannot convert NULL value to non-Nullable type");
                    }
                }

                RETURN_IF_ERROR(wrapper(context, tmp_block, tmp_args, tmp_res, input_rows_count));
                block.get_by_position(result).column = tmp_block.get_by_position(tmp_res).column;
                return Status::OK();
            };
        } else {
            return wrapper;
        }
    }

    /// 'from_type' and 'to_type' are nested types in case of Nullable.
    /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested.
WrapperType prepare_impl(FunctionContext* context, const DataTypePtr& from_type, const DataTypePtr& to_type, bool requested_result_is_nullable) const { if (from_type->equals(*to_type)) return create_identity_wrapper(from_type); else if (WhichDataType(from_type).is_nothing()) return create_nothing_wrapper(to_type.get()); if (from_type->get_type_id() == TypeIndex::JSONB) { return create_jsonb_wrapper(static_cast(*from_type), to_type); } if (to_type->get_type_id() == TypeIndex::JSONB) { return create_jsonb_wrapper(from_type, static_cast(*to_type)); } WrapperType ret; auto make_default_wrapper = [&](const auto& types) -> bool { using Types = std::decay_t; using ToDataType = typename Types::LeftType; if constexpr (std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v) { ret = create_wrapper(from_type, check_and_get_data_type(to_type.get()), requested_result_is_nullable); return true; } if constexpr (std::is_same_v> || std::is_same_v> || std::is_same_v> || std::is_same_v> || std::is_same_v>) { ret = create_decimal_wrapper(from_type, check_and_get_data_type(to_type.get())); return true; } return false; }; if (call_on_index_and_data_type(to_type->get_type_id(), make_default_wrapper)) return ret; switch (to_type->get_type_id()) { case TypeIndex::String: return create_string_wrapper(from_type); case TypeIndex::Array: return create_array_wrapper(context, from_type, static_cast(*to_type)); case TypeIndex::Struct: return create_struct_wrapper(context, from_type, static_cast(*to_type)); case TypeIndex::Map: return create_map_wrapper(context, from_type, static_cast(*to_type)); case TypeIndex::HLL: return create_hll_wrapper(context, from_type, static_cast(*to_type)); case TypeIndex::BitMap: return 
create_bitmap_wrapper(context, from_type, static_cast(*to_type)); default: break; } return create_unsupport_wrapper(from_type->get_name(), to_type->get_name()); } }; class FunctionBuilderCast : public FunctionBuilderImpl { public: using MonotonicityForRange = FunctionCast::MonotonicityForRange; static constexpr auto name = "CAST"; static FunctionBuilderPtr create() { return std::make_shared(); } FunctionBuilderCast() = default; String get_name() const override { return name; } size_t get_number_of_arguments() const override { return 2; } ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } protected: FunctionBasePtr build_impl(const ColumnsWithTypeAndName& arguments, const DataTypePtr& return_type) const override { DataTypes data_types(arguments.size()); for (size_t i = 0; i < arguments.size(); ++i) data_types[i] = arguments[i].type; auto monotonicity = get_monotonicity_information(arguments.front().type, return_type.get()); return std::make_shared(name, std::move(monotonicity), data_types, return_type); } DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { DataTypePtr type = arguments[1].type; DCHECK(type != nullptr); bool need_to_be_nullable = false; // 1. from_type is nullable need_to_be_nullable |= arguments[0].type->is_nullable(); // 2. from_type is string, to_type is not string need_to_be_nullable |= (arguments[0].type->get_type_id() == TypeIndex::String) && (type->get_type_id() != TypeIndex::String); // 3. from_type is not DateTime/Date, to_type is DateTime/Date need_to_be_nullable |= (arguments[0].type->get_type_id() != TypeIndex::Date && arguments[0].type->get_type_id() != TypeIndex::DateTime) && (type->get_type_id() == TypeIndex::Date || type->get_type_id() == TypeIndex::DateTime); // 4. 
from_type is not DateTimeV2/DateV2, to_type is DateTimeV2/DateV2 need_to_be_nullable |= (arguments[0].type->get_type_id() != TypeIndex::DateV2 && arguments[0].type->get_type_id() != TypeIndex::DateTimeV2) && (type->get_type_id() == TypeIndex::DateV2 || type->get_type_id() == TypeIndex::DateTimeV2); if (need_to_be_nullable && !type->is_nullable()) { return make_nullable(type); } return type; } bool use_default_implementation_for_nulls() const override { return false; } bool use_default_implementation_for_low_cardinality_columns() const override { return false; } private: template static auto monotonicity_for_type(const DataType* const) { return FunctionTo::Type::Monotonic::get; } MonotonicityForRange get_monotonicity_information(const DataTypePtr& from_type, const IDataType* to_type) const { if (const auto type = check_and_get_data_type(to_type)) return monotonicity_for_type(type); if (const auto type = check_and_get_data_type(to_type)) return monotonicity_for_type(type); if (const auto type = check_and_get_data_type(to_type)) return monotonicity_for_type(type); if (const auto type = check_and_get_data_type(to_type)) return monotonicity_for_type(type); if (const auto type = check_and_get_data_type(to_type)) return monotonicity_for_type(type); if (const auto type = check_and_get_data_type(to_type)) return monotonicity_for_type(type); if (const auto type = check_and_get_data_type(to_type)) return monotonicity_for_type(type); if (const auto type = check_and_get_data_type(to_type)) return monotonicity_for_type(type); if (const auto type = check_and_get_data_type(to_type)) return monotonicity_for_type(type); if (const auto type = check_and_get_data_type(to_type)) return monotonicity_for_type(type); /// other types like Null, FixedString, Array and Tuple have no monotonicity defined return {}; } }; } // namespace doris::vectorized