[feature](function) Modified cast as time to behave more like MySQL (#18565)
Previously, because the underlying type of TIME is Float64, `select cast("19:22:18" as time)` tried to read the string as a floating-point number and therefore returned a NULL value.
Results in the following:
This commit is contained in:
@ -23,6 +23,7 @@
|
||||
#include <utility>
|
||||
|
||||
#include "vec/core/types.h"
|
||||
#include "vec/data_types/data_type_time.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
@ -202,7 +203,8 @@ bool call_on_index_and_data_type(TypeIndex number, F&& f) {
|
||||
return f(TypePair<DataTypeNumber<Float32>, T>());
|
||||
case TypeIndex::Float64:
|
||||
return f(TypePair<DataTypeNumber<Float64>, T>());
|
||||
|
||||
case TypeIndex::Time:
|
||||
return f(TypePair<DataTypeTime, T>());
|
||||
case TypeIndex::Decimal32:
|
||||
return f(TypePair<DataTypeDecimal<Decimal32>, T>());
|
||||
case TypeIndex::Decimal64:
|
||||
|
||||
@ -91,6 +91,7 @@ enum class TypeIndex {
|
||||
Struct = 40,
|
||||
VARIANT = 41,
|
||||
QuantileState = 42,
|
||||
Time = 43
|
||||
};
|
||||
|
||||
struct Consted {
|
||||
@ -626,6 +627,8 @@ inline const char* getTypeName(TypeIndex idx) {
|
||||
return "Struct";
|
||||
case TypeIndex::QuantileState:
|
||||
return TypeName<QuantileState<double>>::get();
|
||||
case TypeIndex::Time:
|
||||
return "Time";
|
||||
}
|
||||
|
||||
__builtin_unreachable();
|
||||
|
||||
@ -171,6 +171,8 @@ PGenericType_TypeId IDataType::get_pdata_type(const IDataType* data_type) {
|
||||
return PGenericType::JSONB;
|
||||
case TypeIndex::Map:
|
||||
return PGenericType::MAP;
|
||||
case TypeIndex::Time:
|
||||
return PGenericType::TIME;
|
||||
default:
|
||||
return PGenericType::UNKNOWN;
|
||||
}
|
||||
|
||||
@ -286,6 +286,9 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool
|
||||
case TypeIndex::DateV2:
|
||||
nested = std::make_shared<vectorized::DataTypeDateV2>();
|
||||
break;
|
||||
case TypeIndex::Time:
|
||||
nested = std::make_shared<DataTypeTime>();
|
||||
break;
|
||||
case TypeIndex::DateTimeV2:
|
||||
nested = std::make_shared<DataTypeDateTimeV2>();
|
||||
break;
|
||||
@ -522,6 +525,10 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) {
|
||||
nested = std::make_shared<DataTypeQuantileStateDouble>();
|
||||
break;
|
||||
}
|
||||
case PGenericType::TIME: {
|
||||
nested = std::make_shared<DataTypeTime>();
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
LOG(FATAL) << fmt::format("Unknown data type: {}", pcolumn.type());
|
||||
return nullptr;
|
||||
|
||||
@ -76,6 +76,7 @@ public:
|
||||
DataTypeSerDeSPtr get_serde() const override {
|
||||
return std::make_shared<DataTypeNumberSerDe<Float64>>();
|
||||
};
|
||||
// Identifies this data type as TypeIndex::Time, even though get_serde() above
// serializes it with the Float64 number serde.
TypeIndex get_type_id() const override { return TypeIndex::Time; }
|
||||
};
|
||||
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -75,6 +75,7 @@
|
||||
#include "vec/data_types/data_type_number.h"
|
||||
#include "vec/data_types/data_type_string.h"
|
||||
#include "vec/data_types/data_type_struct.h"
|
||||
#include "vec/data_types/data_type_time.h"
|
||||
#include "vec/data_types/data_type_time_v2.h"
|
||||
#include "vec/functions/function.h"
|
||||
#include "vec/functions/function_helpers.h"
|
||||
@ -110,7 +111,92 @@ inline UInt32 extract_to_decimal_scale(const ColumnWithTypeAndName& named_column
|
||||
named_column.column->get(0, field);
|
||||
return field.get<UInt32>();
|
||||
}
|
||||
/** Cast from string or number to Time.
|
||||
* In Doris, the underlying storage type of the Time class is Float64.
|
||||
*/
|
||||
struct TimeCast {
|
||||
// Cast from string
|
||||
// Some examples of conversions.
|
||||
// '300' -> 00:03:00 '20:23' -> 20:23:00 '20:23:24' -> 20:23:24
|
||||
template <typename T>
|
||||
static bool try_parse_time(char* s, size_t len, T& x) {
|
||||
char* first_char = s;
|
||||
char* end_char = s + len;
|
||||
int hour = 0, minute = 0, second = 0;
|
||||
auto parse_from_str_to_int = [](char* begin, size_t len, auto& num) {
|
||||
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
|
||||
auto int_value = StringParser::string_to_unsigned_int<uint64_t>(
|
||||
reinterpret_cast<char*>(begin), len, &parse_result);
|
||||
if (UNLIKELY(parse_result != StringParser::PARSE_SUCCESS)) {
|
||||
return false;
|
||||
}
|
||||
num = int_value;
|
||||
return true;
|
||||
};
|
||||
if (char* first_colon {nullptr};
|
||||
(first_colon = (char*)memchr(first_char, ':', len)) != nullptr) {
|
||||
if (char* second_colon {nullptr};
|
||||
(second_colon = (char*)memchr(first_colon + 1, ':', end_char - first_colon - 1)) !=
|
||||
nullptr) {
|
||||
// find two colon
|
||||
// parse hour
|
||||
if (!parse_from_str_to_int(first_char, first_colon - first_char, hour)) {
|
||||
// hour failed
|
||||
return false;
|
||||
}
|
||||
// parse minute
|
||||
if (!parse_from_str_to_int(first_colon + 1, second_colon - first_colon - 1,
|
||||
minute)) {
|
||||
return false;
|
||||
}
|
||||
// parse second
|
||||
if (!parse_from_str_to_int(second_colon + 1, end_char - second_colon - 1, second)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// find one colon
|
||||
// parse hour
|
||||
if (!parse_from_str_to_int(first_char, first_colon - first_char, hour)) {
|
||||
return false;
|
||||
}
|
||||
// parse minute
|
||||
if (!parse_from_str_to_int(first_colon + 1, end_char - first_colon - 1, minute)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// no colon ,so try to parse as a number
|
||||
size_t from {};
|
||||
if (!parse_from_str_to_int(first_char, len, from)) {
|
||||
return false;
|
||||
}
|
||||
return try_parse_time(from, x);
|
||||
}
|
||||
// minute second must be < 60
|
||||
if (minute >= 60 || second >= 60) {
|
||||
return false;
|
||||
}
|
||||
x = hour * 3600 + minute * 60 + second;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Cast from number
|
||||
template <typename T, typename S>
|
||||
static bool try_parse_time(T from, S& x) {
|
||||
int64 seconds = from / 100;
|
||||
int64 hour = 0, minute = 0, second = 0;
|
||||
second = from - 100 * seconds;
|
||||
from /= 100;
|
||||
seconds = from / 100;
|
||||
minute = from - 100 * seconds;
|
||||
hour = seconds;
|
||||
if (minute >= 60 || second >= 60) {
|
||||
return false;
|
||||
}
|
||||
x = hour * 3600 + minute * 60 + second;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment.
|
||||
* (Date is represented internally as number of days from some day; DateTime - as unix timestamp)
|
||||
*/
|
||||
@ -275,11 +361,25 @@ struct ConvertImpl {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
|
||||
if constexpr (IsDataTypeNumber<FromDataType> &&
|
||||
std::is_same_v<ToDataType, DataTypeTime>) {
|
||||
// e.g. 300 -> 00:03:00, but 360 fails to parse (60 is not a valid seconds value), so the result may be NULL
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container* vec_null_map_to = nullptr;
|
||||
col_null_map_to = ColumnUInt8::create(size);
|
||||
vec_null_map_to = &col_null_map_to->get_data();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
(*vec_null_map_to)[i] = !TimeCast::try_parse_time(vec_from[i], vec_to[i]);
|
||||
}
|
||||
block.get_by_position(result).column =
|
||||
ColumnNullable::create(std::move(col_to), std::move(col_null_map_to));
|
||||
return Status::OK();
|
||||
} else {
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: support boolean cast more reasonable
|
||||
if constexpr (std::is_same_v<uint8_t, ToFieldType>) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
@ -699,7 +799,7 @@ struct NameToDateTime {
|
||||
static constexpr auto name = "toDateTime";
|
||||
};
|
||||
|
||||
template <typename DataType, typename Additions = void*>
|
||||
template <typename DataType, typename Additions = void*, typename FromDataType = void*>
|
||||
bool try_parse_impl(typename DataType::FieldType& x, ReadBuffer& rb, const DateLUTImpl*,
|
||||
Additions additions [[maybe_unused]] = Additions()) {
|
||||
if constexpr (IsDateTimeType<DataType>) {
|
||||
@ -719,6 +819,15 @@ bool try_parse_impl(typename DataType::FieldType& x, ReadBuffer& rb, const DateL
|
||||
return try_read_datetime_v2_text(x, rb, scale);
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<DataTypeString, FromDataType> &&
|
||||
std::is_same_v<DataTypeTime, DataType>) {
|
||||
// cast from string to time(float64)
|
||||
auto len = rb.count();
|
||||
auto s = rb.position();
|
||||
rb.position() = rb.end(); // make is_all_read = true
|
||||
return TimeCast::try_parse_time(s, len, x);
|
||||
}
|
||||
|
||||
if constexpr (std::is_floating_point_v<typename DataType::FieldType>) {
|
||||
return try_read_float_text(x, rb);
|
||||
}
|
||||
@ -1002,6 +1111,8 @@ using FunctionToFloat32 =
|
||||
FunctionConvert<DataTypeFloat32, NameToFloat32, ToNumberMonotonicity<Float32>>;
|
||||
using FunctionToFloat64 =
|
||||
FunctionConvert<DataTypeFloat64, NameToFloat64, ToNumberMonotonicity<Float64>>;
|
||||
|
||||
// Conversion function used when the target type is DataTypeTime
// (selected through FunctionTo<DataTypeTime>).
// NOTE(review): this reuses NameToFloat64, so the function presumably reports
// the Float64 conversion's name - confirm whether a dedicated name struct is wanted.
using FunctionToTime = FunctionConvert<DataTypeTime, NameToFloat64, ToNumberMonotonicity<Float64>>;
|
||||
using FunctionToString = FunctionConvert<DataTypeString, NameToString, ToStringMonotonicity>;
|
||||
using FunctionToDecimal32 =
|
||||
FunctionConvert<DataTypeDecimal<Decimal32>, NameToDecimal32, UnknownMonotonicity>;
|
||||
@ -1096,7 +1207,10 @@ template <>
|
||||
struct FunctionTo<DataTypeDateTimeV2> {
|
||||
using Type = FunctionToDateTimeV2;
|
||||
};
|
||||
|
||||
// Maps the target type DataTypeTime to its conversion function, FunctionToTime.
template <>
|
||||
struct FunctionTo<DataTypeTime> {
|
||||
using Type = FunctionToTime;
|
||||
};
|
||||
class PreparedFunctionCast : public PreparedFunctionImpl {
|
||||
public:
|
||||
using WrapperType = std::function<Status(FunctionContext* context, Block&, const ColumnNumbers&,
|
||||
@ -1186,7 +1300,6 @@ struct ConvertThroughParsing {
|
||||
}
|
||||
|
||||
size_t current_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
size_t next_offset = std::is_same_v<FromDataType, DataTypeString>
|
||||
? (*offsets)[i]
|
||||
@ -1207,7 +1320,8 @@ struct ConvertThroughParsing {
|
||||
parsed = try_parse_impl<ToDataType>(vec_to[i], read_buffer, local_time_zone,
|
||||
type->get_scale());
|
||||
} else {
|
||||
parsed = try_parse_impl<ToDataType>(vec_to[i], read_buffer, local_time_zone);
|
||||
parsed = try_parse_impl<ToDataType, void*, FromDataType>(vec_to[i], read_buffer,
|
||||
local_time_zone);
|
||||
}
|
||||
(*vec_null_map_to)[i] = !parsed || !is_all_read(read_buffer);
|
||||
|
||||
@ -1837,7 +1951,8 @@ private:
|
||||
std::is_same_v<ToDataType, DataTypeDate> ||
|
||||
std::is_same_v<ToDataType, DataTypeDateTime> ||
|
||||
std::is_same_v<ToDataType, DataTypeDateV2> ||
|
||||
std::is_same_v<ToDataType, DataTypeDateTimeV2>) {
|
||||
std::is_same_v<ToDataType, DataTypeDateTimeV2> ||
|
||||
std::is_same_v<ToDataType, DataTypeTime>) {
|
||||
ret = create_wrapper(from_type, check_and_get_data_type<ToDataType>(to_type.get()),
|
||||
requested_result_is_nullable);
|
||||
return true;
|
||||
|
||||
@ -107,6 +107,7 @@ message PGenericType {
|
||||
DECIMAL128I = 32;
|
||||
VARIANT = 33;
|
||||
QUANTILE_STATE = 34;
|
||||
TIME = 35;
|
||||
UNKNOWN = 999;
|
||||
}
|
||||
required TypeId id = 2;
|
||||
|
||||
9
regression-test/data/correctness/test_cast_as_time.out
Normal file
9
regression-test/data/correctness/test_cast_as_time.out
Normal file
@ -0,0 +1,9 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !select1 --
|
||||
00:03:00
|
||||
\\N
|
||||
20:20:20
|
||||
-- !select2 --
|
||||
19:18:17
|
||||
30:20:00
|
||||
00:04:00
|
||||
48
regression-test/suites/correctness/test_cast_as_time.groovy
Normal file
48
regression-test/suites/correctness/test_cast_as_time.groovy
Normal file
@ -0,0 +1,48 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// Regression test for CAST(... AS TIME) from an INT column and from a VARCHAR column.
// The expected rows live in the matching test_cast_as_time.out file.
suite("test_cast_as_time") {
|
||||
// Start from a clean slate so the suite can be re-run.
sql """ DROP TABLE IF EXISTS tbl_cast_as_time """
|
||||
// Two columns: `id` feeds the numeric cast, `str` feeds the string cast.
sql """
|
||||
CREATE TABLE tbl_cast_as_time (
|
||||
id INT DEFAULT '10',
|
||||
str VARCHAR(32) DEFAULT ''
|
||||
) ENGINE=OLAP
|
||||
AGGREGATE KEY(id,str)
|
||||
DISTRIBUTED BY HASH(id) BUCKETS 10
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1",
|
||||
"in_memory" = "false",
|
||||
"storage_format" = "V2"
|
||||
);
|
||||
"""
|
||||
// id 300 -> 00:03:00; str '19:18:17' -> 19:18:17
sql """
|
||||
insert into tbl_cast_as_time values(300,'19:18:17')
|
||||
"""
|
||||
// id 360 -> NULL (60 in the seconds position is rejected); str '30:20' is read as hour:minute -> 30:20:00
sql """
|
||||
insert into tbl_cast_as_time values(360,'30:20')
|
||||
"""
|
||||
// id 202020 -> 20:20:20; the colon-free str '400' is parsed as a number -> 00:04:00
sql """
|
||||
insert into tbl_cast_as_time values(202020,'400')
|
||||
"""
|
||||
// Numeric -> TIME; ordered by id so the rows line up with the .out file.
qt_select1 """
|
||||
select cast(id as time) from tbl_cast_as_time order by id
|
||||
"""
|
||||
// String -> TIME; same ordering.
qt_select2 """
|
||||
select cast(str as time) from tbl_cast_as_time order by id
|
||||
"""
|
||||
}
|
||||
Reference in New Issue
Block a user