[branch-2.1](function) fix date_format and from_unixtime core when meet long format string (#35883) (#36158)
pick #35883
This commit is contained in:
@ -1236,11 +1236,11 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATEV2>
|
||||
CppType tmp = *reinterpret_cast<const CppType*>(src);
|
||||
DateV2Value<DateV2ValueType> value =
|
||||
binary_cast<CppType, DateV2Value<DateV2ValueType>>(tmp);
|
||||
string format = "%Y-%m-%d";
|
||||
string res;
|
||||
res.resize(12);
|
||||
res.reserve(12);
|
||||
value.to_format_string(format.c_str(), format.size(), res.data());
|
||||
std::string format = "%Y-%m-%d";
|
||||
std::string res;
|
||||
res.resize(12 + SAFE_FORMAT_STRING_MARGIN);
|
||||
value.to_format_string_conservative(format.c_str(), format.size(), res.data(),
|
||||
12 + SAFE_FORMAT_STRING_MARGIN);
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -1277,9 +1277,9 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>
|
||||
binary_cast<CppType, DateV2Value<DateTimeV2ValueType>>(tmp);
|
||||
string format = "%Y-%m-%d %H:%i:%s.%f";
|
||||
string res;
|
||||
res.resize(30);
|
||||
res.reserve(30);
|
||||
value.to_format_string(format.c_str(), format.size(), res.data());
|
||||
res.resize(30 + SAFE_FORMAT_STRING_MARGIN);
|
||||
value.to_format_string_conservative(format.c_str(), format.size(), res.data(),
|
||||
30 + SAFE_FORMAT_STRING_MARGIN);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
@ -20,7 +20,6 @@
|
||||
#include <arrow/builder.h>
|
||||
|
||||
#include <chrono> // IWYU pragma: keep
|
||||
#include <type_traits>
|
||||
|
||||
#include "vec/columns/column_const.h"
|
||||
#include "vec/io/io_helper.h"
|
||||
@ -32,8 +31,7 @@ enum {
|
||||
DIVISOR_FOR_NANO = 1000000000
|
||||
};
|
||||
|
||||
namespace doris {
|
||||
namespace vectorized {
|
||||
namespace doris::vectorized {
|
||||
static const int64_t timestamp_threshold = -2177481943;
|
||||
static const int64_t timestamp_diff = 343;
|
||||
static const int64_t micr_to_nano_second = 1000;
|
||||
@ -57,8 +55,9 @@ Status DataTypeDateTimeV2SerDe::serialize_one_cell_to_json(const IColumn& column
|
||||
|
||||
if (options.date_olap_format) {
|
||||
std::string format = "%Y-%m-%d %H:%i:%s.%f";
|
||||
char buf[30];
|
||||
val.to_format_string(format.c_str(), format.size(), buf);
|
||||
char buf[30 + SAFE_FORMAT_STRING_MARGIN];
|
||||
val.to_format_string_conservative(format.c_str(), format.size(), buf,
|
||||
30 + SAFE_FORMAT_STRING_MARGIN);
|
||||
std::string s = std::string(buf);
|
||||
bw.write(s.c_str(), s.length());
|
||||
} else {
|
||||
@ -132,7 +131,7 @@ void DataTypeDateTimeV2SerDe::read_column_from_arrow(IColumn& column,
|
||||
auto& col_data = static_cast<ColumnVector<Int64>&>(column).get_data();
|
||||
int64_t divisor = 1;
|
||||
if (arrow_array->type()->id() == arrow::Type::TIMESTAMP) {
|
||||
auto concrete_array = dynamic_cast<const arrow::TimestampArray*>(arrow_array);
|
||||
const auto* concrete_array = dynamic_cast<const arrow::TimestampArray*>(arrow_array);
|
||||
const auto type = std::static_pointer_cast<arrow::TimestampType>(arrow_array->type());
|
||||
switch (type->unit()) {
|
||||
case arrow::TimeUnit::type::SECOND: {
|
||||
@ -176,7 +175,7 @@ template <bool is_binary_format>
|
||||
Status DataTypeDateTimeV2SerDe::_write_column_to_mysql(const IColumn& column,
|
||||
MysqlRowBuffer<is_binary_format>& result,
|
||||
int row_idx, bool col_const) const {
|
||||
auto& data = assert_cast<const ColumnVector<UInt64>&>(column).get_data();
|
||||
const auto& data = assert_cast<const ColumnVector<UInt64>&>(column).get_data();
|
||||
const auto col_index = index_check_const(row_idx, col_const);
|
||||
DateV2Value<DateTimeV2ValueType> date_val =
|
||||
binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(data[col_index]);
|
||||
@ -245,5 +244,4 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& timezone,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace vectorized
|
||||
} // namespace doris
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -190,8 +190,9 @@ struct DateFormatImpl {
|
||||
if (format.size > 128) {
|
||||
return std::pair {offset, true};
|
||||
}
|
||||
char buf[128];
|
||||
if (!dt.to_format_string(format.data, format.size, buf)) {
|
||||
char buf[100 + SAFE_FORMAT_STRING_MARGIN];
|
||||
if (!dt.to_format_string_conservative(format.data, format.size, buf,
|
||||
100 + SAFE_FORMAT_STRING_MARGIN)) {
|
||||
return std::pair {offset, true};
|
||||
}
|
||||
|
||||
@ -227,8 +228,9 @@ struct FromUnixTimeImpl {
|
||||
}
|
||||
dt.from_unixtime(val, time_zone);
|
||||
|
||||
char buf[128];
|
||||
if (!dt.to_format_string(format.data, format.size, buf)) {
|
||||
char buf[100 + SAFE_FORMAT_STRING_MARGIN];
|
||||
if (!dt.to_format_string_conservative(format.data, format.size, buf,
|
||||
100 + SAFE_FORMAT_STRING_MARGIN)) {
|
||||
return std::pair {offset, true};
|
||||
}
|
||||
|
||||
|
||||
@ -543,6 +543,7 @@ bool VecDateTimeValue::from_date_daynr(uint64_t daynr) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// @return: tail
|
||||
static char* int_to_str(uint64_t val, char* to) {
|
||||
char buf[64];
|
||||
char* ptr = buf;
|
||||
@ -555,7 +556,6 @@ static char* int_to_str(uint64_t val, char* to) {
|
||||
while (ptr > buf) {
|
||||
*to++ = *--ptr;
|
||||
}
|
||||
|
||||
return to;
|
||||
}
|
||||
|
||||
@ -566,18 +566,17 @@ static char* append_string(const char* from, char* to) {
|
||||
return to;
|
||||
}
|
||||
|
||||
static char* append_with_prefix(const char* str, int str_len, char prefix, int full_len, char* to) {
|
||||
int len = (str_len > full_len) ? str_len : full_len;
|
||||
len -= str_len;
|
||||
while (len-- > 0) {
|
||||
// push prefix;
|
||||
static char* append_with_prefix(const char* str, int str_len, char prefix, int target_len,
|
||||
char* to) {
|
||||
// target_len is the lower bound: if str is shorter, pad with prefix; if longer, copy all of str.
|
||||
int diff = target_len - str_len;
|
||||
// use prefix to pad
|
||||
while (diff-- > 0) { // won't be INT_MIN. it's ok
|
||||
*to++ = prefix;
|
||||
}
|
||||
while (str_len-- > 0) {
|
||||
*to++ = *str++;
|
||||
}
|
||||
|
||||
return to;
|
||||
memcpy(to, str, str_len);
|
||||
return to + str_len;
|
||||
}
|
||||
|
||||
int VecDateTimeValue::compute_format_len(const char* format, int len) {
|
||||
@ -673,10 +672,12 @@ char* write_four_digits_to_string(int number, char* dst) {
|
||||
return dst + 4;
|
||||
}
|
||||
|
||||
bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) const {
|
||||
bool VecDateTimeValue::to_format_string_conservative(const char* format, int len, char* to,
|
||||
int max_valid_length) const {
|
||||
if (check_range(_year, _month, _day, _hour, _minute, _second, _type)) {
|
||||
return false;
|
||||
}
|
||||
char* const begin = to; // to check written bytes
|
||||
char buf[64];
|
||||
char* cursor = buf;
|
||||
char* pos = nullptr;
|
||||
@ -685,6 +686,9 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c
|
||||
char ch = '\0';
|
||||
|
||||
while (ptr < end) {
|
||||
if (to - begin + SAFE_FORMAT_STRING_MARGIN > max_valid_length) [[unlikely]] {
|
||||
return false;
|
||||
}
|
||||
if (*ptr != '%' || (ptr + 1) == end) {
|
||||
*to++ = *ptr++;
|
||||
continue;
|
||||
@ -932,6 +936,7 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// put it literal
|
||||
*to++ = ch;
|
||||
break;
|
||||
}
|
||||
@ -3421,10 +3426,12 @@ void DateV2Value<T>::set_microsecond(uint32_t microsecond) {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) const {
|
||||
bool DateV2Value<T>::to_format_string_conservative(const char* format, int len, char* to,
|
||||
int max_valid_length) const {
|
||||
if (is_invalid(year(), month(), day(), hour(), minute(), second(), microsecond())) {
|
||||
return false;
|
||||
}
|
||||
char* const begin = to; // to check written bytes
|
||||
char buf[64];
|
||||
char* pos = nullptr;
|
||||
char* cursor = buf;
|
||||
@ -3433,6 +3440,9 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con
|
||||
char ch = '\0';
|
||||
|
||||
while (ptr < end) {
|
||||
if (to - begin + SAFE_FORMAT_STRING_MARGIN > max_valid_length) [[unlikely]] {
|
||||
return false;
|
||||
}
|
||||
if (*ptr != '%' || (ptr + 1) == end) {
|
||||
*to++ = *ptr++;
|
||||
continue;
|
||||
@ -3666,6 +3676,7 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// put it literal
|
||||
*to++ = ch;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -143,6 +143,8 @@ struct TimeInterval {
|
||||
|
||||
enum TimeType { TIME_TIME = 1, TIME_DATE = 2, TIME_DATETIME = 3 };
|
||||
|
||||
constexpr int SAFE_FORMAT_STRING_MARGIN = 12;
|
||||
|
||||
// Used to compute week
|
||||
const int WEEK_MONDAY_FIRST = 1;
|
||||
const int WEEK_YEAR = 2;
|
||||
@ -394,8 +396,12 @@ public:
|
||||
|
||||
char* to_string(char* to) const;
|
||||
|
||||
// Convert this datetime value to string by the format string
|
||||
bool to_format_string(const char* format, int len, char* to) const;
|
||||
// Convert this datetime value to string by the format string.
|
||||
// To keep the bounds check cheap, this may return false when the output merely APPROACHES (without reaching) max_valid_length.
|
||||
// Callers therefore need a slightly oversized buffer, and must pass its length as max_valid_length, so that all valid output fits.
|
||||
// To guarantee this, make the buffer size = <data_need_length> + SAFE_FORMAT_STRING_MARGIN and pass that size as max_valid_length.
|
||||
bool to_format_string_conservative(const char* format, int len, char* to,
|
||||
int max_valid_length) const;
|
||||
|
||||
// compute the length of data format pattern
|
||||
static int compute_format_len(const char* format, int len);
|
||||
@ -822,7 +828,12 @@ public:
|
||||
return val;
|
||||
}
|
||||
|
||||
bool to_format_string(const char* format, int len, char* to) const;
|
||||
// Convert this datetime value to string by the format string.
|
||||
// To keep the bounds check cheap, this may return false when the output merely APPROACHES (without reaching) max_valid_length.
|
||||
// Callers therefore need a slightly oversized buffer, and must pass its length as max_valid_length, so that all valid output fits.
|
||||
// To guarantee this, make the buffer size = <data_need_length> + SAFE_FORMAT_STRING_MARGIN and pass that size as max_valid_length.
|
||||
bool to_format_string_conservative(const char* format, int len, char* to,
|
||||
int max_valid_length) const;
|
||||
|
||||
bool from_date_format_str(const char* format, int format_len, const char* value,
|
||||
int value_len) {
|
||||
|
||||
@ -27,4 +27,8 @@
|
||||
\N
|
||||
|
||||
-- !sql10 --
|
||||
\N
|
||||
\N
|
||||
|
||||
-- !long --
|
||||
\N
|
||||
|
||||
|
||||
@ -491,6 +491,9 @@ true
|
||||
-- !sql --
|
||||
2022 31 4
|
||||
|
||||
-- !sql_date_format_long --
|
||||
\N
|
||||
|
||||
-- !sql --
|
||||
\N
|
||||
|
||||
|
||||
@ -44,4 +44,5 @@ suite("test_from_unixtime") {
|
||||
qt_sql9 "select from_unixtime(-7629445119491449, \"%Y-%m-%d\");"
|
||||
qt_sql10 "select from_unixtime(-7629445119491449);"
|
||||
|
||||
qt_long "select from_unixtime(1196440219, '%f %V %f %l %V %I %S %p %w %r %j %f %l %I %D %w %j %D %e %s %V %f %D %M %s %X %U %v %c %u %x %r %j %a %h %s %m %a %v %u %b');"
|
||||
}
|
||||
|
||||
@ -474,6 +474,7 @@ suite("test_date_function") {
|
||||
qt_sql """ select date_format('1999-01-01', '%X %V'); """
|
||||
qt_sql """ select date_format('2025-01-01', '%X %V'); """
|
||||
qt_sql """ select date_format('2022-08-04', '%X %V %w'); """
|
||||
qt_sql_date_format_long """ select date_format(cast('2011-06-24' as DATETIMEV2(0)), '%f %V %f %l %V %I %S %p %w %r %j %f %l %I %D %w %j %D %e %s %V %f %D %M %s %X %U %v %c %u %x %r %j %a %h %s %m %a %v %u %b') """
|
||||
qt_sql """ select STR_TO_DATE('Tue Jul 12 20:00:45 CST 2022', '%a %b %e %H:%i:%s %Y'); """
|
||||
qt_sql """ select STR_TO_DATE('Tue Jul 12 20:00:45 CST 2022', '%a %b %e %T CST %Y'); """
|
||||
qt_sql """ select STR_TO_DATE('2018-4-2 15:3:28','%Y-%m-%d %H:%i:%s'); """
|
||||
|
||||
Reference in New Issue
Block a user