From af070ac853fc88f4fb8fbcaeaffe5532f60ebb09 Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Sat, 28 Jun 2025 10:48:58 +0800 Subject: [PATCH] [branch-2.1](timezone) Fix incorrect DST handling (#51454) (#52418) pick https://github.com/apache/doris/pull/51454 --- be/src/vec/runtime/vdatetime_value.cpp | 53 ++++++++----------- be/src/vec/runtime/vdatetime_value.h | 20 +++---- .../datatype_p0/datetimev2/test_timezone.out | 6 +++ .../datetimev2/test_tz_streamload.out | 2 +- .../datetimev2/test_timezone.groovy | 5 ++ 5 files changed, 43 insertions(+), 43 deletions(-) diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index d161c64fd1..ef6aa7e95e 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -2041,7 +2041,7 @@ bool DateV2Value::from_date_str_base(const char* date_str, int len, int scale int field_idx = 0; int field_len = year_len; - long sec_offset = 0; + int sec_offset = 0; bool need_use_timezone = false; while (ptr < end && isdigit(*ptr) && field_idx < MAX_DATE_PARTS) { @@ -2206,40 +2206,29 @@ bool DateV2Value::from_date_str_base(const char* date_str, int len, int scale if (!TimezoneUtils::find_cctz_time_zone(std::string {ptr, end}, given_tz)) { return false; // invalid format } - auto given = cctz::convert(cctz::civil_second {}, given_tz); - auto local = cctz::convert(cctz::civil_second {}, *local_time_zone); - // these two values is absolute time. so they are negative. need to use (-local) - (-given) - sec_offset = std::chrono::duration_cast(given - local).count(); - } - - // In check_range_and_set_time, for Date type the time part will be truncated. So if the timezone offset should make - // rounding to date part, it would be lost. To avoid this, we use a Datetime type to do these calc. It will save the - // time part and apply the offset. Then convert to Date type back. - // see https://github.com/apache/doris/pull/33553 for more details. - if constexpr (!is_datetime) { - if (sec_offset) { - DateV2Value tmp; - if (!tmp.check_range_and_set_time(date_val[0], date_val[1], date_val[2], date_val[3], - date_val[4], date_val[5], date_val[6])) { - return false; - } - if (!tmp.date_add_interval( - TimeInterval {TimeUnit::SECOND, sec_offset, false})) { - return false; - } - this->assign_from(tmp); - return true; + if (is_invalid(date_val[0], date_val[1], date_val[2], date_val[3], date_val[4], date_val[5], + date_val[6])) { + return false; } + // will carring on the bits in cctz::civil_second. if day is 70, will carry to month. + cctz::civil_second cs {date_val[0], date_val[1], date_val[2], + date_val[3], date_val[4], date_val[5]}; + + auto given = cctz::convert(cs, given_tz); + auto local = cctz::convert(given, *local_time_zone); + date_val[0] = local.year(); + date_val[1] = local.month(); + date_val[2] = local.day(); + date_val[3] = local.hour(); + date_val[4] = local.minute(); + date_val[5] = local.second(); } - if (!check_range_and_set_time(date_val[0], date_val[1], date_val[2], date_val[3], date_val[4], - date_val[5], date_val[6])) { - return false; - } - - return sec_offset ? date_add_interval( - TimeInterval {TimeUnit::SECOND, sec_offset, false}) - : true; + return check_range_and_set_time(date_val[0], date_val[1], date_val[2], date_val[3], date_val[4], + date_val[5], date_val[6]) && + (sec_offset ? date_add_interval( + TimeInterval {TimeUnit::SECOND, sec_offset, false}) + : true); } template diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index b337fea46a..12e961f5fa 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -1024,8 +1024,8 @@ public: } bool operator==(const VecDateTimeValue& other) const { - int64_t ts1; - int64_t ts2; + int64_t ts1 = 0; + int64_t ts2 = 0; this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); return ts1 == ts2; @@ -1040,8 +1040,8 @@ public: bool operator<=(const DateV2Value& other) const { return !(*this > other); } bool operator<=(const VecDateTimeValue& other) const { - int64_t ts1; - int64_t ts2; + int64_t ts1 = 0; + int64_t ts2 = 0; this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); return ts1 <= ts2; @@ -1050,8 +1050,8 @@ public: bool operator>=(const DateV2Value& other) const { return !(*this < other); } bool operator>=(const VecDateTimeValue& other) const { - int64_t ts1; - int64_t ts2; + int64_t ts1 = 0; + int64_t ts2 = 0; this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); return ts1 >= ts2; @@ -1062,8 +1062,8 @@ public: } bool operator<(const VecDateTimeValue& other) const { - int64_t ts1; - int64_t ts2; + int64_t ts1 = 0; + int64_t ts2 = 0; this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); return ts1 < ts2; @@ -1074,8 +1074,8 @@ public: } bool operator>(const VecDateTimeValue& other) const { - int64_t ts1; - int64_t ts2; + int64_t ts1 = 0; + int64_t ts2 = 0; this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); return ts1 > ts2; diff --git a/regression-test/data/datatype_p0/datetimev2/test_timezone.out b/regression-test/data/datatype_p0/datetimev2/test_timezone.out index 1fae14def3..b891efdcad 100644 --- a/regression-test/data/datatype_p0/datetimev2/test_timezone.out +++ b/regression-test/data/datatype_p0/datetimev2/test_timezone.out @@ -25,3 +25,9 @@ -- !fold3 -- 2020-12-12T13:12:12 +-- !nodst -- +2010-01-05T10:15:30 + +-- !dst -- +2010-08-05T09:15:30 + diff --git a/regression-test/data/datatype_p0/datetimev2/test_tz_streamload.out b/regression-test/data/datatype_p0/datetimev2/test_tz_streamload.out index ab103c3a30..a05ac54d30 100644 --- a/regression-test/data/datatype_p0/datetimev2/test_tz_streamload.out +++ b/regression-test/data/datatype_p0/datetimev2/test_tz_streamload.out @@ -15,7 +15,7 @@ 3 2023-08-17T17:41:18 4 2023-08-17T14:41:18 5 2023-08-17T09:41:18 -6 2023-08-18T01:41:18 +6 2023-08-18T00:41:18 7 2023-08-17T17:41:18 8 2023-08-17T19:41:18 diff --git a/regression-test/suites/datatype_p0/datetimev2/test_timezone.groovy b/regression-test/suites/datatype_p0/datetimev2/test_timezone.groovy index 981d8ecd0c..5a965e49a9 100644 --- a/regression-test/suites/datatype_p0/datetimev2/test_timezone.groovy +++ b/regression-test/suites/datatype_p0/datetimev2/test_timezone.groovy @@ -55,4 +55,9 @@ suite("test_timezone") { qt_fold1 """ select cast('2020-12-12T12:12:12asia/shanghai' as datetime); """ qt_fold2 """ select cast('2020-12-12T12:12:12america/los_angeLES' as datetime); """ qt_fold3 """ select cast('2020-12-12T12:12:12Europe/pARIS' as datetime); """ + + qt_nodst "select cast('2010-01-05 08:15:30Europe/London' as datetime);" + qt_dst "select cast('2010-08-05 08:15:30Europe/London' as datetime);" + testFoldConst ("select cast('2010-01-05 08:15:30Europe/London' as datetime);") + testFoldConst ("select cast('2010-08-05 08:15:30Europe/London' as datetime);") }