[branch-2.1](timezone) refactor tzdata load to accelerate and unify timezone parsing (#37062) (#37269)
Pick https://github.com/apache/doris/pull/37062 1. Revert https://github.com/apache/doris/pull/25097. We decided to rely on the OS and no longer maintain an independent tzdata, to keep results consistent. 2. Refactor timezone loading and remove the rwlock. Before: ```sql mysql [optest]>select count(convert_tz(d, 'Asia/Shanghai', 'America/Los_Angeles')), count(convert_tz(dt, 'America/Los_Angeles', '+00:00')) from dates; +-------------------------------------------------------------------------------------+--------------------------------------------------------+ | count(convert_tz(cast(d as DATETIMEV2(6)), 'Asia/Shanghai', 'America/Los_Angeles')) | count(convert_tz(dt, 'America/Los_Angeles', '+00:00')) | +-------------------------------------------------------------------------------------+--------------------------------------------------------+ | 16000000 | 16000000 | +-------------------------------------------------------------------------------------+--------------------------------------------------------+ 1 row in set (6.88 sec) ``` Now: ```sql mysql [optest]>select count(convert_tz(d, 'Asia/Shanghai', 'America/Los_Angeles')), count(convert_tz(dt, 'America/Los_Angeles', '+00:00')) from dates; +-------------------------------------------------------------------------------------+--------------------------------------------------------+ | count(convert_tz(cast(d as DATETIMEV2(6)), 'Asia/Shanghai', 'America/Los_Angeles')) | count(convert_tz(dt, 'America/Los_Angeles', '+00:00')) | +-------------------------------------------------------------------------------------+--------------------------------------------------------+ | 16000000 | 16000000 | +-------------------------------------------------------------------------------------+--------------------------------------------------------+ 1 row in set (2.61 sec) ``` 3. Timezone offset format strings like 'UTC+8' are no longer supported, as already stated in https://doris.apache.org/docs/dev/query/query-variables/time-zone/#usage 4. 
Support case-insensitive timezone parsing in Nereids. 5. Fix a bug in Nereids timezone parsing: the DST offset should be determined from the input datetime, but it was wrongly determined from the current time. doc pr: https://github.com/apache/doris-website/pull/810
This commit is contained in:
@ -145,17 +145,6 @@ public class DateLiteral extends Literal {
|
||||
return punctuations.contains(c);
|
||||
}
|
||||
|
||||
private static void replacePunctuation(String s, StringBuilder sb, char c, int idx) {
|
||||
if (idx >= sb.length()) {
|
||||
return;
|
||||
}
|
||||
if (isPunctuation(sb.charAt(idx))) {
|
||||
sb.setCharAt(idx, c);
|
||||
} else {
|
||||
throw new AnalysisException("date/datetime literal [" + s + "] is invalid");
|
||||
}
|
||||
}
|
||||
|
||||
static String normalize(String s) {
|
||||
// merge consecutive space
|
||||
if (s.contains(" ")) {
|
||||
|
||||
@ -32,6 +32,7 @@ import org.apache.logging.log4j.Logger;
|
||||
import java.time.Instant;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneId;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.time.temporal.ChronoField;
|
||||
import java.time.temporal.TemporalAccessor;
|
||||
import java.time.temporal.TemporalQueries;
|
||||
@ -131,6 +132,7 @@ public class DateTimeLiteral extends DateLiteral {
|
||||
|
||||
@Override
|
||||
protected void init(String s) throws AnalysisException {
|
||||
// TODO: check and do fast parse like fastParseDate
|
||||
TemporalAccessor temporal = parse(s);
|
||||
|
||||
year = DateUtils.getOrDefault(temporal, ChronoField.YEAR);
|
||||
@ -142,8 +144,13 @@ public class DateTimeLiteral extends DateLiteral {
|
||||
|
||||
ZoneId zoneId = temporal.query(TemporalQueries.zone());
|
||||
if (zoneId != null) {
|
||||
int offset = DateUtils.getTimeZone().getRules().getOffset(Instant.now()).getTotalSeconds()
|
||||
- zoneId.getRules().getOffset(Instant.now()).getTotalSeconds();
|
||||
// get correct DST of that time.
|
||||
Instant thatTime = ZonedDateTime
|
||||
.of((int) year, (int) month, (int) day, (int) hour, (int) minute, (int) second, 0, zoneId)
|
||||
.toInstant();
|
||||
|
||||
int offset = DateUtils.getTimeZone().getRules().getOffset(thatTime).getTotalSeconds()
|
||||
- zoneId.getRules().getOffset(thatTime).getTotalSeconds();
|
||||
if (offset != 0) {
|
||||
DateTimeLiteral result = (DateTimeLiteral) this.plusSeconds(offset);
|
||||
this.second = result.second;
|
||||
|
||||
@ -41,6 +41,7 @@ import java.time.temporal.ChronoField;
|
||||
public class DateTimeFormatterUtils {
|
||||
public static final DateTimeFormatter ZONE_FORMATTER = new DateTimeFormatterBuilder()
|
||||
.optionalStart()
|
||||
.parseCaseInsensitive()
|
||||
.appendZoneOrOffsetId()
|
||||
.optionalEnd()
|
||||
.toFormatter()
|
||||
|
||||
Reference in New Issue
Block a user