[branch-2.1](timezone) refactor tzdata load to accelerate and unify timezone parsing (#37062) (#37269)

pick https://github.com/apache/doris/pull/37062

1. Revert https://github.com/apache/doris/pull/25097. We decided to rely
on the tzdata provided by the OS and no longer maintain an independent
copy, so that results stay consistent.
2. Refactor timezone loading and remove the rwlock.

before:
```sql
mysql [optest]>select count(convert_tz(d, 'Asia/Shanghai', 'America/Los_Angeles')), count(convert_tz(dt, 'America/Los_Angeles', '+00:00')) from dates;
+-------------------------------------------------------------------------------------+--------------------------------------------------------+
| count(convert_tz(cast(d as DATETIMEV2(6)), 'Asia/Shanghai', 'America/Los_Angeles')) | count(convert_tz(dt, 'America/Los_Angeles', '+00:00')) |
+-------------------------------------------------------------------------------------+--------------------------------------------------------+
|                                                                            16000000 |                                               16000000 |
+-------------------------------------------------------------------------------------+--------------------------------------------------------+
1 row in set (6.88 sec)
```
now:
```sql
mysql [optest]>select count(convert_tz(d, 'Asia/Shanghai', 'America/Los_Angeles')), count(convert_tz(dt, 'America/Los_Angeles', '+00:00')) from dates;
+-------------------------------------------------------------------------------------+--------------------------------------------------------+
| count(convert_tz(cast(d as DATETIMEV2(6)), 'Asia/Shanghai', 'America/Los_Angeles')) | count(convert_tz(dt, 'America/Los_Angeles', '+00:00')) |
+-------------------------------------------------------------------------------------+--------------------------------------------------------+
|                                                                            16000000 |                                               16000000 |
+-------------------------------------------------------------------------------------+--------------------------------------------------------+
1 row in set (2.61 sec)
```
3. Timezone offset strings of the form 'UTC+8' are no longer supported,
as already stated in
https://doris.apache.org/docs/dev/query/query-variables/time-zone/#usage
4. support case-insensitive timezone parsing in nereids.
5. Fix a bug in Nereids timezone parsing: DST must be determined from the
input datetime, but was previously determined from the current time.

doc pr: https://github.com/apache/doris-website/pull/810
This commit is contained in:
zclllyybb
2024-07-15 10:56:48 +08:00
committed by GitHub
parent 351ba4aeb2
commit 2759383365
22 changed files with 126 additions and 367 deletions

View File

@ -145,17 +145,6 @@ public class DateLiteral extends Literal {
return punctuations.contains(c);
}
private static void replacePunctuation(String s, StringBuilder sb, char c, int idx) {
if (idx >= sb.length()) {
return;
}
if (isPunctuation(sb.charAt(idx))) {
sb.setCharAt(idx, c);
} else {
throw new AnalysisException("date/datetime literal [" + s + "] is invalid");
}
}
static String normalize(String s) {
// merge consecutive space
if (s.contains(" ")) {

View File

@ -32,6 +32,7 @@ import org.apache.logging.log4j.Logger;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalQueries;
@ -131,6 +132,7 @@ public class DateTimeLiteral extends DateLiteral {
@Override
protected void init(String s) throws AnalysisException {
// TODO: check and do fast parse like fastParseDate
TemporalAccessor temporal = parse(s);
year = DateUtils.getOrDefault(temporal, ChronoField.YEAR);
@ -142,8 +144,13 @@ public class DateTimeLiteral extends DateLiteral {
ZoneId zoneId = temporal.query(TemporalQueries.zone());
if (zoneId != null) {
int offset = DateUtils.getTimeZone().getRules().getOffset(Instant.now()).getTotalSeconds()
- zoneId.getRules().getOffset(Instant.now()).getTotalSeconds();
// get correct DST of that time.
Instant thatTime = ZonedDateTime
.of((int) year, (int) month, (int) day, (int) hour, (int) minute, (int) second, 0, zoneId)
.toInstant();
int offset = DateUtils.getTimeZone().getRules().getOffset(thatTime).getTotalSeconds()
- zoneId.getRules().getOffset(thatTime).getTotalSeconds();
if (offset != 0) {
DateTimeLiteral result = (DateTimeLiteral) this.plusSeconds(offset);
this.second = result.second;

View File

@ -41,6 +41,7 @@ import java.time.temporal.ChronoField;
public class DateTimeFormatterUtils {
public static final DateTimeFormatter ZONE_FORMATTER = new DateTimeFormatterBuilder()
.optionalStart()
.parseCaseInsensitive()
.appendZoneOrOffsetId()
.optionalEnd()
.toFormatter()