[feature](Nereids): date/datetime parser supports many complex cases (#24287)

- feature: normalize date/datetime fields by padding single digits with a leading 0
- feature: support an hour-only ('HH') offset in date/datetime
- feature: normalize() adds the missing minute/second to the time part
- feature: normalize an 'HH' offset to 'HH:MM'
- correct DateTimeFormatterUtilsTest
This commit is contained in:
jakevin
2023-09-13 17:30:58 +08:00
committed by GitHub
parent 231038f050
commit 05722b4cfd
5 changed files with 191 additions and 122 deletions

View File

@ -96,23 +96,88 @@ public class DateLiteral extends Literal {
this.day = other.day;
}
// replace 'T' with ' '
private static String replaceDelimiterT(String s) {
// Matcher matcher = Pattern.compile("^(\\d{2,4}-\\d{1,2}-\\d{1,2})T").matcher(s);
// if (matcher.find()) {
// return matcher.group(1) + " " + s.substring(matcher.end());
// }
// return s;
if (s.length() <= 10) {
return s;
static String normalize(String s) {
StringBuilder sb = new StringBuilder();
int i = 0;
// handle two digit year
if (s.charAt(2) != '-' && s.charAt(4) != '-') {
throw new AnalysisException("date/datetime literal [" + s + "] is invalid");
}
if (s.charAt(10) == 'T') {
return s.substring(0, 10) + " " + s.substring(11);
} else if (s.charAt(8) == 'T') {
return s.substring(0, 8) + " " + s.substring(9);
} else {
return s;
if (s.charAt(2) == '-') {
String yy = s.substring(0, 2);
int year = Integer.parseInt(yy);
if (year >= 0 && year <= 69) {
sb.append("20");
} else if (year >= 70 && year <= 99) {
sb.append("19");
}
sb.append(yy);
i = 2;
}
// normalized leading 0
while (i < s.length()) {
char c = s.charAt(i);
if (c == '.') {
// skip .microsecond, such as .0001 .000001
sb.append(c); // Append the dot itself
i += 1; // Skip the dot
// skip the microsecond part
while (i < s.length() && Character.isDigit(s.charAt(i))) {
sb.append(s.charAt(i));
i += 1;
}
} else if (Character.isDigit(c)) {
// find consecutive digit
int j = i + 1;
while (j < s.length() && Character.isDigit(s.charAt(j))) {
j += 1;
}
int len = j - i;
if (len == 4 || len == 2) {
for (int k = i; k < j; k++) {
sb.append(s.charAt(k));
}
} else if (len == 1) {
sb.append('0');
sb.append(c);
} else {
throw new AnalysisException("date/datetime literal [" + s + "] is invalid");
}
i = j;
} else {
sb.append(c);
i += 1;
}
}
int len = sb.length();
// Replace delimiter 'T' with ' '
if (len > 10 && sb.charAt(10) == 'T') {
sb.setCharAt(10, ' ');
}
// add missing Minute Second in Time part
if (len > 10 && sb.charAt(10) == ' ') {
if (len == 13 || len > 13 && sb.charAt(13) != ':') {
sb.insert(13, ":00:00");
} else if (len == 16 || (len > 16 && sb.charAt(16) != ':')) {
sb.insert(16, ":00");
}
}
len = sb.length();
int signIdx = sb.indexOf("+", 10); // from index:10, skip date part (it contains '-')
signIdx = signIdx == -1 ? sb.indexOf("-", 10) : signIdx;
if (signIdx != -1 && len - signIdx == 3) {
sb.append(":00");
}
return sb.toString();
}
protected static TemporalAccessor parse(String s) {
@ -135,8 +200,8 @@ public class DateLiteral extends Literal {
return dateTime;
}
// replace first 'T' with ' '
s = replaceDelimiterT(s);
s = normalize(s);
if (!s.contains(" ")) {
dateTime = DateTimeFormatterUtils.ZONE_DATE_FORMATTER.parse(s);
} else {
@ -145,12 +210,12 @@ public class DateLiteral extends Literal {
// if Year is not present, throw exception
if (!dateTime.isSupported(ChronoField.YEAR)) {
throw new AnalysisException("datetime literal [" + originalString + "] is invalid");
throw new AnalysisException("date/datetime literal [" + originalString + "] is invalid");
}
return dateTime;
} catch (Exception ex) {
throw new AnalysisException("datetime literal [" + originalString + "] is invalid");
throw new AnalysisException("date/datetime literal [" + originalString + "] is invalid");
}
}
@ -161,7 +226,7 @@ public class DateLiteral extends Literal {
day = DateUtils.getOrDefault(dateTime, ChronoField.DAY_OF_MONTH);
if (checkRange() || checkDate()) {
throw new AnalysisException("datetime literal [" + s + "] is out of range");
throw new AnalysisException("date/datetime literal [" + s + "] is out of range");
}
}

View File

@ -44,12 +44,6 @@ public class DateTimeFormatterUtils {
// .appendZoneText(TextStyle.FULL)
.appendZoneOrOffsetId()
.optionalEnd()
// .appendOptional(
// new DateTimeFormatterBuilder().appendOffset("+HH", "").toFormatter())
// .appendOptional(
// new DateTimeFormatterBuilder().appendOffset("+HH:MM", "").toFormatter())
// .appendOptional(
// new DateTimeFormatterBuilder().appendOffset("+HH:MM:SS", "").toFormatter())
.toFormatter()
.withResolverStyle(ResolverStyle.STRICT);
// yymmdd
@ -60,29 +54,17 @@ public class DateTimeFormatterUtils {
.toFormatter().withResolverStyle(ResolverStyle.STRICT);
// Strict date: yyyy-MM-dd with a mandatory four-digit year and two-digit month/day.
// Two-digit years and one-digit fields are not accepted here; callers are expected to
// expand/pad them first (DateLiteral.normalize does this).
public static final DateTimeFormatter DATE_FORMATTER = new DateTimeFormatterBuilder()
        .appendValue(ChronoField.YEAR, 4)
        .appendLiteral('-').appendValue(ChronoField.MONTH_OF_YEAR, 2)
        .appendLiteral('-').appendValue(ChronoField.DAY_OF_MONTH, 2)
        .toFormatter().withResolverStyle(ResolverStyle.STRICT);
// Strict time: HH:mm:ss[.fraction]. Hour, minute and second are all mandatory and two digits
// wide; the fraction is optional and carries 1 to 6 digits (microsecond precision).
// Shorter forms such as "HH" or "HH:mm" must be completed by the caller before parsing.
public static final DateTimeFormatter TIME_FORMATTER = new DateTimeFormatterBuilder()
        .appendValue(ChronoField.HOUR_OF_DAY, 2)
        .appendLiteral(':').appendValue(ChronoField.MINUTE_OF_HOUR, 2)
        .appendLiteral(':').appendValue(ChronoField.SECOND_OF_MINUTE, 2)
        .appendOptional(new DateTimeFormatterBuilder()
                .appendFraction(ChronoField.MICRO_OF_SECOND, 1, 6, true).toFormatter())
        .toFormatter().withResolverStyle(ResolverStyle.STRICT);
// Time without delimiter: HHmmss[.microsecond]
private static final DateTimeFormatter BASIC_TIME_FORMATTER = new DateTimeFormatterBuilder()
@ -118,10 +100,7 @@ public class DateTimeFormatterUtils {
.append(TIME_FORMATTER)
.toFormatter().withResolverStyle(ResolverStyle.STRICT);
public static final DateTimeFormatter ZONE_DATE_FORMATTER = new DateTimeFormatterBuilder()
.appendOptional(
new DateTimeFormatterBuilder().appendValue(ChronoField.YEAR, 4).toFormatter())
.appendOptional(
new DateTimeFormatterBuilder().appendValueReduced(ChronoField.YEAR, 2, 2, 1970).toFormatter())
.appendValue(ChronoField.YEAR, 4)
.appendLiteral('-').appendValue(ChronoField.MONTH_OF_YEAR, 2)
.appendLiteral('-').appendValue(ChronoField.DAY_OF_MONTH, 2)
// .optionalStart()

View File

@ -20,16 +20,59 @@ package org.apache.doris.nereids.trees.expressions.literal;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import java.util.function.Consumer;
class DateLiteralTest {
@Test
void reject() {
// TODO: reject the literals below.
// They are currently parsed as date + offset. PostgreSQL also parses them as date + offset,
// whereas MySQL parses them as date + time. The form is ambiguous, so the intent is to
// reject it; the assertions stay commented out until the parser does so.
// Assertions.assertThrows(AnalysisException.class, () -> new DateLiteral("2022-01-01-01"));
// Assertions.assertThrows(AnalysisException.class, () -> new DateLiteral("2022-01-01-1"));
// Assertions.assertThrows(AnalysisException.class, () -> new DateLiteral("2022-01-01+01"));
// Assertions.assertThrows(AnalysisException.class, () -> new DateLiteral("2022-01-01+1"));
}
@Test
void testNormalize() {
    // Each row is {raw literal, expected normalized form}; rows are checked in order.
    String[][] cases = {
        {"2021-5", "2021-05"},
        {"2021-5-1", "2021-05-01"},
        {"2021-5-01", "2021-05-01"},
        {"2021-5-01 0:0:0", "2021-05-01 00:00:00"},
        {"2021-5-01 0:0:0.001", "2021-05-01 00:00:00.001"},
        {"2021-5-01 0:0:0.001+8:0", "2021-05-01 00:00:00.001+08:00"},
        {"2021-5-01 0:0:0.001+8:0:0", "2021-05-01 00:00:00.001+08:00:00"},
        {"2021-5-01 0:0:0.001UTC+8:0", "2021-05-01 00:00:00.001UTC+08:00"},
        {"2021-5-01 0:0:0.001UTC+8:0:0", "2021-05-01 00:00:00.001UTC+08:00:00"},
    };
    for (String[] row : cases) {
        String normalized = DateLiteral.normalize(row[0]);
        Assertions.assertEquals(row[1], normalized);
    }
}
@Test
void testDate() {
new DateLiteral("220101");
new DateLiteral("22-01-01");
new DateLiteral("22-01-1");
new DateLiteral("22-1-1");
new DateLiteral("2022-01-01");
new DateLiteral("2022-01-1");
new DateLiteral("2022-1-1");
new DateLiteral("20220101");
Assertions.assertThrows(AnalysisException.class, () -> new DateLiteral("-01-01"));
@ -40,15 +83,14 @@ class DateLiteralTest {
new DateLiteral("2022-01-01Z");
new DateLiteral("2022-01-01UTC");
new DateLiteral("2022-01-01GMT");
// new DateLiteral("2022-01-01UTC+08");
// new DateLiteral("2022-01-01UTC-06");
new DateLiteral("2022-01-01UTC+08");
new DateLiteral("2022-01-01UTC-06");
new DateLiteral("2022-01-01UTC+08:00");
new DateLiteral("2022-01-01UTC-06:00");
new DateLiteral("2022-01-01Europe/London");
}
@Test
@Disabled
void testOffset() {
new DateLiteral("2022-01-01+01:00:00");
new DateLiteral("2022-01-01+01:00");
@ -61,23 +103,32 @@ class DateLiteralTest {
new DateLiteral("2022-01-01-01:00");
new DateLiteral("2022-01-01-1:0:0");
new DateLiteral("2022-01-01-1:0");
Assertions.assertThrows(AnalysisException.class, () -> new DateLiteral("2022-01-01-01"));
Assertions.assertThrows(AnalysisException.class, () -> new DateLiteral("2022-01-01-1"));
}
// Every spelling of 2016-07-02, with or without zero padding of month and day, must
// produce the same canonical date string.
@Test
void testIrregularDate() {
    Consumer<DateLiteral> assertFunc = (DateLiteral dateLiteral) ->
            Assertions.assertEquals("2016-07-02", dateLiteral.toString());

    String[] literals = {"2016-07-02", "2016-7-02", "2016-07-2", "2016-7-2"};
    for (String literal : literals) {
        assertFunc.accept(new DateLiteral(literal));
    }
}
}

View File

@ -18,10 +18,16 @@
package org.apache.doris.nereids.trees.expressions.literal;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
class DateTimeLiteralTest {
@Test
void reject() {
// Placeholder: no rejection case is asserted yet. The candidate below (a "-00:00" offset)
// is kept commented out.
// Assertions.assertThrows(IllegalArgumentException.class, () -> {
// new DateTimeV2Literal("2022-08-01T01:01:01-00:00");
// });
}
@Test
void testWithoutZoneOrOffset() {
new DateTimeV2Literal("2022-08-01");
@ -91,7 +97,7 @@ class DateTimeLiteralTest {
@Test
void testZoneOrOffsetRight() {
java.util.function.BiConsumer<DateTimeV2Literal, Long> assertHour = (dateTimeV2Literal, expectHour) -> {
Assertions.assertSame(dateTimeV2Literal.hour, expectHour);
Assertions.assertEquals(dateTimeV2Literal.hour, expectHour);
};
DateTimeV2Literal dateTimeV2Literal;
dateTimeV2Literal = new DateTimeV2Literal("2022-08-01 00:00:00Europe/London"); // +01:00
@ -123,43 +129,41 @@ class DateTimeLiteralTest {
@Test
void testZoneOffset() {
// Each literal attaches an offset to the "UTC" zone id, written as HH:MM:SS, HH:MM or
// hour-only, with and without zero padding. The test passes when no constructor throws.
new DateTimeV2Literal("2022-08-01 01:01:01UTC+01:01:01");
new DateTimeV2Literal("2022-08-01 01:01:01UTC+1:1:1");
new DateTimeV2Literal("2022-08-01 01:01:01UTC+01:01");
new DateTimeV2Literal("2022-08-01 01:01:01UTC+01");
new DateTimeV2Literal("2022-08-01 01:01:01UTC+1");
}
@Test
void testTwoDigitalYearZoneOffset() {
// Same offset spellings as testZoneOffset, but on literals that start with a two-digit year.
// The test passes when no constructor throws.
new DateTimeV2Literal("22-08-01 01:01:01UTC+01:01:01");
new DateTimeV2Literal("22-08-01 01:01:01UTC+1:1:1");
new DateTimeV2Literal("22-08-01 01:01:01UTC+01:01");
new DateTimeV2Literal("22-08-01 01:01:01UTC+01");
new DateTimeV2Literal("22-08-01 01:01:01UTC+1");
}
@Test
void testOffset() {
new DateTimeV2Literal("2022-08-01 01:01:01+01:01:01");
new DateTimeV2Literal("2022-08-01 01:01:01+01:01");
// new DateTimeV2Literal("2022-08-01 01:01:01+01");
// new DateTimeV2Literal("2022-08-01 01:01:01+01:1:01");
// new DateTimeV2Literal("2022-08-01 01:01:01+01:1");
// new DateTimeV2Literal("2022-08-01 01:01:01+01:01:1");
// new DateTimeV2Literal("2022-08-01 01:01:01+1:1:1");
// new DateTimeV2Literal("2022-08-01 01:01:01+1:1");
// new DateTimeV2Literal("2022-08-01 01:01:01+1");
new DateTimeV2Literal("2022-08-01 01:01:01+01");
new DateTimeV2Literal("2022-08-01 01:01:01+01:1:01");
new DateTimeV2Literal("2022-08-01 01:01:01+01:1");
new DateTimeV2Literal("2022-08-01 01:01:01+01:01:1");
new DateTimeV2Literal("2022-08-01 01:01:01+1:1:1");
new DateTimeV2Literal("2022-08-01 01:01:01+1:1");
new DateTimeV2Literal("2022-08-01 01:01:01+1");
new DateTimeV2Literal("2022-05-01 01:02:55+02:30");
new DateTimeV2Literal("2022-05-01 01:02:55.123-02:30");
new DateTimeV2Literal("2022-06-01T01:02:55+04:30");
new DateTimeV2Literal("2022-06-01 01:02:55.123-07:30");
// new DateTimeV2Literal("20220701010255+07:00");
// new DateTimeV2Literal("20220701010255-05:00");
new DateTimeV2Literal("2022-05-01 01:02:55+02:30");
new DateTimeV2Literal("2022-05-01 01:02:55.123-02:30");
@ -171,9 +175,9 @@ class DateTimeLiteralTest {
@Test
void testDateTime() {
// new DateTimeV2Literal("2022-08-01 01:01:01UTC+1:1:1");
// new DateTimeV2Literal("2022-08-01 01:01:01UTC+1:1");
// new DateTimeV2Literal("2022-08-01 01:01:01UTC+1");
new DateTimeV2Literal("2022-08-01 01:01:01UTC+1:1:1");
new DateTimeV2Literal("2022-08-01 01:01:01UTC+1:1");
new DateTimeV2Literal("2022-08-01 01:01:01UTC+1");
new DateTimeV2Literal("0001-01-01 00:01:01");
new DateTimeV2Literal("0001-01-01 00:01:01.001");
@ -183,27 +187,26 @@ class DateTimeLiteralTest {
new DateTimeV2Literal("2022-01-01 01:02:55.123");
new DateTimeV2Literal("2022-02-01 01:02:55Z");
new DateTimeV2Literal("2022-02-01 01:02:55.123Z");
// new DateTimeV2Literal("2022-03-01 01:02:55UTC+8");
new DateTimeV2Literal("2022-03-01 01:02:55UTC+8");
new DateTimeV2Literal("2022-03-01 01:02:55.123UTC");
// new DateTimeV2Literal("2022-04-01 01:02:55UTC-6");
// new DateTimeV2Literal("2022-04-01T01:02:55UTC-6");
// new DateTimeV2Literal("2022-04-01T01:02:55.123UTC+6");
new DateTimeV2Literal("2022-04-01 01:02:55UTC-6");
new DateTimeV2Literal("2022-04-01T01:02:55UTC-6");
new DateTimeV2Literal("2022-04-01T01:02:55.123UTC+6");
new DateTimeV2Literal("2022-01-01 01:02:55");
new DateTimeV2Literal("2022-01-01 01:02:55.123");
new DateTimeV2Literal("2022-02-01 01:02:55Z");
new DateTimeV2Literal("2022-02-01 01:02:55.123Z");
// new DateTimeV2Literal("2022-03-01 01:02:55UTC+8");
new DateTimeV2Literal("2022-03-01 01:02:55UTC+8");
new DateTimeV2Literal("2022-03-01 01:02:55.123UTC");
// new DateTimeV2Literal("2022-04-01T01:02:55UTC-6");
// new DateTimeV2Literal("2022-04-01T01:02:55.123UTC+6");
new DateTimeV2Literal("2022-04-01T01:02:55UTC-6");
new DateTimeV2Literal("2022-04-01T01:02:55.123UTC+6");
new DateTimeV2Literal("0001-01-01");
// new DateTimeV2Literal("20220801GMT+5");
// new DateTimeV2Literal("20220801GMT-3");
}
@Disabled
@Test
void testIrregularDateTime() {
new DateLiteral("2016-07-02 01:01:00");
@ -227,7 +230,6 @@ class DateTimeLiteralTest {
new DateLiteral("2016-7-2 01:1:0");
}
@Disabled
@Test
void testIrregularDateTimeHour() {
new DateTimeV2Literal("2016-07-02 01");
@ -243,7 +245,6 @@ class DateTimeLiteralTest {
new DateTimeV2Literal("2016-7-2 01");
}
@Disabled
@Test
void testIrregularDateTimeHourMinute() {
new DateTimeV2Literal("2016-07-02 01:01");
@ -267,7 +268,6 @@ class DateTimeLiteralTest {
new DateTimeV2Literal("2016-7-2 1:1");
}
@Disabled
@Test
void testIrregularDateTimeHourMinuteSecond() {
new DateTimeV2Literal("2016-07-02 01:01:01");
@ -307,7 +307,6 @@ class DateTimeLiteralTest {
new DateTimeV2Literal("2016-7-2 1:1:1");
}
@Disabled
@Test
void testIrregularDateTimeHourMinuteSecondMicrosecond() {
new DateTimeV2Literal("2016-07-02 01:01:01.1");

View File

@ -33,11 +33,9 @@ class DateTimeFormatterUtilsTest {
formatter.parse("");
// formatter.parse("UTC+01");
formatter.parse("UTC+01:00");
formatter.parse("UTC+01:00:00");
// formatter.parse("GMT+01");
formatter.parse("GMT+01:00");
formatter.parse("Asia/Shanghai");
formatter.parse("Z");
@ -88,27 +86,6 @@ class DateTimeFormatterUtilsTest {
Assertions.assertThrows(DateTimeParseException.class, () -> withoutT.parse("20200219T010101.0000001"));
}
@Test
void testTwoDigitalDate() {
DateTimeFormatter formatter = DateTimeFormatterUtils.DATE_FORMATTER;
// Year values in the range 00-69 become 2000-2069.
// Year values in the range 70-99 become 1970-199
for (int i = 0; i < 100; i++) {
String str;
if (i < 10) {
str = "0" + i + "-02-19";
} else {
str = i + "-02-19";
}
TemporalAccessor dateTime = formatter.parse(str);
if (i < 70) {
Assertions.assertEquals(2000 + i, dateTime.get(ChronoField.YEAR));
} else {
Assertions.assertEquals(1900 + i, dateTime.get(ChronoField.YEAR));
}
}
}
@Test
void testDateTimeFormatter() {
DateTimeFormatter formatter = DateTimeFormatterUtils.DATE_TIME_FORMATTER;
@ -131,9 +108,7 @@ class DateTimeFormatterUtilsTest {
assertTime.accept(dateTime);
dateTime = timeFormatter.parse("01:01:01");
assertTime.accept(dateTime);
dateTime = timeFormatter.parse("01:01");
assertTime.accept(dateTime);
dateTime = timeFormatter.parse("01");
assertTime.accept(dateTime);
Assertions.assertThrows(DateTimeParseException.class, () -> timeFormatter.parse("01:01"));
Assertions.assertThrows(DateTimeParseException.class, () -> timeFormatter.parse("01"));
}
}