[enhance](function) refactor from_format_str and support more format (#30452)

This commit is contained in:
zclllyybb
2024-02-01 19:07:54 +08:00
committed by yiguolei
parent fb0d712096
commit 3315c16383
15 changed files with 370 additions and 367 deletions

View File

@ -1203,16 +1203,28 @@ public class DateLiteral extends LiteralExpr {
// this method is exaclty same as from_date_format_str() in be/src/runtime/datetime_value.cpp
// change this method should also change that.
public int fromDateFormatStr(String format, String value, boolean hasSubVal) throws InvalidFormatException {
int fp = 0; // pointer to the current format string
int fend = format.length(); // end of format string
int vp = 0; // pointer to the date string value
int vend = value.length(); // end of date string value
int pFormat = 0; // pointer to the current format string
int endFormat = format.length(); // end of format string
int pValue = 0; // pointer to the date string value
int endValue = value.length(); // end of date string value
boolean datePartUsed = false;
boolean timePartUsed = false;
boolean microSecondPartUsed = false;
int partUsed = 0;
final int yearPart = 1 << 0;
final int monthPart = 1 << 1;
final int dayPart = 1 << 2;
final int weekdayPart = 1 << 3;
final int yeardayPart = 1 << 4;
final int weekNumPart = 1 << 5;
final int normalDatePart = yearPart | monthPart | dayPart;
final int specialDatePart = weekdayPart | yeardayPart | weekNumPart;
final int datePart = normalDatePart | specialDatePart;
final int hourPart = 1 << 6;
final int minutePart = 1 << 7;
final int secondPart = 1 << 8;
final int fracPart = 1 << 9;
final int timePart = hourPart | minutePart | secondPart | fracPart;
int dayPart = 0;
int halfDay = 0; // 0 for am/none, 12 for pm.
long weekday = -1;
long yearday = -1;
long weekNum = -1;
@ -1221,169 +1233,170 @@ public class DateLiteral extends LiteralExpr {
boolean sundayFirst = false;
boolean strictWeekNumberYearType = false;
long strictWeekNumberYear = -1;
boolean usaTime = false;
boolean hourSystem12 = false; // hour in [0..12] and with am/pm
char f;
while (fp < fend && vp < vend) {
char now;
while (pFormat < endFormat && pValue < endValue) {
// Skip space character
while (vp < vend && Character.isSpaceChar(value.charAt(vp))) {
vp++;
while (pValue < endValue && Character.isSpaceChar(value.charAt(pValue))) {
pValue++;
}
if (vp >= vend) {
if (pValue >= endValue) {
break;
}
// Check switch
f = format.charAt(fp);
if (f == '%' && fp + 1 < fend) {
now = format.charAt(pFormat);
if (now == '%' && pFormat + 1 < endFormat) {
int tmp = 0;
long intValue = 0;
fp++;
f = format.charAt(fp);
fp++;
switch (f) {
pFormat++;
now = format.charAt(pFormat);
pFormat++;
switch (now) {
// Year
case 'y':
// Year, numeric (two digits)
tmp = vp + Math.min(2, vend - vp);
intValue = strToLong(value.substring(vp, tmp));
tmp = pValue + Math.min(2, endValue - pValue);
intValue = strToLong(value.substring(pValue, tmp));
intValue += intValue >= 70 ? 1900 : 2000;
this.year = intValue;
vp = tmp;
datePartUsed = true;
pValue = tmp;
partUsed |= yearPart;
break;
case 'Y':
// Year, numeric, four digits
tmp = vp + Math.min(4, vend - vp);
intValue = strToLong(value.substring(vp, tmp));
if (tmp - vp <= 2) {
tmp = pValue + Math.min(4, endValue - pValue);
intValue = strToLong(value.substring(pValue, tmp));
if (tmp - pValue <= 2) {
intValue += intValue >= 70 ? 1900 : 2000;
}
this.year = intValue;
vp = tmp;
datePartUsed = true;
pValue = tmp;
partUsed |= yearPart;
break;
// Month
case 'm':
case 'c':
tmp = vp + Math.min(2, vend - vp);
intValue = strToLong(value.substring(vp, tmp));
tmp = pValue + Math.min(2, endValue - pValue);
intValue = strToLong(value.substring(pValue, tmp));
this.month = intValue;
vp = tmp;
datePartUsed = true;
pValue = tmp;
partUsed |= monthPart;
break;
case 'M': {
int nextPos = findWord(value, vp);
intValue = checkWord(MONTH_NAME_DICT, value.substring(vp, nextPos));
int nextPos = findWord(value, pValue);
intValue = checkWord(MONTH_NAME_DICT, value.substring(pValue, nextPos));
this.month = intValue;
vp = nextPos;
pValue = nextPos;
partUsed |= monthPart;
break;
}
case 'b': {
int nextPos = findWord(value, vp);
intValue = checkWord(MONTH_ABBR_NAME_DICT, value.substring(vp, nextPos));
int nextPos = findWord(value, pValue);
intValue = checkWord(MONTH_ABBR_NAME_DICT, value.substring(pValue, nextPos));
this.month = intValue;
vp = nextPos;
pValue = nextPos;
partUsed |= monthPart;
break;
}
// Day
case 'd':
case 'e':
tmp = vp + Math.min(2, vend - vp);
intValue = strToLong(value.substring(vp, tmp));
tmp = pValue + Math.min(2, endValue - pValue);
intValue = strToLong(value.substring(pValue, tmp));
this.day = intValue;
vp = tmp;
datePartUsed = true;
pValue = tmp;
partUsed |= dayPart;
break;
case 'D':
tmp = vp + Math.min(2, vend - vp);
intValue = strToLong(value.substring(vp, tmp));
tmp = pValue + Math.min(2, endValue - pValue);
intValue = strToLong(value.substring(pValue, tmp));
this.day = intValue;
vp = tmp + Math.min(2, vend - tmp);
datePartUsed = true;
pValue = tmp + Math.min(2, endValue - tmp);
partUsed |= dayPart;
break;
// Hour
case 'h':
case 'I':
case 'l':
usaTime = true;
hourSystem12 = true;
partUsed |= hourPart;
case 'k': // CHECKSTYLE IGNORE THIS LINE: Fall through
case 'H':
tmp = findNumber(value, vp, 2);
intValue = strToLong(value.substring(vp, tmp));
tmp = findNumber(value, pValue, 2);
intValue = strToLong(value.substring(pValue, tmp));
this.hour = intValue;
vp = tmp;
timePartUsed = true;
pValue = tmp;
partUsed |= hourPart;
break;
// Minute
case 'i':
tmp = vp + Math.min(2, vend - vp);
intValue = strToLong(value.substring(vp, tmp));
tmp = pValue + Math.min(2, endValue - pValue);
intValue = strToLong(value.substring(pValue, tmp));
this.minute = intValue;
vp = tmp;
timePartUsed = true;
pValue = tmp;
partUsed |= minutePart;
break;
// Second
case 's':
case 'S':
tmp = vp + Math.min(2, vend - vp);
intValue = strToLong(value.substring(vp, tmp));
tmp = pValue + Math.min(2, endValue - pValue);
intValue = strToLong(value.substring(pValue, tmp));
this.second = intValue;
vp = tmp;
timePartUsed = true;
pValue = tmp;
partUsed |= secondPart;
break;
// Micro second
case 'f':
// FIXME: fix same with BE
tmp = vp;
tmp = pValue;
// when there's still something to the end, fix the scale of ms.
while (tmp < vend && Character.isDigit(value.charAt(tmp))) {
while (tmp < endValue && Character.isDigit(value.charAt(tmp))) {
tmp += 1;
}
if (tmp - vp > 6) {
int tmp2 = vp + 6;
intValue = strToLong(value.substring(vp, tmp2));
if (tmp - pValue > 6) {
int tmp2 = pValue + 6;
intValue = strToLong(value.substring(pValue, tmp2));
} else {
intValue = strToLong(value.substring(vp, tmp));
intValue = strToLong(value.substring(pValue, tmp));
}
this.microsecond = (long) (intValue * Math.pow(10, 6 - Math.min(6, tmp - vp)));
timePartUsed = true;
microSecondPartUsed = true;
vp = tmp;
this.microsecond = (long) (intValue * Math.pow(10, 6 - Math.min(6, tmp - pValue)));
partUsed |= fracPart;
pValue = tmp;
break;
// AM/PM
case 'p':
if ((vend - vp) < 2 || Character.toUpperCase(value.charAt(vp + 1)) != 'M' || !usaTime) {
if ((endValue - pValue) < 2 || Character.toUpperCase(value.charAt(pValue + 1)) != 'M'
|| !hourSystem12) {
throw new InvalidFormatException("Invalid %p format");
}
if (Character.toUpperCase(value.charAt(vp)) == 'P') {
if (Character.toUpperCase(value.charAt(pValue)) == 'P') {
// PM
dayPart = 12;
halfDay = 12;
}
timePartUsed = true;
vp += 2;
pValue += 2;
break;
// Weekday
case 'W': {
int nextPos = findWord(value, vp);
intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(vp, nextPos));
int nextPos = findWord(value, pValue);
intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(pValue, nextPos));
intValue++;
weekday = intValue;
datePartUsed = true;
partUsed |= weekdayPart;
break;
}
case 'a': {
int nextPos = findWord(value, vp);
intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(vp, nextPos));
int nextPos = findWord(value, pValue);
intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(pValue, nextPos));
intValue++;
weekday = intValue;
datePartUsed = true;
partUsed |= weekdayPart;
break;
}
case 'w':
tmp = vp + Math.min(1, vend - vp);
intValue = strToLong(value.substring(vp, tmp));
tmp = pValue + Math.min(1, endValue - pValue);
intValue = strToLong(value.substring(pValue, tmp));
if (intValue >= 7) {
throw new InvalidFormatException("invalid day of week: " + intValue);
}
@ -1391,97 +1404,97 @@ public class DateLiteral extends LiteralExpr {
intValue = 7;
}
weekday = intValue;
vp = tmp;
datePartUsed = true;
pValue = tmp;
partUsed |= weekdayPart;
break;
case 'j':
tmp = vp + Math.min(3, vend - vp);
intValue = strToLong(value.substring(vp, tmp));
tmp = pValue + Math.min(3, endValue - pValue);
intValue = strToLong(value.substring(pValue, tmp));
yearday = intValue;
vp = tmp;
datePartUsed = true;
pValue = tmp;
partUsed |= yeardayPart;
break;
case 'u':
case 'v':
case 'U':
case 'V':
sundayFirst = (format.charAt(fp - 1) == 'U' || format.charAt(fp - 1) == 'V');
sundayFirst = (format.charAt(pFormat - 1) == 'U' || format.charAt(pFormat - 1) == 'V');
// Used to check if there is %x or %X
strictWeekNumber = (format.charAt(fp - 1) == 'V' || format.charAt(fp - 1) == 'v');
tmp = vp + Math.min(2, vend - vp);
intValue = Long.valueOf(value.substring(vp, tmp));
strictWeekNumber = (format.charAt(pFormat - 1) == 'V' || format.charAt(pFormat - 1) == 'v');
tmp = pValue + Math.min(2, endValue - pValue);
intValue = Long.valueOf(value.substring(pValue, tmp));
weekNum = intValue;
if (weekNum > 53 || (strictWeekNumber && weekNum == 0)) {
throw new InvalidFormatException("invalid num of week: " + weekNum);
}
vp = tmp;
datePartUsed = true;
pValue = tmp;
partUsed |= weekNumPart;
break;
// strict week number, must be used with %V or %v
case 'x':
case 'X':
strictWeekNumberYearType = (format.charAt(fp - 1) == 'X');
tmp = vp + Math.min(4, vend - vp);
intValue = Long.valueOf(value.substring(vp, tmp));
strictWeekNumberYearType = (format.charAt(pFormat - 1) == 'X');
tmp = pValue + Math.min(4, endValue - pValue);
intValue = Long.valueOf(value.substring(pValue, tmp));
strictWeekNumberYear = intValue;
vp = tmp;
datePartUsed = true;
pValue = tmp;
partUsed |= weekNumPart;
break;
case 'r':
tmp = fromDateFormatStr("%I:%i:%S %p", value.substring(vp, vend), true);
vp = tmp;
timePartUsed = true;
tmp = fromDateFormatStr("%I:%i:%S %p", value.substring(pValue, endValue), true);
pValue = tmp;
partUsed |= timePart;
break;
case 'T':
tmp = fromDateFormatStr("%H:%i:%S", value.substring(vp, vend), true);
vp = tmp;
timePartUsed = true;
tmp = fromDateFormatStr("%H:%i:%S", value.substring(pValue, endValue), true);
pValue = tmp;
partUsed |= timePart;
break;
case '.':
while (vp < vend && Character.toString(value.charAt(vp)).matches("\\p{Punct}")) {
vp++;
while (pValue < endValue && Character.toString(value.charAt(pValue)).matches("\\p{Punct}")) {
pValue++;
}
break;
case '@':
while (vp < vend && Character.isLetter(value.charAt(vp))) {
vp++;
while (pValue < endValue && Character.isLetter(value.charAt(pValue))) {
pValue++;
}
break;
case '#':
while (vp < vend && Character.isDigit(value.charAt(vp))) {
vp++;
while (pValue < endValue && Character.isDigit(value.charAt(pValue))) {
pValue++;
}
break;
case '%': // %%, escape the %
if ('%' != value.charAt(vp)) {
throw new InvalidFormatException("invalid char after %: " + value.charAt(vp));
if ('%' != value.charAt(pValue)) {
throw new InvalidFormatException("invalid char after %: " + value.charAt(pValue));
}
vp++;
pValue++;
break;
default:
throw new InvalidFormatException("Invalid format pattern: " + f);
throw new InvalidFormatException("Invalid format pattern: " + now);
}
} else if (format.charAt(fp) != ' ') {
if (format.charAt(fp) != value.charAt(vp)) {
throw new InvalidFormatException("Invalid char: " + value.charAt(vp) + ", expected: "
+ format.charAt(fp));
} else if (format.charAt(pFormat) != ' ') {
if (format.charAt(pFormat) != value.charAt(pValue)) {
throw new InvalidFormatException("Invalid char: " + value.charAt(pValue) + ", expected: "
+ format.charAt(pFormat));
}
fp++;
vp++;
pFormat++;
pValue++;
} else {
fp++;
pFormat++;
}
}
// continue to iterate pattern if has
// to find out if it has time part.
while (fp < fend) {
f = format.charAt(fp);
if (f == '%' && fp + 1 < fend) {
fp++;
f = format.charAt(fp);
fp++;
switch (f) {
while (pFormat < endFormat) {
now = format.charAt(pFormat);
if (now == '%' && pFormat + 1 < endFormat) {
pFormat++;
now = format.charAt(pFormat);
pFormat++;
switch (now) {
case 'H':
case 'h':
case 'I':
@ -1493,25 +1506,29 @@ public class DateLiteral extends LiteralExpr {
case 'S':
case 'p':
case 'T':
timePartUsed = true;
partUsed |= timePart;
break;
default:
break;
}
} else {
fp++;
pFormat++;
}
}
if (usaTime) {
if (partUsed == 0) {
throw new InvalidFormatException("Nothing for legal Date: " + value);
}
if (hourSystem12) {
if (this.hour > 12 || this.hour < 1) {
throw new InvalidFormatException("Invalid hour: " + hour);
}
this.hour = (this.hour % 12) + dayPart;
this.hour = (this.hour % 12) + halfDay;
}
if (hasSubVal) {
return vp;
return pValue;
}
// Year day
@ -1540,21 +1557,21 @@ public class DateLiteral extends LiteralExpr {
getDateFromDaynr(days);
}
if (!timePartUsed && year > 0) {
if (format.equals("%Y")) {
month = day = 1;
} else if (format.equals("%Y-%m")) {
// complete default month/day
if ((partUsed & ~normalDatePart) == 0) { // only date here
if ((partUsed & dayPart) == 0) {
day = 1;
if ((partUsed & monthPart) == 0) {
month = 1;
}
}
}
// Compute timestamp type
// TODO(Gabriel): we still use old version datetime/date and change this to new version when
// we think it's stable enough
if (datePartUsed) {
if (microSecondPartUsed) {
if ((partUsed & datePart) != 0) { // Ymd part only
if ((partUsed & fracPart) != 0) {
this.type = Type.DATETIMEV2_WITH_MAX_SCALAR;
} else if (timePartUsed) {
} else if ((partUsed & timePart) != 0) {
this.type = ScalarType.getDefaultDateType(Type.DATETIME);
} else {
this.type = ScalarType.getDefaultDateType(Type.DATE);