[enhance](function) refactor from_format_str and support more format (#30452)
This commit is contained in:
@ -1203,16 +1203,28 @@ public class DateLiteral extends LiteralExpr {
|
||||
// this method is exaclty same as from_date_format_str() in be/src/runtime/datetime_value.cpp
|
||||
// change this method should also change that.
|
||||
public int fromDateFormatStr(String format, String value, boolean hasSubVal) throws InvalidFormatException {
|
||||
int fp = 0; // pointer to the current format string
|
||||
int fend = format.length(); // end of format string
|
||||
int vp = 0; // pointer to the date string value
|
||||
int vend = value.length(); // end of date string value
|
||||
int pFormat = 0; // pointer to the current format string
|
||||
int endFormat = format.length(); // end of format string
|
||||
int pValue = 0; // pointer to the date string value
|
||||
int endValue = value.length(); // end of date string value
|
||||
|
||||
boolean datePartUsed = false;
|
||||
boolean timePartUsed = false;
|
||||
boolean microSecondPartUsed = false;
|
||||
int partUsed = 0;
|
||||
final int yearPart = 1 << 0;
|
||||
final int monthPart = 1 << 1;
|
||||
final int dayPart = 1 << 2;
|
||||
final int weekdayPart = 1 << 3;
|
||||
final int yeardayPart = 1 << 4;
|
||||
final int weekNumPart = 1 << 5;
|
||||
final int normalDatePart = yearPart | monthPart | dayPart;
|
||||
final int specialDatePart = weekdayPart | yeardayPart | weekNumPart;
|
||||
final int datePart = normalDatePart | specialDatePart;
|
||||
final int hourPart = 1 << 6;
|
||||
final int minutePart = 1 << 7;
|
||||
final int secondPart = 1 << 8;
|
||||
final int fracPart = 1 << 9;
|
||||
final int timePart = hourPart | minutePart | secondPart | fracPart;
|
||||
|
||||
int dayPart = 0;
|
||||
int halfDay = 0; // 0 for am/none, 12 for pm.
|
||||
long weekday = -1;
|
||||
long yearday = -1;
|
||||
long weekNum = -1;
|
||||
@ -1221,169 +1233,170 @@ public class DateLiteral extends LiteralExpr {
|
||||
boolean sundayFirst = false;
|
||||
boolean strictWeekNumberYearType = false;
|
||||
long strictWeekNumberYear = -1;
|
||||
boolean usaTime = false;
|
||||
boolean hourSystem12 = false; // hour in [0..12] and with am/pm
|
||||
|
||||
char f;
|
||||
while (fp < fend && vp < vend) {
|
||||
char now;
|
||||
while (pFormat < endFormat && pValue < endValue) {
|
||||
// Skip space character
|
||||
while (vp < vend && Character.isSpaceChar(value.charAt(vp))) {
|
||||
vp++;
|
||||
while (pValue < endValue && Character.isSpaceChar(value.charAt(pValue))) {
|
||||
pValue++;
|
||||
}
|
||||
if (vp >= vend) {
|
||||
if (pValue >= endValue) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Check switch
|
||||
f = format.charAt(fp);
|
||||
if (f == '%' && fp + 1 < fend) {
|
||||
now = format.charAt(pFormat);
|
||||
if (now == '%' && pFormat + 1 < endFormat) {
|
||||
int tmp = 0;
|
||||
long intValue = 0;
|
||||
fp++;
|
||||
f = format.charAt(fp);
|
||||
fp++;
|
||||
switch (f) {
|
||||
pFormat++;
|
||||
now = format.charAt(pFormat);
|
||||
pFormat++;
|
||||
switch (now) {
|
||||
// Year
|
||||
case 'y':
|
||||
// Year, numeric (two digits)
|
||||
tmp = vp + Math.min(2, vend - vp);
|
||||
intValue = strToLong(value.substring(vp, tmp));
|
||||
tmp = pValue + Math.min(2, endValue - pValue);
|
||||
intValue = strToLong(value.substring(pValue, tmp));
|
||||
intValue += intValue >= 70 ? 1900 : 2000;
|
||||
this.year = intValue;
|
||||
vp = tmp;
|
||||
datePartUsed = true;
|
||||
pValue = tmp;
|
||||
partUsed |= yearPart;
|
||||
break;
|
||||
case 'Y':
|
||||
// Year, numeric, four digits
|
||||
tmp = vp + Math.min(4, vend - vp);
|
||||
intValue = strToLong(value.substring(vp, tmp));
|
||||
if (tmp - vp <= 2) {
|
||||
tmp = pValue + Math.min(4, endValue - pValue);
|
||||
intValue = strToLong(value.substring(pValue, tmp));
|
||||
if (tmp - pValue <= 2) {
|
||||
intValue += intValue >= 70 ? 1900 : 2000;
|
||||
}
|
||||
this.year = intValue;
|
||||
vp = tmp;
|
||||
datePartUsed = true;
|
||||
pValue = tmp;
|
||||
partUsed |= yearPart;
|
||||
break;
|
||||
// Month
|
||||
case 'm':
|
||||
case 'c':
|
||||
tmp = vp + Math.min(2, vend - vp);
|
||||
intValue = strToLong(value.substring(vp, tmp));
|
||||
tmp = pValue + Math.min(2, endValue - pValue);
|
||||
intValue = strToLong(value.substring(pValue, tmp));
|
||||
this.month = intValue;
|
||||
vp = tmp;
|
||||
datePartUsed = true;
|
||||
pValue = tmp;
|
||||
partUsed |= monthPart;
|
||||
break;
|
||||
case 'M': {
|
||||
int nextPos = findWord(value, vp);
|
||||
intValue = checkWord(MONTH_NAME_DICT, value.substring(vp, nextPos));
|
||||
int nextPos = findWord(value, pValue);
|
||||
intValue = checkWord(MONTH_NAME_DICT, value.substring(pValue, nextPos));
|
||||
this.month = intValue;
|
||||
vp = nextPos;
|
||||
pValue = nextPos;
|
||||
partUsed |= monthPart;
|
||||
break;
|
||||
}
|
||||
case 'b': {
|
||||
int nextPos = findWord(value, vp);
|
||||
intValue = checkWord(MONTH_ABBR_NAME_DICT, value.substring(vp, nextPos));
|
||||
int nextPos = findWord(value, pValue);
|
||||
intValue = checkWord(MONTH_ABBR_NAME_DICT, value.substring(pValue, nextPos));
|
||||
this.month = intValue;
|
||||
vp = nextPos;
|
||||
pValue = nextPos;
|
||||
partUsed |= monthPart;
|
||||
break;
|
||||
}
|
||||
// Day
|
||||
case 'd':
|
||||
case 'e':
|
||||
tmp = vp + Math.min(2, vend - vp);
|
||||
intValue = strToLong(value.substring(vp, tmp));
|
||||
tmp = pValue + Math.min(2, endValue - pValue);
|
||||
intValue = strToLong(value.substring(pValue, tmp));
|
||||
this.day = intValue;
|
||||
vp = tmp;
|
||||
datePartUsed = true;
|
||||
pValue = tmp;
|
||||
partUsed |= dayPart;
|
||||
break;
|
||||
case 'D':
|
||||
tmp = vp + Math.min(2, vend - vp);
|
||||
intValue = strToLong(value.substring(vp, tmp));
|
||||
tmp = pValue + Math.min(2, endValue - pValue);
|
||||
intValue = strToLong(value.substring(pValue, tmp));
|
||||
this.day = intValue;
|
||||
vp = tmp + Math.min(2, vend - tmp);
|
||||
datePartUsed = true;
|
||||
pValue = tmp + Math.min(2, endValue - tmp);
|
||||
partUsed |= dayPart;
|
||||
break;
|
||||
// Hour
|
||||
case 'h':
|
||||
case 'I':
|
||||
case 'l':
|
||||
usaTime = true;
|
||||
hourSystem12 = true;
|
||||
partUsed |= hourPart;
|
||||
case 'k': // CHECKSTYLE IGNORE THIS LINE: Fall through
|
||||
case 'H':
|
||||
tmp = findNumber(value, vp, 2);
|
||||
intValue = strToLong(value.substring(vp, tmp));
|
||||
tmp = findNumber(value, pValue, 2);
|
||||
intValue = strToLong(value.substring(pValue, tmp));
|
||||
this.hour = intValue;
|
||||
vp = tmp;
|
||||
timePartUsed = true;
|
||||
pValue = tmp;
|
||||
partUsed |= hourPart;
|
||||
break;
|
||||
// Minute
|
||||
case 'i':
|
||||
tmp = vp + Math.min(2, vend - vp);
|
||||
intValue = strToLong(value.substring(vp, tmp));
|
||||
tmp = pValue + Math.min(2, endValue - pValue);
|
||||
intValue = strToLong(value.substring(pValue, tmp));
|
||||
this.minute = intValue;
|
||||
vp = tmp;
|
||||
timePartUsed = true;
|
||||
pValue = tmp;
|
||||
partUsed |= minutePart;
|
||||
break;
|
||||
// Second
|
||||
case 's':
|
||||
case 'S':
|
||||
tmp = vp + Math.min(2, vend - vp);
|
||||
intValue = strToLong(value.substring(vp, tmp));
|
||||
tmp = pValue + Math.min(2, endValue - pValue);
|
||||
intValue = strToLong(value.substring(pValue, tmp));
|
||||
this.second = intValue;
|
||||
vp = tmp;
|
||||
timePartUsed = true;
|
||||
pValue = tmp;
|
||||
partUsed |= secondPart;
|
||||
break;
|
||||
// Micro second
|
||||
case 'f':
|
||||
// FIXME: fix same with BE
|
||||
tmp = vp;
|
||||
tmp = pValue;
|
||||
// when there's still something to the end, fix the scale of ms.
|
||||
while (tmp < vend && Character.isDigit(value.charAt(tmp))) {
|
||||
while (tmp < endValue && Character.isDigit(value.charAt(tmp))) {
|
||||
tmp += 1;
|
||||
}
|
||||
|
||||
if (tmp - vp > 6) {
|
||||
int tmp2 = vp + 6;
|
||||
intValue = strToLong(value.substring(vp, tmp2));
|
||||
if (tmp - pValue > 6) {
|
||||
int tmp2 = pValue + 6;
|
||||
intValue = strToLong(value.substring(pValue, tmp2));
|
||||
} else {
|
||||
intValue = strToLong(value.substring(vp, tmp));
|
||||
intValue = strToLong(value.substring(pValue, tmp));
|
||||
}
|
||||
this.microsecond = (long) (intValue * Math.pow(10, 6 - Math.min(6, tmp - vp)));
|
||||
timePartUsed = true;
|
||||
microSecondPartUsed = true;
|
||||
vp = tmp;
|
||||
this.microsecond = (long) (intValue * Math.pow(10, 6 - Math.min(6, tmp - pValue)));
|
||||
partUsed |= fracPart;
|
||||
pValue = tmp;
|
||||
break;
|
||||
// AM/PM
|
||||
case 'p':
|
||||
if ((vend - vp) < 2 || Character.toUpperCase(value.charAt(vp + 1)) != 'M' || !usaTime) {
|
||||
if ((endValue - pValue) < 2 || Character.toUpperCase(value.charAt(pValue + 1)) != 'M'
|
||||
|| !hourSystem12) {
|
||||
throw new InvalidFormatException("Invalid %p format");
|
||||
}
|
||||
if (Character.toUpperCase(value.charAt(vp)) == 'P') {
|
||||
if (Character.toUpperCase(value.charAt(pValue)) == 'P') {
|
||||
// PM
|
||||
dayPart = 12;
|
||||
halfDay = 12;
|
||||
}
|
||||
timePartUsed = true;
|
||||
vp += 2;
|
||||
pValue += 2;
|
||||
break;
|
||||
// Weekday
|
||||
case 'W': {
|
||||
int nextPos = findWord(value, vp);
|
||||
intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(vp, nextPos));
|
||||
int nextPos = findWord(value, pValue);
|
||||
intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(pValue, nextPos));
|
||||
intValue++;
|
||||
weekday = intValue;
|
||||
datePartUsed = true;
|
||||
partUsed |= weekdayPart;
|
||||
break;
|
||||
}
|
||||
case 'a': {
|
||||
int nextPos = findWord(value, vp);
|
||||
intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(vp, nextPos));
|
||||
int nextPos = findWord(value, pValue);
|
||||
intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(pValue, nextPos));
|
||||
intValue++;
|
||||
weekday = intValue;
|
||||
datePartUsed = true;
|
||||
partUsed |= weekdayPart;
|
||||
break;
|
||||
}
|
||||
case 'w':
|
||||
tmp = vp + Math.min(1, vend - vp);
|
||||
intValue = strToLong(value.substring(vp, tmp));
|
||||
tmp = pValue + Math.min(1, endValue - pValue);
|
||||
intValue = strToLong(value.substring(pValue, tmp));
|
||||
if (intValue >= 7) {
|
||||
throw new InvalidFormatException("invalid day of week: " + intValue);
|
||||
}
|
||||
@ -1391,97 +1404,97 @@ public class DateLiteral extends LiteralExpr {
|
||||
intValue = 7;
|
||||
}
|
||||
weekday = intValue;
|
||||
vp = tmp;
|
||||
datePartUsed = true;
|
||||
pValue = tmp;
|
||||
partUsed |= weekdayPart;
|
||||
break;
|
||||
case 'j':
|
||||
tmp = vp + Math.min(3, vend - vp);
|
||||
intValue = strToLong(value.substring(vp, tmp));
|
||||
tmp = pValue + Math.min(3, endValue - pValue);
|
||||
intValue = strToLong(value.substring(pValue, tmp));
|
||||
yearday = intValue;
|
||||
vp = tmp;
|
||||
datePartUsed = true;
|
||||
pValue = tmp;
|
||||
partUsed |= yeardayPart;
|
||||
break;
|
||||
case 'u':
|
||||
case 'v':
|
||||
case 'U':
|
||||
case 'V':
|
||||
sundayFirst = (format.charAt(fp - 1) == 'U' || format.charAt(fp - 1) == 'V');
|
||||
sundayFirst = (format.charAt(pFormat - 1) == 'U' || format.charAt(pFormat - 1) == 'V');
|
||||
// Used to check if there is %x or %X
|
||||
strictWeekNumber = (format.charAt(fp - 1) == 'V' || format.charAt(fp - 1) == 'v');
|
||||
tmp = vp + Math.min(2, vend - vp);
|
||||
intValue = Long.valueOf(value.substring(vp, tmp));
|
||||
strictWeekNumber = (format.charAt(pFormat - 1) == 'V' || format.charAt(pFormat - 1) == 'v');
|
||||
tmp = pValue + Math.min(2, endValue - pValue);
|
||||
intValue = Long.valueOf(value.substring(pValue, tmp));
|
||||
weekNum = intValue;
|
||||
if (weekNum > 53 || (strictWeekNumber && weekNum == 0)) {
|
||||
throw new InvalidFormatException("invalid num of week: " + weekNum);
|
||||
}
|
||||
vp = tmp;
|
||||
datePartUsed = true;
|
||||
pValue = tmp;
|
||||
partUsed |= weekNumPart;
|
||||
break;
|
||||
// strict week number, must be used with %V or %v
|
||||
case 'x':
|
||||
case 'X':
|
||||
strictWeekNumberYearType = (format.charAt(fp - 1) == 'X');
|
||||
tmp = vp + Math.min(4, vend - vp);
|
||||
intValue = Long.valueOf(value.substring(vp, tmp));
|
||||
strictWeekNumberYearType = (format.charAt(pFormat - 1) == 'X');
|
||||
tmp = pValue + Math.min(4, endValue - pValue);
|
||||
intValue = Long.valueOf(value.substring(pValue, tmp));
|
||||
strictWeekNumberYear = intValue;
|
||||
vp = tmp;
|
||||
datePartUsed = true;
|
||||
pValue = tmp;
|
||||
partUsed |= weekNumPart;
|
||||
break;
|
||||
case 'r':
|
||||
tmp = fromDateFormatStr("%I:%i:%S %p", value.substring(vp, vend), true);
|
||||
vp = tmp;
|
||||
timePartUsed = true;
|
||||
tmp = fromDateFormatStr("%I:%i:%S %p", value.substring(pValue, endValue), true);
|
||||
pValue = tmp;
|
||||
partUsed |= timePart;
|
||||
break;
|
||||
case 'T':
|
||||
tmp = fromDateFormatStr("%H:%i:%S", value.substring(vp, vend), true);
|
||||
vp = tmp;
|
||||
timePartUsed = true;
|
||||
tmp = fromDateFormatStr("%H:%i:%S", value.substring(pValue, endValue), true);
|
||||
pValue = tmp;
|
||||
partUsed |= timePart;
|
||||
break;
|
||||
case '.':
|
||||
while (vp < vend && Character.toString(value.charAt(vp)).matches("\\p{Punct}")) {
|
||||
vp++;
|
||||
while (pValue < endValue && Character.toString(value.charAt(pValue)).matches("\\p{Punct}")) {
|
||||
pValue++;
|
||||
}
|
||||
break;
|
||||
case '@':
|
||||
while (vp < vend && Character.isLetter(value.charAt(vp))) {
|
||||
vp++;
|
||||
while (pValue < endValue && Character.isLetter(value.charAt(pValue))) {
|
||||
pValue++;
|
||||
}
|
||||
break;
|
||||
case '#':
|
||||
while (vp < vend && Character.isDigit(value.charAt(vp))) {
|
||||
vp++;
|
||||
while (pValue < endValue && Character.isDigit(value.charAt(pValue))) {
|
||||
pValue++;
|
||||
}
|
||||
break;
|
||||
case '%': // %%, escape the %
|
||||
if ('%' != value.charAt(vp)) {
|
||||
throw new InvalidFormatException("invalid char after %: " + value.charAt(vp));
|
||||
if ('%' != value.charAt(pValue)) {
|
||||
throw new InvalidFormatException("invalid char after %: " + value.charAt(pValue));
|
||||
}
|
||||
vp++;
|
||||
pValue++;
|
||||
break;
|
||||
default:
|
||||
throw new InvalidFormatException("Invalid format pattern: " + f);
|
||||
throw new InvalidFormatException("Invalid format pattern: " + now);
|
||||
}
|
||||
} else if (format.charAt(fp) != ' ') {
|
||||
if (format.charAt(fp) != value.charAt(vp)) {
|
||||
throw new InvalidFormatException("Invalid char: " + value.charAt(vp) + ", expected: "
|
||||
+ format.charAt(fp));
|
||||
} else if (format.charAt(pFormat) != ' ') {
|
||||
if (format.charAt(pFormat) != value.charAt(pValue)) {
|
||||
throw new InvalidFormatException("Invalid char: " + value.charAt(pValue) + ", expected: "
|
||||
+ format.charAt(pFormat));
|
||||
}
|
||||
fp++;
|
||||
vp++;
|
||||
pFormat++;
|
||||
pValue++;
|
||||
} else {
|
||||
fp++;
|
||||
pFormat++;
|
||||
}
|
||||
}
|
||||
|
||||
// continue to iterate pattern if has
|
||||
// to find out if it has time part.
|
||||
while (fp < fend) {
|
||||
f = format.charAt(fp);
|
||||
if (f == '%' && fp + 1 < fend) {
|
||||
fp++;
|
||||
f = format.charAt(fp);
|
||||
fp++;
|
||||
switch (f) {
|
||||
while (pFormat < endFormat) {
|
||||
now = format.charAt(pFormat);
|
||||
if (now == '%' && pFormat + 1 < endFormat) {
|
||||
pFormat++;
|
||||
now = format.charAt(pFormat);
|
||||
pFormat++;
|
||||
switch (now) {
|
||||
case 'H':
|
||||
case 'h':
|
||||
case 'I':
|
||||
@ -1493,25 +1506,29 @@ public class DateLiteral extends LiteralExpr {
|
||||
case 'S':
|
||||
case 'p':
|
||||
case 'T':
|
||||
timePartUsed = true;
|
||||
partUsed |= timePart;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
fp++;
|
||||
pFormat++;
|
||||
}
|
||||
}
|
||||
|
||||
if (usaTime) {
|
||||
if (partUsed == 0) {
|
||||
throw new InvalidFormatException("Nothing for legal Date: " + value);
|
||||
}
|
||||
|
||||
if (hourSystem12) {
|
||||
if (this.hour > 12 || this.hour < 1) {
|
||||
throw new InvalidFormatException("Invalid hour: " + hour);
|
||||
}
|
||||
this.hour = (this.hour % 12) + dayPart;
|
||||
this.hour = (this.hour % 12) + halfDay;
|
||||
}
|
||||
|
||||
if (hasSubVal) {
|
||||
return vp;
|
||||
return pValue;
|
||||
}
|
||||
|
||||
// Year day
|
||||
@ -1540,21 +1557,21 @@ public class DateLiteral extends LiteralExpr {
|
||||
getDateFromDaynr(days);
|
||||
}
|
||||
|
||||
if (!timePartUsed && year > 0) {
|
||||
if (format.equals("%Y")) {
|
||||
month = day = 1;
|
||||
} else if (format.equals("%Y-%m")) {
|
||||
// complete default month/day
|
||||
if ((partUsed & ~normalDatePart) == 0) { // only date here
|
||||
if ((partUsed & dayPart) == 0) {
|
||||
day = 1;
|
||||
if ((partUsed & monthPart) == 0) {
|
||||
month = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute timestamp type
|
||||
// TODO(Gabriel): we still use old version datetime/date and change this to new version when
|
||||
// we think it's stable enough
|
||||
if (datePartUsed) {
|
||||
if (microSecondPartUsed) {
|
||||
if ((partUsed & datePart) != 0) { // Ymd part only
|
||||
if ((partUsed & fracPart) != 0) {
|
||||
this.type = Type.DATETIMEV2_WITH_MAX_SCALAR;
|
||||
} else if (timePartUsed) {
|
||||
} else if ((partUsed & timePart) != 0) {
|
||||
this.type = ScalarType.getDefaultDateType(Type.DATETIME);
|
||||
} else {
|
||||
this.type = ScalarType.getDefaultDateType(Type.DATE);
|
||||
|
||||
Reference in New Issue
Block a user