[fix](analysis) fix: using a regex to determine whether the time part exists may cause backtracking (#24882)

This commit is contained in:
DongLiang-0
2023-09-27 09:26:42 +08:00
committed by GitHub
parent 24ee3607e1
commit f7f359d36c

View File

@ -34,6 +34,7 @@ import org.apache.doris.thrift.TExprNodeType;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -58,8 +59,10 @@ import java.time.temporal.TemporalAccessor;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
public class DateLiteral extends LiteralExpr {
private static final Logger LOG = LogManager.getLogger(DateLiteral.class);
@ -93,6 +96,7 @@ public class DateLiteral extends LiteralExpr {
private static Map<String, Integer> MONTH_NAME_DICT = Maps.newHashMap();
private static Map<String, Integer> MONTH_ABBR_NAME_DICT = Maps.newHashMap();
private static Map<String, Integer> WEEK_DAY_NAME_DICT = Maps.newHashMap();
// Characters that hasTimePart() looks for in a format string; reassigned later in this class
// to the individual characters of "HhIiklrSsTp".
private static Set<Character> TIME_PART_SET = Sets.newHashSet();
private static final int[] DAYS_IN_MONTH = new int[] {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
private static final int ALLOW_SPACE_MASK = 4 | 64;
private static final int MAX_DATE_PARTS = 8;
@ -127,6 +131,7 @@ public class DateLiteral extends LiteralExpr {
.appendFraction(ChronoField.MICRO_OF_SECOND, 0, 6, false)
.toFormatter().withResolverStyle(ResolverStyle.STRICT),
DATETIMEKEY_FORMATTER, DATEKEY_FORMATTER);
TIME_PART_SET = "HhIiklrSsTp".chars().mapToObj(c -> (char) c).collect(Collectors.toSet());
} catch (AnalysisException e) {
LOG.error("invalid date format", e);
System.exit(-1);
@ -175,12 +180,10 @@ public class DateLiteral extends LiteralExpr {
MONTH_ABBR_NAME_DICT.put("sun", 6);
}
// Regex used to determine if the TIME field exists in date_format
private static final Pattern HAS_TIME_PART = Pattern.compile("^.*[HhIiklrSsTp]+.*$");
private static final Pattern HAS_OFFSET_PART = Pattern.compile("[\\+\\-]\\d{2}:\\d{2}");
//Date Literal persist type in meta
private enum DateLiteralType {
// Date Literal persist type in meta
private enum DateLiteralType {
DATETIME(0),
DATE(1),
@ -429,7 +432,7 @@ public class DateLiteral extends LiteralExpr {
if (s.contains(" ")) {
builder.appendLiteral(" ");
}
String[] timePart = s.contains(" ") ? s.split(" ")[1].split(":") : new String[]{};
String[] timePart = s.contains(" ") ? s.split(" ")[1].split(":") : new String[] {};
if (timePart.length > 0 && (type.equals(Type.DATE) || type.equals(Type.DATEV2))) {
throw new AnalysisException("Invalid date value: " + s);
}
@ -556,7 +559,7 @@ public class DateLiteral extends LiteralExpr {
buffer.order(ByteOrder.LITTLE_ENDIAN);
buffer.putInt(value);
} else if (type == PrimitiveType.DATETIMEV2) {
long value = (year << 46) | (month << 42) | (day << 37) | (hour << 32)
long value = (year << 46) | (month << 42) | (day << 37) | (hour << 32)
| (minute << 26) | (second << 20) | (microsecond % (1 << 20));
buffer = ByteBuffer.allocate(8);
buffer.order(ByteOrder.LITTLE_ENDIAN);
@ -780,7 +783,7 @@ public class DateLiteral extends LiteralExpr {
/**
 * Packs this literal's date-time fields into a single {@code long} by OR-ing shifted values:
 * year shifted left by 46, month by 42, day by 37, hour by 32, minute by 26, second by 20,
 * and microsecond reduced modulo {@code 1 << 20} in the lowest position.
 *
 * @return the combined packed value
 */
private long makePackedDatetimeV2() {
return (year << 46) | (month << 42) | (day << 37) | (hour << 32)
| (minute << 26) | (second << 20) | (microsecond % (1 << 20));
// NOTE(review): the line below repeats the line above token for token. It looks like the
// before/after pair of a whitespace-only change in this diff view, not a second operand --
// confirm against the real file before relying on it.
| (minute << 26) | (second << 20) | (microsecond % (1 << 20));
}
private long makePackedDateV2() {
@ -790,7 +793,7 @@ public class DateLiteral extends LiteralExpr {
@Override
public void write(DataOutput out) throws IOException {
super.write(out);
//set flag bit in meta, 0 is DATETIME and 1 is DATE
// set flag bit in meta, 0 is DATETIME and 1 is DATE
if (this.type.equals(Type.DATETIME)) {
out.writeShort(DateLiteralType.DATETIME.value());
out.writeLong(makePackedDatetime());
@ -896,11 +899,11 @@ public class DateLiteral extends LiteralExpr {
}
/**
 * Reports whether the given format string contains a time-related specifier character
 * (one of the characters in "HhIiklrSsTp").
 *
 * @param format the date format pattern to inspect
 * @return {@code true} if a time-part character is found in {@code format}
 */
public static boolean hasTimePart(String format) {
// Regex form: HAS_TIME_PART has to match the entire format string.
return HAS_TIME_PART.matcher(format).matches();
// Scan form: true as soon as any single character of format is contained in TIME_PART_SET.
// NOTE(review): two returns appear here because this view interleaves the old and new lines of
// the change; going by the commit title, the regex form above is the one being replaced to
// avoid backtracking -- confirm against the real file.
return format.chars().anyMatch(c -> TIME_PART_SET.contains((char) c));
}
//Return the date stored in the dateliteral as pattern format.
//eg : "%Y-%m-%d" or "%Y-%m-%d %H:%i:%s"
// Return the date stored in the dateliteral as pattern format.
// eg : "%Y-%m-%d" or "%Y-%m-%d %H:%i:%s"
public String dateFormat(String pattern) throws AnalysisException {
TemporalAccessor accessor;
if (type.equals(Type.DATE) || type.equals(Type.DATEV2)) {