[Optimize] Check invalid datetime to avoid scanning a lot of partitions (#5643)
Support parsing the date formats `'%Y-%m-%d %H:%i'` and `'%Y-%m-%d %H'`. Support handling date-time values with nanoseconds.
This commit is contained in:
@ -41,6 +41,7 @@ import org.apache.doris.rewrite.ExprRewriter;
|
||||
import org.apache.doris.rewrite.FoldConstantsRule;
|
||||
import org.apache.doris.rewrite.RewriteFromUnixTimeRule;
|
||||
import org.apache.doris.rewrite.NormalizeBinaryPredicatesRule;
|
||||
import org.apache.doris.rewrite.SimplifyInvalidDateBinaryPredicatesDateRule;
|
||||
import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmap;
|
||||
import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmapOrHLLRule;
|
||||
import org.apache.doris.rewrite.mvrewrite.CountFieldToSum;
|
||||
@ -257,6 +258,7 @@ public class Analyzer {
|
||||
rules.add(NormalizeBinaryPredicatesRule.INSTANCE);
|
||||
rules.add(FoldConstantsRule.INSTANCE);
|
||||
rules.add(RewriteFromUnixTimeRule.INSTANCE);
|
||||
rules.add(SimplifyInvalidDateBinaryPredicatesDateRule.INSTANCE);
|
||||
exprRewriter_ = new ExprRewriter(rules);
|
||||
// init mv rewriter
|
||||
List<ExprRewriteRule> mvRewriteRules = Lists.newArrayList();
|
||||
|
||||
@ -63,8 +63,12 @@ public class DateLiteral extends LiteralExpr {
|
||||
|
||||
private static final int DATEKEY_LENGTH = 8;
|
||||
private static final int MAX_MICROSECOND = 999999;
|
||||
private static final int DATETIME_TO_MINUTE_STRING_LENGTH = 16;
|
||||
private static final int DATETIME_TO_HOUR_STRING_LENGTH = 13;
|
||||
|
||||
private static DateTimeFormatter DATE_TIME_FORMATTER = null;
|
||||
private static DateTimeFormatter DATE_TIME_FORMATTER_TO_HOUR = null;
|
||||
private static DateTimeFormatter DATE_TIME_FORMATTER_TO_MINUTE = null;
|
||||
private static DateTimeFormatter DATE_FORMATTER = null;
|
||||
/*
|
||||
* Dates containing two-digit year values are ambiguous because the century is unknown.
|
||||
@ -90,6 +94,8 @@ public class DateLiteral extends LiteralExpr {
|
||||
static {
|
||||
try {
|
||||
DATE_TIME_FORMATTER = formatBuilder("%Y-%m-%d %H:%i:%s").toFormatter();
|
||||
DATE_TIME_FORMATTER_TO_HOUR = formatBuilder("%Y-%m-%d %H").toFormatter();
|
||||
DATE_TIME_FORMATTER_TO_MINUTE = formatBuilder("%Y-%m-%d %H:%i").toFormatter();
|
||||
DATE_FORMATTER = formatBuilder("%Y-%m-%d").toFormatter();
|
||||
DATEKEY_FORMATTER = formatBuilder("%Y%m%d").toFormatter();
|
||||
DATE_TIME_FORMATTER_TWO_DIGIT = formatBuilder("%y-%m-%d %H:%i:%s").toFormatter();
|
||||
@ -268,7 +274,14 @@ public class DateLiteral extends LiteralExpr {
|
||||
if (s.split("-")[0].length() == 2) {
|
||||
dateTime = DATE_TIME_FORMATTER_TWO_DIGIT.parseLocalDateTime(s);
|
||||
} else {
|
||||
dateTime = DATE_TIME_FORMATTER.parseLocalDateTime(s);
|
||||
// parse format '%Y-%m-%d %H:%i' and '%Y-%m-%d %H'
|
||||
if (s.length() == DATETIME_TO_MINUTE_STRING_LENGTH) {
|
||||
dateTime = DATE_TIME_FORMATTER_TO_MINUTE.parseLocalDateTime(s);
|
||||
} else if (s.length() == DATETIME_TO_HOUR_STRING_LENGTH) {
|
||||
dateTime = DATE_TIME_FORMATTER_TO_HOUR.parseLocalDateTime(s);
|
||||
} else {
|
||||
dateTime = DATE_TIME_FORMATTER.parseLocalDateTime(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,63 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.rewrite;
|
||||
|
||||
import org.apache.doris.analysis.Analyzer;
|
||||
import org.apache.doris.analysis.BinaryPredicate;
|
||||
import org.apache.doris.analysis.CastExpr;
|
||||
import org.apache.doris.analysis.Expr;
|
||||
import org.apache.doris.analysis.NullLiteral;
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
|
||||
/**
|
||||
* this rule try to convert date expression, if date is invalid, it will be
|
||||
* converted into null literal to avoid to scan all partitions
|
||||
* if a date data is invalid or contains nanosecond, it will be convert into CastExpr
|
||||
* only support rewriting pattern: slot + operator + date literal
|
||||
* Examples:
|
||||
* date = "2020-10-32" => NULL
|
||||
*/
|
||||
public class SimplifyInvalidDateBinaryPredicatesDateRule implements ExprRewriteRule {
|
||||
public static ExprRewriteRule INSTANCE = new SimplifyInvalidDateBinaryPredicatesDateRule();
|
||||
public static final int DATETIME_STRING_MAX_LENGTH = new String("yyyy-MM-dd HH:ii:ss").length();
|
||||
|
||||
@Override
|
||||
public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
|
||||
if (!(expr instanceof BinaryPredicate)) return expr;
|
||||
Expr lchild = expr.getChild(0);
|
||||
if (!lchild.getType().isDateType()) {
|
||||
return expr;
|
||||
}
|
||||
Expr valueExpr = expr.getChild(1);
|
||||
if (!valueExpr.getType().isDateType()) {
|
||||
return expr;
|
||||
}
|
||||
if (!valueExpr.isConstant()) {
|
||||
return expr;
|
||||
}
|
||||
if (valueExpr instanceof CastExpr) {
|
||||
String dateStr = valueExpr.toSql();
|
||||
// if it contains millisecond, microsecond, nanosecond, do nothing
|
||||
if (dateStr.length() > DATETIME_STRING_MAX_LENGTH && dateStr.contains(".")) {
|
||||
return expr;
|
||||
}
|
||||
return new NullLiteral();
|
||||
}
|
||||
return expr;
|
||||
}
|
||||
}
|
||||
@ -58,6 +58,20 @@ public class DateLiteralTest {
|
||||
Assert.assertFalse(hasException);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseDateTimeToHourORMinute() throws Exception{
|
||||
String s = "2020-12-13 12:13:14";
|
||||
Type type = Type.DATETIME;
|
||||
DateLiteral literal = new DateLiteral(s, type);
|
||||
Assert.assertTrue(literal.toSql().contains("2020-12-13 12:13:14"));
|
||||
s = "2020-12-13 12:13";
|
||||
literal = new DateLiteral(s, type);
|
||||
Assert.assertTrue(literal.toSql().contains("2020-12-13 12:13:00"));
|
||||
s = "2020-12-13 12";
|
||||
literal = new DateLiteral(s, type);
|
||||
Assert.assertTrue(literal.toSql().contains("2020-12-13 12:00:00"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void uncheckedCastTo() {
|
||||
boolean hasException = false;
|
||||
|
||||
@ -1467,9 +1467,117 @@ public class QueryPlanTest {
|
||||
//format less than
|
||||
sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd') < '2021-03-02 10:01:28'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
System.out.println("wangxixu-explain:"+explainString);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `query_time` < 1614614400, `query_time` >= 0"));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCheckInvalidDate() throws Exception {
|
||||
FeConstants.runningUnitTest = true;
|
||||
connectContext.setDatabase("default_cluster:test");
|
||||
//valid date
|
||||
String sql = "select day from tbl_int_date where day = '2020-10-30'";
|
||||
String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2020-10-30 00:00:00'"));
|
||||
sql = "select day from tbl_int_date where day = from_unixtime(1196440219)";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2007-12-01 00:30:19'"));
|
||||
sql = "select day from tbl_int_date where day = str_to_date('2014-12-21 12:34:56', '%Y-%m-%d %H:%i:%s');";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2014-12-21 12:34:56'"));
|
||||
//valid date
|
||||
sql = "select day from tbl_int_date where day = 20201030";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2020-10-30 00:00:00'"));
|
||||
//valid date
|
||||
sql = "select day from tbl_int_date where day = '20201030'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2020-10-30 00:00:00'"));
|
||||
//valid date contains micro second
|
||||
sql = "select day from tbl_int_date where day = '2020-10-30 10:00:01.111111'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2020-10-30 10:00:01.111111'"));
|
||||
//invalid date
|
||||
sql = "select day from tbl_int_date where day = '2020-10-32'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
//invalid date
|
||||
sql = "select day from tbl_int_date where day = '20201032'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
//invalid date
|
||||
sql = "select day from tbl_int_date where day = 20201032";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
//invalid date
|
||||
sql = "select day from tbl_int_date where day = 'hello'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
//invalid date
|
||||
sql = "select day from tbl_int_date where day = 2020-10-30";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
//invalid date
|
||||
sql = "select day from tbl_int_date where day = 10-30";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
|
||||
|
||||
//valid datetime
|
||||
sql = "select day from tbl_int_date where date = '2020-10-30 12:12:30'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 12:12:30'"));
|
||||
//valid datetime, support parsing to minute
|
||||
sql = "select day from tbl_int_date where date = '2020-10-30 12:12'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 12:12:00'"));
|
||||
//valid datetime, support parsing to hour
|
||||
sql = "select day from tbl_int_date where date = '2020-10-30 12'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 12:00:00'"));
|
||||
//valid datetime
|
||||
sql = "select day from tbl_int_date where date = 20201030";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 00:00:00'"));
|
||||
//valid datetime
|
||||
sql = "select day from tbl_int_date where date = '20201030'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 00:00:00'"));
|
||||
//valid datetime
|
||||
sql = "select day from tbl_int_date where date = '2020-10-30'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 00:00:00'"));
|
||||
//valid datetime contains micro second
|
||||
sql = "select day from tbl_int_date where date = '2020-10-30 10:00:01.111111'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 10:00:01.111111'"));
|
||||
//invalid datetime
|
||||
sql = "select day from tbl_int_date where date = '2020-10-32'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
//invalid datetime
|
||||
sql = "select day from tbl_int_date where date = 'hello'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
//invalid datetime
|
||||
sql = "select day from tbl_int_date where date = 2020-10-30";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
//invalid datetime
|
||||
sql = "select day from tbl_int_date where date = 10-30";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
//invalid datetime
|
||||
sql = "select day from tbl_int_date where date = '2020-10-12 12:23:76'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
//invalid datetime with timestamp
|
||||
sql = "select day from tbl_int_date where date = '1604031150'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
//valid datetime with timestamp in micro second
|
||||
sql = "select day from tbl_int_date where date = '1604031150000'";
|
||||
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
|
||||
Assert.assertTrue(explainString.contains("NULL"));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user