[Optimize] Check invalid datetime to avoid scanning a lot of partitions (#5643)

Support parsing date formats `'%Y-%m-%d %H:%i'` and `'%Y-%m-%d %H'`
Support handling date time with nanoseconds
This commit is contained in:
xinghuayu007
2021-05-19 09:25:58 +08:00
committed by GitHub
parent add8c4bb74
commit 8d74176970
5 changed files with 202 additions and 2 deletions

View File

@ -41,6 +41,7 @@ import org.apache.doris.rewrite.ExprRewriter;
import org.apache.doris.rewrite.FoldConstantsRule;
import org.apache.doris.rewrite.RewriteFromUnixTimeRule;
import org.apache.doris.rewrite.NormalizeBinaryPredicatesRule;
import org.apache.doris.rewrite.SimplifyInvalidDateBinaryPredicatesDateRule;
import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmap;
import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmapOrHLLRule;
import org.apache.doris.rewrite.mvrewrite.CountFieldToSum;
@ -257,6 +258,7 @@ public class Analyzer {
rules.add(NormalizeBinaryPredicatesRule.INSTANCE);
rules.add(FoldConstantsRule.INSTANCE);
rules.add(RewriteFromUnixTimeRule.INSTANCE);
rules.add(SimplifyInvalidDateBinaryPredicatesDateRule.INSTANCE);
exprRewriter_ = new ExprRewriter(rules);
// init mv rewriter
List<ExprRewriteRule> mvRewriteRules = Lists.newArrayList();

View File

@ -63,8 +63,12 @@ public class DateLiteral extends LiteralExpr {
private static final int DATEKEY_LENGTH = 8;
private static final int MAX_MICROSECOND = 999999;
private static final int DATETIME_TO_MINUTE_STRING_LENGTH = 16;
private static final int DATETIME_TO_HOUR_STRING_LENGTH = 13;
private static DateTimeFormatter DATE_TIME_FORMATTER = null;
private static DateTimeFormatter DATE_TIME_FORMATTER_TO_HOUR = null;
private static DateTimeFormatter DATE_TIME_FORMATTER_TO_MINUTE = null;
private static DateTimeFormatter DATE_FORMATTER = null;
/*
* Dates containing two-digit year values are ambiguous because the century is unknown.
@ -90,6 +94,8 @@ public class DateLiteral extends LiteralExpr {
static {
try {
DATE_TIME_FORMATTER = formatBuilder("%Y-%m-%d %H:%i:%s").toFormatter();
DATE_TIME_FORMATTER_TO_HOUR = formatBuilder("%Y-%m-%d %H").toFormatter();
DATE_TIME_FORMATTER_TO_MINUTE = formatBuilder("%Y-%m-%d %H:%i").toFormatter();
DATE_FORMATTER = formatBuilder("%Y-%m-%d").toFormatter();
DATEKEY_FORMATTER = formatBuilder("%Y%m%d").toFormatter();
DATE_TIME_FORMATTER_TWO_DIGIT = formatBuilder("%y-%m-%d %H:%i:%s").toFormatter();
@ -268,7 +274,14 @@ public class DateLiteral extends LiteralExpr {
if (s.split("-")[0].length() == 2) {
dateTime = DATE_TIME_FORMATTER_TWO_DIGIT.parseLocalDateTime(s);
} else {
dateTime = DATE_TIME_FORMATTER.parseLocalDateTime(s);
// parse format '%Y-%m-%d %H:%i' and '%Y-%m-%d %H'
if (s.length() == DATETIME_TO_MINUTE_STRING_LENGTH) {
dateTime = DATE_TIME_FORMATTER_TO_MINUTE.parseLocalDateTime(s);
} else if (s.length() == DATETIME_TO_HOUR_STRING_LENGTH) {
dateTime = DATE_TIME_FORMATTER_TO_HOUR.parseLocalDateTime(s);
} else {
dateTime = DATE_TIME_FORMATTER.parseLocalDateTime(s);
}
}
}

View File

@ -0,0 +1,63 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.rewrite;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.BinaryPredicate;
import org.apache.doris.analysis.CastExpr;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.NullLiteral;
import org.apache.doris.common.AnalysisException;
/**
 * Rewrite rule that replaces a binary predicate by a NULL literal when a date-typed
 * operand is compared with a constant that is still wrapped in a cast.
 *
 * <p>The rule leaves the expression untouched unless all of the following hold:
 * <ul>
 *   <li>the expression is a {@link BinaryPredicate};</li>
 *   <li>both children have a date type;</li>
 *   <li>the right child is constant and is a {@link CastExpr}.</li>
 * </ul>
 * When they hold, the whole predicate is replaced by a {@link NullLiteral}, except when
 * the cast's SQL text is longer than {@link #DATETIME_STRING_MAX_LENGTH} and contains a
 * {@code '.'} (treated as a value carrying fractional seconds), in which case the
 * predicate is kept as is.
 *
 * <p>Presumably a constant date operand that is still a cast at this point is one that
 * could not be folded into a date literal (i.e. an invalid date); replacing the predicate
 * by NULL is meant to avoid scanning all partitions - confirm against the constant
 * folding rule that runs before this one.
 *
 * <p>Intended pattern: slot + operator + date literal. Example:
 * <pre>
 * date = "2020-10-32" => NULL
 * </pre>
 */
public class SimplifyInvalidDateBinaryPredicatesDateRule implements ExprRewriteRule {
    /** Shared instance of this rule. */
    public static final ExprRewriteRule INSTANCE = new SimplifyInvalidDateBinaryPredicatesDateRule();

    /** Length of a datetime string with second precision, e.g. {@code yyyy-MM-dd HH:ii:ss}. */
    public static final int DATETIME_STRING_MAX_LENGTH = "yyyy-MM-dd HH:ii:ss".length();

    /**
     * Applies the rule to a single expression.
     *
     * @param expr the expression to inspect
     * @param analyzer the current analyzer (not used by this rule)
     * @return a new {@link NullLiteral} when the predicate matches the pattern described
     *         on the class, otherwise {@code expr} itself
     * @throws AnalysisException declared by the rule interface
     */
    @Override
    public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
        if (!(expr instanceof BinaryPredicate)) {
            return expr;
        }
        Expr lchild = expr.getChild(0);
        if (!lchild.getType().isDateType()) {
            return expr;
        }
        Expr valueExpr = expr.getChild(1);
        if (!valueExpr.getType().isDateType()) {
            return expr;
        }
        if (!valueExpr.isConstant()) {
            return expr;
        }
        if (valueExpr instanceof CastExpr) {
            // NOTE(review): the length is measured on the cast's full SQL text, not on the
            // bare date string - confirm that this is the intended comparison.
            String dateStr = valueExpr.toSql();
            // if it contains millisecond, microsecond, nanosecond, do nothing
            if (dateStr.length() > DATETIME_STRING_MAX_LENGTH && dateStr.contains(".")) {
                return expr;
            }
            return new NullLiteral();
        }
        return expr;
    }
}

View File

@ -58,6 +58,20 @@ public class DateLiteralTest {
Assert.assertFalse(hasException);
}
@Test
public void testParseDateTimeToHourORMinute() throws Exception {
    // All three inputs are parsed as DATETIME; the rendered SQL is expected to
    // contain the value expanded to second precision.
    final Type datetimeType = Type.DATETIME;

    // full precision: '%Y-%m-%d %H:%i:%s'
    DateLiteral toSecond = new DateLiteral("2020-12-13 12:13:14", datetimeType);
    Assert.assertTrue(toSecond.toSql().contains("2020-12-13 12:13:14"));

    // minute precision: '%Y-%m-%d %H:%i'
    DateLiteral toMinute = new DateLiteral("2020-12-13 12:13", datetimeType);
    Assert.assertTrue(toMinute.toSql().contains("2020-12-13 12:13:00"));

    // hour precision: '%Y-%m-%d %H'
    DateLiteral toHour = new DateLiteral("2020-12-13 12", datetimeType);
    Assert.assertTrue(toHour.toSql().contains("2020-12-13 12:00:00"));
}
@Test
public void uncheckedCastTo() {
boolean hasException = false;

View File

@ -1467,9 +1467,117 @@ public class QueryPlanTest {
//format less than
sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd') < '2021-03-02 10:01:28'";
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
System.out.println("wangxixu-explain:"+explainString);
Assert.assertTrue(explainString.contains("PREDICATES: `query_time` < 1614614400, `query_time` >= 0"));
}
@Test
public void testCheckInvalidDate() throws Exception {
    FeConstants.runningUnitTest = true;
    connectContext.setDatabase("default_cluster:test");

    // Each row is {query, substring the EXPLAIN output must contain}.
    // Rows are checked in order; the first failing row aborts the test.
    final String[][] cases = {
        // ---- predicates on column `day` ----
        // valid date
        {"select day from tbl_int_date where day = '2020-10-30'",
            "PREDICATES: `day` = '2020-10-30 00:00:00'"},
        {"select day from tbl_int_date where day = from_unixtime(1196440219)",
            "PREDICATES: `day` = '2007-12-01 00:30:19'"},
        {"select day from tbl_int_date where day = str_to_date('2014-12-21 12:34:56', '%Y-%m-%d %H:%i:%s');",
            "PREDICATES: `day` = '2014-12-21 12:34:56'"},
        // valid date given as an integer
        {"select day from tbl_int_date where day = 20201030",
            "PREDICATES: `day` = '2020-10-30 00:00:00'"},
        // valid date given as a compact string
        {"select day from tbl_int_date where day = '20201030'",
            "PREDICATES: `day` = '2020-10-30 00:00:00'"},
        // valid date containing microseconds
        {"select day from tbl_int_date where day = '2020-10-30 10:00:01.111111'",
            "PREDICATES: `day` = '2020-10-30 10:00:01.111111'"},
        // invalid dates: the plan is expected to contain NULL
        {"select day from tbl_int_date where day = '2020-10-32'", "NULL"},
        {"select day from tbl_int_date where day = '20201032'", "NULL"},
        {"select day from tbl_int_date where day = 20201032", "NULL"},
        {"select day from tbl_int_date where day = 'hello'", "NULL"},
        {"select day from tbl_int_date where day = 2020-10-30", "NULL"},
        {"select day from tbl_int_date where day = 10-30", "NULL"},

        // ---- predicates on column `date` ----
        // valid datetime
        {"select day from tbl_int_date where date = '2020-10-30 12:12:30'",
            "PREDICATES: `date` = '2020-10-30 12:12:30'"},
        // valid datetime, parsed to minute precision
        {"select day from tbl_int_date where date = '2020-10-30 12:12'",
            "PREDICATES: `date` = '2020-10-30 12:12:00'"},
        // valid datetime, parsed to hour precision
        {"select day from tbl_int_date where date = '2020-10-30 12'",
            "PREDICATES: `date` = '2020-10-30 12:00:00'"},
        // valid datetime given as an integer, a compact string and a plain date
        {"select day from tbl_int_date where date = 20201030",
            "PREDICATES: `date` = '2020-10-30 00:00:00'"},
        {"select day from tbl_int_date where date = '20201030'",
            "PREDICATES: `date` = '2020-10-30 00:00:00'"},
        {"select day from tbl_int_date where date = '2020-10-30'",
            "PREDICATES: `date` = '2020-10-30 00:00:00'"},
        // valid datetime containing microseconds
        {"select day from tbl_int_date where date = '2020-10-30 10:00:01.111111'",
            "PREDICATES: `date` = '2020-10-30 10:00:01.111111'"},
        // invalid datetimes: the plan is expected to contain NULL
        {"select day from tbl_int_date where date = '2020-10-32'", "NULL"},
        {"select day from tbl_int_date where date = 'hello'", "NULL"},
        {"select day from tbl_int_date where date = 2020-10-30", "NULL"},
        {"select day from tbl_int_date where date = 10-30", "NULL"},
        {"select day from tbl_int_date where date = '2020-10-12 12:23:76'", "NULL"},
        // 10-digit numeric string (timestamp-like): expected NULL
        {"select day from tbl_int_date where date = '1604031150'", "NULL"},
        // 13-digit numeric string (timestamp-like): expected NULL as well
        {"select day from tbl_int_date where date = '1604031150000'", "NULL"},
    };

    for (String[] testCase : cases) {
        String query = testCase[0];
        String expectedFragment = testCase[1];
        String plan = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + query);
        Assert.assertTrue(plan.contains(expectedFragment));
    }
}
}