From 8d7417697035ea2972769ccf8f820dcf479bfd39 Mon Sep 17 00:00:00 2001 From: xinghuayu007 <1450306854@qq.com> Date: Wed, 19 May 2021 09:25:58 +0800 Subject: [PATCH] [Optimize] Check invalid datetime to avoid scanning a lot of partitions (#5643) Support parsing date formats '%Y-%m-%d %H:%i' and '%Y-%m-%d %H' Support handling date time with nanoseconds --- .../org/apache/doris/analysis/Analyzer.java | 2 + .../apache/doris/analysis/DateLiteral.java | 15 ++- ...fyInvalidDateBinaryPredicatesDateRule.java | 63 ++++++++++ .../doris/analysis/DateLiteralTest.java | 14 +++ .../apache/doris/planner/QueryPlanTest.java | 110 +++++++++++++++++- 5 files changed, 202 insertions(+), 2 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java index dabf2f52fc..e5f872f6d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java @@ -41,6 +41,7 @@ import org.apache.doris.rewrite.ExprRewriter; import org.apache.doris.rewrite.FoldConstantsRule; import org.apache.doris.rewrite.RewriteFromUnixTimeRule; import org.apache.doris.rewrite.NormalizeBinaryPredicatesRule; +import org.apache.doris.rewrite.SimplifyInvalidDateBinaryPredicatesDateRule; import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmap; import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmapOrHLLRule; import org.apache.doris.rewrite.mvrewrite.CountFieldToSum; @@ -257,6 +258,7 @@ public class Analyzer { rules.add(NormalizeBinaryPredicatesRule.INSTANCE); rules.add(FoldConstantsRule.INSTANCE); rules.add(RewriteFromUnixTimeRule.INSTANCE); + rules.add(SimplifyInvalidDateBinaryPredicatesDateRule.INSTANCE); exprRewriter_ = new ExprRewriter(rules); // init mv rewriter List mvRewriteRules = Lists.newArrayList(); diff
--git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java index 4025c61027..eceef373d3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java @@ -63,8 +63,12 @@ public class DateLiteral extends LiteralExpr { private static final int DATEKEY_LENGTH = 8; private static final int MAX_MICROSECOND = 999999; + private static final int DATETIME_TO_MINUTE_STRING_LENGTH = 16; + private static final int DATETIME_TO_HOUR_STRING_LENGTH = 13; private static DateTimeFormatter DATE_TIME_FORMATTER = null; + private static DateTimeFormatter DATE_TIME_FORMATTER_TO_HOUR = null; + private static DateTimeFormatter DATE_TIME_FORMATTER_TO_MINUTE = null; private static DateTimeFormatter DATE_FORMATTER = null; /* * Dates containing two-digit year values are ambiguous because the century is unknown. @@ -90,6 +94,8 @@ public class DateLiteral extends LiteralExpr { static { try { DATE_TIME_FORMATTER = formatBuilder("%Y-%m-%d %H:%i:%s").toFormatter(); + DATE_TIME_FORMATTER_TO_HOUR = formatBuilder("%Y-%m-%d %H").toFormatter(); + DATE_TIME_FORMATTER_TO_MINUTE = formatBuilder("%Y-%m-%d %H:%i").toFormatter(); DATE_FORMATTER = formatBuilder("%Y-%m-%d").toFormatter(); DATEKEY_FORMATTER = formatBuilder("%Y%m%d").toFormatter(); DATE_TIME_FORMATTER_TWO_DIGIT = formatBuilder("%y-%m-%d %H:%i:%s").toFormatter(); @@ -268,7 +274,14 @@ public class DateLiteral extends LiteralExpr { if (s.split("-")[0].length() == 2) { dateTime = DATE_TIME_FORMATTER_TWO_DIGIT.parseLocalDateTime(s); } else { - dateTime = DATE_TIME_FORMATTER.parseLocalDateTime(s); + // parse format '%Y-%m-%d %H:%i' and '%Y-%m-%d %H' + if (s.length() == DATETIME_TO_MINUTE_STRING_LENGTH) { + dateTime = DATE_TIME_FORMATTER_TO_MINUTE.parseLocalDateTime(s); + } else if (s.length() == DATETIME_TO_HOUR_STRING_LENGTH) { + dateTime = 
DATE_TIME_FORMATTER_TO_HOUR.parseLocalDateTime(s); + } else { + dateTime = DATE_TIME_FORMATTER.parseLocalDateTime(s); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java new file mode 100644 index 0000000000..c049f96637 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.rewrite; + +import org.apache.doris.analysis.Analyzer; +import org.apache.doris.analysis.BinaryPredicate; +import org.apache.doris.analysis.CastExpr; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.NullLiteral; +import org.apache.doris.common.AnalysisException; + +/** + * This rule tries to convert date expressions: if a date is invalid, the predicate is + * converted into a null literal to avoid scanning all partitions. + * If a date value is invalid or contains nanoseconds, it is converted into a CastExpr. + * Only the rewriting pattern "slot + operator + date literal" is supported. + * Examples: + * date = "2020-10-32" => NULL + */ +public class SimplifyInvalidDateBinaryPredicatesDateRule implements ExprRewriteRule { + public static ExprRewriteRule INSTANCE = new SimplifyInvalidDateBinaryPredicatesDateRule(); + public static final int DATETIME_STRING_MAX_LENGTH = new String("yyyy-MM-dd HH:ii:ss").length(); + + @Override + public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException { + if (!(expr instanceof BinaryPredicate)) return expr; + Expr lchild = expr.getChild(0); + if (!lchild.getType().isDateType()) { + return expr; + } + Expr valueExpr = expr.getChild(1); + if (!valueExpr.getType().isDateType()) { + return expr; + } + if (!valueExpr.isConstant()) { + return expr; + } + if (valueExpr instanceof CastExpr) { + String dateStr = valueExpr.toSql(); + // if it contains millisecond, microsecond, nanosecond, do nothing + if (dateStr.length() > DATETIME_STRING_MAX_LENGTH && dateStr.contains(".")) { + return expr; + } + return new NullLiteral(); + } + return expr; + } +} \ No newline at end of file diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java index 9b94f9d912..b55ebc8b69 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java +++
b/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java @@ -58,6 +58,20 @@ public class DateLiteralTest { Assert.assertFalse(hasException); } + @Test + public void testParseDateTimeToHourORMinute() throws Exception{ + String s = "2020-12-13 12:13:14"; + Type type = Type.DATETIME; + DateLiteral literal = new DateLiteral(s, type); + Assert.assertTrue(literal.toSql().contains("2020-12-13 12:13:14")); + s = "2020-12-13 12:13"; + literal = new DateLiteral(s, type); + Assert.assertTrue(literal.toSql().contains("2020-12-13 12:13:00")); + s = "2020-12-13 12"; + literal = new DateLiteral(s, type); + Assert.assertTrue(literal.toSql().contains("2020-12-13 12:00:00")); + } + @Test public void uncheckedCastTo() { boolean hasException = false; diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java index 3ce7198e4e..f3343777d1 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java @@ -1467,9 +1467,117 @@ public class QueryPlanTest { //format less than sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd') < '2021-03-02 10:01:28'"; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); - System.out.println("wangxixu-explain:"+explainString); Assert.assertTrue(explainString.contains("PREDICATES: `query_time` < 1614614400, `query_time` >= 0")); } + @Test + public void testCheckInvalidDate() throws Exception { + FeConstants.runningUnitTest = true; + connectContext.setDatabase("default_cluster:test"); + //valid date + String sql = "select day from tbl_int_date where day = '2020-10-30'"; + String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2020-10-30 00:00:00'")); + sql = "select day from tbl_int_date where day = 
from_unixtime(1196440219)"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2007-12-01 00:30:19'")); + sql = "select day from tbl_int_date where day = str_to_date('2014-12-21 12:34:56', '%Y-%m-%d %H:%i:%s');"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2014-12-21 12:34:56'")); + //valid date + sql = "select day from tbl_int_date where day = 20201030"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2020-10-30 00:00:00'")); + //valid date + sql = "select day from tbl_int_date where day = '20201030'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2020-10-30 00:00:00'")); + //valid date contains micro second + sql = "select day from tbl_int_date where day = '2020-10-30 10:00:01.111111'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `day` = '2020-10-30 10:00:01.111111'")); + //invalid date + sql = "select day from tbl_int_date where day = '2020-10-32'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + //invalid date + sql = "select day from tbl_int_date where day = '20201032'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + //invalid date + sql = "select day from tbl_int_date where day = 20201032"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + //invalid date + sql = "select day 
from tbl_int_date where day = 'hello'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + //invalid date + sql = "select day from tbl_int_date where day = 2020-10-30"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + //invalid date + sql = "select day from tbl_int_date where day = 10-30"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + + + //valid datetime + sql = "select day from tbl_int_date where date = '2020-10-30 12:12:30'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 12:12:30'")); + //valid datetime, support parsing to minute + sql = "select day from tbl_int_date where date = '2020-10-30 12:12'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 12:12:00'")); + //valid datetime, support parsing to hour + sql = "select day from tbl_int_date where date = '2020-10-30 12'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 12:00:00'")); + //valid datetime + sql = "select day from tbl_int_date where date = 20201030"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 00:00:00'")); + //valid datetime + sql = "select day from tbl_int_date where date = '20201030'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 00:00:00'")); + 
//valid datetime + sql = "select day from tbl_int_date where date = '2020-10-30'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 00:00:00'")); + //valid datetime contains micro second + sql = "select day from tbl_int_date where date = '2020-10-30 10:00:01.111111'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `date` = '2020-10-30 10:00:01.111111'")); + //invalid datetime + sql = "select day from tbl_int_date where date = '2020-10-32'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + //invalid datetime + sql = "select day from tbl_int_date where date = 'hello'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + //invalid datetime + sql = "select day from tbl_int_date where date = 2020-10-30"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + //invalid datetime + sql = "select day from tbl_int_date where date = 10-30"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + //invalid datetime + sql = "select day from tbl_int_date where date = '2020-10-12 12:23:76'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + //invalid datetime with timestamp + sql = "select day from tbl_int_date where date = '1604031150'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + //invalid datetime with timestamp in milliseconds + sql =
"select day from tbl_int_date where date = '1604031150000'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("NULL")); + } }