From 2d5ffac590df924faa5c7d1c51f108bab7d4f82f Mon Sep 17 00:00:00 2001 From: minghong Date: Thu, 11 Aug 2022 17:13:26 +0800 Subject: [PATCH] [fix](optimization) InferFiltersRule bug: a self inner join on a view, which contains where clause, will cause mis-inference. (#11566) --- .../org/apache/doris/analysis/Analyzer.java | 26 +++++++++ .../apache/doris/analysis/InlineViewRef.java | 2 +- .../org/apache/doris/analysis/SelectStmt.java | 5 ++ .../doris/planner/PredicatePushDown.java | 3 + .../test_pushdown_pred_to_view.out | 4 ++ .../test_pushdown_pred_to_view.groovy | 58 +++++++++++++++++++ 6 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 regression-test/data/correctness/test_pushdown_pred_to_view.out create mode 100644 regression-test/suites/correctness/test_pushdown_pred_to_view.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java index 9500b8df82..77ede5e754 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java @@ -152,6 +152,17 @@ public class Analyzer { // Flag indicating if this analyzer instance belongs to a subquery. private boolean isSubquery = false; + public boolean isInlineView() { + return isInlineView; + } + + public void setInlineView(boolean inlineView) { + isInlineView = inlineView; + } + + // Flag indicating if this analyzer instance belongs to an inlineview. + private boolean isInlineView = false; + // Flag indicating whether this analyzer belongs to a WITH clause view. private boolean isWithClause = false; @@ -776,6 +787,21 @@ public class Analyzer { d = resolveColumnRef(colName); } else { d = resolveColumnRef(newTblName, colName); + //in reanalyze, the inferred expr may contain upper level table alias, and the upper level alias has not + // been PROCESSED. So we resolve this column without tbl name. + // For example: a view V "select * from t where t.a>1" + // sql: select * from V as t1 join V as t2 on t1.a=t2.a and t1.a in (1,2) + // after first analyze, sql becomes: + // select * from V as t1 join V as t2 on t1.a=t2.a and t1.a in (1,2) and t2.a in (1, 2) + // in reanalyze, when we process V as t2, we indeed process sql like this: + // select * from t where t.a>1 and t2.a in (1, 2) + // in order to resolve t2.a, we have to ignore "t2" + // =================================================== + // Someone may concern that if t2 is not alias of t, this fix will cause incorrect resolve. In fact, + // this does not happen, since we push t2.a in (1.2) down to this inline view, t2 must be alias of t. + if (d == null && isInlineView) { + d = resolveColumnRef(colName); + } } /* * Now, we only support the columns in the subquery to associate the outer query columns in parent level. diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java index 478f76998b..032fe0b260 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java @@ -185,7 +185,7 @@ public class InlineViewRef extends TableRef { // Analyze the inline view query statement with its own analyzer inlineViewAnalyzer = new Analyzer(analyzer); - + inlineViewAnalyzer.setInlineView(true); queryStmt.analyze(inlineViewAnalyzer); correlatedTupleIds.addAll(queryStmt.getCorrelatedTupleIds(inlineViewAnalyzer)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java index 62062243db..307e13b296 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -1346,6 +1346,11 @@ public class SelectStmt extends QueryStmt { ref.rewriteExprs(rewriter, analyzer); } // Also equal exprs in the statements of subqueries. + // TODO: (minghong) if this a view, even no whereClause, + // we should analyze whereClause to enable filter inference + // for example, a view without where clause, `V`, + // `select * from T join V on T.id = V.id and T.id=1` + // we could infer `V.id=1` List subqueryExprs = Lists.newArrayList(); if (whereClause != null) { whereClause = rewriter.rewrite(whereClause, analyzer, ExprRewriter.ClauseType.WHERE_CLAUSE); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PredicatePushDown.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PredicatePushDown.java index 76c2ace45f..4f11d1a17e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PredicatePushDown.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PredicatePushDown.java @@ -129,6 +129,9 @@ public class PredicatePushDown { } } + // TODO: (minghong) here is a bug. For example, this is a left join, we cannot infer "t2.id = 1" + // by "t1.id=1" and "t1.id=t2.id". + // we should not do inference work here. it should be done in some rule like InferFilterRule. // Rewrite the oldPredicate with new leftChild // For example: oldPredicate is t1.id = 1, leftChild is t2.id, will return t2.id = 1 private static Expr rewritePredicate(Analyzer analyzer, Expr oldPredicate, Expr leftChild) { diff --git a/regression-test/data/correctness/test_pushdown_pred_to_view.out b/regression-test/data/correctness/test_pushdown_pred_to_view.out new file mode 100644 index 0000000000..a4cca003b9 --- /dev/null +++ b/regression-test/data/correctness/test_pushdown_pred_to_view.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 1 + diff --git a/regression-test/suites/correctness/test_pushdown_pred_to_view.groovy b/regression-test/suites/correctness/test_pushdown_pred_to_view.groovy new file mode 100644 index 0000000000..41afda7351 --- /dev/null +++ b/regression-test/suites/correctness/test_pushdown_pred_to_view.groovy @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one + // or more contributor license agreements. See the NOTICE file + // distributed with this work for additional information + // regarding copyright ownership. The ASF licenses this file + // to you under the Apache License, Version 2.0 (the + // "License"); you may not use this file except in compliance + // with the License. You may obtain a copy of the License at + // + // http://www.apache.org/licenses/LICENSE-2.0 + // + // Unless required by applicable law or agreed to in writing, + // software distributed under the License is distributed on an + // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + // KIND, either express or implied. See the License for the + // specific language governing permissions and limitations + // under the License. + + +/* +How to produce the bug: +suppose we have table T, and a view V: select * from T where T.id>0 +When we execute sql: select * from V as v1 join V as v2 on v1.id=v2.id where v1.id in (1,2); +by InferFilterRule, { v1.id=v2.id , v1.id in (1,2) } => v2.id in (1,2) +and then we push v1.id in (1,2) and v2.id in (1,2) down to v1 and v2, respectively. + +In re-analyze phase, we expand v1 with infered condition, we have sql: select * from T where T.id>0 and v1.id in (1,2) +The bug is we cannot resolve v1.id in context of the expanded sql. +The same resolve error occurs when re-analyze v2. +*/ + suite("test_pushdown_pred_to_view") { + sql """ DROP TABLE IF EXISTS T """ + sql """ + CREATE TABLE `T` ( + `id` int + ) ENGINE=OLAP + AGGREGATE KEY(`id`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ); + """ + sql "drop view if exists V;" + sql """ + create view V as select * from T where id > 0; + """ + + sql """ + insert into T values(1); + """ + + qt_sql """ + select * from V as v1 join V as v2 on v1.id=v2.id and v1.id>0; + """ + } +