From 98106ad60f17d8c6838864f516d3dda1653edf65 Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Wed, 21 Feb 2024 15:12:59 +0800 Subject: [PATCH] [fix](nereids) fix bug of unnest subuqery with having clause (#31152) 1. run PushDownFilterThroughAggregation, PushDownFilterThroughProject and MergeFilters before subquery unnesting 2. should keep plan unchanged even left semi join's condition is true (because the right table may be empty()) 3. PushDownFilterThroughProject need check if filter's input slots are all coming from project's output --- .../doris/nereids/jobs/executor/Rewriter.java | 47 ++++++++++- .../rules/rewrite/EliminateSemiJoin.java | 6 +- .../rewrite/PushDownFilterThroughProject.java | 77 +++++++++++++++---- .../rules/rewrite/EliminateSemiJoinTest.java | 2 +- .../subquery/subquery_unnesting.out | 4 + .../eliminate_join_condition.out | 4 +- .../subquery/subquery_unnesting.groovy | 2 + 7 files changed, 121 insertions(+), 21 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 8f60fbddfd..e2d386dc91 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -102,6 +102,7 @@ import org.apache.doris.nereids.rules.rewrite.PushConjunctsIntoOdbcScan; import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoinOneSide; import org.apache.doris.nereids.rules.rewrite.PushDownDistinctThroughJoin; +import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughAggregation; import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughProject; import org.apache.doris.nereids.rules.rewrite.PushDownLimit; import org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughJoin; @@ -167,7 +168,51 @@ public class Rewriter extends AbstractBatchJobExecutor { // after doing NormalizeAggregate in analysis job // we need run the following 2 rules to make AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION work bottomUp(new PullUpProjectUnderApply()), - topDown(new PushDownFilterThroughProject()), + topDown( + /* + * for subquery unnest, we need hand sql like + * + * SELECT * + * FROM table1 AS t1 + * WHERE EXISTS + * (SELECT `pk` + * FROM table2 AS t2 + * WHERE t1.pk = t2 .pk + * GROUP BY t2.pk + * HAVING t2.pk > 0) ; + * + * before: + * apply + * / \ + * child Filter(t2.pk > 0) + * | + * Project(t2.pk) + * | + * agg + * | + * Project(t2.pk) + * | + * Filter(t1.pk=t2.pk) + * | + * child + * + * after: + * apply + * / \ + * child agg + * | + * Project(t2.pk) + * | + * Filter(t1.pk=t2.pk and t2.pk >0) + * | + * child + * + * then PullUpCorrelatedFilterUnderApplyAggregateProject rule can match the node pattern + */ + new PushDownFilterThroughAggregation(), + new PushDownFilterThroughProject(), + new MergeFilters() + ), custom(RuleType.AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION, AggScalarSubQueryToWindowFunction::new), bottomUp( diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java index f514a1d14e..808dcd7b18 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java @@ -57,8 +57,10 @@ public class EliminateSemiJoin extends OneRewriteRuleFactory { } else { return null; } - if (joinType == JoinType.LEFT_SEMI_JOIN && condition - || (joinType == JoinType.LEFT_ANTI_JOIN && !condition)) { + if (joinType == JoinType.LEFT_SEMI_JOIN && condition) { + // the right table may be empty, we need keep plan unchanged + return null; + } else if (joinType == JoinType.LEFT_ANTI_JOIN && !condition) { return join.left(); } else if (joinType == JoinType.LEFT_SEMI_JOIN && !condition || (joinType == JoinType.LEFT_ANTI_JOIN && condition)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java index c0fe79fe01..77c90820a2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java @@ -17,18 +17,24 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.common.Pair; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.WindowExpression; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.nereids.util.PlanUtils; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; import java.util.List; +import java.util.Set; /** * Push down filter through project. @@ -37,6 +43,7 @@ import java.util.List; * output: * project(c+d as a, e as b) -> filter(c+d>2, e=0). */ + public class PushDownFilterThroughProject implements RewriteRuleFactory { @Override public List buildRules() { @@ -53,27 +60,65 @@ public class PushDownFilterThroughProject implements RewriteRuleFactory { .whenNot(filter -> filter.child().child().getProjects().stream() .anyMatch(expr -> expr.anyMatch(WindowExpression.class::isInstance))) .whenNot(filter -> filter.child().child().hasPushedDownToProjectionFunctions()) - .then(filter -> { - LogicalLimit> limit = filter.child(); - LogicalProject project = limit.child(); - - return project.withProjectsAndChild(project.getProjects(), - new LogicalFilter<>( - ExpressionUtils.replace(filter.getConjuncts(), - project.getAliasToProducer()), - limit.withChildren(project.child()))); - }).toRule(RuleType.PUSH_DOWN_FILTER_THROUGH_PROJECT_UNDER_LIMIT) + .then(PushDownFilterThroughProject::pushdownFilterThroughLimitProject) + .toRule(RuleType.PUSH_DOWN_FILTER_THROUGH_PROJECT_UNDER_LIMIT) ); } /** pushdown Filter through project */ - public static Plan pushdownFilterThroughProject(LogicalFilter> filter) { + private static Plan pushdownFilterThroughProject(LogicalFilter> filter) { LogicalProject project = filter.child(); - return project.withChildren( + Set childOutputs = project.getOutputSet(); + // we need run this rule before subquey unnesting + // therefore the conjuncts may contain slots from outer query + // we should only push down conjuncts without any outer query's slot + // so we split the conjuncts into two parts: + // splitConjuncts.first -> conjuncts having outer query slots which should NOT be pushed down + // splitConjuncts.second -> conjuncts without any outer query slots which should be pushed down + Pair, Set> splitConjuncts = + splitConjunctsByChildOutput(filter.getConjuncts(), childOutputs); + if (splitConjuncts.second.isEmpty()) { + // all conjuncts contain outer query's slots, no conjunct can be pushed down + // just return unchanged plan + return null; + } + project = (LogicalProject) project.withChildren(new LogicalFilter<>( + ExpressionUtils.replace(splitConjuncts.second, project.getAliasToProducer()), + project.child())); + return PlanUtils.filterOrSelf(splitConjuncts.first, project); + } + + private static Plan pushdownFilterThroughLimitProject( + LogicalFilter>> filter) { + LogicalLimit> limit = filter.child(); + LogicalProject project = limit.child(); + Set childOutputs = project.getOutputSet(); + // split the conjuncts by child's output + Pair, Set> splitConjuncts = + splitConjunctsByChildOutput(filter.getConjuncts(), childOutputs); + if (splitConjuncts.second.isEmpty()) { + return null; + } + project = project.withProjectsAndChild(project.getProjects(), new LogicalFilter<>( - ExpressionUtils.replace(filter.getConjuncts(), project.getAliasToProducer()), - project.child() - ) - ); + ExpressionUtils.replace(splitConjuncts.second, + project.getAliasToProducer()), + limit.withChildren(project.child()))); + return PlanUtils.filterOrSelf(splitConjuncts.first, project); + } + + private static Pair, Set> splitConjunctsByChildOutput( + Set conjuncts, Set childOutputs) { + Set pushDownPredicates = Sets.newLinkedHashSet(); + Set remainPredicates = Sets.newLinkedHashSet(); + conjuncts.forEach(conjunct -> { + Set conjunctSlots = conjunct.getInputSlots(); + if (childOutputs.containsAll(conjunctSlots)) { + pushDownPredicates.add(conjunct); + } else { + remainPredicates.add(conjunct); + } + }); + return Pair.of(remainPredicates, pushDownPredicates); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java index 69c1f41d90..61218ae6dc 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java @@ -50,7 +50,7 @@ class EliminateSemiJoinTest extends TestWithFeService implements MemoPatternMatc .rewrite() .matches( logicalResultSink( - logicalOlapScan() + logicalProject(logicalJoin()) ) ); } diff --git a/regression-test/data/nereids_p0/subquery/subquery_unnesting.out b/regression-test/data/nereids_p0/subquery/subquery_unnesting.out index df30f66818..e262f19b21 100644 --- a/regression-test/data/nereids_p0/subquery/subquery_unnesting.out +++ b/regression-test/data/nereids_p0/subquery/subquery_unnesting.out @@ -1504,3 +1504,7 @@ 22 3 24 4 +-- !select61 -- + +-- !select62 -- + diff --git a/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out b/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out index 4c765dcf37..571a81995e 100644 --- a/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out +++ b/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out @@ -25,7 +25,9 @@ PhysicalResultSink -- !left_semi_join -- PhysicalResultSink ---PhysicalOlapScan[t] +--NestedLoopJoin[LEFT_SEMI_JOIN] +----PhysicalOlapScan[t] +----PhysicalOlapScan[t] -- !left_anti_join -- PhysicalResultSink diff --git a/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy b/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy index 2bae4cc7bb..174a72df62 100644 --- a/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy +++ b/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy @@ -130,4 +130,6 @@ suite ("subquery_unnesting") { qt_select58 """select t1.* from t1 left join t2 on t1.k2 = t2.k3 and not exists ( select t3.k1 from t3 ) or t1.k1 < 10 order by t1.k1, t1.k2;""" qt_select59 """select t1.* from t1 left join t2 on t1.k2 = t2.k3 and not exists ( select t3.k1 from t3 ) order by t1.k1, t1.k2;""" qt_select60 """select * from t1 where exists(select distinct k1 from t2 where t1.k1 > t2.k3 or t1.k2 < t2.v1) order by t1.k1, t1.k2;""" + qt_select61 """SELECT * FROM t1 AS t1 WHERE EXISTS (SELECT k1 FROM t1 AS t2 WHERE t1.k1 <> t2.k1 + 7 GROUP BY k1 HAVING k1 >= 100);""" + qt_select62 """select * from t1 left semi join ( select * from t1 where t1.k1 < -1 ) l on true;""" }