diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 8f60fbddfd..e2d386dc91 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -102,6 +102,7 @@ import org.apache.doris.nereids.rules.rewrite.PushConjunctsIntoOdbcScan; import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoinOneSide; import org.apache.doris.nereids.rules.rewrite.PushDownDistinctThroughJoin; +import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughAggregation; import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughProject; import org.apache.doris.nereids.rules.rewrite.PushDownLimit; import org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughJoin; @@ -167,7 +168,51 @@ public class Rewriter extends AbstractBatchJobExecutor { // after doing NormalizeAggregate in analysis job // we need run the following 2 rules to make AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION work bottomUp(new PullUpProjectUnderApply()), - topDown(new PushDownFilterThroughProject()), + topDown( + /* + * for subquery unnest, we need hand sql like + * + * SELECT * + * FROM table1 AS t1 + * WHERE EXISTS + * (SELECT `pk` + * FROM table2 AS t2 + * WHERE t1.pk = t2 .pk + * GROUP BY t2.pk + * HAVING t2.pk > 0) ; + * + * before: + * apply + * / \ + * child Filter(t2.pk > 0) + * | + * Project(t2.pk) + * | + * agg + * | + * Project(t2.pk) + * | + * Filter(t1.pk=t2.pk) + * | + * child + * + * after: + * apply + * / \ + * child agg + * | + * Project(t2.pk) + * | + * Filter(t1.pk=t2.pk and t2.pk >0) + * | + * child + * + * then PullUpCorrelatedFilterUnderApplyAggregateProject rule can match the node pattern + */ + new PushDownFilterThroughAggregation(), + new PushDownFilterThroughProject(), + new MergeFilters() + ), custom(RuleType.AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION, AggScalarSubQueryToWindowFunction::new), bottomUp( diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java index f514a1d14e..808dcd7b18 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java @@ -57,8 +57,10 @@ public class EliminateSemiJoin extends OneRewriteRuleFactory { } else { return null; } - if (joinType == JoinType.LEFT_SEMI_JOIN && condition - || (joinType == JoinType.LEFT_ANTI_JOIN && !condition)) { + if (joinType == JoinType.LEFT_SEMI_JOIN && condition) { + // the right table may be empty, we need keep plan unchanged + return null; + } else if (joinType == JoinType.LEFT_ANTI_JOIN && !condition) { return join.left(); } else if (joinType == JoinType.LEFT_SEMI_JOIN && !condition || (joinType == JoinType.LEFT_ANTI_JOIN && condition)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java index c0fe79fe01..77c90820a2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java @@ -17,18 +17,24 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.common.Pair; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.WindowExpression; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.nereids.util.PlanUtils; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; import java.util.List; +import java.util.Set; /** * Push down filter through project. @@ -37,6 +43,7 @@ import java.util.List; * output: * project(c+d as a, e as b) -> filter(c+d>2, e=0). */ + public class PushDownFilterThroughProject implements RewriteRuleFactory { @Override public List buildRules() { @@ -53,27 +60,65 @@ public class PushDownFilterThroughProject implements RewriteRuleFactory { .whenNot(filter -> filter.child().child().getProjects().stream() .anyMatch(expr -> expr.anyMatch(WindowExpression.class::isInstance))) .whenNot(filter -> filter.child().child().hasPushedDownToProjectionFunctions()) - .then(filter -> { - LogicalLimit> limit = filter.child(); - LogicalProject project = limit.child(); - - return project.withProjectsAndChild(project.getProjects(), - new LogicalFilter<>( - ExpressionUtils.replace(filter.getConjuncts(), - project.getAliasToProducer()), - limit.withChildren(project.child()))); - }).toRule(RuleType.PUSH_DOWN_FILTER_THROUGH_PROJECT_UNDER_LIMIT) + .then(PushDownFilterThroughProject::pushdownFilterThroughLimitProject) + .toRule(RuleType.PUSH_DOWN_FILTER_THROUGH_PROJECT_UNDER_LIMIT) ); } /** pushdown Filter through project */ - public static Plan pushdownFilterThroughProject(LogicalFilter> filter) { + private static Plan pushdownFilterThroughProject(LogicalFilter> filter) { LogicalProject project = filter.child(); - return project.withChildren( + Set childOutputs = project.getOutputSet(); + // we need run this rule before subquey unnesting + // therefore the conjuncts may contain slots from outer query + // we should only push down conjuncts without any outer query's slot + // so we split the conjuncts into two parts: + // splitConjuncts.first -> conjuncts having outer query slots which should NOT be pushed down + // splitConjuncts.second -> conjuncts without any outer query slots which should be pushed down + Pair, Set> splitConjuncts = + splitConjunctsByChildOutput(filter.getConjuncts(), childOutputs); + if (splitConjuncts.second.isEmpty()) { + // all conjuncts contain outer query's slots, no conjunct can be pushed down + // just return unchanged plan + return null; + } + project = (LogicalProject) project.withChildren(new LogicalFilter<>( + ExpressionUtils.replace(splitConjuncts.second, project.getAliasToProducer()), + project.child())); + return PlanUtils.filterOrSelf(splitConjuncts.first, project); + } + + private static Plan pushdownFilterThroughLimitProject( + LogicalFilter>> filter) { + LogicalLimit> limit = filter.child(); + LogicalProject project = limit.child(); + Set childOutputs = project.getOutputSet(); + // split the conjuncts by child's output + Pair, Set> splitConjuncts = + splitConjunctsByChildOutput(filter.getConjuncts(), childOutputs); + if (splitConjuncts.second.isEmpty()) { + return null; + } + project = project.withProjectsAndChild(project.getProjects(), new LogicalFilter<>( - ExpressionUtils.replace(filter.getConjuncts(), project.getAliasToProducer()), - project.child() - ) - ); + ExpressionUtils.replace(splitConjuncts.second, + project.getAliasToProducer()), + limit.withChildren(project.child()))); + return PlanUtils.filterOrSelf(splitConjuncts.first, project); + } + + private static Pair, Set> splitConjunctsByChildOutput( + Set conjuncts, Set childOutputs) { + Set pushDownPredicates = Sets.newLinkedHashSet(); + Set remainPredicates = Sets.newLinkedHashSet(); + conjuncts.forEach(conjunct -> { + Set conjunctSlots = conjunct.getInputSlots(); + if (childOutputs.containsAll(conjunctSlots)) { + pushDownPredicates.add(conjunct); + } else { + remainPredicates.add(conjunct); + } + }); + return Pair.of(remainPredicates, pushDownPredicates); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java index 69c1f41d90..61218ae6dc 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java @@ -50,7 +50,7 @@ class EliminateSemiJoinTest extends TestWithFeService implements MemoPatternMatc .rewrite() .matches( logicalResultSink( - logicalOlapScan() + logicalProject(logicalJoin()) ) ); } diff --git a/regression-test/data/nereids_p0/subquery/subquery_unnesting.out b/regression-test/data/nereids_p0/subquery/subquery_unnesting.out index df30f66818..e262f19b21 100644 --- a/regression-test/data/nereids_p0/subquery/subquery_unnesting.out +++ b/regression-test/data/nereids_p0/subquery/subquery_unnesting.out @@ -1504,3 +1504,7 @@ 22 3 24 4 +-- !select61 -- + +-- !select62 -- + diff --git a/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out b/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out index 4c765dcf37..571a81995e 100644 --- a/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out +++ b/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out @@ -25,7 +25,9 @@ PhysicalResultSink -- !left_semi_join -- PhysicalResultSink ---PhysicalOlapScan[t] +--NestedLoopJoin[LEFT_SEMI_JOIN] +----PhysicalOlapScan[t] +----PhysicalOlapScan[t] -- !left_anti_join -- PhysicalResultSink diff --git a/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy b/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy index 2bae4cc7bb..174a72df62 100644 --- a/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy +++ b/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy @@ -130,4 +130,6 @@ suite ("subquery_unnesting") { qt_select58 """select t1.* from t1 left join t2 on t1.k2 = t2.k3 and not exists ( select t3.k1 from t3 ) or t1.k1 < 10 order by t1.k1, t1.k2;""" qt_select59 """select t1.* from t1 left join t2 on t1.k2 = t2.k3 and not exists ( select t3.k1 from t3 ) order by t1.k1, t1.k2;""" qt_select60 """select * from t1 where exists(select distinct k1 from t2 where t1.k1 > t2.k3 or t1.k2 < t2.v1) order by t1.k1, t1.k2;""" + qt_select61 """SELECT * FROM t1 AS t1 WHERE EXISTS (SELECT k1 FROM t1 AS t2 WHERE t1.k1 <> t2.k1 + 7 GROUP BY k1 HAVING k1 >= 100);""" + qt_select62 """select * from t1 left semi join ( select * from t1 where t1.k1 < -1 ) l on true;""" }