[fix](nereids) fix bug of unnesting subquery with having clause (#31152)

1. run PushDownFilterThroughAggregation, PushDownFilterThroughProject and MergeFilters before subquery unnesting
2. should keep the plan unchanged even if the left semi join's condition is true (because the right table may be empty)
3. PushDownFilterThroughProject needs to check whether the filter's input slots all come from the project's output
This commit is contained in:
starocean999
2024-02-21 15:12:59 +08:00
committed by yiguolei
parent 1abe9d4384
commit 98106ad60f
7 changed files with 121 additions and 21 deletions

View File

@ -102,6 +102,7 @@ import org.apache.doris.nereids.rules.rewrite.PushConjunctsIntoOdbcScan;
import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoin;
import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoinOneSide;
import org.apache.doris.nereids.rules.rewrite.PushDownDistinctThroughJoin;
import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughAggregation;
import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughProject;
import org.apache.doris.nereids.rules.rewrite.PushDownLimit;
import org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughJoin;
@ -167,7 +168,51 @@ public class Rewriter extends AbstractBatchJobExecutor {
// after doing NormalizeAggregate in analysis job
// we need to run the following 2 rules to make AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION work
bottomUp(new PullUpProjectUnderApply()),
topDown(new PushDownFilterThroughProject()),
topDown(
/*
* for subquery unnesting, we need to handle SQL like
*
* SELECT *
* FROM table1 AS t1
* WHERE EXISTS
* (SELECT `pk`
* FROM table2 AS t2
* WHERE t1.pk = t2 .pk
* GROUP BY t2.pk
* HAVING t2.pk > 0) ;
*
* before:
* apply
* / \
* child Filter(t2.pk > 0)
* |
* Project(t2.pk)
* |
* agg
* |
* Project(t2.pk)
* |
* Filter(t1.pk=t2.pk)
* |
* child
*
* after:
* apply
* / \
* child agg
* |
* Project(t2.pk)
* |
* Filter(t1.pk=t2.pk and t2.pk >0)
* |
* child
*
* then PullUpCorrelatedFilterUnderApplyAggregateProject rule can match the node pattern
*/
new PushDownFilterThroughAggregation(),
new PushDownFilterThroughProject(),
new MergeFilters()
),
custom(RuleType.AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION,
AggScalarSubQueryToWindowFunction::new),
bottomUp(

View File

@ -57,8 +57,10 @@ public class EliminateSemiJoin extends OneRewriteRuleFactory {
} else {
return null;
}
if (joinType == JoinType.LEFT_SEMI_JOIN && condition
|| (joinType == JoinType.LEFT_ANTI_JOIN && !condition)) {
if (joinType == JoinType.LEFT_SEMI_JOIN && condition) {
// the right table may be empty, so we need to keep the plan unchanged
return null;
} else if (joinType == JoinType.LEFT_ANTI_JOIN && !condition) {
return join.left();
} else if (joinType == JoinType.LEFT_SEMI_JOIN && !condition
|| (joinType == JoinType.LEFT_ANTI_JOIN && condition)) {

View File

@ -17,18 +17,24 @@
package org.apache.doris.nereids.rules.rewrite;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.WindowExpression;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
import org.apache.doris.nereids.trees.plans.logical.LogicalLimit;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.PlanUtils;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;
import java.util.List;
import java.util.Set;
/**
* Push down filter through project.
@ -37,6 +43,7 @@ import java.util.List;
* output:
* project(c+d as a, e as b) -> filter(c+d>2, e=0).
*/
public class PushDownFilterThroughProject implements RewriteRuleFactory {
@Override
public List<Rule> buildRules() {
@ -53,27 +60,65 @@ public class PushDownFilterThroughProject implements RewriteRuleFactory {
.whenNot(filter -> filter.child().child().getProjects().stream()
.anyMatch(expr -> expr.anyMatch(WindowExpression.class::isInstance)))
.whenNot(filter -> filter.child().child().hasPushedDownToProjectionFunctions())
.then(filter -> {
LogicalLimit<LogicalProject<Plan>> limit = filter.child();
LogicalProject<Plan> project = limit.child();
return project.withProjectsAndChild(project.getProjects(),
new LogicalFilter<>(
ExpressionUtils.replace(filter.getConjuncts(),
project.getAliasToProducer()),
limit.withChildren(project.child())));
}).toRule(RuleType.PUSH_DOWN_FILTER_THROUGH_PROJECT_UNDER_LIMIT)
.then(PushDownFilterThroughProject::pushdownFilterThroughLimitProject)
.toRule(RuleType.PUSH_DOWN_FILTER_THROUGH_PROJECT_UNDER_LIMIT)
);
}
/** pushdown Filter through project */
public static Plan pushdownFilterThroughProject(LogicalFilter<LogicalProject<Plan>> filter) {
private static Plan pushdownFilterThroughProject(LogicalFilter<LogicalProject<Plan>> filter) {
LogicalProject<Plan> project = filter.child();
return project.withChildren(
Set<Slot> childOutputs = project.getOutputSet();
// we need to run this rule before subquery unnesting
// therefore the conjuncts may contain slots from outer query
// we should only push down conjuncts without any outer query's slot
// so we split the conjuncts into two parts:
// splitConjuncts.first -> conjuncts having outer query slots which should NOT be pushed down
// splitConjuncts.second -> conjuncts without any outer query slots which should be pushed down
Pair<Set<Expression>, Set<Expression>> splitConjuncts =
splitConjunctsByChildOutput(filter.getConjuncts(), childOutputs);
if (splitConjuncts.second.isEmpty()) {
// all conjuncts contain outer query's slots, no conjunct can be pushed down
// just return unchanged plan
return null;
}
project = (LogicalProject<Plan>) project.withChildren(new LogicalFilter<>(
ExpressionUtils.replace(splitConjuncts.second, project.getAliasToProducer()),
project.child()));
return PlanUtils.filterOrSelf(splitConjuncts.first, project);
}
private static Plan pushdownFilterThroughLimitProject(
LogicalFilter<LogicalLimit<LogicalProject<Plan>>> filter) {
LogicalLimit<LogicalProject<Plan>> limit = filter.child();
LogicalProject<Plan> project = limit.child();
Set<Slot> childOutputs = project.getOutputSet();
// split the conjuncts by child's output
Pair<Set<Expression>, Set<Expression>> splitConjuncts =
splitConjunctsByChildOutput(filter.getConjuncts(), childOutputs);
if (splitConjuncts.second.isEmpty()) {
return null;
}
project = project.withProjectsAndChild(project.getProjects(),
new LogicalFilter<>(
ExpressionUtils.replace(filter.getConjuncts(), project.getAliasToProducer()),
project.child()
)
);
ExpressionUtils.replace(splitConjuncts.second,
project.getAliasToProducer()),
limit.withChildren(project.child())));
return PlanUtils.filterOrSelf(splitConjuncts.first, project);
}
/**
 * Partitions the given conjuncts according to whether every input slot of a
 * conjunct is contained in {@code childOutputs}.
 *
 * @param conjuncts    the filter conjuncts to classify
 * @param childOutputs the slots a conjunct may reference and still be pushed down
 * @return a pair whose {@code first} holds the conjuncts that reference at least one
 *         slot outside {@code childOutputs} (they must stay where they are), and whose
 *         {@code second} holds the conjuncts fully covered by {@code childOutputs}
 *         (they can be pushed down); both sets are insertion-ordered
 */
private static Pair<Set<Expression>, Set<Expression>> splitConjunctsByChildOutput(
        Set<Expression> conjuncts, Set<Slot> childOutputs) {
    Set<Expression> keptAbove = Sets.newLinkedHashSet();
    Set<Expression> pushable = Sets.newLinkedHashSet();
    for (Expression conjunct : conjuncts) {
        // a conjunct is pushable only when all of its input slots come from the child outputs
        boolean coveredByChild = childOutputs.containsAll(conjunct.getInputSlots());
        Set<Expression> target = coveredByChild ? pushable : keptAbove;
        target.add(conjunct);
    }
    // order matters to callers: first = remaining conjuncts, second = pushed-down conjuncts
    return Pair.of(keptAbove, pushable);
}
}