[tpcds](nereids) add rule to eliminate empty relation #22203

1. eliminate empty relations
2. constant folding after filter pushdown
This commit is contained in:
minghong
2023-08-04 12:49:53 +08:00
committed by GitHub
parent 0e9fad4fe9
commit 62b1a7bcf3
12 changed files with 501 additions and 39 deletions

View File

@ -49,6 +49,7 @@ import org.apache.doris.nereids.rules.rewrite.CountDistinctRewrite;
import org.apache.doris.nereids.rules.rewrite.DeferMaterializeTopNResult;
import org.apache.doris.nereids.rules.rewrite.EliminateAggregate;
import org.apache.doris.nereids.rules.rewrite.EliminateDedupJoinCondition;
import org.apache.doris.nereids.rules.rewrite.EliminateEmptyRelation;
import org.apache.doris.nereids.rules.rewrite.EliminateFilter;
import org.apache.doris.nereids.rules.rewrite.EliminateGroupByConstant;
import org.apache.doris.nereids.rules.rewrite.EliminateLimit;
@ -228,7 +229,8 @@ public class Rewriter extends AbstractBatchJobExecutor {
bottomUp(RuleSet.PUSH_DOWN_FILTERS),
// after eliminate outer join, we can move some filters to join.otherJoinConjuncts,
// this can help to translate plan to backend
topDown(new PushFilterInsideJoin())
topDown(new PushFilterInsideJoin()),
topDown(new ExpressionNormalization())
),
custom(RuleType.CHECK_DATA_TYPES, CheckDataTypes::new),
@ -307,6 +309,10 @@ public class Rewriter extends AbstractBatchJobExecutor {
new CollectFilterAboveConsumer(),
new CollectProjectAboveConsumer()
)
),
topic("eliminate empty relation",
bottomUp(new EliminateEmptyRelation())
)
);

View File

@ -107,6 +107,12 @@ public enum RuleType {
ELIMINATE_GROUP_BY_CONSTANT(RuleTypeClass.REWRITE),
ELIMINATE_ORDER_BY_CONSTANT(RuleTypeClass.REWRITE),
ELIMINATE_HINT(RuleTypeClass.REWRITE),
ELIMINATE_JOIN_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
ELIMINATE_FILTER_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
ELIMINATE_AGG_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
ELIMINATE_UNION_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
ELIMINATE_INTERSECTION_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
ELIMINATE_EXCEPT_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
INFER_PREDICATES(RuleTypeClass.REWRITE),
INFER_AGG_NOT_NULL(RuleTypeClass.REWRITE),
INFER_SET_OPERATOR_DISTINCT(RuleTypeClass.REWRITE),

View File

@ -106,7 +106,7 @@ public class CheckAfterRewrite extends OneAnalysisRuleFactory {
notFromChildren = removeValidSlotsNotFromChildren(notFromChildren, childrenOutput);
if (!notFromChildren.isEmpty()) {
throw new AnalysisException(String.format("Input slot(s) not in child's output: %s in plan: %s,"
+ " child output is: %s",
+ " child output is: %s\n" + "plan tree:\n" + plan.treeString(),
StringUtils.join(notFromChildren.stream()
.map(ExpressionTrait::toString)
.collect(Collectors.toSet()), ", "), plan,

View File

@ -0,0 +1,193 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.rules.rewrite;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.ExprId;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.algebra.EmptyRelation;
import org.apache.doris.nereids.trees.plans.algebra.SetOperation;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalEmptyRelation;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.qe.ConnectContext;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
/**
* try to eliminate sub plan tree which contains EmptyRelation
*/
public class EliminateEmptyRelation implements RewriteRuleFactory {
@Override
public List<Rule> buildRules() {
return ImmutableList.of(
// join->empty
logicalJoin(any(), any())
.when(this::hasEmptyRelationChild)
.then(join -> {
if (canReplaceJoinByEmptyRelation(join)) {
return new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
join.getOutput());
}
return join;
})
.toRule(RuleType.ELIMINATE_JOIN_ON_EMPTYRELATION),
logicalFilter(logicalEmptyRelation())
.then(filter -> new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
filter.getOutput())
).toRule(RuleType.ELIMINATE_FILTER_ON_EMPTYRELATION),
logicalAggregate(logicalEmptyRelation())
.when(agg -> !agg.getGroupByExpressions().isEmpty())
.then(agg -> new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
agg.getOutput())
).toRule(RuleType.ELIMINATE_AGG_ON_EMPTYRELATION),
// after BuildAggForUnion rule, union may have more than 2 children.
logicalUnion(multi()).then(union -> {
if (union.children().size() == 0) {
// example: select * from (select 1,2 union select 3, 4) T;
// the children size is 0. (1,2) and (3,4) are stored in union.constantExprsList
return null;
}
List<Plan> nonEmptyChildren = union.children().stream()
.filter(child -> !(child instanceof EmptyRelation))
.collect(Collectors.toList());
if (nonEmptyChildren.isEmpty()) {
if (union.getConstantExprsList().isEmpty()) {
return new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
union.getOutput());
} else {
return union.withChildren(ImmutableList.of());
}
} else if (nonEmptyChildren.size() == 1) {
if (union.getConstantExprsList().isEmpty()) {
Plan child = nonEmptyChildren.get(0);
List<Slot> unionOutput = union.getOutput();
List<Slot> childOutput = child.getOutput();
List<NamedExpression> projects = Lists.newArrayList();
for (int i = 0; i < unionOutput.size(); i++) {
ExprId id = unionOutput.get(i).getExprId();
Alias alias = new Alias(id, childOutput.get(i), unionOutput.get(i).getName());
projects.add(alias);
}
LogicalProject project = new LogicalProject<>(projects, child);
return project;
}
}
if (union.children().size() != nonEmptyChildren.size()) {
return union.withChildren(ImmutableList.copyOf(nonEmptyChildren));
} else {
// no empty relation child, do not change union
return null;
}
}).toRule(RuleType.ELIMINATE_UNION_ON_EMPTYRELATION),
// set intersect
logicalIntersect(multi()).then(intersect -> {
List<Plan> emptyChildren = intersect.children().stream()
.filter(EmptyRelation.class::isInstance)
.collect(Collectors.toList());
if (emptyChildren.isEmpty()) {
// no empty relation child, plan not changed
return null;
} else {
// there is empty relation child, the intersection result is empty.
return new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
intersect.getOutput());
}
}).toRule(RuleType.ELIMINATE_INTERSECTION_ON_EMPTYRELATION),
// set except
logicalExcept(multi()).then(except -> {
Plan first = except.child(0);
if (first instanceof EmptyRelation) {
// empty except any => empty
return new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
except.getOutput());
} else {
List<Plan> nonEmptyChildren = except.children().stream()
.filter(child -> !(child instanceof EmptyRelation))
.collect(Collectors.toList());
if (nonEmptyChildren.size() == 1) {
// the first child is not empty, others are all empty
// case 1. FIRST except(distinct) empty = > project(AGG(FIRST))
// case 2. FIRST except(all) empty = > project(FIRST)
Plan projectChild;
if (except.getQualifier() == SetOperation.Qualifier.DISTINCT) {
List<NamedExpression> firstOutputNamedExpressions = first.getOutput()
.stream().map(slot -> (NamedExpression) slot)
.collect(ImmutableList.toImmutableList());
projectChild = new LogicalAggregate<>(ImmutableList.copyOf(firstOutputNamedExpressions),
firstOutputNamedExpressions, true, Optional.empty(), first);
} else {
projectChild = first;
}
List<Slot> exceptOutput = except.getOutput();
List<Slot> projectInputSlots = projectChild.getOutput();
List<NamedExpression> projects = Lists.newArrayList();
for (int i = 0; i < exceptOutput.size(); i++) {
ExprId id = exceptOutput.get(i).getExprId();
Alias alias = new Alias(id, projectInputSlots.get(i), exceptOutput.get(i).getName());
projects.add(alias);
}
LogicalProject project = new LogicalProject(projects, projectChild);
return project;
} else if (nonEmptyChildren.size() == except.children().size()) {
return null;
} else {
return except.withChildren(nonEmptyChildren);
}
}
}).toRule(RuleType.ELIMINATE_EXCEPT_ON_EMPTYRELATION)
);
}
private boolean hasEmptyRelationChild(LogicalJoin join) {
return join.left() instanceof EmptyRelation || join.right() instanceof EmptyRelation;
}
private boolean canReplaceJoinByEmptyRelation(LogicalJoin join) {
return (join.getJoinType() == JoinType.INNER_JOIN
|| join.getJoinType() == JoinType.LEFT_SEMI_JOIN
|| join.getJoinType() == JoinType.RIGHT_SEMI_JOIN
|| join.getJoinType() == JoinType.CROSS_JOIN)
|| (join.getJoinType() == JoinType.LEFT_OUTER_JOIN && join.left() instanceof EmptyRelation)
|| (join.getJoinType() == JoinType.RIGHT_OUTER_JOIN && join.right() instanceof EmptyRelation);
}
}