[feature] (Nereids) add rule to merge consecutive project nodes (#11262)

Merge consecutive project nodes. For example:
logical plan tree:
```
               project(a)
                  |
                project(a,b)
                  |
                project(a, b, c)
                  |
                scan
```
transformed to:
```
                 project(a)
                    |
                 scan
```
This commit is contained in:
minghong
2022-08-02 15:58:55 +08:00
committed by GitHub
parent 38ffe685b5
commit cd6fbd09bf
6 changed files with 230 additions and 21 deletions

View File

@ -102,7 +102,7 @@ public class UnboundRelation extends LogicalLeaf implements Unbound {
@Override
public String toString() {
return "UnresolvedRelation" + "(" + StringUtils.join(nameParts, ".") + ")";
return "UnboundRelation" + "(" + StringUtils.join(nameParts, ".") + ")";
}
@Override

View File

@ -56,6 +56,7 @@ public enum RuleType {
REWRITE_JOIN_EXPRESSION(RuleTypeClass.REWRITE),
REORDER_JOIN(RuleTypeClass.REWRITE),
MERGE_CONSECUTIVE_FILTERS(RuleTypeClass.REWRITE),
MERGE_CONSECUTIVE_PROJECTS(RuleTypeClass.REWRITE),
REWRITE_SENTINEL(RuleTypeClass.REWRITE),
// exploration rules

View File

@ -24,7 +24,7 @@ import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.functions.AggregateFunction;
import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionReplacer;
import org.apache.doris.nereids.trees.plans.AggPhase;
import org.apache.doris.nereids.trees.plans.GroupPlan;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
@ -135,23 +135,4 @@ public class AggregateDisassemble extends OneRewriteRuleFactory {
);
}).toRule(RuleType.AGGREGATE_DISASSEMBLE);
}
/**
 * Expression rewriter driven by a substitution map: an expression that is present as a key
 * in the map is replaced by the mapped value; any other expression is handed to
 * {@code DefaultExpressionRewriter#visit} with the same map.
 * Matching relies on {@code Map#containsKey}, so only whole nodes that equal a key are replaced.
 */
@SuppressWarnings("InnerClassMayBeStatic")
private static class ExpressionReplacer
extends DefaultExpressionRewriter<Map<Expression, Expression>> {
// Shared instance used by callers; the class declares no fields besides this one.
private static final ExpressionReplacer INSTANCE = new ExpressionReplacer();
@Override
public Expression visit(Expression expr, Map<Expression, Expression> substitutionMap) {
// TODO: we need to do sub tree match and replace. but we do not have semanticEquals now.
// e.g. a + 1 + 2 in output expression should be replaced by
// (slot reference to update phase out (a + 1)) + 2, if we do group by a + 1
// currently, we could only handle output expression same with group by expression
if (substitutionMap.containsKey(expr)) {
// Exact key match: return the replacement without looking at the children.
return substitutionMap.get(expr);
} else {
// No match for this node: defer to the default rewriter.
return super.visit(expr, substitutionMap);
}
}
}
}

View File

@ -0,0 +1,72 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.rules.rewrite.logical;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.rules.rewrite.OneRewriteRuleFactory;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionReplacer;
import org.apache.doris.nereids.trees.plans.GroupPlan;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * This rule merges two consecutive project nodes into a single project.
 * For example:
 * logical plan tree:
 *                project(a)
 *                  |
 *                project(a,b)
 *                  |
 *                project(a, b, c)
 *                  |
 *                scan
 * transformed to:
 *                project(a)
 *                  |
 *                scan
 *
 * <p>One application collapses one parent/child pair of projects. Slots of the parent that refer
 * to aliases defined by the child are replaced by the aliased expressions, e.g.
 * {@code project(X + 2 AS Y)} over {@code project(A + 1 AS X)} becomes
 * {@code project((A + 1) + 2 AS Y)}.
 */
public class MergeConsecutiveProjects extends OneRewriteRuleFactory {
    @Override
    public Rule build() {
        return logicalProject(logicalProject()).then(project -> {
            LogicalProject<GroupPlan> childProject = project.child();
            List<NamedExpression> childAliases = childProject.getProjects().stream()
                    .filter(e -> e instanceof Alias)
                    .collect(Collectors.toList());
            // Output slot of each child alias -> the alias itself. Used when the parent projects
            // that slot directly, so the merged projection is still a NamedExpression.
            // The merge function keeps the first entry instead of throwing on a repeated alias.
            Map<Expression, NamedExpression> aliasBySlot = childAliases.stream()
                    .collect(Collectors.toMap(
                            NamedExpression::toSlot, e -> e, (first, second) -> first));
            // Output slot of each child alias -> the aliased expression. Used for slots nested
            // inside a larger parent expression.
            Map<Expression, Expression> childAliasMap = childAliases.stream()
                    .collect(Collectors.toMap(
                            NamedExpression::toSlot, e -> e.child(0), (first, second) -> first));
            List<NamedExpression> mergedProjections = project.getProjects().stream()
                    .map(e -> mergeProjection(e, aliasBySlot, childAliasMap))
                    .collect(Collectors.toList());
            return new LogicalProject<>(mergedProjections, childProject.child());
        }).toRule(RuleType.MERGE_CONSECUTIVE_PROJECTS);
    }

    /**
     * Rewrites one projection of the parent project so that it can be evaluated directly on the
     * grandchild plan.
     *
     * @param projection a projection of the parent project
     * @param aliasBySlot output slot of each child alias mapped to that alias
     * @param childAliasMap output slot of each child alias mapped to the aliased expression
     * @return the child alias when {@code projection} is exactly its output slot (replacing it
     *         with the bare aliased expression would not be a {@link NamedExpression});
     *         otherwise {@code projection} with nested alias slots substituted
     */
    private static NamedExpression mergeProjection(NamedExpression projection,
            Map<Expression, NamedExpression> aliasBySlot,
            Map<Expression, Expression> childAliasMap) {
        NamedExpression directAlias = aliasBySlot.get(projection);
        if (directAlias != null) {
            return directAlias;
        }
        return (NamedExpression) ExpressionReplacer.INSTANCE.visit(projection, childAliasMap);
    }
}

View File

@ -0,0 +1,40 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.trees.expressions.visitor;
import org.apache.doris.nereids.trees.expressions.Expression;
import java.util.Map;
/**
 * Expression rewriter that substitutes nodes according to a caller-supplied map.
 *
 * <p>A node that is a key of the map is swapped for the mapped value; every other node is passed
 * on to {@link DefaultExpressionRewriter}. Keys are looked up with {@link Map#containsKey}, so a
 * node is replaced only when it equals a key as a whole.
 */
public class ExpressionReplacer
        extends DefaultExpressionRewriter<Map<Expression, Expression>> {
    public static final ExpressionReplacer INSTANCE = new ExpressionReplacer();

    /**
     * Replaces {@code expr} when the map has an entry for it.
     *
     * @param expr the expression currently being visited
     * @param substitutionMap expressions to replace, mapped to their replacements
     * @return the mapped replacement if {@code expr} is a key of the map, otherwise the result of
     *         the default rewriter for {@code expr}
     */
    @Override
    public Expression visit(Expression expr, Map<Expression, Expression> substitutionMap) {
        if (!substitutionMap.containsKey(expr)) {
            // Not a substitution target itself: let the default rewriter handle this node.
            return super.visit(expr, substitutionMap);
        }
        return substitutionMap.get(expr);
    }
}

View File

@ -0,0 +1,115 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.rules.rewrite.logical;
import org.apache.doris.nereids.PlannerContext;
import org.apache.doris.nereids.analyzer.UnboundRelation;
import org.apache.doris.nereids.memo.Memo;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.trees.expressions.Add;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.IntegerLiteral;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.qe.ConnectContext;
import com.google.common.collect.Lists;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.List;
/**
 * Unit tests for {@link MergeConsecutiveProjects}.
 */
public class MergeConsecutiveProjectsTest {
    /**
     * project3(A)
     *      |
     * project2(A, B)
     *      |
     * project1(A, B, C)
     *      |
     * relation
     * transform to :
     * project(A)
     *      |
     * relation
     */
    @Test
    public void testMergeConsecutiveProjects() {
        UnboundRelation relation = new UnboundRelation(Lists.newArrayList("db", "table"));
        NamedExpression colA = new SlotReference("a", IntegerType.INSTANCE, true, Lists.newArrayList("a"));
        NamedExpression colB = new SlotReference("b", IntegerType.INSTANCE, true, Lists.newArrayList("b"));
        NamedExpression colC = new SlotReference("c", IntegerType.INSTANCE, true, Lists.newArrayList("c"));
        LogicalProject<Plan> project1 = new LogicalProject<>(Lists.newArrayList(colA, colB, colC), relation);
        LogicalProject<Plan> project2 = new LogicalProject<>(Lists.newArrayList(colA, colB), project1);
        LogicalProject<Plan> project3 = new LogicalProject<>(Lists.newArrayList(colA), project2);

        Plan plan = rewrite(project3);

        Assertions.assertTrue(plan instanceof LogicalProject);
        Assertions.assertEquals(Lists.newArrayList(colA), ((LogicalProject<?>) plan).getProjects());
        Assertions.assertTrue(plan.child(0) instanceof UnboundRelation);
    }

    /**
     * project2(X + 2 as Y)
     *      |
     * project1(B, C, A+1 as X)
     *      |
     * relation
     * transform to :
     * project2((A + 1) + 2 as Y)
     *      |
     * relation
     */
    @Test
    public void testMergeConsecutiveProjectsWithAlias() {
        UnboundRelation relation = new UnboundRelation(Lists.newArrayList("db", "table"));
        NamedExpression colA = new SlotReference("a", IntegerType.INSTANCE, true, Lists.newArrayList("a"));
        NamedExpression colB = new SlotReference("b", IntegerType.INSTANCE, true, Lists.newArrayList("b"));
        NamedExpression colC = new SlotReference("c", IntegerType.INSTANCE, true, Lists.newArrayList("c"));
        Alias alias = new Alias(new Add(colA, new IntegerLiteral(1)), "X");
        Slot aliasRef = alias.toSlot();
        LogicalProject<Plan> project1 = new LogicalProject<>(Lists.newArrayList(colB, colC, alias), relation);
        List<NamedExpression> upperProjections = Lists.newArrayList(
                new Alias(new Add(aliasRef, new IntegerLiteral(2)), "Y"));
        LogicalProject<Plan> project2 = new LogicalProject<>(upperProjections, project1);

        Plan plan = rewrite(project2);

        Assertions.assertTrue(plan instanceof LogicalProject);
        LogicalProject<?> finalProject = (LogicalProject<?>) plan;
        Add finalExpression = new Add(
                new Add(colA, new IntegerLiteral(1)),
                new IntegerLiteral(2)
        );
        Assertions.assertEquals(1, finalProject.getProjects().size());
        Assertions.assertEquals(finalExpression, ((Alias) finalProject.getProjects().get(0)).child());
        // The merged project must sit directly on the relation, as drawn in the Javadoc above.
        Assertions.assertTrue(plan.child(0) instanceof UnboundRelation);
    }

    /**
     * Runs a bottom-up rewrite that contains only {@link MergeConsecutiveProjects} on the given
     * plan and returns the plan copied out of the memo afterwards.
     */
    private static Plan rewrite(Plan root) {
        PlannerContext plannerContext = new Memo(root)
                .newPlannerContext(new ConnectContext())
                .setDefaultJobContext();
        List<Rule> rules = Lists.newArrayList(new MergeConsecutiveProjects().build());
        plannerContext.bottomUpRewrite(rules);
        return plannerContext.getMemo().copyOut();
    }
}