[feature](Planner): Push down LimitDistinct through Union (#27745)

This commit is contained in:
jakevin
2023-11-29 21:12:42 +08:00
committed by GitHub
parent 83ed8d3cba
commit acc14d7e4c
7 changed files with 193 additions and 0 deletions

View File

@ -95,6 +95,7 @@ import org.apache.doris.nereids.rules.rewrite.PushDownDistinctThroughJoin;
import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughProject;
import org.apache.doris.nereids.rules.rewrite.PushDownLimit;
import org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughJoin;
import org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughUnion;
import org.apache.doris.nereids.rules.rewrite.PushDownMinMaxThroughJoin;
import org.apache.doris.nereids.rules.rewrite.PushDownSumThroughJoin;
import org.apache.doris.nereids.rules.rewrite.PushDownTopNThroughJoin;
@ -286,6 +287,7 @@ public class Rewriter extends AbstractBatchJobExecutor {
new PushDownLimit(),
new PushDownTopNThroughJoin(),
new PushDownLimitDistinctThroughJoin(),
new PushDownLimitDistinctThroughUnion(),
new PushDownTopNThroughWindow(),
new PushDownTopNThroughUnion()
),

View File

@ -269,6 +269,7 @@ public enum RuleType {
// limit distinct push down
PUSH_DOWN_LIMIT_DISTINCT_THROUGH_JOIN(RuleTypeClass.REWRITE),
PUSH_DOWN_LIMIT_DISTINCT_THROUGH_PROJECT_JOIN(RuleTypeClass.REWRITE),
PUSH_DOWN_LIMIT_DISTINCT_THROUGH_UNION(RuleTypeClass.REWRITE),
// adjust nullable
ADJUST_NULLABLE(RuleTypeClass.REWRITE),
ADJUST_CONJUNCTS_RETURN_TYPE(RuleTypeClass.REWRITE),

View File

@ -0,0 +1,100 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.rules.rewrite;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.algebra.SetOperation.Qualifier;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalLimit;
import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
import org.apache.doris.nereids.util.ExpressionUtils;
import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
* <pre>
* LIMIT-Distinct
* -> Union All
* -> child plan1
* -> child plan2
* -> child plan3
*
* rewritten to
*
* LIMIT-Distinct
* -> Union All
* -> LIMIT-Distinct
* -> child plan1
* -> LIMIT-Distinct
* -> LIMIT plan2
* -> TopN-Distinct
* -> LIMIT plan3
* </pre>
*/
public class PushDownLimitDistinctThroughUnion implements RewriteRuleFactory {
@Override
public List<Rule> buildRules() {
return ImmutableList.of(
logicalLimit(logicalAggregate(logicalUnion().when(union -> union.getQualifier() == Qualifier.ALL))
.when(agg -> agg.isDistinct()))
.then(limit -> {
LogicalAggregate<LogicalUnion> agg = limit.child();
LogicalUnion union = agg.child();
List<Plan> newChildren = new ArrayList<>();
for (Plan child : union.children()) {
Map<Expression, Expression> replaceMap = new HashMap<>();
for (int i = 0; i < union.getOutputs().size(); ++i) {
NamedExpression output = union.getOutputs().get(i);
replaceMap.put(output, child.getOutput().get(i));
}
List<Expression> newGroupBy = agg.getGroupByExpressions().stream()
.map(expr -> ExpressionUtils.replace(expr, replaceMap))
.collect(Collectors.toList());
List<NamedExpression> newOutputs = agg.getOutputs().stream()
.map(expr -> ExpressionUtils.replace(expr, replaceMap))
.collect(Collectors.toList());
LogicalAggregate<Plan> newAgg = new LogicalAggregate<>(newGroupBy, newOutputs, child);
LogicalLimit<Plan> newLimit = limit.withLimitChild(limit.getLimit() + limit.getOffset(),
0, newAgg);
newChildren.add(newLimit);
}
if (union.children().equals(newChildren)) {
return null;
}
return limit.withChildren(agg.withChildren(union.withChildren(newChildren)));
})
.toRule(RuleType.PUSH_DOWN_LIMIT_DISTINCT_THROUGH_UNION)
);
}
}

View File

@ -46,6 +46,7 @@ import java.util.Map;
*
* rewritten to
*
* TopN-Distinct
* -> Union All
* -> TopN-Distinct
* -> child plan1

View File

@ -23,6 +23,7 @@ import org.apache.doris.nereids.rules.exploration.mv.SlotMapping;
import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
import org.apache.doris.nereids.rules.expression.rules.FoldConstantRule;
import org.apache.doris.nereids.trees.TreeNode;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.Cast;
import org.apache.doris.nereids.trees.expressions.ComparisonPredicate;
@ -300,6 +301,19 @@ public class ExpressionUtils {
return expr.accept(ExpressionReplacer.INSTANCE, replaceMap);
}
/**
* replace NameExpression.
*/
public static NamedExpression replace(NamedExpression expr,
Map<? extends Expression, ? extends Expression> replaceMap) {
Expression newExpr = expr.accept(ExpressionReplacer.INSTANCE, replaceMap);
if (newExpr instanceof NamedExpression) {
return (NamedExpression) newExpr;
} else {
return new Alias(expr.getExprId(), newExpr, expr.getName());
}
}
public static List<Expression> replace(List<Expression> exprs,
Map<? extends Expression, ? extends Expression> replaceMap) {
return exprs.stream()