[Feat](nereids) add transform rule SimplifyWindowExpression (#33647)
Rewrite func(para) over (partition by unique_keys): (1) if func is count(non-null) or rank/dense_rank/row_number, the window expression is rewritten to the constant 1; (2) if func(para) is min/max/sum/avg/first_value/last_value, the window expression is rewritten to para. E.g. `select max(c1) over(partition by pk) from t1;` -> `select c1 from t1;`
This commit is contained in:
@ -121,6 +121,7 @@ import org.apache.doris.nereids.rules.rewrite.PushProjectThroughUnion;
|
||||
import org.apache.doris.nereids.rules.rewrite.ReduceAggregateChildOutputRows;
|
||||
import org.apache.doris.nereids.rules.rewrite.ReorderJoin;
|
||||
import org.apache.doris.nereids.rules.rewrite.RewriteCteChildren;
|
||||
import org.apache.doris.nereids.rules.rewrite.SimplifyWindowExpression;
|
||||
import org.apache.doris.nereids.rules.rewrite.SplitLimit;
|
||||
import org.apache.doris.nereids.rules.rewrite.SumLiteralRewrite;
|
||||
import org.apache.doris.nereids.rules.rewrite.TransposeSemiJoinAgg;
|
||||
@ -226,7 +227,8 @@ public class Rewriter extends AbstractBatchJobExecutor {
|
||||
topic("Window analysis",
|
||||
topDown(
|
||||
new ExtractAndNormalizeWindowExpression(),
|
||||
new CheckAndStandardizeWindowFunctionAndFrame()
|
||||
new CheckAndStandardizeWindowFunctionAndFrame(),
|
||||
new SimplifyWindowExpression()
|
||||
)
|
||||
),
|
||||
topic("Rewrite join",
|
||||
|
||||
@ -96,6 +96,7 @@ public enum RuleType {
|
||||
NORMALIZE_SORT(RuleTypeClass.REWRITE),
|
||||
NORMALIZE_REPEAT(RuleTypeClass.REWRITE),
|
||||
EXTRACT_AND_NORMALIZE_WINDOW_EXPRESSIONS(RuleTypeClass.REWRITE),
|
||||
SIMPLIFY_WINDOW_EXPRESSION(RuleTypeClass.REWRITE),
|
||||
CHECK_AND_STANDARDIZE_WINDOW_FUNCTION_AND_FRAME(RuleTypeClass.REWRITE),
|
||||
CHECK_MATCH_EXPRESSION(RuleTypeClass.REWRITE),
|
||||
CREATE_PARTITION_TOPN_FOR_WINDOW(RuleTypeClass.REWRITE),
|
||||
|
||||
@ -0,0 +1,123 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.nereids.rules.rewrite;
|
||||
|
||||
import org.apache.doris.nereids.annotation.DependsRules;
|
||||
import org.apache.doris.nereids.pattern.MatchingContext;
|
||||
import org.apache.doris.nereids.rules.Rule;
|
||||
import org.apache.doris.nereids.rules.RuleType;
|
||||
import org.apache.doris.nereids.trees.expressions.Alias;
|
||||
import org.apache.doris.nereids.trees.expressions.Expression;
|
||||
import org.apache.doris.nereids.trees.expressions.NamedExpression;
|
||||
import org.apache.doris.nereids.trees.expressions.Slot;
|
||||
import org.apache.doris.nereids.trees.expressions.WindowExpression;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.BoundFunction;
|
||||
import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral;
|
||||
import org.apache.doris.nereids.trees.plans.Plan;
|
||||
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
|
||||
import org.apache.doris.nereids.trees.plans.logical.LogicalWindow;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* rewrite func(para) over (partition by unique_keys)
|
||||
* 1. func() is count(non-null) or rank/dense_rank/row_number -> 1
|
||||
* 2. func(para) is min/max/sum/avg/first_value/last_value -> para
|
||||
* e.g
|
||||
* select max(c1) over(partition by pk) from t1;
|
||||
* -> select c1 from t1;
|
||||
* */
|
||||
@DependsRules({
|
||||
ExtractAndNormalizeWindowExpression.class
|
||||
})
|
||||
public class SimplifyWindowExpression extends OneRewriteRuleFactory {
|
||||
private static final String COUNT = "count";
|
||||
private static final ImmutableSet<String> REWRRITE_TO_CONST_WINDOW_FUNCTIONS =
|
||||
ImmutableSet.of("rank", "dense_rank", "row_number");
|
||||
private static final ImmutableSet<String> REWRRITE_TO_SLOT_WINDOW_FUNCTIONS =
|
||||
ImmutableSet.of("min", "max", "sum", "avg", "first_value", "last_value");
|
||||
|
||||
@Override
|
||||
public Rule build() {
|
||||
return logicalWindow(any()).thenApply(this::simplify)
|
||||
.toRule(RuleType.SIMPLIFY_WINDOW_EXPRESSION);
|
||||
}
|
||||
|
||||
private Plan simplify(MatchingContext<LogicalWindow<Plan>> ctx) {
|
||||
LogicalWindow<Plan> window = ctx.root;
|
||||
ImmutableList.Builder<NamedExpression> projectionsBuilder = ImmutableList.builder();
|
||||
ImmutableList.Builder<NamedExpression> remainWindowExpression = ImmutableList.builder();
|
||||
List<NamedExpression> windowExpressions = window.getWindowExpressions();
|
||||
for (NamedExpression expr : windowExpressions) {
|
||||
Alias alias = (Alias) expr;
|
||||
WindowExpression windowExpression = (WindowExpression) alias.child();
|
||||
if (windowExpression.getPartitionKeys().stream().anyMatch((
|
||||
partitionKey -> partitionKey.getDataType().isOnlyMetricType()))) {
|
||||
continue;
|
||||
}
|
||||
// after normalize window, partition key must be slot
|
||||
List<Slot> partitionSlots = (List<Slot>) (List) windowExpression.getPartitionKeys();
|
||||
Set<Slot> partitionSlotSet = new HashSet<>(partitionSlots);
|
||||
if (!window.getLogicalProperties().getFunctionalDependencies().isUnique(partitionSlotSet)) {
|
||||
remainWindowExpression.add(expr);
|
||||
continue;
|
||||
}
|
||||
Expression function = windowExpression.getFunction();
|
||||
if (function instanceof BoundFunction) {
|
||||
BoundFunction boundFunction = (BoundFunction) function;
|
||||
String name = ((BoundFunction) function).getName();
|
||||
if ((name.equals(COUNT) && boundFunction.child(0).notNullable())
|
||||
|| REWRRITE_TO_CONST_WINDOW_FUNCTIONS.contains(name)) {
|
||||
projectionsBuilder.add(new Alias(alias.getExprId(), new TinyIntLiteral((byte) 1), alias.getName()));
|
||||
} else if (REWRRITE_TO_SLOT_WINDOW_FUNCTIONS.contains(name)) {
|
||||
projectionsBuilder.add(new Alias(alias.getExprId(), boundFunction.child(0), alias.getName()));
|
||||
} else {
|
||||
remainWindowExpression.add(expr);
|
||||
}
|
||||
} else {
|
||||
remainWindowExpression.add(expr);
|
||||
}
|
||||
}
|
||||
List<NamedExpression> projections = projectionsBuilder.build();
|
||||
List<NamedExpression> remainWindows = remainWindowExpression.build();
|
||||
if (projections.isEmpty()) {
|
||||
return window;
|
||||
} else if (remainWindows.isEmpty()) {
|
||||
Plan windowChild = window.child(0);
|
||||
List<Slot> slots = windowChild.getOutput();
|
||||
List<NamedExpression> finalProjections = Lists.newArrayList(projections);
|
||||
finalProjections.addAll(slots);
|
||||
return new LogicalProject(finalProjections, windowChild);
|
||||
} else {
|
||||
List<Slot> windowOutputs = Lists.newArrayList();
|
||||
for (NamedExpression remainWindow : remainWindows) {
|
||||
windowOutputs.add(remainWindow.toSlot());
|
||||
}
|
||||
List<NamedExpression> finalProjections = Lists.newArrayList(projections);
|
||||
finalProjections.addAll(windowOutputs);
|
||||
return new LogicalProject(finalProjections, window.withExpression(remainWindows,
|
||||
window.child(0)));
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user