Revert "branch-2.1: [fix](nereids) fix distinct window compute wrong result (#48987) (#49010)" (#49079)

Revert "branch-2.1: [fix](nereids) fix distinct window compute wrong
result (#48987) (#49010)"
This commit is contained in:
924060929
2025-03-15 00:02:10 +08:00
committed by GitHub
parent d0f6edd212
commit 8589db3ec3
6 changed files with 130 additions and 192 deletions

View File

@ -30,6 +30,7 @@ import org.apache.doris.nereids.rules.analysis.CheckAnalysis;
import org.apache.doris.nereids.rules.analysis.CheckPolicy;
import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint;
import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias;
import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant;
import org.apache.doris.nereids.rules.analysis.EliminateGroupByConstant;
import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint;
import org.apache.doris.nereids.rules.analysis.FillUpMissingSlots;
@ -39,6 +40,7 @@ import org.apache.doris.nereids.rules.analysis.NormalizeAggregate;
import org.apache.doris.nereids.rules.analysis.NormalizeRepeat;
import org.apache.doris.nereids.rules.analysis.OneRowRelationExtractAggregate;
import org.apache.doris.nereids.rules.analysis.ProjectToGlobalAggregate;
import org.apache.doris.nereids.rules.analysis.ProjectWithDistinctToAggregate;
import org.apache.doris.nereids.rules.analysis.ReplaceExpressionByChildOutput;
import org.apache.doris.nereids.rules.analysis.SubqueryToApply;
import org.apache.doris.nereids.rules.analysis.VariableToLiteral;
@ -103,6 +105,13 @@ public class Analyzer extends AbstractBatchJobExecutor {
bottomUp(new AddInitMaterializationHook()),
bottomUp(
new ProjectToGlobalAggregate(),
// this rule checks the LogicalProject node's isDistinct property
// and replaces the LogicalProject node with a LogicalAggregate node,
// so any rule that runs before this one and creates a new LogicalProject node
// must make sure the isDistinct property is correctly passed along.
// Please see rule BindSlotReference or BindFunction for an example.
new EliminateDistinctConstant(),
new ProjectWithDistinctToAggregate(),
new ReplaceExpressionByChildOutput(),
new OneRowRelationExtractAggregate()
),

View File

@ -0,0 +1,48 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.rules.analysis;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.plans.LimitPhase;
import org.apache.doris.nereids.trees.plans.logical.LogicalLimit;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
/**
 * EliminateDistinctConstant.
 * <p>
 * Analysis rule for a distinct {@link LogicalProject} whose select items are all constant
 * expressions. The matched project is replaced by a new project over the same select items
 * whose child is a {@link LogicalLimit} placed on top of the original child.
 * <p>
 * example sql:
 * <pre>
 * select distinct 1,2,3 from tbl
 * =>
 * select 1,2,3 from (select 1, 2, 3 from tbl limit 1) as tmp
 * </pre>
 */
public class EliminateDistinctConstant extends OneAnalysisRuleFactory {
    /**
     * Builds the rule.
     *
     * @return a rule of type {@code ELIMINATE_DISTINCT_CONSTANT} that matches a logical project
     *         only when it is distinct and every one of its projects is constant
     */
    @Override
    public Rule build() {
        return RuleType.ELIMINATE_DISTINCT_CONSTANT.build(
                logicalProject()
                        .when(LogicalProject::isDistinct)
                        .when(project -> project.getProjects().stream().allMatch(Expression::isConstant))
                        // Use the diamond operator rather than a raw LogicalProject so the
                        // child type is inferred and no unchecked raw-type construction occurs.
                        // NOTE(review): the (1, 0) arguments presumably mean limit 1, offset 0,
                        // matching the "limit 1" in the class-level example - confirm against
                        // the LogicalLimit constructor.
                        .then(project -> new LogicalProject<>(project.getProjects(),
                                new LogicalLimit<>(1, 0, LimitPhase.ORIGIN, project.child())))
        );
    }
}

View File

@ -17,24 +17,13 @@
package org.apache.doris.nereids.rules.analysis;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitors;
import org.apache.doris.nereids.trees.plans.LimitPhase;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalLimit;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import com.google.common.collect.ImmutableList;
import java.util.List;
/**
* ProjectToGlobalAggregate.
* <p>
@ -54,110 +43,17 @@ public class ProjectToGlobalAggregate extends OneAnalysisRuleFactory {
@Override
public Rule build() {
return RuleType.PROJECT_TO_GLOBAL_AGGREGATE.build(
logicalProject().then(project -> {
project = distinctConstantsToLimit1(project);
Plan result = projectToAggregate(project);
return distinctToAggregate(result, project);
})
logicalProject().then(project -> {
boolean needGlobalAggregate = project.getProjects()
.stream()
.anyMatch(p -> p.accept(ExpressionVisitors.CONTAINS_AGGREGATE_CHECKER, null));
if (needGlobalAggregate) {
return new LogicalAggregate<>(ImmutableList.of(), project.getProjects(), project.child());
} else {
return project;
}
})
);
}
// select distinct 1,2,3 from tbl
// ↓
// select 1,2,3 from (select 1, 2, 3 from tbl limit 1) as tmp
private static LogicalProject<Plan> distinctConstantsToLimit1(LogicalProject<Plan> project) {
if (!project.isDistinct()) {
return project;
}
boolean allSelectItemAreConstants = true;
for (NamedExpression selectItem : project.getProjects()) {
if (!selectItem.isConstant()) {
allSelectItemAreConstants = false;
break;
}
}
if (allSelectItemAreConstants) {
return new LogicalProject<>(
project.getProjects(),
new LogicalLimit<>(1, 0, LimitPhase.ORIGIN, project.child())
);
}
return project;
}
// select avg(xxx) from tbl
// ↓
// LogicalAggregate(groupBy=[], output=[avg(xxx)])
private static Plan projectToAggregate(LogicalProject<Plan> project) {
// contains aggregate functions, like sum, avg ?
for (NamedExpression selectItem : project.getProjects()) {
if (selectItem.accept(ExpressionVisitors.CONTAINS_AGGREGATE_CHECKER, null)) {
return new LogicalAggregate<>(ImmutableList.of(), project.getProjects(), project.child());
}
}
return project;
}
private static Plan distinctToAggregate(Plan result, LogicalProject<Plan> originProject) {
if (!originProject.isDistinct()) {
return result;
}
if (result instanceof LogicalProject) {
// remove distinct: select distinct fun(xxx) as c1 from tbl
//
// LogicalProject(distinct=true, output=[fun(xxx) as c1])
// ↓
// LogicalAggregate(groupBy=[c1], output=[c1])
// |
// LogicalProject(output=[fun(xxx) as c1])
LogicalProject<?> project = (LogicalProject<?>) result;
ImmutableList.Builder<NamedExpression> bottomProjectOutput
= ImmutableList.builderWithExpectedSize(project.getProjects().size());
ImmutableList.Builder<NamedExpression> topAggOutput
= ImmutableList.builderWithExpectedSize(project.getProjects().size());
boolean hasComplexExpr = false;
for (NamedExpression selectItem : project.getProjects()) {
if (selectItem.isSlot()) {
topAggOutput.add(selectItem);
bottomProjectOutput.add(selectItem);
} else if (isAliasLiteral(selectItem)) {
// stay in agg, and eliminate by `ELIMINATE_GROUP_BY_CONSTANT`
topAggOutput.add(selectItem);
} else {
// `FillUpMissingSlots` not support find complex expr in aggregate,
// so we should push down into the bottom project
hasComplexExpr = true;
topAggOutput.add(selectItem.toSlot());
bottomProjectOutput.add(selectItem);
}
}
if (!hasComplexExpr) {
List<Slot> projects = (List) project.getProjects();
return new LogicalAggregate(projects, projects, project.child());
}
LogicalProject<?> removeDistinct = new LogicalProject<>(bottomProjectOutput.build(), project.child());
ImmutableList<NamedExpression> aggOutput = topAggOutput.build();
return new LogicalAggregate(aggOutput, aggOutput, removeDistinct);
} else if (result instanceof LogicalAggregate) {
// remove distinct: select distinct avg(xxx) as c1 from tbl
//
// LogicalProject(distinct=true, output=[avg(xxx) as c1])
// ↓
// LogicalAggregate(output=[avg(xxx) as c1])
return result;
} else {
// never reach
throw new AnalysisException("Unsupported");
}
}
private static boolean isAliasLiteral(NamedExpression selectItem) {
return selectItem instanceof Alias && selectItem.child(0) instanceof Literal;
}
}

View File

@ -0,0 +1,57 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.rules.analysis;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
/**
 * ProjectWithDistinctToAggregate.
 * <p>
 * Analysis rule that replaces a distinct {@link LogicalProject} containing no
 * {@link AggregateFunction} in any of its select items with a {@link LogicalAggregate}
 * built from the same select items and the same child.
 * <p>
 * example sql:
 * <pre>
 * select distinct value from tbl
 *
 * LogicalProject(projects=[distinct value])
 *            |
 * LogicalOlapScan(table=tbl)
 *          =>
 * LogicalAggregate(groupBy=[value], output=[value])
 *            |
 * LogicalOlapScan(table=tbl)
 * </pre>
 */
public class ProjectWithDistinctToAggregate extends OneAnalysisRuleFactory {
    /**
     * Builds the rule.
     *
     * @return a rule of type {@code PROJECT_WITH_DISTINCT_TO_AGGREGATE} that fires on distinct
     *         projects whose select items are free of aggregate functions
     */
    @Override
    public Rule build() {
        return RuleType.PROJECT_WITH_DISTINCT_TO_AGGREGATE.build(
                logicalProject()
                        .when(LogicalProject::isDistinct)
                        .whenNot(ProjectWithDistinctToAggregate::projectsAggregateFunction)
                        .then(distinctProject -> new LogicalAggregate<>(
                                distinctProject.getProjects(), false, distinctProject.child()))
        );
    }

    /**
     * Tells whether at least one select item of the project contains an aggregate function
     * anywhere in its expression tree.
     */
    private static boolean projectsAggregateFunction(LogicalProject<?> project) {
        for (Expression selectItem : project.getProjects()) {
            if (selectItem.anyMatch(AggregateFunction.class::isInstance)) {
                return true;
            }
        }
        return false;
    }
}

View File

@ -53,27 +53,21 @@ public class ReplaceExpressionByChildOutput implements AnalysisRuleFactory {
))
.add(RuleType.REPLACE_SORT_EXPRESSION_BY_CHILD_OUTPUT.build(
logicalSort(logicalAggregate()).then(sort -> {
LogicalAggregate<Plan> agg = sort.child();
Map<Expression, Slot> sMap = buildOutputAliasMap(agg.getOutputExpressions());
if (sMap.isEmpty() && isSelectDistinct(agg)) {
sMap = getSelectDistinctExpressions(agg);
}
LogicalAggregate<Plan> aggregate = sort.child();
Map<Expression, Slot> sMap = buildOutputAliasMap(aggregate.getOutputExpressions());
return replaceSortExpression(sort, sMap);
})
)).add(RuleType.REPLACE_SORT_EXPRESSION_BY_CHILD_OUTPUT.build(
logicalSort(logicalHaving(logicalAggregate())).then(sort -> {
LogicalAggregate<Plan> agg = sort.child().child();
Map<Expression, Slot> sMap = buildOutputAliasMap(agg.getOutputExpressions());
if (sMap.isEmpty() && isSelectDistinct(agg)) {
sMap = getSelectDistinctExpressions(agg);
}
LogicalAggregate<Plan> aggregate = sort.child().child();
Map<Expression, Slot> sMap = buildOutputAliasMap(aggregate.getOutputExpressions());
return replaceSortExpression(sort, sMap);
})
))
.build();
}
private static Map<Expression, Slot> buildOutputAliasMap(List<NamedExpression> output) {
private Map<Expression, Slot> buildOutputAliasMap(List<NamedExpression> output) {
Map<Expression, Slot> sMap = Maps.newHashMapWithExpectedSize(output.size());
for (NamedExpression expr : output) {
if (expr instanceof Alias) {
@ -99,22 +93,4 @@ public class ReplaceExpressionByChildOutput implements AnalysisRuleFactory {
return changed ? new LogicalSort<>(newKeys.build(), sort.child()) : sort;
}
private static boolean isSelectDistinct(LogicalAggregate<? extends Plan> agg) {
return agg.getGroupByExpressions().equals(agg.getOutputExpressions())
&& agg.getGroupByExpressions().equals(agg.child().getOutput());
}
private static Map<Expression, Slot> getSelectDistinctExpressions(LogicalAggregate<? extends Plan> agg) {
Plan child = agg.child();
List<NamedExpression> selectItems;
if (child instanceof LogicalProject) {
selectItems = ((LogicalProject<?>) child).getProjects();
} else if (child instanceof LogicalAggregate) {
selectItems = ((LogicalAggregate<?>) child).getOutputExpressions();
} else {
selectItems = ImmutableList.of();
}
return buildOutputAliasMap(selectItems);
}
}