[opt](nereids)project sub expression in other condition for nested loop join (#32697)

1. project sub expression in other condition for nested loop join
2. fix a bug in the UT framework which may generate duplicated ExprIds
This commit is contained in:
minghong
2024-04-24 16:48:55 +08:00
committed by yiguolei
parent ef73533e27
commit 0faae45537
46 changed files with 1198 additions and 892 deletions

View File

@ -27,5 +27,11 @@ package org.apache.doris.common;
/**
 * Base class for generators that hand out ids of one concrete {@code Id} type.
 *
 * <p>The counter lives in {@link #nextId}; concrete generators decide in
 * {@link #getNextId()} how that counter is turned into an id.
 *
 * @param <IdType> the concrete id type produced by this generator
 */
public abstract class IdGenerator<IdType extends Id<IdType>> {
    /** Counter backing this generator; starts at 0 unless {@link #resetId(int)} is called. */
    protected int nextId = 0;

    /**
     * Overwrites the internal counter. Test only.
     *
     * @param initialId the value to store in {@link #nextId}
     * @return this generator, so the call can be chained
     */
    public IdGenerator<IdType> resetId(int initialId) {
        this.nextId = initialId;
        return this;
    }

    /**
     * Produces the next id.
     *
     * @return a new id of type {@code IdType}
     */
    public abstract IdType getNextId();
}

View File

@ -97,7 +97,7 @@ public class StatementContext implements Closeable {
// Thus hasUnknownColStats has higher priority than isDpHyp
private boolean hasUnknownColStats = false;
private final IdGenerator<ExprId> exprIdGenerator = ExprId.createGenerator();
private final IdGenerator<ExprId> exprIdGenerator;
private final IdGenerator<ObjectId> objectIdGenerator = ObjectId.createGenerator();
private final IdGenerator<RelationId> relationIdGenerator = RelationId.createGenerator();
private final IdGenerator<CTEId> cteIdGenerator = CTEId.createGenerator();
@ -142,13 +142,24 @@ public class StatementContext implements Closeable {
private TreeMap<Pair<Integer, Integer>, String> indexInSqlToString = new TreeMap<>(new Pair.PairComparator<>());
public StatementContext() {
this(ConnectContext.get(), null);
this(ConnectContext.get(), null, 0);
}
public StatementContext(int initialId) {
this(ConnectContext.get(), null, initialId);
}
/** StatementContext */
public StatementContext(ConnectContext connectContext, OriginStatement originStatement) {
this(connectContext, originStatement, 0);
}
/**
* StatementContext
*/
public StatementContext(ConnectContext connectContext, OriginStatement originStatement, int initialId) {
this.connectContext = connectContext;
this.originStatement = originStatement;
exprIdGenerator = ExprId.createGenerator(initialId);
if (connectContext != null && connectContext.getSessionVariable() != null
&& connectContext.queryId() != null
&& CacheAnalyzer.canUseSqlCache(connectContext.getSessionVariable())) {

View File

@ -90,6 +90,7 @@ import org.apache.doris.nereids.rules.rewrite.MergeSetOperationsExcept;
import org.apache.doris.nereids.rules.rewrite.MergeTopNs;
import org.apache.doris.nereids.rules.rewrite.NormalizeSort;
import org.apache.doris.nereids.rules.rewrite.OrExpansion;
import org.apache.doris.nereids.rules.rewrite.ProjectOtherJoinConditionForNestedLoopJoin;
import org.apache.doris.nereids.rules.rewrite.PruneEmptyPartition;
import org.apache.doris.nereids.rules.rewrite.PruneFileScanPartition;
import org.apache.doris.nereids.rules.rewrite.PruneOlapScanPartition;
@ -267,7 +268,8 @@ public class Rewriter extends AbstractBatchJobExecutor {
// eliminate useless not null or inferred not null
// TODO: wait InferPredicates to infer more not null.
bottomUp(new EliminateNotNull()),
topDown(new ConvertInnerOrCrossJoin())
topDown(new ConvertInnerOrCrossJoin()),
topDown(new ProjectOtherJoinConditionForNestedLoopJoin())
),
topic("Column pruning and infer predicate",
custom(RuleType.COLUMN_PRUNING, ColumnPruning::new),

View File

@ -267,6 +267,7 @@ public enum RuleType {
INNER_TO_CROSS_JOIN(RuleTypeClass.REWRITE),
CROSS_TO_INNER_JOIN(RuleTypeClass.REWRITE),
PRUNE_EMPTY_PARTITION(RuleTypeClass.REWRITE),
PROJECT_OTHER_JOIN_CONDITION(RuleTypeClass.REWRITE),
// split limit
SPLIT_LIMIT(RuleTypeClass.REWRITE),

View File

@ -0,0 +1,127 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.rules.rewrite;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Pushes single-sided sub expressions of the other join conditions into a project
 * below the join, so the join condition itself only references slots.
 *
 * <pre>
 * join (l_orderkey > n_nationkey + n_regionkey)
 *   +----scan(lineItem)
 *   +----scan(nation)
 * =>
 * join(l_orderkey > x)
 *   +----scan(lineItem)
 *   +----project(n_nationkey + n_regionkey as x)
 *     +----scan(nation)
 * </pre>
 *
 * <p>The rule only matches joins that have no hash join conjuncts, are not mark joins,
 * and have at least one other join conjunct.
 */
public class ProjectOtherJoinConditionForNestedLoopJoin extends OneRewriteRuleFactory {
    @Override
    public Rule build() {
        return logicalJoin().when(
                join -> join.getHashJoinConjuncts().isEmpty()
                        && !join.isMarkJoin()
                        && !join.getOtherJoinConjuncts().isEmpty()
        ).then(
                join -> {
                    ReplacerContext ctx = new ReplacerContext(
                            join.child(0).getOutputSet(), join.child(1).getOutputSet());
                    List<Expression> newOtherConjuncts = new ArrayList<>();
                    for (Expression conj : join.getOtherJoinConjuncts()) {
                        newOtherConjuncts.add(conj.accept(AliasReplacer.INSTANCE, ctx));
                    }
                    if (ctx.leftAlias.isEmpty() && ctx.rightAlias.isEmpty()) {
                        // Nothing was extracted, so there is nothing to rewrite.
                        // NOTE(review): null is presumably the framework's "plan unchanged" signal - confirm.
                        return null;
                    }
                    Plan left = projectAliases(join.left(), ctx.leftAlias);
                    Plan right = projectAliases(join.right(), ctx.rightAlias);
                    return join.withJoinConjuncts(join.getHashJoinConjuncts(),
                                    newOtherConjuncts, join.getJoinReorderContext())
                            .withChildren(ImmutableList.of(left, right));
                }
        ).toRule(RuleType.PROJECT_OTHER_JOIN_CONDITION);
    }

    /**
     * Wraps {@code child} in a project that outputs everything the child outputs plus the given aliases.
     *
     * @param child the join child to wrap
     * @param aliases aliases extracted for this side of the join
     * @return {@code child} itself when {@code aliases} is empty, otherwise a new project on top of it
     */
    private static Plan projectAliases(Plan child, Set<Alias> aliases) {
        if (aliases.isEmpty()) {
            return child;
        }
        List<NamedExpression> projects = Lists.newArrayList(child.getOutput());
        projects.addAll(aliases);
        return new LogicalProject<>(projects, child);
    }

    /** Mutable state shared by all conjuncts of one join while they are being rewritten. */
    private static class ReplacerContext {
        /** One alias per distinct extracted expression, so equal sub expressions share a slot. */
        final HashMap<Expression, Alias> aliasMap = new HashMap<>();
        final Set<Slot> leftSlots;
        final Set<Slot> rightSlots;
        /** Aliases that must be projected on the left child. */
        final Set<Alias> leftAlias = new HashSet<>();
        /** Aliases that must be projected on the right child. */
        final Set<Alias> rightAlias = new HashSet<>();

        public ReplacerContext(Set<Slot> leftSlots, Set<Slot> rightSlots) {
            this.leftSlots = leftSlots;
            this.rightSlots = rightSlots;
        }
    }

    /**
     * Replaces every maximal sub expression whose input slots all come from one join child
     * with the slot of an alias, and records that alias in the context.
     */
    private static class AliasReplacer extends DefaultExpressionRewriter<ReplacerContext> {
        public static final AliasReplacer INSTANCE = new AliasReplacer();

        @Override
        public Expression visit(Expression expression, ReplacerContext ctx) {
            Set<Slot> input = expression.getInputSlots();
            // Expressions without input slots and bare slots are left as they are.
            if (input.isEmpty() || expression instanceof Slot) {
                return expression;
            }
            Set<Alias> sideAliases;
            if (ctx.leftSlots.containsAll(input)) {
                sideAliases = ctx.leftAlias;
            } else if (ctx.rightSlots.containsAll(input)) {
                sideAliases = ctx.rightAlias;
            } else {
                // The expression is not covered by a single side: descend into its children.
                return super.visit(expression, ctx);
            }
            Alias alias = ctx.aliasMap.computeIfAbsent(expression, o -> new Alias(o));
            sideAliases.add(alias);
            return alias.toSlot();
        }
    }
}

View File

@ -33,12 +33,19 @@ public class ExprId extends Id<ExprId> {
* Should be only called by {@link StatementScopeIdGenerator}.
*/
public static IdGenerator<ExprId> createGenerator() {
return createGenerator(0);
}
/**
* for ut test only
*/
public static IdGenerator<ExprId> createGenerator(int initialId) {
return new IdGenerator<ExprId>() {
@Override
public ExprId getNextId() {
return new ExprId(nextId++);
}
};
}.resetId(initialId);
}
@Override

View File

@ -29,8 +29,9 @@ import com.google.common.annotations.VisibleForTesting;
*/
public class StatementScopeIdGenerator {
// for test only
private static StatementContext statementContext = new StatementContext();
// For UT only: ExprIds start at 10000 to avoid duplicate ExprIds. In UTs, tables are created before the
// ConnectContext, and hence column.exprId may be recreated while rules are being applied.
private static StatementContext statementContext = new StatementContext(10000);
public static ExprId newExprId() {
// this branch is for test only