[refactor](Nereids) New expression extractor for partitions pruning (#36407)

cherry pick from #36326

An exception throw in TryEliminateUninterestedPredicates, for this case

 CREATE TABLE `tbltest` (
  `id` INT NULL,
  `col2` VARCHAR(255) NULL,
  `col3` VARCHAR(255) NULL,
  `dt` DATE NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`, `col2`)
PARTITION BY RANGE(`dt`)
(PARTITION p20240617 VALUES [('2024-06-17'), ('2024-06-18')))
DISTRIBUTED BY HASH(`id`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);

select * from tbltest
where
  case
    when col2 = 'xxx' and col3='yyy' then false -- note this is not about partition column
    when col2 in ('xxx') then false
    when col2 like 'xxx%' then false
    else true
  end

The `CaseWhen` require children should be `WhenClause`, TryEliminateUninterestedPredicates maybe rewrite the WhenClause to true/false predicate, and cause this exception:

ERROR 1105 (HY000): errCode = 2, detailMessage = The children format needs to be [WhenClause+, DefaultValue?]

Original extractor(TryEliminateUninterestedPredicates.java) caused some errors while try to derive the expressions which can be used for pruning partitions.
I tried to write a new extractor(and with unit tests) for pruning partitions, it is more simple and reliable (I think).

The theory of extractor is pretty simple:
A:Sort the expression in two kinds:
  1. evaluable-expression (let's mark it as E).
    Expressions that can be evaluated in the partition pruning stage.
    In the other word: not contains non-partition slots or deterministic expression.
  2. un-evaluable-expression (let's mark it as UE).
    Expressions that can NOT be evaluated in the partition pruning stage.
    In the other word: contains non-partition slots or deterministic expression.

B: Travel the predicate, only point on AND and OR operator, following the rule:
  (E and UE) -> (E and TRUE) -> E
  (UE and UE) -> TRUE
  (E and E) -> (E and E)
  (E or UE) -> TRUE
  (UE or UE) -> TRUE
  (E or E) -> (E or E)
This commit is contained in:
XuPengfei
2024-06-19 14:47:26 +08:00
committed by GitHub
parent 38d750a7e0
commit 3c952c75be
7 changed files with 474 additions and 180 deletions

View File

@ -36,7 +36,6 @@ import java.util.stream.Collectors;
*/
public class UnboundFunction extends Function implements Unbound, PropagateNullable {
private final String dbName;
private final String name;
private final boolean isDistinct;
public UnboundFunction(String name, List<Expression> arguments) {
@ -52,16 +51,11 @@ public class UnboundFunction extends Function implements Unbound, PropagateNulla
}
public UnboundFunction(String dbName, String name, boolean isDistinct, List<Expression> arguments) {
super(arguments);
super(name, arguments);
this.dbName = dbName;
this.name = Objects.requireNonNull(name, "name cannot be null");
this.isDistinct = isDistinct;
}
public String getName() {
return name;
}
@Override
public String getExpressionName() {
if (!this.exprName.isPresent()) {
@ -87,13 +81,13 @@ public class UnboundFunction extends Function implements Unbound, PropagateNulla
String params = children.stream()
.map(Expression::toSql)
.collect(Collectors.joining(", "));
return name + "(" + (isDistinct ? "distinct " : "") + params + ")";
return getName() + "(" + (isDistinct ? "distinct " : "") + params + ")";
}
@Override
public String toString() {
String params = Joiner.on(", ").join(children);
return "'" + name + "(" + (isDistinct ? "distinct " : "") + params + ")";
return "'" + getName() + "(" + (isDistinct ? "distinct " : "") + params + ")";
}
@Override
@ -103,7 +97,7 @@ public class UnboundFunction extends Function implements Unbound, PropagateNulla
@Override
public UnboundFunction withChildren(List<Expression> children) {
return new UnboundFunction(dbName, name, isDistinct, children);
return new UnboundFunction(dbName, getName(), isDistinct, children);
}
@Override
@ -118,11 +112,11 @@ public class UnboundFunction extends Function implements Unbound, PropagateNulla
return false;
}
UnboundFunction that = (UnboundFunction) o;
return isDistinct == that.isDistinct && name.equals(that.name);
return isDistinct == that.isDistinct && getName().equals(that.getName());
}
@Override
public int hashCode() {
return Objects.hash(name, isDistinct);
return Objects.hash(getName(), isDistinct);
}
}

View File

@ -0,0 +1,177 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.rules.expression.rules;
import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
import org.apache.doris.nereids.rules.expression.rules.PartitionPruneExpressionExtractor.Context;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SubqueryExpr;
import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;
import com.google.common.annotations.VisibleForTesting;
import java.util.Objects;
import java.util.Set;
/**
* PartitionPruneExpressionExtractor
*
* This rewriter only used to extract the expression that can be used in partition pruning from
* the whole predicate expression.
* The theory of extractor is pretty simple:
* A:Sort the expression in two kinds:
* 1. evaluable-expression (let's mark it as E).
* Expressions that can be evaluated in the partition pruning stage.
* In the other word: not contains non-partition slots or deterministic expression.
* 2. un-evaluable-expression (let's mark it as UE).
* Expressions that can NOT be evaluated in the partition pruning stage.
* In the other word: contains non-partition slots or deterministic expression.
*
* B: Travel the predicate, only point on AND and OR operator, following the rule:
* (E and UE) -> (E and TRUE) -> E
* (UE and UE) -> TRUE
* (E and E) -> (E and E)
* (E or UE) -> TRUE
* (UE or UE) -> TRUE
* (E or E) -> (E or E)
*
* e.g.
* (part = 1 and non_part = 'a') or (part = 2)
* -> (part = 1 and true) or (part = 2)
* -> (part = 1) or (part = 2)
*
* It's better that do some expression optimize(like fold, eliminate etc.) on predicate before this step.
*/
public class PartitionPruneExpressionExtractor extends DefaultExpressionRewriter<Context> {
private final ExpressionEvaluableDetector expressionEvaluableDetector;
private PartitionPruneExpressionExtractor(Set<Slot> interestedSlots) {
this.expressionEvaluableDetector = new ExpressionEvaluableDetector(interestedSlots);
}
/**
* Extract partition prune expression from predicate
*/
public static Expression extract(Expression predicate,
Set<Slot> partitionSlots,
CascadesContext cascadesContext) {
predicate = predicate.accept(FoldConstantRuleOnFE.VISITOR_INSTANCE,
new ExpressionRewriteContext(cascadesContext));
PartitionPruneExpressionExtractor rewriter = new PartitionPruneExpressionExtractor(partitionSlots);
Context context = new Context();
Expression partitionPruneExpression = predicate.accept(rewriter, context);
if (context.containsUnEvaluableExpression) {
return BooleanLiteral.TRUE;
}
return partitionPruneExpression;
}
@Override
public Expression visit(Expression originExpr, Context parentContext) {
if (originExpr instanceof And) {
return this.visitAnd((And) originExpr, parentContext);
}
if (originExpr instanceof Or) {
return this.visitOr((Or) originExpr, parentContext);
}
parentContext.containsUnEvaluableExpression = !expressionEvaluableDetector.detect(originExpr);
return originExpr;
}
@Override
public Expression visitAnd(And node, Context parentContext) {
// handle left node
Context leftContext = new Context();
Expression newLeft = node.left().accept(this, leftContext);
// handle right node
Context rightContext = new Context();
Expression newRight = node.right().accept(this, rightContext);
// if anyone of them is FALSE, the whole expression should be FALSE.
if (newLeft == BooleanLiteral.FALSE || newRight == BooleanLiteral.FALSE) {
return BooleanLiteral.FALSE;
}
// If left node contains non-partition slot or is TURE, just discard it.
if (newLeft == BooleanLiteral.TRUE || leftContext.containsUnEvaluableExpression) {
return rightContext.containsUnEvaluableExpression ? BooleanLiteral.TRUE : newRight;
}
// If right node contains non-partition slot or is TURE, just discard it.
if (newRight == BooleanLiteral.TRUE || rightContext.containsUnEvaluableExpression) {
return newLeft;
}
// both does not contains non-partition slot.
return new And(newLeft, newRight);
}
@Override
public Expression visitOr(Or node, Context parentContext) {
// handle left node
Context leftContext = new Context();
Expression newLeft = node.left().accept(this, leftContext);
// handle right node
Context rightContext = new Context();
Expression newRight = node.right().accept(this, rightContext);
// if anyone of them is TRUE or contains non-partition slot, just return TRUE.
if (newLeft == BooleanLiteral.TRUE || newRight == BooleanLiteral.TRUE
|| leftContext.containsUnEvaluableExpression || rightContext.containsUnEvaluableExpression) {
return BooleanLiteral.TRUE;
}
return new Or(newLeft, newRight);
}
/**
* Context
*/
public static class Context {
private boolean containsUnEvaluableExpression;
}
/**
* The detector only indicate that whether a predicate contains interested slots or not,
* and do not change the predicate.
*/
@VisibleForTesting
public static class ExpressionEvaluableDetector extends DefaultExpressionRewriter<Context> {
private final Set<Slot> partitionSlots;
public ExpressionEvaluableDetector(Set<Slot> partitionSlots) {
this.partitionSlots = Objects.requireNonNull(partitionSlots, "partitionSlots can not be null");
}
/**
* Return true if expression does NOT contains un-evaluable expression.
*/
@VisibleForTesting
public boolean detect(Expression expression) {
boolean containsUnEvaluableExpression = expression.anyMatch(
expr -> expr instanceof SubqueryExpr || (expr instanceof Slot && !partitionSlots.contains(expr)));
return !containsUnEvaluableExpression;
}
}
}

View File

@ -117,7 +117,7 @@ public class PartitionPruner extends DefaultExpressionRewriter<Void> {
public static List<Long> prune(List<Slot> partitionSlots, Expression partitionPredicate,
Map<Long, PartitionItem> idToPartitions, CascadesContext cascadesContext,
PartitionTableType partitionTableType) {
partitionPredicate = TryEliminateUninterestedPredicates.rewrite(
partitionPredicate = PartitionPruneExpressionExtractor.extract(
partitionPredicate, ImmutableSet.copyOf(partitionSlots), cascadesContext);
partitionPredicate = PredicateRewriteForPartitionPrune.rewrite(partitionPredicate, cascadesContext);

View File

@ -1,152 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.rules.expression.rules;
import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
import org.apache.doris.nereids.rules.expression.rules.TryEliminateUninterestedPredicates.Context;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;
import java.util.Set;
/**
* TryEliminateUninterestedPredicates
*
* this rewriter usually used to extract the partition columns related predicates,
* and try to eliminate partition columns related predicate.
*
* e.g.
* (part = 1 and non_part = 'a') or (part = 2)
* -> (part = 1 and true) or (part = 2)
* -> (part = 1) or (part = 2)
*
* maybe eliminate failed in some special cases, e.g. (non_part + part) = 2.
* the key point is: if a predicate(return boolean type) only contains the uninterested slots, we can eliminate it.
*/
public class TryEliminateUninterestedPredicates extends DefaultExpressionRewriter<Context> {
private final Set<Slot> interestedSlots;
private final ExpressionRewriteContext expressionRewriteContext;
private TryEliminateUninterestedPredicates(Set<Slot> interestedSlots, CascadesContext cascadesContext) {
this.interestedSlots = interestedSlots;
this.expressionRewriteContext = new ExpressionRewriteContext(cascadesContext);
}
/** rewrite */
public static Expression rewrite(Expression expression, Set<Slot> interestedSlots,
CascadesContext cascadesContext) {
// before eliminate uninterested predicate, we must push down `Not` under CompoundPredicate
expression = expression.rewriteUp(expr -> {
if (expr instanceof Not) {
return SimplifyNotExprRule.simplify((Not) expr);
} else {
return expr;
}
});
TryEliminateUninterestedPredicates rewriter = new TryEliminateUninterestedPredicates(
interestedSlots, cascadesContext);
return expression.accept(rewriter, new Context());
}
@Override
public Expression visit(Expression originExpr, Context parentContext) {
Context currentContext = new Context();
// postorder traversal
Expression expr = super.visit(originExpr, currentContext);
// process predicate
if (expr.getDataType().isBooleanType()) {
// if a predicate contains not only interested slots but also non-interested slots,
// we can not eliminate non-interested slots:
// e.g.
// not(uninterested slot b + interested slot a > 1)
// -> not(uninterested slot b + interested slot a > 1)
if (!currentContext.childrenContainsInterestedSlots && currentContext.childrenContainsNonInterestedSlots) {
// propagate true value up to eliminate uninterested slots,
// because we don't know the runtime value of the slots
// e.g.
// not(uninterested slot b > 1)
// -> not(true)
// -> true
expr = BooleanLiteral.TRUE;
} else {
// simplify the predicate expression, the interested slots may be eliminated too
// e.g.
// ((interested slot a) and not(uninterested slot b > 1)) or true
// -> ((interested slot a) and not(true)) or true
// -> ((interested slot a) and true) or true
// -> (interested slot a) or true
// -> true
expr = FoldConstantRuleOnFE.evaluate(expr, expressionRewriteContext);
}
} else {
// ((uninterested slot b > 0) + 1) > 1
// -> (true + 1) > 1
// -> ((uninterested slot b > 0) + 1) > 1 (recover to origin expr because `true + 1` is not predicate)
// -> true (not contains interested slot but contains uninterested slot)
expr = originExpr;
}
parentContext.childrenContainsInterestedSlots |= currentContext.childrenContainsInterestedSlots;
parentContext.childrenContainsNonInterestedSlots |= currentContext.childrenContainsNonInterestedSlots;
return expr;
}
@Override
public Expression visitAnd(And and, Context parentContext) {
Expression left = and.left();
Context leftContext = new Context();
// Expression newLeft = this.visit(left, leftContext);
Expression newLeft = left.accept(this, leftContext);
if (leftContext.childrenContainsNonInterestedSlots) {
newLeft = BooleanLiteral.TRUE;
}
Expression right = and.right();
Context rightContext = new Context();
Expression newRight = this.visit(right, rightContext);
if (rightContext.childrenContainsNonInterestedSlots) {
newRight = BooleanLiteral.TRUE;
}
Expression expr = FoldConstantRuleOnFE.evaluate(new And(newLeft, newRight), expressionRewriteContext);
parentContext.childrenContainsInterestedSlots =
rightContext.childrenContainsInterestedSlots || leftContext.childrenContainsInterestedSlots;
return expr;
}
@Override
public Expression visitSlot(Slot slot, Context context) {
boolean isInterestedSlot = interestedSlots.contains(slot);
context.childrenContainsInterestedSlots |= isInterestedSlot;
context.childrenContainsNonInterestedSlots |= !isInterestedSlot;
return slot;
}
/** Context */
public static class Context {
private boolean childrenContainsInterestedSlots;
private boolean childrenContainsNonInterestedSlots;
}
}

View File

@ -33,7 +33,6 @@ import java.util.stream.Collectors;
/** BoundFunction. */
public abstract class BoundFunction extends Function implements ComputeSignature {
private final String name;
private final Supplier<FunctionSignature> signatureCache = Suppliers.memoize(() -> {
// first step: find the candidate signature in the signature list
@ -43,17 +42,11 @@ public abstract class BoundFunction extends Function implements ComputeSignature
});
public BoundFunction(String name, Expression... arguments) {
super(arguments);
this.name = Objects.requireNonNull(name, "name can not be null");
super(name, arguments);
}
public BoundFunction(String name, List<Expression> children) {
super(children);
this.name = Objects.requireNonNull(name, "name can not be null");
}
public String getName() {
return name;
super(name, children);
}
@Override
@ -75,17 +68,17 @@ public abstract class BoundFunction extends Function implements ComputeSignature
@Override
protected boolean extraEquals(Expression that) {
return Objects.equals(name, ((BoundFunction) that).name);
return Objects.equals(getName(), ((BoundFunction) that).getName());
}
@Override
public int hashCode() {
return Objects.hash(name, children);
return Objects.hash(getName(), children);
}
@Override
public String toSql() throws UnboundException {
StringBuilder sql = new StringBuilder(name).append("(");
StringBuilder sql = new StringBuilder(getName()).append("(");
int arity = arity();
for (int i = 0; i < arity; i++) {
Expression arg = child(i);
@ -103,6 +96,6 @@ public abstract class BoundFunction extends Function implements ComputeSignature
.stream()
.map(Expression::toString)
.collect(Collectors.joining(", "));
return name + "(" + args + ")";
return getName() + "(" + args + ")";
}
}

View File

@ -21,20 +21,29 @@ import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Lambda;
import java.util.List;
import java.util.Objects;
/**
* function in nereids.
*/
public abstract class Function extends Expression {
public Function(Expression... children) {
private final String name;
public Function(String name, Expression... children) {
super(children);
this.name = Objects.requireNonNull(name, "name can not be null");
}
public Function(List<Expression> children) {
public Function(String name, List<Expression> children) {
super(children);
this.name = Objects.requireNonNull(name, "name can not be null");
}
public boolean isHighOrder() {
return !children.isEmpty() && children.get(0) instanceof Lambda;
}
public final String getName() {
return name;
}
}