[Enhancement](partition prune): calculate the column ranges of compound predicates (#14886)

Doris does not support disjunctive predicates very well, which causes some problems in partition prune.
For example, sqls like the followings will trigger a full table scan without partition pruning

select * from test.t1
where (dt between 20211121 and 20211122) or  (dt between 20211125 and 20211126)
This commit is contained in:
spaces-x
2022-12-15 20:47:44 +08:00
committed by GitHub
parent c4de619110
commit bccea1c511
2 changed files with 91 additions and 0 deletions

View File

@ -43,7 +43,9 @@ import com.google.common.base.MoreObjects;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeRangeSet;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -260,6 +262,26 @@ public abstract class ScanNode extends PlanNode {
ColumnBound bound = ColumnBound.of((LiteralExpr) inPredicate.getChild(i));
result.add(Range.closed(bound, bound));
}
} else if (expr instanceof CompoundPredicate) {
CompoundPredicate compoundPredicate = (CompoundPredicate) expr;
ColumnRanges leftChildRange = null;
ColumnRanges rightChildRange = null;
switch (compoundPredicate.getOp()) {
case AND:
leftChildRange = expressionToRanges(compoundPredicate.getChild(0), desc);
rightChildRange = expressionToRanges(compoundPredicate.getChild(1), desc);
return leftChildRange.intersectRanges(rightChildRange);
case OR:
leftChildRange = expressionToRanges(compoundPredicate.getChild(0), desc);
rightChildRange = expressionToRanges(compoundPredicate.getChild(1), desc);
return leftChildRange.unionRanges(rightChildRange);
case NOT:
leftChildRange = expressionToRanges(compoundPredicate.getChild(0), desc);
return leftChildRange.complementOfRanges();
default:
throw new RuntimeException("unknown OP in compound predicate: "
+ compoundPredicate.getOp().toString());
}
}
if (result.isEmpty()) {
@ -377,6 +399,67 @@ public abstract class ScanNode extends PlanNode {
return IS_NULL;
}
public ColumnRanges complementOfRanges() {
if (type == Type.CONVERT_SUCCESS) {
RangeSet<ColumnBound> rangeSet = TreeRangeSet.create();
rangeSet.addAll(ranges);
return create(Lists.newArrayList(rangeSet.complement().asRanges()));
}
return CONVERT_FAILURE;
}
public ColumnRanges intersectRanges(ColumnRanges other) {
// intersect ranges can handle isnull
switch (this.type) {
case IS_NULL:
return createIsNull();
case CONVERT_FAILURE:
return createFailure();
case CONVERT_SUCCESS:
switch (other.type) {
case IS_NULL:
return createIsNull();
case CONVERT_FAILURE:
return createFailure();
case CONVERT_SUCCESS:
RangeSet<ColumnBound> rangeSet = TreeRangeSet.create();
rangeSet.addAll(this.ranges);
RangeSet<ColumnBound> intersectSet = TreeRangeSet.create();
other.ranges.forEach(range -> intersectSet.addAll(rangeSet.subRangeSet(range)));
return create(Lists.newArrayList(intersectSet.asRanges()));
default:
return createFailure();
}
default:
return createFailure();
}
}
public ColumnRanges unionRanges(ColumnRanges other) {
switch (this.type) {
case IS_NULL:
case CONVERT_FAILURE:
return createFailure();
case CONVERT_SUCCESS:
switch (other.type) {
case IS_NULL:
case CONVERT_FAILURE:
return createFailure();
case CONVERT_SUCCESS:
RangeSet<ColumnBound> rangeSet = TreeRangeSet.create();
rangeSet.addAll(this.ranges);
rangeSet.addAll(other.ranges);
List<Range<ColumnBound>> unionRangeList = Lists.newArrayList(rangeSet.asRanges());
return create(unionRangeList);
default:
return createFailure();
}
default:
return createFailure();
}
}
public static ColumnRanges createFailure() {
return CONVERT_FAILURE;
}

View File

@ -185,6 +185,14 @@ public class RangePartitionPruneTest extends PartitionPruneTestBase {
// maybe something goes wrong with ExtractCommonFactorsRule.
addCase("select * from test.t1 where ((dt=20211123 and k1=1) or (dt=20211125 and k1=3)) and k2>0", "partitions=8/8", "partitions=8/8");
addCase("select * from test.t2 where k1 > 10 or k2 < 1", "partitions=9/9", "partitions=9/9");
// add some cases for CompoundPredicate
addCase("select * from test.t1 where (dt >= 20211121 and dt <= 20211122) or (dt >= 20211123 and dt <= 20211125)",
"partitions=8/8", "partitions=5/8");
addCase("select * from test.t1 where (dt between 20211121 and 20211122) or (dt between 20211123 and 20211125)",
"partitions=8/8", "partitions=5/8");
addCase("select * from test.t1 where (dt between 20211121 and 20211122) or dt is null or (dt between 20211123 and 20211125)",
"partitions=8/8", "partitions=6/8");
}
@Test