[stats](nereids)in predicate range adjust (#22156)
1. refactor in-predicate filter estimation example: A in (1, 2, 3, 4) after in-preidcate filter, A.stats.max<=4 and A.stats.min>=1 2. maintain minExpr and maxExpr in in-predicate stats derive
This commit is contained in:
@ -17,6 +17,7 @@
|
||||
|
||||
package org.apache.doris.nereids.stats;
|
||||
|
||||
import org.apache.doris.analysis.LiteralExpr;
|
||||
import org.apache.doris.nereids.stats.FilterEstimation.EstimationContext;
|
||||
import org.apache.doris.nereids.trees.TreeNode;
|
||||
import org.apache.doris.nereids.trees.expressions.And;
|
||||
@ -36,6 +37,7 @@ import org.apache.doris.nereids.trees.expressions.Or;
|
||||
import org.apache.doris.nereids.trees.expressions.Slot;
|
||||
import org.apache.doris.nereids.trees.expressions.SlotReference;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.Function;
|
||||
import org.apache.doris.nereids.trees.expressions.literal.Literal;
|
||||
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
|
||||
import org.apache.doris.statistics.Bucket;
|
||||
import org.apache.doris.statistics.ColumnStatistic;
|
||||
@ -266,8 +268,12 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
return context.statistics.withSel(DEFAULT_IN_COEFFICIENT);
|
||||
}
|
||||
List<Expression> options = inPredicate.getOptions();
|
||||
double maxOption = 0;
|
||||
double minOption = Double.MAX_VALUE;
|
||||
// init minOption and maxOption by compareExpr.max and compareExpr.min respectively,
|
||||
// and then adjust min/max by options
|
||||
double minOptionValue = compareExprStats.maxValue;
|
||||
double maxOptionValue = compareExprStats.minValue;
|
||||
LiteralExpr minOptionLiteral = compareExprStats.maxExpr;
|
||||
LiteralExpr maxOptionLiteral = compareExprStats.minExpr;
|
||||
/* suppose A.(min, max) = (0, 10), A.ndv=10
|
||||
A in ( 1, 2, 5, 100):
|
||||
validInOptCount = 3, that is (1, 2, 5)
|
||||
@ -283,32 +289,62 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
A.(min, max) not changed
|
||||
A.selectivity = 7/10
|
||||
*/
|
||||
double validInOptCount = 0;
|
||||
int validInOptCount = 0;
|
||||
double selectivity = 1.0;
|
||||
ColumnStatisticBuilder compareExprStatsBuilder = new ColumnStatisticBuilder(compareExprStats);
|
||||
|
||||
int nonLiteralOptionCount = 0;
|
||||
for (Expression option : options) {
|
||||
ColumnStatistic optionStats = ExpressionEstimation.estimate(option, context.statistics);
|
||||
double validOptionNdv = compareExprStats.ndvIntersection(optionStats);
|
||||
if (validOptionNdv > 0.0) {
|
||||
validInOptCount += validOptionNdv;
|
||||
maxOption = Math.max(optionStats.maxValue, maxOption);
|
||||
minOption = Math.min(optionStats.minValue, minOption);
|
||||
if (option instanceof Literal) {
|
||||
// remove the options which is out of compareExpr.range
|
||||
if (compareExprStats.minValue <= optionStats.maxValue
|
||||
&& optionStats.maxValue <= compareExprStats.maxValue) {
|
||||
validInOptCount++;
|
||||
LiteralExpr optionLiteralExpr = ((Literal) option).toLegacyLiteral();
|
||||
if (maxOptionLiteral == null || optionLiteralExpr.compareTo(maxOptionLiteral) >= 0) {
|
||||
maxOptionLiteral = optionLiteralExpr;
|
||||
maxOptionValue = optionStats.maxValue;
|
||||
}
|
||||
|
||||
if (minOptionLiteral == null || optionLiteralExpr.compareTo(minOptionLiteral) <= 0) {
|
||||
minOptionLiteral = optionLiteralExpr;
|
||||
minOptionValue = optionStats.minValue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
nonLiteralOptionCount++;
|
||||
}
|
||||
}
|
||||
if (nonLiteralOptionCount > 0) {
|
||||
// A in (x+1, ...)
|
||||
// "x+1" is not literal, and if const-fold can not handle it, it blocks estimation of min/max value.
|
||||
// and hence, we do not adjust compareExpr.stats.range.
|
||||
int newNdv = nonLiteralOptionCount + validInOptCount;
|
||||
if (newNdv < compareExprStats.ndv) {
|
||||
compareExprStatsBuilder.setNdv(newNdv);
|
||||
selectivity = StatsMathUtil.divide(newNdv, compareExprStats.ndv);
|
||||
} else {
|
||||
selectivity = 1.0;
|
||||
}
|
||||
} else {
|
||||
maxOptionValue = Math.min(maxOptionValue, compareExprStats.maxValue);
|
||||
minOptionValue = Math.max(minOptionValue, compareExprStats.minValue);
|
||||
compareExprStatsBuilder.setMaxValue(maxOptionValue);
|
||||
compareExprStatsBuilder.setMaxExpr(maxOptionLiteral);
|
||||
compareExprStatsBuilder.setMinValue(minOptionValue);
|
||||
compareExprStatsBuilder.setMinExpr(minOptionLiteral);
|
||||
if (validInOptCount < compareExprStats.ndv) {
|
||||
compareExprStatsBuilder.setNdv(validInOptCount);
|
||||
selectivity = StatsMathUtil.divide(validInOptCount, compareExprStats.ndv);
|
||||
} else {
|
||||
selectivity = 1.0;
|
||||
}
|
||||
}
|
||||
maxOption = Math.min(maxOption, compareExprStats.maxValue);
|
||||
minOption = Math.max(minOption, compareExprStats.minValue);
|
||||
compareExprStatsBuilder.setMaxValue(maxOption);
|
||||
compareExprStatsBuilder.setMinValue(minOption);
|
||||
|
||||
selectivity = StatsMathUtil.minNonNaN(1.0, validInOptCount / compareExprStats.ndv);
|
||||
compareExprStatsBuilder.setNdv(validInOptCount);
|
||||
Statistics estimated = new Statistics(context.statistics);
|
||||
estimated = estimated.withSel(selectivity);
|
||||
if (compareExpr instanceof SlotReference) {
|
||||
estimated.addColumnStats(compareExpr,
|
||||
compareExprStatsBuilder.build());
|
||||
}
|
||||
estimated.addColumnStats(compareExpr,
|
||||
compareExprStatsBuilder.build());
|
||||
|
||||
return estimated;
|
||||
}
|
||||
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
|
||||
package org.apache.doris.nereids.stats;
|
||||
|
||||
import org.apache.doris.analysis.IntLiteral;
|
||||
import org.apache.doris.nereids.trees.expressions.And;
|
||||
import org.apache.doris.nereids.trees.expressions.Cast;
|
||||
import org.apache.doris.nereids.trees.expressions.EqualTo;
|
||||
@ -418,7 +419,9 @@ class FilterEstimationTest {
|
||||
.setAvgSizeByte(4)
|
||||
.setNumNulls(0)
|
||||
.setMinValue(1)
|
||||
.setMaxValue(10);
|
||||
.setMinExpr(new IntLiteral(1))
|
||||
.setMaxValue(10)
|
||||
.setMaxExpr(new IntLiteral(10));
|
||||
slotToColumnStat.put(a, builder.build());
|
||||
Statistics stat = new Statistics(1000, slotToColumnStat);
|
||||
FilterEstimation filterEstimation = new FilterEstimation();
|
||||
|
||||
Reference in New Issue
Block a user