[nereids] consider numNulls in filter estimation (#29184)
consider numNulls in filter estimation
This commit is contained in:
@ -117,6 +117,8 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
colBuilder.setMinValue(union.getLow()).setMinExpr(union.getLowExpr())
|
||||
.setMaxValue(union.getHigh()).setMaxExpr(union.getHighExpr())
|
||||
.setNdv(union.getDistinctValues());
|
||||
double maxNumNulls = Math.max(leftColStats.numNulls, rightColStats.numNulls);
|
||||
colBuilder.setNumNulls(Math.min(colBuilder.getCount(), maxNumNulls));
|
||||
orStats.addColumnStats(slot, colBuilder.build());
|
||||
}
|
||||
}
|
||||
@ -175,7 +177,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
}
|
||||
|
||||
private Statistics updateLessThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft,
|
||||
ColumnStatistic statsForRight, EstimationContext context, boolean contains) {
|
||||
ColumnStatistic statsForRight, EstimationContext context) {
|
||||
StatisticRange rightRange = new StatisticRange(statsForLeft.minValue, statsForLeft.minExpr,
|
||||
statsForRight.maxValue, statsForRight.maxExpr,
|
||||
statsForLeft.ndv, leftExpr.getDataType());
|
||||
@ -185,7 +187,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
}
|
||||
|
||||
private Statistics updateGreaterThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft,
|
||||
ColumnStatistic statsForRight, EstimationContext context, boolean contains) {
|
||||
ColumnStatistic statsForRight, EstimationContext context) {
|
||||
StatisticRange rightRange = new StatisticRange(statsForRight.minValue, statsForRight.minExpr,
|
||||
statsForLeft.maxValue, statsForLeft.maxExpr,
|
||||
statsForLeft.ndv, leftExpr.getDataType());
|
||||
@ -202,12 +204,9 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
return estimateEqualTo(cp, statsForLeft, statsForRight, context);
|
||||
} else {
|
||||
if (cp instanceof LessThan || cp instanceof LessThanEqual) {
|
||||
return updateLessThanLiteral(cp.left(), statsForLeft, statsForRight,
|
||||
context, cp instanceof LessThanEqual);
|
||||
return updateLessThanLiteral(cp.left(), statsForLeft, statsForRight, context);
|
||||
} else if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {
|
||||
|
||||
return updateGreaterThanLiteral(cp.left(), statsForLeft, statsForRight, context,
|
||||
cp instanceof GreaterThanEqual);
|
||||
return updateGreaterThanLiteral(cp.left(), statsForLeft, statsForRight, context);
|
||||
} else {
|
||||
throw new RuntimeException(String.format("Unexpected expression : %s", cp.toSql()));
|
||||
}
|
||||
@ -225,6 +224,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
} else {
|
||||
selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv);
|
||||
}
|
||||
selectivity = getNotNullSelectivity(statsForLeft, selectivity);
|
||||
Statistics equalStats = context.statistics.withSel(selectivity);
|
||||
Expression left = cp.left();
|
||||
equalStats.addColumnStats(left, statsForRight);
|
||||
@ -331,10 +331,12 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
selectivity = 1.0;
|
||||
}
|
||||
}
|
||||
compareExprStatsBuilder.setNumNulls(0);
|
||||
Statistics estimated = new Statistics(context.statistics);
|
||||
ColumnStatistic stats = compareExprStatsBuilder.build();
|
||||
selectivity = getNotNullSelectivity(stats, selectivity);
|
||||
estimated = estimated.withSel(selectivity);
|
||||
estimated.addColumnStats(compareExpr,
|
||||
compareExprStatsBuilder.build());
|
||||
estimated.addColumnStats(compareExpr, stats);
|
||||
context.addKeyIfSlot(compareExpr);
|
||||
return estimated;
|
||||
}
|
||||
@ -394,6 +396,11 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
.setMaxValue(originColStats.maxValue)
|
||||
.setMaxExpr(originColStats.maxExpr);
|
||||
}
|
||||
if (not.child().getInputSlots().size() == 1 && !(child instanceof IsNull)) {
|
||||
// only consider the single column numNull, otherwise, ignore
|
||||
rowCount = Math.max(rowCount - originColStats.numNulls, 1);
|
||||
statisticsBuilder.setRowCount(rowCount);
|
||||
}
|
||||
statisticsBuilder.putColumnStatistics(slot, colBuilder.build());
|
||||
}
|
||||
}
|
||||
@ -460,15 +467,18 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
.setMaxValue(Double.POSITIVE_INFINITY)
|
||||
.setMaxExpr(null)
|
||||
.setNdv(0)
|
||||
.setCount(0);
|
||||
.setCount(0)
|
||||
.setNumNulls(0);
|
||||
} else {
|
||||
leftColumnStatisticBuilder = new ColumnStatisticBuilder(leftStats)
|
||||
.setMinValue(intersectRange.getLow())
|
||||
.setMinExpr(intersectRange.getLowExpr())
|
||||
.setMaxValue(intersectRange.getHigh())
|
||||
.setMaxExpr(intersectRange.getHighExpr())
|
||||
.setNdv(intersectRange.getDistinctValues());
|
||||
.setNdv(intersectRange.getDistinctValues())
|
||||
.setNumNulls(0);
|
||||
double sel = leftRange.overlapPercentWith(rightRange);
|
||||
sel = getNotNullSelectivity(leftStats, sel);
|
||||
updatedStatistics = context.statistics.withSel(sel);
|
||||
leftColumnStatisticBuilder.setCount(updatedStatistics.getRowCount());
|
||||
}
|
||||
@ -488,6 +498,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
intersectBuilder.setNdv(intersect.getDistinctValues());
|
||||
intersectBuilder.setMinValue(intersect.getLow());
|
||||
intersectBuilder.setMaxValue(intersect.getHigh());
|
||||
intersectBuilder.setNumNulls(0);
|
||||
double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
|
||||
Statistics updatedStatistics = context.statistics.withSel(sel);
|
||||
updatedStatistics.addColumnStats(leftExpr, intersectBuilder.build());
|
||||
@ -568,10 +579,34 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
"col stats not found. slot=%s in %s",
|
||||
like.left().toSql(), like.toSql());
|
||||
ColumnStatisticBuilder colBuilder = new ColumnStatisticBuilder(origin);
|
||||
colBuilder.setNdv(origin.ndv * DEFAULT_LIKE_COMPARISON_SELECTIVITY).setNumNulls(0);
|
||||
double selectivity = StatsMathUtil.divide(DEFAULT_LIKE_COMPARISON_SELECTIVITY, origin.ndv);
|
||||
double notNullSel = getNotNullSelectivity(origin, selectivity);
|
||||
colBuilder.setNdv(origin.ndv * DEFAULT_LIKE_COMPARISON_SELECTIVITY)
|
||||
.setCount(notNullSel * context.statistics.getRowCount()).setNumNulls(0);
|
||||
statsBuilder.putColumnStatistics(like.left(), colBuilder.build());
|
||||
context.addKeyIfSlot(like.left());
|
||||
}
|
||||
return statsBuilder.build();
|
||||
}
|
||||
|
||||
private double getNotNullSelectivity(ColumnStatistic stats, double origSel) {
|
||||
double rowCount = stats.count;
|
||||
double numNulls = stats.numNulls;
|
||||
|
||||
// comment following check since current rowCount and ndv may be inconsistant
|
||||
// e.g, rowCount has been reduced by one filter but another filter column's
|
||||
// ndv and numNull remains originally, which will unexpectedly go into the following
|
||||
// normalization.
|
||||
|
||||
//if (numNulls > rowCount - ndv) {
|
||||
// numNulls = rowCount - ndv > 0 ? rowCount - ndv : 0;
|
||||
//}
|
||||
double notNullSel = rowCount <= 1.0 ? 1.0 : 1 - getValidSelectivity(numNulls / rowCount);
|
||||
double validSel = origSel * notNullSel;
|
||||
return getValidSelectivity(validSel);
|
||||
}
|
||||
|
||||
private static double getValidSelectivity(double nullSel) {
|
||||
return nullSel < 0 ? 0 : (nullSel > 1 ? 1 : nullSel);
|
||||
}
|
||||
}
|
||||
|
||||
@ -64,10 +64,10 @@ class FilterEstimationTest {
|
||||
Or or = new Or(greaterThan1, lessThan);
|
||||
Map<Expression, ColumnStatistic> columnStat = new HashMap<>();
|
||||
ColumnStatistic aStats = new ColumnStatisticBuilder().setCount(500).setNdv(500).setAvgSizeByte(4)
|
||||
.setNumNulls(500).setDataSize(0)
|
||||
.setNumNulls(0).setDataSize(0)
|
||||
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
|
||||
ColumnStatistic bStats = new ColumnStatisticBuilder().setCount(500).setNdv(500).setAvgSizeByte(4)
|
||||
.setNumNulls(500).setDataSize(0)
|
||||
.setNumNulls(0).setDataSize(0)
|
||||
.setMinValue(0).setMaxValue(1000).setMinExpr(null).setIsUnknown(true).build();
|
||||
columnStat.put(a, aStats);
|
||||
columnStat.put(b, bStats);
|
||||
@ -93,10 +93,10 @@ class FilterEstimationTest {
|
||||
And and = new And(greaterThan1, lessThan);
|
||||
Map<Expression, ColumnStatistic> columnStat = new HashMap<>();
|
||||
ColumnStatistic aStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
|
||||
.setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
|
||||
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
|
||||
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
|
||||
ColumnStatistic bStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
|
||||
.setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
|
||||
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
|
||||
.setMinValue(0).setMaxValue(1000).setMinExpr(null).setIsUnknown(true).build();
|
||||
columnStat.put(a, aStats);
|
||||
columnStat.put(b, bStats);
|
||||
@ -185,13 +185,13 @@ class FilterEstimationTest {
|
||||
Or or = new Or(and, equalTo);
|
||||
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
|
||||
ColumnStatistic aStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
|
||||
.setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
|
||||
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
|
||||
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
|
||||
ColumnStatistic bStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
|
||||
.setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
|
||||
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
|
||||
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
|
||||
ColumnStatistic cStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
|
||||
.setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
|
||||
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
|
||||
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
|
||||
slotToColumnStat.put(a, aStats);
|
||||
slotToColumnStat.put(b, bStats);
|
||||
@ -910,4 +910,193 @@ class FilterEstimationTest {
|
||||
Statistics result = filterEstimation.estimate(not, stats);
|
||||
Assertions.assertEquals(result.getRowCount(), 90);
|
||||
}
|
||||
|
||||
/**
|
||||
* a = 1
|
||||
*/
|
||||
@Test
|
||||
public void testNumNullsEqualTo() {
|
||||
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
|
||||
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
|
||||
.setNdv(2)
|
||||
.setAvgSizeByte(4)
|
||||
.setNumNulls(8)
|
||||
.setMaxValue(2)
|
||||
.setMinValue(1)
|
||||
.setCount(10);
|
||||
IntegerLiteral int1 = new IntegerLiteral(1);
|
||||
EqualTo equalTo = new EqualTo(a, int1);
|
||||
Statistics stats = new Statistics(10, new HashMap<>());
|
||||
stats.addColumnStats(a, builder.build());
|
||||
FilterEstimation filterEstimation = new FilterEstimation();
|
||||
Statistics result = filterEstimation.estimate(equalTo, stats);
|
||||
Assertions.assertEquals(result.getRowCount(), 1.0, 0.01);
|
||||
}
|
||||
|
||||
/**
|
||||
* a > 1
|
||||
*/
|
||||
@Test
|
||||
public void testNumNullsComparable() {
|
||||
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
|
||||
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
|
||||
.setNdv(2)
|
||||
.setAvgSizeByte(4)
|
||||
.setNumNulls(8)
|
||||
.setMaxValue(2)
|
||||
.setMinValue(1)
|
||||
.setCount(10);
|
||||
IntegerLiteral int1 = new IntegerLiteral(1);
|
||||
GreaterThan greaterThan = new GreaterThan(a, int1);
|
||||
Statistics stats = new Statistics(10, new HashMap<>());
|
||||
stats.addColumnStats(a, builder.build());
|
||||
FilterEstimation filterEstimation = new FilterEstimation();
|
||||
Statistics result = filterEstimation.estimate(greaterThan, stats);
|
||||
Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
|
||||
}
|
||||
|
||||
/**
|
||||
* a in (1, 2)
|
||||
*/
|
||||
@Test
|
||||
public void testNumNullsIn() {
|
||||
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
|
||||
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
|
||||
.setNdv(2)
|
||||
.setAvgSizeByte(4)
|
||||
.setNumNulls(8)
|
||||
.setMaxValue(2)
|
||||
.setMinValue(1)
|
||||
.setCount(10);
|
||||
IntegerLiteral int1 = new IntegerLiteral(1);
|
||||
IntegerLiteral int2 = new IntegerLiteral(2);
|
||||
InPredicate in = new InPredicate(a, Lists.newArrayList(int1, int2));
|
||||
Statistics stats = new Statistics(10, new HashMap<>());
|
||||
stats.addColumnStats(a, builder.build());
|
||||
FilterEstimation filterEstimation = new FilterEstimation();
|
||||
Statistics result = filterEstimation.estimate(in, stats);
|
||||
Assertions.assertEquals(result.getRowCount(), 10.0, 0.01);
|
||||
}
|
||||
|
||||
/**
|
||||
* not a = 1
|
||||
*/
|
||||
@Test
|
||||
public void testNumNullsNotEqualTo() {
|
||||
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
|
||||
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
|
||||
.setNdv(2)
|
||||
.setAvgSizeByte(4)
|
||||
.setNumNulls(8)
|
||||
.setMaxValue(2)
|
||||
.setMinValue(1)
|
||||
.setCount(10);
|
||||
IntegerLiteral int1 = new IntegerLiteral(1);
|
||||
EqualTo equalTo = new EqualTo(a, int1);
|
||||
Not not = new Not(equalTo);
|
||||
Statistics stats = new Statistics(10, new HashMap<>());
|
||||
stats.addColumnStats(a, builder.build());
|
||||
FilterEstimation filterEstimation = new FilterEstimation();
|
||||
Statistics result = filterEstimation.estimate(not, stats);
|
||||
Assertions.assertEquals(result.getRowCount(), 1.0, 0.01);
|
||||
}
|
||||
|
||||
/**
|
||||
* a not in (1, 2)
|
||||
*/
|
||||
@Test
|
||||
public void testNumNullsNotIn() {
|
||||
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
|
||||
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
|
||||
.setNdv(2)
|
||||
.setAvgSizeByte(4)
|
||||
.setNumNulls(8)
|
||||
.setMaxValue(2)
|
||||
.setMinValue(1)
|
||||
.setCount(10);
|
||||
IntegerLiteral int1 = new IntegerLiteral(1);
|
||||
IntegerLiteral int2 = new IntegerLiteral(2);
|
||||
InPredicate in = new InPredicate(a, Lists.newArrayList(int1, int2));
|
||||
Not not = new Not(in);
|
||||
Statistics stats = new Statistics(10, new HashMap<>());
|
||||
stats.addColumnStats(a, builder.build());
|
||||
FilterEstimation filterEstimation = new FilterEstimation();
|
||||
Statistics result = filterEstimation.estimate(not, stats);
|
||||
Assertions.assertEquals(result.getRowCount(), 1.0, 0.01);
|
||||
}
|
||||
|
||||
/**
|
||||
* a >= 1 and a <= 2
|
||||
*/
|
||||
@Test
|
||||
public void testNumNullsAnd() {
|
||||
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
|
||||
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
|
||||
.setNdv(2)
|
||||
.setAvgSizeByte(4)
|
||||
.setNumNulls(8)
|
||||
.setMaxValue(2)
|
||||
.setMinValue(1)
|
||||
.setCount(10);
|
||||
IntegerLiteral int1 = new IntegerLiteral(1);
|
||||
IntegerLiteral int2 = new IntegerLiteral(2);
|
||||
GreaterThanEqual greaterThanEqual = new GreaterThanEqual(a, int1);
|
||||
LessThanEqual lessThanEqual = new LessThanEqual(a, int2);
|
||||
And and = new And(greaterThanEqual, lessThanEqual);
|
||||
Statistics stats = new Statistics(10, new HashMap<>());
|
||||
stats.addColumnStats(a, builder.build());
|
||||
FilterEstimation filterEstimation = new FilterEstimation();
|
||||
Statistics result = filterEstimation.estimate(and, stats);
|
||||
Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
|
||||
}
|
||||
|
||||
/**
|
||||
* a >= 1 or a <= 2
|
||||
*/
|
||||
@Test
|
||||
public void testNumNullsOr() {
|
||||
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
|
||||
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
|
||||
.setNdv(2)
|
||||
.setAvgSizeByte(4)
|
||||
.setNumNulls(8)
|
||||
.setMaxValue(2)
|
||||
.setMinValue(1)
|
||||
.setCount(10);
|
||||
IntegerLiteral int1 = new IntegerLiteral(1);
|
||||
IntegerLiteral int2 = new IntegerLiteral(2);
|
||||
GreaterThanEqual greaterThanEqual = new GreaterThanEqual(a, int2);
|
||||
LessThanEqual lessThanEqual = new LessThanEqual(a, int1);
|
||||
Or or = new Or(greaterThanEqual, lessThanEqual);
|
||||
Statistics stats = new Statistics(10, new HashMap<>());
|
||||
stats.addColumnStats(a, builder.build());
|
||||
FilterEstimation filterEstimation = new FilterEstimation();
|
||||
Statistics result = filterEstimation.estimate(or, stats);
|
||||
Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
|
||||
}
|
||||
|
||||
/**
|
||||
* a >= 1 or a is null
|
||||
*/
|
||||
@Test
|
||||
public void testNumNullsOrIsNull() {
|
||||
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
|
||||
ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
|
||||
.setNdv(2)
|
||||
.setAvgSizeByte(4)
|
||||
.setNumNulls(8)
|
||||
.setMaxValue(2)
|
||||
.setMinValue(1)
|
||||
.setCount(10);
|
||||
IntegerLiteral int1 = new IntegerLiteral(1);
|
||||
GreaterThanEqual greaterThanEqual = new GreaterThanEqual(a, int1);
|
||||
IsNull isNull = new IsNull(a);
|
||||
Or or = new Or(greaterThanEqual, isNull);
|
||||
Statistics stats = new Statistics(10, new HashMap<>());
|
||||
stats.addColumnStats(a, builder.build());
|
||||
FilterEstimation filterEstimation = new FilterEstimation();
|
||||
Statistics result = filterEstimation.estimate(or, stats);
|
||||
Assertions.assertEquals(result.getRowCount(), 10.0, 0.01);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user