From 90b2ee90b22c0c4a1b42f78ab66972cf790ca449 Mon Sep 17 00:00:00 2001
From: xzj7019 <131111794+xzj7019@users.noreply.github.com>
Date: Tue, 2 Jan 2024 13:51:11 +0800
Subject: [PATCH] [nereids] consider numNulls in filter estimation (#29184)

consider numNulls in filter estimation
---
 .../doris/nereids/stats/FilterEstimation.java |  59 +++--
 .../nereids/stats/FilterEstimationTest.java   | 203 +++++++++++++++++-
 2 files changed, 243 insertions(+), 19 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index b716b350f2..c086aaef5c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -117,6 +117,8 @@ public class FilterEstimation extends ExpressionVisitor rowCount - ndv) {
+        //    numNulls = rowCount - ndv > 0 ? rowCount - ndv : 0;
+        //}
+        double notNullSel = rowCount <= 1.0 ? 1.0 : 1 - getValidSelectivity(numNulls / rowCount);
+        double validSel = origSel * notNullSel;
+        return getValidSelectivity(validSel);
+    }
+
+    private static double getValidSelectivity(double selectivity) { // clamps the value into [0, 1]
+        return selectivity < 0 ? 0 : (selectivity > 1 ? 1 : selectivity);
+    }
 }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index f5491c6333..b476cc563b 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -64,10 +64,10 @@ class FilterEstimationTest {
         Or or = new Or(greaterThan1, lessThan);
         Map columnStat = new HashMap<>();
         ColumnStatistic aStats = new ColumnStatisticBuilder().setCount(500).setNdv(500).setAvgSizeByte(4)
-                .setNumNulls(500).setDataSize(0)
+                .setNumNulls(0).setDataSize(0)
                 .setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
         ColumnStatistic bStats = new ColumnStatisticBuilder().setCount(500).setNdv(500).setAvgSizeByte(4)
-                .setNumNulls(500).setDataSize(0)
+                .setNumNulls(0).setDataSize(0)
                 .setMinValue(0).setMaxValue(1000).setMinExpr(null).setIsUnknown(true).build();
         columnStat.put(a, aStats);
         columnStat.put(b, bStats);
@@ -93,10 +93,10 @@ class FilterEstimationTest {
         And and = new And(greaterThan1, lessThan);
         Map columnStat = new HashMap<>();
         ColumnStatistic aStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
-                .setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
+                .setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
                 .setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
         ColumnStatistic bStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
-                .setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
+                .setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
                 .setMinValue(0).setMaxValue(1000).setMinExpr(null).setIsUnknown(true).build();
         columnStat.put(a, aStats);
         columnStat.put(b, bStats);
@@ -185,13 +185,13 @@ class FilterEstimationTest {
         Or or = new Or(and, equalTo);
         Map slotToColumnStat = new HashMap<>();
         ColumnStatistic aStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
-                .setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
+                .setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
                 .setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
         ColumnStatistic bStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
-                .setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
+                .setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
                 .setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
         ColumnStatistic cStats = new ColumnStatisticBuilder().setCount(500).setNdv(500)
-                .setAvgSizeByte(4).setNumNulls(500).setDataSize(0)
+                .setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
                 .setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
         slotToColumnStat.put(a, aStats);
         slotToColumnStat.put(b, bStats);
@@ -910,4 +910,193 @@ class FilterEstimationTest {
         Statistics result = filterEstimation.estimate(not, stats);
         Assertions.assertEquals(result.getRowCount(), 90);
     }
+
+    /**
+     * a = 1 (column a: 10 rows, 8 nulls, ndv 2, range [1, 2]); expects 1 row
+     */
+    @Test
+    public void testNumNullsEqualTo() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(8)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        IntegerLiteral int1 = new IntegerLiteral(1);
+        EqualTo equalTo = new EqualTo(a, int1);
+        Statistics stats = new Statistics(10, new HashMap<>());
+        stats.addColumnStats(a, builder.build());
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics result = filterEstimation.estimate(equalTo, stats);
+        Assertions.assertEquals(1.0, result.getRowCount(), 0.01);
+    }
+
+    /**
+     * a > 1 (column a: 10 rows, 8 nulls, ndv 2, range [1, 2]); expects 2 rows
+     */
+    @Test
+    public void testNumNullsComparable() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(8)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        IntegerLiteral int1 = new IntegerLiteral(1);
+        GreaterThan greaterThan = new GreaterThan(a, int1);
+        Statistics stats = new Statistics(10, new HashMap<>());
+        stats.addColumnStats(a, builder.build());
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics result = filterEstimation.estimate(greaterThan, stats);
+        Assertions.assertEquals(2.0, result.getRowCount(), 0.01);
+    }
+
+    /**
+     * a in (1, 2) (column a: 10 rows, 8 nulls, ndv 2, range [1, 2]); expects 10 rows
+     */
+    @Test
+    public void testNumNullsIn() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(8)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        IntegerLiteral int1 = new IntegerLiteral(1);
+        IntegerLiteral int2 = new IntegerLiteral(2);
+        InPredicate in = new InPredicate(a, Lists.newArrayList(int1, int2));
+        Statistics stats = new Statistics(10, new HashMap<>());
+        stats.addColumnStats(a, builder.build());
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics result = filterEstimation.estimate(in, stats);
+        Assertions.assertEquals(10.0, result.getRowCount(), 0.01);
+    }
+
+    /**
+     * not a = 1 (column a: 10 rows, 8 nulls, ndv 2, range [1, 2]); expects 1 row
+     */
+    @Test
+    public void testNumNullsNotEqualTo() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(8)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        IntegerLiteral int1 = new IntegerLiteral(1);
+        EqualTo equalTo = new EqualTo(a, int1);
+        Not not = new Not(equalTo);
+        Statistics stats = new Statistics(10, new HashMap<>());
+        stats.addColumnStats(a, builder.build());
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics result = filterEstimation.estimate(not, stats);
+        Assertions.assertEquals(1.0, result.getRowCount(), 0.01);
+    }
+
+    /**
+     * a not in (1, 2) (column a: 10 rows, 8 nulls, ndv 2, range [1, 2]); expects 1 row
+     */
+    @Test
+    public void testNumNullsNotIn() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(8)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        IntegerLiteral int1 = new IntegerLiteral(1);
+        IntegerLiteral int2 = new IntegerLiteral(2);
+        InPredicate in = new InPredicate(a, Lists.newArrayList(int1, int2));
+        Not not = new Not(in);
+        Statistics stats = new Statistics(10, new HashMap<>());
+        stats.addColumnStats(a, builder.build());
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics result = filterEstimation.estimate(not, stats);
+        Assertions.assertEquals(1.0, result.getRowCount(), 0.01);
+    }
+
+    /**
+     * a >= 1 and a <= 2 (column a: 10 rows, 8 nulls, ndv 2, range [1, 2]); expects 2 rows
+     */
+    @Test
+    public void testNumNullsAnd() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(8)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        IntegerLiteral int1 = new IntegerLiteral(1);
+        IntegerLiteral int2 = new IntegerLiteral(2);
+        GreaterThanEqual greaterThanEqual = new GreaterThanEqual(a, int1);
+        LessThanEqual lessThanEqual = new LessThanEqual(a, int2);
+        And and = new And(greaterThanEqual, lessThanEqual);
+        Statistics stats = new Statistics(10, new HashMap<>());
+        stats.addColumnStats(a, builder.build());
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics result = filterEstimation.estimate(and, stats);
+        Assertions.assertEquals(2.0, result.getRowCount(), 0.01);
+    }
+
+    /**
+     * a >= 2 or a <= 1 (column a: 10 rows, 8 nulls, ndv 2, range [1, 2]); expects 2 rows
+     */
+    @Test
+    public void testNumNullsOr() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(8)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        IntegerLiteral int1 = new IntegerLiteral(1);
+        IntegerLiteral int2 = new IntegerLiteral(2);
+        GreaterThanEqual greaterThanEqual = new GreaterThanEqual(a, int2);
+        LessThanEqual lessThanEqual = new LessThanEqual(a, int1);
+        Or or = new Or(greaterThanEqual, lessThanEqual);
+        Statistics stats = new Statistics(10, new HashMap<>());
+        stats.addColumnStats(a, builder.build());
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics result = filterEstimation.estimate(or, stats);
+        Assertions.assertEquals(2.0, result.getRowCount(), 0.01);
+    }
+
+    /**
+     * a >= 1 or a is null (column a: 10 rows, 8 nulls, ndv 2, range [1, 2]); expects 10 rows
+     */
+    @Test
+    public void testNumNullsOrIsNull() {
+        SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+        ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+                .setNdv(2)
+                .setAvgSizeByte(4)
+                .setNumNulls(8)
+                .setMaxValue(2)
+                .setMinValue(1)
+                .setCount(10);
+        IntegerLiteral int1 = new IntegerLiteral(1);
+        GreaterThanEqual greaterThanEqual = new GreaterThanEqual(a, int1);
+        IsNull isNull = new IsNull(a);
+        Or or = new Or(greaterThanEqual, isNull);
+        Statistics stats = new Statistics(10, new HashMap<>());
+        stats.addColumnStats(a, builder.build());
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics result = filterEstimation.estimate(or, stats);
+        Assertions.assertEquals(10.0, result.getRowCount(), 0.01);
+    }
+
 }