diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java index c90b3dd8e1..6bbafdbe5b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java @@ -131,6 +131,11 @@ public class ColStatsData { } public ColumnStatistic toColumnStatistic() { + // For non-empty table, return UNKNOWN if we can't collect ndv value. + // Because inaccurate ndv is very misleading. + if (count > 0 && ndv == 0 && count != nullCount) { + return ColumnStatistic.UNKNOWN; + } try { ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(); columnStatisticBuilder.setCount(count); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java index f8ed6a1b6a..a512fbadbd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java @@ -25,8 +25,8 @@ public class ColumnStatisticBuilder { private double avgSizeByte; private double numNulls; private double dataSize; - private double minValue; - private double maxValue; + private double minValue = Double.NEGATIVE_INFINITY; + private double maxValue = Double.POSITIVE_INFINITY; private LiteralExpr minExpr; private LiteralExpr maxExpr; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java index a281f9b0ec..88ccd661b5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java @@ -37,18 +37,17 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader 0 && stats.ndv == 0 && stats.count != stats.numNulls) { + columnStatistic = Optional.of(ColumnStatistic.UNKNOWN); + } + } return columnStatistic; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java index 62e11f5c9d..1826f10a38 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java @@ -173,7 +173,10 @@ public class StatisticsCache { String colId = statsId.colId; final StatisticsCacheKey k = new StatisticsCacheKey(tblId, idxId, colId); - final ColumnStatistic c = ColumnStatistic.fromResultRow(r); + ColumnStatistic c = ColumnStatistic.fromResultRow(r); + if (c.count > 0 && c.ndv == 0 && c.count != c.numNulls) { + c = ColumnStatistic.UNKNOWN; + } putCache(k, c); } catch (Throwable t) { LOG.warn("Error when preheating stats cache", t); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java index 7bad73801a..0206a09325 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java @@ -134,6 +134,8 @@ class FilterEstimationTest { Map slotToColumnStat = new HashMap<>(); ColumnStatisticBuilder builder = new ColumnStatisticBuilder() .setNdv(500) + .setMaxValue(0) + .setMinValue(0) .setIsUnknown(false); slotToColumnStat.put(a, builder.build()); Statistics stat = new Statistics(1000, slotToColumnStat); diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 055d8dfec5..f793e769de 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -2672,6 +2672,23 @@ PARTITION `p599` VALUES IN (599) sql """drop stats alter_test""" alter_result = sql """show table stats alter_test""" assertEquals("false", alter_result[0][7]) + sql """alter table alter_test modify column id set stats ('row_count'='100', 'ndv'='0', 'num_nulls'='0.0', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');""" + alter_result = sql """show column stats alter_test(id)""" + assertEquals(1, alter_result.size()) + alter_result = sql """show column cached stats alter_test(id)""" + assertEquals(0, alter_result.size()) + alter_result = sql """show column cached stats alter_test(id)""" + assertEquals(0, alter_result.size()) + sql """alter table alter_test modify column id set stats ('row_count'='100', 'ndv'='0', 'num_nulls'='100', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');""" + alter_result = sql """show column stats alter_test(id)""" + assertEquals(1, alter_result.size()) + alter_result = sql """show column cached stats alter_test(id)""" + assertEquals(1, alter_result.size()) + sql """alter table alter_test modify column id set stats ('row_count'='100', 'ndv'='1', 'num_nulls'='0', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');""" + alter_result = sql """show column stats alter_test(id)""" + assertEquals(1, alter_result.size()) + alter_result = sql """show column cached stats alter_test(id)""" + assertEquals(1, alter_result.size()) // Test trigger type, manual default full, manual high health value, sample empty, kill job, show analyze sql """DROP DATABASE IF EXISTS trigger"""