diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJob.java index c0ef0345a2..ec65b9af14 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJob.java @@ -125,7 +125,7 @@ public class DeriveStatsJob extends Job { // child group's row count unchanged when the parent group expression is a project operation. double parentRowCount = groupExpression.getOwnerGroup().getStatistics().getRowCount(); groupExpression.children().forEach(g -> g.setStatistics( - g.getStatistics().updateRowCountAndColStats(parentRowCount)) + g.getStatistics().withRowCountAndEnforceValid(parentRowCount)) ); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java index 30bcba4234..6526f78836 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java @@ -151,7 +151,7 @@ public class ExpressionEstimation extends ExpressionVisitor leftInputSlots = leftExpr.getInputSlots(); Set rightInputSlots = rightExpr.getInputSlots(); for (Slot slot : context.keyColumns) { @@ -164,7 +164,7 @@ public class FilterEstimation extends ExpressionVisitor options = inPredicate.getOptions(); // init minOption and maxOption by compareExpr.max and compareExpr.min respectively, @@ -348,7 +348,7 @@ public class FilterEstimation extends ExpressionVisitor { private Statistics computeAssertNumRows(long desiredNumOfRows) { Statistics statistics = groupExpression.childStatistics(0); - statistics.withRowCount(Math.min(1, statistics.getRowCount())); + statistics.withRowCountAndEnforceValid(Math.min(1, statistics.getRowCount())); return statistics; } @@ -657,7 +657,7 @@ public class StatsCalculator extends DefaultPlanVisitor { private Statistics computeTopN(TopN topN) { Statistics stats = groupExpression.childStatistics(0); - return stats.withRowCount(Math.min(stats.getRowCount(), topN.getLimit())); + return stats.withRowCountAndEnforceValid(Math.min(stats.getRowCount(), topN.getLimit())); } private Statistics computePartitionTopN(PartitionTopN partitionTopN) { @@ -690,12 +690,12 @@ public class StatsCalculator extends DefaultPlanVisitor { // TODO: for the filter push down window situation, we will prune the row count twice // because we keep the pushed down filter. And it will be calculated twice, one of them in 'PartitionTopN' // and the other is in 'Filter'. It's hard to dismiss. - return childStats.updateRowCountAndColStats(rowCount); + return childStats.withRowCountAndEnforceValid(rowCount); } private Statistics computeLimit(Limit limit) { Statistics stats = groupExpression.childStatistics(0); - return stats.withRowCount(Math.min(stats.getRowCount(), limit.getLimit())); + return stats.withRowCountAndEnforceValid(Math.min(stats.getRowCount(), limit.getLimit())); } private double estimateGroupByRowCount(List groupByExpressions, Statistics childStats) { @@ -878,7 +878,7 @@ public class StatsCalculator extends DefaultPlanVisitor { for (int i = 1; i < setOperation.getArity(); ++i) { rowCount = Math.min(rowCount, groupExpression.childStatistics(i).getRowCount()); } - double minProd = Double.MAX_VALUE; + double minProd = Double.POSITIVE_INFINITY; for (Group group : groupExpression.children()) { Statistics statistics = group.getStatistics(); double prod = 1.0; @@ -896,7 +896,7 @@ public class StatsCalculator extends DefaultPlanVisitor { leftChildStats.addColumnStats(outputs.get(i), leftChildStats.findColumnStatistics(leftChildOutputs.get(i))); } - return leftChildStats.withRowCount(rowCount); + return leftChildStats.withRowCountAndEnforceValid(rowCount); } private Statistics computeGenerate(Generate generate) { @@ -910,8 +910,8 @@ public class StatsCalculator extends DefaultPlanVisitor { for (Slot output : generate.getGeneratorOutput()) { ColumnStatistic columnStatistic = new ColumnStatisticBuilder() .setCount(count) - .setMinValue(Double.MAX_VALUE) - .setMaxValue(Double.MIN_VALUE) + .setMinValue(Double.NEGATIVE_INFINITY) + .setMaxValue(Double.POSITIVE_INFINITY) .setNdv(count) .setNumNulls(0) .setAvgSizeByte(output.getDataType().width()) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java index 9d0cd838a7..46086e5c93 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java @@ -27,6 +27,7 @@ import org.apache.doris.nereids.types.DateTimeV2Type; import org.apache.doris.nereids.types.DateType; import org.apache.doris.nereids.types.DateV2Type; +import java.time.DateTimeException; import java.time.LocalDate; import java.time.temporal.ChronoUnit; @@ -49,11 +50,15 @@ public abstract class DateLikeType extends PrimitiveType { return 0; } if (Double.isInfinite(high) || Double.isInfinite(low)) { - return high - low; + return Double.POSITIVE_INFINITY; + } + try { + LocalDate to = toLocalDate(high); + LocalDate from = toLocalDate(low); + return ChronoUnit.DAYS.between(from, to); + } catch (DateTimeException e) { + return Double.POSITIVE_INFINITY; } - LocalDate to = toLocalDate(high); - LocalDate from = toLocalDate(low); - return ChronoUnit.DAYS.between(from, to); } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index 67bc308bca..c6b019f669 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -177,10 +177,10 @@ public class ColumnStatistic { columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min)); } catch (AnalysisException e) { LOG.warn("Failed to deserialize column {} min value {}.", col, min, e); - columnStatisticBuilder.setMinValue(Double.MIN_VALUE); + columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); } } else { - columnStatisticBuilder.setMinValue(Double.MIN_VALUE); + columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); } if (max != null && !max.equalsIgnoreCase("NULL")) { try { @@ -188,10 +188,10 @@ public class ColumnStatistic { columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max)); } catch (AnalysisException e) { LOG.warn("Failed to deserialize column {} max value {}.", col, max, e); - columnStatisticBuilder.setMaxValue(Double.MAX_VALUE); + columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); } } else { - columnStatisticBuilder.setMaxValue(Double.MAX_VALUE); + columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); } columnStatisticBuilder.setUpdatedTime(row.get(13)); return columnStatisticBuilder.build(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index ef9d77d1c5..31dcb7ce99 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -37,26 +37,6 @@ public class Statistics { // the byte size of one tuple private double tupleSize; - /** - * after filter, compute the new ndv of a column - * @param ndv original ndv of column - * @param newRowCount the row count of table after filter - * @param oldRowCount the row count of table before filter - * @return the new ndv after filter - */ - public static double computeNdv(double ndv, double newRowCount, double oldRowCount) { - if (newRowCount > oldRowCount) { - return ndv; - } - double selectOneTuple = newRowCount / StatsMathUtil.nonZeroDivisor(oldRowCount); - double allTuplesOfSameDistinctValueNotSelected = Math.pow((1 - selectOneTuple), oldRowCount / ndv); - if (allTuplesOfSameDistinctValueNotSelected == 1.0) { - // avoid NaN - return ndv; - } - return Math.min(ndv * (1 - allTuplesOfSameDistinctValueNotSelected), newRowCount); - } - public Statistics(Statistics another) { this.rowCount = another.rowCount; this.expressionToColumnStats = new HashMap<>(another.expressionToColumnStats); @@ -80,53 +60,19 @@ public class Statistics { return rowCount; } - /* - * Return a stats with new rowCount and fix each column stats. - */ public Statistics withRowCount(double rowCount) { - if (Double.isNaN(rowCount)) { - return this; - } - Statistics statistics = new Statistics(rowCount, new HashMap<>(expressionToColumnStats)); - statistics.fix(rowCount, StatsMathUtil.nonZeroDivisor(this.rowCount)); - return statistics; - } - - public Statistics setRowCount(double rowCount) { return new Statistics(rowCount, new HashMap<>(expressionToColumnStats)); } /** * Update by count. */ - public Statistics updateRowCountAndColStats(double rowCount) { + public Statistics withRowCountAndEnforceValid(double rowCount) { Statistics statistics = new Statistics(rowCount, expressionToColumnStats); - for (Entry entry : expressionToColumnStats.entrySet()) { - ColumnStatistic columnStatistic = entry.getValue(); - ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(columnStatistic); - columnStatisticBuilder.setNdv(Math.min(columnStatistic.ndv, rowCount)); - columnStatisticBuilder.setNumNulls(rowCount - columnStatistic.numNulls); - columnStatisticBuilder.setCount(rowCount); - expressionToColumnStats.put(entry.getKey(), columnStatisticBuilder.build()); - } + statistics.enforceValid(); return statistics; } - /** - * Fix by sel. - */ - public void fix(double newRowCount, double originRowCount) { - double sel = newRowCount / originRowCount; - for (Entry entry : expressionToColumnStats.entrySet()) { - ColumnStatistic columnStatistic = entry.getValue(); - ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(columnStatistic); - columnStatisticBuilder.setNdv(computeNdv(columnStatistic.ndv, newRowCount, originRowCount)); - columnStatisticBuilder.setNumNulls(Math.min(columnStatistic.numNulls * sel, newRowCount)); - columnStatisticBuilder.setCount(newRowCount); - expressionToColumnStats.put(entry.getKey(), columnStatisticBuilder.build()); - } - } - public void enforceValid() { for (Entry entry : expressionToColumnStats.entrySet()) { ColumnStatistic columnStatistic = entry.getValue(); @@ -137,8 +83,8 @@ public class Statistics { columnStatisticBuilder.setNumNulls(Math.min(columnStatistic.numNulls, rowCount - ndv)); columnStatisticBuilder.setCount(rowCount); columnStatistic = columnStatisticBuilder.build(); + expressionToColumnStats.put(entry.getKey(), columnStatistic); } - expressionToColumnStats.put(entry.getKey(), columnStatistic); } } @@ -148,21 +94,12 @@ public class Statistics { } public Statistics withSel(double sel) { - return withSel(sel, true); - } - - public Statistics withSel(double sel, boolean updateColStats) { sel = StatsMathUtil.minNonNaN(sel, 1); if (Double.isNaN(rowCount)) { return this; } double newCount = rowCount * sel; - double originCount = rowCount; - Statistics statistics = new Statistics(newCount, new HashMap<>(expressionToColumnStats)); - if (updateColStats) { - statistics.fix(newCount, StatsMathUtil.nonZeroDivisor(originCount)); - } - return statistics; + return new Statistics(newCount, new HashMap<>(expressionToColumnStats)); } public Statistics addColumnStats(Expression expression, ColumnStatistic columnStatistic) { @@ -176,11 +113,6 @@ public class Statistics { && expressionToColumnStats.get(s).isUnKnown); } - public Statistics merge(Statistics statistics) { - expressionToColumnStats.putAll(statistics.expressionToColumnStats); - return this; - } - private double computeTupleSize() { if (tupleSize <= 0) { double tempSize = 0.0; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index dd4e4f8515..40ae13a0e0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -650,8 +650,8 @@ public class StatisticsUtil { TableScan tableScan = table.newScan().includeColumnStats(); ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(); columnStatisticBuilder.setCount(0); - columnStatisticBuilder.setMaxValue(Double.MAX_VALUE); - columnStatisticBuilder.setMinValue(Double.MIN_VALUE); + columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); + columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); columnStatisticBuilder.setDataSize(0); columnStatisticBuilder.setAvgSizeByte(0); columnStatisticBuilder.setNumNulls(0);