From 6f6d744a2a84c2a08009fdb2d199ead9d5541bfb Mon Sep 17 00:00:00 2001 From: minghong Date: Fri, 19 May 2023 08:54:24 +0800 Subject: [PATCH] [fix](nereids) avoid 0 row count in stats derive #19640 The row count of a join estimation is now at least 1, which reduces error propagation. --- .../doris/nereids/stats/JoinEstimation.java | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java index ac8545a1bd..7848e5e433 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java @@ -97,15 +97,19 @@ public class JoinEstimation { EqualTo equal = normalizeHashJoinCondition(expression, leftStats, rightStats); ColumnStatistic eqLeftColStats = ExpressionEstimation.estimate(equal.left(), leftStats); ColumnStatistic eqRightColStats = ExpressionEstimation.estimate(equal.right(), rightStats); - boolean trustable = eqRightColStats.ndv / rightStats.getRowCount() > almostUniqueThreshold - || eqLeftColStats.ndv / leftStats.getRowCount() > almostUniqueThreshold; + double rightStatsRowCount = StatsMathUtil.nonZeroDivisor(rightStats.getRowCount()); + double leftStatsRowCount = StatsMathUtil.nonZeroDivisor(leftStats.getRowCount()); + boolean trustable = eqRightColStats.ndv / rightStatsRowCount > almostUniqueThreshold + || eqLeftColStats.ndv / leftStatsRowCount > almostUniqueThreshold; if (!trustable) { + double rNdv = StatsMathUtil.nonZeroDivisor(eqRightColStats.ndv); + double lNdv = StatsMathUtil.nonZeroDivisor(eqLeftColStats.ndv); if (leftBigger) { - unTrustEqualRatio.add((rightStats.getRowCount() / eqRightColStats.ndv) - * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / eqLeftColStats.ndv); + unTrustEqualRatio.add((rightStatsRowCount / rNdv) + * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / lNdv); 
} else { - unTrustEqualRatio.add((leftStats.getRowCount() / eqLeftColStats.ndv) - * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / eqRightColStats.ndv); + unTrustEqualRatio.add((leftStatsRowCount / lNdv) + * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / rNdv); } } return trustable; @@ -114,10 +118,12 @@ public class JoinEstimation { Statistics innerJoinStats; Statistics crossJoinStats = new StatisticsBuilder() - .setRowCount(leftStats.getRowCount() * rightStats.getRowCount()) + .setRowCount(Math.max(1, leftStats.getRowCount() * rightStats.getRowCount())) .putColumnStatistics(leftStats.columnStatistics()) .putColumnStatistics(rightStats.columnStatistics()) .build(); + + double outputRowCount = 1; if (!trustableConditions.isEmpty()) { List> sortedJoinConditions = join.getHashJoinConjuncts().stream() .map(expression -> Pair.of(expression, estimateJoinConditionSel(crossJoinStats, expression))) @@ -136,21 +142,24 @@ public class JoinEstimation { for (int i = 0; i < sortedJoinConditions.size(); i++) { sel *= Math.pow(sortedJoinConditions.get(i).second, 1 / Math.pow(2, i)); } - innerJoinStats = crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() * sel); + outputRowCount = Math.max(1, crossJoinStats.getRowCount() * sel); } else { - double outputRowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount()); + outputRowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount()); Optional ratio = unTrustEqualRatio.stream().max(Double::compareTo); if (ratio.isPresent()) { - outputRowCount = outputRowCount * ratio.get(); + outputRowCount = Math.max(1, outputRowCount * ratio.get()); } - innerJoinStats = crossJoinStats.updateRowCountOnly(outputRowCount); } - + innerJoinStats = crossJoinStats.updateRowCountOnly(outputRowCount); if (!join.getOtherJoinConjuncts().isEmpty()) { FilterEstimation filterEstimation = new FilterEstimation(); innerJoinStats = filterEstimation.estimate( ExpressionUtils.and(join.getOtherJoinConjuncts()), innerJoinStats); 
+ if (innerJoinStats.getRowCount() <= 0) { + innerJoinStats = new StatisticsBuilder(innerJoinStats).setRowCount(1).build(); + } } + innerJoinStats.setWidth(leftStats.getWidth() + rightStats.getWidth()); innerJoinStats.setPenalty(0); return innerJoinStats; @@ -196,7 +205,7 @@ public class JoinEstimation { rowCount = rightStats.getRowCount() - semiRowCount; } } - return rowCount; + return Math.max(1, rowCount); } private static Statistics estimateSemiOrAnti(Statistics leftStats, Statistics rightStats, Join join) {