From dc4b719528e6d67c38fa0bceaf1459bd6c436d75 Mon Sep 17 00:00:00 2001 From: AKIRA <33112463+Kikyou1997@users.noreply.github.com> Date: Sat, 25 Mar 2023 02:02:36 +0900 Subject: [PATCH] [enhancement](stats) Make estimation with histogram much more precisely (#18053) --- fe/fe-core/src/main/cup/sql_parser.cup | 4 ++ .../doris/nereids/stats/FilterEstimation.java | 54 +++++++++++++------ 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 3faf2ff64d..3df587db3d 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -2799,6 +2799,10 @@ analyze_stmt ::= boolean is_histogram = true; RESULT = new AnalyzeStmt(tbl, cols, partitionNames, properties, is_whole_tbl, is_histogram); :} + | KW_ANALYZE KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM + {: + RESULT = new AnalyzeStmt(tbl, null, null, new HashMap<>(), true, true); + :} ; // Grant statement diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index f2904d38af..53b09f659f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -171,9 +171,9 @@ public class FilterEstimation extends ExpressionVisitor= numVal && bucket.lower <= numVal) { - double overlapPercentInBucket = StatsMathUtil.minNonNaN(1, (numVal - bucket.lower) - / (bucket.upper - bucket.lower)); + double overlapPercentInBucket; + if (numVal == bucket.upper && numVal == bucket.lower) { + if (contains) { + overlapPercentInBucket = 1; + } else { + overlapPercentInBucket = 0; + } + } else { + overlapPercentInBucket = StatsMathUtil.minNonNaN(1, (numVal - bucket.lower) + / (bucket.upper - bucket.lower)); + } double overlapCountInBucket = overlapPercentInBucket * bucket.count; double sel = StatsMathUtil.minNonNaN(1, (bucket.preSum + overlapCountInBucket) / StatsMathUtil.nonZeroDivisor(context.statistics.getRowCount())); @@ -461,16 +473,26 @@ public class FilterEstimation extends ExpressionVisitor= numVal && bucket.lower <= numVal) { - double overlapPercentInBucket = StatsMathUtil.minNonNaN(1, ((bucket.upper - numVal) - / (bucket.upper - bucket.lower))); - double overlapCountInBucket = (1 - overlapPercentInBucket) * bucket.count; - double sel = StatsMathUtil.minNonNaN(1, (leftHist.size() - bucket.preSum - overlapCountInBucket) + double overlapPercentInBucket; + if (numVal == bucket.upper && numVal == bucket.lower) { + if (contains) { + overlapPercentInBucket = 1; + } else { + overlapPercentInBucket = 0; + } + } else { + overlapPercentInBucket = StatsMathUtil.minNonNaN(1, ((bucket.upper - numVal) + / (bucket.upper - bucket.lower))); + } + double overlapCountInBucket = overlapPercentInBucket * bucket.count; + double sel = StatsMathUtil.minNonNaN(1, + (leftHist.size() - bucket.preSum - (bucket.count - overlapCountInBucket)) / context.statistics.getRowCount()); List updatedBucketList = new ArrayList<>(); updatedBucketList.add(new Bucket(numVal, bucket.upper, overlapPercentInBucket * bucket.count,