[enhancement](stats) Make estimation with histogram much more precise (#18053)

This commit is contained in:
AKIRA
2023-03-25 02:02:36 +09:00
committed by GitHub
parent 51962fbfaf
commit dc4b719528
2 changed files with 42 additions and 16 deletions

View File

@ -2799,6 +2799,10 @@ analyze_stmt ::=
boolean is_histogram = true;
RESULT = new AnalyzeStmt(tbl, cols, partitionNames, properties, is_whole_tbl, is_histogram);
:}
| KW_ANALYZE KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM
{:
RESULT = new AnalyzeStmt(tbl, null, null, new HashMap<>(), true, true);
:}
;
// Grant statement

View File

@ -171,9 +171,9 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
}
private Statistics updateLessThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft,
double val, EstimationContext context) {
double val, EstimationContext context, boolean contains) {
if (statsForLeft.histogram != null) {
return estimateLessThanLiteralWithHistogram(leftExpr, statsForLeft, val, context);
return estimateLessThanLiteralWithHistogram(leftExpr, statsForLeft, val, context, contains);
}
return estimateBinaryComparisonFilter(leftExpr,
statsForLeft,
@ -181,9 +181,9 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
}
private Statistics updateGreaterThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft,
double val, EstimationContext context) {
double val, EstimationContext context, boolean contains) {
if (statsForLeft.histogram != null) {
return estimateGreaterThanLiteralWithHistogram(leftExpr, statsForLeft, val, context);
return estimateGreaterThanLiteralWithHistogram(leftExpr, statsForLeft, val, context, contains);
}
StatisticRange rightRange = new StatisticRange(val, Double.POSITIVE_INFINITY,
statsForLeft.ndv);
@ -230,15 +230,18 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
} else {
if (cp instanceof LessThan || cp instanceof LessThanEqual) {
if (context.isNot) {
return updateGreaterThanLiteral(cp.left(), statsForLeft, val, context);
return updateGreaterThanLiteral(cp.left(), statsForLeft, val, context,
!(cp instanceof LessThanEqual));
} else {
return updateLessThanLiteral(cp.left(), statsForLeft, val, context);
return updateLessThanLiteral(cp.left(), statsForLeft, val, context, cp instanceof LessThanEqual);
}
} else if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {
if (context.isNot) {
return updateLessThanLiteral(cp.left(), statsForLeft, val, context);
return updateLessThanLiteral(cp.left(), statsForLeft, val, context,
!(cp instanceof GreaterThanEqual));
} else {
return updateGreaterThanLiteral(cp.left(), statsForLeft, val, context);
return updateGreaterThanLiteral(cp.left(), statsForLeft, val, context,
cp instanceof GreaterThanEqual);
}
} else {
throw new RuntimeException(String.format("Unexpected expression : %s", cp.toSql()));
@ -436,14 +439,23 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
}
private Statistics estimateLessThanLiteralWithHistogram(Expression leftExpr, ColumnStatistic leftStats,
double numVal, EstimationContext context) {
double numVal, EstimationContext context, boolean contains) {
Histogram leftHist = leftStats.histogram;
for (int i = 0; i < leftHist.buckets.size(); i++) {
Bucket bucket = leftHist.buckets.get(i);
if (bucket.upper >= numVal && bucket.lower <= numVal) {
double overlapPercentInBucket = StatsMathUtil.minNonNaN(1, (numVal - bucket.lower)
/ (bucket.upper - bucket.lower));
double overlapPercentInBucket;
if (numVal == bucket.upper && numVal == bucket.lower) {
if (contains) {
overlapPercentInBucket = 1;
} else {
overlapPercentInBucket = 0;
}
} else {
overlapPercentInBucket = StatsMathUtil.minNonNaN(1, (numVal - bucket.lower)
/ (bucket.upper - bucket.lower));
}
double overlapCountInBucket = overlapPercentInBucket * bucket.count;
double sel = StatsMathUtil.minNonNaN(1, (bucket.preSum + overlapCountInBucket)
/ StatsMathUtil.nonZeroDivisor(context.statistics.getRowCount()));
@ -461,16 +473,26 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
}
private Statistics estimateGreaterThanLiteralWithHistogram(Expression leftExpr, ColumnStatistic leftStats,
double numVal, EstimationContext context) {
double numVal, EstimationContext context, boolean contains) {
Histogram leftHist = leftStats.histogram;
for (int i = 0; i < leftHist.buckets.size(); i++) {
Bucket bucket = leftHist.buckets.get(i);
if (bucket.upper >= numVal && bucket.lower <= numVal) {
double overlapPercentInBucket = StatsMathUtil.minNonNaN(1, ((bucket.upper - numVal)
/ (bucket.upper - bucket.lower)));
double overlapCountInBucket = (1 - overlapPercentInBucket) * bucket.count;
double sel = StatsMathUtil.minNonNaN(1, (leftHist.size() - bucket.preSum - overlapCountInBucket)
double overlapPercentInBucket;
if (numVal == bucket.upper && numVal == bucket.lower) {
if (contains) {
overlapPercentInBucket = 1;
} else {
overlapPercentInBucket = 0;
}
} else {
overlapPercentInBucket = StatsMathUtil.minNonNaN(1, ((bucket.upper - numVal)
/ (bucket.upper - bucket.lower)));
}
double overlapCountInBucket = overlapPercentInBucket * bucket.count;
double sel = StatsMathUtil.minNonNaN(1,
(leftHist.size() - bucket.preSum - (bucket.count - overlapCountInBucket))
/ context.statistics.getRowCount());
List<Bucket> updatedBucketList = new ArrayList<>();
updatedBucketList.add(new Bucket(numVal, bucket.upper, overlapPercentInBucket * bucket.count,