[fix](nereids) derive column stats for 'expr and A is not null' (#37235) (#37498)

pick from #37235 
the algorithm for computing stats for "expr1 and expr2" predicate is as
following:
1. compute output stats of expr1 based on input stats. the result stats
is denoted by leftStats
2. compute stats of expr2 based on leftStats after step1, leftStats
should be normalized to avoid abnormal cases, such as ndv > rowCount or
numNulls > rowCount

Issue Number: close #xxx

<!--Describe your changes.-->

## Proposed changes

Issue Number: close #xxx

<!--Describe your changes.-->
This commit is contained in:
minghong
2024-07-09 17:46:57 +08:00
committed by GitHub
parent 5247e0ff3a
commit 9b075bc873
5 changed files with 175 additions and 123 deletions

View File

@ -110,6 +110,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
Expression leftExpr = predicate.child(0);
Expression rightExpr = predicate.child(1);
Statistics leftStats = leftExpr.accept(this, context);
leftStats = leftStats.normalizeByRatio(context.statistics.getRowCount());
Statistics andStats = rightExpr.accept(this,
new EstimationContext(leftStats));
if (predicate instanceof And) {

View File

@ -204,4 +204,22 @@ public class Statistics {
public int getWidthInJoinCluster() {
return widthInJoinCluster;
}
public Statistics normalizeByRatio(double originRowCount) {
if (rowCount >= originRowCount || rowCount <= 0) {
return this;
}
StatisticsBuilder builder = new StatisticsBuilder(this);
double ratio = rowCount / originRowCount;
for (Entry<Expression, ColumnStatistic> entry : expressionToColumnStats.entrySet()) {
ColumnStatistic colStats = entry.getValue();
if (colStats.numNulls != 0 || colStats.ndv > rowCount) {
ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(colStats);
colStatsBuilder.setNumNulls(colStats.numNulls * ratio);
colStatsBuilder.setNdv(Math.min(rowCount - colStatsBuilder.getNumNulls(), colStats.ndv));
builder.putColumnStatistics(entry.getKey(), colStatsBuilder.build());
}
}
return builder.build();
}
}