[opt](nereids) optimize filter estimation for pattern "col=col" #18716

TPC-H Q10 and Q5 benefit from this optimization.

For a given hash join condition A = B, sometimes both A and B have already been reduced by filters. In this PR, both reductions are taken into account in join estimation.
This commit is contained in:
minghong
2023-04-17 11:44:35 +08:00
committed by GitHub
parent b5b0148010
commit a2278dbc6c
5 changed files with 67 additions and 56 deletions

View File

@ -389,7 +389,22 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
rightBuilder.setNdv(rightIntersectLeft.getDistinctValues());
rightBuilder.setMinValue(rightIntersectLeft.getLow());
rightBuilder.setMaxValue(rightIntersectLeft.getDistinctValues());
double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
double sel;
double reduceRatio = 0.25;
double bothSideReducedRatio = 0.9;
if (leftStats.ndv < leftStats.originalNdv * bothSideReducedRatio
&& rightStats.ndv < rightStats.originalNdv * bothSideReducedRatio) {
double sel1;
if (leftStats.ndv > rightStats.ndv) {
sel1 = 1 / StatsMathUtil.nonZeroDivisor(leftStats.ndv);
} else {
sel1 = 1 / StatsMathUtil.nonZeroDivisor(rightStats.ndv);
}
double sel2 = Math.min(rightStats.ndv / rightStats.originalNdv, leftStats.ndv / leftStats.originalNdv);
sel = sel1 * Math.pow(sel2, reduceRatio);
} else {
sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
}
Statistics updatedStatistics = context.statistics.withSel(sel);
updatedStatistics.addColumnStats(leftExpr, leftBuilder.build());
updatedStatistics.addColumnStats(rightExpr, rightBuilder.build());