[opt](nereids) optimize filter estimation for pattern "col=col" #18716
TPC-H q10 and q5 benefit from this optimization. For a given hash join condition A=B, sometimes both A and B are reduced by filters. In this PR, both reductions are counted in join estimation.
This commit is contained in:
@@ -389,7 +389,22 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
rightBuilder.setNdv(rightIntersectLeft.getDistinctValues());
|
||||
rightBuilder.setMinValue(rightIntersectLeft.getLow());
|
||||
rightBuilder.setMaxValue(rightIntersectLeft.getDistinctValues());
|
||||
double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
|
||||
double sel;
|
||||
double reduceRatio = 0.25;
|
||||
double bothSideReducedRatio = 0.9;
|
||||
if (leftStats.ndv < leftStats.originalNdv * bothSideReducedRatio
|
||||
&& rightStats.ndv < rightStats.originalNdv * bothSideReducedRatio) {
|
||||
double sel1;
|
||||
if (leftStats.ndv > rightStats.ndv) {
|
||||
sel1 = 1 / StatsMathUtil.nonZeroDivisor(leftStats.ndv);
|
||||
} else {
|
||||
sel1 = 1 / StatsMathUtil.nonZeroDivisor(rightStats.ndv);
|
||||
}
|
||||
double sel2 = Math.min(rightStats.ndv / rightStats.originalNdv, leftStats.ndv / leftStats.originalNdv);
|
||||
sel = sel1 * Math.pow(sel2, reduceRatio);
|
||||
} else {
|
||||
sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
|
||||
}
|
||||
Statistics updatedStatistics = context.statistics.withSel(sel);
|
||||
updatedStatistics.addColumnStats(leftExpr, leftBuilder.build());
|
||||
updatedStatistics.addColumnStats(rightExpr, rightBuilder.build());
|
||||
|
||||
Reference in New Issue
Block a user