[opt](nereids)add explanation why we always update col stats in StatsCalculator.

This commit is contained in:
minghong
2023-09-18 13:47:37 +08:00
committed by GitHub
parent f3e350e8ec
commit 1153907897

View File

@ -227,6 +227,17 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
groupExpression.getOwnerGroup().setStatistics(newStats);
groupExpression.setEstOutputRowCount(newStats.getRowCount());
} else {
// Why we update column stats here even though the group already has statistics:
// consider join between 3 tables: A/B/C with join condition: A.id=B.id=C.id and a filter: C.id=1
// in the final join result, the ndv of A.id/B.id/C.id should be 1
// suppose we have 2 candidate plans
// plan1: (A join B on A.id=B.id) join C on B.id=C.id
// plan2: (B join C on B.id=C.id) join A on A.id=B.id
// suppose plan1 is estimated before plan2
//
// After estimating the outermost (top) join of plan1 (join C), B.id.ndv is updated to 1,
// but A.id.ndv is not updated.
// Then we estimate plan2, whose stats are denoted by stats2. Obviously, stats2.A.id.ndv is 1,
// so we now update OwnerGroup().getStatistics().A.id.ndv to 1 as well.
groupExpression.getOwnerGroup().getStatistics().updateNdv(newStats);
}
groupExpression.setStatDerived(true);