From 115390789766d7167a0be59ca196af469732f01d Mon Sep 17 00:00:00 2001 From: minghong Date: Mon, 18 Sep 2023 13:47:37 +0800 Subject: [PATCH] [opt](nereids)add explanation why we always update col stats in StatsCalculator. --- .../apache/doris/nereids/stats/StatsCalculator.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 93792e62b6..c3b187aad7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -227,6 +227,17 @@ public class StatsCalculator extends DefaultPlanVisitor { groupExpression.getOwnerGroup().setStatistics(newStats); groupExpression.setEstOutputRowCount(newStats.getRowCount()); } else { + // the reason why we update col stats here: + // consider a join between 3 tables A/B/C with join condition A.id=B.id=C.id and a filter C.id=1 + // in the final join result, the ndv of A.id/B.id/C.id should be 1 + // suppose we have 2 candidate plans + // plan1: (A join B on A.id=B.id) join C on B.id=C.id + // plan2: (B join C) join A + // suppose plan1 is estimated before plan2 + // + // after estimating plan1's outer join (join C), we update B.id.ndv=1, but A.id.ndv is not updated + // then we estimate plan2. the stats of plan2 are denoted by stats2. obviously, stats2.A.id.ndv is 1 + // now we update OwnerGroup().getStatistics().A.id.ndv to 1 groupExpression.getOwnerGroup().getStatistics().updateNdv(newStats); } groupExpression.setStatDerived(true);