From 8ee7bc430da56c23fd3a5f886be5bfd1cf3fc650 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:12:03 +0800 Subject: [PATCH] [fix](Nereids) should derive stats asap to avoid npe (#34238) we do derive stats job eager to avoid un derive stats due to merge group and optimize group consider: we have two groups burned by order: G1 and G2 then we have job by order derive G2, optimize group expression in G2, derive G1, optimize group expression in G1 if G1 merged into G2, then we maybe generated job optimize group G2 before derive G1 in this case, we will do get stats from G1's child before derive G1's child stats then we will meet NPE in CostModel. --- .../nereids/jobs/cascades/ApplyRuleJob.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java index 5560c369dd..eb4f86bb0c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java @@ -34,6 +34,8 @@ import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import com.google.common.collect.Lists; + import java.util.HashMap; import java.util.List; @@ -68,6 +70,7 @@ public class ApplyRuleJob extends Job { } countJobExecutionTimesOfGroupExpressions(groupExpression); + List deriveStatsJobs = Lists.newArrayList(); GroupExpressionMatching groupExpressionMatching = new GroupExpressionMatching(rule.getPattern(), groupExpression); for (Plan plan : groupExpressionMatching) { @@ -87,7 +90,7 @@ public class ApplyRuleJob extends Job { if (newPlan instanceof LogicalPlan) { pushJob(new OptimizeGroupExpressionJob(newGroupExpression, context)); if (!rule.getRuleType().equals(RuleType.LOGICAL_JOIN_COMMUTE)) { - pushJob(new DeriveStatsJob(newGroupExpression, context)); + deriveStatsJobs.add(new DeriveStatsJob(newGroupExpression, context)); } else { // The Join Commute rule preserves the operator's expression and children, // thereby not altering the statistics. Hence, there is no need to derive statistics for it. @@ -101,7 +104,7 @@ public class ApplyRuleJob extends Job { // logicalTopN ==> GlobalPhysicalTopN // -> localPhysicalTopN // These implementation rules integrate rules for plan shape transformation. - pushJob(new DeriveStatsJob(newGroupExpression, context)); + deriveStatsJobs.add(new DeriveStatsJob(newGroupExpression, context)); } else { newGroupExpression.setStatDerived(true); } @@ -111,6 +114,17 @@ public class ApplyRuleJob extends Job { APPLY_RULE_TRACER.log(TransformEvent.of(groupExpression, plan, newPlans, rule.getRuleType()), rule::isRewrite); } + // we do derive stats job eager to avoid un derive stats due to merge group and optimize group + // consider: + // we have two groups burned by order: G1 and G2 + // then we have job by order derive G2, optimize group expression in G2, + // derive G1, optimize group expression in G1 + // if G1 merged into G2, then we maybe generated job optimize group G2 before derive G1 + // in this case, we will do get stats from G1's child before derive G1's child stats + // then we will meet NPE in CostModel. + for (DeriveStatsJob deriveStatsJob : deriveStatsJobs) { + pushJob(deriveStatsJob); + } } groupExpression.setApplied(rule); }