From 8ee7bc430da56c23fd3a5f886be5bfd1cf3fc650 Mon Sep 17 00:00:00 2001
From: morrySnow <101034200+morrySnow@users.noreply.github.com>
Date: Mon, 29 Apr 2024 11:12:03 +0800
Subject: [PATCH] [fix](Nereids) should derive stats asap to avoid npe (#34238)

we do derive stats job eager to avoid un derive stats due to merge group and optimize group
consider:
  we have two groups burned by order: G1 and G2
  then we have job by order derive G2, optimize group expression in G2,
    derive G1, optimize group expression in G1
  if G1 merged into G2, then we maybe generated job optimize group G2 before derive G1
  in this case, we will do get stats from G1's child before derive G1's child stats
  then we will meet NPE in CostModel.
---
 .../nereids/jobs/cascades/ApplyRuleJob.java    | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
index 5560c369dd..eb4f86bb0c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
@@ -34,6 +34,8 @@ import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
 
+import com.google.common.collect.Lists;
+
 import java.util.HashMap;
 import java.util.List;
 
@@ -68,6 +70,7 @@ public class ApplyRuleJob extends Job {
         }
         countJobExecutionTimesOfGroupExpressions(groupExpression);
 
+        List<DeriveStatsJob> deriveStatsJobs = Lists.newArrayList();
         GroupExpressionMatching groupExpressionMatching
                 = new GroupExpressionMatching(rule.getPattern(), groupExpression);
         for (Plan plan : groupExpressionMatching) {
@@ -87,7 +90,7 @@ public class ApplyRuleJob extends Job {
                 if (newPlan instanceof LogicalPlan) {
                     pushJob(new OptimizeGroupExpressionJob(newGroupExpression, context));
                     if (!rule.getRuleType().equals(RuleType.LOGICAL_JOIN_COMMUTE)) {
-                        pushJob(new DeriveStatsJob(newGroupExpression, context));
+                        deriveStatsJobs.add(new DeriveStatsJob(newGroupExpression, context));
                     } else {
                         // The Join Commute rule preserves the operator's expression and children,
                         // thereby not altering the statistics. Hence, there is no need to derive statistics for it.
@@ -101,7 +104,7 @@ public class ApplyRuleJob extends Job {
                         // logicalTopN ==> GlobalPhysicalTopN
                         //                   -> localPhysicalTopN
                         // These implementation rules integrate rules for plan shape transformation.
-                        pushJob(new DeriveStatsJob(newGroupExpression, context));
+                        deriveStatsJobs.add(new DeriveStatsJob(newGroupExpression, context));
                     } else {
                         newGroupExpression.setStatDerived(true);
                     }
@@ -111,6 +114,17 @@ public class ApplyRuleJob extends Job {
                 APPLY_RULE_TRACER.log(TransformEvent.of(groupExpression, plan, newPlans, rule.getRuleType()),
                         rule::isRewrite);
             }
+            // we do derive stats job eager to avoid un derive stats due to merge group and optimize group
+            // consider:
+            //   we have two groups burned by order: G1 and G2
+            //   then we have job by order derive G2, optimize group expression in G2,
+            //     derive G1, optimize group expression in G1
+            //   if G1 merged into G2, then we maybe generated job optimize group G2 before derive G1
+            //   in this case, we will do get stats from G1's child before derive G1's child stats
+            //   then we will meet NPE in CostModel.
+            for (DeriveStatsJob deriveStatsJob : deriveStatsJobs) {
+                pushJob(deriveStatsJob);
+            }
         }
         groupExpression.setApplied(rule);
     }