[fix](Nereids) should derive stats asap to avoid npe (#34238)

we do derive stats job eager to avoid un derive stats due to merge group and optimize group consider: we have two groups burned by order: G1 and G2 then we have job by order derive G2, optimize group expression in G2, derive G1, optimize group expression in G1 if G1 merged into G2, then we maybe generated job optimize group G2 before derive G1 in this case, we will do get stats from G1's child before derive G1's child stats then we will meet NPE in CostModel.
2024-04-29 11:12:03 +08:00
parent 84396dbb39
commit 8ee7bc430d
1 changed files with 16 additions and 2 deletions
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
@ -34,6 +34,8 @@ import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;

+import com.google.common.collect.Lists;
+
 import java.util.HashMap;
 import java.util.List;

@ -68,6 +70,7 @@ public class ApplyRuleJob extends Job {
        }
        countJobExecutionTimesOfGroupExpressions(groupExpression);

+        List<DeriveStatsJob> deriveStatsJobs = Lists.newArrayList();
        GroupExpressionMatching groupExpressionMatching
                = new GroupExpressionMatching(rule.getPattern(), groupExpression);
        for (Plan plan : groupExpressionMatching) {
@ -87,7 +90,7 @@ public class ApplyRuleJob extends Job {
                if (newPlan instanceof LogicalPlan) {
                    pushJob(new OptimizeGroupExpressionJob(newGroupExpression, context));
                    if (!rule.getRuleType().equals(RuleType.LOGICAL_JOIN_COMMUTE)) {
-                        pushJob(new DeriveStatsJob(newGroupExpression, context));
+                        deriveStatsJobs.add(new DeriveStatsJob(newGroupExpression, context));
                    } else {
                        // The Join Commute rule preserves the operator's expression and children,
                        // thereby not altering the statistics. Hence, there is no need to derive statistics for it.
@ -101,7 +104,7 @@ public class ApplyRuleJob extends Job {
                        // logicalTopN ==> GlobalPhysicalTopN
                        //                   -> localPhysicalTopN
                        // These implementation rules integrate rules for plan shape transformation.
-                        pushJob(new DeriveStatsJob(newGroupExpression, context));
+                        deriveStatsJobs.add(new DeriveStatsJob(newGroupExpression, context));
                    } else {
                        newGroupExpression.setStatDerived(true);
                    }
@ -111,6 +114,17 @@ public class ApplyRuleJob extends Job {
                APPLY_RULE_TRACER.log(TransformEvent.of(groupExpression, plan, newPlans, rule.getRuleType()),
                        rule::isRewrite);
            }
+            // we do derive stats job eager to avoid un derive stats due to merge group and optimize group
+            // consider:
+            //   we have two groups burned by order: G1 and G2
+            //   then we have job by order derive G2, optimize group expression in G2,
+            //     derive G1, optimize group expression in G1
+            //   if G1 merged into G2, then we maybe generated job optimize group G2 before derive G1
+            //   in this case, we will do get stats from G1's child before derive G1's child stats
+            //   then we will meet NPE in CostModel.
+            for (DeriveStatsJob deriveStatsJob : deriveStatsJobs) {
+                pushJob(deriveStatsJob);
+            }
        }
        groupExpression.setApplied(rule);
    }