diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
index 5560c369dd..eb4f86bb0c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java
@@ -34,6 +34,8 @@ import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
 
+import com.google.common.collect.Lists;
+
 import java.util.HashMap;
 import java.util.List;
 
@@ -68,6 +70,7 @@ public class ApplyRuleJob extends Job {
         }
         countJobExecutionTimesOfGroupExpressions(groupExpression);
 
+        List<DeriveStatsJob> deriveStatsJobs = Lists.newArrayList();
         GroupExpressionMatching groupExpressionMatching
                 = new GroupExpressionMatching(rule.getPattern(), groupExpression);
         for (Plan plan : groupExpressionMatching) {
@@ -87,7 +90,7 @@ public class ApplyRuleJob extends Job {
                 if (newPlan instanceof LogicalPlan) {
                     pushJob(new OptimizeGroupExpressionJob(newGroupExpression, context));
                     if (!rule.getRuleType().equals(RuleType.LOGICAL_JOIN_COMMUTE)) {
-                        pushJob(new DeriveStatsJob(newGroupExpression, context));
+                        deriveStatsJobs.add(new DeriveStatsJob(newGroupExpression, context));
                     } else {
                         // The Join Commute rule preserves the operator's expression and children,
                         // thereby not altering the statistics. Hence, there is no need to derive statistics for it.
@@ -101,7 +104,7 @@ public class ApplyRuleJob extends Job {
                         // logicalTopN ==> GlobalPhysicalTopN
                         //                   -> localPhysicalTopN
                         // These implementation rules integrate rules for plan shape transformation.
-                        pushJob(new DeriveStatsJob(newGroupExpression, context));
+                        deriveStatsJobs.add(new DeriveStatsJob(newGroupExpression, context));
                     } else {
                         newGroupExpression.setStatDerived(true);
                     }
@@ -111,6 +114,17 @@ public class ApplyRuleJob extends Job {
                 APPLY_RULE_TRACER.log(TransformEvent.of(groupExpression, plan, newPlans, rule.getRuleType()),
                         rule::isRewrite);
             }
+            // we do derive stats job eager to avoid un derive stats due to merge group and optimize group
+            // consider:
+            //   we have two groups burned by order: G1 and G2
+            //   then we have job by order derive G2, optimize group expression in G2,
+            //     derive G1, optimize group expression in G1
+            //   if G1 merged into G2, then we maybe generated job optimize group G2 before derive G1
+            //   in this case, we will do get stats from G1's child before derive G1's child stats
+            //   then we will meet NPE in CostModel.
+            for (DeriveStatsJob deriveStatsJob : deriveStatsJobs) {
+                pushJob(deriveStatsJob);
+            }
         }
         groupExpression.setApplied(rule);
     }