From 0f24500ff8484bcfd3b9760c0f20bb98a860b859 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Fri, 3 Nov 2023 14:12:31 +0800 Subject: [PATCH] [fix](Nereids) RewriteCteChildren not work with cost based rewritter (#26326) we use a map to record rewrite cte children result to avoid rewrite twice in cost based rewritter. However, we record cte outer and inner in one map, and use null as outer result's key, use cte id as inner result's key. This is wrong, because every anchor has an outer, and we could only record one outer. So when we use the cache in cost based rewritter, we get wrong outer plan from the cache. Then the error will be thrown as below: ``` Caused by: java.lang.IllegalArgumentException: Stats for CTE: CTEId#1 not found at com.google.common.base.Preconditions.checkArgument(Preconditions.java:143) ~[guava-32.1.2-jre.jar:?] at org.apache.doris.nereids.stats.StatsCalculator.visitLogicalCTEConsumer(StatsCalculator.java:1049) ~[classes/:?] at org.apache.doris.nereids.stats.StatsCalculator.visitLogicalCTEConsumer(StatsCalculator.java:147) ~[classes/:?] at org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer.accept(LogicalCTEConsumer.java:111) ~[classes/:?] at org.apache.doris.nereids.stats.StatsCalculator.estimate(StatsCalculator.java:222) ~[classes/:?] at org.apache.doris.nereids.stats.StatsCalculator.estimate(StatsCalculator.java:200) ~[classes/:?] at org.apache.doris.nereids.jobs.cascades.DeriveStatsJob.execute(DeriveStatsJob.java:108) ~[classes/:?] at org.apache.doris.nereids.jobs.scheduler.SimpleJobScheduler.executeJobPool(SimpleJobScheduler.java:39) ~[classes/:?] at org.apache.doris.nereids.jobs.executor.Optimizer.execute(Optimizer.java:51) ~[classes/:?] at org.apache.doris.nereids.jobs.rewrite.CostBasedRewriteJob.getCost(CostBasedRewriteJob.java:98) ~[classes/:?] at org.apache.doris.nereids.jobs.rewrite.CostBasedRewriteJob.execute(CostBasedRewriteJob.java:64) ~[classes/:?] at org.apache.doris.nereids.jobs.executor.AbstractBatchJobExecutor.execute(AbstractBatchJobExecutor.java:119) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visit(RewriteCteChildren.java:72) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visit(RewriteCteChildren.java:56) ~[classes/:?] at org.apache.doris.nereids.trees.plans.visitor.PlanVisitor.visitLogicalSink(PlanVisitor.java:118) ~[classes/:?] at org.apache.doris.nereids.trees.plans.visitor.SinkVisitor.visitLogicalResultSink(SinkVisitor.java:72) ~[classes/:?] at org.apache.doris.nereids.trees.plans.logical.LogicalResultSink.accept(LogicalResultSink.java:58) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visitLogicalCTEAnchor(RewriteCteChildren.java:86) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visitLogicalCTEAnchor(RewriteCteChildren.java:56) ~[classes/:?] at org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor.accept(LogicalCTEAnchor.java:60) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visitLogicalCTEAnchor(RewriteCteChildren.java:86) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visitLogicalCTEAnchor(RewriteCteChildren.java:56) ~[classes/:?] at org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor.accept(LogicalCTEAnchor.java:60) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.rewriteRoot(RewriteCteChildren.java:67) ~[classes/:?] at org.apache.doris.nereids.jobs.rewrite.CustomRewriteJob.execute(CustomRewriteJob.java:58) ~[classes/:?] at org.apache.doris.nereids.jobs.executor.AbstractBatchJobExecutor.execute(AbstractBatchJobExecutor.java:119) ~[classes/:?] at org.apache.doris.nereids.NereidsPlanner.rewrite(NereidsPlanner.java:275) ~[classes/:?] at org.apache.doris.nereids.NereidsPlanner.plan(NereidsPlanner.java:218) ~[classes/:?] at org.apache.doris.nereids.NereidsPlanner.plan(NereidsPlanner.java:118) ~[classes/:?] at org.apache.doris.nereids.trees.plans.commands.ExplainCommand.run(ExplainCommand.java:81) ~[classes/:?] at org.apache.doris.qe.StmtExecutor.executeByNereids(StmtExecutor.java:550) ~[classes/:?] ``` --- .../org/apache/doris/nereids/StatementContext.java | 11 ++++++++--- .../nereids/jobs/rewrite/CostBasedRewriteJob.java | 2 +- .../nereids/rules/rewrite/RewriteCteChildren.java | 12 ++++++------ regression-test/suites/nereids_syntax_p0/cte.groovy | 5 +++++ 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index 13415c504f..16ad3bc0bd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -85,7 +85,8 @@ public class StatementContext { private final Map> cteIdToConsumerUnderProjects = new HashMap<>(); // Used to update consumer's stats private final Map, Group>>> cteIdToConsumerGroup = new HashMap<>(); - private final Map rewrittenCtePlan = new HashMap<>(); + private final Map rewrittenCteProducer = new HashMap<>(); + private final Map rewrittenCteConsumer = new HashMap<>(); private final Map hintMap = Maps.newLinkedHashMap(); private final Set viewDdlSqlSet = Sets.newHashSet(); @@ -230,8 +231,12 @@ public class StatementContext { return cteIdToConsumerGroup; } - public Map getRewrittenCtePlan() { - return rewrittenCtePlan; + public Map getRewrittenCteProducer() { + return rewrittenCteProducer; + } + + public Map getRewrittenCteConsumer() { + return rewrittenCteConsumer; } public void addViewDdlSql(String ddlSql) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java index 2e5132f4dd..2a7f0903b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java @@ -89,7 +89,7 @@ public class CostBasedRewriteJob implements RewriteJob { CascadesContext rootCtx = currentCtx.getRoot(); if (rootCtx.getRewritePlan() instanceof LogicalCTEAnchor) { // set subtree rewrite cache - currentCtx.getStatementContext().getRewrittenCtePlan() + currentCtx.getStatementContext().getRewrittenCteProducer() .put(currentCtx.getCurrentTree().orElse(null), (LogicalPlan) cboCtx.getRewritePlan()); // Do Whole tree rewrite CascadesContext rootCtxCopy = CascadesContext.newCurrentTreeContext(rootCtx); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java index 5aa286e67f..3318f9990d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java @@ -77,14 +77,14 @@ public class RewriteCteChildren extends DefaultPlanRewriter imp public Plan visitLogicalCTEAnchor(LogicalCTEAnchor cteAnchor, CascadesContext cascadesContext) { LogicalPlan outer; - if (cascadesContext.getStatementContext().getRewrittenCtePlan().containsKey(null)) { - outer = cascadesContext.getStatementContext().getRewrittenCtePlan().get(null); + if (cascadesContext.getStatementContext().getRewrittenCteConsumer().containsKey(cteAnchor.getCteId())) { + outer = cascadesContext.getStatementContext().getRewrittenCteProducer().get(cteAnchor.getCteId()); } else { CascadesContext outerCascadesCtx = CascadesContext.newSubtreeContext( Optional.empty(), cascadesContext, cteAnchor.child(1), cascadesContext.getCurrentJobContext().getRequiredProperties()); outer = (LogicalPlan) cteAnchor.child(1).accept(this, outerCascadesCtx); - cascadesContext.getStatementContext().getRewrittenCtePlan().put(null, outer); + cascadesContext.getStatementContext().getRewrittenCteConsumer().put(cteAnchor.getCteId(), outer); } boolean reserveAnchor = outer.anyMatch(p -> { if (p instanceof LogicalCTEConsumer) { @@ -104,8 +104,8 @@ public class RewriteCteChildren extends DefaultPlanRewriter imp public Plan visitLogicalCTEProducer(LogicalCTEProducer cteProducer, CascadesContext cascadesContext) { LogicalPlan child; - if (cascadesContext.getStatementContext().getRewrittenCtePlan().containsKey(cteProducer.getCteId())) { - child = cascadesContext.getStatementContext().getRewrittenCtePlan().get(cteProducer.getCteId()); + if (cascadesContext.getStatementContext().getRewrittenCteProducer().containsKey(cteProducer.getCteId())) { + child = cascadesContext.getStatementContext().getRewrittenCteProducer().get(cteProducer.getCteId()); } else { child = (LogicalPlan) cteProducer.child(); child = tryToConstructFilter(cascadesContext, cteProducer.getCteId(), child); @@ -118,7 +118,7 @@ public class RewriteCteChildren extends DefaultPlanRewriter imp CascadesContext rewrittenCtx = CascadesContext.newSubtreeContext( Optional.of(cteProducer.getCteId()), cascadesContext, child, PhysicalProperties.ANY); child = (LogicalPlan) child.accept(this, rewrittenCtx); - cascadesContext.getStatementContext().getRewrittenCtePlan().put(cteProducer.getCteId(), child); + cascadesContext.getStatementContext().getRewrittenCteProducer().put(cteProducer.getCteId(), child); } return cteProducer.withChildren(child); } diff --git a/regression-test/suites/nereids_syntax_p0/cte.groovy b/regression-test/suites/nereids_syntax_p0/cte.groovy index 2fffefd806..ba94556991 100644 --- a/regression-test/suites/nereids_syntax_p0/cte.groovy +++ b/regression-test/suites/nereids_syntax_p0/cte.groovy @@ -324,5 +324,10 @@ suite("cte") { ) tab WHERE Id IN (1, 2) """ + + // rewrite cte children should work well with cost based rewrite rule. rely on rewrite rule: InferSetOperatorDistinct + sql """ + WITH cte_0 AS ( SELECT 1 AS a ), cte_1 AS ( SELECT 1 AS a ) select * from cte_0, cte_1 union select * from cte_0, cte_1 + """ }