From 2b3e75bb2725ee49291ebf1b4ac0cb2796aef190 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Fri, 5 Jan 2024 18:13:21 +0800 Subject: [PATCH] [fix](Nereids) exists should not return null (#29435) --- .../rules/analysis/SubqueryToApply.java | 3 +- .../rules/rewrite/ExistsApplyToJoin.java | 36 +++++++++---------- .../trees/plans/logical/LogicalApply.java | 9 ++--- .../shape/query10.out | 2 +- .../shape/query35.out | 2 +- .../noStatsRfPrune/query10.out | 2 +- .../noStatsRfPrune/query35.out | 2 +- .../no_stats_shape/query10.out | 2 +- .../no_stats_shape/query35.out | 2 +- .../rf_prune/query10.out | 2 +- .../rf_prune/query35.out | 2 +- .../shape/query10.out | 2 +- .../shape/query35.out | 2 +- 13 files changed, 33 insertions(+), 35 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java index 675575ad29..12eed92b40 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java @@ -37,6 +37,7 @@ import org.apache.doris.nereids.trees.expressions.ScalarSubquery; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.SubqueryExpr; +import org.apache.doris.nereids.trees.expressions.functions.scalar.Nvl; import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral; import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter; import org.apache.doris.nereids.trees.plans.Plan; @@ -442,7 +443,7 @@ public class SubqueryToApply implements AnalysisRuleFactory { MarkJoinSlotReference markJoinSlotReference = new MarkJoinSlotReference(statementContext.generateColumnName()); context.setSubqueryToMarkJoinSlot(exists, Optional.of(markJoinSlotReference)); - return markJoinSlotReference; + return new Nvl(markJoinSlotReference, BooleanLiteral.FALSE); } else { return BooleanLiteral.TRUE; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExistsApplyToJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExistsApplyToJoin.java index 52c09a7e5a..5da56d42ee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExistsApplyToJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExistsApplyToJoin.java @@ -45,27 +45,27 @@ import java.util.ArrayList; import java.util.Optional; /** - * Convert Existsapply to LogicalJoin. + * Convert ExistsApply to LogicalJoin. *

* Exists * Correlated -> LEFT_SEMI_JOIN * apply LEFT_SEMI_JOIN(Correlated Predicate) * / \ --> / \ * input queryPlan input queryPlan - * + *

* UnCorrelated -> CROSS_JOIN(limit(1)) * apply CROSS_JOIN * / \ --> / \ * input queryPlan input limit(1) * | * queryPlan - * + *

* Not Exists * Correlated -> LEFT_ANTI_JOIN * apply LEFT_ANTI_JOIN(Correlated Predicate) * / \ --> / \ * input queryPlan input queryPlan - * + *

* UnCorrelated -> CROSS_JOIN(Count(*)) * Filter(count(*) = 0) * | @@ -89,29 +89,24 @@ public class ExistsApplyToJoin extends OneRewriteRuleFactory { }).toRule(RuleType.EXISTS_APPLY_TO_JOIN); } - private Plan correlatedToJoin(LogicalApply apply) { + private Plan correlatedToJoin(LogicalApply apply) { Optional correlationFilter = apply.getCorrelationFilter(); - Expression predicate = correlationFilter.get(); if (((Exists) apply.getSubqueryExpr()).isNot()) { return new LogicalJoin<>(JoinType.LEFT_ANTI_JOIN, ExpressionUtils.EMPTY_CONDITION, - predicate != null - ? ExpressionUtils.extractConjunction(predicate) - : ExpressionUtils.EMPTY_CONDITION, + correlationFilter.map(ExpressionUtils::extractConjunction).orElse(ExpressionUtils.EMPTY_CONDITION), new DistributeHint(DistributeType.NONE), apply.getMarkJoinSlotReference(), apply.children()); } else { return new LogicalJoin<>(JoinType.LEFT_SEMI_JOIN, ExpressionUtils.EMPTY_CONDITION, - predicate != null - ? ExpressionUtils.extractConjunction(predicate) - : ExpressionUtils.EMPTY_CONDITION, + correlationFilter.map(ExpressionUtils::extractConjunction).orElse(ExpressionUtils.EMPTY_CONDITION), new DistributeHint(DistributeType.NONE), apply.getMarkJoinSlotReference(), apply.children()); } } - private Plan unCorrelatedToJoin(LogicalApply unapply) { + private Plan unCorrelatedToJoin(LogicalApply unapply) { if (((Exists) unapply.getSubqueryExpr()).isNot()) { return unCorrelatedNotExist(unapply); } else { @@ -119,21 +114,22 @@ public class ExistsApplyToJoin extends OneRewriteRuleFactory { } } - private Plan unCorrelatedNotExist(LogicalApply unapply) { - LogicalLimit newLimit = new LogicalLimit<>(1, 0, LimitPhase.ORIGIN, (LogicalPlan) unapply.right()); + private Plan unCorrelatedNotExist(LogicalApply unapply) { + LogicalLimit newLimit = new LogicalLimit<>(1, 0, LimitPhase.ORIGIN, (LogicalPlan) unapply.right()); Alias alias = new Alias(new Count(), "count(*)"); - LogicalAggregate newAgg = new LogicalAggregate<>(new ArrayList<>(), + LogicalAggregate newAgg = new LogicalAggregate<>(new ArrayList<>(), ImmutableList.of(alias), newLimit); - LogicalJoin newJoin = new LogicalJoin<>(JoinType.CROSS_JOIN, ExpressionUtils.EMPTY_CONDITION, + LogicalJoin newJoin = new LogicalJoin<>(JoinType.CROSS_JOIN, ExpressionUtils.EMPTY_CONDITION, ExpressionUtils.EMPTY_CONDITION, - new DistributeHint(DistributeType.NONE), unapply.getMarkJoinSlotReference(), + new DistributeHint(DistributeType.NONE), + unapply.getMarkJoinSlotReference(), (LogicalPlan) unapply.left(), newAgg); return new LogicalFilter<>(ImmutableSet.of(new EqualTo(newAgg.getOutput().get(0), new IntegerLiteral(0))), newJoin); } - private Plan unCorrelatedExist(LogicalApply unapply) { - LogicalLimit newLimit = new LogicalLimit<>(1, 0, LimitPhase.ORIGIN, (LogicalPlan) unapply.right()); + private Plan unCorrelatedExist(LogicalApply unapply) { + LogicalLimit newLimit = new LogicalLimit<>(1, 0, LimitPhase.ORIGIN, (LogicalPlan) unapply.right()); return new LogicalJoin<>(JoinType.CROSS_JOIN, ExpressionUtils.EMPTY_CONDITION, ExpressionUtils.EMPTY_CONDITION, new DistributeHint(DistributeType.NONE), unapply.getMarkJoinSlotReference(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalApply.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalApply.java index 16e2e964ca..04656f0bd1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalApply.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalApply.java @@ -164,7 +164,7 @@ public class LogicalApply that = (LogicalApply) o; return Objects.equals(correlationSlot, that.getCorrelationSlot()) && Objects.equals(subqueryExpr, that.getSubqueryExpr()) && Objects.equals(correlationFilter, that.getCorrelationFilter()) @@ -192,10 +192,11 @@ public class LogicalApply() + .addAll(correlationSlot) + .build(); } - return new ImmutableList.Builder() - .addAll(correlationSlot) - .build(); } public LogicalApply withSubqueryExprAndChildren(SubqueryExpr subqueryExpr, List children) { diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query10.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query10.out index 03b3bf074e..59e87830c0 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query10.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query10.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalTopN[LOCAL_SORT] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------filter(($c$1 OR $c$2)) +--------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) ----------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() ------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() --------------------PhysicalProject diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query35.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query35.out index 9c06ffd145..79412a9b42 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query35.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query35.out @@ -9,7 +9,7 @@ PhysicalResultSink ------------PhysicalDistribute[DistributionSpecHash] --------------hashAgg[LOCAL] ----------------PhysicalProject -------------------filter(($c$1 OR $c$2)) +------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) --------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() ----------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() ------------------------PhysicalProject diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query10.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query10.out index b07426b5f3..73add01541 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query10.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query10.out @@ -9,7 +9,7 @@ PhysicalResultSink ------------PhysicalDistribute[DistributionSpecHash] --------------hashAgg[LOCAL] ----------------PhysicalProject -------------------filter(($c$1 OR $c$2)) +------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) --------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() ----------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() ------------------------PhysicalProject diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query35.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query35.out index 4712a4893d..b6c9c4373e 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query35.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query35.out @@ -9,7 +9,7 @@ PhysicalResultSink ------------PhysicalDistribute[DistributionSpecHash] --------------hashAgg[LOCAL] ----------------PhysicalProject -------------------filter(($c$1 OR $c$2)) +------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) --------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() ----------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() ------------------------PhysicalProject diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query10.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query10.out index 3cd5b3070c..6a46828f0a 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query10.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query10.out @@ -9,7 +9,7 @@ PhysicalResultSink ------------PhysicalDistribute[DistributionSpecHash] --------------hashAgg[LOCAL] ----------------PhysicalProject -------------------filter(($c$1 OR $c$2)) +------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) --------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() ----------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() ------------------------PhysicalProject diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query35.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query35.out index 5f7e82e197..c967d42e5e 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query35.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query35.out @@ -9,7 +9,7 @@ PhysicalResultSink ------------PhysicalDistribute[DistributionSpecHash] --------------hashAgg[LOCAL] ----------------PhysicalProject -------------------filter(($c$1 OR $c$2)) +------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) --------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() ----------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() ------------------------PhysicalProject diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out index dab1ca1091..dc3529bdf2 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query10.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalTopN[LOCAL_SORT] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------filter(($c$1 OR $c$2)) +--------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) ----------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() ------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() --------------------PhysicalProject diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query35.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query35.out index 7beb459e11..7f0a43176d 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query35.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query35.out @@ -9,7 +9,7 @@ PhysicalResultSink ------------PhysicalDistribute[DistributionSpecHash] --------------hashAgg[LOCAL] ----------------PhysicalProject -------------------filter(($c$1 OR $c$2)) +------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) --------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() ----------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() ------------------------PhysicalProject diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out index dab1ca1091..dc3529bdf2 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query10.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalTopN[LOCAL_SORT] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------filter(($c$1 OR $c$2)) +--------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) ----------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() ------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() --------------------PhysicalProject diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query35.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query35.out index 0b1662a0be..35d0777a6f 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query35.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query35.out @@ -9,7 +9,7 @@ PhysicalResultSink ------------PhysicalDistribute[DistributionSpecHash] --------------hashAgg[LOCAL] ----------------PhysicalProject -------------------filter(($c$1 OR $c$2)) +------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE))) --------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() ----------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() ------------------------PhysicalProject