diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateOuterJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateOuterJoin.java index 66d536e863..440e5d73ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateOuterJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateOuterJoin.java @@ -21,6 +21,8 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.expressions.EqualTo; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.IsNull; +import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.JoinType; import org.apache.doris.nereids.trees.plans.Plan; @@ -31,7 +33,9 @@ import org.apache.doris.nereids.util.Utils; import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet.Builder; +import com.google.common.collect.Sets; +import java.util.Collection; import java.util.HashSet; import java.util.Optional; import java.util.Set; @@ -65,32 +69,50 @@ public class EliminateOuterJoin extends OneRewriteRuleFactory { } JoinType newJoinType = tryEliminateOuterJoin(join.getJoinType(), canFilterLeftNull, canFilterRightNull); - Set conjuncts = new HashSet<>(); - join.getHashJoinConjuncts().forEach(expression -> { - EqualTo equalTo = (EqualTo) expression; - if (canFilterLeftNull) { - JoinUtils.addIsNotNullIfNullableToCollection(equalTo.left(), conjuncts); - } - if (canFilterRightNull) { - JoinUtils.addIsNotNullIfNullableToCollection(equalTo.right(), conjuncts); - } - }); - JoinUtils.JoinSlotCoverageChecker checker = new JoinUtils.JoinSlotCoverageChecker( - join.left().getOutput(), - join.right().getOutput()); - join.getOtherJoinConjuncts().stream().filter(EqualTo.class::isInstance).forEach(expr -> { - EqualTo equalTo = (EqualTo) expr; - if (checker.isHashJoinCondition(equalTo)) { - if (canFilterLeftNull) { - JoinUtils.addIsNotNullIfNullableToCollection(equalTo.left(), conjuncts); - } - if (canFilterRightNull) { - JoinUtils.addIsNotNullIfNullableToCollection(equalTo.right(), conjuncts); - } - } - }); + Set conjuncts = Sets.newHashSet(); conjuncts.addAll(filter.getConjuncts()); - return filter.withConjuncts(conjuncts).withChildren(join.withJoinType(newJoinType)); + boolean conjunctsChanged = false; + if (!notNullSlots.isEmpty()) { + for (Slot slot : notNullSlots) { + Not isNotNull = new Not(new IsNull(slot)); + isNotNull.isGeneratedIsNotNull = true; + conjunctsChanged |= conjuncts.add(isNotNull); + } + } + if (newJoinType.isInnerJoin()) { + /* + * for example: (A left join B on A.a=B.b) join C on B.x=C.x + * inner join condition B.x=C.x implies 'B.x is not null', + * by which the left outer join could be eliminated. Finally, the join transformed to + * (A join B on A.a=B.b) join C on B.x=C.x. + * This elimination can be processed recursively. + * + * TODO: is_not_null can also be inferred from A < B and so on + */ + conjunctsChanged |= join.getHashJoinConjuncts().stream() + .map(EqualTo.class::cast) + .map(equalTo -> + (EqualTo) JoinUtils.swapEqualToForChildrenOrder(equalTo, join.left().getOutputSet())) + .map(equalTo -> createIsNotNullIfNecessary(equalTo, conjuncts) + ).anyMatch(Boolean::booleanValue); + + JoinUtils.JoinSlotCoverageChecker checker = new JoinUtils.JoinSlotCoverageChecker( + join.left().getOutput(), + join.right().getOutput()); + conjunctsChanged |= join.getOtherJoinConjuncts().stream().filter(EqualTo.class::isInstance) + .map(EqualTo.class::cast) + .filter(equalTo -> checker.isHashJoinCondition(equalTo)) + .map(equalTo -> (EqualTo) JoinUtils.swapEqualToForChildrenOrder(equalTo, + join.left().getOutputSet())) + .map(equalTo -> + createIsNotNullIfNecessary(equalTo, conjuncts)) + .anyMatch(Boolean::booleanValue); + } + if (conjunctsChanged) { + return filter.withConjuncts(conjuncts.stream().collect(ImmutableSet.toImmutableSet())) + .withChildren(join.withJoinType(newJoinType)); + } + return filter.withChildren(join.withJoinType(newJoinType)); }).toRule(RuleType.ELIMINATE_OUTER_JOIN); } @@ -112,4 +134,19 @@ public class EliminateOuterJoin extends OneRewriteRuleFactory { } return joinType; } + + private boolean createIsNotNullIfNecessary(EqualTo swapedEqualTo, Collection container) { + boolean containerChanged = false; + if (swapedEqualTo.left().nullable()) { + Not not = new Not(new IsNull(swapedEqualTo.left())); + not.isGeneratedIsNotNull = true; + containerChanged |= container.add(not); + } + if (swapedEqualTo.right().nullable()) { + Not not = new Not(new IsNull(swapedEqualTo.right())); + not.isGeneratedIsNotNull = true; + containerChanged |= container.add(not); + } + return containerChanged; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/JoinUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/JoinUtils.java index 6c6a4fa280..a60fe59f12 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/JoinUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/JoinUtils.java @@ -27,7 +27,6 @@ import org.apache.doris.nereids.properties.DistributionSpecReplicated; import org.apache.doris.nereids.trees.expressions.EqualTo; import org.apache.doris.nereids.trees.expressions.ExprId; import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.IsNull; import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapContains; @@ -42,7 +41,6 @@ import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -63,23 +61,7 @@ public class JoinUtils { } /** - * for a given expr, if expr is nullable, add 'expr is not null' in to container. - * this is used to eliminate outer join. - * for example: (A left join B on A.a=B.b) join C on B.x=C.x - * inner join condition B.x=C.x implies that 'B.x is not null' can be used to filter B, - * with 'B.x is not null' predicate, we could eliminate outer join, and the join transformed to - * (A join B on A.a=B.b) join C on B.x=C.x - */ - public static void addIsNotNullIfNullableToCollection(Expression expr, Collection container) { - if (expr.nullable()) { - Not not = new Not(new IsNull(expr)); - not.isGeneratedIsNotNull = true; - container.add(not); - } - } - - /** - * util class + * for a given equation, judge if it can be used as hash join condition */ public static final class JoinSlotCoverageChecker { Set leftExprIds; @@ -90,6 +72,11 @@ public class JoinUtils { rightExprIds = right.stream().map(Slot::getExprId).collect(Collectors.toSet()); } + JoinSlotCoverageChecker(Set left, Set right) { + leftExprIds = left; + rightExprIds = right; + } + /** * PushDownExpressionInHashConjuncts ensure the "slots" is only one slot. */ diff --git a/regression-test/data/nereids_p0/eliminate_outer_join/eliminate_outer_join.out b/regression-test/data/nereids_p0/eliminate_outer_join/eliminate_outer_join.out new file mode 100644 index 0000000000..1ab1363883 --- /dev/null +++ b/regression-test/data/nereids_p0/eliminate_outer_join/eliminate_outer_join.out @@ -0,0 +1,156 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !1 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[INNER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------hashJoin[INNER_JOIN](nation.n_nationkey = supplier.s_suppkey) +----------PhysicalDistribute +------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey) +--------------PhysicalOlapScan[region] +--------------PhysicalDistribute +----------------filter(( not n_nationkey IS NULL)(nation.n_nationkey > 1)) +------------------PhysicalOlapScan[nation] +----------PhysicalDistribute +------------filter((supplier.s_suppkey > 1)( not s_suppkey IS NULL)) +--------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------filter((partsupp.ps_suppkey > 1)( not ps_suppkey IS NULL)) +------------PhysicalOlapScan[partsupp] + +-- !2 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[RIGHT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------PhysicalDistribute +----------filter((supplier.s_suppkey > 1)) +------------hashJoin[FULL_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +--------------PhysicalDistribute +----------------hashJoin[FULL_OUTER_JOIN](region.r_regionkey = nation.n_regionkey) +------------------PhysicalOlapScan[region] +------------------PhysicalDistribute +--------------------PhysicalOlapScan[nation] +--------------PhysicalDistribute +----------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------filter((partsupp.ps_suppkey > 1)( not ps_suppkey IS NULL)) +------------PhysicalOlapScan[partsupp] + +-- !3 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[RIGHT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------PhysicalDistribute +----------filter((supplier.s_suppkey > 1)) +------------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +--------------PhysicalDistribute +----------------hashJoin[FULL_OUTER_JOIN](region.r_regionkey = nation.n_regionkey) +------------------PhysicalOlapScan[region] +------------------PhysicalDistribute +--------------------PhysicalOlapScan[nation] +--------------PhysicalDistribute +----------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------filter((partsupp.ps_suppkey > 1)( not ps_suppkey IS NULL)) +------------PhysicalOlapScan[partsupp] + +-- !4 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[LEFT_OUTER_JOIN](region.r_regionkey = nation.n_regionkey) +--------filter(( not r_name IS NULL)(region.r_name = '')) +----------PhysicalOlapScan[region] +--------PhysicalDistribute +----------PhysicalOlapScan[nation] + +-- !5 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +--------PhysicalDistribute +----------hashJoin[LEFT_OUTER_JOIN](region.r_regionkey = nation.n_regionkey) +------------filter(( not r_name IS NULL)(region.r_name = '')) +--------------PhysicalOlapScan[region] +------------PhysicalDistribute +--------------PhysicalOlapScan[nation] +--------PhysicalDistribute +----------PhysicalOlapScan[supplier] + +-- !6 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[LEFT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------PhysicalDistribute +----------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +------------PhysicalDistribute +--------------hashJoin[LEFT_OUTER_JOIN](region.r_regionkey = nation.n_regionkey) +----------------filter(( not r_name IS NULL)(region.r_name = '')) +------------------PhysicalOlapScan[region] +----------------PhysicalDistribute +------------------PhysicalOlapScan[nation] +------------PhysicalDistribute +--------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------PhysicalOlapScan[partsupp] + +-- !7 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[FULL_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------PhysicalDistribute +----------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +------------PhysicalDistribute +--------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey) +----------------filter(( not r_regionkey IS NULL)) +------------------PhysicalOlapScan[region] +----------------PhysicalDistribute +------------------filter(( not n_regionkey IS NULL)) +--------------------PhysicalOlapScan[nation] +------------PhysicalDistribute +--------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------PhysicalOlapScan[partsupp] + +-- !8 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[LEFT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------PhysicalDistribute +----------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +------------PhysicalDistribute +--------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey) +----------------filter(( not r_regionkey IS NULL)( not r_name IS NULL)(region.r_name = '')) +------------------PhysicalOlapScan[region] +----------------PhysicalDistribute +------------------filter(( not n_regionkey IS NULL)) +--------------------PhysicalOlapScan[nation] +------------PhysicalDistribute +--------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------PhysicalOlapScan[partsupp] + +-- !9 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[LEFT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------hashJoin[INNER_JOIN](nation.n_nationkey = supplier.s_suppkey) +----------PhysicalDistribute +------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey) +--------------filter(( not r_regionkey IS NULL)( not r_name IS NULL)(region.r_name = '')) +----------------PhysicalOlapScan[region] +--------------PhysicalDistribute +----------------filter(( not n_regionkey IS NULL)) +------------------PhysicalOlapScan[nation] +----------PhysicalDistribute +------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------PhysicalOlapScan[partsupp] + diff --git a/regression-test/data/nereids_p0/eliminate_outer_join/test_eliminate_outer_join.out b/regression-test/data/nereids_p0/eliminate_outer_join/test_eliminate_outer_join.out new file mode 100644 index 0000000000..b1736547b2 --- /dev/null +++ b/regression-test/data/nereids_p0/eliminate_outer_join/test_eliminate_outer_join.out @@ -0,0 +1,138 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !1 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[INNER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------hashJoin[INNER_JOIN](nation.n_nationkey = supplier.s_suppkey) +----------PhysicalDistribute +------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey) +--------------PhysicalOlapScan[region] +--------------PhysicalDistribute +----------------filter(( not n_nationkey IS NULL)(nation.n_nationkey > 1)) +------------------PhysicalOlapScan[nation] +----------PhysicalDistribute +------------filter((supplier.s_suppkey > 1)( not s_suppkey IS NULL)) +--------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------filter((partsupp.ps_suppkey > 1)( not ps_suppkey IS NULL)) +------------PhysicalOlapScan[partsupp] + +-- !2 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[RIGHT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------PhysicalDistribute +----------filter((supplier.s_suppkey > 1)) +------------hashJoin[FULL_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +--------------PhysicalDistribute +----------------hashJoin[FULL_OUTER_JOIN](region.r_regionkey = nation.n_regionkey) +------------------PhysicalOlapScan[region] +------------------PhysicalDistribute +--------------------PhysicalOlapScan[nation] +--------------PhysicalDistribute +----------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------filter((partsupp.ps_suppkey > 1)( not ps_suppkey IS NULL)) +------------PhysicalOlapScan[partsupp] + +-- !3 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[RIGHT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------PhysicalDistribute +----------filter((supplier.s_suppkey > 1)) +------------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +--------------PhysicalDistribute +----------------hashJoin[FULL_OUTER_JOIN](region.r_regionkey = nation.n_regionkey) +------------------PhysicalOlapScan[region] +------------------PhysicalDistribute +--------------------PhysicalOlapScan[nation] +--------------PhysicalDistribute +----------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------filter((partsupp.ps_suppkey > 1)( not ps_suppkey IS NULL)) +------------PhysicalOlapScan[partsupp] + +-- !4 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[LEFT_OUTER_JOIN](region.r_regionkey = nation.n_regionkey) +--------filter(( not r_name IS NULL)(region.r_name = '')) +----------PhysicalOlapScan[region] +--------PhysicalDistribute +----------PhysicalOlapScan[nation] + +-- !5 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +--------PhysicalDistribute +----------hashJoin[LEFT_OUTER_JOIN](region.r_regionkey = nation.n_regionkey) +------------filter(( not r_name IS NULL)(region.r_name = '')) +--------------PhysicalOlapScan[region] +------------PhysicalDistribute +--------------PhysicalOlapScan[nation] +--------PhysicalDistribute +----------PhysicalOlapScan[supplier] + +-- !6 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[LEFT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------PhysicalDistribute +----------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +------------PhysicalDistribute +--------------hashJoin[LEFT_OUTER_JOIN](region.r_regionkey = nation.n_regionkey) +----------------filter(( not r_name IS NULL)(region.r_name = '')) +------------------PhysicalOlapScan[region] +----------------PhysicalDistribute +------------------PhysicalOlapScan[nation] +------------PhysicalDistribute +--------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------PhysicalOlapScan[partsupp] + +-- !7 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[FULL_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------PhysicalDistribute +----------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +------------PhysicalDistribute +--------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey) +----------------filter(( not r_regionkey IS NULL)) +------------------PhysicalOlapScan[region] +----------------PhysicalDistribute +------------------filter(( not n_regionkey IS NULL)) +--------------------PhysicalOlapScan[nation] +------------PhysicalDistribute +--------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------PhysicalOlapScan[partsupp] + +-- !8 -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashJoin[LEFT_OUTER_JOIN](partsupp.ps_suppkey = supplier.s_suppkey) +--------PhysicalDistribute +----------hashJoin[LEFT_OUTER_JOIN](nation.n_nationkey = supplier.s_suppkey) +------------PhysicalDistribute +--------------hashJoin[INNER_JOIN](region.r_regionkey = nation.n_regionkey) +----------------filter(( not r_regionkey IS NULL)( not r_name IS NULL)(region.r_name = '')) +------------------PhysicalOlapScan[region] +----------------PhysicalDistribute +------------------filter(( not n_regionkey IS NULL)) +--------------------PhysicalOlapScan[nation] +------------PhysicalDistribute +--------------PhysicalOlapScan[supplier] +--------PhysicalDistribute +----------PhysicalOlapScan[partsupp] + diff --git a/regression-test/suites/nereids_p0/eliminate_outer_join/eliminate_outer_join.groovy b/regression-test/suites/nereids_p0/eliminate_outer_join/eliminate_outer_join.groovy new file mode 100644 index 0000000000..e226130c76 --- /dev/null +++ b/regression-test/suites/nereids_p0/eliminate_outer_join/eliminate_outer_join.groovy @@ -0,0 +1,197 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("eliminate_outer_join") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "set disable_nereids_rules='ELIMINATE_NOT_NULL'" + sql "set disable_join_reorder=true" + sql "set forbid_unknown_col_stats=false" + + String database = context.config.getDbNameByFile(context.file) + sql "drop database if exists ${database}" + sql "create database ${database}" + sql "use ${database}" + + sql ''' + drop table if exists partsupp; + ''' + + sql ''' + CREATE TABLE partsupp ( + ps_partkey int NOT NULL, + ps_suppkey int NOT NULL, + ps_availqty int NOT NULL, + ps_supplycost decimal(15, 2) NOT NULL, + ps_comment VARCHAR(199) NOT NULL + )ENGINE=OLAP + DUPLICATE KEY(`ps_partkey`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`ps_partkey`) BUCKETS 24 + PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "part_partsupp" + ); + ''' + + sql ''' + drop table if exists supplier + ''' + + sql ''' + CREATE TABLE supplier ( + s_suppkey int NOT NULL, + s_name VARCHAR(25) NOT NULL, + s_address VARCHAR(40) NOT NULL, + s_nationkey int NOT NULL, + s_phone VARCHAR(15) NOT NULL, + s_acctbal decimal(15, 2) NOT NULL, + s_comment VARCHAR(101) NOT NULL + )ENGINE=OLAP + DUPLICATE KEY(`s_suppkey`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 12 + PROPERTIES ( + "replication_num" = "1" + ); + ''' + + sql ''' + drop table if exists nation; + ''' + + sql ''' + CREATE TABLE `nation` ( + `n_nationkey` int(11) NOT NULL, + `n_name` varchar(25) NOT NULL, + `n_regionkey` int(11) NOT NULL, + `n_comment` varchar(152) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`N_NATIONKEY`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + ''' + + sql ''' + drop table if exists region; + ''' + + sql ''' + CREATE TABLE region ( + r_regionkey int NOT NULL, + r_name VARCHAR(25) NOT NULL, + r_comment VARCHAR(152) + )ENGINE=OLAP + DUPLICATE KEY(`r_regionkey`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + ''' + + qt_1 ''' + --eliminate outer joins in cascading + explain shape plan + select * + from region + left join nation on r_regionkey=n_regionkey -->inner + left join supplier on n_nationkey=s_suppkey -->inner + left join partsupp on ps_suppkey=s_suppkey -->inner + where ps_suppkey > 1 + ''' + + qt_2 ''' + -- full join ps => right join ps, other outer joins are not eliminated + explain shape plan + select * + from region + full join nation on r_regionkey=n_regionkey -->full + full join supplier on n_nationkey=s_suppkey -->full + full join partsupp on ps_suppkey=s_suppkey -->right + where ps_suppkey > 1; + ''' + + qt_3 ''' + explain shape plan + select * + from region + full join nation on r_regionkey=n_regionkey -- full + left join supplier on n_nationkey=s_suppkey -- left + full join partsupp on ps_suppkey=s_suppkey -- right + where ps_suppkey > 1; + ''' + + qt_4 ''' + explain shape plan + select * + from region + full join nation on r_regionkey=n_regionkey -- left + where r_name = ""; + ''' + + qt_5 ''' + explain shape plan + select * + from region + full join nation on r_regionkey=n_regionkey -- left + left join supplier on n_nationkey=s_suppkey -- left + where r_name = ""; + ''' + + qt_6 ''' + explain shape plan + select * + from region + full join nation on r_regionkey=n_regionkey -- left + left join supplier on n_nationkey=s_suppkey -- left + full join partsupp on ps_suppkey=s_suppkey -- left + where r_name = ""; + ''' + + qt_7''' + explain shape plan + select * + from region + join nation on r_regionkey=n_regionkey -- inner + left join supplier on n_nationkey=s_suppkey -- left + full join partsupp on ps_suppkey=s_suppkey; -- full + ''' + + qt_8''' + explain shape plan + select * + from region + join nation on r_regionkey=n_regionkey --inner + left join supplier on n_nationkey=s_suppkey --left + full join partsupp on ps_suppkey=s_suppkey --left + where r_name = ""; + ''' + + qt_9 ''' + explain shape plan + select * + from region + join nation on r_regionkey=n_regionkey --inner + right join supplier on n_nationkey=s_suppkey --inner + full join partsupp on ps_suppkey=s_suppkey --left + where r_name = ""; + ''' +}