diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index c4a6e37bbe..4a19ab9254 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -72,7 +72,6 @@ public class StatementContext { private int maxNAryInnerJoin = 0; private boolean isDpHyp = false; - private boolean isOtherJoinReorder = false; // hasUnknownColStats true if any column stats in the tables used by this sql is unknown // the algorithm to derive plan when column stats are unknown is implemented in cascading framework, not in dphyper. @@ -158,14 +157,6 @@ public class StatementContext { isDpHyp = dpHyp; } - public boolean isOtherJoinReorder() { - return isOtherJoinReorder; - } - - public void setOtherJoinReorder(boolean otherJoinReorder) { - isOtherJoinReorder = otherJoinReorder; - } - public ExprId getNextExprId() { return exprIdGenerator.getNextId(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/OptimizeGroupExpressionJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/OptimizeGroupExpressionJob.java index 72426f0fa1..16cf90b786 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/OptimizeGroupExpressionJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/OptimizeGroupExpressionJob.java @@ -75,7 +75,6 @@ public class OptimizeGroupExpressionJob extends Job { || context.getCascadesContext().getMemo().getGroupExpressionsSize() > context.getCascadesContext() .getConnectContext().getSessionVariable().memoMaxGroupExpressionSize; boolean isDpHyp = context.getCascadesContext().getStatementContext().isDpHyp(); - boolean isOtherJoinReorder = context.getCascadesContext().getStatementContext().isOtherJoinReorder(); boolean isEnableBushyTree = 
context.getCascadesContext().getConnectContext().getSessionVariable() .isEnableBushyTree(); boolean isLeftZigZagTree = context.getCascadesContext().getConnectContext() @@ -86,11 +85,7 @@ public class OptimizeGroupExpressionJob extends Job { if (isDisableJoinReorder) { return Collections.emptyList(); } else if (isDpHyp) { - if (isOtherJoinReorder) { - return getRuleSet().getDPHypReorderRules(); - } else { - return Collections.emptyList(); - } + return getRuleSet().getDPHypReorderRules(); } else if (isLeftZigZagTree) { return getRuleSet().getLeftZigZagTreeJoinReorder(); } else if (isEnableBushyTree) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java index 4f042c527c..5f945ab838 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java @@ -67,7 +67,6 @@ public class Optimizer { boolean isDpHyp = getSessionVariable().enableDPHypOptimizer || maxJoinCount > maxTableCount; cascadesContext.getStatementContext().setDpHyp(isDpHyp); - cascadesContext.getStatementContext().setOtherJoinReorder(false); if (!getSessionVariable().isDisableJoinReorder() && isDpHyp && maxJoinCount <= getSessionVariable().getMaxJoinNumberOfReorder()) { //RightNow, dphyper can only order 64 join operators @@ -85,7 +84,6 @@ public class Optimizer { // Due to EnsureProjectOnTopJoin, root group can't be Join Group, so DPHyp doesn't change the root group cascadesContext.pushJob(new JoinOrderJob(root, cascadesContext.getCurrentJobContext())); cascadesContext.getJobScheduler().executeJobPool(cascadesContext); - cascadesContext.getStatementContext().setOtherJoinReorder(true); } private SessionVariable getSessionVariable() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java index eece2d8c3d..8af8a51709 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java @@ -17,10 +17,9 @@ package org.apache.doris.nereids.jobs.joinorder.hypergraph.receiver; -import org.apache.doris.nereids.hint.DistributeHint; import org.apache.doris.nereids.jobs.JobContext; -import org.apache.doris.nereids.jobs.cascades.CostAndEnforcerJob; import org.apache.doris.nereids.jobs.cascades.DeriveStatsJob; +import org.apache.doris.nereids.jobs.cascades.OptimizeGroupExpressionJob; import org.apache.doris.nereids.jobs.joinorder.hypergraph.HyperGraph; import org.apache.doris.nereids.jobs.joinorder.hypergraph.bitmap.LongBitmap; import org.apache.doris.nereids.jobs.joinorder.hypergraph.edge.Edge; @@ -29,40 +28,28 @@ import org.apache.doris.nereids.memo.CopyInResult; import org.apache.doris.nereids.memo.Group; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.memo.Memo; -import org.apache.doris.nereids.properties.FunctionalDependencies; -import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; -import org.apache.doris.nereids.trees.plans.DistributeType; import org.apache.doris.nereids.trees.plans.GroupPlan; import org.apache.doris.nereids.trees.plans.JoinType; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; -import 
org.apache.doris.nereids.trees.plans.physical.AbstractPhysicalJoin; -import org.apache.doris.nereids.trees.plans.physical.PhysicalDistribute; -import org.apache.doris.nereids.trees.plans.physical.PhysicalHashJoin; -import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin; -import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.util.ExpressionUtils; -import org.apache.doris.nereids.util.JoinUtils; import org.apache.doris.nereids.util.PlanUtils; import org.apache.doris.qe.ConnectContext; import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; import java.util.ArrayList; import java.util.BitSet; import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Optional; import java.util.Set; -import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -91,7 +78,6 @@ public class PlanReceiver implements AbstractReceiver { this.finalOutputs = outputs; } - /** * Emit a new plan from bottom to top *

@@ -130,21 +116,18 @@ public class PlanReceiver implements AbstractReceiver { } long fullKey = LongBitmap.newBitmapUnion(left, right); - List physicalJoins = proposeAllPhysicalJoins(joinType, leftPlan, rightPlan, hashConjuncts, + LogicalPlan logicalPlan = proposeJoin(joinType, leftPlan, rightPlan, hashConjuncts, otherConjuncts); - List physicalPlans = proposeProject(physicalJoins, edges, left, right); + logicalPlan = proposeProject(logicalPlan, edges, left, right); // Second, we copy all physical plan to Group and generate properties and calculate cost if (!planTable.containsKey(fullKey)) { - planTable.put(fullKey, memo.newGroup(physicalPlans.get(0).getLogicalProperties())); + planTable.put(fullKey, memo.newGroup(logicalPlan.getLogicalProperties())); } Group group = planTable.get(fullKey); - for (Plan plan : physicalPlans) { - CopyInResult copyInResult = memo.copyIn(plan, group, false, planTable); - GroupExpression physicalExpression = copyInResult.correspondingExpression; - proposeAllDistributedPlans(physicalExpression); - } + CopyInResult copyInResult = memo.copyIn(logicalPlan, group, false, planTable); + proposeAllDistributedPlans(copyInResult.correspondingExpression); return true; } @@ -204,7 +187,7 @@ public class PlanReceiver implements AbstractReceiver { } private void proposeAllDistributedPlans(GroupExpression groupExpression) { - jobContext.getCascadesContext().pushJob(new CostAndEnforcerJob(groupExpression, + jobContext.getCascadesContext().pushJob(new OptimizeGroupExpressionJob(groupExpression, new JobContext(jobContext.getCascadesContext(), PhysicalProperties.ANY, Double.MAX_VALUE))); if (!groupExpression.isStatDerived()) { jobContext.getCascadesContext().pushJob(new DeriveStatsJob(groupExpression, @@ -213,42 +196,16 @@ public class PlanReceiver implements AbstractReceiver { jobContext.getCascadesContext().getJobScheduler().executeJobPool(jobContext.getCascadesContext()); } - private List proposeAllPhysicalJoins(JoinType joinType, Plan left, Plan right, 
List hashConjuncts, + private LogicalPlan proposeJoin(JoinType joinType, Plan left, Plan right, List hashConjuncts, List otherConjuncts) { - // Check whether only NSL can be performed - LogicalProperties joinProperties = new LogicalProperties( - () -> JoinUtils.getJoinOutput(joinType, left, right), () -> FunctionalDependencies.EMPTY_FUNC_DEPS); - List plans = Lists.newArrayList(); - if (JoinUtils.shouldNestedLoopJoin(joinType, hashConjuncts)) { - plans.add(new PhysicalNestedLoopJoin<>(joinType, hashConjuncts, otherConjuncts, - Optional.empty(), joinProperties, - left, right)); - if (joinType.isSwapJoinType()) { - plans.add(new PhysicalNestedLoopJoin<>(joinType.swap(), hashConjuncts, otherConjuncts, Optional.empty(), - joinProperties, - right, left)); - } - } else { - plans.add(new PhysicalHashJoin<>(joinType, hashConjuncts, otherConjuncts, - new DistributeHint(DistributeType.NONE), Optional.empty(), - joinProperties, - left, right)); - if (joinType.isSwapJoinType()) { - plans.add(new PhysicalHashJoin<>(joinType.swap(), hashConjuncts, otherConjuncts, - new DistributeHint(DistributeType.NONE), - Optional.empty(), - joinProperties, - right, left)); - } - } - return plans; + return new LogicalJoin<>(joinType, hashConjuncts, otherConjuncts, left, right); } @Override public void addGroup(long bitmap, Group group) { Preconditions.checkArgument(LongBitmap.getCardinality(bitmap) == 1); usdEdges.put(bitmap, new BitSet()); - Plan plan = proposeProject(Lists.newArrayList(new GroupPlan(group)), new ArrayList<>(), bitmap, bitmap).get(0); + Plan plan = proposeProject(new GroupPlan(group), new ArrayList<>(), bitmap, bitmap); if (!(plan instanceof GroupPlan)) { CopyInResult copyInResult = jobContext.getCascadesContext().getMemo().copyIn(plan, null, false, planTable); group = copyInResult.correspondingExpression.getOwnerGroup(); @@ -274,59 +231,13 @@ public class PlanReceiver implements AbstractReceiver { @Override public Group getBestPlan(long bitmap) { - // If there are some rules 
relied on the logical join, we need to make logical Expression - // However, it cost 15% of total optimized time. - makeLogicalExpression(() -> planTable.get(bitmap)); return planTable.get(bitmap); } - private void makeLogicalExpression(Supplier root) { - if (!root.get().getLogicalExpressions().isEmpty()) { - return; - } - - // only makeLogicalExpression for those winners - Set hasGenerated = new HashSet<>(); - for (PhysicalProperties physicalProperties : root.get().getAllProperties()) { - GroupExpression groupExpression = root.get().getBestPlan(physicalProperties); - if (hasGenerated.contains(groupExpression) || groupExpression.getPlan() instanceof PhysicalDistribute) { - continue; - } - hasGenerated.add(groupExpression); - - // process child first, plan's child may be changed due to mergeGroup - // due to mergeGroup, the children Group of groupExpression may be replaced, so we need to use lambda to - // get the child to make we can get child at the time we use child. - // If we use for child: groupExpression.children(), it means that we take it in advance. 
It may cause NPE, - // work flow: get children() to get left, right -> copyIn left() -> mergeGroup -> right is merged -> NPE - Plan physicalPlan = groupExpression.getPlan(); - for (int i = 0; i < groupExpression.children().size(); i++) { - int childIdx = i; - makeLogicalExpression(() -> groupExpression.child(childIdx)); - } - - Plan logicalPlan; - if (physicalPlan instanceof PhysicalProject) { - PhysicalProject physicalProject = (PhysicalProject) physicalPlan; - logicalPlan = new LogicalProject<>(physicalProject.getProjects(), - new GroupPlan(groupExpression.child(0))); - } else if (physicalPlan instanceof AbstractPhysicalJoin) { - AbstractPhysicalJoin physicalJoin = (AbstractPhysicalJoin) physicalPlan; - logicalPlan = new LogicalJoin<>(physicalJoin.getJoinType(), physicalJoin.getHashJoinConjuncts(), - physicalJoin.getOtherJoinConjuncts(), - new DistributeHint(DistributeType.NONE), physicalJoin.getMarkJoinSlotReference(), - groupExpression.children().stream().map(g -> new GroupPlan(g)).collect(Collectors.toList())); - } else { - throw new RuntimeException("DPhyp can only handle join and project operator"); - } - jobContext.getCascadesContext().getMemo().copyIn(logicalPlan, root.get(), false, planTable); - } - } - - private List proposeProject(List allChild, List edges, long left, long right) { + private LogicalPlan proposeProject(LogicalPlan join, List edges, long left, long right) { long fullKey = LongBitmap.newBitmapUnion(left, right); - List outputs = allChild.get(0).getOutput(); - Set outputSet = allChild.get(0).getOutputSet(); + List outputs = join.getOutput(); + Set outputSet = join.getOutputSet(); List complexProjects = new ArrayList<>(); // Calculate complex expression should be done by current(fullKey) node @@ -354,40 +265,29 @@ public class PlanReceiver implements AbstractReceiver { // calculate required columns by all parents Set requireSlots = calculateRequiredSlots(left, right, edges); List allProjects = Stream.concat( - outputs.stream().filter(e -> 
requireSlots.contains(e)), + outputs.stream().filter(requireSlots::contains), complexProjects.stream().filter(e -> requireSlots.contains(e.toSlot())) ).collect(Collectors.toList()); - // propose physical project + // propose logical project if (allProjects.isEmpty()) { allProjects.add(ExpressionUtils.selectMinimumColumn(outputs)); } if (outputSet.equals(new HashSet<>(allProjects))) { - return allChild; + return join; } - Set childOutputSet = allChild.get(0).getOutputSet(); + Set childOutputSet = join.getOutputSet(); List projects = allProjects.stream() .filter(expr -> childOutputSet.containsAll(expr.getInputSlots())) .collect(Collectors.toList()); + LogicalPlan project = join; if (!outputSet.equals(new HashSet<>(projects))) { - LogicalProperties projectProperties = new LogicalProperties( - () -> projects.stream() - .map(NamedExpression::toSlot) - .collect(ImmutableList.toImmutableList()), () -> FunctionalDependencies.EMPTY_FUNC_DEPS); - allChild = allChild.stream() - .map(c -> new PhysicalProject<>(projects, projectProperties, c)) - .collect(Collectors.toList()); - } - if (!(!projects.isEmpty() && projects.size() == allProjects.size())) { - Set s1 = projects.stream().collect(Collectors.toSet()); - List s2 = allProjects.stream().filter(e -> !s1.contains(e)).collect(Collectors.toList()); - System.out.println(s2); + project = new LogicalProject<>(projects, join); } Preconditions.checkState(!projects.isEmpty() && projects.size() == allProjects.size(), - " there are some projects left " + projects + allProjects); - - return allChild; + " there are some projects left %s %s", projects, allProjects); + return project; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java index 92a5bb9ddb..bb5b2f3dcf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java @@ -226,10 +226,6 
@@ public class RuleSet { .addAll(OTHER_REORDER_RULES) .build(); - public static final List DPHYP_REORDER_RULES = ImmutableList.builder() - .add(JoinCommute.BUSHY.build()) - .build(); - public static final List MATERIALIZED_VIEW_RULES = planRuleFactories() .add(MaterializedViewOnlyJoinRule.INSTANCE) .add(MaterializedViewProjectJoinRule.INSTANCE) @@ -243,6 +239,11 @@ public class RuleSet { .add(MaterializedViewFilterProjectAggregateRule.INSTANCE) .build(); + public static final List DPHYP_REORDER_RULES = ImmutableList.builder() + .addAll(MATERIALIZED_VIEW_RULES) + .add(JoinCommute.BUSHY.build()) + .build(); + public List getDPHypReorderRules() { return DPHYP_REORDER_RULES; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java index 6c78193abf..3d98abd3af 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java @@ -91,6 +91,13 @@ public class LogicalJoin hashJoinConjuncts, List otherJoinConjuncts, + LEFT_CHILD_TYPE leftChild, RIGHT_CHILD_TYPE rightChild) { + this(joinType, hashJoinConjuncts, otherJoinConjuncts, + new DistributeHint(DistributeType.NONE), Optional.empty(), + Optional.empty(), Optional.empty(), leftChild, rightChild); + } + public LogicalJoin(JoinType joinType, List hashJoinConjuncts, List otherJoinConjuncts, DistributeHint hint, LEFT_CHILD_TYPE leftChild, RIGHT_CHILD_TYPE rightChild) { this(joinType, hashJoinConjuncts, otherJoinConjuncts, hint, Optional.empty(), Optional.empty(), diff --git a/regression-test/data/nereids_rules_p0/mv/join/dphyp_inner/inner_join_dphyp.out b/regression-test/data/nereids_rules_p0/mv/join/dphyp_inner/inner_join_dphyp.out new file mode 100644 index 0000000000..1cf464023e --- /dev/null +++ 
b/regression-test/data/nereids_rules_p0/mv/join/dphyp_inner/inner_join_dphyp.out @@ -0,0 +1,335 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !query1_0_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query1_0_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query1_1_before -- +4 +4 +4 +4 +6 +6 +6 +6 + +-- !query1_1_after -- +4 +4 +4 +4 +6 +6 +6 +6 + +-- !query1_2_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query1_2_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query1_3_before -- +1 1 +1 1 +1 1 +1 1 +1 1 +2 2 +2 2 +2 2 + +-- !query1_3_after -- +1 1 +1 1 +1 1 +1 1 +1 1 +2 2 +2 2 +2 2 + +-- !query1_4_before -- +4 +4 +4 +4 +6 +6 +6 +6 + +-- !query1_4_after -- +4 +4 +4 +4 +6 +6 +6 +6 + +-- !query1_5_before -- +6 +6 + +-- !query1_5_after -- +6 +6 + +-- !query2_0_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query2_0_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query2_1_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query2_1_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query2_2_before -- +4 +4 +4 +4 +6 +6 + +-- !query2_2_after -- +4 +4 +4 +4 +6 +6 + +-- !query2_3_before -- +4 +4 +4 +4 +6 +6 +6 +6 + +-- !query2_3_after -- +4 +4 +4 +4 +6 +6 +6 +6 + +-- !query3_0_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query3_0_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query3_1_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query3_1_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query3_2_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query3_2_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query3_3_before -- +4 +4 +4 +4 +6 +6 +6 +6 + +-- !query3_3_after -- +4 +4 +4 +4 +6 +6 +6 +6 + +-- !query3_4_before -- +1 1 +1 1 +1 1 +1 1 +1 1 + +-- !query3_4_after -- +1 1 +1 1 +1 1 +1 1 +1 1 + +-- !query4_0_before -- +4 +4 +4 +4 +4 +4 + +-- !query4_0_after -- +4 +4 +4 +4 +4 +4 + +-- !query5_0_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query5_0_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query6_0_before -- +2 3 2023-12-08 +2 3 2023-12-08 + +-- !query6_0_after -- +2 3 2023-12-08 +2 3 2023-12-08 + +-- !query7_0_before -- +2 3 
2023-12-08 +2 3 2023-12-08 + +-- !query7_0_after -- +2 3 2023-12-08 +2 3 2023-12-08 + +-- !query10_0_before -- + +-- !query10_0_after -- + diff --git a/regression-test/data/nereids_rules_p0/mv/join/dphyp_outer/outer_join_dphyp.out b/regression-test/data/nereids_rules_p0/mv/join/dphyp_outer/outer_join_dphyp.out new file mode 100644 index 0000000000..845ef3933d --- /dev/null +++ b/regression-test/data/nereids_rules_p0/mv/join/dphyp_outer/outer_join_dphyp.out @@ -0,0 +1,297 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !query1_0_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query1_0_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query1_1_before -- +4 +4 +4 +4 +4 +4 +4 +4 +6 +6 +6 +6 + +-- !query1_1_after -- +4 +4 +4 +4 +4 +4 +4 +4 +6 +6 +6 +6 + +-- !query1_2_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query1_2_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query1_3_before -- +1 1 +1 1 +1 1 +1 1 +1 1 +2 2 +2 2 +2 2 + +-- !query1_3_after -- +1 1 +1 1 +1 1 +1 1 +1 1 +2 2 +2 2 +2 2 + +-- !query2_0_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query2_0_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query2_1_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query2_1_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query2_2_before -- +4 +4 +4 +4 +6 +6 + +-- !query2_2_after -- +4 +4 +4 +4 +6 +6 + +-- !query2_3_before -- +4 +4 +4 +4 +6 +6 + +-- !query2_3_after -- +4 +4 +4 +4 +6 +6 + +-- !query3_0_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query3_0_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query3_1_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query3_1_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query3_2_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query3_2_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query4_0_before -- +4 +4 + +-- !query4_0_after -- +4 +4 + +-- !query5_0_before -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query5_0_after -- +4 +4 +4 +4 +4 +4 +6 +6 + +-- !query5_1_before -- +2023-12-08 2023-12-08 2 3 +2023-12-08 2023-12-08 2 3 + +-- !query5_1_after -- +2023-12-08 2023-12-08 2 3 +2023-12-08 
2023-12-08 2 3 + +-- !query6_0_before -- +2 3 2023-12-08 +2 3 2023-12-08 +2 3 2023-12-12 +2 4 2023-12-10 +3 3 2023-12-11 +4 3 2023-12-09 + +-- !query6_0_after -- +2 3 2023-12-08 +2 3 2023-12-08 +2 3 2023-12-12 +2 4 2023-12-10 +3 3 2023-12-11 +4 3 2023-12-09 + +-- !query7_0_before -- +3 3 2023-12-11 + +-- !query7_0_after -- +3 3 2023-12-11 + +-- !query7_1_before -- + +-- !query7_1_after -- + +-- !query8_0_before -- +1 0 8 0 10.0000 10.50 9.50 +2 0 2 0 11.5000 11.50 11.50 +3 0 0 0 23.0000 33.50 12.50 +4 0 0 0 43.2000 43.20 43.20 +5 0 0 0 28.7000 56.20 1.20 + +-- !query8_0_after -- +1 0 8 0 10.0000 10.50 9.50 +2 0 2 0 11.5000 11.50 11.50 +3 0 0 0 23.0000 33.50 12.50 +4 0 0 0 43.2000 43.20 43.20 +5 0 0 0 28.7000 56.20 1.20 + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query64.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query64.out index fed32dc285..7c84cef758 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query64.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query64.out @@ -9,25 +9,90 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ------------PhysicalProject --------------hashJoin[INNER_JOIN] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF19 d_date_sk->[c_first_shipto_date_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF18 ss_customer_sk->[c_customer_sk] +------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF18 c_customer_sk->[ss_customer_sk] --------------------PhysicalDistribute[DistributionSpecHash] ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = 
ad2.ca_address_sk)) otherCondition=() build RFs:RF17 ca_address_sk->[c_current_addr_sk] +------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF17 p_promo_sk->[ss_promo_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF16 ca_address_sk->[ss_addr_sk] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF14 ss_item_sk->[sr_item_sk];RF15 ss_ticket_number->[sr_ticket_number] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF14 RF15 +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF13 cs_item_sk->[ss_item_sk,i_item_sk] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF12 cd_demo_sk->[ss_cdemo_sk] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF11 i_item_sk->[ss_item_sk] 
+----------------------------------------------------PhysicalProject +------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF10 s_store_sk->[ss_store_sk] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF9 ib_income_band_sk->[hd_income_band_sk] +------------------------------------------------------------PhysicalProject +--------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF8 hd_demo_sk->[ss_hdemo_sk] +----------------------------------------------------------------PhysicalProject +------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF7 d_date_sk->[ss_sold_date_sk] +--------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF7 RF8 RF10 RF11 RF12 RF13 RF16 RF17 RF18 +--------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------------------------------PhysicalProject +------------------------------------------------------------------------filter(d_year IN (1999, 2000)) +--------------------------------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------------------------------------------PhysicalProject 
+--------------------------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF9 +------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------------------------PhysicalProject +----------------------------------------------------------------PhysicalOlapScan[income_band] +--------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------------------PhysicalProject +------------------------------------------------------------PhysicalOlapScan[store] +----------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------------------------------PhysicalProject +--------------------------------------------------------filter((item.i_current_price <= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace', 'lawn', 'misty', 'orange', 'pink')) +----------------------------------------------------------PhysicalOlapScan[item] apply RFs: RF13 +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------PhysicalProject +----------------------------------------------------PhysicalOlapScan[customer_demographics] +------------------------------------------PhysicalProject +--------------------------------------------filter((sale > (2 * refund))) +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = 
catalog_returns.cr_order_number)) otherCondition=() build RFs:RF5 cr_order_number->[cs_order_number];RF6 cr_item_sk->[cs_item_sk] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5 RF6 +--------------------------------------------------------PhysicalProject +----------------------------------------------------------PhysicalOlapScan[catalog_returns] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[customer_address] +--------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[promotion] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF4 ca_address_sk->[c_current_addr_sk] --------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF16 cd_demo_sk->[c_current_cdemo_sk] +------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF3 cd_demo_sk->[c_current_cdemo_sk] --------------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF15 d_date_sk->[c_first_sales_date_sk] +------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[c_first_sales_date_sk] --------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF14 hd_demo_sk->[c_current_hdemo_sk] +----------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF1 hd_demo_sk->[c_current_hdemo_sk] ------------------------------------------PhysicalProject ---------------------------------------------PhysicalOlapScan[customer] apply RFs: RF14 RF15 RF16 RF17 RF18 RF19 +--------------------------------------------PhysicalOlapScan[customer] apply RFs: RF1 RF2 RF3 RF4 RF19 ------------------------------------------PhysicalDistribute[DistributionSpecReplicated] --------------------------------------------PhysicalProject -----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF13 ib_income_band_sk->[hd_income_band_sk] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF0 ib_income_band_sk->[hd_income_band_sk] ------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF13 +--------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF0 ------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] --------------------------------------------------PhysicalProject ----------------------------------------------------PhysicalOlapScan[income_band] @@ -40,71 +105,6 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) 
--------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[customer_address] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF11 ss_item_sk->[sr_item_sk];RF12 ss_ticket_number->[sr_ticket_number] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[store_returns] apply RFs: RF11 RF12 ---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF10 cs_item_sk->[ss_item_sk,i_item_sk] ---------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF9 p_promo_sk->[ss_promo_sk] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF8 cd_demo_sk->[ss_cdemo_sk] -------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------PhysicalProject -----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF7 ca_address_sk->[ss_addr_sk] -------------------------------------------------PhysicalDistribute[DistributionSpecHash] 
---------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF6 i_item_sk->[ss_item_sk] -----------------------------------------------------PhysicalProject -------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk] ---------------------------------------------------------PhysicalProject -----------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF4 ib_income_band_sk->[hd_income_band_sk] -------------------------------------------------------------PhysicalProject ---------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF3 hd_demo_sk->[ss_hdemo_sk] -----------------------------------------------------------------PhysicalProject -------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] ---------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 RF5 RF6 RF7 RF8 RF9 RF10 ---------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -----------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------filter(d_year IN (1999, 2000)) ---------------------------------------------------------------------------PhysicalOlapScan[date_dim] 
-----------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF4 -------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------------------PhysicalProject -----------------------------------------------------------------PhysicalOlapScan[income_band] ---------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -----------------------------------------------------------PhysicalProject -------------------------------------------------------------PhysicalOlapScan[store] -----------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------------------------PhysicalProject ---------------------------------------------------------filter((item.i_current_price <= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace', 'lawn', 'misty', 'orange', 'pink')) -----------------------------------------------------------PhysicalOlapScan[item] apply RFs: RF10 -------------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------------PhysicalProject -----------------------------------------------------PhysicalOlapScan[customer_address] -------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------PhysicalProject -----------------------------------------------PhysicalOlapScan[customer_demographics] ---------------------------------------PhysicalDistribute[DistributionSpecReplicated] -----------------------------------------PhysicalProject 
-------------------------------------------PhysicalOlapScan[promotion] ---------------------------------PhysicalProject -----------------------------------filter((sale > (2 * refund))) -------------------------------------hashAgg[GLOBAL] ---------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------hashAgg[LOCAL] -------------------------------------------PhysicalProject ---------------------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF0 cr_order_number->[cs_order_number];RF1 cr_item_sk->[cs_item_sk] -----------------------------------------------PhysicalProject -------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 -----------------------------------------------PhysicalProject -------------------------------------------------PhysicalOlapScan[catalog_returns] ----------------PhysicalDistribute[DistributionSpecReplicated] ------------------PhysicalProject --------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query64.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query64.out index 5443c04d5c..e5e915d442 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query64.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query64.out @@ -11,52 +11,27 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ----------------PhysicalProject ------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF18 
ss_customer_sk->[c_customer_sk] +----------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) ------------------------PhysicalDistribute[DistributionSpecHash] --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() -------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() -------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() -------------------------------------------PhysicalProject ---------------------------------------------PhysicalOlapScan[customer] apply RFs: RF18 -------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------PhysicalProject -----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() -------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[household_demographics] -------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------PhysicalProject -----------------------------------------------------PhysicalOlapScan[income_band] -------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------PhysicalProject 
-----------------------------------------PhysicalOlapScan[customer_demographics] -------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[customer_address] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF12 ss_item_sk->[sr_item_sk];RF13 ss_ticket_number->[sr_ticket_number] +----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_returns] apply RFs: RF12 RF13 -------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() +--------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF16 ss_addr_sk->[ca_address_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF16 +----------------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------------PhysicalProject ---------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF10 ss_cdemo_sk->[cd_demo_sk] +--------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = 
store_returns.sr_ticket_number)) otherCondition=() build RFs:RF14 ss_item_sk->[sr_item_sk];RF15 ss_ticket_number->[sr_ticket_number] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF14 RF15 ----------------------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------------------PhysicalProject ---------------------------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF10 -----------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------PhysicalProject ---------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF9 ss_addr_sk->[ca_address_sk] -----------------------------------------------PhysicalProject -------------------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF9 +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF13 ss_cdemo_sk->[cd_demo_sk] ----------------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 i_item_sk->[ss_item_sk];RF8 i_item_sk->[cs_item_sk] +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF13 +----------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF11 i_item_sk->[ss_item_sk];RF12 i_item_sk->[cs_item_sk] 
--------------------------------------------------PhysicalProject ----------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() ------------------------------------------------------PhysicalProject @@ -64,11 +39,11 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ----------------------------------------------------------PhysicalProject ------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() --------------------------------------------------------------PhysicalProject -----------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] +----------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF7 d_date_sk->[ss_sold_date_sk] ------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF2 cs_item_sk->[ss_item_sk] +--------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF6 cs_item_sk->[ss_item_sk] ----------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 RF7 +------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7 RF11 
----------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------------------------------------------------------PhysicalProject --------------------------------------------------------------------------filter((sale > (2 * refund))) @@ -76,9 +51,9 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ------------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash] --------------------------------------------------------------------------------hashAgg[LOCAL] ----------------------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF0 cr_order_number->[cs_order_number];RF1 cr_item_sk->[cs_item_sk] +------------------------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF4 cr_order_number->[cs_order_number];RF5 cr_item_sk->[cs_item_sk] --------------------------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF8 +----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 RF5 RF12 --------------------------------------------------------------------------------------PhysicalProject ----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] 
------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] @@ -98,9 +73,34 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ----------------------------------------------------PhysicalProject ------------------------------------------------------filter((item.i_current_price <= 33.00) and (item.i_current_price >= 24.00) and i_color IN ('blanched', 'brown', 'burlywood', 'chocolate', 'drab', 'medium')) --------------------------------------------------------PhysicalOlapScan[item] -------------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[promotion] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() +------------------------------------PhysicalDistribute[DistributionSpecHash] --------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[promotion] +----------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() +------------------------------------------PhysicalProject +--------------------------------------------PhysicalOlapScan[customer] +------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[household_demographics] +------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------------PhysicalProject +----------------------------------------------------PhysicalOlapScan[income_band] +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[customer_demographics] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[customer_address] --------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------PhysicalProject ------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query64.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query64.out index 55860e7b0b..43bdb50fcc 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query64.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query64.out @@ -11,64 +11,39 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ----------------PhysicalProject ------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF19 d_date_sk->[c_first_sales_date_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF18 ss_customer_sk->[c_customer_sk] +----------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = 
customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF18 c_customer_sk->[ss_customer_sk] ------------------------PhysicalDistribute[DistributionSpecHash] --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF17 ca_address_sk->[c_current_addr_sk] -------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF16 cd_demo_sk->[c_current_cdemo_sk] -------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF15 hd_demo_sk->[c_current_hdemo_sk] -------------------------------------------PhysicalProject ---------------------------------------------PhysicalOlapScan[customer] apply RFs: RF15 RF16 RF17 RF18 RF19 RF20 -------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------PhysicalProject -----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF14 ib_income_band_sk->[hd_income_band_sk] -------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF14 -------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------PhysicalProject 
-----------------------------------------------------PhysicalOlapScan[income_band] -------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[customer_demographics] -------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[customer_address] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF12 ss_item_sk->[sr_item_sk];RF13 ss_ticket_number->[sr_ticket_number] +----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF17 p_promo_sk->[ss_promo_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_returns] apply RFs: RF12 RF13 -------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF11 p_promo_sk->[ss_promo_sk] +--------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF16 ss_addr_sk->[ca_address_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF16 +----------------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------------PhysicalProject ---------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF10 ss_cdemo_sk->[cd_demo_sk] +--------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF14 ss_item_sk->[sr_item_sk];RF15 ss_ticket_number->[sr_ticket_number] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF14 RF15 ----------------------------------------PhysicalDistribute[DistributionSpecHash] ------------------------------------------PhysicalProject ---------------------------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF10 -----------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------PhysicalProject ---------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF9 ss_addr_sk->[ca_address_sk] -----------------------------------------------PhysicalProject -------------------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF9 +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF13 ss_cdemo_sk->[cd_demo_sk] ----------------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 i_item_sk->[ss_item_sk];RF8 i_item_sk->[cs_item_sk] +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF13 
+----------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF11 i_item_sk->[ss_item_sk];RF12 i_item_sk->[cs_item_sk] --------------------------------------------------PhysicalProject -----------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF6 s_store_sk->[ss_store_sk] +----------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF10 s_store_sk->[ss_store_sk] ------------------------------------------------------PhysicalProject ---------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF5 ib_income_band_sk->[hd_income_band_sk] +--------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF9 ib_income_band_sk->[hd_income_band_sk] ----------------------------------------------------------PhysicalProject -------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF4 hd_demo_sk->[ss_hdemo_sk] +------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF8 hd_demo_sk->[ss_hdemo_sk] --------------------------------------------------------------PhysicalProject -----------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF3 
d_date_sk->[ss_sold_date_sk] +----------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF7 d_date_sk->[ss_sold_date_sk] ------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF2 cs_item_sk->[ss_item_sk] +--------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF6 cs_item_sk->[ss_item_sk] ----------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 RF4 RF6 RF7 RF11 +------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7 RF8 RF10 RF11 RF17 RF18 ----------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------------------------------------------------------PhysicalProject --------------------------------------------------------------------------filter((sale > (2 * refund))) @@ -76,9 +51,9 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ------------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash] --------------------------------------------------------------------------------hashAgg[LOCAL] ----------------------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = 
catalog_returns.cr_order_number)) otherCondition=() build RFs:RF0 cr_order_number->[cs_order_number];RF1 cr_item_sk->[cs_item_sk] +------------------------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF4 cr_order_number->[cs_order_number];RF5 cr_item_sk->[cs_item_sk] --------------------------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF8 +----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 RF5 RF12 --------------------------------------------------------------------------------------PhysicalProject ----------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] ------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] @@ -87,7 +62,7 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ------------------------------------------------------------------------PhysicalOlapScan[date_dim] --------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------------------------------------PhysicalProject -------------------------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF5 +------------------------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF9 ----------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------------------------------------------PhysicalProject 
--------------------------------------------------------------PhysicalOlapScan[income_band] @@ -98,9 +73,34 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ----------------------------------------------------PhysicalProject ------------------------------------------------------filter((item.i_current_price <= 33.00) and (item.i_current_price >= 24.00) and i_color IN ('blanched', 'brown', 'burlywood', 'chocolate', 'drab', 'medium')) --------------------------------------------------------PhysicalOlapScan[item] -------------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[promotion] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF2 cd_demo_sk->[c_current_cdemo_sk] +------------------------------------PhysicalDistribute[DistributionSpecHash] --------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[promotion] +----------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF1 hd_demo_sk->[c_current_hdemo_sk] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalOlapScan[customer] apply RFs: RF1 RF2 RF3 RF19 RF20 
+------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF0 ib_income_band_sk->[hd_income_band_sk] +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF0 +------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------------PhysicalProject +----------------------------------------------------PhysicalOlapScan[income_band] +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[customer_demographics] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[customer_address] --------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------PhysicalProject ------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/suites/nereids_rules_p0/mv/join/dphyp_inner/inner_join_dphyp.groovy b/regression-test/suites/nereids_rules_p0/mv/join/dphyp_inner/inner_join_dphyp.groovy new file mode 100644 index 0000000000..5fd8db4bcd --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/join/dphyp_inner/inner_join_dphyp.groovy @@ -0,0 +1,464 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("inner_join_dphyp") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "SET enable_nereids_planner=true" + sql "set runtime_filter_mode=OFF" + sql "SET enable_fallback_to_original_planner=false" + sql "SET enable_materialized_view_rewrite=true" + sql "SET enable_nereids_timeout = false" + sql "SET enable_dphyp_optimizer = true" + + sql """ + drop table if exists orders + """ + + sql """ + CREATE TABLE IF NOT EXISTS orders ( + o_orderkey INTEGER NOT NULL, + o_custkey INTEGER NOT NULL, + o_orderstatus CHAR(1) NOT NULL, + o_totalprice DECIMALV3(15,2) NOT NULL, + o_orderdate DATE NOT NULL, + o_orderpriority CHAR(15) NOT NULL, + o_clerk CHAR(15) NOT NULL, + o_shippriority INTEGER NOT NULL, + o_comment VARCHAR(79) NOT NULL + ) + DUPLICATE KEY(o_orderkey, o_custkey) + PARTITION BY RANGE(o_orderdate) (PARTITION `day_2` VALUES LESS THAN ('2023-12-30')) + DISTRIBUTED BY HASH(o_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + sql """ + drop table if exists lineitem + """ + + sql""" + CREATE TABLE IF NOT EXISTS lineitem ( + l_orderkey INTEGER NOT NULL, + l_partkey INTEGER NOT NULL, + l_suppkey INTEGER NOT NULL, + l_linenumber INTEGER NOT NULL, + l_quantity DECIMALV3(15,2) NOT NULL, + l_extendedprice DECIMALV3(15,2) NOT NULL, + l_discount DECIMALV3(15,2) NOT NULL, + l_tax DECIMALV3(15,2) NOT NULL, + l_returnflag CHAR(1) NOT NULL, 
+ l_linestatus CHAR(1) NOT NULL, + l_shipdate DATE NOT NULL, + l_commitdate DATE NOT NULL, + l_receiptdate DATE NOT NULL, + l_shipinstruct CHAR(25) NOT NULL, + l_shipmode CHAR(10) NOT NULL, + l_comment VARCHAR(44) NOT NULL + ) + DUPLICATE KEY(l_orderkey, l_partkey, l_suppkey, l_linenumber) + PARTITION BY RANGE(l_shipdate) (PARTITION `day_1` VALUES LESS THAN ('2023-12-30')) + DISTRIBUTED BY HASH(l_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + sql """ + drop table if exists partsupp + """ + + sql """ + CREATE TABLE IF NOT EXISTS partsupp ( + ps_partkey INTEGER NOT NULL, + ps_suppkey INTEGER NOT NULL, + ps_availqty INTEGER NOT NULL, + ps_supplycost DECIMALV3(15,2) NOT NULL, + ps_comment VARCHAR(199) NOT NULL + ) + DUPLICATE KEY(ps_partkey, ps_suppkey) + DISTRIBUTED BY HASH(ps_partkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + sql """ insert into lineitem values + (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-08', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (2, 4, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-09', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (3, 2, 4, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-10', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (4, 3, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-11', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (5, 2, 3, 6, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-12-12', '2023-12-12', '2023-12-13', 'c', 'd', 'xxxxxxxxx'); + """ + + sql """ + insert into orders values + (1, 1, 'o', 9.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 33.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), + (5, 2, 'o', 1.2, '2023-12-12', 'c','d',2, 'mi'); + """ + + sql """ + insert into partsupp values + 
(2, 3, 9, 10.01, 'supply1'), + (2, 3, 10, 11.01, 'supply2'); + """ + + def check_rewrite = { mv_sql, query_sql, mv_name -> + + sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" + sql""" + CREATE MATERIALIZED VIEW ${mv_name} + BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS ${mv_sql} + """ + + def job_name = getJobName(db, mv_name); + waitingMTMVTaskFinished(job_name) + explain { + sql("${query_sql}") + contains("${mv_name}(${mv_name})") + } + } + + def check_not_match = { mv_sql, query_sql, mv_name -> + + sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" + sql""" + CREATE MATERIALIZED VIEW ${mv_name} + BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS ${mv_sql} + """ + + def job_name = getJobName(db, mv_name); + waitingMTMVTaskFinished(job_name) + explain { + sql("${query_sql}") + notContains("${mv_name}(${mv_name})") + } + } + + // without filter + def mv1_0 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + def query1_0 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + order_qt_query1_0_before "${query1_0}" + check_rewrite(mv1_0, query1_0, "mv1_0") + order_qt_query1_0_after "${query1_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_0""" + + + def mv1_1 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY, partsupp.PS_AVAILQTY " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "inner join partsupp on lineitem.L_PARTKEY = partsupp.PS_PARTKEY " + + "and lineitem.L_SUPPKEY = partsupp.PS_SUPPKEY" + def query1_1 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "inner join partsupp on lineitem.L_PARTKEY = partsupp.PS_PARTKEY " + + "and 
lineitem.L_SUPPKEY = partsupp.PS_SUPPKEY" + order_qt_query1_1_before "${query1_1}" + check_rewrite(mv1_1, query1_1, "mv1_1") + order_qt_query1_1_after "${query1_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_1""" + + def mv1_2 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY " + + "from orders " + + "inner join lineitem on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + def query1_2 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + order_qt_query1_2_before "${query1_2}" + // join direction is not the same, but this is an inner join, so the mv is still expected to match + check_rewrite(mv1_2, query1_2, "mv1_2") + order_qt_query1_2_after "${query1_2}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_2""" + + // select with complex expression + def mv1_3 = "select l_linenumber, o_custkey " + + "from orders " + + "inner join lineitem on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + def query1_3 = "select IFNULL(orders.O_CUSTKEY, 0) as custkey_not_null, " + + "case when l_linenumber in (1,2,3) then l_linenumber else o_custkey end as case_when " + + "from orders " + + "inner join lineitem on orders.O_ORDERKEY = lineitem.L_ORDERKEY" + order_qt_query1_3_before "${query1_3}" + check_rewrite(mv1_3, query1_3, "mv1_3") + order_qt_query1_3_after "${query1_3}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_3""" + + def mv1_4 = """ + select lineitem.L_LINENUMBER, orders.O_CUSTKEY, partsupp.PS_AVAILQTY + from lineitem + inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY + inner join partsupp on lineitem.L_PARTKEY = partsupp.PS_PARTKEY + and lineitem.L_SUPPKEY = partsupp.PS_SUPPKEY; + """ + def query1_4 = """ + select lineitem.L_LINENUMBER + from lineitem + inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY + inner join partsupp on lineitem.L_PARTKEY = partsupp.PS_PARTKEY + and lineitem.L_SUPPKEY = partsupp.PS_SUPPKEY; + """ + order_qt_query1_4_before "${query1_4}" + check_rewrite(mv1_4, query1_4, "mv1_4") + order_qt_query1_4_after 
"${query1_4}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_4""" + + def mv1_5 = """ + select lineitem.L_LINENUMBER, orders.O_CUSTKEY, l_partkey, o_shippriority + from lineitem + inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY; + """ + def query1_5 = """ + select lineitem.L_LINENUMBER + from lineitem + inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY + and o_shippriority = l_partkey; + """ + order_qt_query1_5_before "${query1_5}" + check_rewrite(mv1_5, query1_5, "mv1_5") + order_qt_query1_5_after "${query1_5}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_5""" + + // filter outside + left + def mv2_0 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY " + + "from orders " + + "inner join lineitem on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + def query2_0 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "where lineitem.L_LINENUMBER > 0" + order_qt_query2_0_before "${query2_0}" + check_rewrite(mv2_0, query2_0, "mv2_0") + order_qt_query2_0_after "${query2_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_0""" + + + def mv2_1 = "select t1.L_LINENUMBER, orders.O_CUSTKEY " + + "from (select * from lineitem where L_LINENUMBER > 1) t1 " + + "inner join orders on t1.L_ORDERKEY = orders.O_ORDERKEY " + def query2_1 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "where lineitem.L_LINENUMBER > 1" + order_qt_query2_1_before "${query2_1}" + check_rewrite(mv2_1, query2_1, "mv2_1") + order_qt_query2_1_after "${query2_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_1""" + + + def mv2_2 = "select t1.L_LINENUMBER, orders.O_CUSTKEY, l_suppkey " + + "from (select * from lineitem where L_LINENUMBER > 1) t1 " + + "inner join orders on t1.L_ORDERKEY = orders.O_ORDERKEY " + def query2_2 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY 
" + + "where lineitem.L_LINENUMBER > 1 and l_suppkey = 3" + order_qt_query2_2_before "${query2_2}" + check_rewrite(mv2_2, query2_2, "mv2_2") + order_qt_query2_2_after "${query2_2}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_2""" + + + def mv2_3 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY, partsupp.PS_AVAILQTY, l_suppkey " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "inner join partsupp on lineitem.L_PARTKEY = partsupp.PS_PARTKEY " + + "and lineitem.L_SUPPKEY = partsupp.PS_SUPPKEY" + def query2_3= "select lineitem.L_LINENUMBER " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "inner join partsupp on lineitem.L_PARTKEY = partsupp.PS_PARTKEY " + + "and lineitem.L_SUPPKEY = partsupp.PS_SUPPKEY " + + "where lineitem.L_LINENUMBER > 1 and l_suppkey = 3 " + order_qt_query2_3_before "${query2_3}" + check_rewrite(mv2_3, query2_3, "mv2_3") + order_qt_query2_3_after "${query2_3}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_3""" + + + // filter outside + right + def mv3_0 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + def query3_0 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "where orders.O_ORDERSTATUS = 'o'" + order_qt_query3_0_before "${query3_0}" + // use a filed not from mv, should not success + check_not_match(mv3_0, query3_0, "mv3_0") + order_qt_query3_0_after "${query3_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv3_0""" + + + def mv3_1 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY, orders.O_ORDERSTATUS " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + def query3_1 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "where orders.O_ORDERSTATUS = 'o'" + 
order_qt_query3_1_before "${query3_1}" + check_rewrite(mv3_1, query3_1, "mv3_1") + order_qt_query3_1_after "${query3_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv3_1""" + + + def mv3_2 = "select lineitem.L_LINENUMBER, t2.O_CUSTKEY, t2.O_ORDERSTATUS " + + "from lineitem " + + "inner join " + + "(select * from orders where O_ORDERSTATUS = 'o') t2 " + + "on lineitem.L_ORDERKEY = t2.O_ORDERKEY " + def query3_2 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "where orders.O_ORDERSTATUS = 'o'" + order_qt_query3_2_before "${query3_2}" + check_rewrite(mv3_2, query3_2, "mv3_2") + order_qt_query3_2_after "${query3_2}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv3_2""" + + + def mv3_3 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY, partsupp.PS_AVAILQTY " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "inner join partsupp on lineitem.L_PARTKEY = partsupp.PS_PARTKEY " + + "and lineitem.L_SUPPKEY = partsupp.PS_SUPPKEY" + def query3_3= "select lineitem.L_LINENUMBER " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "inner join partsupp on lineitem.L_PARTKEY = partsupp.PS_PARTKEY " + + "and lineitem.L_SUPPKEY = partsupp.PS_SUPPKEY " + + "where o_custkey in (1, 2, 3, 4) " + order_qt_query3_3_before "${query3_3}" + check_rewrite(mv3_3, query3_3, "mv3_3") + order_qt_query3_3_after "${query3_3}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv3_3""" + + // join derive, the mv is outer join with filter and query is inner join + // the predicate should be ComparisonPredicate + def mv3_4 = """ + select l_linenumber, o_custkey + from orders + left join lineitem on lineitem.L_ORDERKEY = orders.O_ORDERKEY + where o_custkey = 1; + """ + def query3_4 = """ + select IFNULL(orders.O_CUSTKEY, 0) as custkey_not_null, + case when l_linenumber in (1,2,3) then l_linenumber else o_custkey end as case_when + from orders + inner 
join lineitem on orders.O_ORDERKEY = lineitem.L_ORDERKEY + where o_custkey = 1 and l_linenumber > 0; + """ + order_qt_query3_4_before "${query3_4}" + check_rewrite(mv3_4, query3_4, "mv3_4") + order_qt_query3_4_after "${query3_4}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv3_4""" + + + // filter outside + left + right + def mv4_0 = "select l_linenumber, o_custkey, o_orderkey, o_orderstatus " + + "from lineitem " + + "inner join orders on lineitem.l_orderkey = orders.o_orderkey " + def query4_0 = "select lineitem.l_linenumber " + + "from lineitem " + + "inner join orders on lineitem.l_orderkey = orders.o_orderkey " + + "where o_orderstatus = 'o' AND l_linenumber in (1, 2, 3, 4, 5) " + order_qt_query4_0_before "${query4_0}" + check_rewrite(mv4_0, query4_0, "mv4_0") + order_qt_query4_0_after "${query4_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv4_0""" + + + // filter inside + left + def mv5_0 = "select lineitem.l_linenumber, orders.o_custkey " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "where lineitem.L_LINENUMBER > 1" + def query5_0 = "select t1.L_LINENUMBER " + + "from (select * from lineitem where l_linenumber > 1) t1 " + + "inner join orders on t1.l_orderkey = orders.O_ORDERKEY " + order_qt_query5_0_before "${query5_0}" + check_rewrite(mv5_0, query5_0, "mv5_0") + order_qt_query5_0_after "${query5_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv5_0""" + + + // filter inside + right + def mv6_0 = "select l_shipdate, o_orderdate, l_partkey, l_suppkey " + + "from lineitem " + + "inner join (select * from orders where o_orderdate = '2023-12-08') t2 " + + "on lineitem.l_orderkey = o_orderkey and l_shipdate = o_orderdate " + def query6_0 = "select l_partkey, l_suppkey, l_shipdate " + + "from lineitem t1 " + + "inner join (select * from orders where o_orderdate = '2023-12-08') t2 " + + "on t1.l_orderkey = o_orderkey and t1.l_shipdate = o_orderdate " + order_qt_query6_0_before "${query6_0}" + check_rewrite(mv6_0, 
query6_0, "mv6_0") + order_qt_query6_0_after "${query6_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv6_0""" + + + // filter inside + left + right + def mv7_0 = "select l_shipdate, o_orderdate, l_partkey, l_suppkey " + + "from lineitem " + + "inner join (select * from orders where o_orderdate = '2023-12-08') t2 " + + "on lineitem.l_orderkey = o_orderkey and l_shipdate = o_orderdate " + def query7_0 = "select l_partkey, l_suppkey, l_shipdate " + + "from (select l_shipdate, l_orderkey, l_partkey, l_suppkey " + + "from lineitem where l_partkey in (2, 3, 4)) t1 " + + "inner join (select * from orders where o_orderdate = '2023-12-08') t2 " + + "on t1.l_orderkey = o_orderkey and t1.l_shipdate = o_orderdate " + + "where l_partkey = 2" + order_qt_query7_0_before "${query7_0}" + check_rewrite(mv7_0, query7_0, "mv7_0") + order_qt_query7_0_after "${query7_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv7_0""" + + + // check not match, because the query uses a field orders.O_SHIPPRIORITY which is not in mv + def mv10_0 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY " + + "from lineitem " + + "inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY" + def query10_0 = "select orders.O_CUSTKEY " + + "from orders " + + "inner join lineitem on orders.O_ORDERKEY = lineitem.L_ORDERKEY " + + "WHERE lineitem.L_LINENUMBER > 0 AND orders.O_CUSTKEY = 1 AND " + + "orders.O_SHIPPRIORITY = 2" + order_qt_query10_0_before "${query10_0}" + check_not_match(mv10_0, query10_0, "mv10_0") + order_qt_query10_0_after "${query10_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv10_0""" +} diff --git a/regression-test/suites/nereids_rules_p0/mv/join/dphyp_outer/outer_join_dphyp.groovy b/regression-test/suites/nereids_rules_p0/mv/join/dphyp_outer/outer_join_dphyp.groovy new file mode 100644 index 0000000000..de253de782 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/join/dphyp_outer/outer_join_dphyp.groovy @@ -0,0 +1,469 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or 
more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("outer_join_dphyp") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "SET enable_nereids_planner=true" + sql "set runtime_filter_mode=OFF"; + sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + sql "SET enable_fallback_to_original_planner=false" + sql "SET enable_materialized_view_rewrite=true" + sql "SET enable_nereids_timeout = false" + sql "SET enable_dphyp_optimizer = true" + sql """ + drop table if exists orders + """ + + sql """ + CREATE TABLE IF NOT EXISTS orders ( + o_orderkey INTEGER NOT NULL, + o_custkey INTEGER NOT NULL, + o_orderstatus CHAR(1) NOT NULL, + o_totalprice DECIMALV3(15,2) NOT NULL, + o_orderdate DATE NOT NULL, + o_orderpriority CHAR(15) NOT NULL, + o_clerk CHAR(15) NOT NULL, + o_shippriority INTEGER NOT NULL, + O_COMMENT VARCHAR(79) NOT NULL + ) + DUPLICATE KEY(o_orderkey, o_custkey) + PARTITION BY RANGE(o_orderdate) (PARTITION `day_2` VALUES LESS THAN ('2023-12-30')) + DISTRIBUTED BY HASH(o_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ + drop table if exists lineitem + """ + + sql""" + CREATE TABLE IF NOT EXISTS lineitem ( + l_orderkey INTEGER NOT NULL, + l_partkey INTEGER NOT NULL, + l_suppkey INTEGER NOT NULL, + 
l_linenumber INTEGER NOT NULL, + l_quantity DECIMALV3(15,2) NOT NULL, + l_extendedprice DECIMALV3(15,2) NOT NULL, + l_discount DECIMALV3(15,2) NOT NULL, + l_tax DECIMALV3(15,2) NOT NULL, + l_returnflag CHAR(1) NOT NULL, + l_linestatus CHAR(1) NOT NULL, + l_shipdate DATE NOT NULL, + l_commitdate DATE NOT NULL, + l_receiptdate DATE NOT NULL, + l_shipinstruct CHAR(25) NOT NULL, + l_shipmode CHAR(10) NOT NULL, + l_comment VARCHAR(44) NOT NULL + ) + DUPLICATE KEY(l_orderkey, l_partkey, l_suppkey, l_linenumber) + PARTITION BY RANGE(l_shipdate) (PARTITION `day_1` VALUES LESS THAN ('2023-12-30')) + DISTRIBUTED BY HASH(l_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ + drop table if exists partsupp + """ + + sql """ + CREATE TABLE IF NOT EXISTS partsupp ( + ps_partkey INTEGER NOT NULL, + ps_suppkey INTEGER NOT NULL, + ps_availqty INTEGER NOT NULL, + ps_supplycost DECIMALV3(15,2) NOT NULL, + ps_comment VARCHAR(199) NOT NULL + ) + DUPLICATE KEY(ps_partkey, ps_suppkey) + DISTRIBUTED BY HASH(ps_partkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ insert into lineitem values + (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-08', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (2, 4, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-09', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (3, 2, 4, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-10', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (4, 3, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-11', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (5, 2, 3, 6, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-12-12', '2023-12-12', '2023-12-13', 'c', 'd', 'xxxxxxxxx'); + """ + + sql """ + insert into orders values + (1, 1, 'o', 9.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 33.5, '2023-12-10', 
'a', 'b', 1, 'yy'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), + (5, 2, 'o', 1.2, '2023-12-12', 'c','d',2, 'mi'); + """ + + sql """ + insert into partsupp values + (2, 3, 9, 10.01, 'supply1'), + (2, 3, 10, 11.01, 'supply2'); + """ + + def check_rewrite = { mv_sql, query_sql, mv_name -> + + sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" + sql""" + CREATE MATERIALIZED VIEW ${mv_name} + BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS ${mv_sql} + """ + + def job_name = getJobName(db, mv_name); + waitingMTMVTaskFinished(job_name) + explain { + sql("${query_sql}") + contains("${mv_name}(${mv_name})") + } + } + + def check_not_match = { mv_sql, query_sql, mv_name -> + + sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" + sql""" + CREATE MATERIALIZED VIEW ${mv_name} + BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS ${mv_sql} + """ + + def job_name = getJobName(db, mv_name); + waitingMTMVTaskFinished(job_name) + explain { + sql("${query_sql}") + notContains("${mv_name}(${mv_name})") + } + } + + // without filter + def mv1_0 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY " + + "from lineitem " + + "left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + def query1_0 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + order_qt_query1_0_before "${query1_0}" + check_rewrite(mv1_0, query1_0, "mv1_0") + order_qt_query1_0_after "${query1_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_0""" + + + def mv1_1 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY, partsupp.PS_AVAILQTY " + + "from lineitem " + + "left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "left join partsupp on lineitem.L_PARTKEY = partsupp.PS_PARTKEY " + + "and lineitem.L_SUPPKEY = partsupp.PS_SUPPKEY" 
+ def query1_1 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "left join partsupp on lineitem.L_PARTKEY = partsupp.PS_PARTKEY " + + "and lineitem.L_SUPPKEY = partsupp.PS_SUPPKEY" + order_qt_query1_1_before "${query1_1}" + check_rewrite(mv1_1, query1_1, "mv1_1") + order_qt_query1_1_after "${query1_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_1""" + + def mv1_2 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY " + + "from orders " + + "left join lineitem on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + def query1_2 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + order_qt_query1_2_before "${query1_2}" + // join direction is not same, should not match + check_not_match(mv1_2, query1_2, "mv1_2") + order_qt_query1_2_after "${query1_2}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_2""" + + // select with complex expression + def mv1_3 = "select l_linenumber, o_custkey " + + "from orders " + + "left join lineitem on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + def query1_3 = "select IFNULL(orders.O_CUSTKEY, 0) as custkey_not_null, " + + "case when l_linenumber in (1,2,3) then l_linenumber else o_custkey end as case_when " + + "from orders " + + "left join lineitem on orders.O_ORDERKEY = lineitem.L_ORDERKEY" + order_qt_query1_3_before "${query1_3}" + check_rewrite(mv1_3, query1_3, "mv1_3") + order_qt_query1_3_after "${query1_3}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_3""" + + + // filter outside + left + def mv2_0 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY " + + "from orders " + + "left join lineitem on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + def query2_0 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "where lineitem.L_LINENUMBER > 0" + order_qt_query2_0_before "${query2_0}" + check_not_match(mv2_0, query2_0, "mv2_0") + 
order_qt_query2_0_after "${query2_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_0""" + + + def mv2_1 = "select t1.L_LINENUMBER, orders.O_CUSTKEY " + + "from (select * from lineitem where L_LINENUMBER > 1) t1 " + + "left join orders on t1.L_ORDERKEY = orders.O_ORDERKEY " + def query2_1 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "where lineitem.L_LINENUMBER > 1" + order_qt_query2_1_before "${query2_1}" + check_rewrite(mv2_1, query2_1, "mv2_1") + order_qt_query2_1_after "${query2_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_1""" + + + def mv2_2 =""" + select t1.L_LINENUMBER, orders.O_CUSTKEY + from (select * from lineitem where L_LINENUMBER > 1) t1 + left join orders on t1.L_ORDERKEY = orders.O_ORDERKEY; + """ + def query2_2 = """ + select lineitem.L_LINENUMBER + from lineitem + left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY + where lineitem.L_LINENUMBER > 1 and l_suppkey = 3; + """ + order_qt_query2_2_before "${query2_2}" + check_not_match(mv2_2, query2_2, "mv2_2") + order_qt_query2_2_after "${query2_2}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_2""" + + + def mv2_3 =""" + select t1.L_LINENUMBER, orders.O_CUSTKEY, l_suppkey + from (select * from lineitem where L_LINENUMBER > 1) t1 + left join orders on t1.L_ORDERKEY = orders.O_ORDERKEY; + """ + def query2_3 = """ + select lineitem.L_LINENUMBER + from lineitem + left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY + where lineitem.L_LINENUMBER > 1 and l_suppkey = 3; + """ + order_qt_query2_3_before "${query2_3}" + check_rewrite(mv2_3, query2_3, "mv2_3") + order_qt_query2_3_after "${query2_3}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_3""" + + + // filter outside + right + def mv3_0 = """ + select lineitem.L_LINENUMBER, orders.O_CUSTKEY + from lineitem + left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY; + """ + def query3_0 = """ + select lineitem.L_LINENUMBER + from lineitem + left 
join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY + where orders.O_ORDERSTATUS = 'o'; + """ + order_qt_query3_0_before "${query3_0}" + // uses a field not from mv, should not succeed + check_not_match(mv3_0, query3_0, "mv3_0") + order_qt_query3_0_after "${query3_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv3_0""" + + + def mv3_1 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY, orders.O_ORDERSTATUS " + + "from lineitem " + + "left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + def query3_1 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "where orders.O_ORDERSTATUS = 'o'" + order_qt_query3_1_before "${query3_1}" + check_rewrite(mv3_1, query3_1, "mv3_1") + order_qt_query3_1_after "${query3_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv3_1""" + + + def mv3_2 = "select lineitem.L_LINENUMBER, t2.O_CUSTKEY, t2.O_ORDERSTATUS " + + "from lineitem " + + "left join " + + "(select * from orders where O_ORDERSTATUS = 'o') t2 " + + "on lineitem.L_ORDERKEY = t2.O_ORDERKEY " + def query3_2 = "select lineitem.L_LINENUMBER " + + "from lineitem " + + "left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "where orders.O_ORDERSTATUS = 'o'" + order_qt_query3_2_before "${query3_2}" + // should not succeed, as mv filter is under left outer input + check_not_match(mv3_2, query3_2, "mv3_2") + order_qt_query3_2_after "${query3_2}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv3_2""" + + + // filter outside + left + right + def mv4_0 = """ + select l_linenumber, o_custkey, o_orderkey, o_orderstatus + from lineitem + left join orders on lineitem.l_orderkey = orders.o_orderkey; + """ + def query4_0 = """ + select lineitem.l_linenumber + from lineitem + left join orders on lineitem.l_orderkey = orders.o_orderkey + where o_orderstatus = 'o' AND o_orderkey = 1; + """ + order_qt_query4_0_before "${query4_0}" + check_rewrite(mv4_0, query4_0, "mv4_0") + order_qt_query4_0_after 
"${query4_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv4_0""" + + + // filter inside + left + def mv5_0 = "select lineitem.l_linenumber, orders.o_custkey " + + "from lineitem " + + "left join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY " + + "where lineitem.L_LINENUMBER > 1" + def query5_0 = "select t1.L_LINENUMBER " + + "from (select * from lineitem where l_linenumber > 1) t1 " + + "left join orders on t1.l_orderkey = orders.O_ORDERKEY " + order_qt_query5_0_before "${query5_0}" + check_rewrite(mv5_0, query5_0, "mv5_0") + order_qt_query5_0_after "${query5_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv5_0""" + + + def mv5_1 = """ + select l_shipdate, o_orderdate, l_partkey, l_suppkey + from (select * from lineitem where l_shipdate = '2023-12-08' ) t1 + left join orders + on t1.l_orderkey = orders.o_orderkey + """ + def query5_1 = """ + select l_shipdate, o_orderdate, l_partkey, l_suppkey + from lineitem + left join orders + on lineitem.l_orderkey = orders.o_orderkey + where o_orderdate = '2023-12-08' + """ + order_qt_query5_1_before "${query5_1}" + check_not_match(mv5_1, query5_1, "mv5_1") + order_qt_query5_1_after "${query5_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv5_1""" + + + // filter inside + right + def mv6_0 = "select l_shipdate, o_orderdate, l_partkey, l_suppkey " + + "from lineitem " + + "left join (select * from orders where o_orderdate = '2023-12-08') t2 " + + "on lineitem.l_orderkey = o_orderkey and l_shipdate = o_orderdate " + def query6_0 = "select l_partkey, l_suppkey, l_shipdate " + + "from lineitem t1 " + + "left join (select * from orders where o_orderdate = '2023-12-08') t2 " + + "on t1.l_orderkey = o_orderkey and t1.l_shipdate = o_orderdate " + order_qt_query6_0_before "${query6_0}" + check_rewrite(mv6_0, query6_0, "mv6_0") + order_qt_query6_0_after "${query6_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv6_0""" + + + // filter inside + left + right + def mv7_0 = "select l_shipdate, o_orderdate, l_partkey, l_suppkey " + + 
"from lineitem " + + "left join (select * from orders where o_orderdate = '2023-12-08') t2 " + + "on lineitem.l_orderkey = o_orderkey and l_shipdate = o_orderdate " + def query7_0 = "select l_partkey, l_suppkey, l_shipdate " + + "from (select l_shipdate, l_orderkey, l_partkey, l_suppkey " + + "from lineitem where l_partkey in (3, 4)) t1 " + + "left join (select * from orders where o_orderdate = '2023-12-08') t2 " + + "on t1.l_orderkey = o_orderkey and t1.l_shipdate = o_orderdate " + + "where l_partkey = 3" + order_qt_query7_0_before "${query7_0}" + check_rewrite(mv7_0, query7_0, "mv7_0") + order_qt_query7_0_after "${query7_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv7_0""" + + + def mv7_1 = """ + select l_shipdate, o_orderdate, l_partkey, l_suppkey + from lineitem + left join orders + on lineitem.l_orderkey = orders.o_orderkey + where l_shipdate = '2023-12-08' and o_orderdate = '2023-12-08'; + """ + def query7_1 = """ + select l_shipdate, o_orderdate, l_partkey, l_suppkey + from (select * from lineitem where l_shipdate = '2023-10-17' ) t1 + left join orders + on t1.l_orderkey = orders.o_orderkey; + """ + order_qt_query7_1_before "${query7_1}" + check_not_match(mv7_1, query7_1, "mv7_1") + order_qt_query7_1_after "${query7_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv7_1""" + + + // self join test + def mv8_0 = """ + select + a.o_orderkey, + count(distinct a.o_orderstatus) num1, + SUM(CASE WHEN a.o_orderstatus = 'o' AND a.o_shippriority = 1 AND a.o_orderdate = '2023-12-08' AND b.o_orderdate = '2023-12-09' THEN a.o_shippriority+b.o_custkey ELSE 0 END) num2, + SUM(CASE WHEN a.o_orderstatus = 'o' AND a.o_shippriority = 1 AND a.o_orderdate >= '2023-12-01' AND a.o_orderdate <= '2023-12-09' THEN a.o_shippriority+b.o_custkey ELSE 0 END) num3, + SUM(CASE WHEN a.o_orderstatus = 'o' AND a.o_shippriority in (1,2) AND a.o_orderdate >= '2023-12-08' AND b.o_orderdate <= '2023-12-09' THEN a.o_shippriority-b.o_custkey ELSE 0 END) num4, + AVG(a.o_totalprice) num5, + 
MAX(b.o_totalprice) num6, + MIN(a.o_totalprice) num7 + from + orders a + left outer join orders b + on a.o_orderkey = b.o_orderkey + and a.o_custkey = b.o_custkey + group by a.o_orderkey; + """ + def query8_0 = """ + select + a.o_orderkey, + SUM(CASE WHEN a.o_orderstatus = 'o' AND a.o_shippriority = 1 AND a.o_orderdate = '2023-12-08' AND b.o_orderdate = '2023-12-09' THEN a.o_shippriority+b.o_custkey ELSE 0 END) num2, + SUM(CASE WHEN a.o_orderstatus = 'o' AND a.o_shippriority = 1 AND a.o_orderdate >= '2023-12-01' AND a.o_orderdate <= '2023-12-09' THEN a.o_shippriority+b.o_custkey ELSE 0 END) num3, + SUM(CASE WHEN a.o_orderstatus = 'o' AND a.o_shippriority in (1,2) AND a.o_orderdate >= '2023-12-08' AND b.o_orderdate <= '2023-12-09' THEN a.o_shippriority-b.o_custkey ELSE 0 END) num4, + AVG(a.o_totalprice) num5, + MAX(b.o_totalprice) num6, + MIN(a.o_totalprice) num7 + from + orders a + left outer join orders b + on a.o_orderkey = b.o_orderkey + and a.o_custkey = b.o_custkey + group by a.o_orderkey; + """ + order_qt_query8_0_before "${query8_0}" + check_rewrite(mv8_0, query8_0, "mv8_0") + order_qt_query8_0_after "${query8_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv8_0""" +}