From d25cbdd4dc0746acce1c2bc0d3d7d83f85d6e222 Mon Sep 17 00:00:00 2001 From: jakevin Date: Tue, 12 Dec 2023 15:38:31 +0800 Subject: [PATCH] [feature](Nereids): one side eager aggregation (#28143) --- .../doris/nereids/jobs/executor/Rewriter.java | 6 + .../apache/doris/nereids/rules/RuleType.java | 3 + .../PushDownCountThroughJoinOneSide.java | 216 ++++++ .../PushDownSumThroughJoinOneSide.java | 209 ++++++ .../PushDownCountThroughJoinOneSideTest.java | 139 ++++ .../PushDownSumThroughJoinOneSideTest.java | 135 ++++ .../eager_aggregate/basic_one_side.out | 60 ++ .../push_down_count_through_join_one_side.out | 672 ++++++++++++++++++ .../push_down_sum_through_join_one_side.out | 366 ++++++++++ .../eager_aggregate/basic_one_side.groovy | 130 ++++ ...sh_down_count_through_join_one_side.groovy | 239 +++++++ ...push_down_sum_through_join_one_side.groovy | 154 ++++ 12 files changed, 2329 insertions(+) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownCountThroughJoinOneSide.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownSumThroughJoinOneSide.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownCountThroughJoinOneSideTest.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownSumThroughJoinOneSideTest.java create mode 100644 regression-test/data/nereids_rules_p0/eager_aggregate/basic_one_side.out create mode 100644 regression-test/data/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.out create mode 100644 regression-test/data/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.out create mode 100644 regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy create mode 100644 regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.groovy create mode 100644 regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 4d1c330d52..0ded776717 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -91,6 +91,7 @@ import org.apache.doris.nereids.rules.rewrite.PullUpProjectUnderTopN; import org.apache.doris.nereids.rules.rewrite.PushConjunctsIntoEsScan; import org.apache.doris.nereids.rules.rewrite.PushConjunctsIntoJdbcScan; import org.apache.doris.nereids.rules.rewrite.PushDownCountThroughJoin; +import org.apache.doris.nereids.rules.rewrite.PushDownCountThroughJoinOneSide; import org.apache.doris.nereids.rules.rewrite.PushDownDistinctThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughProject; import org.apache.doris.nereids.rules.rewrite.PushDownLimit; @@ -98,6 +99,7 @@ import org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughUnion; import org.apache.doris.nereids.rules.rewrite.PushDownMinMaxThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushDownSumThroughJoin; +import org.apache.doris.nereids.rules.rewrite.PushDownSumThroughJoinOneSide; import org.apache.doris.nereids.rules.rewrite.PushDownTopNThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushDownTopNThroughUnion; import org.apache.doris.nereids.rules.rewrite.PushDownTopNThroughWindow; @@ -275,6 +277,10 @@ public class Rewriter extends AbstractBatchJobExecutor { new PushDownMinMaxThroughJoin(), new PushDownCountThroughJoin() ), + topDown( + new PushDownSumThroughJoinOneSide(), + new PushDownCountThroughJoinOneSide() + ), custom(RuleType.PUSH_DOWN_DISTINCT_THROUGH_JOIN, PushDownDistinctThroughJoin::new) ), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index f88de61ebc..41db8bb5df 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -167,6 +167,9 @@ public enum RuleType { PUSH_DOWN_SUM_THROUGH_JOIN(RuleTypeClass.REWRITE), PUSH_DOWN_COUNT_THROUGH_JOIN(RuleTypeClass.REWRITE), + PUSH_DOWN_SUM_THROUGH_JOIN_ONE_SIDE(RuleTypeClass.REWRITE), + PUSH_DOWN_COUNT_THROUGH_JOIN_ONE_SIDE(RuleTypeClass.REWRITE), + TRANSPOSE_LOGICAL_SEMI_JOIN_LOGICAL_JOIN(RuleTypeClass.REWRITE), TRANSPOSE_LOGICAL_SEMI_JOIN_LOGICAL_JOIN_PROJECT(RuleTypeClass.REWRITE), LOGICAL_SEMI_JOIN_COMMUTE(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownCountThroughJoinOneSide.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownCountThroughJoinOneSide.java new file mode 100644 index 0000000000..5abe33fb14 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownCountThroughJoinOneSide.java @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; +import org.apache.doris.nereids.trees.expressions.functions.agg.Count; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableList.Builder; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * TODO: distinct | just push one level + * Support Pushdown Count(col). + * Count(col) -> Sum( cnt ) + *

+ * Related paper "Eager aggregation and lazy aggregation". + *

+ *  aggregate: count(x)
+ *  |
+ *  join
+ *  |   \
+ *  |    *
+ *  (x)
+ *  ->
+ *  aggregate: Sum( cnt )
+ *  |
+ *  join
+ *  |   \
+ *  |    *
+ *  aggregate: count(x) as cnt
+ *  
+ * Notice: rule can't optimize condition that groupby is empty when Count(*) exists. + */ +public class PushDownCountThroughJoinOneSide implements RewriteRuleFactory { + @Override + public List buildRules() { + return ImmutableList.of( + logicalAggregate(innerLogicalJoin()) + .when(agg -> agg.child().getOtherJoinConjuncts().isEmpty()) + .whenNot(agg -> agg.child().children().stream().anyMatch(p -> p instanceof LogicalAggregate)) + .when(agg -> agg.getGroupByExpressions().stream().allMatch(e -> e instanceof Slot)) + .when(agg -> { + Set funcs = agg.getAggregateFunctions(); + return !funcs.isEmpty() && funcs.stream() + .allMatch(f -> f instanceof Count && !f.isDistinct() + && (!((Count) f).isCountStar() && f.child(0) instanceof Slot)); + }) + .thenApply(ctx -> { + Set enableNereidsRules = ctx.cascadesContext.getConnectContext() + .getSessionVariable().getEnableNereidsRules(); + if (!enableNereidsRules.contains(RuleType.PUSH_DOWN_COUNT_THROUGH_JOIN_ONE_SIDE.type())) { + return null; + } + LogicalAggregate> agg = ctx.root; + return pushCount(agg, agg.child(), ImmutableList.of()); + }) + .toRule(RuleType.PUSH_DOWN_COUNT_THROUGH_JOIN_ONE_SIDE), + logicalAggregate(logicalProject(innerLogicalJoin())) + .when(agg -> agg.child().isAllSlots()) + .when(agg -> agg.child().child().getOtherJoinConjuncts().isEmpty()) + .whenNot(agg -> agg.child().children().stream().anyMatch(p -> p instanceof LogicalAggregate)) + .when(agg -> agg.getGroupByExpressions().stream().allMatch(e -> e instanceof Slot)) + .when(agg -> { + Set funcs = agg.getAggregateFunctions(); + return !funcs.isEmpty() && funcs.stream() + .allMatch(f -> f instanceof Count && !f.isDistinct() + && (!((Count) f).isCountStar() && f.child(0) instanceof Slot)); + }) + .thenApply(ctx -> { + Set enableNereidsRules = ctx.cascadesContext.getConnectContext() + .getSessionVariable().getEnableNereidsRules(); + if (!enableNereidsRules.contains(RuleType.PUSH_DOWN_COUNT_THROUGH_JOIN_ONE_SIDE.type())) { + return null; + } + LogicalAggregate>> agg = ctx.root; + return pushCount(agg, agg.child().child(), agg.child().getProjects()); + }) + .toRule(RuleType.PUSH_DOWN_COUNT_THROUGH_JOIN_ONE_SIDE) + ); + } + + private LogicalAggregate pushCount(LogicalAggregate agg, + LogicalJoin join, List projects) { + List leftOutput = join.left().getOutput(); + List rightOutput = join.right().getOutput(); + + List leftCounts = new ArrayList<>(); + List rightCounts = new ArrayList<>(); + for (AggregateFunction f : agg.getAggregateFunctions()) { + Count count = (Count) f; + Slot slot = (Slot) count.child(0); + if (leftOutput.contains(slot)) { + leftCounts.add(count); + } else if (rightOutput.contains(slot)) { + rightCounts.add(count); + } else { + throw new IllegalStateException("Slot " + slot + " not found in join output"); + } + } + + Set leftGroupBy = new HashSet<>(); + Set rightGroupBy = new HashSet<>(); + for (Expression e : agg.getGroupByExpressions()) { + Slot slot = (Slot) e; + if (leftOutput.contains(slot)) { + leftGroupBy.add(slot); + } else if (rightOutput.contains(slot)) { + rightGroupBy.add(slot); + } else { + return null; + } + } + join.getHashJoinConjuncts().forEach(e -> e.getInputSlots().forEach(slot -> { + if (leftOutput.contains(slot)) { + leftGroupBy.add(slot); + } else if (rightOutput.contains(slot)) { + rightGroupBy.add(slot); + } else { + throw new IllegalStateException("Slot " + slot + " not found in join output"); + } + })); + + Plan left = join.left(); + Plan right = join.right(); + + Map leftCntSlotToOutput = new HashMap<>(); + Map rightCntSlotToOutput = new HashMap<>(); + + // left Count agg + if (!leftCounts.isEmpty()) { + Builder leftCntAggOutputBuilder = ImmutableList.builder() + .addAll(leftGroupBy); + leftCounts.forEach(func -> { + Alias alias = func.alias(func.getName()); + leftCntSlotToOutput.put((Slot) func.child(0), alias); + leftCntAggOutputBuilder.add(alias); + }); + left = new LogicalAggregate<>(ImmutableList.copyOf(leftGroupBy), leftCntAggOutputBuilder.build(), + join.left()); + } + + // right Count agg + if (!rightCounts.isEmpty()) { + Builder rightCntAggOutputBuilder = ImmutableList.builder() + .addAll(rightGroupBy); + rightCounts.forEach(func -> { + Alias alias = func.alias(func.getName()); + rightCntSlotToOutput.put((Slot) func.child(0), alias); + rightCntAggOutputBuilder.add(alias); + }); + + right = new LogicalAggregate<>(ImmutableList.copyOf(rightGroupBy), rightCntAggOutputBuilder.build(), + join.right()); + } + + Preconditions.checkState(left != join.left() || right != join.right()); + Plan newJoin = join.withChildren(left, right); + + // top Sum agg + // count(slot) -> sum( count(slot) as cnt ) + List newOutputExprs = new ArrayList<>(); + for (NamedExpression ne : agg.getOutputExpressions()) { + if (ne instanceof Alias && ((Alias) ne).child() instanceof Count) { + Count oldTopCnt = (Count) ((Alias) ne).child(); + + Slot slot = (Slot) oldTopCnt.child(0); + if (leftCntSlotToOutput.containsKey(slot)) { + Expression expr = new Sum(leftCntSlotToOutput.get(slot).toSlot()); + newOutputExprs.add((NamedExpression) ne.withChildren(expr)); + } else if (rightCntSlotToOutput.containsKey(slot)) { + Expression expr = new Sum(rightCntSlotToOutput.get(slot).toSlot()); + newOutputExprs.add((NamedExpression) ne.withChildren(expr)); + } else { + throw new IllegalStateException("Slot " + slot + " not found in join output"); + } + } else { + newOutputExprs.add(ne); + } + } + return agg.withAggOutputChild(newOutputExprs, newJoin); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownSumThroughJoinOneSide.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownSumThroughJoinOneSide.java new file mode 100644 index 0000000000..3f4fa09cd7 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownSumThroughJoinOneSide.java @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableList.Builder; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * TODO: distinct + * Related paper "Eager aggregation and lazy aggregation". + *
+ * aggregate: Sum(x)
+ * |
+ * join
+ * |   \
+ * |    *
+ * (x)
+ * ->
+ * aggregate: Sum(sum1)
+ * |
+ * join
+ * |   \
+ * |    *
+ * aggregate: Sum(x) as sum1
+ * 
+ */ +public class PushDownSumThroughJoinOneSide implements RewriteRuleFactory { + @Override + public List buildRules() { + return ImmutableList.of( + logicalAggregate(innerLogicalJoin()) + .when(agg -> agg.child().getOtherJoinConjuncts().isEmpty()) + .whenNot(agg -> agg.child().children().stream().anyMatch(p -> p instanceof LogicalAggregate)) + .when(agg -> { + Set funcs = agg.getAggregateFunctions(); + return !funcs.isEmpty() && funcs.stream() + .allMatch(f -> f instanceof Sum && !f.isDistinct() && f.child(0) instanceof Slot); + }) + .thenApply(ctx -> { + Set enableNereidsRules = ctx.cascadesContext.getConnectContext() + .getSessionVariable().getEnableNereidsRules(); + if (!enableNereidsRules.contains(RuleType.PUSH_DOWN_SUM_THROUGH_JOIN_ONE_SIDE.type())) { + return null; + } + LogicalAggregate> agg = ctx.root; + return pushSum(agg, agg.child(), ImmutableList.of()); + }) + .toRule(RuleType.PUSH_DOWN_SUM_THROUGH_JOIN), + logicalAggregate(logicalProject(innerLogicalJoin())) + .when(agg -> agg.child().isAllSlots()) + .when(agg -> agg.child().child().getOtherJoinConjuncts().isEmpty()) + .whenNot(agg -> agg.child().children().stream().anyMatch(p -> p instanceof LogicalAggregate)) + .when(agg -> { + Set funcs = agg.getAggregateFunctions(); + return !funcs.isEmpty() && funcs.stream() + .allMatch(f -> f instanceof Sum && !f.isDistinct() && f.child(0) instanceof Slot); + }) + .thenApply(ctx -> { + Set enableNereidsRules = ctx.cascadesContext.getConnectContext() + .getSessionVariable().getEnableNereidsRules(); + if (!enableNereidsRules.contains(RuleType.PUSH_DOWN_SUM_THROUGH_JOIN_ONE_SIDE.type())) { + return null; + } + LogicalAggregate>> agg = ctx.root; + return pushSum(agg, agg.child().child(), agg.child().getProjects()); + }) + .toRule(RuleType.PUSH_DOWN_SUM_THROUGH_JOIN) + ); + } + + private LogicalAggregate pushSum(LogicalAggregate agg, + LogicalJoin join, List projects) { + List leftOutput = join.left().getOutput(); + List rightOutput = join.right().getOutput(); + + List leftSums = new ArrayList<>(); + List rightSums = new ArrayList<>(); + for (AggregateFunction f : agg.getAggregateFunctions()) { + Sum sum = (Sum) f; + Slot slot = (Slot) sum.child(); + if (leftOutput.contains(slot)) { + leftSums.add(sum); + } else if (rightOutput.contains(slot)) { + rightSums.add(sum); + } else { + throw new IllegalStateException("Slot " + slot + " not found in join output"); + } + } + if (leftSums.isEmpty() && rightSums.isEmpty()) { + return null; + } + + Set leftGroupBy = new HashSet<>(); + Set rightGroupBy = new HashSet<>(); + for (Expression e : agg.getGroupByExpressions()) { + Slot slot = (Slot) e; + if (leftOutput.contains(slot)) { + leftGroupBy.add(slot); + } else if (rightOutput.contains(slot)) { + rightGroupBy.add(slot); + } else { + return null; + } + } + join.getHashJoinConjuncts().forEach(e -> e.getInputSlots().forEach(slot -> { + if (leftOutput.contains(slot)) { + leftGroupBy.add(slot); + } else if (rightOutput.contains(slot)) { + rightGroupBy.add(slot); + } else { + throw new IllegalStateException("Slot " + slot + " not found in join output"); + } + })); + + Plan left = join.left(); + Plan right = join.right(); + + Map leftSumSlotToOutput = new HashMap<>(); + Map rightSumSlotToOutput = new HashMap<>(); + + // left Sum agg + if (!leftSums.isEmpty()) { + Builder leftSumAggOutputBuilder = ImmutableList.builder() + .addAll(leftGroupBy); + leftSums.forEach(func -> { + Alias alias = func.alias(func.getName()); + leftSumSlotToOutput.put((Slot) func.child(0), alias); + leftSumAggOutputBuilder.add(alias); + }); + left = new LogicalAggregate<>(ImmutableList.copyOf(leftGroupBy), leftSumAggOutputBuilder.build(), + join.left()); + } + + // right Sum agg + if (!rightSums.isEmpty()) { + Builder rightSumAggOutputBuilder = ImmutableList.builder() + .addAll(rightGroupBy); + rightSums.forEach(func -> { + Alias alias = func.alias(func.getName()); + rightSumSlotToOutput.put((Slot) func.child(0), alias); + rightSumAggOutputBuilder.add(alias); + }); + right = new LogicalAggregate<>(ImmutableList.copyOf(rightGroupBy), rightSumAggOutputBuilder.build(), + join.right()); + } + + Preconditions.checkState(left != join.left() || right != join.right()); + Plan newJoin = join.withChildren(left, right); + + // top Sum agg + // replace sum(x) -> sum(sum#) + List newOutputExprs = new ArrayList<>(); + for (NamedExpression ne : agg.getOutputExpressions()) { + if (ne instanceof Alias && ((Alias) ne).child() instanceof Sum) { + Sum oldTopSum = (Sum) ((Alias) ne).child(); + + Slot slot = (Slot) oldTopSum.child(0); + if (leftSumSlotToOutput.containsKey(slot)) { + Expression expr = new Sum(leftSumSlotToOutput.get(slot).toSlot()); + newOutputExprs.add((NamedExpression) ne.withChildren(expr)); + } else if (rightSumSlotToOutput.containsKey(slot)) { + Expression expr = new Sum(rightSumSlotToOutput.get(slot).toSlot()); + newOutputExprs.add((NamedExpression) ne.withChildren(expr)); + } else { + throw new IllegalStateException("Slot " + slot + " not found in join output"); + } + } else { + newOutputExprs.add(ne); + } + } + return agg.withAggOutputChild(newOutputExprs, newJoin); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownCountThroughJoinOneSideTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownCountThroughJoinOneSideTest.java new file mode 100644 index 0000000000..3106eb30f4 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownCountThroughJoinOneSideTest.java @@ -0,0 +1,139 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.common.Pair; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.functions.agg.Count; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.util.LogicalPlanBuilder; +import org.apache.doris.nereids.util.MemoPatternMatchSupported; +import org.apache.doris.nereids.util.MemoTestUtils; +import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.nereids.util.PlanConstructor; +import org.apache.doris.qe.SessionVariable; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import mockit.Mock; +import mockit.MockUp; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +class PushDownCountThroughJoinOneSideTest implements MemoPatternMatchSupported { + private static final LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + private static final LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + private MockUp mockUp = new MockUp() { + @Mock + public Set getEnableNereidsRules() { + return ImmutableSet.of(RuleType.PUSH_DOWN_COUNT_THROUGH_JOIN_ONE_SIDE.type()); + } + }; + + @Test + void testSingleCount() { + Alias count = new Count(scan1.getOutput().get(0)).alias("count"); + LogicalPlan plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.INNER_JOIN, Pair.of(0, 0)) + .aggGroupUsingIndex(ImmutableList.of(0), ImmutableList.of(scan1.getOutput().get(0), count)) + .build(); + + PlanChecker.from(MemoTestUtils.createConnectContext(), plan) + .applyTopDown(new PushDownCountThroughJoinOneSide()) + .printlnTree() + .matches( + logicalAggregate( + logicalJoin( + logicalAggregate(), + logicalOlapScan() + ) + ) + ); + } + + @Test + void testMultiCount() { + Alias leftCnt1 = new Count(scan1.getOutput().get(0)).alias("leftCnt1"); + Alias leftCnt2 = new Count(scan1.getOutput().get(1)).alias("leftCnt2"); + Alias rightCnt1 = new Count(scan2.getOutput().get(1)).alias("rightCnt1"); + LogicalPlan plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.INNER_JOIN, Pair.of(0, 0)) + .aggGroupUsingIndex(ImmutableList.of(0), + ImmutableList.of(scan1.getOutput().get(0), leftCnt1, leftCnt2, rightCnt1)) + .build(); + + PlanChecker.from(MemoTestUtils.createConnectContext(), plan) + .applyTopDown(new PushDownCountThroughJoinOneSide()) + .matches( + logicalAggregate( + logicalJoin( + logicalAggregate(), + logicalAggregate() + ) + ) + ); + } + + @Test + void testSingleCountStar() { + Alias count = new Count().alias("countStar"); + LogicalPlan plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.INNER_JOIN, Pair.of(0, 0)) + .aggGroupUsingIndex(ImmutableList.of(0), ImmutableList.of(scan1.getOutput().get(0), count)) + .build(); + + PlanChecker.from(MemoTestUtils.createConnectContext(), plan) + .applyTopDown(new PushDownCountThroughJoinOneSide()) + .printlnTree() + .matches( + logicalAggregate( + logicalJoin( + logicalOlapScan(), + logicalOlapScan() + ) + ) + ); + } + + @Test + void testBothSideCountAndCountStar() { + Alias leftCnt = new Count(scan1.getOutput().get(0)).alias("leftCnt"); + Alias rightCnt = new Count(scan2.getOutput().get(0)).alias("rightCnt"); + Alias countStar = new Count().alias("countStar"); + LogicalPlan plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.INNER_JOIN, Pair.of(0, 0)) + .aggGroupUsingIndex(ImmutableList.of(0), + ImmutableList.of(scan1.getOutput().get(0), leftCnt, rightCnt, countStar)) + .build(); + + PlanChecker.from(MemoTestUtils.createConnectContext(), plan) + .applyTopDown(new PushDownCountThroughJoinOneSide()) + .matches( + logicalAggregate( + logicalJoin( + logicalOlapScan(), + logicalOlapScan() + ) + ) + ); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownSumThroughJoinOneSideTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownSumThroughJoinOneSideTest.java new file mode 100644 index 0000000000..2e0f124b81 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownSumThroughJoinOneSideTest.java @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.common.Pair; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.util.LogicalPlanBuilder; +import org.apache.doris.nereids.util.MemoPatternMatchSupported; +import org.apache.doris.nereids.util.MemoTestUtils; +import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.nereids.util.PlanConstructor; +import org.apache.doris.qe.SessionVariable; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import mockit.Mock; +import mockit.MockUp; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +class PushDownSumThroughJoinOneSideTest implements MemoPatternMatchSupported { + private static final LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + private static final LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + private MockUp mockUp = new MockUp() { + @Mock + public Set getEnableNereidsRules() { + return ImmutableSet.of(RuleType.PUSH_DOWN_SUM_THROUGH_JOIN_ONE_SIDE.type()); + } + }; + + @Test + void testSingleJoinLeftSum() { + Alias sum = new Sum(scan1.getOutput().get(1)).alias("sum"); + LogicalPlan plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.INNER_JOIN, Pair.of(0, 0)) + .aggGroupUsingIndex(ImmutableList.of(0), ImmutableList.of(scan1.getOutput().get(0), sum)) + .build(); + + PlanChecker.from(MemoTestUtils.createConnectContext(), plan) + .applyTopDown(new PushDownSumThroughJoinOneSide()) + .matches( + logicalAggregate( + logicalJoin( + logicalAggregate(), + logicalOlapScan() + ) + ) + ); + } + + @Test + void testSingleJoinRightSum() { + Alias sum = new Sum(scan2.getOutput().get(1)).alias("sum"); + LogicalPlan plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.INNER_JOIN, Pair.of(0, 0)) + .aggGroupUsingIndex(ImmutableList.of(0), ImmutableList.of(scan1.getOutput().get(0), sum)) + .build(); + + PlanChecker.from(MemoTestUtils.createConnectContext(), plan) + .applyTopDown(new PushDownSumThroughJoinOneSide()) + .matches( + logicalAggregate( + logicalJoin( + logicalOlapScan(), + logicalAggregate() + ) + ) + ); + } + + @Test + void testAggNotOutputGroupBy() { + // agg don't output group by + Alias sum = new Sum(scan1.getOutput().get(1)).alias("sum"); + LogicalPlan plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.INNER_JOIN, Pair.of(0, 0)) + .aggGroupUsingIndex(ImmutableList.of(0), ImmutableList.of(sum)) + .build(); + + PlanChecker.from(MemoTestUtils.createConnectContext(), plan) + .applyTopDown(new PushDownSumThroughJoinOneSide()) + .matches( + logicalAggregate( + logicalJoin( + logicalAggregate(), + logicalOlapScan() + ) + ) + ); + } + + @Test + void testMultiSum() { + Alias leftSum1 = new Sum(scan1.getOutput().get(0)).alias("leftSum1"); + Alias leftSum2 = new Sum(scan1.getOutput().get(1)).alias("leftSum2"); + Alias rightSum1 = new Sum(scan2.getOutput().get(1)).alias("rightSum1"); + LogicalPlan plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.INNER_JOIN, Pair.of(0, 0)) + .aggGroupUsingIndex(ImmutableList.of(0), + ImmutableList.of(scan1.getOutput().get(0), leftSum1, leftSum2, rightSum1)) + .build(); + + PlanChecker.from(MemoTestUtils.createConnectContext(), plan) + .applyTopDown(new PushDownSumThroughJoinOneSide()) + .matches( + logicalAggregate( + logicalJoin( + logicalAggregate(), + logicalAggregate() + ) + ) + ); + } +} diff --git a/regression-test/data/nereids_rules_p0/eager_aggregate/basic_one_side.out b/regression-test/data/nereids_rules_p0/eager_aggregate/basic_one_side.out new file mode 100644 index 0000000000..7d99f92434 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/eager_aggregate/basic_one_side.out @@ -0,0 +1,60 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !1 -- +PhysicalResultSink +--PhysicalDistribute +----hashAgg[GLOBAL] +------PhysicalDistribute +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((a.device_id = b.device_id)) otherCondition=() build RFs:RF0 device_id->[device_id] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------filter((a.event_id = 'ad_click')) +--------------------PhysicalOlapScan[com_dd_library_one_side] apply RFs: RF0 +--------------PhysicalDistribute +----------------PhysicalProject +------------------filter((cast(experiment_id as DOUBLE) = 37)) +--------------------PhysicalOlapScan[shunt_log_com_dd_library_one_side] + +-- !2 -- +PhysicalResultSink +--PhysicalDistribute +----hashAgg[GLOBAL] +------PhysicalDistribute +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((a.device_id = b.device_id)) otherCondition=() build RFs:RF0 device_id->[device_id] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------PhysicalOlapScan[com_dd_library_one_side] apply RFs: RF0 +--------------PhysicalDistribute +----------------filter((cast(experiment_id as DOUBLE) = 73)) +------------------PhysicalOlapScan[shunt_log_com_dd_library_one_side] + +-- !3 -- +PhysicalResultSink +--PhysicalDistribute +----hashAgg[GLOBAL] +------PhysicalDistribute +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((a.device_id = b.device_id)) otherCondition=() build RFs:RF0 device_id->[device_id] +--------------PhysicalOlapScan[com_dd_library_one_side] apply RFs: RF0 +--------------PhysicalDistribute +----------------filter((cast(experiment_id as DOUBLE) = 73)) +------------------PhysicalOlapScan[shunt_log_com_dd_library_one_side] + +-- !4 -- +PhysicalResultSink +--PhysicalDistribute +----hashAgg[GLOBAL] +------PhysicalDistribute +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((a.device_id = b.device_id)) otherCondition=() +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------PhysicalOlapScan[com_dd_library_one_side] +--------------PhysicalDistribute +----------------PhysicalOlapScan[shunt_log_com_dd_library_one_side] + diff --git a/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.out b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.out new file mode 100644 index 0000000000..487763d55d --- /dev/null +++ b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.out @@ -0,0 +1,672 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !groupby_pushdown_basic -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------hashAgg[LOCAL] +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_left_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_right_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_full_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[FULL_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_left_semi_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------hashAgg[LOCAL] +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_left_anti_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_complex_conditions -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=((t1.name < t2.name)) +----------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_with_aggregate -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_subquery -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] apply RFs: RF0 +----------------hashAgg[LOCAL] +------------------filter((count_t_one_side.score > 10)) +--------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_outer_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_deep_subquery -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] apply RFs: RF0 +----------------hashAgg[LOCAL] +------------------filter((count_t_one_side.score > 10)) +--------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_having -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------filter((count(score) > 100)) +--------hashAgg[GLOBAL] +----------PhysicalDistribute +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +------------------hashAgg[LOCAL] +--------------------PhysicalOlapScan[count_t_one_side] +------------------PhysicalProject +--------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_mixed_aggregates -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_multi_table_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[LOCAL] +--------PhysicalProject +----------hashJoin[INNER_JOIN] hashCondition=((t1.name = t3.name)) otherCondition=() +------------hashAgg[GLOBAL] +--------------PhysicalDistribute +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------------hashAgg[LOCAL] +------------------------PhysicalOlapScan[count_t_one_side] +----------------------PhysicalProject +------------------------PhysicalOlapScan[count_t_one_side] +------------PhysicalDistribute +--------------PhysicalProject +----------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_with_order_by -- +PhysicalResultSink +--PhysicalProject +----PhysicalQuickSort[MERGE_SORT] +------PhysicalDistribute +--------PhysicalQuickSort[LOCAL_SORT] +----------hashAgg[GLOBAL] +------------PhysicalDistribute +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +--------------------hashAgg[LOCAL] +----------------------PhysicalOlapScan[count_t_one_side] +--------------------PhysicalProject +----------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_multiple_equal_conditions -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.name = t2.name)) otherCondition=() +----------------hashAgg[LOCAL] +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_equal_conditions_with_aggregate -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.name = t2.name)) otherCondition=() +----------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_equal_conditions_non_aggregate_selection -- +PhysicalResultSink +--PhysicalDistribute +----hashAgg[GLOBAL] +------PhysicalDistribute +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.name = t2.name)) otherCondition=() +--------------hashAgg[LOCAL] +----------------PhysicalOlapScan[count_t_one_side] +--------------PhysicalProject +----------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_equal_conditions_non_aggregate_selection_with_aggregate -- +PhysicalResultSink +--PhysicalDistribute +----hashAgg[GLOBAL] +------PhysicalDistribute +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.name = t2.name)) otherCondition=() +--------------hashAgg[LOCAL] +----------------PhysicalOlapScan[count_t_one_side] +--------------hashAgg[LOCAL] +----------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_with_where_clause -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] apply RFs: RF0 +----------------hashAgg[LOCAL] +------------------filter((t1.score > 50)) +--------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_varied_aggregates -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_with_order_by_limit -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +--------------------hashAgg[LOCAL] +----------------------PhysicalOlapScan[count_t_one_side] +--------------------PhysicalProject +----------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_alias_multiple_equal_conditions -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1_alias.id = t2_alias.id) and (t1_alias.name = t2_alias.name)) otherCondition=() +----------------hashAgg[LOCAL] +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_complex_join_condition -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.score = t2.score)) otherCondition=(( not (name = name))) +----------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_function_processed_columns -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_nested_queries -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------filter((count_t_one_side.id < 100)) +--------------------PhysicalOlapScan[count_t_one_side] apply RFs: RF0 +----------------hashAgg[LOCAL] +------------------filter((count_t_one_side.score > 20) and (t1.id < 100)) +--------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_basic -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_left_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_right_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_full_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[FULL_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_left_semi_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_left_anti_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_complex_conditions -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=((t1.name < t2.name)) +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_with_aggregate -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_subquery -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] apply RFs: RF0 +----------------PhysicalProject +------------------filter((count_t_one_side.score > 10)) +--------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_outer_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_deep_subquery -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] apply RFs: RF0 +----------------PhysicalProject +------------------filter((count_t_one_side.score > 10)) +--------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_having -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------filter((count(*) > 100)) +--------hashAgg[GLOBAL] +----------PhysicalDistribute +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +------------------PhysicalProject +--------------------PhysicalOlapScan[count_t_one_side] +------------------PhysicalProject +--------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_multi_table_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.name = t3.name)) otherCondition=() +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +--------------------PhysicalProject +----------------------PhysicalOlapScan[count_t_one_side] +--------------------PhysicalProject +----------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_with_order_by -- +PhysicalResultSink +--PhysicalProject +----PhysicalQuickSort[MERGE_SORT] +------PhysicalDistribute +--------PhysicalQuickSort[LOCAL_SORT] +----------hashAgg[GLOBAL] +------------PhysicalDistribute +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +--------------------PhysicalProject +----------------------PhysicalOlapScan[count_t_one_side] +--------------------PhysicalProject +----------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_multiple_equal_conditions -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.name = t2.name)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_equal_conditions_non_aggregate_selection -- +PhysicalResultSink +--PhysicalDistribute +----hashAgg[GLOBAL] +------PhysicalDistribute +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.name = t2.name)) otherCondition=() +--------------PhysicalProject +----------------PhysicalOlapScan[count_t_one_side] +--------------PhysicalProject +----------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_with_where_clause -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] apply RFs: RF0 +----------------PhysicalProject +------------------filter((t1.score > 50)) +--------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_varied_aggregates -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_with_order_by_limit -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +--------------------PhysicalProject +----------------------PhysicalOlapScan[count_t_one_side] +--------------------PhysicalProject +----------------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_complex_join_condition -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.score = t2.score)) otherCondition=(( not (name = name))) +----------------PhysicalOlapScan[count_t_one_side] +----------------PhysicalOlapScan[count_t_one_side] + +-- !groupby_pushdown_nested_queries -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------filter((count_t_one_side.id < 100)) +--------------------PhysicalOlapScan[count_t_one_side] apply RFs: RF0 +----------------PhysicalProject +------------------filter((count_t_one_side.score > 20) and (t1.id < 100)) +--------------------PhysicalOlapScan[count_t_one_side] + diff --git a/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.out b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.out new file mode 100644 index 0000000000..873f67fabb --- /dev/null +++ b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.out @@ -0,0 +1,366 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !groupby_pushdown_basic -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------hashAgg[LOCAL] +------------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_left_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_right_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_full_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[FULL_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_left_semi_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------hashAgg[LOCAL] +------------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_left_anti_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_complex_conditions -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=((t1.name < t2.name)) +----------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_with_aggregate -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_subquery -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] apply RFs: RF0 +----------------hashAgg[LOCAL] +------------------filter((sum_t_one_side.score > 10)) +--------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_outer_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_deep_subquery -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] apply RFs: RF0 +----------------hashAgg[LOCAL] +------------------filter((sum_t_one_side.score > 10)) +--------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_having -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------filter((sum(score) > 100)) +--------hashAgg[GLOBAL] +----------PhysicalDistribute +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +------------------hashAgg[LOCAL] +--------------------PhysicalOlapScan[sum_t_one_side] +------------------PhysicalProject +--------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_mixed_aggregates -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_multi_table_join -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[LOCAL] +--------PhysicalProject +----------hashJoin[INNER_JOIN] hashCondition=((t1.name = t3.name)) otherCondition=() +------------hashAgg[GLOBAL] +--------------PhysicalDistribute +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------------hashAgg[LOCAL] +------------------------PhysicalOlapScan[sum_t_one_side] +----------------------PhysicalProject +------------------------PhysicalOlapScan[sum_t_one_side] +------------PhysicalDistribute +--------------PhysicalProject +----------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_with_order_by -- +PhysicalResultSink +--PhysicalProject +----PhysicalQuickSort[MERGE_SORT] +------PhysicalDistribute +--------PhysicalQuickSort[LOCAL_SORT] +----------hashAgg[GLOBAL] +------------PhysicalDistribute +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +--------------------hashAgg[LOCAL] +----------------------PhysicalOlapScan[sum_t_one_side] +--------------------PhysicalProject +----------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_multiple_equal_conditions -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.name = t2.name)) otherCondition=() +----------------hashAgg[LOCAL] +------------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_equal_conditions_with_aggregate -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.name = t2.name)) otherCondition=() +----------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_equal_conditions_non_aggregate_selection -- +PhysicalResultSink +--PhysicalDistribute +----hashAgg[GLOBAL] +------PhysicalDistribute +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.name = t2.name)) otherCondition=() +--------------hashAgg[LOCAL] +----------------PhysicalOlapScan[sum_t_one_side] +--------------PhysicalProject +----------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_equal_conditions_non_aggregate_selection_with_aggregate -- +PhysicalResultSink +--PhysicalDistribute +----hashAgg[GLOBAL] +------PhysicalDistribute +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.name = t2.name)) otherCondition=() +--------------hashAgg[LOCAL] +----------------PhysicalOlapScan[sum_t_one_side] +--------------hashAgg[LOCAL] +----------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_with_where_clause -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] apply RFs: RF0 +----------------hashAgg[LOCAL] +------------------filter((t1.score > 50)) +--------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_varied_aggregates -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_with_order_by_limit -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +--------------------hashAgg[LOCAL] +----------------------PhysicalOlapScan[sum_t_one_side] +--------------------PhysicalProject +----------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_alias_multiple_equal_conditions -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1_alias.id = t2_alias.id) and (t1_alias.name = t2_alias.name)) otherCondition=() +----------------hashAgg[LOCAL] +------------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_complex_join_condition -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id) and (t1.score = t2.score)) otherCondition=(( not (name = name))) +----------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_function_processed_columns -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] +----------------PhysicalProject +------------------PhysicalOlapScan[sum_t_one_side] + +-- !groupby_pushdown_nested_queries -- +PhysicalResultSink +--PhysicalDistribute +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] +----------------PhysicalProject +------------------filter((sum_t_one_side.id < 100)) +--------------------PhysicalOlapScan[sum_t_one_side] apply RFs: RF0 +----------------hashAgg[LOCAL] +------------------filter((sum_t_one_side.score > 20) and (t1.id < 100)) +--------------------PhysicalOlapScan[sum_t_one_side] + diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy new file mode 100644 index 0000000000..fe98ce57bf --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("eager_aggregate_basic_one_side") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + sql "SET ENABLE_NEREIDS_RULES=push_down_min_max_through_join_one_side" + sql "SET ENABLE_NEREIDS_RULES=push_down_sum_through_join_one_side" + sql "SET ENABLE_NEREIDS_RULES=push_down_count_through_join_one_side" + + sql """ + DROP TABLE IF EXISTS shunt_log_com_dd_library_one_side; + """ + sql """ + DROP TABLE IF EXISTS com_dd_library_one_side; + """ + + sql""" + CREATE TABLE `shunt_log_com_dd_library_one_side` ( + `device_id` varchar(255) NOT NULL, + `experiment_id` varchar(255) NOT NULL, + `group_id` varchar(255) NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`device_id`) + DISTRIBUTED BY HASH(`device_id`) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql""" + CREATE TABLE `com_dd_library_one_side` ( + `event_id` varchar(255) NULL, + `device_id` varchar(255) NULL DEFAULT "", + `time_stamp` datetime NULL + ) ENGINE=OLAP + DUPLICATE KEY(`event_id`) + DISTRIBUTED BY HASH(`device_id`) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + qt_1 """ + explain shape plan + select + b.group_id, + COUNT(a.event_id) + from + com_dd_library_one_side a + join shunt_log_com_dd_library_one_side b on + a.device_id = b.device_id + where + a.event_id = "ad_click" + and b.experiment_id = 37 + group by + b.group_id; + """ + + qt_2 """ + explain shape plan + select + a.event_id, + b.experiment_id, + b.group_id, + COUNT(a.event_id) + from + com_dd_library_one_side a + join shunt_log_com_dd_library_one_side b on + a.device_id = b.device_id + where + b.experiment_id = 73 + group by + b.group_id, + b.experiment_id, + a.event_id; + """ + + qt_3 """ + explain shape plan + select + a.event_id, + b.experiment_id, + b.group_id, + COUNT(a.event_id), + date_format(a.time_stamp, '%Y-%m-%d') as dayF + from + com_dd_library_one_side a + join shunt_log_com_dd_library_one_side b on + a.device_id = b.device_id + where + b.experiment_id = 73 + group by + b.group_id, + b.experiment_id, + a.event_id, + dayF; + """ + + qt_4 """ + explain shape plan + select + a.event_id, + b.experiment_id, + b.group_id, + COUNT(a.event_id) + from + com_dd_library_one_side a + join shunt_log_com_dd_library_one_side b on + a.device_id = b.device_id + group by + b.group_id, + b.experiment_id, + a.event_id; + """ +} diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.groovy new file mode 100644 index 0000000000..d8f3e84485 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.groovy @@ -0,0 +1,239 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("push_down_count_through_join_one_side") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + sql """ + DROP TABLE IF EXISTS count_t_one_side; + """ + + sql """ + CREATE TABLE IF NOT EXISTS count_t_one_side( + `id` int(32), + `score` int(64) NULL, + `name` varchar(64) NULL + ) ENGINE = OLAP + DISTRIBUTED BY HASH(id) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql "insert into count_t_one_side values (1, 1, 'a')" + sql "insert into count_t_one_side values (2, null, 'a')" + sql "insert into count_t_one_side values (3, 1, null)" + sql "insert into count_t_one_side values (4, 2, 'b')" + sql "insert into count_t_one_side values (5, null, 'b')" + sql "insert into count_t_one_side values (6, 2, null)" + sql "insert into count_t_one_side values (7, 3, 'c')" + sql "insert into count_t_one_side values (8, null, 'c')" + sql "insert into count_t_one_side values (9, 3, null)" + sql "insert into count_t_one_side values (10, null, null)" + + sql "SET ENABLE_NEREIDS_RULES=push_down_count_through_join_one_side" + + qt_groupby_pushdown_basic """ + explain shape plan select count(t1.score) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_left_join """ + explain shape plan select count(t1.score) from count_t_one_side t1 left join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_right_join """ + explain shape plan select count(t1.score) from count_t_one_side t1 right join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_full_join """ + explain shape plan select count(t1.score) from count_t_one_side t1 full join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_left_semi_join """ + explain shape plan select count(t1.score) from count_t_one_side t1 inner join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_left_anti_join """ + explain shape plan select count(t1.score) from count_t_one_side t1 left anti join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_complex_conditions """ + explain shape plan select count(t1.score) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id and t1.name < t2.name group by t1.name; + """ + + qt_groupby_pushdown_with_aggregate """ + explain shape plan select count(t1.score), avg(t1.score) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_subquery """ + explain shape plan select count(t1.score) from (select * from count_t_one_side where score > 10) t1 join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_outer_join """ + explain shape plan select count(t1.score) from count_t_one_side t1 left join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_deep_subquery """ + explain shape plan select count(t1.score) from (select * from (select * from count_t_one_side) count_t_one_side where score > 10) t1 join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_having """ + explain shape plan select count(t1.score) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id group by t1.name having count(t1.score) > 100; + """ + + qt_groupby_pushdown_mixed_aggregates """ + explain shape plan select count(t1.score), count(*), max(t1.score) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_multi_table_join """ + explain shape plan select count(t1.score) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id join count_t_one_side t3 on t1.name = t3.name group by t1.name; + """ + + qt_groupby_pushdown_with_order_by """ + explain shape plan select count(t1.score) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id group by t1.name order by t1.name; + """ + + qt_groupby_pushdown_multiple_equal_conditions """ + explain shape plan select count(t1.score) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + qt_groupby_pushdown_equal_conditions_with_aggregate """ + explain shape plan select max(t1.score), count(t2.score) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + qt_groupby_pushdown_equal_conditions_non_aggregate_selection """ + explain shape plan select t1.name, count(t1.score) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + qt_groupby_pushdown_equal_conditions_non_aggregate_selection_with_aggregate """ + explain shape plan select t1.name, count(t1.score), count(t2.score) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + qt_groupby_pushdown_with_where_clause """ + explain shape plan select count(t1.score) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id and t1.score > 50 group by t1.name; + """ + + qt_groupby_pushdown_varied_aggregates """ + explain shape plan select count(t1.score), avg(t1.id), count(t2.name) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_with_order_by_limit """ + explain shape plan select count(t1.score) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id group by t1.name order by count(t1.score) limit 10; + """ + + qt_groupby_pushdown_alias_multiple_equal_conditions """ + explain shape plan select count(t1_alias.score) from count_t_one_side t1_alias join count_t_one_side t2_alias on t1_alias.id = t2_alias.id and t1_alias.name = t2_alias.name group by t1_alias.name; + """ + + qt_groupby_pushdown_complex_join_condition """ + explain shape plan select count(t1.score) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id and t1.score = t2.score and t1.name <> t2.name group by t1.name; + """ + + qt_groupby_pushdown_function_processed_columns """ + explain shape plan select count(LENGTH(t1.name)) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_nested_queries """ + explain shape plan select count(t1.score) from (select * from count_t_one_side where score > 20) t1 join (select * from count_t_one_side where id < 100) t2 on t1.id = t2.id group by t1.name; + """ + + /* COUNT(*) */ + qt_groupby_pushdown_basic """ + explain shape plan select count(*) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_left_join """ + explain shape plan select count(*) from count_t_one_side t1 left join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_right_join """ + explain shape plan select count(*) from count_t_one_side t1 right join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_full_join """ + explain shape plan select count(*) from count_t_one_side t1 full join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_left_semi_join """ + explain shape plan select count(*) from count_t_one_side t1 inner join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_left_anti_join """ + explain shape plan select count(*) from count_t_one_side t1 left anti join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_complex_conditions """ + explain shape plan select count(*) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id and t1.name < t2.name group by t1.name; + """ + + qt_groupby_pushdown_with_aggregate """ + explain shape plan select count(*) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_subquery """ + explain shape plan select count(*) from (select * from count_t_one_side where score > 10) t1 join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_outer_join """ + explain shape plan select count(*) from count_t_one_side t1 left join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_deep_subquery """ + explain shape plan select count(*) from (select * from (select * from count_t_one_side) count_t_one_side where score > 10) t1 join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_having """ + explain shape plan select count(*) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id group by t1.name having count(*) > 100; + """ + + qt_groupby_pushdown_multi_table_join """ + explain shape plan select count(*) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id join count_t_one_side t3 on t1.name = t3.name group by t1.name; + """ + + qt_groupby_pushdown_with_order_by """ + explain shape plan select count(*) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id group by t1.name order by t1.name; + """ + + qt_groupby_pushdown_multiple_equal_conditions """ + explain shape plan select count(*) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + qt_groupby_pushdown_equal_conditions_non_aggregate_selection """ + explain shape plan select t1.name, count(*) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + qt_groupby_pushdown_with_where_clause """ + explain shape plan select count(*) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id and t1.score > 50 group by t1.name; + """ + + qt_groupby_pushdown_varied_aggregates """ + explain shape plan select count(*), avg(t1.id), count(t2.name) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_with_order_by_limit """ + explain shape plan select count(*) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id group by t1.name order by count(*) limit 10; + """ + + qt_groupby_pushdown_complex_join_condition """ + explain shape plan select count(*) from count_t_one_side t1 join count_t_one_side t2 on t1.id = t2.id and t1.score = t2.score and t1.name <> t2.name group by t1.name; + """ + + qt_groupby_pushdown_nested_queries """ + explain shape plan select count(*) from (select * from count_t_one_side where score > 20) t1 join (select * from count_t_one_side where id < 100) t2 on t1.id = t2.id group by t1.name; + """ +} \ No newline at end of file diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.groovy new file mode 100644 index 0000000000..29c5376b78 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.groovy @@ -0,0 +1,154 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("push_down_sum_through_join_one_side") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + sql """ + DROP TABLE IF EXISTS sum_t_one_side; + """ + + sql """ + CREATE TABLE IF NOT EXISTS sum_t_one_side( + `id` int(32), + `score` int(64) NULL, + `name` varchar(64) NULL + ) ENGINE = OLAP + DISTRIBUTED BY HASH(id) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql "insert into sum_t_one_side values (1, 1, 'a')" + sql "insert into sum_t_one_side values (2, null, 'a')" + sql "insert into sum_t_one_side values (3, 1, null)" + sql "insert into sum_t_one_side values (4, 2, 'b')" + sql "insert into sum_t_one_side values (5, null, 'b')" + sql "insert into sum_t_one_side values (6, 2, null)" + sql "insert into sum_t_one_side values (7, 3, 'c')" + sql "insert into sum_t_one_side values (8, null, 'c')" + sql "insert into sum_t_one_side values (9, 3, null)" + sql "insert into sum_t_one_side values (10, null, null)" + + sql "SET ENABLE_NEREIDS_RULES=push_down_sum_through_join_one_side" + + qt_groupby_pushdown_basic """ + explain shape plan select sum(t1.score) from sum_t_one_side t1, sum_t_one_side t2 where t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_left_join """ + explain shape plan select sum(t1.score) from sum_t_one_side t1 left join sum_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_right_join """ + explain shape plan select sum(t1.score) from sum_t_one_side t1 right join sum_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_full_join """ + explain shape plan select sum(t1.score) from sum_t_one_side t1 full join sum_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_left_semi_join """ + explain shape plan select sum(t1.score) from sum_t_one_side t1 inner join sum_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_left_anti_join """ + explain shape plan select sum(t1.score) from sum_t_one_side t1 left anti join sum_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_complex_conditions """ + explain shape plan select sum(t1.score) from sum_t_one_side t1 join sum_t_one_side t2 on t1.id = t2.id and t1.name < t2.name group by t1.name; + """ + + qt_groupby_pushdown_with_aggregate """ + explain shape plan select sum(t1.score), avg(t1.score) from sum_t_one_side t1 join sum_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_subquery """ + explain shape plan select sum(t1.score) from (select * from sum_t_one_side where score > 10) t1 join sum_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_outer_join """ + explain shape plan select sum(t1.score) from sum_t_one_side t1 left join sum_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_deep_subquery """ + explain shape plan select sum(t1.score) from (select * from (select * from sum_t_one_side) sum_t_one_side where score > 10) t1 join sum_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_having """ + explain shape plan select sum(t1.score) from sum_t_one_side t1, sum_t_one_side t2 where t1.id = t2.id group by t1.name having sum(t1.score) > 100; + """ + + qt_groupby_pushdown_mixed_aggregates """ + explain shape plan select sum(t1.score), count(*), max(t1.score) from sum_t_one_side t1 join sum_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_multi_table_join """ + explain shape plan select sum(t1.score) from sum_t_one_side t1 join sum_t_one_side t2 on t1.id = t2.id join sum_t_one_side t3 on t1.name = t3.name group by t1.name; + """ + + qt_groupby_pushdown_with_order_by """ + explain shape plan select sum(t1.score) from sum_t_one_side t1, sum_t_one_side t2 where t1.id = t2.id group by t1.name order by t1.name; + """ + + qt_groupby_pushdown_multiple_equal_conditions """ + explain shape plan select sum(t1.score) from sum_t_one_side t1, sum_t_one_side t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + qt_groupby_pushdown_equal_conditions_with_aggregate """ + explain shape plan select max(t1.score), sum(t2.score) from sum_t_one_side t1 join sum_t_one_side t2 on t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + qt_groupby_pushdown_equal_conditions_non_aggregate_selection """ + explain shape plan select t1.name, sum(t1.score) from sum_t_one_side t1, sum_t_one_side t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + qt_groupby_pushdown_equal_conditions_non_aggregate_selection_with_aggregate """ + explain shape plan select t1.name, sum(t1.score), sum(t2.score) from sum_t_one_side t1, sum_t_one_side t2 where t1.id = t2.id and t1.name = t2.name group by t1.name; + """ + + qt_groupby_pushdown_with_where_clause """ + explain shape plan select sum(t1.score) from sum_t_one_side t1, sum_t_one_side t2 where t1.id = t2.id and t1.score > 50 group by t1.name; + """ + + qt_groupby_pushdown_varied_aggregates """ + explain shape plan select sum(t1.score), avg(t1.id), count(t2.name) from sum_t_one_side t1 join sum_t_one_side t2 on t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_with_order_by_limit """ + explain shape plan select sum(t1.score) from sum_t_one_side t1, sum_t_one_side t2 where t1.id = t2.id group by t1.name order by sum(t1.score) limit 10; + """ + + qt_groupby_pushdown_alias_multiple_equal_conditions """ + explain shape plan select sum(t1_alias.score) from sum_t_one_side t1_alias join sum_t_one_side t2_alias on t1_alias.id = t2_alias.id and t1_alias.name = t2_alias.name group by t1_alias.name; + """ + + qt_groupby_pushdown_complex_join_condition """ + explain shape plan select sum(t1.score) from sum_t_one_side t1 join sum_t_one_side t2 on t1.id = t2.id and t1.score = t2.score and t1.name <> t2.name group by t1.name; + """ + + qt_groupby_pushdown_function_processed_columns """ + explain shape plan select sum(LENGTH(t1.name)) from sum_t_one_side t1, sum_t_one_side t2 where t1.id = t2.id group by t1.name; + """ + + qt_groupby_pushdown_nested_queries """ + explain shape plan select sum(t1.score) from (select * from sum_t_one_side where score > 20) t1 join (select * from sum_t_one_side where id < 100) t2 on t1.id = t2.id group by t1.name; + """ +} \ No newline at end of file