From 9e014cfb8ad2b90425036aed20496d762bcb80ef Mon Sep 17 00:00:00 2001 From: feiniaofeiafei <53502832+feiniaofeiafei@users.noreply.github.com> Date: Fri, 15 Mar 2024 14:25:52 +0800 Subject: [PATCH] [fix](nereids) fix bug when grouping has same grouping set (#32235) --- .../community/design/grouping_sets_design.md | 8 ++--- .../community/design/grouping_sets_design.md | 12 +++---- .../nereids/trees/plans/algebra/Repeat.java | 15 ++++++--- .../grouping_with_same_grouping_set.out | 9 ++++++ .../grouping_with_same_grouping_set.groovy | 31 +++++++++++++++++++ 5 files changed, 61 insertions(+), 14 deletions(-) create mode 100644 regression-test/data/nereids_rules_p0/grouping_sets/grouping_with_same_grouping_set.out create mode 100644 regression-test/suites/nereids_rules_p0/grouping_sets/grouping_with_same_grouping_set.groovy diff --git a/docs/en/community/design/grouping_sets_design.md b/docs/en/community/design/grouping_sets_design.md index 16acc33997..497a8b665f 100644 --- a/docs/en/community/design/grouping_sets_design.md +++ b/docs/en/community/design/grouping_sets_design.md @@ -45,11 +45,11 @@ This statement is equivalent to: ``` SELECT k1, k2, SUM( k3 ) FROM t GROUP BY k1, k2 -UNION +UNION ALL SELECT k1, null, SUM( k3 ) FROM t GROUP BY k1 -UNION +UNION ALL SELECT null, k2, SUM( k3 ) FROM t GROUP BY k2 -UNION +UNION ALL SELECT null, null, SUM( k3 ) FROM t ``` @@ -308,7 +308,7 @@ Expression, column name. ### 3.1 Overall Design Approaches -For `GROUPING SET` is equivalent to the `UNION` of `GROUP BY` . So we can expand input rows, and run an GROUP BY on these rows. +For `GROUPING SET` is equivalent to the `UNION ALL` of `GROUP BY` . So we can expand input rows, and run an GROUP BY on these rows. For example: diff --git a/docs/zh-CN/community/design/grouping_sets_design.md b/docs/zh-CN/community/design/grouping_sets_design.md index 0c19094cff..2edc7e395c 100644 --- a/docs/zh-CN/community/design/grouping_sets_design.md +++ b/docs/zh-CN/community/design/grouping_sets_design.md @@ -30,7 +30,7 @@ under the License. ### 1.1 GROUPING SETS 子句 -GROUP BY GROUPING SETS 是对 GROUP BY 子句的扩展,它能够在一个 GROUP BY 子句中一次实现多个集合的分组。其结果等价于将多个相应 GROUP BY 子句进行 UNION 操作。 +GROUP BY GROUPING SETS 是对 GROUP BY 子句的扩展,它能够在一个 GROUP BY 子句中一次实现多个集合的分组。其结果等价于将多个相应 GROUP BY 子句进行 UNION ALL 操作。 特别地,一个空的子集意味着将所有的行聚集到一个分组。 GROUP BY 子句是只含有一个元素的 GROUP BY GROUPING SETS 的特例。 @@ -45,11 +45,11 @@ SELECT k1, k2, SUM( k3 ) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k1), (k2), ( ``` SELECT k1, k2, SUM( k3 ) FROM t GROUP BY k1, k2 -UNION +UNION ALL SELECT k1, null, SUM( k3 ) FROM t GROUP BY k1 -UNION +UNION ALL SELECT null, k2, SUM( k3 ) FROM t GROUP BY k2 -UNION +UNION ALL SELECT null, null, SUM( k3 ) FROM t ``` @@ -314,7 +314,7 @@ GROUP BY CUBE ( expr [ , expr [ , ... ] ] ) ### 3.1 整体思路 -既然 GROUPING SET 子句逻辑上等价于多个相应 GROUP BY 子句的 UNION,可以通过扩展输入行(此输入行已经是通过下推条件过滤和投影后的), 在此基础上进行一个单一的 GROUP BY 操作来达到目的。 +既然 GROUPING SET 子句逻辑上等价于多个相应 GROUP BY 子句的 UNION ALL,可以通过扩展输入行(此输入行已经是通过下推条件过滤和投影后的), 在此基础上进行一个单一的 GROUP BY 操作来达到目的。 关键是怎样扩展输入行呢?下面举例说明: @@ -458,7 +458,7 @@ SELECT k1, k2, GROUPING_ID(k1,k2), SUM(k3) FROM t GROUP BY GROUPING SETS ((k1, k ``` -可以看到,其结果与对 GROUPING SETS 子句后每个子集进行 GROUP BY 后再进行 UNION 的结果一致。 +可以看到,其结果与对 GROUPING SETS 子句后每个子集进行 GROUP BY 后再进行 UNION ALL 的结果一致。 ``` select k1, k2, sum(k3) from t group by k1, k2 diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Repeat.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Repeat.java index 388cb42643..29258b6bba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Repeat.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Repeat.java @@ -228,11 +228,18 @@ public interface Repeat extends Aggregate { this.shapes = ImmutableList.copyOf(shapes); } - // compute a long value that backend need to fill to the GROUPING_ID slot + /**compute a long value that backend need to fill to the GROUPING_ID slot*/ public List computeVirtualGroupingIdValue() { - return shapes.stream() - .map(GroupingSetShape::computeLongValue) - .collect(ImmutableList.toImmutableList()); + Set res = Sets.newLinkedHashSet(); + long k = (long) Math.pow(2, flattenGroupingSetExpression.size()); + for (GroupingSetShape shape : shapes) { + Long val = shape.computeLongValue(); + while (res.contains(val)) { + val += k; + } + res.add(val); + } + return ImmutableList.copyOf(res); } public int indexOf(Expression expression) { diff --git a/regression-test/data/nereids_rules_p0/grouping_sets/grouping_with_same_grouping_set.out b/regression-test/data/nereids_rules_p0/grouping_sets/grouping_with_same_grouping_set.out new file mode 100644 index 0000000000..0d10c8d0a3 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/grouping_sets/grouping_with_same_grouping_set.out @@ -0,0 +1,9 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !test -- +1 +1 +2 +2 +3 +3 + diff --git a/regression-test/suites/nereids_rules_p0/grouping_sets/grouping_with_same_grouping_set.groovy b/regression-test/suites/nereids_rules_p0/grouping_sets/grouping_with_same_grouping_set.groovy new file mode 100644 index 0000000000..8333ea4fb0 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/grouping_sets/grouping_with_same_grouping_set.groovy @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +suite("grouping_with_same_grouping_set") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql """ + DROP TABLE IF EXISTS t1; + """ + sql """ + create table t1(a int) distributed by hash(a) buckets 1 properties ( 'replication_num' = '1'); + """ + sql """ + insert into t1 values (1), (2), (3); + """ + qt_test "select max(a) from t1 group by grouping sets ((a), (a)) order by 1" + +}