[fix](planner) fix bug in agg on constant column (#16442)
For performance reason, we want to remove constant column from groupingExprs.
For example:
`select sum(T.A) from T group by T.B, 'xyz'` is equivalent to `select sum(T.A) from T group by T.B`
We can remove constant column `abc` from groupingExprs.
But there is an exception when all groupingExpr are constant
For example:
sql1: `select 'abc' from t group by 'abc'`
is not equivalent to
sql2: `select 'abc' from t`
sql3: `select 'abc', sum(a) from t group by 'abc'`
is not equivalent to
sql4: `select 1, sum(a) from t`
(when t is empty, sql3 returns 0 tuple, sql4 return 1 tuple)
We need to keep some constant columns if all groupingExpr are constant.
Consider sql5 `select a from (select "abc" as a, 'def' as b) T group by b, a;`
if the constant column `a` is in select list, this column should not be removed.
sql5 is transformed to
sql6 `select a from (select "abc" as a, 'def' as b) T group by a;`
This commit is contained in:
@ -1301,11 +1301,65 @@ public class SelectStmt extends QueryStmt {
|
||||
substituteOrdinalsAliases(groupingExprs, "GROUP BY", analyzer, aliasFirst);
|
||||
|
||||
if (!groupByClause.isGroupByExtension() && !groupingExprs.isEmpty()) {
|
||||
ArrayList<Expr> tempExprs = new ArrayList<>(groupingExprs);
|
||||
groupingExprs.removeIf(Expr::isConstant);
|
||||
if (groupingExprs.isEmpty() && aggExprs.isEmpty()) {
|
||||
// should keep at least one expr to make the result correct
|
||||
groupingExprs.add(tempExprs.get(0));
|
||||
/*
|
||||
For performance reason, we want to remove constant column from groupingExprs.
|
||||
For example:
|
||||
`select sum(T.A) from T group by T.B, 'xyz'` is equivalent to `select sum(T.A) from T group by T.B`
|
||||
We can remove constant column `abc` from groupingExprs.
|
||||
|
||||
But there is an exception when all groupingExpr are constant
|
||||
For example:
|
||||
sql1: `select 'abc' from t group by 'abc'`
|
||||
is not equivalent to
|
||||
sql2: `select 'abc' from t`
|
||||
|
||||
sql3: `select 'abc', sum(a) from t group by 'abc'`
|
||||
is not equivalent to
|
||||
sql4: `select 1, sum(a) from t`
|
||||
(when t is empty, sql3 returns 0 tuple, sql4 return 1 tuple)
|
||||
|
||||
We need to keep some constant columns if all groupingExpr are constant.
|
||||
Consider sql5 `select a from (select "abc" as a, 'def' as b) T group by b, a;`
|
||||
if the constant column is in select list, this column should not be removed.
|
||||
*/
|
||||
|
||||
Expr theFirstConstantGroupingExpr = null;
|
||||
boolean someGroupExprRemoved = false;
|
||||
ArrayList<Expr> tempExprs = new ArrayList<>();
|
||||
for (Expr groupExpr : groupingExprs) {
|
||||
//remove groupExpr if it is const, and it is not in select list
|
||||
boolean removeConstGroupingKey = false;
|
||||
if (groupExpr.isConstant()) {
|
||||
if (theFirstConstantGroupingExpr == null) {
|
||||
theFirstConstantGroupingExpr = groupExpr;
|
||||
}
|
||||
boolean keyInSelectList = false;
|
||||
if (groupExpr instanceof SlotRef) {
|
||||
for (SelectListItem item : selectList.getItems()) {
|
||||
if (item.getExpr() instanceof SlotRef) {
|
||||
keyInSelectList = ((SlotRef) item.getExpr()).columnEqual(groupExpr);
|
||||
if (keyInSelectList) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
removeConstGroupingKey = ! keyInSelectList;
|
||||
}
|
||||
if (removeConstGroupingKey) {
|
||||
someGroupExprRemoved = true;
|
||||
} else {
|
||||
tempExprs.add(groupExpr);
|
||||
}
|
||||
}
|
||||
if (someGroupExprRemoved) {
|
||||
groupingExprs.clear();
|
||||
groupingExprs.addAll(tempExprs);
|
||||
//groupingExprs need at least one expr, it can be
|
||||
//any original grouping expr. we use the first one.
|
||||
if (groupingExprs.isEmpty()) {
|
||||
groupingExprs.add(theFirstConstantGroupingExpr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
11
regression-test/data/trino_p0/constant_group_key.out
Normal file
11
regression-test/data/trino_p0/constant_group_key.out
Normal file
@ -0,0 +1,11 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !scalar_count --
|
||||
0
|
||||
|
||||
-- !agg_count --
|
||||
|
||||
-- !scalar_sum --
|
||||
\N
|
||||
|
||||
-- !agg_sum --
|
||||
|
||||
@ -2,3 +2,9 @@
|
||||
-- !runConstantGroupBy_order --
|
||||
2
|
||||
|
||||
-- !runConstantGroupBy_order_2 --
|
||||
oneline
|
||||
|
||||
-- !runConstantGroupBy_order_3 --
|
||||
abc
|
||||
|
||||
|
||||
53
regression-test/suites/trino_p0/constant_group_key.groovy
Normal file
53
regression-test/suites/trino_p0/constant_group_key.groovy
Normal file
@ -0,0 +1,53 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
// This suit test remove constant group_by_key
|
||||
suite("constant_group_key") {
|
||||
//remove constant key
|
||||
explain {
|
||||
sql("select 'oneline' from nation group by n_nationkey, 'constant1'")
|
||||
contains "group by: `n_nationkey`"
|
||||
}
|
||||
|
||||
//reserve constant key in group by
|
||||
explain {
|
||||
sql("select 'oneline' from nation group by 'constant1'")
|
||||
contains "group by: 'constant1'"
|
||||
}
|
||||
|
||||
explain {
|
||||
sql("select 'oneline', sum(n_nationkey) from nation group by 'constant1', 'constant2'")
|
||||
contains "group by: 'constant1'"
|
||||
}
|
||||
|
||||
explain {
|
||||
sql("select a from (select '1' as b, 'abc' as a) T group by b, a")
|
||||
contains "group by: 'abc'"
|
||||
}
|
||||
|
||||
sql "drop table if exists cgk_tbl"
|
||||
sql """
|
||||
create table cgk_tbl (a int null)
|
||||
engine=olap duplicate key(a)
|
||||
distributed by hash(a) buckets 1
|
||||
properties ('replication_num'='1');"""
|
||||
|
||||
qt_scalar_count 'select count(*) from cgk_tbl';
|
||||
qt_agg_count "select count(*) from cgk_tbl group by 'any const str'"
|
||||
qt_scalar_sum 'select sum(a) from cgk_tbl';
|
||||
qt_agg_sum "select sum(a) from cgk_tbl group by 'any const str'"
|
||||
}
|
||||
@ -1 +1,4 @@
|
||||
select 2 from nation group by 1
|
||||
select 2 from nation group by 1;
|
||||
select "oneline" from nation group by "I am a const, but not to be removed";
|
||||
select a from (select "abc" as a, '1' as b) T group by b, a;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user