[fix](planner) fix bug in agg on constant column (#16442)

For performance reason, we want to remove constant column from groupingExprs.
For example:
                `select sum(T.A) from T group by T.B, 'xyz'` is equivalent to `select sum(T.A) from T group by T.B`
We can remove constant column `abc` from groupingExprs.

But there is an exception when all groupingExpr are constant
For example:

                sql1: `select 'abc' from t group by 'abc'`
                 is not equivalent to
                sql2: `select 'abc' from t`

                sql3: `select 'abc', sum(a) from t group by 'abc'`
                 is not equivalent to
                sql4: `select 1, sum(a) from t`
                (when t is empty, sql3 returns 0 tuple, sql4 return 1 tuple)

We need to keep some constant columns if all groupingExpr are constant.

Consider sql5 `select a from (select "abc" as a, 'def' as b) T group by b, a;`
if the constant column `a` is in select list, this column should not be removed.
sql5 is transformed to 
sql6 `select a from (select "abc" as a, 'def' as b) T group by a;`
This commit is contained in:
minghong
2023-02-13 11:26:08 +08:00
committed by GitHub
parent 46dd887ae2
commit a2b9b9edd7
5 changed files with 133 additions and 6 deletions

View File

@ -1301,11 +1301,65 @@ public class SelectStmt extends QueryStmt {
substituteOrdinalsAliases(groupingExprs, "GROUP BY", analyzer, aliasFirst);
if (!groupByClause.isGroupByExtension() && !groupingExprs.isEmpty()) {
ArrayList<Expr> tempExprs = new ArrayList<>(groupingExprs);
groupingExprs.removeIf(Expr::isConstant);
if (groupingExprs.isEmpty() && aggExprs.isEmpty()) {
// should keep at least one expr to make the result correct
groupingExprs.add(tempExprs.get(0));
/*
For performance reason, we want to remove constant column from groupingExprs.
For example:
`select sum(T.A) from T group by T.B, 'xyz'` is equivalent to `select sum(T.A) from T group by T.B`
We can remove constant column `abc` from groupingExprs.
But there is an exception when all groupingExpr are constant
For example:
sql1: `select 'abc' from t group by 'abc'`
is not equivalent to
sql2: `select 'abc' from t`
sql3: `select 'abc', sum(a) from t group by 'abc'`
is not equivalent to
sql4: `select 1, sum(a) from t`
(when t is empty, sql3 returns 0 tuple, sql4 return 1 tuple)
We need to keep some constant columns if all groupingExpr are constant.
Consider sql5 `select a from (select "abc" as a, 'def' as b) T group by b, a;`
if the constant column is in select list, this column should not be removed.
*/
Expr theFirstConstantGroupingExpr = null;
boolean someGroupExprRemoved = false;
ArrayList<Expr> tempExprs = new ArrayList<>();
for (Expr groupExpr : groupingExprs) {
//remove groupExpr if it is const, and it is not in select list
boolean removeConstGroupingKey = false;
if (groupExpr.isConstant()) {
if (theFirstConstantGroupingExpr == null) {
theFirstConstantGroupingExpr = groupExpr;
}
boolean keyInSelectList = false;
if (groupExpr instanceof SlotRef) {
for (SelectListItem item : selectList.getItems()) {
if (item.getExpr() instanceof SlotRef) {
keyInSelectList = ((SlotRef) item.getExpr()).columnEqual(groupExpr);
if (keyInSelectList) {
break;
}
}
}
}
removeConstGroupingKey = ! keyInSelectList;
}
if (removeConstGroupingKey) {
someGroupExprRemoved = true;
} else {
tempExprs.add(groupExpr);
}
}
if (someGroupExprRemoved) {
groupingExprs.clear();
groupingExprs.addAll(tempExprs);
//groupingExprs need at least one expr, it can be
//any original grouping expr. we use the first one.
if (groupingExprs.isEmpty()) {
groupingExprs.add(theFirstConstantGroupingExpr);
}
}
}

View File

@ -0,0 +1,11 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !scalar_count --
0
-- !agg_count --
-- !scalar_sum --
\N
-- !agg_sum --

View File

@ -2,3 +2,9 @@
-- !runConstantGroupBy_order --
2
-- !runConstantGroupBy_order_2 --
oneline
-- !runConstantGroupBy_order_3 --
abc

View File

@ -0,0 +1,53 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This suit test remove constant group_by_key
suite("constant_group_key") {
//remove constant key
explain {
sql("select 'oneline' from nation group by n_nationkey, 'constant1'")
contains "group by: `n_nationkey`"
}
//reserve constant key in group by
explain {
sql("select 'oneline' from nation group by 'constant1'")
contains "group by: 'constant1'"
}
explain {
sql("select 'oneline', sum(n_nationkey) from nation group by 'constant1', 'constant2'")
contains "group by: 'constant1'"
}
explain {
sql("select a from (select '1' as b, 'abc' as a) T group by b, a")
contains "group by: 'abc'"
}
sql "drop table if exists cgk_tbl"
sql """
create table cgk_tbl (a int null)
engine=olap duplicate key(a)
distributed by hash(a) buckets 1
properties ('replication_num'='1');"""
qt_scalar_count 'select count(*) from cgk_tbl';
qt_agg_count "select count(*) from cgk_tbl group by 'any const str'"
qt_scalar_sum 'select sum(a) from cgk_tbl';
qt_agg_sum "select sum(a) from cgk_tbl group by 'any const str'"
}

View File

@ -1 +1,4 @@
select 2 from nation group by 1
select 2 from nation group by 1;
select "oneline" from nation group by "I am a const, but not to be removed";
select a from (select "abc" as a, '1' as b) T group by b, a;