From 47097a3db82e27deecf2b2e8bdf809d88c89e627 Mon Sep 17 00:00:00 2001 From: camby <104178625@qq.com> Date: Tue, 10 Jan 2023 15:57:29 +0800 Subject: [PATCH] [fix](having) revert 15143 and fix having clause with multi-conditions (#15745) Describe your changes. Firstly having clause of Mysql is really very complex, we are hard to follow all rules, so we revert pr15143 to keep the logic the same as before. Secondly the origin implementation has problem while having clause has multi-conditions. For example: case1: here v2 inside having clause use table column test_having_alias_tb.v2 SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v2>1); ERROR 1105 (HY000): errCode = 2, detailMessage = HAVING clause not produced by aggregation output (missing from GROUP BY clause?): (`v2` > 1) case2: here v2 inside having clause use alias name v2 =sum(test_having_alias_tb.v2), another condition make logic of v2 differently. SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND v2>1) ORDER BY id,v; +------+------+------+ | id | v | v2 | +------+------+------+ | 2 | 1 | 3 | +------+------+------+ So here we try to make the having clause rules simple: Rule1: if alias name inside having clause is the same as column name, we use column name not alias name; Rule2: if alias name inside having clause do not have same name as column name, we use alias name; Co-authored-by: cambyzju --- .../org/apache/doris/analysis/SelectStmt.java | 38 ++++++++++--------- .../test_group_having_alias.out | 6 +++ .../test_group_having_alias.groovy | 6 ++- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java index acf57f28b2..7e311ffa6f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -1022,35 +1022,37 @@ public class SelectStmt extends QueryStmt { * TODO: the a.key should be replaced by a.k1 instead of unknown column 'key' in 'a' */ - // according to mysql - // having clause should use column name inside group by clause, prior to alias. - // case1: having clause use column name table.v1, because v1 inside group by clause - // select id, sum(v1) v1 from table group by id,v1 having(v1>1); - // case2: having clause use alias name v2, because v2 is not inside group by clause - // select id, sum(v1) v1, sum(v2) v2 from table group by id,v1 having(v1>1 AND v2>1); - // case3: having clause use alias name v, because table do not have column name v - // select id, floor(v1) v, sum(v2) v2 from table group by id,v having(v>1 AND v2>1); + /* according to mysql (https://dev.mysql.com/doc/refman/8.0/en/select.html) + * "For GROUP BY or HAVING clauses, it searches the FROM clause before searching in the + * select_expr values. (For GROUP BY and HAVING, this differs from the pre-MySQL 5.0 behavior + * that used the same rules as for ORDER BY.)" + * case1: having clause use column name table.v1, because it searches the FROM clause firstly + * select id, sum(v1) v1 from table group by id,v1 having(v1>1); + * case2: having clause used in aggregate functions, such as sum(v2) here + * select id, sum(v1) v1, sum(v2) v2 from table group by id,v1 having(v1>1 AND sum(v2)>1); + * case3: having clause use alias name v, because table do not have column name v + * select id, floor(v1) v, sum(v2) v2 from table group by id,v having(v>1 AND v2>1); + * case4: having clause use alias name vsum, because table do not have column name vsum + * select id, floor(v1) v, sum(v2) vsum from table group by id,v having(v>1 AND vsum>1); + */ if (groupByClause != null) { - ExprSubstitutionMap excludeGroupByaliasSMap = aliasSMap.clone(); - // according to case2, maybe some having slots inside group by clause, some do not - List groupBySlots = Lists.newArrayList(); - for (Expr expr : groupByClause.getGroupingExprs()) { - expr.collect(SlotRef.class, groupBySlots); - } - for (Expr expr : groupBySlots) { - if (excludeGroupByaliasSMap.get(expr) == null) { + ExprSubstitutionMap excludeAliasSMap = aliasSMap.clone(); + List havingSlots = Lists.newArrayList(); + havingClause.collect(SlotRef.class, havingSlots); + for (Expr expr : havingSlots) { + if (excludeAliasSMap.get(expr) == null) { continue; } try { // try to use column name firstly expr.clone().analyze(analyzer); // analyze success means column name exist, do not use alias name - excludeGroupByaliasSMap.removeByLhsExpr(expr); + excludeAliasSMap.removeByLhsExpr(expr); } catch (AnalysisException ex) { // according to case3, column name do not exist, keep alias name inside alias map } } - havingClauseAfterAnaylzed = havingClause.substitute(excludeGroupByaliasSMap, analyzer, false); + havingClauseAfterAnaylzed = havingClause.substitute(excludeAliasSMap, analyzer, false); } else { // according to mysql // if there is no group by clause, the having clause should use alias diff --git a/regression-test/data/correctness_p0/test_group_having_alias.out b/regression-test/data/correctness_p0/test_group_having_alias.out index 8d3629b5f5..7a87076e28 100644 --- a/regression-test/data/correctness_p0/test_group_having_alias.out +++ b/regression-test/data/correctness_p0/test_group_having_alias.out @@ -20,3 +20,9 @@ -- !case3 -- 2 1 3 +-- !case4 -- +2 1 3 + +-- !case5 -- +2 3 + diff --git a/regression-test/suites/correctness_p0/test_group_having_alias.groovy b/regression-test/suites/correctness_p0/test_group_having_alias.groovy index 45350026e2..80187b9113 100644 --- a/regression-test/suites/correctness_p0/test_group_having_alias.groovy +++ b/regression-test/suites/correctness_p0/test_group_having_alias.groovy @@ -94,7 +94,9 @@ """ sql """ INSERT INTO test_having_alias_tb values(1,1,1),(2,2,2),(2,3,3); """ qt_case1 """ SELECT id, sum(v1) v1 FROM test_having_alias_tb GROUP BY id,v1 having(v1>1) ORDER BY id,v1; """ - qt_case2 """ SELECT id, sum(v1) v1, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v1 having(v1!=2 AND v2>1) ORDER BY id,v1; """ - qt_case3 """ SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND v2>1) ORDER BY id,v; """ + qt_case2 """ SELECT id, sum(v1) v1, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v1 having(v1!=2 AND sum(v2)>1) ORDER BY id,v1; """ + qt_case3 """ SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND sum(v2)>1) ORDER BY id,v; """ + qt_case4 """ SELECT id, v1-2 as v, sum(v2) vsum FROM test_having_alias_tb GROUP BY id,v having(v>0 AND vsum>1) ORDER BY id,v; """ + qt_case5 """ SELECT id, max(v1) v1 FROM test_having_alias_tb GROUP BY 1 having count(distinct v1)>1 ORDER BY id; """ sql """ DROP TABLE IF EXISTS `test_having_alias_tb`; """ }