From b9e819795f21bf09f3dca8f440987f80de559cd3 Mon Sep 17 00:00:00 2001 From: chenxiaobin19 <1025221611@qq.com> Date: Thu, 22 Feb 2024 20:48:11 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AD=90=E6=9F=A5=E8=AF=A2=E5=9C=BA=E6=99=AF?= =?UTF-8?q?=E5=8E=BB=E9=99=A4=E5=A4=9A=E4=BD=99=E7=9A=84distinct=E5=92=8Cg?= =?UTF-8?q?roup=20by=E5=AD=90=E5=8F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/common/backend/utils/misc/guc/guc_sql.cpp | 1 + src/gausskernel/optimizer/plan/subselect.cpp | 30 ++++++++ src/include/utils/guc.h | 6 +- src/test/regress/expected/subselect_part2.out | 71 +++++++++++++++++++ src/test/regress/sql/subselect_part2.sql | 20 ++++++ 5 files changed, 127 insertions(+), 1 deletion(-) diff --git a/src/common/backend/utils/misc/guc/guc_sql.cpp b/src/common/backend/utils/misc/guc/guc_sql.cpp index 1ce287c7e..ad4adcc82 100755 --- a/src/common/backend/utils/misc/guc/guc_sql.cpp +++ b/src/common/backend/utils/misc/guc/guc_sql.cpp @@ -248,6 +248,7 @@ static const struct config_enum_entry rewrite_options[] = { {"predpushforce", PRED_PUSH_FORCE, false}, {"disable_pullup_expr_sublink", SUBLINK_PULLUP_DISABLE_EXPR, false}, {"enable_sublink_pullup_enhanced", SUBLINK_PULLUP_ENHANCED, false}, + {"remove_redundant_distinct_group_by", REMOVE_REDUNDANT_DISTINCT_GROUP_BY, false}, {NULL, 0, false} }; diff --git a/src/gausskernel/optimizer/plan/subselect.cpp b/src/gausskernel/optimizer/plan/subselect.cpp index 80d2ea3f1..0e581faaf 100644 --- a/src/gausskernel/optimizer/plan/subselect.cpp +++ b/src/gausskernel/optimizer/plan/subselect.cpp @@ -2032,6 +2032,34 @@ void SS_process_ctes(PlannerInfo* root) } } +static void remove_redundant_distinct_group_by(Query* subselect) +{ + if (!ENABLE_REMOVE_REDUNDANT_DISTINCT_GROUP_BY()) { + return; + } + + /* + * Remove distinct if distinct-on is not used and there is no limitClause. + */ + if (subselect->distinctClause && !subselect->hasDistinctOn && + subselect->limitCount == NULL && subselect->limitOffset == NULL) { + list_free_deep(subselect->distinctClause); + subselect->distinctClause = NULL; + } + + /* + * Remove GROUP BY if there are no aggregate functions, HAVING clause, grouping + * sets and limitClause. + */ + + if (subselect->groupClause && !subselect->hasAggs && !subselect->hasWindowFuncs && + subselect->havingQual == NULL && subselect->groupingSets == NULL && + subselect->limitCount == NULL && subselect->limitOffset == NULL) { + list_free_deep(subselect->groupClause); + subselect->groupClause = NULL; + } +} + /* * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join * @@ -2111,6 +2139,8 @@ JoinExpr* convert_ANY_sublink_to_join(PlannerInfo* root, SubLink* sublink, bool if (contain_volatile_functions(sublink->testexpr)) return NULL; + remove_redundant_distinct_group_by(subselect); + /* * Okay, pull up the sub-select into upper range table. * diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 3510dd56d..ac85dbfcc 100755 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -402,7 +402,8 @@ typedef enum { PRED_PUSH_NORMAL = (1 << 7), PRED_PUSH_FORCE = (1 << 8), SUBLINK_PULLUP_DISABLE_EXPR = (1 << 9), /* disable pull sublink in expr clause */ - SUBLINK_PULLUP_ENHANCED = (1 << 10) + SUBLINK_PULLUP_ENHANCED = (1 << 10), + REMOVE_REDUNDANT_DISTINCT_GROUP_BY = (1 << 11) } rewrite_param; typedef enum { @@ -461,6 +462,9 @@ typedef enum { #define ENABLE_PRED_PUSH_ALL(root) \ ((ENABLE_PRED_PUSH(root) || ENABLE_PRED_PUSH_NORMAL(root) || ENABLE_PRED_PUSH_FORCE(root)) && permit_predpush(root)) +#define ENABLE_REMOVE_REDUNDANT_DISTINCT_GROUP_BY() \ + ((REMOVE_REDUNDANT_DISTINCT_GROUP_BY) & (uint)u_sess->attr.attr_sql.rewrite_rule) + #define ENABLE_SQL_BETA_FEATURE(feature) \ ((bool)((uint)u_sess->attr.attr_sql.sql_beta_feature & feature)) diff --git a/src/test/regress/expected/subselect_part2.out b/src/test/regress/expected/subselect_part2.out index 31a95b2f4..fc6b96054 100644 --- a/src/test/regress/expected/subselect_part2.out +++ b/src/test/regress/expected/subselect_part2.out @@ -544,3 +544,74 @@ drop table tab_sa2; drop table tab_sa3; drop table tab_sa4; drop table tab_sa5; +set rewrite_rule = 'remove_redundant_distinct_group_by'; +create table subselect_t1 (a int); +create table subselect_t2 (a int, b int); +insert into subselect_t1 values (1); +insert into subselect_t1 values (2); +insert into subselect_t1 values (-1); +insert into subselect_t2 values (1, 1); +insert into subselect_t2 values (2, 2); +insert into subselect_t2 values (-1, -1); +explain (costs off) select * from subselect_t1 where a in (select distinct a from subselect_t2); + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: (subselect_t1.a = subselect_t2.a) + -> Seq Scan on subselect_t1 + -> Hash + -> HashAggregate + Group By Key: subselect_t2.a + -> Seq Scan on subselect_t2 +(7 rows) + +select * from subselect_t1 where a in (select distinct a from subselect_t2); + a +---- + 1 + 2 + -1 +(3 rows) + +explain (costs off) select * from subselect_t1 where a in (select a from subselect_t2 group by a); + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: (subselect_t1.a = subselect_t2.a) + -> Seq Scan on subselect_t1 + -> Hash + -> HashAggregate + Group By Key: subselect_t2.a + -> Seq Scan on subselect_t2 +(7 rows) + +select * from subselect_t1 where a in (select a from subselect_t2 group by a); + a +---- + 1 + 2 + -1 +(3 rows) + +explain (costs off) select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2); + QUERY PLAN +----------------------------------------------------------- + Hash Semi Join + Hash Cond: (subselect_t1.a = "ANY_subquery".a) + -> Seq Scan on subselect_t1 + -> Hash + -> Subquery Scan on "ANY_subquery" + -> Unique + -> Sort + Sort Key: (abs(subselect_t2.a)) + -> Seq Scan on subselect_t2 +(9 rows) + +select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2); + a +--- + 1 + 2 +(2 rows) + +reset rewrite_rule; diff --git a/src/test/regress/sql/subselect_part2.sql b/src/test/regress/sql/subselect_part2.sql index 36d96d041..14019cddd 100644 --- a/src/test/regress/sql/subselect_part2.sql +++ b/src/test/regress/sql/subselect_part2.sql @@ -387,3 +387,23 @@ drop table tab_sa2; drop table tab_sa3; drop table tab_sa4; drop table tab_sa5; + +set rewrite_rule = 'remove_redundant_distinct_group_by'; +create table subselect_t1 (a int); +create table subselect_t2 (a int, b int); +insert into subselect_t1 values (1); +insert into subselect_t1 values (2); +insert into subselect_t1 values (-1); +insert into subselect_t2 values (1, 1); +insert into subselect_t2 values (2, 2); +insert into subselect_t2 values (-1, -1); + +explain (costs off) select * from subselect_t1 where a in (select distinct a from subselect_t2); +select * from subselect_t1 where a in (select distinct a from subselect_t2); + +explain (costs off) select * from subselect_t1 where a in (select a from subselect_t2 group by a); +select * from subselect_t1 where a in (select a from subselect_t2 group by a); + +explain (costs off) select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2); +select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2); +reset rewrite_rule;