diff --git a/src/common/backend/utils/misc/guc/guc_sql.cpp b/src/common/backend/utils/misc/guc/guc_sql.cpp index 1ce287c7e..ad4adcc82 100755 --- a/src/common/backend/utils/misc/guc/guc_sql.cpp +++ b/src/common/backend/utils/misc/guc/guc_sql.cpp @@ -248,6 +248,7 @@ static const struct config_enum_entry rewrite_options[] = { {"predpushforce", PRED_PUSH_FORCE, false}, {"disable_pullup_expr_sublink", SUBLINK_PULLUP_DISABLE_EXPR, false}, {"enable_sublink_pullup_enhanced", SUBLINK_PULLUP_ENHANCED, false}, + {"remove_redundant_distinct_group_by", REMOVE_REDUNDANT_DISTINCT_GROUP_BY, false}, {NULL, 0, false} }; diff --git a/src/gausskernel/optimizer/plan/subselect.cpp b/src/gausskernel/optimizer/plan/subselect.cpp index 80d2ea3f1..0e581faaf 100644 --- a/src/gausskernel/optimizer/plan/subselect.cpp +++ b/src/gausskernel/optimizer/plan/subselect.cpp @@ -2032,6 +2032,34 @@ void SS_process_ctes(PlannerInfo* root) } } +static void remove_redundant_distinct_group_by(Query* subselect) +{ + if (!ENABLE_REMOVE_REDUNDANT_DISTINCT_GROUP_BY()) { + return; + } + + /* + * Remove distinct if distinct-on is not used and there is no limitClause. + */ + if (subselect->distinctClause && !subselect->hasDistinctOn && + subselect->limitCount == NULL && subselect->limitOffset == NULL) { + list_free_deep(subselect->distinctClause); + subselect->distinctClause = NULL; + } + + /* + * Remove GROUP BY if there are no aggregate functions, HAVING clause, grouping + * sets and limitClause. + */ + + if (subselect->groupClause && !subselect->hasAggs && !subselect->hasWindowFuncs && + subselect->havingQual == NULL && subselect->groupingSets == NULL && + subselect->limitCount == NULL && subselect->limitOffset == NULL) { + list_free_deep(subselect->groupClause); + subselect->groupClause = NULL; + } +} + /* * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join * @@ -2111,6 +2139,8 @@ JoinExpr* convert_ANY_sublink_to_join(PlannerInfo* root, SubLink* sublink, bool if (contain_volatile_functions(sublink->testexpr)) return NULL; + remove_redundant_distinct_group_by(subselect); + /* * Okay, pull up the sub-select into upper range table. * diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 3510dd56d..ac85dbfcc 100755 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -402,7 +402,8 @@ typedef enum { PRED_PUSH_NORMAL = (1 << 7), PRED_PUSH_FORCE = (1 << 8), SUBLINK_PULLUP_DISABLE_EXPR = (1 << 9), /* disable pull sublink in expr clause */ - SUBLINK_PULLUP_ENHANCED = (1 << 10) + SUBLINK_PULLUP_ENHANCED = (1 << 10), + REMOVE_REDUNDANT_DISTINCT_GROUP_BY = (1 << 11) } rewrite_param; typedef enum { @@ -461,6 +462,9 @@ typedef enum { #define ENABLE_PRED_PUSH_ALL(root) \ ((ENABLE_PRED_PUSH(root) || ENABLE_PRED_PUSH_NORMAL(root) || ENABLE_PRED_PUSH_FORCE(root)) && permit_predpush(root)) +#define ENABLE_REMOVE_REDUNDANT_DISTINCT_GROUP_BY() \ + ((REMOVE_REDUNDANT_DISTINCT_GROUP_BY) & (uint)u_sess->attr.attr_sql.rewrite_rule) + #define ENABLE_SQL_BETA_FEATURE(feature) \ ((bool)((uint)u_sess->attr.attr_sql.sql_beta_feature & feature)) diff --git a/src/test/regress/expected/subselect_part2.out b/src/test/regress/expected/subselect_part2.out index 31a95b2f4..fc6b96054 100644 --- a/src/test/regress/expected/subselect_part2.out +++ b/src/test/regress/expected/subselect_part2.out @@ -544,3 +544,74 @@ drop table tab_sa2; drop table tab_sa3; drop table tab_sa4; drop table tab_sa5; +set rewrite_rule = 'remove_redundant_distinct_group_by'; +create table subselect_t1 (a int); +create table subselect_t2 (a int, b int); +insert into subselect_t1 values (1); +insert into subselect_t1 values (2); +insert into subselect_t1 values (-1); +insert into subselect_t2 values (1, 1); +insert into subselect_t2 values (2, 2); +insert into subselect_t2 values (-1, -1); +explain (costs off) select * from subselect_t1 where a in (select distinct a from subselect_t2); + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: (subselect_t1.a = subselect_t2.a) + -> Seq Scan on subselect_t1 + -> Hash + -> HashAggregate + Group By Key: subselect_t2.a + -> Seq Scan on subselect_t2 +(7 rows) + +select * from subselect_t1 where a in (select distinct a from subselect_t2); + a +---- + 1 + 2 + -1 +(3 rows) + +explain (costs off) select * from subselect_t1 where a in (select a from subselect_t2 group by a); + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: (subselect_t1.a = subselect_t2.a) + -> Seq Scan on subselect_t1 + -> Hash + -> HashAggregate + Group By Key: subselect_t2.a + -> Seq Scan on subselect_t2 +(7 rows) + +select * from subselect_t1 where a in (select a from subselect_t2 group by a); + a +---- + 1 + 2 + -1 +(3 rows) + +explain (costs off) select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2); + QUERY PLAN +----------------------------------------------------------- + Hash Semi Join + Hash Cond: (subselect_t1.a = "ANY_subquery".a) + -> Seq Scan on subselect_t1 + -> Hash + -> Subquery Scan on "ANY_subquery" + -> Unique + -> Sort + Sort Key: (abs(subselect_t2.a)) + -> Seq Scan on subselect_t2 +(9 rows) + +select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2); + a +--- + 1 + 2 +(2 rows) + +reset rewrite_rule; diff --git a/src/test/regress/sql/subselect_part2.sql b/src/test/regress/sql/subselect_part2.sql index 36d96d041..14019cddd 100644 --- a/src/test/regress/sql/subselect_part2.sql +++ b/src/test/regress/sql/subselect_part2.sql @@ -387,3 +387,23 @@ drop table tab_sa2; drop table tab_sa3; drop table tab_sa4; drop table tab_sa5; + +set rewrite_rule = 'remove_redundant_distinct_group_by'; +create table subselect_t1 (a int); +create table subselect_t2 (a int, b int); +insert into subselect_t1 values (1); +insert into subselect_t1 values (2); +insert into subselect_t1 values (-1); +insert into subselect_t2 values (1, 1); +insert into subselect_t2 values (2, 2); +insert into subselect_t2 values (-1, -1); + +explain (costs off) select * from subselect_t1 where a in (select distinct a from subselect_t2); +select * from subselect_t1 where a in (select distinct a from subselect_t2); + +explain (costs off) select * from subselect_t1 where a in (select a from subselect_t2 group by a); +select * from subselect_t1 where a in (select a from subselect_t2 group by a); + +explain (costs off) select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2); +select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2); +reset rewrite_rule;