!4876 in子查询场景去除多余的distinct和group by子句

Merge pull request !4876 from chenxiaobin/removeGroupBy
This commit is contained in:
opengauss_bot
2024-02-23 01:53:28 +00:00
committed by Gitee
5 changed files with 127 additions and 1 deletions

View File

@ -248,6 +248,7 @@ static const struct config_enum_entry rewrite_options[] = {
{"predpushforce", PRED_PUSH_FORCE, false},
{"disable_pullup_expr_sublink", SUBLINK_PULLUP_DISABLE_EXPR, false},
{"enable_sublink_pullup_enhanced", SUBLINK_PULLUP_ENHANCED, false},
{"remove_redundant_distinct_group_by", REMOVE_REDUNDANT_DISTINCT_GROUP_BY, false},
{NULL, 0, false}
};

View File

@ -2032,6 +2032,34 @@ void SS_process_ctes(PlannerInfo* root)
}
}
static void remove_redundant_distinct_group_by(Query* subselect)
{
if (!ENABLE_REMOVE_REDUNDANT_DISTINCT_GROUP_BY()) {
return;
}
/*
* Remove distinct if distinct-on is not used and there is no limitClause.
*/
if (subselect->distinctClause && !subselect->hasDistinctOn &&
subselect->limitCount == NULL && subselect->limitOffset == NULL) {
list_free_deep(subselect->distinctClause);
subselect->distinctClause = NULL;
}
/*
* Remove GROUP BY if there are no aggregate functions, HAVING clause, grouping
* sets and limitClause.
*/
if (subselect->groupClause && !subselect->hasAggs && !subselect->hasWindowFuncs &&
subselect->havingQual == NULL && subselect->groupingSets == NULL &&
subselect->limitCount == NULL && subselect->limitOffset == NULL) {
list_free_deep(subselect->groupClause);
subselect->groupClause = NULL;
}
}
/*
* convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join
*
@ -2111,6 +2139,8 @@ JoinExpr* convert_ANY_sublink_to_join(PlannerInfo* root, SubLink* sublink, bool
if (contain_volatile_functions(sublink->testexpr))
return NULL;
remove_redundant_distinct_group_by(subselect);
/*
* Okay, pull up the sub-select into upper range table.
*

View File

@ -402,7 +402,8 @@ typedef enum {
PRED_PUSH_NORMAL = (1 << 7),
PRED_PUSH_FORCE = (1 << 8),
SUBLINK_PULLUP_DISABLE_EXPR = (1 << 9), /* disable pull sublink in expr clause */
SUBLINK_PULLUP_ENHANCED = (1 << 10)
SUBLINK_PULLUP_ENHANCED = (1 << 10),
REMOVE_REDUNDANT_DISTINCT_GROUP_BY = (1 << 11)
} rewrite_param;
typedef enum {
@ -461,6 +462,9 @@ typedef enum {
#define ENABLE_PRED_PUSH_ALL(root) \
((ENABLE_PRED_PUSH(root) || ENABLE_PRED_PUSH_NORMAL(root) || ENABLE_PRED_PUSH_FORCE(root)) && permit_predpush(root))
#define ENABLE_REMOVE_REDUNDANT_DISTINCT_GROUP_BY() \
((REMOVE_REDUNDANT_DISTINCT_GROUP_BY) & (uint)u_sess->attr.attr_sql.rewrite_rule)
#define ENABLE_SQL_BETA_FEATURE(feature) \
((bool)((uint)u_sess->attr.attr_sql.sql_beta_feature & feature))

View File

@ -544,3 +544,74 @@ drop table tab_sa2;
drop table tab_sa3;
drop table tab_sa4;
drop table tab_sa5;
set rewrite_rule = 'remove_redundant_distinct_group_by';
create table subselect_t1 (a int);
create table subselect_t2 (a int, b int);
insert into subselect_t1 values (1);
insert into subselect_t1 values (2);
insert into subselect_t1 values (-1);
insert into subselect_t2 values (1, 1);
insert into subselect_t2 values (2, 2);
insert into subselect_t2 values (-1, -1);
explain (costs off) select * from subselect_t1 where a in (select distinct a from subselect_t2);
QUERY PLAN
------------------------------------------------
Hash Join
Hash Cond: (subselect_t1.a = subselect_t2.a)
-> Seq Scan on subselect_t1
-> Hash
-> HashAggregate
Group By Key: subselect_t2.a
-> Seq Scan on subselect_t2
(7 rows)
select * from subselect_t1 where a in (select distinct a from subselect_t2);
a
----
1
2
-1
(3 rows)
explain (costs off) select * from subselect_t1 where a in (select a from subselect_t2 group by a);
QUERY PLAN
------------------------------------------------
Hash Join
Hash Cond: (subselect_t1.a = subselect_t2.a)
-> Seq Scan on subselect_t1
-> Hash
-> HashAggregate
Group By Key: subselect_t2.a
-> Seq Scan on subselect_t2
(7 rows)
select * from subselect_t1 where a in (select a from subselect_t2 group by a);
a
----
1
2
-1
(3 rows)
explain (costs off) select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2);
QUERY PLAN
-----------------------------------------------------------
Hash Semi Join
Hash Cond: (subselect_t1.a = "ANY_subquery".a)
-> Seq Scan on subselect_t1
-> Hash
-> Subquery Scan on "ANY_subquery"
-> Unique
-> Sort
Sort Key: (abs(subselect_t2.a))
-> Seq Scan on subselect_t2
(9 rows)
select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2);
a
---
1
2
(2 rows)
reset rewrite_rule;

View File

@ -387,3 +387,23 @@ drop table tab_sa2;
drop table tab_sa3;
drop table tab_sa4;
drop table tab_sa5;
set rewrite_rule = 'remove_redundant_distinct_group_by';
create table subselect_t1 (a int);
create table subselect_t2 (a int, b int);
insert into subselect_t1 values (1);
insert into subselect_t1 values (2);
insert into subselect_t1 values (-1);
insert into subselect_t2 values (1, 1);
insert into subselect_t2 values (2, 2);
insert into subselect_t2 values (-1, -1);
explain (costs off) select * from subselect_t1 where a in (select distinct a from subselect_t2);
select * from subselect_t1 where a in (select distinct a from subselect_t2);
explain (costs off) select * from subselect_t1 where a in (select a from subselect_t2 group by a);
select * from subselect_t1 where a in (select a from subselect_t2 group by a);
explain (costs off) select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2);
select * from subselect_t1 where a in (select distinct on (abs(a)) a from subselect_t2);
reset rewrite_rule;