修复在层次查询中将where条件分割为join和非join,并将join条件下推

This commit is contained in:
luozihao
2024-04-30 11:53:18 +08:00
committed by yaoxin
parent 5ce3fc959d
commit be94cc71b5
6 changed files with 228 additions and 25 deletions

View File

@ -273,6 +273,7 @@ void transformStartWith(ParseState *pstate, SelectStmt *stmt, Query *qry)
transformSingleRTE(pstate, qry, &context, (Node *)stmt->startWithClause);
} else {
transformFromList(pstate, qry, &context, (Node *)stmt->startWithClause);
stmt->whereClause = context.whereClause;
}
return;
@ -1524,6 +1525,82 @@ static bool count_columnref_walker(Node *node, int *columnref_count)
return false;
}
/*
* split_where_expr_by_join
*
* According to the logic of Hierarchical Queries processing, start with operation will be as follows
* 1. Evaluating JOIN expr in where clause if there any(should only be muti-table join).
* 2. Evaluating START WITH/CONNECT BY condition.
* 3. Evaluating remaining where clause a.k.a NON-JOIN quals if there any.
*
* Based on that, we have to push down the join-qual for start-with/connect-by quals, but non-join quals save for later
* filtering right after start with operation is done.
* Note that, for OR expr, if we have to replace the sub_expr, we have to set it as FALSE, other case will be TRUE.
*
* For expr = (A AND (B OR C)) OR (D AND E) AND (F OR G) OR (H OR I), and we assume C, E, F, I is non_join qual,
*
* so the remaining JOIN expr will be
* (A AND B) OR (D) AND (G) OR H
* NON-JOIN expr will be
* (C) OR (E) AND (F) OR (I)
*/
static void split_where_expr_by_join(Node **p_expr_join, Node **p_expr_non_join)
{
if (p_expr_join == NULL || (p_expr_join != NULL && *p_expr_join == NULL)) {
return;
}
Node *expr_join = *p_expr_join;
Node *expr_non_join = *p_expr_non_join;
if (IsA(expr_join, A_Expr)) {
A_Expr *a_expr_join = (A_Expr *)expr_join;
A_Expr *a_expr_non_join = (A_Expr *)expr_non_join;
if (a_expr_join->kind == AEXPR_AND || a_expr_join->kind == AEXPR_OR) {
if (a_expr_join->lexpr != NULL) {
split_where_expr_by_join(&(a_expr_join->lexpr), &(a_expr_non_join->lexpr));
}
if (a_expr_join->rexpr != NULL) {
split_where_expr_by_join(&(a_expr_join->rexpr), &(a_expr_non_join->rexpr));
}
if (a_expr_join->lexpr == NULL) {
*p_expr_join = a_expr_join->rexpr;
} else if (a_expr_join->rexpr == NULL) {
*p_expr_join = a_expr_join->lexpr;
}
if (a_expr_non_join->lexpr == NULL) {
*p_expr_non_join = a_expr_non_join->rexpr;
} else if (a_expr_non_join->rexpr == NULL) {
*p_expr_non_join = a_expr_non_join->lexpr;
}
} else if (a_expr_join->kind == AEXPR_OP) {
int l_cref_count = 0;
int r_cref_count = 0;
count_columnref_walker(a_expr_join->lexpr, &l_cref_count);
count_columnref_walker(a_expr_join->rexpr, &r_cref_count);
/* if there're columnRefs at both sides meaning its a join-qual, otherwise, non-join-qual */
if (l_cref_count != 0 && r_cref_count != 0) {
pfree_ext(expr_non_join);
*p_expr_non_join = NULL;
} else {
pfree_ext(expr_join);
*p_expr_join = NULL;
}
} else {
/* must not be a join qual for non AEXPR_OP type */
pfree_ext(expr_join);
*p_expr_join = NULL;
}
} else {
/* must not be a join qual for non-a_expr type */
pfree_ext(expr_join);
*p_expr_join = NULL;
}
}
static bool walker_to_exclude_non_join_quals(Node *node, Node *context_node)
{
if (node == NULL) {
@ -1616,17 +1693,12 @@ static SelectStmt *CreateStartWithCTEInnerBranch(ParseState* pstate,
if (whereClause != NULL) {
JoinExpr *final_join = (JoinExpr *)origin_table;
/* pushdown requires deep copying of the quals */
Node *whereCopy = (Node *)copyObject(whereClause);
/* only join quals can be pushed down */
raw_expression_tree_walker((Node *)whereCopy,
(bool (*)())walker_to_exclude_non_join_quals, (void*)NULL);
if (final_join->quals == NULL) {
final_join->quals = whereCopy;
final_join->quals = whereClause;
} else {
final_join->quals =
(Node *)makeA_Expr(AEXPR_AND, NULL, whereCopy, final_join->quals, -1);
(Node *)makeA_Expr(AEXPR_AND, NULL, whereClause, final_join->quals, -1);
}
}
}
@ -1700,17 +1772,11 @@ static SelectStmt *CreateStartWithCTEOuterBranch(ParseState *pstate,
/* push whereClause down to init part, taking care to avoid NULL in expr. */
quals = (Node *)startWithExpr;
Node* whereClauseCopy = (Node *)copyObject(whereClause);
if (whereClause != NULL) {
/* only join quals can be pushed down */
raw_expression_tree_walker((Node*)whereClauseCopy,
(bool (*)())walker_to_exclude_non_join_quals, (void*)NULL);
}
if (quals == NULL) {
quals = whereClauseCopy;
quals = whereClause;
} else if (whereClause != NULL) {
quals = (Node *)makeA_Expr(AEXPR_AND, NULL, whereClauseCopy,
quals = (Node *)makeA_Expr(AEXPR_AND, NULL, whereClause,
(Node*)startWithExpr, -1);
}
@ -1878,15 +1944,19 @@ static void transformFromList(ParseState* pstate, Query* qry,
A_Expr *startWithExpr = (A_Expr *)context->startWithExpr;
A_Expr *connectByExpr = (A_Expr *)context->connectByExpr;
List *relInfoList = context->relInfoList;
Node *whereClause = context->whereClause;
Node *whereClauseOnlyJoin = (Node *)copyObject(context->whereClause);
if (context->whereClause != NULL) {
split_where_expr_by_join(&whereClauseOnlyJoin, &(context->whereClause));
}
/* make union-all branch for none-recursive part */
SelectStmt *outerBranch = CreateStartWithCTEOuterBranch(pstate, context,
relInfoList, (Node *)startWithExpr, whereClause);
relInfoList, (Node *)startWithExpr, whereClauseOnlyJoin);
/* make joinExpr for recursive part */
SelectStmt *innerBranch = CreateStartWithCTEInnerBranch(pstate, context,
relInfoList, (Node *)connectByExpr, whereClause);
relInfoList, (Node *)connectByExpr, whereClauseOnlyJoin);
CreateStartWithCTE(pstate, qry, outerBranch, innerBranch, context);

View File

@ -452,7 +452,6 @@ explain (costs off) select t1.ID,t1.VCH,pid,NAME,PTEX from TEST_HCB_FQB t1,TEST_
QUERY PLAN
----------------------------------------------------------------------------
CTE Scan on tmp_reuslt
Filter: ("t1@id" = "t2@id")
CTE tmp_reuslt
-> StartWith Operator
Start With pseudo atts: RUITR, array_key_1
@ -471,7 +470,7 @@ explain (costs off) select t1.ID,t1.VCH,pid,NAME,PTEX from TEST_HCB_FQB t1,TEST_
-> WorkTable Scan on tmp_reuslt
-> Hash
-> Seq Scan on test_hcb_fqb t1
(20 rows)
(19 rows)
CREATE OR REPLACE FUNCTION test_hcb_pro1(i_id in int) return int
AS
@ -1840,3 +1839,29 @@ select max(name) from test3 connect by parentid = prior id group by sys_connect_
drop table test3;
drop table test2;
drop table test1;
drop table if exists left_table;
drop table if exists right_table;
create table left_table(id int);
create table right_table(id int);
declare
i int:=0;
begin
for i in 1..5 loop
insert into left_table values(i);
insert into right_table values(i+1);
end loop;
commit;
end;
/
select left_table.id as id1,right_table.id as id2 from left_table,right_table where left_table.id+1=right_table.id start with left_table.id=1 connect by prior right_table.id=left_table.id;
id1 | id2
-----+-----
1 | 2
2 | 3
3 | 4
4 | 5
5 | 6
(5 rows)
drop table left_table;
drop table right_table;

View File

@ -2124,3 +2124,70 @@ connect by
drop synonym sy_pf;
drop table pf_org_rela_test;
drop table if exists sw_test;
-- test join clause in where split and push down into start with/connect by clause
drop table if exists sw_tb_1;
create table sw_tb_1(a int,b int,c int,d int);
create table sw_tb_2(a int,b int,c int,d int);
insert into sw_tb_1 values(1,1,1,1);
insert into sw_tb_1 values(2,2,2,2);
insert into sw_tb_1 values(3,3,3,3);
insert into sw_tb_1 values(4,4,4,4);
insert into sw_tb_2 values(1,1,1,1);
insert into sw_tb_2 values(2,2,2,2);
insert into sw_tb_2 values(3,3,3,3);
insert into sw_tb_2 values(4,4,4,4);
select * from sw_tb_1,sw_tb_2 where sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c;
a | b | c | d | a | b | c | d
---+---+---+---+---+---+---+---
3 | 3 | 3 | 3 | 3 | 3 | 3 | 3
4 | 4 | 4 | 4 | 4 | 4 | 4 | 4
(2 rows)
select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a=sw_tb_2.b or sw_tb_1.a not in (select 3)) and sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c;
a | b | c | d | a | b | c | d
---+---+---+---+---+---+---+---
4 | 4 | 4 | 4 | 4 | 4 | 4 | 4
(1 row)
select * from sw_tb_1,sw_tb_2 where sw_tb_1.a !=3 or sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c;
a | b | c | d | a | b | c | d
---+---+---+---+---+---+---+---
4 | 4 | 4 | 4 | 4 | 4 | 4 | 4
(1 row)
select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a+sw_tb_1.b=sw_tb_2.b or sw_tb_1.a=sw_tb_2.c) and (sw_tb_1.b=sw_tb_2.a or (substr(sw_tb_1.b,2)=substr(sw_tb_2.b,2) and sw_tb_1.b is null)) or (sw_tb_1.c=sw_tb_2.d or sw_tb_1.b!=2) start with sw_tb_1.a=2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c;
a | b | c | d | a | b | c | d
---+---+---+---+---+---+---+---
(0 rows)
explain select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a+sw_tb_1.b=sw_tb_2.b or sw_tb_1.a=sw_tb_2.c) and (sw_tb_1.b=sw_tb_2.a or (substr(sw_tb_1.b,2)=substr(sw_tb_2.b,2) and sw_tb_1.b is null)) or (sw_tb_1.c=sw_tb_2.d or sw_tb_1.b!=2) start with sw_tb_1.a=2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
CTE Scan on tmp_reuslt (cost=1205332.35..1220171.82 rows=652970 width=32)
Filter: (("sw_tb_1@b" IS NULL) OR ("sw_tb_1@b" <> 2))
CTE tmp_reuslt
-> StartWith Operator (cost=0.00..1205332.35 rows=659532 width=32)
Start With pseudo atts: RUITR, array_key_4
-> Recursive Union (cost=0.00..1205332.35 rows=659532 width=32)
-> Nested Loop (cost=0.00..659.38 rows=82 width=32)
Join Filter: (((((swtest.sw_tb_1.a + swtest.sw_tb_1.b) = swtest.sw_tb_2.b) OR (swtest.sw_tb_1.a = swtest.sw_tb_2.c)) AND ((swtest.sw_tb_1.b = swtest.sw_tb_2.a) OR (substr((swtest.sw_tb_1.b)::text, 2) = substr((swtest.sw_tb_2.b)::text, 2)))) OR (swtest.sw_tb_1.c = swtest.sw_tb_2.d))
-> Seq Scan on sw_tb_2 (cost=0.00..27.76 rows=1776 width=16)
-> Materialize (cost=0.00..32.25 rows=9 width=16)
-> Seq Scan on sw_tb_1 (cost=0.00..32.20 rows=9 width=16)
Filter: (a = 2)
-> Hash Join (cost=26.65..119148.23 rows=65945 width=32)
Hash Cond: (swtest.sw_tb_2.c = tmp_reuslt."sw_tb_1@d")
-> Materialize (cost=0.00..118341.56 rows=16083 width=32)
-> Nested Loop (cost=0.00..118341.56 rows=16083 width=32)
Join Filter: (((((swtest.sw_tb_1.a + swtest.sw_tb_1.b) = swtest.sw_tb_2.b) OR (swtest.sw_tb_1.a = swtest.sw_tb_2.c)) AND ((swtest.sw_tb_1.b = swtest.sw_tb_2.a) OR (substr((swtest.sw_tb_1.b)::text, 2) = substr((swtest.sw_tb_2.b)::text, 2)))) OR (swtest.sw_tb_1.c = swtest.sw_tb_2.d))
-> Seq Scan on sw_tb_1 (cost=0.00..27.76 rows=1776 width=16)
-> Materialize (cost=0.00..36.64 rows=1776 width=16)
-> Seq Scan on sw_tb_2 (cost=0.00..27.76 rows=1776 width=16)
-> Hash (cost=16.40..16.40 rows=820 width=4)
-> WorkTable Scan on tmp_reuslt (cost=0.00..16.40 rows=820 width=4)
(22 rows)
drop table sw_tb_1;
drop table sw_tb_2;

View File

@ -512,7 +512,6 @@ explain (costs off) select * from t1, t2 where t1.id = t2.id start with t1.id =
QUERY PLAN
-----------------------------------------------------------------------------
CTE Scan on tmp_reuslt
Filter: ("t1@id" = "t2@id")
CTE tmp_reuslt
-> StartWith Operator
Start With pseudo atts: RUITR, array_key_1
@ -531,7 +530,7 @@ explain (costs off) select * from t1, t2 where t1.id = t2.id start with t1.id =
-> WorkTable Scan on tmp_reuslt
-> Hash
-> Seq Scan on t2
(20 rows)
(19 rows)
explain (costs off) select * from t1 join t2 on t1.id = t2.id start with t1.id = t2.id and t1.id = 1 connect by prior t1.id = t1.pid;
QUERY PLAN
@ -561,7 +560,6 @@ explain (costs off) select * from t1, (select * from t2) as test where t1.id = t
QUERY PLAN
-----------------------------------------------------------------------------
CTE Scan on tmp_reuslt
Filter: ("t1@id" = "test@id")
CTE tmp_reuslt
-> StartWith Operator
Start With pseudo atts: RUITR, array_key_1
@ -580,7 +578,7 @@ explain (costs off) select * from t1, (select * from t2) as test where t1.id = t
-> WorkTable Scan on tmp_reuslt
-> Hash
-> Seq Scan on t2
(20 rows)
(19 rows)
explain (costs off) select id, (select id from t2 start with t2.id = t1.id connect by t2.id = t1.id limit 1) from t1 where id = 1;
ERROR: START WITH CONNECT BY clauses must have at least one prior key.

View File

@ -522,3 +522,24 @@ select max(name) from test3 connect by parentid = prior id group by sys_connect_
drop table test3;
drop table test2;
drop table test1;
drop table if exists left_table;
drop table if exists right_table;
create table left_table(id int);
create table right_table(id int);
declare
i int:=0;
begin
for i in 1..5 loop
insert into left_table values(i);
insert into right_table values(i+1);
end loop;
commit;
end;
/
select left_table.id as id1,right_table.id as id2 from left_table,right_table where left_table.id+1=right_table.id start with left_table.id=1 connect by prior right_table.id=left_table.id;
drop table left_table;
drop table right_table;

View File

@ -809,4 +809,26 @@ connect by
and sy_pf.org_rela_type = 'ADMINISTRATION';
drop synonym sy_pf;
drop table pf_org_rela_test;
drop table pf_org_rela_test;
drop table if exists sw_test;
-- test join clause in where split and push down into start with/connect by clause
drop table if exists sw_tb_1;
create table sw_tb_1(a int,b int,c int,d int);
create table sw_tb_2(a int,b int,c int,d int);
insert into sw_tb_1 values(1,1,1,1);
insert into sw_tb_1 values(2,2,2,2);
insert into sw_tb_1 values(3,3,3,3);
insert into sw_tb_1 values(4,4,4,4);
insert into sw_tb_2 values(1,1,1,1);
insert into sw_tb_2 values(2,2,2,2);
insert into sw_tb_2 values(3,3,3,3);
insert into sw_tb_2 values(4,4,4,4);
select * from sw_tb_1,sw_tb_2 where sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c;
select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a=sw_tb_2.b or sw_tb_1.a not in (select 3)) and sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c;
select * from sw_tb_1,sw_tb_2 where sw_tb_1.a !=3 or sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c;
select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a+sw_tb_1.b=sw_tb_2.b or sw_tb_1.a=sw_tb_2.c) and (sw_tb_1.b=sw_tb_2.a or (substr(sw_tb_1.b,2)=substr(sw_tb_2.b,2) and sw_tb_1.b is null)) or (sw_tb_1.c=sw_tb_2.d or sw_tb_1.b!=2) start with sw_tb_1.a=2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c;
explain select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a+sw_tb_1.b=sw_tb_2.b or sw_tb_1.a=sw_tb_2.c) and (sw_tb_1.b=sw_tb_2.a or (substr(sw_tb_1.b,2)=substr(sw_tb_2.b,2) and sw_tb_1.b is null)) or (sw_tb_1.c=sw_tb_2.d or sw_tb_1.b!=2) start with sw_tb_1.a=2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c;
drop table sw_tb_1;
drop table sw_tb_2;