From be94cc71b5709bd46912698c8c208bea0d93aef5 Mon Sep 17 00:00:00 2001 From: luozihao <1165977584@qq.com> Date: Tue, 30 Apr 2024 11:53:18 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=9C=A8=E5=B1=82=E6=AC=A1?= =?UTF-8?q?=E6=9F=A5=E8=AF=A2=E4=B8=AD=E5=B0=86where=E6=9D=A1=E4=BB=B6?= =?UTF-8?q?=E5=88=86=E5=89=B2=E4=B8=BAjoin=E5=92=8C=E9=9D=9Ejoin=EF=BC=8C?= =?UTF-8?q?=E5=B9=B6=E5=B0=86join=E6=9D=A1=E4=BB=B6=E4=B8=8B=E6=8E=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/common/backend/parser/parse_startwith.cpp | 106 +++++++++++++++--- src/test/regress/expected/sw_bugfix-1.out | 29 ++++- src/test/regress/expected/sw_bugfix-2.out | 67 +++++++++++ src/test/regress/expected/sw_icbc.out | 6 +- src/test/regress/sql/sw_bugfix-1.sql | 21 ++++ src/test/regress/sql/sw_bugfix-2.sql | 24 +++- 6 files changed, 228 insertions(+), 25 deletions(-) diff --git a/src/common/backend/parser/parse_startwith.cpp b/src/common/backend/parser/parse_startwith.cpp index 865761b03..e50e520ce 100644 --- a/src/common/backend/parser/parse_startwith.cpp +++ b/src/common/backend/parser/parse_startwith.cpp @@ -273,6 +273,7 @@ void transformStartWith(ParseState *pstate, SelectStmt *stmt, Query *qry) transformSingleRTE(pstate, qry, &context, (Node *)stmt->startWithClause); } else { transformFromList(pstate, qry, &context, (Node *)stmt->startWithClause); + stmt->whereClause = context.whereClause; } return; @@ -1524,6 +1525,82 @@ static bool count_columnref_walker(Node *node, int *columnref_count) return false; } +/* + * split_where_expr_by_join + * + * According to the logic of Hierarchical Queries processing, start with operation will be as follows + * 1. Evaluating JOIN expr in where clause if there any(should only be muti-table join). + * 2. Evaluating START WITH/CONNECT BY condition. + * 3. Evaluating remaining where clause a.k.a NON-JOIN quals if there any. + * + * Based on that, we have to push down the join-qual for start-with/connect-by quals, but non-join quals save for later + * filtering right after start with operation is done. + * Note that, for OR expr, if we have to replace the sub_expr, we have to set it as FALSE, other case will be TRUE. + * + * For expr = (A AND (B OR C)) OR (D AND E) AND (F OR G) OR (H OR I), and we assume C, E, F, I is non_join qual, + * + * so the remaining JOIN expr will be + * (A AND B) OR (D) AND (G) OR H + * NON-JOIN expr will be + * (C) OR (E) AND (F) OR (I) + */ +static void split_where_expr_by_join(Node **p_expr_join, Node **p_expr_non_join) +{ + if (p_expr_join == NULL || (p_expr_join != NULL && *p_expr_join == NULL)) { + return; + } + + Node *expr_join = *p_expr_join; + Node *expr_non_join = *p_expr_non_join; + + if (IsA(expr_join, A_Expr)) { + A_Expr *a_expr_join = (A_Expr *)expr_join; + A_Expr *a_expr_non_join = (A_Expr *)expr_non_join; + + if (a_expr_join->kind == AEXPR_AND || a_expr_join->kind == AEXPR_OR) { + if (a_expr_join->lexpr != NULL) { + split_where_expr_by_join(&(a_expr_join->lexpr), &(a_expr_non_join->lexpr)); + } + if (a_expr_join->rexpr != NULL) { + split_where_expr_by_join(&(a_expr_join->rexpr), &(a_expr_non_join->rexpr)); + } + if (a_expr_join->lexpr == NULL) { + *p_expr_join = a_expr_join->rexpr; + } else if (a_expr_join->rexpr == NULL) { + *p_expr_join = a_expr_join->lexpr; + } + + if (a_expr_non_join->lexpr == NULL) { + *p_expr_non_join = a_expr_non_join->rexpr; + } else if (a_expr_non_join->rexpr == NULL) { + *p_expr_non_join = a_expr_non_join->lexpr; + } + } else if (a_expr_join->kind == AEXPR_OP) { + int l_cref_count = 0; + int r_cref_count = 0; + count_columnref_walker(a_expr_join->lexpr, &l_cref_count); + count_columnref_walker(a_expr_join->rexpr, &r_cref_count); + + /* if there're columnRefs at both sides meaning its a join-qual, otherwise, non-join-qual */ + if (l_cref_count != 0 && r_cref_count != 0) { + pfree_ext(expr_non_join); + *p_expr_non_join = NULL; + } else { + pfree_ext(expr_join); + *p_expr_join = NULL; + } + } else { + /* must not be a join qual for non AEXPR_OP type */ + pfree_ext(expr_join); + *p_expr_join = NULL; + } + } else { + /* must not be a join qual for non-a_expr type */ + pfree_ext(expr_join); + *p_expr_join = NULL; + } +} + static bool walker_to_exclude_non_join_quals(Node *node, Node *context_node) { if (node == NULL) { @@ -1616,17 +1693,12 @@ static SelectStmt *CreateStartWithCTEInnerBranch(ParseState* pstate, if (whereClause != NULL) { JoinExpr *final_join = (JoinExpr *)origin_table; - /* pushdown requires deep copying of the quals */ - Node *whereCopy = (Node *)copyObject(whereClause); - /* only join quals can be pushed down */ - raw_expression_tree_walker((Node *)whereCopy, - (bool (*)())walker_to_exclude_non_join_quals, (void*)NULL); if (final_join->quals == NULL) { - final_join->quals = whereCopy; + final_join->quals = whereClause; } else { final_join->quals = - (Node *)makeA_Expr(AEXPR_AND, NULL, whereCopy, final_join->quals, -1); + (Node *)makeA_Expr(AEXPR_AND, NULL, whereClause, final_join->quals, -1); } } } @@ -1700,17 +1772,11 @@ static SelectStmt *CreateStartWithCTEOuterBranch(ParseState *pstate, /* push whereClause down to init part, taking care to avoid NULL in expr. */ quals = (Node *)startWithExpr; - Node* whereClauseCopy = (Node *)copyObject(whereClause); - if (whereClause != NULL) { - /* only join quals can be pushed down */ - raw_expression_tree_walker((Node*)whereClauseCopy, - (bool (*)())walker_to_exclude_non_join_quals, (void*)NULL); - } if (quals == NULL) { - quals = whereClauseCopy; + quals = whereClause; } else if (whereClause != NULL) { - quals = (Node *)makeA_Expr(AEXPR_AND, NULL, whereClauseCopy, + quals = (Node *)makeA_Expr(AEXPR_AND, NULL, whereClause, (Node*)startWithExpr, -1); } @@ -1878,15 +1944,19 @@ static void transformFromList(ParseState* pstate, Query* qry, A_Expr *startWithExpr = (A_Expr *)context->startWithExpr; A_Expr *connectByExpr = (A_Expr *)context->connectByExpr; List *relInfoList = context->relInfoList; - Node *whereClause = context->whereClause; + Node *whereClauseOnlyJoin = (Node *)copyObject(context->whereClause); + + if (context->whereClause != NULL) { + split_where_expr_by_join(&whereClauseOnlyJoin, &(context->whereClause)); + } /* make union-all branch for none-recursive part */ SelectStmt *outerBranch = CreateStartWithCTEOuterBranch(pstate, context, - relInfoList, (Node *)startWithExpr, whereClause); + relInfoList, (Node *)startWithExpr, whereClauseOnlyJoin); /* make joinExpr for recursive part */ SelectStmt *innerBranch = CreateStartWithCTEInnerBranch(pstate, context, - relInfoList, (Node *)connectByExpr, whereClause); + relInfoList, (Node *)connectByExpr, whereClauseOnlyJoin); CreateStartWithCTE(pstate, qry, outerBranch, innerBranch, context); diff --git a/src/test/regress/expected/sw_bugfix-1.out b/src/test/regress/expected/sw_bugfix-1.out index 7e753871a..805991a58 100644 --- a/src/test/regress/expected/sw_bugfix-1.out +++ b/src/test/regress/expected/sw_bugfix-1.out @@ -452,7 +452,6 @@ explain (costs off) select t1.ID,t1.VCH,pid,NAME,PTEX from TEST_HCB_FQB t1,TEST_ QUERY PLAN ---------------------------------------------------------------------------- CTE Scan on tmp_reuslt - Filter: ("t1@id" = "t2@id") CTE tmp_reuslt -> StartWith Operator Start With pseudo atts: RUITR, array_key_1 @@ -471,7 +470,7 @@ explain (costs off) select t1.ID,t1.VCH,pid,NAME,PTEX from TEST_HCB_FQB t1,TEST_ -> WorkTable Scan on tmp_reuslt -> Hash -> Seq Scan on test_hcb_fqb t1 -(20 rows) +(19 rows) CREATE OR REPLACE FUNCTION test_hcb_pro1(i_id in int) return int AS @@ -1840,3 +1839,29 @@ select max(name) from test3 connect by parentid = prior id group by sys_connect_ drop table test3; drop table test2; drop table test1; +drop table if exists left_table; +drop table if exists right_table; +create table left_table(id int); +create table right_table(id int); +declare +i int:=0; +begin +for i in 1..5 loop +insert into left_table values(i); +insert into right_table values(i+1); +end loop; +commit; +end; +/ +select left_table.id as id1,right_table.id as id2 from left_table,right_table where left_table.id+1=right_table.id start with left_table.id=1 connect by prior right_table.id=left_table.id; + id1 | id2 +-----+----- + 1 | 2 + 2 | 3 + 3 | 4 + 4 | 5 + 5 | 6 +(5 rows) + +drop table left_table; +drop table right_table; diff --git a/src/test/regress/expected/sw_bugfix-2.out b/src/test/regress/expected/sw_bugfix-2.out index 503977ec7..47da77ea0 100755 --- a/src/test/regress/expected/sw_bugfix-2.out +++ b/src/test/regress/expected/sw_bugfix-2.out @@ -2124,3 +2124,70 @@ connect by drop synonym sy_pf; drop table pf_org_rela_test; +drop table if exists sw_test; + +-- test join clause in where split and push down into start with/connect by clause +drop table if exists sw_tb_1; +create table sw_tb_1(a int,b int,c int,d int); +create table sw_tb_2(a int,b int,c int,d int); +insert into sw_tb_1 values(1,1,1,1); +insert into sw_tb_1 values(2,2,2,2); +insert into sw_tb_1 values(3,3,3,3); +insert into sw_tb_1 values(4,4,4,4); +insert into sw_tb_2 values(1,1,1,1); +insert into sw_tb_2 values(2,2,2,2); +insert into sw_tb_2 values(3,3,3,3); +insert into sw_tb_2 values(4,4,4,4); +select * from sw_tb_1,sw_tb_2 where sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c; + a | b | c | d | a | b | c | d +---+---+---+---+---+---+---+--- + 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 +(2 rows) + +select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a=sw_tb_2.b or sw_tb_1.a not in (select 3)) and sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c; + a | b | c | d | a | b | c | d +---+---+---+---+---+---+---+--- + 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 +(1 row) + +select * from sw_tb_1,sw_tb_2 where sw_tb_1.a !=3 or sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c; + a | b | c | d | a | b | c | d +---+---+---+---+---+---+---+--- + 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 +(1 row) + +select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a+sw_tb_1.b=sw_tb_2.b or sw_tb_1.a=sw_tb_2.c) and (sw_tb_1.b=sw_tb_2.a or (substr(sw_tb_1.b,2)=substr(sw_tb_2.b,2) and sw_tb_1.b is null)) or (sw_tb_1.c=sw_tb_2.d or sw_tb_1.b!=2) start with sw_tb_1.a=2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c; + a | b | c | d | a | b | c | d +---+---+---+---+---+---+---+--- +(0 rows) + +explain select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a+sw_tb_1.b=sw_tb_2.b or sw_tb_1.a=sw_tb_2.c) and (sw_tb_1.b=sw_tb_2.a or (substr(sw_tb_1.b,2)=substr(sw_tb_2.b,2) and sw_tb_1.b is null)) or (sw_tb_1.c=sw_tb_2.d or sw_tb_1.b!=2) start with sw_tb_1.a=2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + CTE Scan on tmp_reuslt (cost=1205332.35..1220171.82 rows=652970 width=32) + Filter: (("sw_tb_1@b" IS NULL) OR ("sw_tb_1@b" <> 2)) + CTE tmp_reuslt + -> StartWith Operator (cost=0.00..1205332.35 rows=659532 width=32) + Start With pseudo atts: RUITR, array_key_4 + -> Recursive Union (cost=0.00..1205332.35 rows=659532 width=32) + -> Nested Loop (cost=0.00..659.38 rows=82 width=32) + Join Filter: (((((swtest.sw_tb_1.a + swtest.sw_tb_1.b) = swtest.sw_tb_2.b) OR (swtest.sw_tb_1.a = swtest.sw_tb_2.c)) AND ((swtest.sw_tb_1.b = swtest.sw_tb_2.a) OR (substr((swtest.sw_tb_1.b)::text, 2) = substr((swtest.sw_tb_2.b)::text, 2)))) OR (swtest.sw_tb_1.c = swtest.sw_tb_2.d)) + -> Seq Scan on sw_tb_2 (cost=0.00..27.76 rows=1776 width=16) + -> Materialize (cost=0.00..32.25 rows=9 width=16) + -> Seq Scan on sw_tb_1 (cost=0.00..32.20 rows=9 width=16) + Filter: (a = 2) + -> Hash Join (cost=26.65..119148.23 rows=65945 width=32) + Hash Cond: (swtest.sw_tb_2.c = tmp_reuslt."sw_tb_1@d") + -> Materialize (cost=0.00..118341.56 rows=16083 width=32) + -> Nested Loop (cost=0.00..118341.56 rows=16083 width=32) + Join Filter: (((((swtest.sw_tb_1.a + swtest.sw_tb_1.b) = swtest.sw_tb_2.b) OR (swtest.sw_tb_1.a = swtest.sw_tb_2.c)) AND ((swtest.sw_tb_1.b = swtest.sw_tb_2.a) OR (substr((swtest.sw_tb_1.b)::text, 2) = substr((swtest.sw_tb_2.b)::text, 2)))) OR (swtest.sw_tb_1.c = swtest.sw_tb_2.d)) + -> Seq Scan on sw_tb_1 (cost=0.00..27.76 rows=1776 width=16) + -> Materialize (cost=0.00..36.64 rows=1776 width=16) + -> Seq Scan on sw_tb_2 (cost=0.00..27.76 rows=1776 width=16) + -> Hash (cost=16.40..16.40 rows=820 width=4) + -> WorkTable Scan on tmp_reuslt (cost=0.00..16.40 rows=820 width=4) +(22 rows) + +drop table sw_tb_1; +drop table sw_tb_2; diff --git a/src/test/regress/expected/sw_icbc.out b/src/test/regress/expected/sw_icbc.out index 6dd487021..3c07167aa 100644 --- a/src/test/regress/expected/sw_icbc.out +++ b/src/test/regress/expected/sw_icbc.out @@ -512,7 +512,6 @@ explain (costs off) select * from t1, t2 where t1.id = t2.id start with t1.id = QUERY PLAN ----------------------------------------------------------------------------- CTE Scan on tmp_reuslt - Filter: ("t1@id" = "t2@id") CTE tmp_reuslt -> StartWith Operator Start With pseudo atts: RUITR, array_key_1 @@ -531,7 +530,7 @@ explain (costs off) select * from t1, t2 where t1.id = t2.id start with t1.id = -> WorkTable Scan on tmp_reuslt -> Hash -> Seq Scan on t2 -(20 rows) +(19 rows) explain (costs off) select * from t1 join t2 on t1.id = t2.id start with t1.id = t2.id and t1.id = 1 connect by prior t1.id = t1.pid; QUERY PLAN @@ -561,7 +560,6 @@ explain (costs off) select * from t1, (select * from t2) as test where t1.id = t QUERY PLAN ----------------------------------------------------------------------------- CTE Scan on tmp_reuslt - Filter: ("t1@id" = "test@id") CTE tmp_reuslt -> StartWith Operator Start With pseudo atts: RUITR, array_key_1 @@ -580,7 +578,7 @@ explain (costs off) select * from t1, (select * from t2) as test where t1.id = t -> WorkTable Scan on tmp_reuslt -> Hash -> Seq Scan on t2 -(20 rows) +(19 rows) explain (costs off) select id, (select id from t2 start with t2.id = t1.id connect by t2.id = t1.id limit 1) from t1 where id = 1; ERROR: START WITH CONNECT BY clauses must have at least one prior key. diff --git a/src/test/regress/sql/sw_bugfix-1.sql b/src/test/regress/sql/sw_bugfix-1.sql index b651d6541..73b8a05b2 100644 --- a/src/test/regress/sql/sw_bugfix-1.sql +++ b/src/test/regress/sql/sw_bugfix-1.sql @@ -522,3 +522,24 @@ select max(name) from test3 connect by parentid = prior id group by sys_connect_ drop table test3; drop table test2; drop table test1; + +drop table if exists left_table; +drop table if exists right_table; +create table left_table(id int); +create table right_table(id int); + +declare +i int:=0; +begin +for i in 1..5 loop +insert into left_table values(i); +insert into right_table values(i+1); +end loop; +commit; +end; +/ + +select left_table.id as id1,right_table.id as id2 from left_table,right_table where left_table.id+1=right_table.id start with left_table.id=1 connect by prior right_table.id=left_table.id; + +drop table left_table; +drop table right_table; \ No newline at end of file diff --git a/src/test/regress/sql/sw_bugfix-2.sql b/src/test/regress/sql/sw_bugfix-2.sql index ae91aaa59..bc6a37187 100644 --- a/src/test/regress/sql/sw_bugfix-2.sql +++ b/src/test/regress/sql/sw_bugfix-2.sql @@ -809,4 +809,26 @@ connect by and sy_pf.org_rela_type = 'ADMINISTRATION'; drop synonym sy_pf; -drop table pf_org_rela_test; \ No newline at end of file +drop table pf_org_rela_test; + +drop table if exists sw_test; + +-- test join clause in where split and push down into start with/connect by clause +drop table if exists sw_tb_1; +create table sw_tb_1(a int,b int,c int,d int); +create table sw_tb_2(a int,b int,c int,d int); +insert into sw_tb_1 values(1,1,1,1); +insert into sw_tb_1 values(2,2,2,2); +insert into sw_tb_1 values(3,3,3,3); +insert into sw_tb_1 values(4,4,4,4); +insert into sw_tb_2 values(1,1,1,1); +insert into sw_tb_2 values(2,2,2,2); +insert into sw_tb_2 values(3,3,3,3); +insert into sw_tb_2 values(4,4,4,4); +select * from sw_tb_1,sw_tb_2 where sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c; +select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a=sw_tb_2.b or sw_tb_1.a not in (select 3)) and sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c; +select * from sw_tb_1,sw_tb_2 where sw_tb_1.a !=3 or sw_tb_1.c=sw_tb_2.d start with sw_tb_1.a>2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c; +select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a+sw_tb_1.b=sw_tb_2.b or sw_tb_1.a=sw_tb_2.c) and (sw_tb_1.b=sw_tb_2.a or (substr(sw_tb_1.b,2)=substr(sw_tb_2.b,2) and sw_tb_1.b is null)) or (sw_tb_1.c=sw_tb_2.d or sw_tb_1.b!=2) start with sw_tb_1.a=2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c; +explain select * from sw_tb_1,sw_tb_2 where (sw_tb_1.a+sw_tb_1.b=sw_tb_2.b or sw_tb_1.a=sw_tb_2.c) and (sw_tb_1.b=sw_tb_2.a or (substr(sw_tb_1.b,2)=substr(sw_tb_2.b,2) and sw_tb_1.b is null)) or (sw_tb_1.c=sw_tb_2.d or sw_tb_1.b!=2) start with sw_tb_1.a=2 connect by nocycle prior sw_tb_1.d=sw_tb_2.c; +drop table sw_tb_1; +drop table sw_tb_2; \ No newline at end of file