From 1d0c1a9e694380cff86269ff4cdb07315d7155a9 Mon Sep 17 00:00:00 2001 From: gentle_hu Date: Tue, 12 Apr 2022 19:53:28 +0800 Subject: [PATCH] Allow simplification of EXISTS() subqueries containing LIMIT --- src/gausskernel/optimizer/plan/subselect.cpp | 45 ++++++++++++++++--- src/test/regress/expected/subselect_part2.out | 42 +++++++++++++++++ src/test/regress/sql/subselect_part2.sql | 13 ++++++ 3 files changed, 94 insertions(+), 6 deletions(-) diff --git a/src/gausskernel/optimizer/plan/subselect.cpp b/src/gausskernel/optimizer/plan/subselect.cpp index f96e938fe..b65978096 100644 --- a/src/gausskernel/optimizer/plan/subselect.cpp +++ b/src/gausskernel/optimizer/plan/subselect.cpp @@ -100,7 +100,7 @@ static Node* convert_testexpr_mutator(Node* node, convert_testexpr_context* cont static bool subplan_is_hashable(Plan* plan); static bool testexpr_is_hashable(Node* testexpr); static bool hash_ok_operator(OpExpr* expr); -static bool simplify_EXISTS_query(Query* query); +static bool simplify_EXISTS_query(PlannerInfo* root, Query* query); static Query* convert_EXISTS_to_ANY(PlannerInfo* root, Query* subselect, Node** testexpr, List** paramIds); static Node* replace_correlation_vars_mutator(Node* node, PlannerInfo* root); static Node* process_sublinks_mutator(Node* node, process_sublinks_context* context); @@ -604,7 +604,7 @@ static Node* make_subplan( * If it's an EXISTS subplan, we might be able to simplify it. */ if (subLinkType == EXISTS_SUBLINK) - simple_exists = simplify_EXISTS_query(subquery); + simple_exists = simplify_EXISTS_query(root, subquery); /* * For an EXISTS subplan, tell lower-level planner to expect that only the @@ -687,7 +687,7 @@ static Node* make_subplan( /* Make a second copy of the original subquery */ subquery = (Query*)copyObject(orig_subquery); /* and re-simplify */ - simple_exists = simplify_EXISTS_query(subquery); + simple_exists = simplify_EXISTS_query(root, subquery); if (!simple_exists) ereport(ERROR, (errmodule(MOD_OPT), @@ -2156,7 +2156,7 @@ JoinExpr* convert_EXISTS_sublink_to_join(PlannerInfo* root, SubLink* sublink, bo * targetlist, we have to fail, because the pullup operation leaves us * with noplace to evaluate the targetlist. */ - if (!simplify_EXISTS_query(subselect)) + if (!simplify_EXISTS_query(root, subselect)) return NULL; /* @@ -2288,7 +2288,7 @@ JoinExpr* convert_EXISTS_sublink_to_join(PlannerInfo* root, SubLink* sublink, bo * * Returns TRUE if was able to discard the targetlist, else FALSE. */ -static bool simplify_EXISTS_query(Query* query) +static bool simplify_EXISTS_query(PlannerInfo *root, Query* query) { /* * We don't try to simplify at all if the query uses set operations, @@ -2300,9 +2300,42 @@ static bool simplify_EXISTS_query(Query* query) */ if (query->commandType != CMD_SELECT || query->setOperations || query->hasAggs || query->groupingSets || query->hasWindowFuncs || query->hasModifyingCTE || query->havingQual || query->limitOffset || - query->limitCount || query->rowMarks) + query->rowMarks) return false; + /* + * LIMIT with a constant positive (or NULL) value doesn't affect the + * semantics of EXISTS, so let's ignore such clauses. This is worth doing + * because people accustomed to certain other DBMSes may be in the habit + * of writing EXISTS(SELECT ... LIMIT 1) as an optimization. If there's a + * LIMIT with anything else as argument, though, we can't simplify. + */ + if (query->limitCount) { + /* + * The LIMIT clause has not yet been through eval_const_expressions, + * so we have to apply that here. It might seem like this is a waste + * of cycles, since the only case plausibly worth worrying about is + * "LIMIT 1" ... but what we'll actually see is "LIMIT int8(1::int4)", + * so we have to fold constants or we're not going to recognize it. + */ + Node* node = eval_const_expressions(root, query->limitCount); + Const* limit = NULL; + + /* Might as well update the query if we simplified the clause. */ + query->limitCount = node; + + if (!IsA(node, Const)) + return false; + + limit = (Const*)node; + Assert(limit->consttype == INT8OID); + if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0) + return false; + + /* Whether or not the targetlist is safe, we can drop the LIMIT. */ + query->limitCount = NULL; + } + /* * Mustn't throw away the targetlist if it contains set-returning * functions; those could affect whether zero rows are returned! diff --git a/src/test/regress/expected/subselect_part2.out b/src/test/regress/expected/subselect_part2.out index cbacebc21..fb73e1036 100644 --- a/src/test/regress/expected/subselect_part2.out +++ b/src/test/regress/expected/subselect_part2.out @@ -431,6 +431,48 @@ WHERE 1=0); 1 (6 rows) +-- +-- Check EXISTS simplification with LIMIT +-- +explain (costs off) +select * from int4_tbl o where exists + (select 1 from int4_tbl i where i.f1=o.f1 limit null); + QUERY PLAN +------------------------------------------ + Hash Join + Hash Cond: (o.f1 = i.f1) + -> Seq Scan on int4_tbl o + -> Hash + -> HashAggregate + Group By Key: i.f1 + -> Seq Scan on int4_tbl i +(7 rows) + +explain (costs off) +select * from int4_tbl o where not exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 1); + QUERY PLAN +------------------------------------ + Hash Anti Join + Hash Cond: (o.f1 = i.f1) + -> Seq Scan on int4_tbl o + -> Hash + -> Seq Scan on int4_tbl i +(5 rows) + +explain (costs off) +select * from int4_tbl o where exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 0); + QUERY PLAN +-------------------------------------- + Seq Scan on int4_tbl o + Filter: (SubPlan 1) + SubPlan 1 + -> Limit + -> Seq Scan on int4_tbl i + Filter: (f1 = o.f1) +(6 rows) + drop table if exists location_type cascade; drop table if exists item_inventory_plan cascade; NOTICE: drop cascades to view usview03 diff --git a/src/test/regress/sql/subselect_part2.sql b/src/test/regress/sql/subselect_part2.sql index 18213334e..e12e54a6e 100644 --- a/src/test/regress/sql/subselect_part2.sql +++ b/src/test/regress/sql/subselect_part2.sql @@ -338,6 +338,19 @@ SELECT 'z' FROM location_type WHERE 1=0); +-- +-- Check EXISTS simplification with LIMIT +-- +explain (costs off) +select * from int4_tbl o where exists + (select 1 from int4_tbl i where i.f1=o.f1 limit null); +explain (costs off) +select * from int4_tbl o where not exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 1); +explain (costs off) +select * from int4_tbl o where exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 0); + drop table if exists location_type cascade; drop table if exists item_inventory_plan cascade;