From a3f1eca911dc24849fe371970312a9de896d8f78 Mon Sep 17 00:00:00 2001 From: wuyuechuan Date: Mon, 7 Dec 2020 11:33:28 +0800 Subject: [PATCH 1/2] Extract restriction OR clauses whether or not they are indexable. --- src/gausskernel/optimizer/path/Makefile | 4 +- src/gausskernel/optimizer/path/allpaths.cpp | 10 - src/gausskernel/optimizer/path/indxpath.cpp | 3 +- src/gausskernel/optimizer/path/orindxpath.cpp | 180 -------- src/gausskernel/optimizer/plan/planmain.cpp | 7 + src/gausskernel/optimizer/util/Makefile | 4 +- src/gausskernel/optimizer/util/orclauses.cpp | 323 ++++++++++++++ src/include/optimizer/orclauses.h | 21 + src/include/optimizer/paths.h | 7 - src/test/regress/expected/explain_pbe.out | 9 +- src/test/regress/expected/hw_pbe.out | 9 +- src/test/regress/expected/join.out | 415 ++++++++++-------- .../regress/expected/tpch_disablevec07.out | 30 +- src/test/regress/expected/tpchcol07.out | 22 +- src/test/regress/sql/join.sql | 16 + 15 files changed, 634 insertions(+), 426 deletions(-) delete mode 100755 src/gausskernel/optimizer/path/orindxpath.cpp create mode 100644 src/gausskernel/optimizer/util/orclauses.cpp create mode 100644 src/include/optimizer/orclauses.h diff --git a/src/gausskernel/optimizer/path/Makefile b/src/gausskernel/optimizer/path/Makefile index 9eb235ed4..196afba5b 100644 --- a/src/gausskernel/optimizer/path/Makefile +++ b/src/gausskernel/optimizer/path/Makefile @@ -20,10 +20,10 @@ endif ifeq ($(enable_multiple_nodes), yes) OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \ - joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o + joinpath.o joinrels.o pathkeys.o tidpath.o else OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \ - joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o \ + joinpath.o joinrels.o pathkeys.o tidpath.o \ pgxcpath_single.o streampath_single.o endif diff --git a/src/gausskernel/optimizer/path/allpaths.cpp b/src/gausskernel/optimizer/path/allpaths.cpp index 25694afce..9f3cde120 100755 --- a/src/gausskernel/optimizer/path/allpaths.cpp +++ b/src/gausskernel/optimizer/path/allpaths.cpp @@ -741,16 +741,6 @@ static void set_plain_rel_size(PlannerInfo* root, RelOptInfo* rel, RangeTblEntry if (rte->tablesample == NULL) { /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); - - /* - * Check to see if we can extract any restriction conditions from join - * quals that are OR-of-AND structures. If so, add them to the rel's - * restriction list, and redo the above steps. - */ - if (create_or_index_quals(root, rel)) { - check_partial_indexes(root, rel); - set_baserel_size_estimates(root, rel); - } } else { /* Sampled relation */ set_tablesample_rel_size(root, rel, rte); diff --git a/src/gausskernel/optimizer/path/indxpath.cpp b/src/gausskernel/optimizer/path/indxpath.cpp index cf8ebd3d0..2382043f0 100755 --- a/src/gausskernel/optimizer/path/indxpath.cpp +++ b/src/gausskernel/optimizer/path/indxpath.cpp @@ -1516,7 +1516,8 @@ static Path* choose_bitmap_and(PlannerInfo* root, RelOptInfo* rel, List* paths, * we can remove this limitation. (But note that this also defends * against flat-out duplicate input paths, which can happen because * match_join_clauses_to_index will find the same OR join clauses that - * create_or_index_quals has pulled OR restriction clauses out of.) + * extract_restriction_or_clauses has pulled OR restriction clauses out + * of.) * * For the same reason, we reject AND combinations in which an index * predicate clause duplicates another clause. Here we find it necessary diff --git a/src/gausskernel/optimizer/path/orindxpath.cpp b/src/gausskernel/optimizer/path/orindxpath.cpp deleted file mode 100755 index 17bd7a022..000000000 --- a/src/gausskernel/optimizer/path/orindxpath.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* ------------------------------------------------------------------------- - * - * orindxpath.cpp - * Routines to find index paths that match a set of OR clauses - * - * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. - * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * src/gausskernel/optimizer/path/orindxpath.cpp - * - * ------------------------------------------------------------------------- - */ -#include "postgres.h" -#include "knl/knl_variable.h" - -#include "optimizer/cost.h" -#include "optimizer/paths.h" -#include "optimizer/restrictinfo.h" - -/* ---------- - * create_or_index_quals - * Examine join OR-of-AND quals to see if any useful restriction OR - * clauses can be extracted. If so, add them to the query. - * - * Although a join clause must reference other relations overall, - * an OR of ANDs clause might contain sub-clauses that reference just this - * relation and can be used to build a restriction clause. - * For example consider - * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); - * We can transform this into - * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) - * AND (a.x = 42 OR a.x = 44) - * AND (b.y = 43 OR b.z = 45); - * which opens the potential to build OR indexscans on a and b. In essence - * this is a partial transformation to CNF (AND of ORs format). It is not - * complete, however, because we do not unravel the original OR --- doing so - * would usually bloat the qualification expression to little gain. - * - * The added quals are partially redundant with the original OR, and therefore - * will cause the size of the joinrel to be underestimated when it is finally - * formed. (This would be true of a full transformation to CNF as well; the - * fault is not really in the transformation, but in clauselist_selectivity's - * inability to recognize redundant conditions.) To minimize the collateral - * damage, we want to minimize the number of quals added. Therefore we do - * not add every possible extracted restriction condition to the query. - * Instead, we search for the single restriction condition that generates - * the most useful (cheapest) OR indexscan, and add only that condition. - * This is a pretty ad-hoc heuristic, but quite useful. - * - * We can then compensate for the redundancy of the added qual by poking - * the recorded selectivity of the original OR clause, thereby ensuring - * the added qual doesn't change the estimated size of the joinrel when - * it is finally formed. This is a MAJOR HACK: it depends on the fact - * that clause selectivities are cached and on the fact that the same - * RestrictInfo node will appear in every joininfo list that might be used - * when the joinrel is formed. And it probably isn't right in cases where - * the size estimation is nonlinear (i.e., outer and IN joins). But it - * beats not doing anything. - * - * NOTE: one might think this messiness could be worked around by generating - * the indexscan path with a small path->rows value, and not touching the - * rel's baserestrictinfo or rel->rows. However, that does not work. - * The optimizer's fundamental design assumes that every general-purpose - * Path for a given relation generates the same number of rows. Without - * this assumption we'd not be able to optimize solely on the cost of Paths, - * but would have to take number of output rows into account as well. - * (The parameterized-paths stuff almost fixes this, but not quite...) - * - * 'rel' is the relation entry for which quals are to be created - * - * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE. - * If no quals available, returns FALSE and doesn't change rel. - * - * Note: check_partial_indexes() must have been run previously. - * ---------- - */ -bool create_or_index_quals(PlannerInfo* root, RelOptInfo* rel) -{ - BitmapOrPath* bestpath = NULL; - RestrictInfo* bestrinfo = NULL; - List* newrinfos = NIL; - RestrictInfo* or_rinfo = NULL; - Selectivity or_selec, orig_selec; - ListCell* i = NULL; - - /* Skip the whole mess if no indexes */ - if (rel->indexlist == NIL) - return false; - - /* - * Find potentially interesting OR joinclauses. We can use any joinclause - * that is considered safe to move to this rel by the parameterized-path - * machinery, even though what we are going to do with it is not exactly a - * parameterized path. - */ - foreach (i, rel->joininfo) { - RestrictInfo* rinfo = (RestrictInfo*)lfirst(i); - - if (restriction_is_or_clause(rinfo) && join_clause_is_movable_to(rinfo, rel->relid)) { - /* - * Use the generate_bitmap_or_paths() machinery to estimate the - * value of each OR clause. We can use regular restriction - * clauses along with the OR clause contents to generate - * indexquals. We pass restriction_only = true so that any - * sub-clauses that are actually joins will be ignored. - */ - List* orpaths = NIL; - ListCell* k = NULL; - - orpaths = generate_bitmap_or_paths(root, rel, list_make1(rinfo), rel->baserestrictinfo, true); - - if (rel->isPartitionedTable) { - orpaths = list_concat( - orpaths, GenerateBitmapOrPathsUseGPI(root, rel, list_make1(rinfo), rel->baserestrictinfo, true)); - } - - /* Locate the cheapest OR path */ - foreach (k, orpaths) { - BitmapOrPath* path = (BitmapOrPath*)lfirst(k); - - AssertEreport(IsA(path, BitmapOrPath), MOD_OPT, "Restriction information is incorrect"); - if (bestpath == NULL || path->path.total_cost < bestpath->path.total_cost) { - bestpath = path; - bestrinfo = rinfo; - } - } - } - } - - /* Fail if no suitable clauses found */ - if (bestpath == NULL) - return false; - - /* - * Convert the path's indexclauses structure to a RestrictInfo tree. We - * include any partial-index predicates so as to get a reasonable - * representation of what the path is actually scanning. - */ - newrinfos = make_restrictinfo_from_bitmapqual((Path*)bestpath, true, true); - - /* It's possible we get back something other than a single OR clause */ - if (list_length(newrinfos) != 1) - return false; - or_rinfo = (RestrictInfo*)linitial(newrinfos); - AssertEreport(IsA(or_rinfo, RestrictInfo), MOD_OPT, "Restriction clause does not contain OR"); - if (!restriction_is_or_clause(or_rinfo)) - return false; - - /* - * OK, add it to the rel's restriction list. - */ - rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos); - rel->baserestrict_min_security = Min(rel->baserestrict_min_security, or_rinfo->security_level); - - /* - * Adjust the original OR clause's cached selectivity to compensate for - * the selectivity of the added (but redundant) lower-level qual. This - * should result in the join rel getting approximately the same rows - * estimate as it would have gotten without all these shenanigans. (XXX - * major hack alert ... this depends on the assumption that the - * selectivity will stay cached ...) - * we don't need cache the selectivity because the index's selectivity is not accurate. - */ - or_selec = clause_selectivity(root, (Node*)or_rinfo, 0, JOIN_INNER, NULL, false); - if (or_selec > 0 && or_selec < 1) { - orig_selec = clause_selectivity(root, (Node*)bestrinfo, 0, JOIN_INNER, NULL, false); - bestrinfo->norm_selec = orig_selec / or_selec; - /* clamp result to sane range */ - if (bestrinfo->norm_selec > 1) - bestrinfo->norm_selec = 1; - /* It isn't an outer join clause, so no need to adjust outer_selec */ - } - - /* Tell caller to recompute partial index status and rowcount estimate */ - return true; -} - diff --git a/src/gausskernel/optimizer/plan/planmain.cpp b/src/gausskernel/optimizer/plan/planmain.cpp index afdd839f4..beff76b73 100755 --- a/src/gausskernel/optimizer/plan/planmain.cpp +++ b/src/gausskernel/optimizer/plan/planmain.cpp @@ -27,6 +27,7 @@ #include "parser/parse_hint.h" #include "pgxc/pgxc.h" #include "optimizer/cost.h" +#include "optimizer/orclauses.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/placeholder.h" @@ -236,6 +237,12 @@ void query_planner(PlannerInfo* root, List* tlist, double tuple_fraction, double */ add_placeholders_to_base_rels(root); + /* + * Look for join OR clauses that we can extract single-relation + * restriction OR clauses from. + */ + extract_restriction_or_clauses(root); + /* * We should now have size estimates for every actual table involved in * the query, and we also know which if any have been deleted from the diff --git a/src/gausskernel/optimizer/util/Makefile b/src/gausskernel/optimizer/util/Makefile index e0179ebd4..1d959795e 100644 --- a/src/gausskernel/optimizer/util/Makefile +++ b/src/gausskernel/optimizer/util/Makefile @@ -20,11 +20,11 @@ endif SUBDIRS = learn ifeq ($(enable_multiple_nodes), yes) -OBJS = clauses.o joininfo.o pathnode.o placeholder.o plancat.o predtest.o \ +OBJS = clauses.o joininfo.o orclauses.o pathnode.o placeholder.o plancat.o predtest.o \ relnode.o restrictinfo.o tlist.o var.o pruning.o randomplan.o optimizerdebug.o planmem_walker.o \ nodegroups.o plananalyzer.o optcommon.o dataskew.o autoanalyzer.o bucketinfo.o bucketpruning.o else -OBJS = clauses.o joininfo.o pathnode.o placeholder.o plancat.o predtest.o \ +OBJS = clauses.o joininfo.o orclauses.o pathnode.o placeholder.o plancat.o predtest.o \ relnode.o restrictinfo.o tlist.o var.o pgxcship_single.o pruning.o randomplan.o optimizerdebug.o planmem_walker.o \ nodegroups.o plananalyzer.o optcommon.o dataskew.o autoanalyzer.o bucketinfo.o bucketpruning.o endif diff --git a/src/gausskernel/optimizer/util/orclauses.cpp b/src/gausskernel/optimizer/util/orclauses.cpp new file mode 100644 index 000000000..f03b74956 --- /dev/null +++ b/src/gausskernel/optimizer/util/orclauses.cpp @@ -0,0 +1,323 @@ +/*------------------------------------------------------------------------- + * + * orclauses.cpp + * Routines to extract restriction OR clauses from join OR clauses + * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * + * IDENTIFICATION + * src/gausskernel/optimizer/util/orclauses.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/orclauses.h" +#include "optimizer/restrictinfo.h" + +static bool is_safe_restriction_clause_for(RestrictInfo* rinfo, RelOptInfo* rel); +static Expr* extract_or_clause(RestrictInfo* or_rinfo, RelOptInfo* rel); +static void consider_new_or_clause(PlannerInfo* root, RelOptInfo* rel, Expr* orclause, RestrictInfo* join_or_rinfo); + +/* + * extract_restriction_or_clauses + * Examine join OR-of-AND clauses to see if any useful restriction OR + * clauses can be extracted. If so, add them to the query. + * + * Although a join clause must reference multiple relations overall, + * an OR of ANDs clause might contain sub-clauses that reference just one + * relation and can be used to build a restriction clause for that rel. + * For example consider + * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); + * We can transform this into + * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) + * AND (a.x = 42 OR a.x = 44) + * AND (b.y = 43 OR b.z = 45); + * which allows the latter clauses to be applied during the scans of a and b, + * perhaps as index qualifications, and in any case reducing the number of + * rows arriving at the join. In essence this is a partial transformation to + * CNF (AND of ORs format). It is not complete, however, because we do not + * unravel the original OR --- doing so would usually bloat the qualification + * expression to little gain. + * + * The added quals are partially redundant with the original OR, and therefore + * would cause the size of the joinrel to be underestimated when it is finally + * formed. (This would be true of a full transformation to CNF as well; the + * fault is not really in the transformation, but in clauselist_selectivity's + * inability to recognize redundant conditions.) We can compensate for this + * redundancy by changing the cached selectivity of the original OR clause, + * cancelling out the (valid) reduction in the estimated sizes of the base + * relations so that the estimated joinrel size remains the same. This is + * a MAJOR HACK: it depends on the fact that clause selectivities are cached + * and on the fact that the same RestrictInfo node will appear in every + * joininfo list that might be used when the joinrel is formed. + * And it doesn't work in cases where the size estimation is nonlinear + * (i.e., outer and IN joins). But it beats not doing anything. + * + * We examine each base relation to see if join clauses associated with it + * contain extractable restriction conditions. If so, add those conditions + * to the rel's baserestrictinfo and update the cached selectivities of the + * join clauses. Note that the same join clause will be examined afresh + * from the point of view of each baserel that participates in it, so its + * cached selectivity may get updated multiple times. + */ +void extract_restriction_or_clauses(PlannerInfo* root) +{ + /* Examine each baserel for potential join OR clauses */ + for (int rti = 1; rti < root->simple_rel_array_size; rti++) { + RelOptInfo* rel = root->simple_rel_array[rti]; + ListCell* lc = NULL; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) { + continue; + } + + Assert(rel->relid == (uint)rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) { + continue; + } + + /* + * Find potentially interesting OR joinclauses. We can use any + * joinclause that is considered safe to move to this rel by the + * parameterized-path machinery, even though what we are going to do + * with it is not exactly a parameterized path. + * + * However, it seems best to ignore clauses that have been marked + * redundant (by setting norm_selec > 1). That likely can't happen + * for OR clauses, but let's be safe. + */ + foreach (lc, rel->joininfo) { + RestrictInfo* rinfo = (RestrictInfo*)lfirst(lc); + + if (restriction_is_or_clause(rinfo) && join_clause_is_movable_to(rinfo, rel->relid) && + rinfo->norm_selec <= 1) { + /* Try to extract a qual for this rel only */ + Expr* orclause = extract_or_clause(rinfo, rel); + + /* + * If successful, decide whether we want to use the clause, + * and insert it into the rel's restrictinfo list if so. + */ + if (orclause) { + consider_new_or_clause(root, rel, orclause, rinfo); + } + } + } + } +} + +/* + * Is the given primitive (non-OR) RestrictInfo safe to move to the rel? + */ +static bool is_safe_restriction_clause_for(RestrictInfo* rinfo, RelOptInfo* rel) +{ + /* + * We want clauses that mention the rel, and only the rel. So in + * particular pseudoconstant clauses can be rejected quickly. Then check + * the clause's Var membership. + */ + if (rinfo->pseudoconstant) { + return false; + } + if (!bms_equal(rinfo->clause_relids, rel->relids)) { + return false; + } + + /* We don't want extra evaluations of any volatile functions */ + if (contain_volatile_functions((Node*)rinfo->clause)) { + return false; + } + + return true; +} + +/* + * Try to extract a restriction clause mentioning only "rel" from the given + * join OR-clause. + * + * We must be able to extract at least one qual for this rel from each of + * the arms of the OR, else we can't use it. + * + * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL + * if no OR clause could be extracted. + */ +static Expr* extract_or_clause(RestrictInfo* or_rinfo, RelOptInfo* rel) +{ + List* clauselist = NIL; + ListCell* lc = NULL; + + /* + * Scan each arm of the input OR clause. Notice we descend into + * or_rinfo->orclause, which has RestrictInfo nodes embedded below the + * toplevel OR/AND structure. This is useful because we can use the info + * in those nodes to make is_safe_restriction_clause_for()'s checks + * cheaper. We'll strip those nodes from the returned tree, though, + * meaning that fresh ones will be built if the clause is accepted as a + * restriction clause. This might seem wasteful --- couldn't we re-use + * the existing RestrictInfos? But that'd require assuming that + * selectivity and other cached data is computed exactly the same way for + * a restriction clause as for a join clause, which seems undesirable. + */ + Assert(or_clause((Node*)or_rinfo->orclause)); + foreach (lc, ((BoolExpr*)or_rinfo->orclause)->args) { + Node* orarg = (Node*)lfirst(lc); + List* subclauses = NIL; + + /* OR arguments should be ANDs or sub-RestrictInfos */ + if (and_clause(orarg)) { + List* andargs = ((BoolExpr*)orarg)->args; + ListCell* lc2 = NULL; + + foreach (lc2, andargs) { + RestrictInfo* rinfo = (RestrictInfo*)lfirst(lc2); + + Assert(IsA(rinfo, RestrictInfo)); + if (restriction_is_or_clause(rinfo)) { + /* + * Recurse to deal with nested OR. Note we *must* recurse + * here, this isn't just overly-tense optimization: we + * have to descend far enough to find and strip all + * RestrictInfos in the expression. + */ + Expr* suborclause = NULL; + + suborclause = extract_or_clause(rinfo, rel); + if (suborclause) { + subclauses = lappend(subclauses, suborclause); + } + } else if (is_safe_restriction_clause_for(rinfo, rel)) { + subclauses = lappend(subclauses, rinfo->clause); + } + } + } else { + Assert(IsA(orarg, RestrictInfo)); + Assert(!restriction_is_or_clause((RestrictInfo*)orarg)); + if (is_safe_restriction_clause_for((RestrictInfo*)orarg, rel)) { + subclauses = lappend(subclauses, ((RestrictInfo*)orarg)->clause); + } + } + + /* + * If nothing could be extracted from this arm, we can't do anything + * with this OR clause. + */ + if (subclauses == NIL) { + return NULL; + } + + /* + * OK, add subclause(s) to the result OR. If we found more than one, + * we need an AND node. + */ + clauselist = lappend(clauselist, make_ands_explicit(subclauses)); + } + + /* + * If we got a restriction clause from every arm, wrap them up in an OR + * node. (In theory the OR node might be unnecessary, if there was only + * one arm --- but then the input OR node was also redundant.) + */ + if (clauselist != NIL) { + return make_orclause(clauselist); + } + return NULL; +} + +/* + * Consider whether a successfully-extracted restriction OR clause is + * actually worth using. If so, add it to the planner's data structures, + * and adjust the original join clause (join_or_rinfo) to compensate. + */ +static void consider_new_or_clause(PlannerInfo* root, RelOptInfo* rel, Expr* orclause, RestrictInfo* join_or_rinfo) +{ + RestrictInfo* or_rinfo = NULL; + Selectivity or_selec, orig_selec; + + /* + * Build a RestrictInfo from the new OR clause. We can assume it's valid + * as a base restriction clause. + */ + or_rinfo = make_restrictinfo(orclause, true, false, false, join_or_rinfo->security_level, NULL, NULL, NULL); + + /* + * Estimate its selectivity. (We could have done this earlier, but doing + * it on the RestrictInfo representation allows the result to get cached, + * saving work later.) + */ + or_selec = clause_selectivity(root, (Node*)or_rinfo, 0, JOIN_INNER, NULL); + + /* + * The clause is only worth adding to the query if it rejects a useful + * fraction of the base relation's rows; otherwise, it's just going to + * cause duplicate computation (since we will still have to check the + * original OR clause when the join is formed). Somewhat arbitrarily, we + * set the selectivity threshold at 0.9. + */ + if (or_selec > 0.9) { + return; /* forget it */ + } + /* + * OK, add it to the rel's restriction-clause list. + */ + rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo); + + /* + * Adjust the original join OR clause's cached selectivity to compensate + * for the selectivity of the added (but redundant) lower-level qual. This + * should result in the join rel getting approximately the same rows + * estimate as it would have gotten without all these shenanigans. + * + * XXX major hack alert: this depends on the assumption that the + * selectivity will stay cached. + * + * XXX another major hack: we adjust only norm_selec, the cached + * selectivity for JOIN_INNER semantics, even though the join clause + * might've been an outer-join clause. This is partly because we can't + * easily identify the relevant SpecialJoinInfo here, and partly because + * the linearity assumption we're making would fail anyway. (If it is an + * outer-join clause, "rel" must be on the nullable side, else we'd not + * have gotten here. So the computation of the join size is going to be + * quite nonlinear with respect to the size of "rel", so it's not clear + * how we ought to adjust outer_selec even if we could compute its + * original value correctly.) + */ + if (or_selec > 0) { + SpecialJoinInfo sjinfo; + + /* + * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare + * approx_tuple_count() in costsize.c.) + */ + sjinfo.type = T_SpecialJoinInfo; + sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids, rel->relids); + sjinfo.min_righthand = rel->relids; + sjinfo.syn_lefthand = sjinfo.min_lefthand; + sjinfo.syn_righthand = sjinfo.min_righthand; + sjinfo.jointype = JOIN_INNER; + /* we don't bother trying to make the remaining fields valid */ + sjinfo.lhs_strict = false; + sjinfo.delay_upper_joins = false; + sjinfo.join_quals = NIL; + + /* Compute inner-join size */ + orig_selec = clause_selectivity(root, (Node*)join_or_rinfo, 0, JOIN_INNER, &sjinfo); + + /* And hack cached selectivity so join size remains the same */ + join_or_rinfo->norm_selec = orig_selec / or_selec; + /* ensure result stays in sane range, in particular not "redundant" */ + if (join_or_rinfo->norm_selec > 1) { + join_or_rinfo->norm_selec = 1; + } + /* as explained above, we don't touch outer_selec */ + } +} diff --git a/src/include/optimizer/orclauses.h b/src/include/optimizer/orclauses.h new file mode 100644 index 000000000..dd8d35a6a --- /dev/null +++ b/src/include/optimizer/orclauses.h @@ -0,0 +1,21 @@ +/*------------------------------------------------------------------------- + * + * orclauses.h + * prototypes for orclauses.cpp. + * + * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/optimizer/orclauses.h + * + *------------------------------------------------------------------------- + */ +#ifndef ORCLAUSES_H +#define ORCLAUSES_H + +#include "nodes/relation.h" + +extern void extract_restriction_or_clauses(PlannerInfo *root); + +#endif /* ORCLAUSES_H */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 503f93684..1f882e346 100755 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -55,13 +55,6 @@ inline bool CheckIndexPathUseGPI(IndexPath* ipath) return ipath->indexinfo->isGlobal; } - -/* - * orindxpath.c - * additional routines for indexable OR clauses - */ -extern bool create_or_index_quals(PlannerInfo* root, RelOptInfo* rel); - /* * tidpath.h * routines to generate tid paths diff --git a/src/test/regress/expected/explain_pbe.out b/src/test/regress/expected/explain_pbe.out index e94e6c249..dd6319334 100644 --- a/src/test/regress/expected/explain_pbe.out +++ b/src/test/regress/expected/explain_pbe.out @@ -3644,12 +3644,13 @@ EXPLAIN(COSTS FALSE) EXECUTE pa(10,10,10,10,11); QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------- Hash Join - Hash Cond: (pp1.c_int = pp2.c_int) + Hash Cond: (pp2.c_int = pp1.c_int) Join Filter: (((pp1.c_int = 10) AND (pp1.c_numeric = 10::numeric) AND (pp2.c_int = 10) AND (pp2.c_numeric = 10::numeric)) OR (pp1.id = 11)) - -> Seq Scan on pbe_prunning_001 pp1 + -> Seq Scan on pbe_prunning_002 pp2 -> Hash - -> Seq Scan on pbe_prunning_002 pp2 -(6 rows) + -> Seq Scan on pbe_prunning_001 pp1 + Filter: (((c_int = 10) AND (c_numeric = 10::numeric)) OR (id = 11)) +(7 rows) DEALLOCATE PREPARE pa; DROP TABLE pbe_prunning_001; diff --git a/src/test/regress/expected/hw_pbe.out b/src/test/regress/expected/hw_pbe.out index 347890a57..5662f65f3 100644 --- a/src/test/regress/expected/hw_pbe.out +++ b/src/test/regress/expected/hw_pbe.out @@ -3642,12 +3642,13 @@ EXPLAIN(COSTS FALSE) EXECUTE pa(10,10,10,10,11); QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- Hash Join - Hash Cond: (pp1.c_int = pp2.c_int) + Hash Cond: (pp2.c_int = pp1.c_int) Join Filter: (((pp1.c_int = $1) AND (pp1.c_numeric = $2) AND (pp2.c_int = $3) AND (pp2.c_numeric = $4)) OR (pp1.id = $5)) - -> Seq Scan on pbe_prunning_001 pp1 + -> Seq Scan on pbe_prunning_002 pp2 -> Hash - -> Seq Scan on pbe_prunning_002 pp2 -(6 rows) + -> Seq Scan on pbe_prunning_001 pp1 + Filter: (((c_int = $1) AND (c_numeric = $2)) OR (id = $5)) +(7 rows) DEALLOCATE PREPARE pa; DROP TABLE pbe_prunning_001; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 27fae6a5a..caee0f85b 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2668,6 +2668,10 @@ order by c.name; (3 rows) rollback; +analyse int4_tbl; +analyse int8_tbl; +analyse tenk1; +analyse text_tbl; -- -- test incorrect handling of placeholders that only appear in targetlists, -- per bug #6154 @@ -2724,18 +2728,17 @@ SELECT qq, unique1 ( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2 USING (qq) INNER JOIN tenk1 c ON qq = unique2; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Hash Join - Hash Cond: (COALESCE((COALESCE(a.q1, 0::bigint)), (COALESCE(b.q2, (-1)::bigint))) = c.unique2) + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Nested Loop -> Hash Full Join Hash Cond: (COALESCE(a.q1, 0::bigint) = COALESCE(b.q2, (-1)::bigint)) -> Seq Scan on int8_tbl a -> Hash -> Seq Scan on int8_tbl b - -> Hash - -> Seq Scan on tenk1 c -(9 rows) + -> Index Scan using tenk1_unique2 on tenk1 c + Index Cond: (unique2 = COALESCE((COALESCE(a.q1, 0::bigint)), (COALESCE(b.q2, (-1)::bigint)))) +(8 rows) SELECT qq, unique1 FROM @@ -2767,12 +2770,12 @@ order by 1,2; ----------------------------------------------------------- Sort Sort Key: t1.q1, t1.q2 - -> Hash Left Join - Hash Cond: (t1.q2 = t2.q1) + -> Hash Right Join + Hash Cond: (t2.q1 = t1.q2) Filter: (1 = (SubPlan 1)) - -> Seq Scan on int8_tbl t1 + -> Seq Scan on int8_tbl t2 -> Hash - -> Seq Scan on int8_tbl t2 + -> Seq Scan on int8_tbl t1 SubPlan 1 -> Limit -> Result @@ -2869,23 +2872,23 @@ select * from int4(sin(1)) q1, int4(sin(0)) q2 where q1 = thousand or q2 = thousand; - QUERY PLAN ------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------ Hash Join - Hash Cond: (int4_tbl.f1 = tenk1.twothousand) - -> Seq Scan on int4_tbl - -> Hash + Hash Cond: (tenk1.twothousand = int4_tbl.f1) + -> Nested Loop -> Nested Loop - -> Nested Loop - -> Function Scan on q1 - -> Function Scan on q2 - -> Bitmap Heap Scan on tenk1 - Recheck Cond: ((q1.q1 = thousand) OR (q2.q2 = thousand)) - -> BitmapOr - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (q1.q1 = thousand) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (q2.q2 = thousand) + -> Function Scan on q1 + -> Function Scan on q2 + -> Bitmap Heap Scan on tenk1 + Recheck Cond: ((q1.q1 = thousand) OR (q2.q2 = thousand)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (q1.q1 = thousand) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (q2.q2 = thousand) + -> Hash + -> Seq Scan on int4_tbl (15 rows) explain (num_nodes off, nodes off, costs off) @@ -2894,22 +2897,62 @@ select * from int4(sin(1)) q1, int4(sin(0)) q2 where thousand = (q1 + q2); - QUERY PLAN --------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------- Hash Join - Hash Cond: (int4_tbl.f1 = tenk1.twothousand) - -> Seq Scan on int4_tbl - -> Hash + Hash Cond: (tenk1.twothousand = int4_tbl.f1) + -> Nested Loop -> Nested Loop - -> Nested Loop - -> Function Scan on q1 - -> Function Scan on q2 - -> Bitmap Heap Scan on tenk1 - Recheck Cond: (thousand = (q1.q1 + q2.q2)) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = (q1.q1 + q2.q2)) + -> Function Scan on q1 + -> Function Scan on q2 + -> Bitmap Heap Scan on tenk1 + Recheck Cond: (thousand = (q1.q1 + q2.q2)) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = (q1.q1 + q2.q2)) + -> Hash + -> Seq Scan on int4_tbl (12 rows) +-- +-- test extraction of restriction OR clauses from join OR clause +-- (we used to only do this for indexable clauses) +-- +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4); + QUERY PLAN +------------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4))) + -> Seq Scan on tenk1 b + Filter: ((unique1 = 2) OR (hundred = 4)) + -> Bitmap Heap Scan on tenk1 a + Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) + -> Bitmap Index Scan on tenk1_unique2 + Index Cond: (unique2 = 3) +(11 rows) + +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.ten = 4); + QUERY PLAN +--------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.ten = 4))) + -> Seq Scan on tenk1 b + Filter: ((unique1 = 2) OR (ten = 4)) + -> Bitmap Heap Scan on tenk1 a + Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) + -> Bitmap Index Scan on tenk1_unique2 + Index Cond: (unique2 = 3) +(11 rows) + -- -- test ability to generate a suitable plan for a star-schema query -- @@ -2917,21 +2960,17 @@ explain (costs off) select * from tenk1, int8_tbl a, int8_tbl b where thousand = a.q1 and tenthous = b.q1 and a.q2 = 1 and b.q2 = 2; - QUERY PLAN -------------------------------------------------------------- - Hash Join - Hash Cond: (tenk1.tenthous = b.q1) + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop + -> Seq Scan on int8_tbl b + Filter: (q2 = 2) -> Nested Loop -> Seq Scan on int8_tbl a Filter: (q2 = 1) - -> Bitmap Heap Scan on tenk1 - Recheck Cond: (thousand = a.q1) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = a.q1) - -> Hash - -> Seq Scan on int8_tbl b - Filter: (q2 = 2) -(12 rows) + -> Index Scan using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand = a.q1) AND (tenthous = b.q1)) +(8 rows) -- -- test a corner case in which we shouldn't apply the star-schema optimization @@ -2949,24 +2988,23 @@ select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from left join tenk1 t2 on (subq1.y1 = t2.unique1) where t1.unique2 < 42 and t1.stringu1 > t2.stringu2; - QUERY PLAN ----------------------------------------------------------------------------------- - Hash Join - Hash Cond: (i1.f1 = "*VALUES*".column2) - -> Seq Scan on int4_tbl i1 - -> Hash + QUERY PLAN +---------------------------------------------------------------------------- + Nested Loop + Join Filter: (t1.stringu1 > t2.stringu2) + -> Nested Loop + Join Filter: ("*VALUES*".column2 = i1.f1) -> Nested Loop - Join Filter: (t1.stringu1 > t2.stringu2) -> Nested Loop - -> Nested Loop - Join Filter: ("*VALUES*".column1 = "*VALUES*".column2) - -> Values Scan on "*VALUES*" - -> Values Scan on "*VALUES*" - -> Index Scan using tenk1_unique2 on tenk1 t1 - Index Cond: ((unique2 = (11)) AND (unique2 < 42)) - -> Index Scan using tenk1_unique1 on tenk1 t2 - Index Cond: (unique1 = "*VALUES*".column1) -(15 rows) + Join Filter: ("*VALUES*".column1 = "*VALUES*".column2) + -> Values Scan on "*VALUES*" + -> Values Scan on "*VALUES*" + -> Index Scan using tenk1_unique2 on tenk1 t1 + Index Cond: ((unique2 = (11)) AND (unique2 < 42)) + -> Seq Scan on int4_tbl i1 + -> Index Scan using tenk1_unique1 on tenk1 t2 + Index Cond: (unique1 = "*VALUES*".column1) +(14 rows) select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from tenk1 t1 @@ -3033,24 +3071,23 @@ select count(*) from tenk1 a join tenk1 b on a.unique1 = b.unique2 left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand join int4_tbl on b.thousand = f1; - QUERY PLAN -------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------- Aggregate - -> Hash Join - Hash Cond: (b.thousand = int4_tbl.f1) - -> Hash Left Join - Hash Cond: (a.thousand = c.thousand) - Join Filter: (a.unique2 = b.unique1) - -> Hash Join - Hash Cond: (a.unique1 = b.unique2) - -> Seq Scan on tenk1 a - -> Hash - -> Seq Scan on tenk1 b - -> Hash - -> Index Only Scan using tenk1_thous_tenthous on tenk1 c - -> Hash - -> Seq Scan on int4_tbl -(15 rows) + -> Nested Loop Left Join + Join Filter: (a.unique2 = b.unique1) + -> Nested Loop + -> Nested Loop + -> Seq Scan on int4_tbl + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: (thousand = int4_tbl.f1) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = int4_tbl.f1) + -> Index Scan using tenk1_unique1 on tenk1 a + Index Cond: (unique1 = b.unique2) + -> Index Only Scan using tenk1_thous_tenthous on tenk1 c + Index Cond: (thousand = a.thousand) +(14 rows) select count(*) from tenk1 a join tenk1 b on a.unique1 = b.unique2 @@ -3068,29 +3105,24 @@ select b.unique1 from join int4_tbl i1 on b.thousand = f1 right join int4_tbl i2 on i2.f1 = b.tenthous order by 1; - QUERY PLAN -------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------- Sort Sort Key: b.unique1 - -> Hash Right Join - Hash Cond: (b.tenthous = i2.f1) - -> Hash Join - Hash Cond: (b.thousand = i1.f1) - -> Hash Left Join - Hash Cond: (a.thousand = c.thousand) - Join Filter: (b.unique1 = 42) - -> Hash Join - Hash Cond: (a.unique1 = b.unique2) - -> Seq Scan on tenk1 a - -> Hash - -> Seq Scan on tenk1 b - -> Hash - -> Index Only Scan using tenk1_thous_tenthous on tenk1 c - -> Hash - -> Seq Scan on int4_tbl i1 - -> Hash - -> Seq Scan on int4_tbl i2 -(20 rows) + -> Nested Loop Left Join + -> Seq Scan on int4_tbl i2 + -> Nested Loop Left Join + Join Filter: (b.unique1 = 42) + -> Nested Loop + -> Nested Loop + -> Seq Scan on int4_tbl i1 + -> Index Scan using tenk1_thous_tenthous on tenk1 b + Index Cond: ((thousand = i1.f1) AND (i2.f1 = tenthous)) + -> Index Scan using tenk1_unique1 on tenk1 a + Index Cond: (unique1 = b.unique2) + -> Index Only Scan using tenk1_thous_tenthous on tenk1 c + Index Cond: (thousand = a.thousand) +(15 rows) select b.unique1 from tenk1 a join tenk1 b on a.unique1 = b.unique2 @@ -3117,13 +3149,12 @@ select q1, unique2, thousand, hundred where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123); QUERY PLAN -------------------------------------------------------------------------------------- - Hash Left Join - Hash Cond: (a.q1 = b.unique2) + Nested Loop Left Join Filter: ((COALESCE(b.thousand, 123) = a.q1) AND (a.q1 = COALESCE(b.hundred, 123))) -> Seq Scan on int8_tbl a - -> Hash - -> Seq Scan on tenk1 b -(6 rows) + -> Index Scan using tenk1_unique2 on tenk1 b + Index Cond: (a.q1 = unique2) +(5 rows) select q1, unique2, thousand, hundred from int8_tbl a left join tenk1 b on q1 = unique2 @@ -3138,13 +3169,12 @@ select f1, unique2, case when unique2 is null then f1 else 0 end where (case when unique2 is null then f1 else 0 end) = 0; QUERY PLAN -------------------------------------------------------------------- - Hash Left Join - Hash Cond: (a.f1 = b.unique2) + Nested Loop Left Join Filter: (CASE WHEN (b.unique2 IS NULL) THEN a.f1 ELSE 0 END = 0) -> Seq Scan on int4_tbl a - -> Hash - -> Index Only Scan using tenk1_unique2 on tenk1 b -(6 rows) + -> Index Only Scan using tenk1_unique2 on tenk1 b + Index Cond: (unique2 = a.f1) +(5 rows) select f1, unique2, case when unique2 is null then f1 else 0 end from int4_tbl a left join tenk1 b on f1 = unique2 @@ -3190,16 +3220,15 @@ select * from -- explain (num_nodes off, nodes off, costs off) select * from int4_tbl a left join tenk1 b on f1 = unique2 where f1 = 0; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +------------------------------------------------- Nested Loop Left Join Join Filter: (a.f1 = b.unique2) -> Seq Scan on int4_tbl a Filter: (f1 = 0) - -> Materialize - -> Index Scan using tenk1_unique2 on tenk1 b - Index Cond: (unique2 = 0) -(7 rows) + -> Index Scan using tenk1_unique2 on tenk1 b + Index Cond: (unique2 = 0) +(6 rows) explain (num_nodes off, nodes off, costs off) select * from tenk1 a full join tenk1 b using(unique2) where unique2 = 42; @@ -3230,15 +3259,15 @@ INSERT INTO c VALUES (0), (1); INSERT INTO d VALUES (1,3), (2,2), (3,1); -- all three cases should be optimizable into a simple seqscan explain (verbose on, costs off, nodes off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id; ---?.*QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: a.id, a.b_id (2 rows) explain (verbose on, costs off, nodes off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id; ---?.*QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: b.id, b.c_id (2 rows) @@ -3246,8 +3275,8 @@ explain (verbose on, costs off, nodes off) SELECT b.* FROM b LEFT JOIN c ON b.c_ explain (verbose on, costs off, nodes off) SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id) ON (a.b_id = b.id); ---?QUERY PLAN.* ---?-----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: a.id, a.b_id (2 rows) @@ -3257,8 +3286,8 @@ explain (verbose on, costs off, nodes off) select id from a where id in ( select b.id from b left join c on b.id = c.id ); ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Hash Join Output: a.id Hash Cond: (a.id = b.id) @@ -3275,8 +3304,8 @@ select id from a where id in ( explain (verbose on, costs off, nodes off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id and d.b = s.c_id; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: d.a, d.b (2 rows) @@ -3285,8 +3314,8 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (verbose on, costs off, nodes off) select d.* from d left join (select distinct * from b) s on d.a = s.id and d.b = s.c_id; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: d.a, d.b (2 rows) @@ -3296,8 +3325,8 @@ select d.* from d left join (select distinct * from b) s explain (verbose on, costs off, nodes off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Hash Right Join Output: d.a, d.b Hash Cond: (b.id = d.a) @@ -3316,8 +3345,8 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (verbose on, costs off, nodes off) select d.* from d left join (select distinct * from b) s on d.a = s.id; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Hash Right Join Output: d.a, d.b Hash Cond: (b.id = d.a) @@ -3337,8 +3366,8 @@ select d.* from d left join (select distinct * from b) s explain (verbose on, costs off, nodes off) select d.* from d left join (select id from a union select id from b) s on d.a = s.id; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: d.a, d.b (2 rows) @@ -3347,8 +3376,8 @@ select d.* from d left join (select id from a union select id from b) s explain (verbose on, costs off, nodes off) select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4 on i8.q1 = i4.f1; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Seq Scan on public.int8_tbl i8 Output: i8.q1, i8.q2 (2 rows) @@ -3371,8 +3400,8 @@ select p.* from parent p left join child c on (p.k = c.k) order by 1,2; explain (verbose on, costs off, nodes off) select p.* from parent p left join child c on (p.k = c.k) order by 1,2; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Sort Output: p.k, p.pd Sort Key: p.k, p.pd @@ -3395,8 +3424,8 @@ explain (verbose on, costs off, nodes off) select p.*, linked from parent p left join (select c.*, true as linked from child c) as ss on (p.k = ss.k) order by p.k; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Merge Left Join Output: p.k, p.pd, (true) Merge Cond: (p.k = c.k) @@ -3418,8 +3447,8 @@ explain (verbose on, costs off, nodes off) select p.* from parent p left join child c on (p.k = c.k) where p.k = 1 and p.k = 2; ---?QUERY PLAN.* ---?----------------.* +--?.* +--?.* Result Output: p.k, p.pd One-Time Filter: false @@ -3519,8 +3548,8 @@ select t1.* from on (t1.f1 = b1.d1) left join int4_tbl i4 on (i8.q2 = i4.f1); ---?QUERY PLAN.* ---?----------------------------.* + QUERY PLAN +---------------------------------------------------------------------------- Hash Left Join Output: t1.f1 Hash Cond: (i8.q2 = i4.f1) @@ -3529,33 +3558,35 @@ select t1.* from Join Filter: (t1.f1 = '***'::text) -> Seq Scan on public.text_tbl t1 Output: t1.f1 - -> Hash Right Join + -> Materialize Output: i8.q2 - Hash Cond: ((NULL::integer) = i8b1.q2) -> Hash Right Join - Output: i8.q2, (NULL::integer) - Hash Cond: (i8b2.q1 = i8.q1) - -> Nested Loop - Output: i8b2.q1, NULL::integer - -> Seq Scan on public.int4_tbl i4b2 - Output: i4b2.f1 - -> Materialize - Output: i8b2.q1 + Output: i8.q2 + Hash Cond: ((NULL::integer) = i8b1.q2) + -> Hash Right Join + Output: i8.q2, (NULL::integer) + Hash Cond: (i8b2.q1 = i8.q1) + -> Nested Loop + Output: i8b2.q1, NULL::integer -> Seq Scan on public.int8_tbl i8b2 - Output: i8b2.q1 - -> Hash - Output: i8.q1, i8.q2 - -> Seq Scan on public.int8_tbl i8 + Output: i8b2.q1, i8b2.q2 + -> Materialize + Output: i4b2.f1 + -> Seq Scan on public.int4_tbl i4b2 + Output: i4b2.f1 + -> Hash Output: i8.q1, i8.q2 - -> Hash - Output: i8b1.q2 - -> Seq Scan on public.int8_tbl i8b1 + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + -> Hash Output: i8b1.q2 + -> Seq Scan on public.int8_tbl i8b1 + Output: i8b1.q2 -> Hash Output: i4.f1 -> Seq Scan on public.int4_tbl i4 Output: i4.f1 -(34 rows) +(36 rows) select t1.* from text_tbl t1 @@ -3586,8 +3617,8 @@ select t1.* from on (t1.f1 = b1.d1) left join int4_tbl i4 on (i8.q2 = i4.f1); ---?QUERY PLAN.* ---?----------------------------.* + QUERY PLAN +---------------------------------------------------------------------------- Hash Left Join Output: t1.f1 Hash Cond: (i8.q2 = i4.f1) @@ -3596,34 +3627,36 @@ select t1.* from Join Filter: (t1.f1 = '***'::text) -> Seq Scan on public.text_tbl t1 Output: t1.f1 - -> Hash Right Join + -> Materialize Output: i8.q2 - Hash Cond: ((NULL::integer) = i8b1.q2) -> Hash Right Join - Output: i8.q2, (NULL::integer) - Hash Cond: (i8b2.q1 = i8.q1) - -> Hash Join - Output: i8b2.q1, NULL::integer - Hash Cond: (i4b2.f1 = i8b2.q1) - -> Seq Scan on public.int4_tbl i4b2 - Output: i4b2.f1 - -> Hash - Output: i8b2.q1 + Output: i8.q2 + Hash Cond: ((NULL::integer) = i8b1.q2) + -> Hash Right Join + Output: i8.q2, (NULL::integer) + Hash Cond: (i8b2.q1 = i8.q1) + -> Hash Join + Output: i8b2.q1, NULL::integer + Hash Cond: (i8b2.q1 = i4b2.f1) -> Seq Scan on public.int8_tbl i8b2 - Output: i8b2.q1 - -> Hash - Output: i8.q1, i8.q2 - -> Seq Scan on public.int8_tbl i8 + Output: i8b2.q1, i8b2.q2 + -> Hash + Output: i4b2.f1 + -> Seq Scan on public.int4_tbl i4b2 + Output: i4b2.f1 + -> Hash Output: i8.q1, i8.q2 - -> Hash - Output: i8b1.q2 - -> Seq Scan on public.int8_tbl i8b1 + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + -> Hash Output: i8b1.q2 + -> Seq Scan on public.int8_tbl i8b1 + Output: i8b1.q2 -> Hash Output: i4.f1 -> Seq Scan on public.int4_tbl i4 Output: i4.f1 -(35 rows) +(37 rows) select t1.* from @@ -3672,12 +3705,10 @@ select * from -> Seq Scan on public.int8_tbl i8 Output: i8.q1, i8.q2 Filter: (i8.q2 = 456) - -> Materialize + -> Seq Scan on public.text_tbl t1 Output: t1.f1 - -> Seq Scan on public.text_tbl t1 - Output: t1.f1 - Filter: (t1.f1 = 'doh!'::text) -(23 rows) + Filter: (t1.f1 = 'doh!'::text) +(21 rows) select * from text_tbl t1 diff --git a/src/test/regress/expected/tpch_disablevec07.out b/src/test/regress/expected/tpch_disablevec07.out index 1da4618e6..f212d85d4 100644 --- a/src/test/regress/expected/tpch_disablevec07.out +++ b/src/test/regress/expected/tpch_disablevec07.out @@ -39,8 +39,8 @@ group by l_year order by revenue; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Sort Sort Key: (sum((lineitem.l_extendedprice * (1::numeric - lineitem.l_discount)))) -> HashAggregate @@ -58,22 +58,24 @@ order by -> Row Adapter -> CStore Scan on orders -> Hash - -> Hash Join - Hash Cond: (customer.c_nationkey = n2.n_nationkey) - -> Row Adapter - -> CStore Scan on customer - -> Hash - -> Nested Loop - Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) + -> Nested Loop + Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) + -> Hash Join + Hash Cond: (customer.c_nationkey = n2.n_nationkey) + -> Row Adapter + -> CStore Scan on customer + -> Hash -> Row Adapter - -> CStore Scan on nation n1 - -> Materialize - -> Row Adapter - -> CStore Scan on nation n2 + -> CStore Scan on nation n2 + Filter: ((n_name = 'GERMANY'::bpchar) OR (n_name = 'FRANCE'::bpchar)) + -> Materialize + -> Row Adapter + -> CStore Scan on nation n1 + Filter: ((n_name = 'FRANCE '::bpchar) OR (n_name = 'GERMANY'::bpchar)) -> Hash -> Row Adapter -> CStore Scan on supplier -(32 rows) +(34 rows) select supp_nation, diff --git a/src/test/regress/expected/tpchcol07.out b/src/test/regress/expected/tpchcol07.out index 60c9b821e..3f82fcad2 100644 --- a/src/test/regress/expected/tpchcol07.out +++ b/src/test/regress/expected/tpchcol07.out @@ -39,8 +39,8 @@ group by l_year order by revenue; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Row Adapter -> Vector Sort Sort Key: (sum((lineitem.l_extendedprice * (1::numeric - lineitem.l_discount)))) @@ -55,16 +55,18 @@ order by -> Vector Sonic Hash Join Hash Cond: (orders.o_custkey = customer.c_custkey) -> CStore Scan on orders - -> Vector Sonic Hash Join - Hash Cond: (customer.c_nationkey = n2.n_nationkey) - -> CStore Scan on customer - -> Vector Nest Loop - Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) + -> Vector Nest Loop + Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) + -> Vector Sonic Hash Join + Hash Cond: (customer.c_nationkey = n2.n_nationkey) + -> CStore Scan on customer + -> CStore Scan on nation n2 + Filter: ((n_name = 'GERMANY'::bpchar) OR (n_name = 'FRANCE'::bpchar)) + -> Vector Materialize -> CStore Scan on nation n1 - -> Vector Materialize - -> CStore Scan on nation n2 + Filter: ((n_name = 'FRANCE '::bpchar) OR (n_name = 'GERMANY'::bpchar)) -> CStore Scan on supplier -(23 rows) +(25 rows) select supp_nation, diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 0cd4e0dd9..a7f5ec7f9 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -722,6 +722,10 @@ order by c.name; rollback; +analyse int4_tbl; +analyse int8_tbl; +analyse tenk1; +analyse text_tbl; -- -- test incorrect handling of placeholders that only appear in targetlists, -- per bug #6154 @@ -825,6 +829,18 @@ select * from int4(sin(0)) q2 where thousand = (q1 + q2); +-- +-- test extraction of restriction OR clauses from join OR clause +-- (we used to only do this for indexable clauses) +-- + +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4); +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.ten = 4); + -- -- test ability to generate a suitable plan for a star-schema query -- From 91fc39c46869f58f53f859caa73c2a5621ae4489 Mon Sep 17 00:00:00 2001 From: wuyuechuan Date: Mon, 7 Dec 2020 11:33:53 +0800 Subject: [PATCH 2/2] Preserve AND OR flatness while extracting restriction OR clauses --- src/gausskernel/optimizer/util/orclauses.cpp | 11 ++++- src/test/regress/expected/explain_pbe.out | 6 +-- src/test/regress/expected/hw_pbe.out | 6 +-- src/test/regress/expected/join.out | 47 +++++++++++++------ .../regress/expected/tpch_disablevec07.out | 30 ++++++------ src/test/regress/expected/tpchcol07.out | 22 ++++----- src/test/regress/sql/join.sql | 4 ++ 7 files changed, 79 insertions(+), 47 deletions(-) diff --git a/src/gausskernel/optimizer/util/orclauses.cpp b/src/gausskernel/optimizer/util/orclauses.cpp index f03b74956..1b024b7f7 100644 --- a/src/gausskernel/optimizer/util/orclauses.cpp +++ b/src/gausskernel/optimizer/util/orclauses.cpp @@ -172,6 +172,7 @@ static Expr* extract_or_clause(RestrictInfo* or_rinfo, RelOptInfo* rel) foreach (lc, ((BoolExpr*)or_rinfo->orclause)->args) { Node* orarg = (Node*)lfirst(lc); List* subclauses = NIL; + Node* subclause = NULL; /* OR arguments should be ANDs or sub-RestrictInfos */ if (and_clause(orarg)) { @@ -217,9 +218,17 @@ static Expr* extract_or_clause(RestrictInfo* or_rinfo, RelOptInfo* rel) /* * OK, add subclause(s) to the result OR. If we found more than one, - * we need an AND node. + * we need an AND node. But if we found only one, and it is itself an + * OR node, add its subclauses to the result instead; this is needed + * to preserve AND/OR flatness (ie, no OR directly underneath OR). */ clauselist = lappend(clauselist, make_ands_explicit(subclauses)); + subclause = (Node*)make_ands_explicit(subclauses); + if (or_clause(subclause)) { + clauselist = list_concat(clauselist, list_copy(((BoolExpr*)subclause)->args)); + } else { + clauselist = lappend(clauselist, subclause); + } } /* diff --git a/src/test/regress/expected/explain_pbe.out b/src/test/regress/expected/explain_pbe.out index dd6319334..34d4e6e3e 100644 --- a/src/test/regress/expected/explain_pbe.out +++ b/src/test/regress/expected/explain_pbe.out @@ -3641,15 +3641,15 @@ INSERT INTO pbe_prunning_002 values (1, 1, 1); SET enable_pbe_optimization to false; PREPARE pa AS SELECT * FROM pbe_prunning_001 pp1 RIGHT OUTER JOIN pbe_prunning_002 pp2 ON pp1.c_int=pp2.c_int WHERE (pp1.c_int=$1 AND pp1.c_numeric=$2 AND pp2.c_int=$3 AND pp2.c_numeric=$4) or pp1.id=$5; EXPLAIN(COSTS FALSE) EXECUTE pa(10,10,10,10,11); - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------ Hash Join Hash Cond: (pp2.c_int = pp1.c_int) Join Filter: (((pp1.c_int = 10) AND (pp1.c_numeric = 10::numeric) AND (pp2.c_int = 10) AND (pp2.c_numeric = 10::numeric)) OR (pp1.id = 11)) -> Seq Scan on pbe_prunning_002 pp2 -> Hash -> Seq Scan on pbe_prunning_001 pp1 - Filter: (((c_int = 10) AND (c_numeric = 10::numeric)) OR (id = 11)) + Filter: (((c_int = 10) AND (c_numeric = 10::numeric)) OR ((c_int = 10) AND (c_numeric = 10::numeric)) OR (id = 11) OR (id = 11)) (7 rows) DEALLOCATE PREPARE pa; diff --git a/src/test/regress/expected/hw_pbe.out b/src/test/regress/expected/hw_pbe.out index 5662f65f3..7efc2bd88 100644 --- a/src/test/regress/expected/hw_pbe.out +++ b/src/test/regress/expected/hw_pbe.out @@ -3639,15 +3639,15 @@ INSERT INTO pbe_prunning_001 values (1, 1, 1); INSERT INTO pbe_prunning_002 values (1, 1, 1); PREPARE pa AS SELECT * FROM pbe_prunning_001 pp1 RIGHT OUTER JOIN pbe_prunning_002 pp2 ON pp1.c_int=pp2.c_int WHERE (pp1.c_int=$1 AND pp1.c_numeric=$2 AND pp2.c_int=$3 AND pp2.c_numeric=$4) or pp1.id=$5; EXPLAIN(COSTS FALSE) EXECUTE pa(10,10,10,10,11); - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------ Hash Join Hash Cond: (pp2.c_int = pp1.c_int) Join Filter: (((pp1.c_int = $1) AND (pp1.c_numeric = $2) AND (pp2.c_int = $3) AND (pp2.c_numeric = $4)) OR (pp1.id = $5)) -> Seq Scan on pbe_prunning_002 pp2 -> Hash -> Seq Scan on pbe_prunning_001 pp1 - Filter: (((c_int = $1) AND (c_numeric = $2)) OR (id = $5)) + Filter: (((c_int = $1) AND (c_numeric = $2)) OR ((c_int = $1) AND (c_numeric = $2)) OR (id = $5) OR (id = $5)) (7 rows) DEALLOCATE PREPARE pa; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index caee0f85b..ce779ad63 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2728,7 +2728,7 @@ SELECT qq, unique1 ( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2 USING (qq) INNER JOIN tenk1 c ON qq = unique2; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------------------------------------- Nested Loop -> Hash Full Join @@ -2872,7 +2872,7 @@ select * from int4(sin(1)) q1, int4(sin(0)) q2 where q1 = thousand or q2 = thousand; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------ Hash Join Hash Cond: (tenk1.twothousand = int4_tbl.f1) @@ -2897,7 +2897,7 @@ select * from int4(sin(1)) q1, int4(sin(0)) q2 where thousand = (q1 + q2); - QUERY PLAN + QUERY PLAN -------------------------------------------------------------- Hash Join Hash Cond: (tenk1.twothousand = int4_tbl.f1) @@ -2920,12 +2920,12 @@ where thousand = (q1 + q2); explain (costs off) select * from tenk1 a join tenk1 b on (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4); - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------------------------------- Nested Loop Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4))) -> Seq Scan on tenk1 b - Filter: ((unique1 = 2) OR (hundred = 4)) + Filter: ((unique1 = 2) OR (unique1 = 2) OR (hundred = 4) OR (hundred = 4)) -> Bitmap Heap Scan on tenk1 a Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) -> BitmapOr @@ -2938,12 +2938,12 @@ select * from tenk1 a join tenk1 b on explain (costs off) select * from tenk1 a join tenk1 b on (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.ten = 4); - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------------------------------- Nested Loop Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.ten = 4))) -> Seq Scan on tenk1 b - Filter: ((unique1 = 2) OR (ten = 4)) + Filter: ((unique1 = 2) OR (unique1 = 2) OR (ten = 4) OR (ten = 4)) -> Bitmap Heap Scan on tenk1 a Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) -> BitmapOr @@ -2953,6 +2953,25 @@ select * from tenk1 a join tenk1 b on Index Cond: (unique2 = 3) (11 rows) +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or + ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4))) + -> Seq Scan on tenk1 a + -> Materialize + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: ((unique1 = 2) OR (hundred = 4)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 2) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 4) +(11 rows) + -- -- test ability to generate a suitable plan for a star-schema query -- @@ -2960,7 +2979,7 @@ explain (costs off) select * from tenk1, int8_tbl a, int8_tbl b where thousand = a.q1 and tenthous = b.q1 and a.q2 = 1 and b.q2 = 2; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Nested Loop -> Seq Scan on int8_tbl b @@ -2988,7 +3007,7 @@ select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from left join tenk1 t2 on (subq1.y1 = t2.unique1) where t1.unique2 < 42 and t1.stringu1 > t2.stringu2; - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------- Nested Loop Join Filter: (t1.stringu1 > t2.stringu2) @@ -3071,7 +3090,7 @@ select count(*) from tenk1 a join tenk1 b on a.unique1 = b.unique2 left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand join int4_tbl on b.thousand = f1; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------- Aggregate -> Nested Loop Left Join @@ -3105,7 +3124,7 @@ select b.unique1 from join int4_tbl i1 on b.thousand = f1 right join int4_tbl i2 on i2.f1 = b.tenthous order by 1; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------------------------------------- Sort Sort Key: b.unique1 @@ -3220,7 +3239,7 @@ select * from -- explain (num_nodes off, nodes off, costs off) select * from int4_tbl a left join tenk1 b on f1 = unique2 where f1 = 0; - QUERY PLAN + QUERY PLAN ------------------------------------------------- Nested Loop Left Join Join Filter: (a.f1 = b.unique2) @@ -3548,7 +3567,7 @@ select t1.* from on (t1.f1 = b1.d1) left join int4_tbl i4 on (i8.q2 = i4.f1); - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------- Hash Left Join Output: t1.f1 @@ -3617,7 +3636,7 @@ select t1.* from on (t1.f1 = b1.d1) left join int4_tbl i4 on (i8.q2 = i4.f1); - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------- Hash Left Join Output: t1.f1 diff --git a/src/test/regress/expected/tpch_disablevec07.out b/src/test/regress/expected/tpch_disablevec07.out index f212d85d4..79a32e631 100644 --- a/src/test/regress/expected/tpch_disablevec07.out +++ b/src/test/regress/expected/tpch_disablevec07.out @@ -39,8 +39,8 @@ group by l_year order by revenue; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Sort Sort Key: (sum((lineitem.l_extendedprice * (1::numeric - lineitem.l_discount)))) -> HashAggregate @@ -58,20 +58,20 @@ order by -> Row Adapter -> CStore Scan on orders -> Hash - -> Nested Loop - Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) - -> Hash Join - Hash Cond: (customer.c_nationkey = n2.n_nationkey) - -> Row Adapter - -> CStore Scan on customer - -> Hash + -> Hash Join + Hash Cond: (customer.c_nationkey = n2.n_nationkey) + -> Row Adapter + -> CStore Scan on customer + -> Hash + -> Nested Loop + Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) -> Row Adapter - -> CStore Scan on nation n2 - Filter: ((n_name = 'GERMANY'::bpchar) OR (n_name = 'FRANCE'::bpchar)) - -> Materialize - -> Row Adapter - -> CStore Scan on nation n1 - Filter: ((n_name = 'FRANCE '::bpchar) OR (n_name = 'GERMANY'::bpchar)) + -> CStore Scan on nation n1 + Filter: ((n_name = 'FRANCE '::bpchar) OR (n_name = 'FRANCE '::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar)) + -> Materialize + -> Row Adapter + -> CStore Scan on nation n2 + Filter: ((n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'FRANCE'::bpchar) OR (n_name = 'FRANCE'::bpchar)) -> Hash -> Row Adapter -> CStore Scan on supplier diff --git a/src/test/regress/expected/tpchcol07.out b/src/test/regress/expected/tpchcol07.out index 3f82fcad2..8b5cca56d 100644 --- a/src/test/regress/expected/tpchcol07.out +++ b/src/test/regress/expected/tpchcol07.out @@ -39,8 +39,8 @@ group by l_year order by revenue; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Row Adapter -> Vector Sort Sort Key: (sum((lineitem.l_extendedprice * (1::numeric - lineitem.l_discount)))) @@ -55,16 +55,16 @@ order by -> Vector Sonic Hash Join Hash Cond: (orders.o_custkey = customer.c_custkey) -> CStore Scan on orders - -> Vector Nest Loop - Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) - -> Vector Sonic Hash Join - Hash Cond: (customer.c_nationkey = n2.n_nationkey) - -> CStore Scan on customer - -> CStore Scan on nation n2 - Filter: ((n_name = 'GERMANY'::bpchar) OR (n_name = 'FRANCE'::bpchar)) - -> Vector Materialize + -> Vector Sonic Hash Join + Hash Cond: (customer.c_nationkey = n2.n_nationkey) + -> CStore Scan on customer + -> Vector Nest Loop + Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) -> CStore Scan on nation n1 - Filter: ((n_name = 'FRANCE '::bpchar) OR (n_name = 'GERMANY'::bpchar)) + Filter: ((n_name = 'FRANCE '::bpchar) OR (n_name = 'FRANCE '::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar)) + -> Vector Materialize + -> CStore Scan on nation n2 + Filter: ((n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'FRANCE'::bpchar) OR (n_name = 'FRANCE'::bpchar)) -> CStore Scan on supplier (25 rows) diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index a7f5ec7f9..f6c7e2407 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -840,6 +840,10 @@ select * from tenk1 a join tenk1 b on explain (costs off) select * from tenk1 a join tenk1 b on (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.ten = 4); +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or + ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4); -- -- test ability to generate a suitable plan for a star-schema query