diff --git a/src/gausskernel/optimizer/path/Makefile b/src/gausskernel/optimizer/path/Makefile index 9eb235ed4..196afba5b 100644 --- a/src/gausskernel/optimizer/path/Makefile +++ b/src/gausskernel/optimizer/path/Makefile @@ -20,10 +20,10 @@ endif ifeq ($(enable_multiple_nodes), yes) OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \ - joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o + joinpath.o joinrels.o pathkeys.o tidpath.o else OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \ - joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o \ + joinpath.o joinrels.o pathkeys.o tidpath.o \ pgxcpath_single.o streampath_single.o endif diff --git a/src/gausskernel/optimizer/path/allpaths.cpp b/src/gausskernel/optimizer/path/allpaths.cpp index a5adfd88e..844ae5cfb 100755 --- a/src/gausskernel/optimizer/path/allpaths.cpp +++ b/src/gausskernel/optimizer/path/allpaths.cpp @@ -741,16 +741,6 @@ static void set_plain_rel_size(PlannerInfo* root, RelOptInfo* rel, RangeTblEntry if (rte->tablesample == NULL) { /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); - - /* - * Check to see if we can extract any restriction conditions from join - * quals that are OR-of-AND structures. If so, add them to the rel's - * restriction list, and redo the above steps. - */ - if (create_or_index_quals(root, rel)) { - check_partial_indexes(root, rel); - set_baserel_size_estimates(root, rel); - } } else { /* Sampled relation */ set_tablesample_rel_size(root, rel, rte); diff --git a/src/gausskernel/optimizer/path/indxpath.cpp b/src/gausskernel/optimizer/path/indxpath.cpp index 219bdd172..93ff1b7f3 100755 --- a/src/gausskernel/optimizer/path/indxpath.cpp +++ b/src/gausskernel/optimizer/path/indxpath.cpp @@ -1521,7 +1521,8 @@ static Path* choose_bitmap_and(PlannerInfo* root, RelOptInfo* rel, List* paths, * we can remove this limitation. (But note that this also defends * against flat-out duplicate input paths, which can happen because * match_join_clauses_to_index will find the same OR join clauses that - * create_or_index_quals has pulled OR restriction clauses out of.) + * extract_restriction_or_clauses has pulled OR restriction clauses out + * of.) * * For the same reason, we reject AND combinations in which an index * predicate clause duplicates another clause. Here we find it necessary diff --git a/src/gausskernel/optimizer/path/orindxpath.cpp b/src/gausskernel/optimizer/path/orindxpath.cpp deleted file mode 100755 index 17bd7a022..000000000 --- a/src/gausskernel/optimizer/path/orindxpath.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* ------------------------------------------------------------------------- - * - * orindxpath.cpp - * Routines to find index paths that match a set of OR clauses - * - * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. - * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * src/gausskernel/optimizer/path/orindxpath.cpp - * - * ------------------------------------------------------------------------- - */ -#include "postgres.h" -#include "knl/knl_variable.h" - -#include "optimizer/cost.h" -#include "optimizer/paths.h" -#include "optimizer/restrictinfo.h" - -/* ---------- - * create_or_index_quals - * Examine join OR-of-AND quals to see if any useful restriction OR - * clauses can be extracted. If so, add them to the query. - * - * Although a join clause must reference other relations overall, - * an OR of ANDs clause might contain sub-clauses that reference just this - * relation and can be used to build a restriction clause. - * For example consider - * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); - * We can transform this into - * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) - * AND (a.x = 42 OR a.x = 44) - * AND (b.y = 43 OR b.z = 45); - * which opens the potential to build OR indexscans on a and b. In essence - * this is a partial transformation to CNF (AND of ORs format). It is not - * complete, however, because we do not unravel the original OR --- doing so - * would usually bloat the qualification expression to little gain. - * - * The added quals are partially redundant with the original OR, and therefore - * will cause the size of the joinrel to be underestimated when it is finally - * formed. (This would be true of a full transformation to CNF as well; the - * fault is not really in the transformation, but in clauselist_selectivity's - * inability to recognize redundant conditions.) To minimize the collateral - * damage, we want to minimize the number of quals added. Therefore we do - * not add every possible extracted restriction condition to the query. - * Instead, we search for the single restriction condition that generates - * the most useful (cheapest) OR indexscan, and add only that condition. - * This is a pretty ad-hoc heuristic, but quite useful. - * - * We can then compensate for the redundancy of the added qual by poking - * the recorded selectivity of the original OR clause, thereby ensuring - * the added qual doesn't change the estimated size of the joinrel when - * it is finally formed. This is a MAJOR HACK: it depends on the fact - * that clause selectivities are cached and on the fact that the same - * RestrictInfo node will appear in every joininfo list that might be used - * when the joinrel is formed. And it probably isn't right in cases where - * the size estimation is nonlinear (i.e., outer and IN joins). But it - * beats not doing anything. - * - * NOTE: one might think this messiness could be worked around by generating - * the indexscan path with a small path->rows value, and not touching the - * rel's baserestrictinfo or rel->rows. However, that does not work. - * The optimizer's fundamental design assumes that every general-purpose - * Path for a given relation generates the same number of rows. Without - * this assumption we'd not be able to optimize solely on the cost of Paths, - * but would have to take number of output rows into account as well. - * (The parameterized-paths stuff almost fixes this, but not quite...) - * - * 'rel' is the relation entry for which quals are to be created - * - * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE. - * If no quals available, returns FALSE and doesn't change rel. - * - * Note: check_partial_indexes() must have been run previously. - * ---------- - */ -bool create_or_index_quals(PlannerInfo* root, RelOptInfo* rel) -{ - BitmapOrPath* bestpath = NULL; - RestrictInfo* bestrinfo = NULL; - List* newrinfos = NIL; - RestrictInfo* or_rinfo = NULL; - Selectivity or_selec, orig_selec; - ListCell* i = NULL; - - /* Skip the whole mess if no indexes */ - if (rel->indexlist == NIL) - return false; - - /* - * Find potentially interesting OR joinclauses. We can use any joinclause - * that is considered safe to move to this rel by the parameterized-path - * machinery, even though what we are going to do with it is not exactly a - * parameterized path. - */ - foreach (i, rel->joininfo) { - RestrictInfo* rinfo = (RestrictInfo*)lfirst(i); - - if (restriction_is_or_clause(rinfo) && join_clause_is_movable_to(rinfo, rel->relid)) { - /* - * Use the generate_bitmap_or_paths() machinery to estimate the - * value of each OR clause. We can use regular restriction - * clauses along with the OR clause contents to generate - * indexquals. We pass restriction_only = true so that any - * sub-clauses that are actually joins will be ignored. - */ - List* orpaths = NIL; - ListCell* k = NULL; - - orpaths = generate_bitmap_or_paths(root, rel, list_make1(rinfo), rel->baserestrictinfo, true); - - if (rel->isPartitionedTable) { - orpaths = list_concat( - orpaths, GenerateBitmapOrPathsUseGPI(root, rel, list_make1(rinfo), rel->baserestrictinfo, true)); - } - - /* Locate the cheapest OR path */ - foreach (k, orpaths) { - BitmapOrPath* path = (BitmapOrPath*)lfirst(k); - - AssertEreport(IsA(path, BitmapOrPath), MOD_OPT, "Restriction information is incorrect"); - if (bestpath == NULL || path->path.total_cost < bestpath->path.total_cost) { - bestpath = path; - bestrinfo = rinfo; - } - } - } - } - - /* Fail if no suitable clauses found */ - if (bestpath == NULL) - return false; - - /* - * Convert the path's indexclauses structure to a RestrictInfo tree. We - * include any partial-index predicates so as to get a reasonable - * representation of what the path is actually scanning. - */ - newrinfos = make_restrictinfo_from_bitmapqual((Path*)bestpath, true, true); - - /* It's possible we get back something other than a single OR clause */ - if (list_length(newrinfos) != 1) - return false; - or_rinfo = (RestrictInfo*)linitial(newrinfos); - AssertEreport(IsA(or_rinfo, RestrictInfo), MOD_OPT, "Restriction clause does not contain OR"); - if (!restriction_is_or_clause(or_rinfo)) - return false; - - /* - * OK, add it to the rel's restriction list. - */ - rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos); - rel->baserestrict_min_security = Min(rel->baserestrict_min_security, or_rinfo->security_level); - - /* - * Adjust the original OR clause's cached selectivity to compensate for - * the selectivity of the added (but redundant) lower-level qual. This - * should result in the join rel getting approximately the same rows - * estimate as it would have gotten without all these shenanigans. (XXX - * major hack alert ... this depends on the assumption that the - * selectivity will stay cached ...) - * we don't need cache the selectivity because the index's selectivity is not accurate. - */ - or_selec = clause_selectivity(root, (Node*)or_rinfo, 0, JOIN_INNER, NULL, false); - if (or_selec > 0 && or_selec < 1) { - orig_selec = clause_selectivity(root, (Node*)bestrinfo, 0, JOIN_INNER, NULL, false); - bestrinfo->norm_selec = orig_selec / or_selec; - /* clamp result to sane range */ - if (bestrinfo->norm_selec > 1) - bestrinfo->norm_selec = 1; - /* It isn't an outer join clause, so no need to adjust outer_selec */ - } - - /* Tell caller to recompute partial index status and rowcount estimate */ - return true; -} - diff --git a/src/gausskernel/optimizer/plan/planmain.cpp b/src/gausskernel/optimizer/plan/planmain.cpp index afdd839f4..beff76b73 100755 --- a/src/gausskernel/optimizer/plan/planmain.cpp +++ b/src/gausskernel/optimizer/plan/planmain.cpp @@ -27,6 +27,7 @@ #include "parser/parse_hint.h" #include "pgxc/pgxc.h" #include "optimizer/cost.h" +#include "optimizer/orclauses.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/placeholder.h" @@ -236,6 +237,12 @@ void query_planner(PlannerInfo* root, List* tlist, double tuple_fraction, double */ add_placeholders_to_base_rels(root); + /* + * Look for join OR clauses that we can extract single-relation + * restriction OR clauses from. + */ + extract_restriction_or_clauses(root); + /* * We should now have size estimates for every actual table involved in * the query, and we also know which if any have been deleted from the diff --git a/src/gausskernel/optimizer/util/Makefile b/src/gausskernel/optimizer/util/Makefile index e0179ebd4..1d959795e 100644 --- a/src/gausskernel/optimizer/util/Makefile +++ b/src/gausskernel/optimizer/util/Makefile @@ -20,11 +20,11 @@ endif SUBDIRS = learn ifeq ($(enable_multiple_nodes), yes) -OBJS = clauses.o joininfo.o pathnode.o placeholder.o plancat.o predtest.o \ +OBJS = clauses.o joininfo.o orclauses.o pathnode.o placeholder.o plancat.o predtest.o \ relnode.o restrictinfo.o tlist.o var.o pruning.o randomplan.o optimizerdebug.o planmem_walker.o \ nodegroups.o plananalyzer.o optcommon.o dataskew.o autoanalyzer.o bucketinfo.o bucketpruning.o else -OBJS = clauses.o joininfo.o pathnode.o placeholder.o plancat.o predtest.o \ +OBJS = clauses.o joininfo.o orclauses.o pathnode.o placeholder.o plancat.o predtest.o \ relnode.o restrictinfo.o tlist.o var.o pgxcship_single.o pruning.o randomplan.o optimizerdebug.o planmem_walker.o \ nodegroups.o plananalyzer.o optcommon.o dataskew.o autoanalyzer.o bucketinfo.o bucketpruning.o endif diff --git a/src/gausskernel/optimizer/util/orclauses.cpp b/src/gausskernel/optimizer/util/orclauses.cpp new file mode 100644 index 000000000..1b024b7f7 --- /dev/null +++ b/src/gausskernel/optimizer/util/orclauses.cpp @@ -0,0 +1,332 @@ +/*------------------------------------------------------------------------- + * + * orclauses.cpp + * Routines to extract restriction OR clauses from join OR clauses + * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * + * IDENTIFICATION + * src/gausskernel/optimizer/util/orclauses.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/orclauses.h" +#include "optimizer/restrictinfo.h" + +static bool is_safe_restriction_clause_for(RestrictInfo* rinfo, RelOptInfo* rel); +static Expr* extract_or_clause(RestrictInfo* or_rinfo, RelOptInfo* rel); +static void consider_new_or_clause(PlannerInfo* root, RelOptInfo* rel, Expr* orclause, RestrictInfo* join_or_rinfo); + +/* + * extract_restriction_or_clauses + * Examine join OR-of-AND clauses to see if any useful restriction OR + * clauses can be extracted. If so, add them to the query. + * + * Although a join clause must reference multiple relations overall, + * an OR of ANDs clause might contain sub-clauses that reference just one + * relation and can be used to build a restriction clause for that rel. + * For example consider + * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); + * We can transform this into + * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) + * AND (a.x = 42 OR a.x = 44) + * AND (b.y = 43 OR b.z = 45); + * which allows the latter clauses to be applied during the scans of a and b, + * perhaps as index qualifications, and in any case reducing the number of + * rows arriving at the join. In essence this is a partial transformation to + * CNF (AND of ORs format). It is not complete, however, because we do not + * unravel the original OR --- doing so would usually bloat the qualification + * expression to little gain. + * + * The added quals are partially redundant with the original OR, and therefore + * would cause the size of the joinrel to be underestimated when it is finally + * formed. (This would be true of a full transformation to CNF as well; the + * fault is not really in the transformation, but in clauselist_selectivity's + * inability to recognize redundant conditions.) We can compensate for this + * redundancy by changing the cached selectivity of the original OR clause, + * cancelling out the (valid) reduction in the estimated sizes of the base + * relations so that the estimated joinrel size remains the same. This is + * a MAJOR HACK: it depends on the fact that clause selectivities are cached + * and on the fact that the same RestrictInfo node will appear in every + * joininfo list that might be used when the joinrel is formed. + * And it doesn't work in cases where the size estimation is nonlinear + * (i.e., outer and IN joins). But it beats not doing anything. + * + * We examine each base relation to see if join clauses associated with it + * contain extractable restriction conditions. If so, add those conditions + * to the rel's baserestrictinfo and update the cached selectivities of the + * join clauses. Note that the same join clause will be examined afresh + * from the point of view of each baserel that participates in it, so its + * cached selectivity may get updated multiple times. + */ +void extract_restriction_or_clauses(PlannerInfo* root) +{ + /* Examine each baserel for potential join OR clauses */ + for (int rti = 1; rti < root->simple_rel_array_size; rti++) { + RelOptInfo* rel = root->simple_rel_array[rti]; + ListCell* lc = NULL; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) { + continue; + } + + Assert(rel->relid == (uint)rti); /* sanity check on array */ + + /* ignore RTEs that are "other rels" */ + if (rel->reloptkind != RELOPT_BASEREL) { + continue; + } + + /* + * Find potentially interesting OR joinclauses. We can use any + * joinclause that is considered safe to move to this rel by the + * parameterized-path machinery, even though what we are going to do + * with it is not exactly a parameterized path. + * + * However, it seems best to ignore clauses that have been marked + * redundant (by setting norm_selec > 1). That likely can't happen + * for OR clauses, but let's be safe. + */ + foreach (lc, rel->joininfo) { + RestrictInfo* rinfo = (RestrictInfo*)lfirst(lc); + + if (restriction_is_or_clause(rinfo) && join_clause_is_movable_to(rinfo, rel->relid) && + rinfo->norm_selec <= 1) { + /* Try to extract a qual for this rel only */ + Expr* orclause = extract_or_clause(rinfo, rel); + + /* + * If successful, decide whether we want to use the clause, + * and insert it into the rel's restrictinfo list if so. + */ + if (orclause) { + consider_new_or_clause(root, rel, orclause, rinfo); + } + } + } + } +} + +/* + * Is the given primitive (non-OR) RestrictInfo safe to move to the rel? + */ +static bool is_safe_restriction_clause_for(RestrictInfo* rinfo, RelOptInfo* rel) +{ + /* + * We want clauses that mention the rel, and only the rel. So in + * particular pseudoconstant clauses can be rejected quickly. Then check + * the clause's Var membership. + */ + if (rinfo->pseudoconstant) { + return false; + } + if (!bms_equal(rinfo->clause_relids, rel->relids)) { + return false; + } + + /* We don't want extra evaluations of any volatile functions */ + if (contain_volatile_functions((Node*)rinfo->clause)) { + return false; + } + + return true; +} + +/* + * Try to extract a restriction clause mentioning only "rel" from the given + * join OR-clause. + * + * We must be able to extract at least one qual for this rel from each of + * the arms of the OR, else we can't use it. + * + * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL + * if no OR clause could be extracted. + */ +static Expr* extract_or_clause(RestrictInfo* or_rinfo, RelOptInfo* rel) +{ + List* clauselist = NIL; + ListCell* lc = NULL; + + /* + * Scan each arm of the input OR clause. Notice we descend into + * or_rinfo->orclause, which has RestrictInfo nodes embedded below the + * toplevel OR/AND structure. This is useful because we can use the info + * in those nodes to make is_safe_restriction_clause_for()'s checks + * cheaper. We'll strip those nodes from the returned tree, though, + * meaning that fresh ones will be built if the clause is accepted as a + * restriction clause. This might seem wasteful --- couldn't we re-use + * the existing RestrictInfos? But that'd require assuming that + * selectivity and other cached data is computed exactly the same way for + * a restriction clause as for a join clause, which seems undesirable. + */ + Assert(or_clause((Node*)or_rinfo->orclause)); + foreach (lc, ((BoolExpr*)or_rinfo->orclause)->args) { + Node* orarg = (Node*)lfirst(lc); + List* subclauses = NIL; + Node* subclause = NULL; + + /* OR arguments should be ANDs or sub-RestrictInfos */ + if (and_clause(orarg)) { + List* andargs = ((BoolExpr*)orarg)->args; + ListCell* lc2 = NULL; + + foreach (lc2, andargs) { + RestrictInfo* rinfo = (RestrictInfo*)lfirst(lc2); + + Assert(IsA(rinfo, RestrictInfo)); + if (restriction_is_or_clause(rinfo)) { + /* + * Recurse to deal with nested OR. Note we *must* recurse + * here, this isn't just overly-tense optimization: we + * have to descend far enough to find and strip all + * RestrictInfos in the expression. + */ + Expr* suborclause = NULL; + + suborclause = extract_or_clause(rinfo, rel); + if (suborclause) { + subclauses = lappend(subclauses, suborclause); + } + } else if (is_safe_restriction_clause_for(rinfo, rel)) { + subclauses = lappend(subclauses, rinfo->clause); + } + } + } else { + Assert(IsA(orarg, RestrictInfo)); + Assert(!restriction_is_or_clause((RestrictInfo*)orarg)); + if (is_safe_restriction_clause_for((RestrictInfo*)orarg, rel)) { + subclauses = lappend(subclauses, ((RestrictInfo*)orarg)->clause); + } + } + + /* + * If nothing could be extracted from this arm, we can't do anything + * with this OR clause. + */ + if (subclauses == NIL) { + return NULL; + } + + /* + * OK, add subclause(s) to the result OR. If we found more than one, + * we need an AND node. But if we found only one, and it is itself an + * OR node, add its subclauses to the result instead; this is needed + * to preserve AND/OR flatness (ie, no OR directly underneath OR). + */ + clauselist = lappend(clauselist, make_ands_explicit(subclauses)); + subclause = (Node*)make_ands_explicit(subclauses); + if (or_clause(subclause)) { + clauselist = list_concat(clauselist, list_copy(((BoolExpr*)subclause)->args)); + } else { + clauselist = lappend(clauselist, subclause); + } + } + + /* + * If we got a restriction clause from every arm, wrap them up in an OR + * node. (In theory the OR node might be unnecessary, if there was only + * one arm --- but then the input OR node was also redundant.) + */ + if (clauselist != NIL) { + return make_orclause(clauselist); + } + return NULL; +} + +/* + * Consider whether a successfully-extracted restriction OR clause is + * actually worth using. If so, add it to the planner's data structures, + * and adjust the original join clause (join_or_rinfo) to compensate. + */ +static void consider_new_or_clause(PlannerInfo* root, RelOptInfo* rel, Expr* orclause, RestrictInfo* join_or_rinfo) +{ + RestrictInfo* or_rinfo = NULL; + Selectivity or_selec, orig_selec; + + /* + * Build a RestrictInfo from the new OR clause. We can assume it's valid + * as a base restriction clause. + */ + or_rinfo = make_restrictinfo(orclause, true, false, false, join_or_rinfo->security_level, NULL, NULL, NULL); + + /* + * Estimate its selectivity. (We could have done this earlier, but doing + * it on the RestrictInfo representation allows the result to get cached, + * saving work later.) + */ + or_selec = clause_selectivity(root, (Node*)or_rinfo, 0, JOIN_INNER, NULL); + + /* + * The clause is only worth adding to the query if it rejects a useful + * fraction of the base relation's rows; otherwise, it's just going to + * cause duplicate computation (since we will still have to check the + * original OR clause when the join is formed). Somewhat arbitrarily, we + * set the selectivity threshold at 0.9. + */ + if (or_selec > 0.9) { + return; /* forget it */ + } + /* + * OK, add it to the rel's restriction-clause list. + */ + rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo); + + /* + * Adjust the original join OR clause's cached selectivity to compensate + * for the selectivity of the added (but redundant) lower-level qual. This + * should result in the join rel getting approximately the same rows + * estimate as it would have gotten without all these shenanigans. + * + * XXX major hack alert: this depends on the assumption that the + * selectivity will stay cached. + * + * XXX another major hack: we adjust only norm_selec, the cached + * selectivity for JOIN_INNER semantics, even though the join clause + * might've been an outer-join clause. This is partly because we can't + * easily identify the relevant SpecialJoinInfo here, and partly because + * the linearity assumption we're making would fail anyway. (If it is an + * outer-join clause, "rel" must be on the nullable side, else we'd not + * have gotten here. So the computation of the join size is going to be + * quite nonlinear with respect to the size of "rel", so it's not clear + * how we ought to adjust outer_selec even if we could compute its + * original value correctly.) + */ + if (or_selec > 0) { + SpecialJoinInfo sjinfo; + + /* + * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare + * approx_tuple_count() in costsize.c.) + */ + sjinfo.type = T_SpecialJoinInfo; + sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids, rel->relids); + sjinfo.min_righthand = rel->relids; + sjinfo.syn_lefthand = sjinfo.min_lefthand; + sjinfo.syn_righthand = sjinfo.min_righthand; + sjinfo.jointype = JOIN_INNER; + /* we don't bother trying to make the remaining fields valid */ + sjinfo.lhs_strict = false; + sjinfo.delay_upper_joins = false; + sjinfo.join_quals = NIL; + + /* Compute inner-join size */ + orig_selec = clause_selectivity(root, (Node*)join_or_rinfo, 0, JOIN_INNER, &sjinfo); + + /* And hack cached selectivity so join size remains the same */ + join_or_rinfo->norm_selec = orig_selec / or_selec; + /* ensure result stays in sane range, in particular not "redundant" */ + if (join_or_rinfo->norm_selec > 1) { + join_or_rinfo->norm_selec = 1; + } + /* as explained above, we don't touch outer_selec */ + } +} diff --git a/src/include/optimizer/orclauses.h b/src/include/optimizer/orclauses.h new file mode 100644 index 000000000..dd8d35a6a --- /dev/null +++ b/src/include/optimizer/orclauses.h @@ -0,0 +1,21 @@ +/*------------------------------------------------------------------------- + * + * orclauses.h + * prototypes for orclauses.cpp. + * + * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/optimizer/orclauses.h + * + *------------------------------------------------------------------------- + */ +#ifndef ORCLAUSES_H +#define ORCLAUSES_H + +#include "nodes/relation.h" + +extern void extract_restriction_or_clauses(PlannerInfo *root); + +#endif /* ORCLAUSES_H */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 3a97187fb..b122d3eb4 100755 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -56,13 +56,6 @@ inline bool CheckIndexPathUseGPI(IndexPath* ipath) return ipath->indexinfo->isGlobal; } - -/* - * orindxpath.c - * additional routines for indexable OR clauses - */ -extern bool create_or_index_quals(PlannerInfo* root, RelOptInfo* rel); - /* * tidpath.h * routines to generate tid paths diff --git a/src/test/regress/expected/explain_pbe.out b/src/test/regress/expected/explain_pbe.out index e94e6c249..34d4e6e3e 100644 --- a/src/test/regress/expected/explain_pbe.out +++ b/src/test/regress/expected/explain_pbe.out @@ -3641,15 +3641,16 @@ INSERT INTO pbe_prunning_002 values (1, 1, 1); SET enable_pbe_optimization to false; PREPARE pa AS SELECT * FROM pbe_prunning_001 pp1 RIGHT OUTER JOIN pbe_prunning_002 pp2 ON pp1.c_int=pp2.c_int WHERE (pp1.c_int=$1 AND pp1.c_numeric=$2 AND pp2.c_int=$3 AND pp2.c_numeric=$4) or pp1.id=$5; EXPLAIN(COSTS FALSE) EXECUTE pa(10,10,10,10,11); - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------ Hash Join - Hash Cond: (pp1.c_int = pp2.c_int) + Hash Cond: (pp2.c_int = pp1.c_int) Join Filter: (((pp1.c_int = 10) AND (pp1.c_numeric = 10::numeric) AND (pp2.c_int = 10) AND (pp2.c_numeric = 10::numeric)) OR (pp1.id = 11)) - -> Seq Scan on pbe_prunning_001 pp1 + -> Seq Scan on pbe_prunning_002 pp2 -> Hash - -> Seq Scan on pbe_prunning_002 pp2 -(6 rows) + -> Seq Scan on pbe_prunning_001 pp1 + Filter: (((c_int = 10) AND (c_numeric = 10::numeric)) OR ((c_int = 10) AND (c_numeric = 10::numeric)) OR (id = 11) OR (id = 11)) +(7 rows) DEALLOCATE PREPARE pa; DROP TABLE pbe_prunning_001; diff --git a/src/test/regress/expected/hw_pbe.out b/src/test/regress/expected/hw_pbe.out index 347890a57..7efc2bd88 100644 --- a/src/test/regress/expected/hw_pbe.out +++ b/src/test/regress/expected/hw_pbe.out @@ -3639,15 +3639,16 @@ INSERT INTO pbe_prunning_001 values (1, 1, 1); INSERT INTO pbe_prunning_002 values (1, 1, 1); PREPARE pa AS SELECT * FROM pbe_prunning_001 pp1 RIGHT OUTER JOIN pbe_prunning_002 pp2 ON pp1.c_int=pp2.c_int WHERE (pp1.c_int=$1 AND pp1.c_numeric=$2 AND pp2.c_int=$3 AND pp2.c_numeric=$4) or pp1.id=$5; EXPLAIN(COSTS FALSE) EXECUTE pa(10,10,10,10,11); - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------ Hash Join - Hash Cond: (pp1.c_int = pp2.c_int) + Hash Cond: (pp2.c_int = pp1.c_int) Join Filter: (((pp1.c_int = $1) AND (pp1.c_numeric = $2) AND (pp2.c_int = $3) AND (pp2.c_numeric = $4)) OR (pp1.id = $5)) - -> Seq Scan on pbe_prunning_001 pp1 + -> Seq Scan on pbe_prunning_002 pp2 -> Hash - -> Seq Scan on pbe_prunning_002 pp2 -(6 rows) + -> Seq Scan on pbe_prunning_001 pp1 + Filter: (((c_int = $1) AND (c_numeric = $2)) OR ((c_int = $1) AND (c_numeric = $2)) OR (id = $5) OR (id = $5)) +(7 rows) DEALLOCATE PREPARE pa; DROP TABLE pbe_prunning_001; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 6a64a1717..fd8b690c9 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2668,6 +2668,10 @@ order by c.name; (3 rows) rollback; +analyse int4_tbl; +analyse int8_tbl; +analyse tenk1; +analyse text_tbl; -- -- test incorrect handling of placeholders that only appear in targetlists, -- per bug #6154 @@ -2724,18 +2728,17 @@ SELECT qq, unique1 ( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2 USING (qq) INNER JOIN tenk1 c ON qq = unique2; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Hash Join - Hash Cond: (COALESCE((COALESCE(a.q1, 0::bigint)), (COALESCE(b.q2, (-1)::bigint))) = c.unique2) + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Nested Loop -> Hash Full Join Hash Cond: (COALESCE(a.q1, 0::bigint) = COALESCE(b.q2, (-1)::bigint)) -> Seq Scan on int8_tbl a -> Hash -> Seq Scan on int8_tbl b - -> Hash - -> Seq Scan on tenk1 c -(9 rows) + -> Index Scan using tenk1_unique2 on tenk1 c + Index Cond: (unique2 = COALESCE((COALESCE(a.q1, 0::bigint)), (COALESCE(b.q2, (-1)::bigint)))) +(8 rows) SELECT qq, unique1 FROM @@ -2767,12 +2770,12 @@ order by 1,2; ----------------------------------------------------------- Sort Sort Key: t1.q1, t1.q2 - -> Hash Left Join - Hash Cond: (t1.q2 = t2.q1) + -> Hash Right Join + Hash Cond: (t2.q1 = t1.q2) Filter: (1 = (SubPlan 1)) - -> Seq Scan on int8_tbl t1 + -> Seq Scan on int8_tbl t2 -> Hash - -> Seq Scan on int8_tbl t2 + -> Seq Scan on int8_tbl t1 SubPlan 1 -> Limit -> Result @@ -2869,23 +2872,23 @@ select * from int4(sin(1)) q1, int4(sin(0)) q2 where q1 = thousand or q2 = thousand; - QUERY PLAN ------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------ Hash Join - Hash Cond: (int4_tbl.f1 = tenk1.twothousand) - -> Seq Scan on int4_tbl - -> Hash + Hash Cond: (tenk1.twothousand = int4_tbl.f1) + -> Nested Loop -> Nested Loop - -> Nested Loop - -> Function Scan on q1 - -> Function Scan on q2 - -> Bitmap Heap Scan on tenk1 - Recheck Cond: ((q1.q1 = thousand) OR (q2.q2 = thousand)) - -> BitmapOr - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (q1.q1 = thousand) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (q2.q2 = thousand) + -> Function Scan on q1 + -> Function Scan on q2 + -> Bitmap Heap Scan on tenk1 + Recheck Cond: ((q1.q1 = thousand) OR (q2.q2 = thousand)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (q1.q1 = thousand) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (q2.q2 = thousand) + -> Hash + -> Seq Scan on int4_tbl (15 rows) explain (num_nodes off, nodes off, costs off) @@ -2894,22 +2897,81 @@ select * from int4(sin(1)) q1, int4(sin(0)) q2 where thousand = (q1 + q2); - QUERY PLAN --------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------- Hash Join - Hash Cond: (int4_tbl.f1 = tenk1.twothousand) - -> Seq Scan on int4_tbl - -> Hash + Hash Cond: (tenk1.twothousand = int4_tbl.f1) + -> Nested Loop -> Nested Loop - -> Nested Loop - -> Function Scan on q1 - -> Function Scan on q2 - -> Bitmap Heap Scan on tenk1 - Recheck Cond: (thousand = (q1.q1 + q2.q2)) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = (q1.q1 + q2.q2)) + -> Function Scan on q1 + -> Function Scan on q2 + -> Bitmap Heap Scan on tenk1 + Recheck Cond: (thousand = (q1.q1 + q2.q2)) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = (q1.q1 + q2.q2)) + -> Hash + -> Seq Scan on int4_tbl (12 rows) +-- +-- test extraction of restriction OR clauses from join OR clause +-- (we used to only do this for indexable clauses) +-- +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4); + QUERY PLAN +------------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4))) + -> Seq Scan on tenk1 b + Filter: ((unique1 = 2) OR (unique1 = 2) OR (hundred = 4) OR (hundred = 4)) + -> Bitmap Heap Scan on tenk1 a + Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) + -> Bitmap Index Scan on tenk1_unique2 + Index Cond: (unique2 = 3) +(11 rows) + +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.ten = 4); + QUERY PLAN +--------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.ten = 4))) + -> Seq Scan on tenk1 b + Filter: ((unique1 = 2) OR (unique1 = 2) OR (ten = 4) OR (ten = 4)) + -> Bitmap Heap Scan on tenk1 a + Recheck Cond: ((unique1 = 1) OR (unique2 = 3)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 1) + -> Bitmap Index Scan on tenk1_unique2 + Index Cond: (unique2 = 3) +(11 rows) + +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or + ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR (((a.unique2 = 3) OR (a.unique2 = 7)) AND (b.hundred = 4))) + -> Seq Scan on tenk1 a + -> Materialize + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: ((unique1 = 2) OR (hundred = 4)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 2) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 4) +(11 rows) + -- -- test ability to generate a suitable plan for a star-schema query -- @@ -2917,21 +2979,17 @@ explain (costs off) select * from tenk1, int8_tbl a, int8_tbl b where thousand = a.q1 and tenthous = b.q1 and a.q2 = 1 and b.q2 = 2; - QUERY PLAN -------------------------------------------------------------- - Hash Join - Hash Cond: (tenk1.tenthous = b.q1) + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop + -> Seq Scan on int8_tbl b + Filter: (q2 = 2) -> Nested Loop -> Seq Scan on int8_tbl a Filter: (q2 = 1) - -> Bitmap Heap Scan on tenk1 - Recheck Cond: (thousand = a.q1) - -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: (thousand = a.q1) - -> Hash - -> Seq Scan on int8_tbl b - Filter: (q2 = 2) -(12 rows) + -> Index Scan using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand = a.q1) AND (tenthous = b.q1)) +(8 rows) -- -- test a corner case in which we shouldn't apply the star-schema optimization @@ -2949,24 +3007,23 @@ select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from left join tenk1 t2 on (subq1.y1 = t2.unique1) where t1.unique2 < 42 and t1.stringu1 > t2.stringu2; - QUERY PLAN ----------------------------------------------------------------------------------- - Hash Join - Hash Cond: (i1.f1 = "*VALUES*".column2) - -> Seq Scan on int4_tbl i1 - -> Hash + QUERY PLAN +---------------------------------------------------------------------------- + Nested Loop + Join Filter: (t1.stringu1 > t2.stringu2) + -> Nested Loop + Join Filter: ("*VALUES*".column2 = i1.f1) -> Nested Loop - Join Filter: (t1.stringu1 > t2.stringu2) -> Nested Loop - -> Nested Loop - Join Filter: ("*VALUES*".column1 = "*VALUES*".column2) - -> Values Scan on "*VALUES*" - -> Values Scan on "*VALUES*" - -> Index Scan using tenk1_unique2 on tenk1 t1 - Index Cond: ((unique2 = (11)) AND (unique2 < 42)) - -> Index Scan using tenk1_unique1 on tenk1 t2 - Index Cond: (unique1 = "*VALUES*".column1) -(15 rows) + Join Filter: ("*VALUES*".column1 = "*VALUES*".column2) + -> Values Scan on "*VALUES*" + -> Values Scan on "*VALUES*" + -> Index Scan using tenk1_unique2 on tenk1 t1 + Index Cond: ((unique2 = (11)) AND (unique2 < 42)) + -> Seq Scan on int4_tbl i1 + -> Index Scan using tenk1_unique1 on tenk1 t2 + Index Cond: (unique1 = "*VALUES*".column1) +(14 rows) select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from tenk1 t1 @@ -3033,24 +3090,23 @@ select count(*) from tenk1 a join tenk1 b on a.unique1 = b.unique2 left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand join int4_tbl on b.thousand = f1; - QUERY PLAN -------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------- Aggregate - -> Hash Join - Hash Cond: (b.thousand = int4_tbl.f1) - -> Hash Left Join - Hash Cond: (a.thousand = c.thousand) - Join Filter: (a.unique2 = b.unique1) - -> Hash Join - Hash Cond: (a.unique1 = b.unique2) - -> Seq Scan on tenk1 a - -> Hash - -> Seq Scan on tenk1 b - -> Hash - -> Index Only Scan using tenk1_thous_tenthous on tenk1 c - -> Hash - -> Seq Scan on int4_tbl -(15 rows) + -> Nested Loop Left Join + Join Filter: (a.unique2 = b.unique1) + -> Nested Loop + -> Nested Loop + -> Seq Scan on int4_tbl + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: (thousand = int4_tbl.f1) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = int4_tbl.f1) + -> Index Scan using tenk1_unique1 on tenk1 a + Index Cond: (unique1 = b.unique2) + -> Index Only Scan using tenk1_thous_tenthous on tenk1 c + Index Cond: (thousand = a.thousand) +(14 rows) select count(*) from tenk1 a join tenk1 b on a.unique1 = b.unique2 @@ -3068,29 +3124,24 @@ select b.unique1 from join int4_tbl i1 on b.thousand = f1 right join int4_tbl i2 on i2.f1 = b.tenthous order by 1; - QUERY PLAN -------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------- Sort Sort Key: b.unique1 - -> Hash Right Join - Hash Cond: (b.tenthous = i2.f1) - -> Hash Join - Hash Cond: (b.thousand = i1.f1) - -> Hash Left Join - Hash Cond: (a.thousand = c.thousand) - Join Filter: (b.unique1 = 42) - -> Hash Join - Hash Cond: (a.unique1 = b.unique2) - -> Seq Scan on tenk1 a - -> Hash - -> Seq Scan on tenk1 b - -> Hash - -> Index Only Scan using tenk1_thous_tenthous on tenk1 c - -> Hash - -> Seq Scan on int4_tbl i1 - -> Hash - -> Seq Scan on int4_tbl i2 -(20 rows) + -> Nested Loop Left Join + -> Seq Scan on int4_tbl i2 + -> Nested Loop Left Join + Join Filter: (b.unique1 = 42) + -> Nested Loop + -> Nested Loop + -> Seq Scan on int4_tbl i1 + -> Index Scan using tenk1_thous_tenthous on tenk1 b + Index Cond: ((thousand = i1.f1) AND (i2.f1 = tenthous)) + -> Index Scan using tenk1_unique1 on tenk1 a + Index Cond: (unique1 = b.unique2) + -> Index Only Scan using tenk1_thous_tenthous on tenk1 c + Index Cond: (thousand = a.thousand) +(15 rows) select b.unique1 from tenk1 a join tenk1 b on a.unique1 = b.unique2 @@ -3117,13 +3168,12 @@ select q1, unique2, thousand, hundred where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123); QUERY PLAN -------------------------------------------------------------------------------------- - Hash Left Join - Hash Cond: (a.q1 = b.unique2) + Nested Loop Left Join Filter: ((COALESCE(b.thousand, 123) = a.q1) AND (a.q1 = COALESCE(b.hundred, 123))) -> Seq Scan on int8_tbl a - -> Hash - -> Seq Scan on tenk1 b -(6 rows) + -> Index Scan using tenk1_unique2 on tenk1 b + Index Cond: (a.q1 = unique2) +(5 rows) select q1, unique2, thousand, hundred from int8_tbl a left join tenk1 b on q1 = unique2 @@ -3138,13 +3188,12 @@ select f1, unique2, case when unique2 is null then f1 else 0 end where (case when unique2 is null then f1 else 0 end) = 0; QUERY PLAN -------------------------------------------------------------------- - Hash Left Join - Hash Cond: (a.f1 = b.unique2) + Nested Loop Left Join Filter: (CASE WHEN (b.unique2 IS NULL) THEN a.f1 ELSE 0 END = 0) -> Seq Scan on int4_tbl a - -> Hash - -> Index Only Scan using tenk1_unique2 on tenk1 b -(6 rows) + -> Index Only Scan using tenk1_unique2 on tenk1 b + Index Cond: (unique2 = a.f1) +(5 rows) select f1, unique2, case when unique2 is null then f1 else 0 end from int4_tbl a left join tenk1 b on f1 = unique2 @@ -3190,16 +3239,15 @@ select * from -- explain (num_nodes off, nodes off, costs off) select * from int4_tbl a left join tenk1 b on f1 = unique2 where f1 = 0; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +------------------------------------------------- Nested Loop Left Join Join Filter: (a.f1 = b.unique2) -> Seq Scan on int4_tbl a Filter: (f1 = 0) - -> Materialize - -> Index Scan using tenk1_unique2 on tenk1 b - Index Cond: (unique2 = 0) -(7 rows) + -> Index Scan using tenk1_unique2 on tenk1 b + Index Cond: (unique2 = 0) +(6 rows) explain (num_nodes off, nodes off, costs off) select * from tenk1 a full join tenk1 b using(unique2) where unique2 = 42; @@ -3230,15 +3278,15 @@ INSERT INTO c VALUES (0), (1); INSERT INTO d VALUES (1,3), (2,2), (3,1); -- all three cases should be optimizable into a simple seqscan explain (verbose on, costs off, nodes off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id; ---?.*QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: a.id, a.b_id (2 rows) explain (verbose on, costs off, nodes off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id; ---?.*QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: b.id, b.c_id (2 rows) @@ -3246,8 +3294,8 @@ explain (verbose on, costs off, nodes off) SELECT b.* FROM b LEFT JOIN c ON b.c_ explain (verbose on, costs off, nodes off) SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id) ON (a.b_id = b.id); ---?QUERY PLAN.* ---?-----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: a.id, a.b_id (2 rows) @@ -3257,8 +3305,8 @@ explain (verbose on, costs off, nodes off) select id from a where id in ( select b.id from b left join c on b.id = c.id ); ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Hash Join Output: a.id Hash Cond: (a.id = b.id) @@ -3275,8 +3323,8 @@ select id from a where id in ( explain (verbose on, costs off, nodes off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id and d.b = s.c_id; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: d.a, d.b (2 rows) @@ -3285,8 +3333,8 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (verbose on, costs off, nodes off) select d.* from d left join (select distinct * from b) s on d.a = s.id and d.b = s.c_id; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: d.a, d.b (2 rows) @@ -3296,8 +3344,8 @@ select d.* from d left join (select distinct * from b) s explain (verbose on, costs off, nodes off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Hash Right Join Output: d.a, d.b Hash Cond: (b.id = d.a) @@ -3316,8 +3364,8 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (verbose on, costs off, nodes off) select d.* from d left join (select distinct * from b) s on d.a = s.id; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Hash Right Join Output: d.a, d.b Hash Cond: (b.id = d.a) @@ -3337,8 +3385,8 @@ select d.* from d left join (select distinct * from b) s explain (verbose on, costs off, nodes off) select d.* from d left join (select id from a union select id from b) s on d.a = s.id; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* --? Seq Scan on pg_temp_datanod.* Output: d.a, d.b (2 rows) @@ -3347,8 +3395,8 @@ select d.* from d left join (select id from a union select id from b) s explain (verbose on, costs off, nodes off) select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4 on i8.q1 = i4.f1; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Seq Scan on public.int8_tbl i8 Output: i8.q1, i8.q2 (2 rows) @@ -3371,8 +3419,8 @@ select p.* from parent p left join child c on (p.k = c.k) order by 1,2; explain (verbose on, costs off, nodes off) select p.* from parent p left join child c on (p.k = c.k) order by 1,2; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Sort Output: p.k, p.pd Sort Key: p.k, p.pd @@ -3395,8 +3443,8 @@ explain (verbose on, costs off, nodes off) select p.*, linked from parent p left join (select c.*, true as linked from child c) as ss on (p.k = ss.k) order by p.k; ---?QUERY PLAN.* ---?----------------------------.* +--?.* +--?.* Merge Left Join Output: p.k, p.pd, (true) Merge Cond: (p.k = c.k) @@ -3418,8 +3466,8 @@ explain (verbose on, costs off, nodes off) select p.* from parent p left join child c on (p.k = c.k) where p.k = 1 and p.k = 2; ---?QUERY PLAN.* ---?----------------.* +--?.* +--?.* Result Output: p.k, p.pd One-Time Filter: false @@ -3519,8 +3567,8 @@ select t1.* from on (t1.f1 = b1.d1) left join int4_tbl i4 on (i8.q2 = i4.f1); ---?QUERY PLAN.* ---?----------------------------.* + QUERY PLAN +---------------------------------------------------------------------------- Hash Left Join Output: t1.f1 Hash Cond: (i8.q2 = i4.f1) @@ -3529,33 +3577,35 @@ select t1.* from Join Filter: (t1.f1 = '***'::text) -> Seq Scan on public.text_tbl t1 Output: t1.f1 - -> Hash Right Join + -> Materialize Output: i8.q2 - Hash Cond: ((NULL::integer) = i8b1.q2) -> Hash Right Join - Output: i8.q2, (NULL::integer) - Hash Cond: (i8b2.q1 = i8.q1) - -> Nested Loop - Output: i8b2.q1, NULL::integer - -> Seq Scan on public.int4_tbl i4b2 - Output: i4b2.f1 - -> Materialize - Output: i8b2.q1 + Output: i8.q2 + Hash Cond: ((NULL::integer) = i8b1.q2) + -> Hash Right Join + Output: i8.q2, (NULL::integer) + Hash Cond: (i8b2.q1 = i8.q1) + -> Nested Loop + Output: i8b2.q1, NULL::integer -> Seq Scan on public.int8_tbl i8b2 - Output: i8b2.q1 - -> Hash - Output: i8.q1, i8.q2 - -> Seq Scan on public.int8_tbl i8 + Output: i8b2.q1, i8b2.q2 + -> Materialize + Output: i4b2.f1 + -> Seq Scan on public.int4_tbl i4b2 + Output: i4b2.f1 + -> Hash Output: i8.q1, i8.q2 - -> Hash - Output: i8b1.q2 - -> Seq Scan on public.int8_tbl i8b1 + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + -> Hash Output: i8b1.q2 + -> Seq Scan on public.int8_tbl i8b1 + Output: i8b1.q2 -> Hash Output: i4.f1 -> Seq Scan on public.int4_tbl i4 Output: i4.f1 -(34 rows) +(36 rows) select t1.* from text_tbl t1 @@ -3586,8 +3636,8 @@ select t1.* from on (t1.f1 = b1.d1) left join int4_tbl i4 on (i8.q2 = i4.f1); ---?QUERY PLAN.* ---?----------------------------.* + QUERY PLAN +---------------------------------------------------------------------------- Hash Left Join Output: t1.f1 Hash Cond: (i8.q2 = i4.f1) @@ -3596,34 +3646,36 @@ select t1.* from Join Filter: (t1.f1 = '***'::text) -> Seq Scan on public.text_tbl t1 Output: t1.f1 - -> Hash Right Join + -> Materialize Output: i8.q2 - Hash Cond: ((NULL::integer) = i8b1.q2) -> Hash Right Join - Output: i8.q2, (NULL::integer) - Hash Cond: (i8b2.q1 = i8.q1) - -> Hash Join - Output: i8b2.q1, NULL::integer - Hash Cond: (i4b2.f1 = i8b2.q1) - -> Seq Scan on public.int4_tbl i4b2 - Output: i4b2.f1 - -> Hash - Output: i8b2.q1 + Output: i8.q2 + Hash Cond: ((NULL::integer) = i8b1.q2) + -> Hash Right Join + Output: i8.q2, (NULL::integer) + Hash Cond: (i8b2.q1 = i8.q1) + -> Hash Join + Output: i8b2.q1, NULL::integer + Hash Cond: (i8b2.q1 = i4b2.f1) -> Seq Scan on public.int8_tbl i8b2 - Output: i8b2.q1 - -> Hash - Output: i8.q1, i8.q2 - -> Seq Scan on public.int8_tbl i8 + Output: i8b2.q1, i8b2.q2 + -> Hash + Output: i4b2.f1 + -> Seq Scan on public.int4_tbl i4b2 + Output: i4b2.f1 + -> Hash Output: i8.q1, i8.q2 - -> Hash - Output: i8b1.q2 - -> Seq Scan on public.int8_tbl i8b1 + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + -> Hash Output: i8b1.q2 + -> Seq Scan on public.int8_tbl i8b1 + Output: i8b1.q2 -> Hash Output: i4.f1 -> Seq Scan on public.int4_tbl i4 Output: i4.f1 -(35 rows) +(37 rows) select t1.* from @@ -3672,12 +3724,10 @@ select * from -> Seq Scan on public.int8_tbl i8 Output: i8.q1, i8.q2 Filter: (i8.q2 = 456) - -> Materialize + -> Seq Scan on public.text_tbl t1 Output: t1.f1 - -> Seq Scan on public.text_tbl t1 - Output: t1.f1 - Filter: (t1.f1 = 'doh!'::text) -(23 rows) + Filter: (t1.f1 = 'doh!'::text) +(21 rows) select * from text_tbl t1 diff --git a/src/test/regress/expected/tpch_disablevec07.out b/src/test/regress/expected/tpch_disablevec07.out index 1da4618e6..79a32e631 100644 --- a/src/test/regress/expected/tpch_disablevec07.out +++ b/src/test/regress/expected/tpch_disablevec07.out @@ -39,7 +39,7 @@ group by l_year order by revenue; - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Sort Sort Key: (sum((lineitem.l_extendedprice * (1::numeric - lineitem.l_discount)))) @@ -67,13 +67,15 @@ order by Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) -> Row Adapter -> CStore Scan on nation n1 + Filter: ((n_name = 'FRANCE '::bpchar) OR (n_name = 'FRANCE '::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar)) -> Materialize -> Row Adapter -> CStore Scan on nation n2 + Filter: ((n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'FRANCE'::bpchar) OR (n_name = 'FRANCE'::bpchar)) -> Hash -> Row Adapter -> CStore Scan on supplier -(32 rows) +(34 rows) select supp_nation, diff --git a/src/test/regress/expected/tpchcol07.out b/src/test/regress/expected/tpchcol07.out index 60c9b821e..8b5cca56d 100644 --- a/src/test/regress/expected/tpchcol07.out +++ b/src/test/regress/expected/tpchcol07.out @@ -39,7 +39,7 @@ group by l_year order by revenue; - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Row Adapter -> Vector Sort @@ -61,10 +61,12 @@ order by -> Vector Nest Loop Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) -> CStore Scan on nation n1 + Filter: ((n_name = 'FRANCE '::bpchar) OR (n_name = 'FRANCE '::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar)) -> Vector Materialize -> CStore Scan on nation n2 + Filter: ((n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'FRANCE'::bpchar) OR (n_name = 'FRANCE'::bpchar)) -> CStore Scan on supplier -(23 rows) +(25 rows) select supp_nation, diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index f813e8f4d..a1d89b23e 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -722,6 +722,10 @@ order by c.name; rollback; +analyse int4_tbl; +analyse int8_tbl; +analyse tenk1; +analyse text_tbl; -- -- test incorrect handling of placeholders that only appear in targetlists, -- per bug #6154 @@ -825,6 +829,22 @@ select * from int4(sin(0)) q2 where thousand = (q1 + q2); +-- +-- test extraction of restriction OR clauses from join OR clause +-- (we used to only do this for indexable clauses) +-- + +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4); +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.ten = 4); +explain (costs off) +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or + ((a.unique2 = 3 or a.unique2 = 7) and b.hundred = 4); + -- -- test ability to generate a suitable plan for a star-schema query --