From 802f28f140d425f4a087eb9c68b419700115e2b7 Mon Sep 17 00:00:00 2001 From: wuyuechuan Date: Sat, 9 Jan 2021 11:11:23 +0800 Subject: [PATCH] Extract restriction OR clauses whether or not they are indexable --- .gitignore | 3 + src/gausskernel/optimizer/path/Makefile | 4 +- src/gausskernel/optimizer/path/allpaths.cpp | 10 - src/gausskernel/optimizer/path/indxpath.cpp | 3 +- src/gausskernel/optimizer/path/orindxpath.cpp | 180 ---------- src/gausskernel/optimizer/plan/planmain.cpp | 7 + src/gausskernel/optimizer/util/Makefile | 2 +- src/gausskernel/optimizer/util/orclauses.cpp | 332 ++++++++++++++++++ src/include/optimizer/orclauses.h | 21 ++ src/test/regress/expected/explain_pbe.out | 22 +- src/test/regress/expected/hw_pbe.out | 22 +- .../regress/expected/tpch_disablevec07.out | 4 +- src/test/regress/expected/tpchcol07.out | 4 +- src/test/regress/parallel_schedule0 | 5 +- 14 files changed, 397 insertions(+), 222 deletions(-) create mode 100644 .gitignore delete mode 100644 src/gausskernel/optimizer/path/orindxpath.cpp create mode 100644 src/gausskernel/optimizer/util/orclauses.cpp create mode 100644 src/include/optimizer/orclauses.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..473b78c30 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/.gitee/ +/.vscode/ +/.idea/ diff --git a/src/gausskernel/optimizer/path/Makefile b/src/gausskernel/optimizer/path/Makefile index 9d4b16591..76daad113 100644 --- a/src/gausskernel/optimizer/path/Makefile +++ b/src/gausskernel/optimizer/path/Makefile @@ -20,11 +20,11 @@ endif ifeq ($(enable_multiple_nodes), yes) OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \ - joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o streampath_base.o \ + joinpath.o joinrels.o pathkeys.o tidpath.o streampath_base.o \ es_selectivity.o else OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \ - joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o \ + joinpath.o joinrels.o pathkeys.o tidpath.o 
\ pgxcpath_single.o streampath_single.o streampath_base.o es_selectivity.o endif diff --git a/src/gausskernel/optimizer/path/allpaths.cpp b/src/gausskernel/optimizer/path/allpaths.cpp index 1751c272d..dc033c59f 100755 --- a/src/gausskernel/optimizer/path/allpaths.cpp +++ b/src/gausskernel/optimizer/path/allpaths.cpp @@ -819,16 +819,6 @@ static void set_plain_rel_size(PlannerInfo* root, RelOptInfo* rel, RangeTblEntry if (rte->tablesample == NULL) { /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); - - /* - * Check to see if we can extract any restriction conditions from join - * quals that are OR-of-AND structures. If so, add them to the rel's - * restriction list, and redo the above steps. - */ - if (create_or_index_quals(root, rel)) { - check_partial_indexes(root, rel); - set_baserel_size_estimates(root, rel); - } } else { /* Sampled relation */ set_tablesample_rel_size(root, rel, rte); diff --git a/src/gausskernel/optimizer/path/indxpath.cpp b/src/gausskernel/optimizer/path/indxpath.cpp index afefa180b..5298565a9 100755 --- a/src/gausskernel/optimizer/path/indxpath.cpp +++ b/src/gausskernel/optimizer/path/indxpath.cpp @@ -1494,7 +1494,8 @@ static Path* choose_bitmap_and(PlannerInfo* root, RelOptInfo* rel, List* paths, * we can remove this limitation. (But note that this also defends * against flat-out duplicate input paths, which can happen because * match_join_clauses_to_index will find the same OR join clauses that - * create_or_index_quals has pulled OR restriction clauses out of.) + * extract_restriction_or_clauses has pulled OR restriction clauses out + * of.) * * For the same reason, we reject AND combinations in which an index * predicate clause duplicates another clause. 
Here we find it necessary diff --git a/src/gausskernel/optimizer/path/orindxpath.cpp b/src/gausskernel/optimizer/path/orindxpath.cpp deleted file mode 100644 index 17bd7a022..000000000 --- a/src/gausskernel/optimizer/path/orindxpath.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* ------------------------------------------------------------------------- - * - * orindxpath.cpp - * Routines to find index paths that match a set of OR clauses - * - * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. - * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * src/gausskernel/optimizer/path/orindxpath.cpp - * - * ------------------------------------------------------------------------- - */ -#include "postgres.h" -#include "knl/knl_variable.h" - -#include "optimizer/cost.h" -#include "optimizer/paths.h" -#include "optimizer/restrictinfo.h" - -/* ---------- - * create_or_index_quals - * Examine join OR-of-AND quals to see if any useful restriction OR - * clauses can be extracted. If so, add them to the query. - * - * Although a join clause must reference other relations overall, - * an OR of ANDs clause might contain sub-clauses that reference just this - * relation and can be used to build a restriction clause. - * For example consider - * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); - * We can transform this into - * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) - * AND (a.x = 42 OR a.x = 44) - * AND (b.y = 43 OR b.z = 45); - * which opens the potential to build OR indexscans on a and b. In essence - * this is a partial transformation to CNF (AND of ORs format). It is not - * complete, however, because we do not unravel the original OR --- doing so - * would usually bloat the qualification expression to little gain. 
- * - * The added quals are partially redundant with the original OR, and therefore - * will cause the size of the joinrel to be underestimated when it is finally - * formed. (This would be true of a full transformation to CNF as well; the - * fault is not really in the transformation, but in clauselist_selectivity's - * inability to recognize redundant conditions.) To minimize the collateral - * damage, we want to minimize the number of quals added. Therefore we do - * not add every possible extracted restriction condition to the query. - * Instead, we search for the single restriction condition that generates - * the most useful (cheapest) OR indexscan, and add only that condition. - * This is a pretty ad-hoc heuristic, but quite useful. - * - * We can then compensate for the redundancy of the added qual by poking - * the recorded selectivity of the original OR clause, thereby ensuring - * the added qual doesn't change the estimated size of the joinrel when - * it is finally formed. This is a MAJOR HACK: it depends on the fact - * that clause selectivities are cached and on the fact that the same - * RestrictInfo node will appear in every joininfo list that might be used - * when the joinrel is formed. And it probably isn't right in cases where - * the size estimation is nonlinear (i.e., outer and IN joins). But it - * beats not doing anything. - * - * NOTE: one might think this messiness could be worked around by generating - * the indexscan path with a small path->rows value, and not touching the - * rel's baserestrictinfo or rel->rows. However, that does not work. - * The optimizer's fundamental design assumes that every general-purpose - * Path for a given relation generates the same number of rows. Without - * this assumption we'd not be able to optimize solely on the cost of Paths, - * but would have to take number of output rows into account as well. - * (The parameterized-paths stuff almost fixes this, but not quite...) 
- * - * 'rel' is the relation entry for which quals are to be created - * - * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE. - * If no quals available, returns FALSE and doesn't change rel. - * - * Note: check_partial_indexes() must have been run previously. - * ---------- - */ -bool create_or_index_quals(PlannerInfo* root, RelOptInfo* rel) -{ - BitmapOrPath* bestpath = NULL; - RestrictInfo* bestrinfo = NULL; - List* newrinfos = NIL; - RestrictInfo* or_rinfo = NULL; - Selectivity or_selec, orig_selec; - ListCell* i = NULL; - - /* Skip the whole mess if no indexes */ - if (rel->indexlist == NIL) - return false; - - /* - * Find potentially interesting OR joinclauses. We can use any joinclause - * that is considered safe to move to this rel by the parameterized-path - * machinery, even though what we are going to do with it is not exactly a - * parameterized path. - */ - foreach (i, rel->joininfo) { - RestrictInfo* rinfo = (RestrictInfo*)lfirst(i); - - if (restriction_is_or_clause(rinfo) && join_clause_is_movable_to(rinfo, rel->relid)) { - /* - * Use the generate_bitmap_or_paths() machinery to estimate the - * value of each OR clause. We can use regular restriction - * clauses along with the OR clause contents to generate - * indexquals. We pass restriction_only = true so that any - * sub-clauses that are actually joins will be ignored. 
- */ - List* orpaths = NIL; - ListCell* k = NULL; - - orpaths = generate_bitmap_or_paths(root, rel, list_make1(rinfo), rel->baserestrictinfo, true); - - if (rel->isPartitionedTable) { - orpaths = list_concat( - orpaths, GenerateBitmapOrPathsUseGPI(root, rel, list_make1(rinfo), rel->baserestrictinfo, true)); - } - - /* Locate the cheapest OR path */ - foreach (k, orpaths) { - BitmapOrPath* path = (BitmapOrPath*)lfirst(k); - - AssertEreport(IsA(path, BitmapOrPath), MOD_OPT, "Restriction information is incorrect"); - if (bestpath == NULL || path->path.total_cost < bestpath->path.total_cost) { - bestpath = path; - bestrinfo = rinfo; - } - } - } - } - - /* Fail if no suitable clauses found */ - if (bestpath == NULL) - return false; - - /* - * Convert the path's indexclauses structure to a RestrictInfo tree. We - * include any partial-index predicates so as to get a reasonable - * representation of what the path is actually scanning. - */ - newrinfos = make_restrictinfo_from_bitmapqual((Path*)bestpath, true, true); - - /* It's possible we get back something other than a single OR clause */ - if (list_length(newrinfos) != 1) - return false; - or_rinfo = (RestrictInfo*)linitial(newrinfos); - AssertEreport(IsA(or_rinfo, RestrictInfo), MOD_OPT, "Restriction clause does not contain OR"); - if (!restriction_is_or_clause(or_rinfo)) - return false; - - /* - * OK, add it to the rel's restriction list. - */ - rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos); - rel->baserestrict_min_security = Min(rel->baserestrict_min_security, or_rinfo->security_level); - - /* - * Adjust the original OR clause's cached selectivity to compensate for - * the selectivity of the added (but redundant) lower-level qual. This - * should result in the join rel getting approximately the same rows - * estimate as it would have gotten without all these shenanigans. (XXX - * major hack alert ... this depends on the assumption that the - * selectivity will stay cached ...) 
- * we don't need cache the selectivity because the index's selectivity is not accurate. - */ - or_selec = clause_selectivity(root, (Node*)or_rinfo, 0, JOIN_INNER, NULL, false); - if (or_selec > 0 && or_selec < 1) { - orig_selec = clause_selectivity(root, (Node*)bestrinfo, 0, JOIN_INNER, NULL, false); - bestrinfo->norm_selec = orig_selec / or_selec; - /* clamp result to sane range */ - if (bestrinfo->norm_selec > 1) - bestrinfo->norm_selec = 1; - /* It isn't an outer join clause, so no need to adjust outer_selec */ - } - - /* Tell caller to recompute partial index status and rowcount estimate */ - return true; -} - diff --git a/src/gausskernel/optimizer/plan/planmain.cpp b/src/gausskernel/optimizer/plan/planmain.cpp index b4d1c5f4f..d6e8c63f0 100755 --- a/src/gausskernel/optimizer/plan/planmain.cpp +++ b/src/gausskernel/optimizer/plan/planmain.cpp @@ -27,6 +27,7 @@ #include "parser/parse_hint.h" #include "pgxc/pgxc.h" #include "optimizer/cost.h" +#include "optimizer/orclauses.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/placeholder.h" @@ -239,6 +240,12 @@ void query_planner(PlannerInfo* root, List* tlist, double tuple_fraction, double */ add_placeholders_to_base_rels(root); + /* + * Look for join OR clauses that we can extract single-relation + * restriction OR clauses from. 
+ */ + extract_restriction_or_clauses(root); + /* * We should now have size estimates for every actual table involved in * the query, and we also know which if any have been deleted from the diff --git a/src/gausskernel/optimizer/util/Makefile b/src/gausskernel/optimizer/util/Makefile index c08e349ea..a7abfbea8 100644 --- a/src/gausskernel/optimizer/util/Makefile +++ b/src/gausskernel/optimizer/util/Makefile @@ -19,7 +19,7 @@ endif SUBDIRS = learn -OBJS = clauses.o joininfo.o pathnode.o placeholder.o plancat.o predtest.o \ +OBJS = clauses.o joininfo.o orclauses.o pathnode.o placeholder.o plancat.o predtest.o \ relnode.o restrictinfo.o tlist.o var.o pruningboundary.o pgxcship.o pruning.o randomplan.o optimizerdebug.o planmem_walker.o \ nodegroups.o plananalyzer.o optcommon.o dataskew.o joinskewinfo.o autoanalyzer.o bucketinfo.o bucketpruning.o diff --git a/src/gausskernel/optimizer/util/orclauses.cpp b/src/gausskernel/optimizer/util/orclauses.cpp new file mode 100644 index 000000000..1b024b7f7 --- /dev/null +++ b/src/gausskernel/optimizer/util/orclauses.cpp @@ -0,0 +1,332 @@ +/*------------------------------------------------------------------------- + * + * orclauses.cpp + * Routines to extract restriction OR clauses from join OR clauses + * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
+ *
+ *
+ * IDENTIFICATION
+ *    src/gausskernel/optimizer/util/orclauses.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/orclauses.h"
+#include "optimizer/restrictinfo.h"
+
+static bool is_safe_restriction_clause_for(RestrictInfo* rinfo, RelOptInfo* rel);
+static Expr* extract_or_clause(RestrictInfo* or_rinfo, RelOptInfo* rel);
+static void consider_new_or_clause(PlannerInfo* root, RelOptInfo* rel, Expr* orclause, RestrictInfo* join_or_rinfo);
+
+/*
+ * extract_restriction_or_clauses
+ *    Scan join OR-of-AND clauses for restriction OR clauses that can be
+ *    pulled out for a single relation, and add the useful ones to the query.
+ *
+ * A join clause as a whole references several relations, but individual
+ * sub-clauses inside an OR of ANDs may reference only one of them.  Given
+ *      WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
+ * the query can be rewritten as
+ *      WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
+ *          AND (a.x = 42 OR a.x = 44)
+ *          AND (b.y = 43 OR b.z = 45);
+ * so that the two extra clauses are checked while scanning a and b
+ * (possibly as index qualifications), cutting the number of rows that reach
+ * the join.  This amounts to a partial conversion to CNF (AND of ORs); the
+ * original OR is deliberately left intact, since unravelling it would
+ * usually bloat the qualification expression for little benefit.
+ *
+ * The extracted quals partly duplicate the original OR, so left alone they
+ * would make the joinrel's size estimate too small.  (A full CNF conversion
+ * would have the same problem: clauselist_selectivity cannot recognize
+ * redundant conditions.)  To compensate, the cached selectivity of the
+ * original OR clause is changed so that it cancels the (valid) reduction in
+ * the base relations' estimated sizes, leaving the joinrel estimate as it
+ * was.  This is a MAJOR HACK: it relies on clause selectivities being cached
+ * and on the same RestrictInfo node appearing in every joininfo list that
+ * may be used to form the joinrel.  It also does not work where size
+ * estimation is nonlinear (i.e., outer and IN joins).  Still, it beats
+ * doing nothing.
+ *
+ * Every base relation is visited in turn.  For each one, the join clauses
+ * attached to it are checked for extractable restriction conditions; those
+ * found are added to the rel's baserestrictinfo and the join clause's cached
+ * selectivity is updated.  A join clause is looked at once per participating
+ * baserel, so its cached selectivity can be rewritten several times.
+ */
+void extract_restriction_or_clauses(PlannerInfo* root)
+{
+    /* Slot 0 is unused; walk the remaining simple-rel slots */
+    for (int relIndex = 1; relIndex < root->simple_rel_array_size; relIndex++) {
+        RelOptInfo* baserel = root->simple_rel_array[relIndex];
+        ListCell* cell = NULL;
+
+        /* non-baserel RTEs leave their slot empty */
+        if (baserel == NULL) {
+            continue;
+        }
+
+        Assert(baserel->relid == (uint)relIndex); /* slot index must agree with the rel */
+
+        /* "other rels" are not processed here */
+        if (baserel->reloptkind != RELOPT_BASEREL) {
+            continue;
+        }
+
+        /*
+         * Candidate clauses are OR join clauses that the parameterized-path
+         * machinery considers safe to move to this rel (even though the use
+         * made of them here is not exactly a parameterized path).
+         *
+         * Clauses flagged redundant (norm_selec set above 1) are skipped as
+         * well; that probably never happens for an OR clause, but the test
+         * is kept for safety.
+         */
+        foreach (cell, baserel->joininfo) {
+            RestrictInfo* joinclause = (RestrictInfo*)lfirst(cell);
+            Expr* extracted = NULL;
+
+            if (!restriction_is_or_clause(joinclause)) {
+                continue;
+            }
+            if (!join_clause_is_movable_to(joinclause, baserel->relid)) {
+                continue;
+            }
+            if (!(joinclause->norm_selec <= 1)) {
+                continue;
+            }
+
+            /* Build a qual that mentions this rel only, if one exists */
+            extracted = extract_or_clause(joinclause, baserel);
+
+            /* Let the cost-based filter decide whether to attach it to the rel */
+            if (extracted != NULL) {
+                consider_new_or_clause(root, baserel, extracted, joinclause);
+            }
+        }
+    }
+}
+
+/*
+ * Report whether a primitive (non-OR) RestrictInfo may be moved to "rel".
+ *
+ * The clause qualifies only if it is not pseudoconstant, its Var membership
+ * is exactly rel's relids, and it contains no volatile functions (which must
+ * not be evaluated extra times).  The cheap flag and relids tests run before
+ * the expression walk.
+ */
+static bool is_safe_restriction_clause_for(RestrictInfo* rinfo, RelOptInfo* rel)
+{
+    bool mentionsOnlyRel = !rinfo->pseudoconstant && bms_equal(rinfo->clause_relids, rel->relids);
+
+    return mentionsOnlyRel && !contain_volatile_functions((Node*)rinfo->clause);
+}
+
+/*
+ * Try to extract a restriction clause mentioning only "rel" from the given
+ * join OR-clause.
+ *
+ * We must be able to extract at least one qual for this rel from each of
+ * the arms of the OR, else we can't use it.
+ *
+ * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
+ * if no OR clause could be extracted.
+ */
+static Expr* extract_or_clause(RestrictInfo* or_rinfo, RelOptInfo* rel)
+{
+    List* clauselist = NIL;
+    ListCell* lc = NULL;
+
+    /*
+     * Scan each arm of the input OR clause.  Notice we descend into
+     * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
+     * toplevel OR/AND structure.  This is useful because we can use the info
+     * in those nodes to make is_safe_restriction_clause_for()'s checks
+     * cheaper.  We'll strip those nodes from the returned tree, though,
+     * meaning that fresh ones will be built if the clause is accepted as a
+     * restriction clause.  This might seem wasteful --- couldn't we re-use
+     * the existing RestrictInfos?  But that'd require assuming that
+     * selectivity and other cached data is computed exactly the same way for
+     * a restriction clause as for a join clause, which seems undesirable.
+     */
+    Assert(or_clause((Node*)or_rinfo->orclause));
+    foreach (lc, ((BoolExpr*)or_rinfo->orclause)->args) {
+        Node* orarg = (Node*)lfirst(lc);
+        List* subclauses = NIL;
+        Node* subclause = NULL;
+
+        /* OR arguments should be ANDs or sub-RestrictInfos */
+        if (and_clause(orarg)) {
+            List* andargs = ((BoolExpr*)orarg)->args;
+            ListCell* lc2 = NULL;
+
+            foreach (lc2, andargs) {
+                RestrictInfo* rinfo = (RestrictInfo*)lfirst(lc2);
+
+                Assert(IsA(rinfo, RestrictInfo));
+                if (restriction_is_or_clause(rinfo)) {
+                    /*
+                     * Recurse to deal with nested OR.  Note we *must* recurse
+                     * here, this isn't just overly-tense optimization: we
+                     * have to descend far enough to find and strip all
+                     * RestrictInfos in the expression.
+                     */
+                    Expr* suborclause = NULL;
+
+                    suborclause = extract_or_clause(rinfo, rel);
+                    if (suborclause) {
+                        subclauses = lappend(subclauses, suborclause);
+                    }
+                } else if (is_safe_restriction_clause_for(rinfo, rel)) {
+                    subclauses = lappend(subclauses, rinfo->clause);
+                }
+            }
+        } else {
+            Assert(IsA(orarg, RestrictInfo));
+            Assert(!restriction_is_or_clause((RestrictInfo*)orarg));
+            if (is_safe_restriction_clause_for((RestrictInfo*)orarg, rel)) {
+                subclauses = lappend(subclauses, ((RestrictInfo*)orarg)->clause);
+            }
+        }
+
+        /*
+         * If nothing could be extracted from this arm, we can't do anything
+         * with this OR clause.
+         */
+        if (subclauses == NIL) {
+            return NULL;
+        }
+
+        /*
+         * OK, add subclause(s) to the result OR.  If we found more than one,
+         * we need an AND node.  But if we found only one, and it is itself an
+         * OR node, add its subclauses to the result instead; this is needed
+         * to preserve AND/OR flatness (ie, no OR directly underneath OR).
+         * Each arm is appended exactly once so the result has no duplicates.
+         */
+        subclause = (Node*)make_ands_explicit(subclauses);
+        if (or_clause(subclause)) {
+            clauselist = list_concat(clauselist, list_copy(((BoolExpr*)subclause)->args));
+        } else {
+            clauselist = lappend(clauselist, subclause);
+        }
+    }
+
+    /*
+     * If we got a restriction clause from every arm, wrap them up in an OR
+     * node.  (In theory the OR node might be unnecessary, if there was only
+     * one arm --- but then the input OR node was also redundant.)
+     */
+    if (clauselist != NIL) {
+        return make_orclause(clauselist);
+    }
+    return NULL;
+}
+
+/*
+ * Consider whether a successfully-extracted restriction OR clause is
+ * actually worth using.  If so, add it to the planner's data structures,
+ * and adjust the original join clause (join_or_rinfo) to compensate.
+ */
+static void consider_new_or_clause(PlannerInfo* root, RelOptInfo* rel, Expr* orclause, RestrictInfo* join_or_rinfo)
+{
+    RestrictInfo* or_rinfo = NULL;
+    Selectivity or_selec, orig_selec;
+
+    /*
+     * Build a RestrictInfo from the new OR clause.  We can assume it's valid
+     * as a base restriction clause.  It inherits the join clause's security level.
+     */
+    or_rinfo = make_restrictinfo(orclause, true, false, false, join_or_rinfo->security_level, NULL, NULL, NULL);
+
+    /*
+     * Estimate its selectivity.  (We could have done this earlier, but doing
+     * it on the RestrictInfo representation allows the result to get cached,
+     * saving work later.)
+     */
+    or_selec = clause_selectivity(root, (Node*)or_rinfo, 0, JOIN_INNER, NULL);
+
+    /*
+     * The clause is only worth adding to the query if it rejects a useful
+     * fraction of the base relation's rows; otherwise, it's just going to
+     * cause duplicate computation (since we will still have to check the
+     * original OR clause when the join is formed).  Somewhat arbitrarily, we
+     * set the selectivity threshold at 0.9.
+     */
+    if (or_selec > 0.9) {
+        return; /* forget it */
+    }
+
+    /* OK, add it to the rel's restriction-clause list and keep the rel's minimum security level in step. */
+    rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
+    rel->baserestrict_min_security = Min(rel->baserestrict_min_security, or_rinfo->security_level);
+
+    /*
+     * Adjust the original join OR clause's cached selectivity to compensate
+     * for the selectivity of the added (but redundant) lower-level qual. This
+     * should result in the join rel getting approximately the same rows
+     * estimate as it would have gotten without all these shenanigans.
+     *
+     * XXX major hack alert: this depends on the assumption that the
+     * selectivity will stay cached.
+     *
+     * XXX another major hack: we adjust only norm_selec, the cached
+     * selectivity for JOIN_INNER semantics, even though the join clause
+     * might've been an outer-join clause.  This is partly because we can't
+     * easily identify the relevant SpecialJoinInfo here, and partly because
+     * the linearity assumption we're making would fail anyway.  (If it is an
+     * outer-join clause, "rel" must be on the nullable side, else we'd not
+     * have gotten here.  So the computation of the join size is going to be
+     * quite nonlinear with respect to the size of "rel", so it's not clear
+     * how we ought to adjust outer_selec even if we could compute its
+     * original value correctly.)
+     */
+    if (or_selec > 0) {
+        SpecialJoinInfo sjinfo;
+
+        /*
+         * Make up a SpecialJoinInfo for JOIN_INNER semantics.  (Compare
+         * approx_tuple_count() in costsize.c.)
+         */
+        sjinfo.type = T_SpecialJoinInfo;
+        sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids, rel->relids);
+        sjinfo.min_righthand = rel->relids;
+        sjinfo.syn_lefthand = sjinfo.min_lefthand;
+        sjinfo.syn_righthand = sjinfo.min_righthand;
+        sjinfo.jointype = JOIN_INNER;
+        /* we don't bother trying to make the remaining fields valid */
+        sjinfo.lhs_strict = false;
+        sjinfo.delay_upper_joins = false;
+        sjinfo.join_quals = NIL;
+
+        /* Compute inner-join size */
+        orig_selec = clause_selectivity(root, (Node*)join_or_rinfo, 0, JOIN_INNER, &sjinfo);
+
+        /* And hack cached selectivity so join size remains the same */
+        join_or_rinfo->norm_selec = orig_selec / or_selec;
+        /* ensure result stays in sane range, in particular not "redundant" */
+        if (join_or_rinfo->norm_selec > 1) {
+            join_or_rinfo->norm_selec = 1;
+        }
+        /* as explained above, we don't touch outer_selec */
+    }
+}
diff --git a/src/include/optimizer/orclauses.h b/src/include/optimizer/orclauses.h
new file mode 100644
index 000000000..dd8d35a6a
--- /dev/null
+++ b/src/include/optimizer/orclauses.h
@@ -0,0 +1,21 @@
+/*-------------------------------------------------------------------------
+ *
+ * orclauses.h
+ *    prototypes for orclauses.cpp.
+ * + * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/optimizer/orclauses.h + * + *------------------------------------------------------------------------- + */ +#ifndef ORCLAUSES_H +#define ORCLAUSES_H + +#include "nodes/relation.h" + +extern void extract_restriction_or_clauses(PlannerInfo *root); + +#endif /* ORCLAUSES_H */ diff --git a/src/test/regress/expected/explain_pbe.out b/src/test/regress/expected/explain_pbe.out index 65556bb2c..34d4e6e3e 100644 --- a/src/test/regress/expected/explain_pbe.out +++ b/src/test/regress/expected/explain_pbe.out @@ -22,11 +22,10 @@ explain (costs off, verbose on) execute s (1,1); explain (costs off, verbose on) execute i (6,6,6); QUERY PLAN ---------------------------- - [Bypass] Insert on public.t1_xc_fqs -> Result Output: 6, 6, 6 -(4 rows) +(3 rows) explain (costs off, verbose on) execute u (2,2); QUERY PLAN @@ -189,11 +188,10 @@ explain (costs off, verbose on) execute s (1); explain (costs off, verbose on) execute i (6); QUERY PLAN ---------------------------- - [Bypass] Insert on public.t1_xc_fqs -> Result Output: 6, 2, 3 -(4 rows) +(3 rows) explain (costs off, verbose on) execute u (2); QUERY PLAN @@ -2558,11 +2556,10 @@ explain (costs off, verbose on) execute s (1); explain (costs off, verbose on) execute i (6); QUERY PLAN ---------------------------- - [Bypass] Insert on public.t1_xc_fqs -> Result Output: 6, 2, 3 -(4 rows) +(3 rows) explain (costs off, verbose on) execute u (2); QUERY PLAN @@ -3644,15 +3641,16 @@ INSERT INTO pbe_prunning_002 values (1, 1, 1); SET enable_pbe_optimization to false; PREPARE pa AS SELECT * FROM pbe_prunning_001 pp1 RIGHT OUTER JOIN pbe_prunning_002 pp2 ON pp1.c_int=pp2.c_int WHERE (pp1.c_int=$1 AND pp1.c_numeric=$2 AND pp2.c_int=$3 AND pp2.c_numeric=$4) or pp1.id=$5; EXPLAIN(COSTS FALSE) EXECUTE pa(10,10,10,10,11); - QUERY PLAN 
------------------------------------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------ Hash Join - Hash Cond: (pp1.c_int = pp2.c_int) + Hash Cond: (pp2.c_int = pp1.c_int) Join Filter: (((pp1.c_int = 10) AND (pp1.c_numeric = 10::numeric) AND (pp2.c_int = 10) AND (pp2.c_numeric = 10::numeric)) OR (pp1.id = 11)) - -> Seq Scan on pbe_prunning_001 pp1 + -> Seq Scan on pbe_prunning_002 pp2 -> Hash - -> Seq Scan on pbe_prunning_002 pp2 -(6 rows) + -> Seq Scan on pbe_prunning_001 pp1 + Filter: (((c_int = 10) AND (c_numeric = 10::numeric)) OR ((c_int = 10) AND (c_numeric = 10::numeric)) OR (id = 11) OR (id = 11)) +(7 rows) DEALLOCATE PREPARE pa; DROP TABLE pbe_prunning_001; diff --git a/src/test/regress/expected/hw_pbe.out b/src/test/regress/expected/hw_pbe.out index c590f3551..95f80ab66 100644 --- a/src/test/regress/expected/hw_pbe.out +++ b/src/test/regress/expected/hw_pbe.out @@ -21,11 +21,10 @@ explain (costs off, verbose on) execute s (1,1); explain (costs off, verbose on) execute i (6,6,6); QUERY PLAN ---------------------------- - [Bypass] Insert on public.t1_xc_fqs -> Result Output: 6, 6, 6 -(4 rows) +(3 rows) explain (costs off, verbose on) execute u (2,2); QUERY PLAN @@ -188,11 +187,10 @@ explain (costs off, verbose on) execute s (1); explain (costs off, verbose on) execute i (6); QUERY PLAN ---------------------------- - [Bypass] Insert on public.t1_xc_fqs -> Result Output: 6, 2, 3 -(4 rows) +(3 rows) explain (costs off, verbose on) execute u (2); QUERY PLAN @@ -2557,11 +2555,10 @@ explain (costs off, verbose on) execute s (1); explain (costs off, verbose on) execute i (6); QUERY PLAN ---------------------------- - [Bypass] Insert on public.t1_xc_fqs -> Result Output: 6, 2, 3 -(4 rows) +(3 rows) explain (costs off, verbose on) execute u (2); QUERY 
PLAN @@ -3642,15 +3639,16 @@ INSERT INTO pbe_prunning_001 values (1, 1, 1); INSERT INTO pbe_prunning_002 values (1, 1, 1); PREPARE pa AS SELECT * FROM pbe_prunning_001 pp1 RIGHT OUTER JOIN pbe_prunning_002 pp2 ON pp1.c_int=pp2.c_int WHERE (pp1.c_int=$1 AND pp1.c_numeric=$2 AND pp2.c_int=$3 AND pp2.c_numeric=$4) or pp1.id=$5; EXPLAIN(COSTS FALSE) EXECUTE pa(10,10,10,10,11); - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------ Hash Join - Hash Cond: (pp1.c_int = pp2.c_int) + Hash Cond: (pp2.c_int = pp1.c_int) Join Filter: (((pp1.c_int = $1) AND (pp1.c_numeric = $2) AND (pp2.c_int = $3) AND (pp2.c_numeric = $4)) OR (pp1.id = $5)) - -> Seq Scan on pbe_prunning_001 pp1 + -> Seq Scan on pbe_prunning_002 pp2 -> Hash - -> Seq Scan on pbe_prunning_002 pp2 -(6 rows) + -> Seq Scan on pbe_prunning_001 pp1 + Filter: (((c_int = $1) AND (c_numeric = $2)) OR ((c_int = $1) AND (c_numeric = $2)) OR (id = $5) OR (id = $5)) +(7 rows) DEALLOCATE PREPARE pa; DROP TABLE pbe_prunning_001; diff --git a/src/test/regress/expected/tpch_disablevec07.out b/src/test/regress/expected/tpch_disablevec07.out index 1da4618e6..f09cdb630 100644 --- a/src/test/regress/expected/tpch_disablevec07.out +++ b/src/test/regress/expected/tpch_disablevec07.out @@ -67,13 +67,15 @@ order by Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) -> Row Adapter -> CStore Scan on nation n1 + Filter: ((n_name = 'FRANCE '::bpchar) OR (n_name = 'FRANCE '::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar)) -> Materialize -> Row Adapter -> CStore Scan on nation n2 + Filter: ((n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'FRANCE'::bpchar) OR (n_name 
= 'FRANCE'::bpchar)) -> Hash -> Row Adapter -> CStore Scan on supplier -(32 rows) +(34 rows) select supp_nation, diff --git a/src/test/regress/expected/tpchcol07.out b/src/test/regress/expected/tpchcol07.out index 60c9b821e..69222e384 100644 --- a/src/test/regress/expected/tpchcol07.out +++ b/src/test/regress/expected/tpchcol07.out @@ -61,10 +61,12 @@ order by -> Vector Nest Loop Join Filter: (((n1.n_name = 'FRANCE '::bpchar) AND (n2.n_name = 'GERMANY'::bpchar)) OR ((n1.n_name = 'GERMANY'::bpchar) AND (n2.n_name = 'FRANCE'::bpchar))) -> CStore Scan on nation n1 + Filter: ((n_name = 'FRANCE '::bpchar) OR (n_name = 'FRANCE '::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar)) -> Vector Materialize -> CStore Scan on nation n2 + Filter: ((n_name = 'GERMANY'::bpchar) OR (n_name = 'GERMANY'::bpchar) OR (n_name = 'FRANCE'::bpchar) OR (n_name = 'FRANCE'::bpchar)) -> CStore Scan on supplier -(23 rows) +(25 rows) select supp_nation, diff --git a/src/test/regress/parallel_schedule0 b/src/test/regress/parallel_schedule0 index 7f39242ab..9cdb7a70b 100644 --- a/src/test/regress/parallel_schedule0 +++ b/src/test/regress/parallel_schedule0 @@ -387,7 +387,8 @@ test: xc_groupby xc_distkey xc_having #test: hw_rewrite_lazyagg hw_light test: xc_temp xc_FQS #test: xc_remote hw_pbe -test: xc_FQS_join xc_copy +test: hw_pbe +test: xc_FQS_join xc_copy #test: xc_alter_table test: xc_constraints xc_limit xc_sort #test: xc_params xc_returning_step1 @@ -558,7 +559,7 @@ test: hw_pwd_reuse test: performance_enhance test: explain_fqs -#test: explain_pbe +test: explain_pbe # temp__3 create_table copy vec_prepare_001 vec_prepare_002 vec_prepare_003 int4 int8 are duplicated test: temp__3 # ----------