Parallel merge join.
This commit is contained in:
@ -1862,7 +1862,8 @@ static void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *li
|
||||
RelOptInfo* childrel = (RelOptInfo*)lfirst(lcr);
|
||||
Path* cheapest_total = NULL;
|
||||
|
||||
cheapest_total = get_cheapest_path_for_pathkeys(childrel->pathlist, NIL, required_outer, TOTAL_COST);
|
||||
cheapest_total = get_cheapest_path_for_pathkeys(childrel->pathlist, NIL,
|
||||
required_outer, TOTAL_COST, false);
|
||||
|
||||
AssertEreport(cheapest_total != NULL, MOD_OPT, "");
|
||||
|
||||
@ -1938,8 +1939,8 @@ static void generate_mergeappend_paths(PlannerInfo* root, RelOptInfo* rel,
|
||||
Path *cheapest_startup, *cheapest_total;
|
||||
|
||||
/* Locate the right paths, if they are available. */
|
||||
cheapest_startup = get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, STARTUP_COST);
|
||||
cheapest_total = get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, TOTAL_COST);
|
||||
cheapest_startup = get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, STARTUP_COST, false);
|
||||
cheapest_total = get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, TOTAL_COST, false);
|
||||
|
||||
/*
|
||||
* If we can't find any paths with the right order just use the
|
||||
|
@ -43,6 +43,9 @@
|
||||
#include "pgxc/pgxc.h"
|
||||
#include "parser/parsetree.h"
|
||||
|
||||
static void try_partial_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
|
||||
SpecialJoinInfo *sjinfo, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys,
|
||||
List *mergeclauses, List *outersortkeys, List *innersortkeys);
|
||||
static void copy_JoinCostWorkspace(JoinCostWorkspace* to, JoinCostWorkspace* from);
|
||||
static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel,
|
||||
JoinType jointype, JoinPathExtraData* extra);
|
||||
@ -50,13 +53,19 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
||||
SemiAntiJoinFactors* semifactors, JoinType jointype, JoinPathExtraData* extra);
|
||||
static void consider_parallel_nestloop(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel,
|
||||
RelOptInfo* innerrel, JoinType jointype, JoinPathExtraData* extra);
|
||||
static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel,
|
||||
JoinType jointype, SemiAntiJoinFactors* semifactors, JoinPathExtraData* extra);
|
||||
static List* select_mergejoin_clauses(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel,
|
||||
RelOptInfo* innerrel, List* restrictlist, JoinType jointype, bool* mergejoin_allowed);
|
||||
static bool checkForPWJ(PlannerInfo* root, Path* outer_path, Path* inner_path, JoinType jointype, List* joinrestrict);
|
||||
static bool checkJoinColumnForPWJ(PlannerInfo* root, Index varno, AttrNumber varattno);
|
||||
static bool checkJoinClauseForPWJ(PlannerInfo* root, List* joinclause);
|
||||
static void consider_parallel_mergejoin(PlannerInfo *root, RelOptInfo *joinrel, const RelOptInfo *outerrel,
|
||||
RelOptInfo *innerrel, JoinType jointype, JoinType save_jointype, JoinPathExtraData *extra,
|
||||
Path *inner_cheapest_total);
|
||||
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
|
||||
JoinType jointype, SemiAntiJoinFactors *semifactors, JoinPathExtraData *extra);
|
||||
static List *select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel,
|
||||
RelOptInfo *innerrel, List *restrictlist, JoinType jointype, bool *mergejoin_allowed);
|
||||
static void generate_mergejoin_paths(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *innerrel, Path *outerpath,
|
||||
JoinType jointype, JoinType save_jointype, JoinPathExtraData *extra, bool useallclauses, Path *inner_cheapest_total,
|
||||
List *merge_pathkeys, bool is_partial);
|
||||
static bool checkForPWJ(PlannerInfo *root, Path *outer_path, Path *inner_path, JoinType jointype, List *joinrestrict);
|
||||
static bool checkJoinColumnForPWJ(PlannerInfo *root, Index varno, AttrNumber varattno);
|
||||
static bool checkJoinClauseForPWJ(PlannerInfo *root, List *joinclause);
|
||||
static bool checkPartitionkeyForPWJ(PlannerInfo* root, Path* outer_path, Path* inner_path);
|
||||
static bool checkPathForPWJ(PlannerInfo* root, Path* path);
|
||||
static bool checkIndexPathForPWJ(PartIteratorPath* pIterpath);
|
||||
@ -590,11 +599,17 @@ static void try_partial_nestloop_path(PlannerInfo* root, RelOptInfo* joinrel, Pa
|
||||
*/
|
||||
static void try_mergejoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType jointype, JoinType save_jointype,
|
||||
SpecialJoinInfo* sjinfo, Relids param_source_rels, Path* outer_path, Path* inner_path, List* restrict_clauses,
|
||||
List* pathkeys, List* mergeclauses, List* outersortkeys, List* innersortkeys)
|
||||
List* pathkeys, List* mergeclauses, List* outersortkeys, List* innersortkeys, bool is_partial)
|
||||
{
|
||||
Relids required_outer;
|
||||
JoinCostWorkspace workspace;
|
||||
|
||||
if (is_partial) {
|
||||
try_partial_mergejoin_path(root, joinrel, jointype, sjinfo, outer_path, inner_path, restrict_clauses,
|
||||
pathkeys, mergeclauses, outersortkeys, innersortkeys);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if proposed path is still parameterized, and reject if the
|
||||
* parameterization wouldn't be sensible.
|
||||
@ -730,10 +745,58 @@ static void try_mergejoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* try_partial_mergejoin_path
|
||||
* Consider a partial merge join path; if it appears useful, push it into
|
||||
* the joinrel's pathlist via add_partial_path().
|
||||
*/
|
||||
static void try_partial_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
|
||||
SpecialJoinInfo *sjinfo, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys,
|
||||
List *mergeclauses, List *outersortkeys, List *innersortkeys)
|
||||
{
|
||||
JoinCostWorkspace workspace;
|
||||
|
||||
/*
|
||||
* See comments in try_partial_hashjoin_path().
|
||||
*/
|
||||
if (inner_path->param_info != NULL) {
|
||||
Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
|
||||
|
||||
if (!bms_is_empty(inner_paramrels)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If the given paths are already well enough ordered, we can skip doing
|
||||
* an explicit sort.
|
||||
*/
|
||||
if (outersortkeys && pathkeys_contained_in(outersortkeys, outer_path->pathkeys)) {
|
||||
outersortkeys = NIL;
|
||||
}
|
||||
if (innersortkeys && pathkeys_contained_in(innersortkeys, inner_path->pathkeys)) {
|
||||
innersortkeys = NIL;
|
||||
}
|
||||
|
||||
/*
|
||||
* See comments in try_partial_nestloop_path().
|
||||
*/
|
||||
initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, outer_path, inner_path, outersortkeys,
|
||||
innersortkeys, sjinfo);
|
||||
|
||||
if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Might be good enough to be worth trying, so let's try it. */
|
||||
add_partial_path(joinrel, (Path *)create_mergejoin_path(root, joinrel, jointype, &workspace, sjinfo, outer_path,
|
||||
inner_path, restrict_clauses, pathkeys, NULL, mergeclauses, outersortkeys, innersortkeys));
|
||||
}
|
||||
|
||||
/*
|
||||
* try_hashjoin_path
|
||||
* Consider a hash join path; if it appears useful, push it into
|
||||
* the joinrel's pathlist via add_path().
|
||||
* Consider a hash join path; if it appears useful, push it into
|
||||
* the joinrel's pathlist via add_path().
|
||||
*/
|
||||
static void try_hashjoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType jointype, JoinType save_jointype,
|
||||
SpecialJoinInfo* sjinfo, SemiAntiJoinFactors* semifactors, Relids param_source_rels, Path* outer_path,
|
||||
@ -962,6 +1025,8 @@ static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
||||
ListCell* lc2 = NULL;
|
||||
int i, j;
|
||||
bool* join_used = NULL;
|
||||
Path *cheapest_partial_outer = NULL;
|
||||
Path *cheapest_safe_inner = NULL;
|
||||
int num_inner = list_length(innerrel->cheapest_total_path) - 1;
|
||||
|
||||
/*
|
||||
@ -1010,6 +1075,25 @@ static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
||||
jointype = JOIN_INNER;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the joinrel is parallel-safe, we may be able to consider a partial
|
||||
* merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
|
||||
* outer path will be partial, and therefore we won't be able to properly
|
||||
* guarantee uniqueness. Similarly, we can't handle JOIN_FULL and
|
||||
* JOIN_RIGHT, because they can produce false null extended rows. Also,
|
||||
* the resulting path must not be parameterized.
|
||||
*/
|
||||
if (joinrel->consider_parallel && save_jointype != JOIN_UNIQUE_OUTER && save_jointype != JOIN_FULL &&
|
||||
save_jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL) {
|
||||
cheapest_partial_outer = (Path *)linitial(outerrel->partial_pathlist);
|
||||
|
||||
if (inner_path->parallel_safe) {
|
||||
cheapest_safe_inner = inner_path;
|
||||
} else if (save_jointype != JOIN_UNIQUE_INNER) {
|
||||
cheapest_safe_inner = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Each possible ordering of the available mergejoin clauses will generate
|
||||
* a differently-sorted result path at essentially the same cost. We have
|
||||
@ -1086,7 +1170,18 @@ static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
||||
merge_pathkeys,
|
||||
cur_mergeclauses,
|
||||
outerkeys,
|
||||
innerkeys);
|
||||
innerkeys,
|
||||
false);
|
||||
|
||||
/*
|
||||
* If we have partial outer and parallel safe inner path then try
|
||||
* partial mergejoin path.
|
||||
*/
|
||||
if (cheapest_partial_outer != NULL && cheapest_safe_inner != NULL) {
|
||||
try_partial_mergejoin_path(root, joinrel, jointype, extra->sjinfo, cheapest_partial_outer,
|
||||
cheapest_safe_inner, extra->restrictlist, merge_pathkeys, cur_mergeclauses, outerkeys,
|
||||
innerkeys);
|
||||
}
|
||||
}
|
||||
j++;
|
||||
}
|
||||
@ -1228,13 +1323,6 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
||||
foreach (l, outerrel->pathlist) {
|
||||
Path* outerpath = (Path*)lfirst(l);
|
||||
List* merge_pathkeys = NIL;
|
||||
List* mergeclauses = NIL;
|
||||
List* innersortkeys = NIL;
|
||||
List* trialsortkeys = NIL;
|
||||
Path* cheapest_startup_inner = NULL;
|
||||
Path* cheapest_total_inner = NULL;
|
||||
int num_sortkeys;
|
||||
int sortkeycnt;
|
||||
|
||||
/* for non-optimal inner, we only try outer path with the same distributed key */
|
||||
if (inner_cheapest_total_orig != linitial(innerrel->cheapest_total_path) &&
|
||||
@ -1330,174 +1418,40 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
||||
if (save_jointype == JOIN_UNIQUE_OUTER)
|
||||
continue;
|
||||
|
||||
/* Look for useful mergeclauses (if any) */
|
||||
mergeclauses = find_mergeclauses_for_outer_pathkeys(root, outerpath->pathkeys, extra->mergeclause_list);
|
||||
generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype, save_jointype, extra,
|
||||
useallclauses, inner_cheapest_total, merge_pathkeys, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Consider partial nestloop and mergejoin plan if outerrel has any
|
||||
* partial path and the joinrel is parallel-safe. However, we can't
|
||||
* handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
|
||||
* therefore we won't be able to properly guarantee uniqueness. Nor can
|
||||
* we handle extra_lateral_rels, since partial paths must not be
|
||||
* parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
|
||||
* because they can produce false null extended rows.
|
||||
*/
|
||||
if (joinrel->consider_parallel && save_jointype != JOIN_UNIQUE_OUTER && save_jointype != JOIN_FULL &&
|
||||
save_jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL) {
|
||||
if (nestjoinOK) {
|
||||
consider_parallel_nestloop(root, joinrel, outerrel, innerrel, save_jointype, extra);
|
||||
}
|
||||
/*
|
||||
* Done with this outer path if no chance for a mergejoin.
|
||||
*
|
||||
* Special corner case: for "x FULL JOIN y ON true", there will be no
|
||||
* join clauses at all. Ordinarily we'd generate a clauseless
|
||||
* nestloop path, but since mergejoin is our only join type that
|
||||
* supports FULL JOIN without any join clauses, it's necessary to
|
||||
* generate a clauseless mergejoin path instead.
|
||||
* If inner_cheapest_total is NULL or non parallel-safe then find the
|
||||
* cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
|
||||
* can't use any alternative inner path.
|
||||
*/
|
||||
if (mergeclauses == NIL) {
|
||||
if (jointype == JOIN_FULL)
|
||||
/* okay to try for mergejoin */;
|
||||
else
|
||||
if (inner_cheapest_total == NULL || !inner_cheapest_total->parallel_safe) {
|
||||
if (save_jointype == JOIN_UNIQUE_INNER) {
|
||||
continue;
|
||||
}
|
||||
if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list))
|
||||
continue;
|
||||
|
||||
/* Compute the required ordering of the inner path */
|
||||
innersortkeys = make_inner_pathkeys_for_merge(root, mergeclauses, outerpath->pathkeys);
|
||||
|
||||
/*
|
||||
* Generate a mergejoin on the basis of sorting the cheapest inner.
|
||||
* Since a sort will be needed, only cheapest total cost matters. (But
|
||||
* try_mergejoin_path will do the right thing if inner_cheapest_total
|
||||
* is already correctly sorted.)
|
||||
*/
|
||||
try_mergejoin_path(root,
|
||||
joinrel,
|
||||
jointype,
|
||||
save_jointype,
|
||||
extra->sjinfo,
|
||||
extra->param_source_rels,
|
||||
outerpath,
|
||||
inner_cheapest_total,
|
||||
extra->restrictlist,
|
||||
merge_pathkeys,
|
||||
mergeclauses,
|
||||
NIL,
|
||||
innersortkeys);
|
||||
|
||||
/* Can't do anything else if inner path needs to be unique'd */
|
||||
if (save_jointype == JOIN_UNIQUE_INNER)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Look for presorted inner paths that satisfy the innersortkey list
|
||||
* --- or any truncation thereof, if we are allowed to build a
|
||||
* mergejoin using a subset of the merge clauses. Here, we consider
|
||||
* both cheap startup cost and cheap total cost.
|
||||
*
|
||||
* Currently we do not consider parameterized inner paths here. This
|
||||
* interacts with decisions elsewhere that also discriminate against
|
||||
* mergejoins with parameterized inputs; see comments in
|
||||
* src/backend/optimizer/README.
|
||||
*
|
||||
* As we shorten the sortkey list, we should consider only paths that
|
||||
* are strictly cheaper than (in particular, not the same as) any path
|
||||
* found in an earlier iteration. Otherwise we'd be intentionally
|
||||
* using fewer merge keys than a given path allows (treating the rest
|
||||
* as plain joinquals), which is unlikely to be a good idea. Also,
|
||||
* eliminating paths here on the basis of compare_path_costs is a lot
|
||||
* cheaper than building the mergejoin path only to throw it away.
|
||||
*
|
||||
* If inner_cheapest_total is well enough sorted to have not required
|
||||
* a sort in the path made above, we shouldn't make a duplicate path
|
||||
* with it, either. We handle that case with the same logic that
|
||||
* handles the previous consideration, by initializing the variables
|
||||
* that track cheapest-so-far properly. Note that we do NOT reject
|
||||
* inner_cheapest_total if we find it matches some shorter set of
|
||||
* pathkeys. That case corresponds to using fewer mergekeys to avoid
|
||||
* sorting inner_cheapest_total, whereas we did sort it above, so the
|
||||
* plans being considered are different.
|
||||
*/
|
||||
if (pathkeys_contained_in(innersortkeys, inner_cheapest_total->pathkeys)) {
|
||||
/* inner_cheapest_total didn't require a sort */
|
||||
cheapest_startup_inner = inner_cheapest_total;
|
||||
cheapest_total_inner = inner_cheapest_total;
|
||||
} else {
|
||||
/* it did require a sort, at least for the full set of keys */
|
||||
cheapest_startup_inner = NULL;
|
||||
cheapest_total_inner = NULL;
|
||||
}
|
||||
num_sortkeys = list_length(innersortkeys);
|
||||
if (num_sortkeys > 1 && !useallclauses)
|
||||
trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
|
||||
else
|
||||
trialsortkeys = innersortkeys; /* won't really truncate */
|
||||
|
||||
for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) {
|
||||
Path* innerpath = NULL;
|
||||
List* newclauses = NIL;
|
||||
|
||||
/*
|
||||
* Look for an inner path ordered well enough for the first
|
||||
* 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
|
||||
* destructively, which is why we made a copy...
|
||||
*/
|
||||
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
|
||||
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, TOTAL_COST);
|
||||
if (innerpath != NULL && (cheapest_total_inner == NULL ||
|
||||
compare_path_costs(innerpath, cheapest_total_inner, TOTAL_COST) < 0)) {
|
||||
/* Found a cheap (or even-cheaper) sorted path */
|
||||
/* Select the right mergeclauses, if we didn't already */
|
||||
if (sortkeycnt < num_sortkeys) {
|
||||
newclauses = trim_mergeclauses_for_inner_pathkeys(root, mergeclauses, trialsortkeys);
|
||||
AssertEreport(newclauses != NIL, MOD_OPT_JOIN, "newclauses list is NIL");
|
||||
} else
|
||||
newclauses = mergeclauses;
|
||||
try_mergejoin_path(root,
|
||||
joinrel,
|
||||
jointype,
|
||||
save_jointype,
|
||||
extra->sjinfo,
|
||||
extra->param_source_rels,
|
||||
outerpath,
|
||||
innerpath,
|
||||
extra->restrictlist,
|
||||
merge_pathkeys,
|
||||
newclauses,
|
||||
NIL,
|
||||
NIL);
|
||||
cheapest_total_inner = innerpath;
|
||||
}
|
||||
/* Same on the basis of cheapest startup cost ... */
|
||||
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, STARTUP_COST);
|
||||
if (innerpath != NULL &&
|
||||
(cheapest_startup_inner == NULL ||
|
||||
compare_path_costs(innerpath, cheapest_startup_inner, STARTUP_COST) < 0)) {
|
||||
/* Found a cheap (or even-cheaper) sorted path */
|
||||
if (innerpath != cheapest_total_inner) {
|
||||
/*
|
||||
* Avoid rebuilding clause list if we already made one;
|
||||
* saves memory in big join trees...
|
||||
*/
|
||||
if (newclauses == NIL) {
|
||||
if (sortkeycnt < num_sortkeys) {
|
||||
newclauses =
|
||||
trim_mergeclauses_for_inner_pathkeys(root, mergeclauses, trialsortkeys);
|
||||
AssertEreport(newclauses != NIL, MOD_OPT_JOIN, "newclauses list is NIL");
|
||||
} else
|
||||
newclauses = mergeclauses;
|
||||
}
|
||||
try_mergejoin_path(root,
|
||||
joinrel,
|
||||
jointype,
|
||||
save_jointype,
|
||||
extra->sjinfo,
|
||||
extra->param_source_rels,
|
||||
outerpath,
|
||||
innerpath,
|
||||
extra->restrictlist,
|
||||
merge_pathkeys,
|
||||
newclauses,
|
||||
NIL,
|
||||
NIL);
|
||||
}
|
||||
cheapest_startup_inner = innerpath;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't consider truncated sortkeys if we need all clauses.
|
||||
*/
|
||||
if (useallclauses)
|
||||
break;
|
||||
inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
|
||||
}
|
||||
|
||||
if (inner_cheapest_total != NULL) {
|
||||
consider_parallel_mergejoin(root, joinrel, outerrel, innerrel, jointype, save_jointype, extra,
|
||||
inner_cheapest_total);
|
||||
}
|
||||
}
|
||||
j++;
|
||||
@ -1505,20 +1459,41 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
|
||||
i++;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the joinrel is parallel-safe and the join type supports nested loops,
|
||||
* we may be able to consider a partial nestloop plan. However, we can't
|
||||
* handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
|
||||
* therefore we won't be able to properly guarantee uniqueness. Nor can
|
||||
* we handle extra_lateral_rels, since partial paths must not be
|
||||
* parameterized.
|
||||
*/
|
||||
if (joinrel->consider_parallel && nestjoinOK && save_jointype != JOIN_UNIQUE_OUTER) {
|
||||
consider_parallel_nestloop(root, joinrel, outerrel, innerrel, save_jointype, extra);
|
||||
}
|
||||
|
||||
if (join_used != NULL)
|
||||
if (join_used != NULL) {
|
||||
pfree_ext(join_used);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* consider_parallel_mergejoin
|
||||
* Try to build partial paths for a joinrel by joining a partial path
|
||||
* for the outer relation to a complete path for the inner relation.
|
||||
*
|
||||
* 'joinrel' is the join relation
|
||||
* 'outerrel' is the outer join relation
|
||||
* 'innerrel' is the inner join relation
|
||||
* 'jointype' is the type of join to do
|
||||
* 'extra' contains additional input values
|
||||
* 'inner_cheapest_total' cheapest total path for innerrel
|
||||
*/
|
||||
static void consider_parallel_mergejoin(PlannerInfo *root, RelOptInfo *joinrel, const RelOptInfo *outerrel,
|
||||
RelOptInfo *innerrel, JoinType jointype, JoinType save_jointype, JoinPathExtraData *extra,
|
||||
Path *inner_cheapest_total)
|
||||
{
|
||||
ListCell *lc1 = NULL;
|
||||
|
||||
/* generate merge join path for each partial outer path */
|
||||
foreach (lc1, outerrel->partial_pathlist) {
|
||||
Path *outerpath = (Path *)lfirst(lc1);
|
||||
|
||||
/*
|
||||
* Figure out what useful ordering any paths we create will have.
|
||||
*/
|
||||
List *merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerpath->pathkeys);
|
||||
|
||||
generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype, save_jointype, extra, false,
|
||||
inner_cheapest_total, merge_pathkeys, true);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1956,6 +1931,162 @@ static List* select_mergejoin_clauses(PlannerInfo* root, RelOptInfo* joinrel, Re
|
||||
return result_list;
|
||||
}
|
||||
|
||||
static void generate_mergejoin_paths(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *innerrel, Path *outerpath,
|
||||
JoinType jointype, JoinType save_jointype, JoinPathExtraData *extra, bool useallclauses, Path *inner_cheapest_total,
|
||||
List *merge_pathkeys, bool is_partial)
|
||||
{
|
||||
List *trialsortkeys = NIL;
|
||||
Path *cheapest_startup_inner = NULL;
|
||||
Path *cheapest_total_inner = NULL;
|
||||
int sortkeycnt;
|
||||
|
||||
if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER) {
|
||||
jointype = JOIN_INNER;
|
||||
}
|
||||
|
||||
/* Look for useful mergeclauses (if any) */
|
||||
List *mergeclauses = find_mergeclauses_for_outer_pathkeys(root, outerpath->pathkeys, extra->mergeclause_list);
|
||||
|
||||
/*
|
||||
* Done with this outer path if no chance for a mergejoin.
|
||||
*
|
||||
* Special corner case: for "x FULL JOIN y ON true", there will be no
|
||||
* join clauses at all. Ordinarily we'd generate a clauseless
|
||||
* nestloop path, but since mergejoin is our only join type that
|
||||
* supports FULL JOIN without any join clauses, it's necessary to
|
||||
* generate a clauseless mergejoin path instead.
|
||||
*/
|
||||
if (mergeclauses == NIL) {
|
||||
if (jointype == JOIN_FULL) {
|
||||
/* okay to try for mergejoin */;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Compute the required ordering of the inner path */
|
||||
List *innersortkeys = make_inner_pathkeys_for_merge(root, mergeclauses, outerpath->pathkeys);
|
||||
|
||||
/*
|
||||
* Generate a mergejoin on the basis of sorting the cheapest inner.
|
||||
* Since a sort will be needed, only cheapest total cost matters. (But
|
||||
* try_mergejoin_path will do the right thing if inner_cheapest_total
|
||||
* is already correctly sorted.)
|
||||
*/
|
||||
try_mergejoin_path(root, joinrel, jointype, save_jointype, extra->sjinfo, extra->param_source_rels, outerpath,
|
||||
inner_cheapest_total, extra->restrictlist, merge_pathkeys, mergeclauses, NIL, innersortkeys, is_partial);
|
||||
|
||||
/* Can't do anything else if inner path needs to be unique'd */
|
||||
if (save_jointype == JOIN_UNIQUE_INNER) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Look for presorted inner paths that satisfy the innersortkey list
|
||||
* --- or any truncation thereof, if we are allowed to build a
|
||||
* mergejoin using a subset of the merge clauses. Here, we consider
|
||||
* both cheap startup cost and cheap total cost.
|
||||
*
|
||||
* Currently we do not consider parameterized inner paths here. This
|
||||
* interacts with decisions elsewhere that also discriminate against
|
||||
* mergejoins with parameterized inputs; see comments in
|
||||
* src/backend/optimizer/README.
|
||||
*
|
||||
* As we shorten the sortkey list, we should consider only paths that
|
||||
* are strictly cheaper than (in particular, not the same as) any path
|
||||
* found in an earlier iteration. Otherwise we'd be intentionally
|
||||
* using fewer merge keys than a given path allows (treating the rest
|
||||
* as plain joinquals), which is unlikely to be a good idea. Also,
|
||||
* eliminating paths here on the basis of compare_path_costs is a lot
|
||||
* cheaper than building the mergejoin path only to throw it away.
|
||||
*
|
||||
* If inner_cheapest_total is well enough sorted to have not required
|
||||
* a sort in the path made above, we shouldn't make a duplicate path
|
||||
* with it, either. We handle that case with the same logic that
|
||||
* handles the previous consideration, by initializing the variables
|
||||
* that track cheapest-so-far properly. Note that we do NOT reject
|
||||
* inner_cheapest_total if we find it matches some shorter set of
|
||||
* pathkeys. That case corresponds to using fewer mergekeys to avoid
|
||||
* sorting inner_cheapest_total, whereas we did sort it above, so the
|
||||
* plans being considered are different.
|
||||
*/
|
||||
if (pathkeys_contained_in(innersortkeys, inner_cheapest_total->pathkeys)) {
|
||||
/* inner_cheapest_total didn't require a sort */
|
||||
cheapest_startup_inner = inner_cheapest_total;
|
||||
cheapest_total_inner = inner_cheapest_total;
|
||||
} else {
|
||||
/* it did require a sort, at least for the full set of keys */
|
||||
cheapest_startup_inner = NULL;
|
||||
cheapest_total_inner = NULL;
|
||||
}
|
||||
int num_sortkeys = list_length(innersortkeys);
|
||||
if (num_sortkeys > 1 && !useallclauses) {
|
||||
trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
|
||||
} else {
|
||||
trialsortkeys = innersortkeys; /* won't really truncate */
|
||||
}
|
||||
|
||||
for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) {
|
||||
Path *innerpath = NULL;
|
||||
List *newclauses = NIL;
|
||||
|
||||
/*
|
||||
* Look for an inner path ordered well enough for the first
|
||||
* 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
|
||||
* destructively, which is why we made a copy...
|
||||
*/
|
||||
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
|
||||
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, TOTAL_COST, is_partial);
|
||||
if (innerpath != NULL &&
|
||||
(cheapest_total_inner == NULL || compare_path_costs(innerpath, cheapest_total_inner, TOTAL_COST) < 0)) {
|
||||
/* Found a cheap (or even-cheaper) sorted path */
|
||||
/* Select the right mergeclauses, if we didn't already */
|
||||
if (sortkeycnt < num_sortkeys) {
|
||||
newclauses = trim_mergeclauses_for_inner_pathkeys(root, mergeclauses, trialsortkeys);
|
||||
AssertEreport(newclauses != NIL, MOD_OPT_JOIN, "newclauses list is NIL");
|
||||
} else {
|
||||
newclauses = mergeclauses;
|
||||
}
|
||||
try_mergejoin_path(root, joinrel, jointype, save_jointype, extra->sjinfo, extra->param_source_rels,
|
||||
outerpath, innerpath, extra->restrictlist, merge_pathkeys, newclauses, NIL, NIL, is_partial);
|
||||
cheapest_total_inner = innerpath;
|
||||
}
|
||||
/* Same on the basis of cheapest startup cost ... */
|
||||
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, STARTUP_COST, is_partial);
|
||||
if (innerpath != NULL && (cheapest_startup_inner == NULL ||
|
||||
compare_path_costs(innerpath, cheapest_startup_inner, STARTUP_COST) < 0)) {
|
||||
/* Found a cheap (or even-cheaper) sorted path */
|
||||
if (innerpath != cheapest_total_inner) {
|
||||
/*
|
||||
* Avoid rebuilding clause list if we already made one;
|
||||
* saves memory in big join trees...
|
||||
*/
|
||||
if (newclauses == NIL) {
|
||||
if (sortkeycnt < num_sortkeys) {
|
||||
newclauses = trim_mergeclauses_for_inner_pathkeys(root, mergeclauses, trialsortkeys);
|
||||
AssertEreport(newclauses != NIL, MOD_OPT_JOIN, "newclauses list is NIL");
|
||||
} else {
|
||||
newclauses = mergeclauses;
|
||||
}
|
||||
}
|
||||
try_mergejoin_path(root, joinrel, jointype, save_jointype, extra->sjinfo, extra->param_source_rels,
|
||||
outerpath, innerpath, extra->restrictlist, merge_pathkeys, newclauses, NIL, NIL, is_partial);
|
||||
}
|
||||
cheapest_startup_inner = innerpath;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't consider truncated sortkeys if we need all clauses.
|
||||
*/
|
||||
if (useallclauses) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool checkForPWJ(PlannerInfo* root, Path* outer_path, Path* inner_path, JoinType jointype, List* joinrestrict)
|
||||
{
|
||||
/* Validate configuration */
|
||||
|
@ -322,8 +322,10 @@ bool pathkeys_contained_in(List* keys1, List* keys2)
|
||||
* 'pathkeys' represents a required ordering (already canonicalized!)
|
||||
* 'required_outer' denotes allowable outer relations for parameterized paths
|
||||
* 'cost_criterion' is STARTUP_COST or TOTAL_COST
|
||||
* 'require_parallel_safe' causes us to consider only parallel-safe paths
|
||||
*/
|
||||
Path* get_cheapest_path_for_pathkeys(List* paths, List* pathkeys, Relids required_outer, CostSelector cost_criterion)
|
||||
Path* get_cheapest_path_for_pathkeys(List* paths, List* pathkeys, Relids required_outer,
|
||||
CostSelector cost_criterion, bool require_parallel_safe)
|
||||
{
|
||||
Path* matched_path = NULL;
|
||||
ListCell* l = NULL;
|
||||
@ -338,6 +340,10 @@ Path* get_cheapest_path_for_pathkeys(List* paths, List* pathkeys, Relids require
|
||||
if (matched_path != NULL && compare_path_costs(matched_path, path, cost_criterion) <= 0)
|
||||
continue;
|
||||
|
||||
if (require_parallel_safe && !path->parallel_safe) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pathkeys_contained_in(pathkeys, path->pathkeys) && bms_is_subset(PATH_REQ_OUTER(path), required_outer))
|
||||
matched_path = path;
|
||||
}
|
||||
|
@ -3816,7 +3816,8 @@ MergePath* create_mergejoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinTyp
|
||||
pathnode->jpath.path.parallel_aware = false;
|
||||
pathnode->jpath.path.parallel_safe =
|
||||
joinrel->consider_parallel && outer_path->parallel_safe && inner_path->parallel_safe;
|
||||
pathnode->jpath.path.parallel_workers = 0;
|
||||
/* This is a foolish way to estimate parallel_workers, but for now... */
|
||||
pathnode->jpath.path.parallel_workers = outer_path->parallel_workers;
|
||||
pathnode->jpath.path.pathkeys = pathkeys;
|
||||
pathnode->jpath.jointype = jointype;
|
||||
pathnode->jpath.outerjoinpath = outer_path;
|
||||
|
@ -134,7 +134,7 @@ extern List* canonicalize_pathkeys(PlannerInfo* root, List* pathkeys);
|
||||
extern PathKeysComparison compare_pathkeys(List* keys1, List* keys2);
|
||||
extern bool pathkeys_contained_in(List* keys1, List* keys2);
|
||||
extern Path* get_cheapest_path_for_pathkeys(
|
||||
List* paths, List* pathkeys, Relids required_outer, CostSelector cost_criterion);
|
||||
List* paths, List* pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe);
|
||||
extern Path* get_cheapest_fractional_path_for_pathkeys(
|
||||
List* paths, List* pathkeys, Relids required_outer, double fraction);
|
||||
extern List* build_index_pathkeys(PlannerInfo* root, IndexOptInfo* index, ScanDirection scandir);
|
||||
|
@ -1,5 +1,6 @@
|
||||
create table parallel_t1(a int);
|
||||
insert into parallel_t1 values(generate_series(1,100000));
|
||||
analyze parallel_t1;
|
||||
--normal plan for seq scan
|
||||
explain (costs off) select count(*) from parallel_t1;
|
||||
QUERY PLAN
|
||||
@ -158,7 +159,6 @@ select count(*) from parallel_t1 where a <> 5000;
|
||||
(1 row)
|
||||
|
||||
--clean up
|
||||
drop table parallel_t1;
|
||||
reset force_parallel_mode;
|
||||
reset parallel_setup_cost;
|
||||
reset parallel_tuple_cost;
|
||||
@ -169,6 +169,34 @@ reset parallel_leader_participation;
|
||||
create table parallel_t2(a int, b int);
|
||||
insert into parallel_t2 values(generate_series(1,100000), generate_series(1,100000));
|
||||
create index t2_idx on parallel_t2 using btree(a);
|
||||
analyze parallel_t2;
|
||||
--normal plan for merge join
|
||||
set enable_hashjoin to off;
|
||||
set enable_nestloop to off;
|
||||
set enable_indexscan to off;
|
||||
explain (costs off) select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
|
||||
QUERY PLAN
|
||||
-----------------------------------------------------
|
||||
Aggregate
|
||||
-> Merge Join
|
||||
Merge Cond: (parallel_t1.a = parallel_t2.a)
|
||||
-> Sort
|
||||
Sort Key: parallel_t1.a
|
||||
-> Seq Scan on parallel_t1
|
||||
-> Sort
|
||||
Sort Key: parallel_t2.a
|
||||
-> Seq Scan on parallel_t2
|
||||
(9 rows)
|
||||
|
||||
select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
|
||||
count
|
||||
--------
|
||||
100000
|
||||
(1 row)
|
||||
|
||||
reset enable_hashjoin;
|
||||
reset enable_nestloop;
|
||||
reset enable_indexscan;
|
||||
--set index scan parameter
|
||||
set enable_seqscan to off;
|
||||
set enable_bitmapscan to off;
|
||||
@ -246,7 +274,7 @@ select count(b) from parallel_t2 where a < 5000;
|
||||
alter table parallel_t2 set (parallel_workers = 1);
|
||||
--parallel plan for index scan
|
||||
explain (costs off) select count(b) from parallel_t2 where a > 5000;
|
||||
QUERY PLAN
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------
|
||||
Aggregate
|
||||
-> Gather
|
||||
@ -256,7 +284,7 @@ explain (costs off) select count(b) from parallel_t2 where a > 5000;
|
||||
(5 rows)
|
||||
|
||||
explain (costs off) select count(b) from parallel_t2 where a < 5000;
|
||||
QUERY PLAN
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------
|
||||
Aggregate
|
||||
-> Gather
|
||||
@ -266,18 +294,50 @@ explain (costs off) select count(b) from parallel_t2 where a < 5000;
|
||||
(5 rows)
|
||||
|
||||
select count(b) from parallel_t2 where a > 5000;
|
||||
count
|
||||
count
|
||||
-------
|
||||
95000
|
||||
(1 row)
|
||||
|
||||
select count(b) from parallel_t2 where a < 5000;
|
||||
count
|
||||
count
|
||||
-------
|
||||
4999
|
||||
(1 row)
|
||||
|
||||
--parallel plan for merge join
|
||||
reset enable_seqscan;
|
||||
reset enable_bitmapscan;
|
||||
set enable_hashjoin to off;
|
||||
set enable_nestloop to off;
|
||||
set enable_indexscan to off;
|
||||
explain (costs off) select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
|
||||
QUERY PLAN
|
||||
-----------------------------------------------------------
|
||||
Aggregate
|
||||
-> Gather
|
||||
Number of Workers: 2
|
||||
-> Merge Join
|
||||
Merge Cond: (parallel_t1.a = parallel_t2.a)
|
||||
-> Sort
|
||||
Sort Key: parallel_t1.a
|
||||
-> Parallel Seq Scan on parallel_t1
|
||||
-> Sort
|
||||
Sort Key: parallel_t2.a
|
||||
-> Seq Scan on parallel_t2
|
||||
(11 rows)
|
||||
|
||||
select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
|
||||
count
|
||||
--------
|
||||
100000
|
||||
(1 row)
|
||||
|
||||
reset enable_hashjoin;
|
||||
reset enable_nestloop;
|
||||
reset enable_indexscan;
|
||||
--clean up
|
||||
drop table parallel_t1;
|
||||
drop table parallel_t2;
|
||||
reset enable_seqscan;
|
||||
reset enable_bitmapscan;
|
||||
|
@ -1,5 +1,6 @@
|
||||
create table parallel_t1(a int);
|
||||
insert into parallel_t1 values(generate_series(1,100000));
|
||||
analyze parallel_t1;
|
||||
--normal plan for seq scan
|
||||
explain (costs off) select count(*) from parallel_t1;
|
||||
explain (costs off) select count(*) from parallel_t1 where a = 5000;
|
||||
@ -33,7 +34,6 @@ select count(*) from parallel_t1 where a < 5000;
|
||||
select count(*) from parallel_t1 where a <> 5000;
|
||||
|
||||
--clean up
|
||||
drop table parallel_t1;
|
||||
reset force_parallel_mode;
|
||||
reset parallel_setup_cost;
|
||||
reset parallel_tuple_cost;
|
||||
@ -45,6 +45,17 @@ reset parallel_leader_participation;
|
||||
create table parallel_t2(a int, b int);
|
||||
insert into parallel_t2 values(generate_series(1,100000), generate_series(1,100000));
|
||||
create index t2_idx on parallel_t2 using btree(a);
|
||||
analyze parallel_t2;
|
||||
|
||||
--normal plan for merge join
|
||||
set enable_hashjoin to off;
|
||||
set enable_nestloop to off;
|
||||
set enable_indexscan to off;
|
||||
explain (costs off) select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
|
||||
select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
|
||||
reset enable_hashjoin;
|
||||
reset enable_nestloop;
|
||||
reset enable_indexscan;
|
||||
|
||||
--set index scan parameter
|
||||
set enable_seqscan to off;
|
||||
@ -80,7 +91,20 @@ explain (costs off) select count(b) from parallel_t2 where a < 5000;
|
||||
select count(b) from parallel_t2 where a > 5000;
|
||||
select count(b) from parallel_t2 where a < 5000;
|
||||
|
||||
--parallel plan for merge join
|
||||
reset enable_seqscan;
|
||||
reset enable_bitmapscan;
|
||||
set enable_hashjoin to off;
|
||||
set enable_nestloop to off;
|
||||
set enable_indexscan to off;
|
||||
explain (costs off) select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
|
||||
select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
|
||||
reset enable_hashjoin;
|
||||
reset enable_nestloop;
|
||||
reset enable_indexscan;
|
||||
|
||||
--clean up
|
||||
drop table parallel_t1;
|
||||
drop table parallel_t2;
|
||||
reset enable_seqscan;
|
||||
reset enable_bitmapscan;
|
||||
|
Reference in New Issue
Block a user