Parallel merge join.

2020-11-10 09:57:11 +08:00
parent b973181d29
commit 35f6cc72a5
7 changed files with 428 additions and 205 deletions
--- a/src/gausskernel/optimizer/path/allpaths.cpp
+++ b/src/gausskernel/optimizer/path/allpaths.cpp
@ -1862,7 +1862,8 @@ static void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *li
            RelOptInfo* childrel = (RelOptInfo*)lfirst(lcr);
            Path* cheapest_total = NULL;

-            cheapest_total = get_cheapest_path_for_pathkeys(childrel->pathlist, NIL, required_outer, TOTAL_COST);
+            cheapest_total = get_cheapest_path_for_pathkeys(childrel->pathlist, NIL,
+                required_outer, TOTAL_COST, false);

            AssertEreport(cheapest_total != NULL, MOD_OPT, "");

@ -1938,8 +1939,8 @@ static void generate_mergeappend_paths(PlannerInfo* root, RelOptInfo* rel,
            Path *cheapest_startup, *cheapest_total;

            /* Locate the right paths, if they are available. */
-            cheapest_startup = get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, STARTUP_COST);
-            cheapest_total = get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, TOTAL_COST);
+            cheapest_startup = get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, STARTUP_COST, false);
+            cheapest_total = get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, TOTAL_COST, false);

            /*
             * If we can't find any paths with the right order just use the
--- a/src/gausskernel/optimizer/path/joinpath.cpp
+++ b/src/gausskernel/optimizer/path/joinpath.cpp
@ -43,6 +43,9 @@
 #include "pgxc/pgxc.h"
 #include "parser/parsetree.h"

+static void try_partial_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
+    SpecialJoinInfo *sjinfo, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys,
+    List *mergeclauses, List *outersortkeys, List *innersortkeys);
 static void copy_JoinCostWorkspace(JoinCostWorkspace* to, JoinCostWorkspace* from);
 static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel,
    JoinType jointype, JoinPathExtraData* extra);
@ -50,13 +53,19 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
    SemiAntiJoinFactors* semifactors, JoinType jointype, JoinPathExtraData* extra);
 static void consider_parallel_nestloop(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel,
    RelOptInfo* innerrel, JoinType jointype, JoinPathExtraData* extra);
-static void hash_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel, RelOptInfo* innerrel,
-    JoinType jointype, SemiAntiJoinFactors* semifactors, JoinPathExtraData* extra);
-static List* select_mergejoin_clauses(PlannerInfo* root, RelOptInfo* joinrel, RelOptInfo* outerrel,
-    RelOptInfo* innerrel, List* restrictlist, JoinType jointype, bool* mergejoin_allowed);
-static bool checkForPWJ(PlannerInfo* root, Path* outer_path, Path* inner_path, JoinType jointype, List* joinrestrict);
-static bool checkJoinColumnForPWJ(PlannerInfo* root, Index varno, AttrNumber varattno);
-static bool checkJoinClauseForPWJ(PlannerInfo* root, List* joinclause);
+static void consider_parallel_mergejoin(PlannerInfo *root, RelOptInfo *joinrel, const RelOptInfo *outerrel,
+    RelOptInfo *innerrel, JoinType jointype, JoinType save_jointype, JoinPathExtraData *extra,
+    Path *inner_cheapest_total);
+static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
+    JoinType jointype, SemiAntiJoinFactors *semifactors, JoinPathExtraData *extra);
+static List *select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel,
+    RelOptInfo *innerrel, List *restrictlist, JoinType jointype, bool *mergejoin_allowed);
+static void generate_mergejoin_paths(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *innerrel, Path *outerpath,
+    JoinType jointype, JoinType save_jointype, JoinPathExtraData *extra, bool useallclauses, Path *inner_cheapest_total,
+    List *merge_pathkeys, bool is_partial);
+static bool checkForPWJ(PlannerInfo *root, Path *outer_path, Path *inner_path, JoinType jointype, List *joinrestrict);
+static bool checkJoinColumnForPWJ(PlannerInfo *root, Index varno, AttrNumber varattno);
+static bool checkJoinClauseForPWJ(PlannerInfo *root, List *joinclause);
 static bool checkPartitionkeyForPWJ(PlannerInfo* root, Path* outer_path, Path* inner_path);
 static bool checkPathForPWJ(PlannerInfo* root, Path* path);
 static bool checkIndexPathForPWJ(PartIteratorPath* pIterpath);
@ -590,11 +599,17 @@ static void try_partial_nestloop_path(PlannerInfo* root, RelOptInfo* joinrel, Pa
 */
 static void try_mergejoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType jointype, JoinType save_jointype,
    SpecialJoinInfo* sjinfo, Relids param_source_rels, Path* outer_path, Path* inner_path, List* restrict_clauses,
-    List* pathkeys, List* mergeclauses, List* outersortkeys, List* innersortkeys)
+    List* pathkeys, List* mergeclauses, List* outersortkeys, List* innersortkeys, bool is_partial)
 {
    Relids required_outer;
    JoinCostWorkspace workspace;

+    if (is_partial) {
+        try_partial_mergejoin_path(root, joinrel, jointype, sjinfo, outer_path, inner_path, restrict_clauses,
+            pathkeys, mergeclauses, outersortkeys, innersortkeys);
+        return;
+    }
+
    /*
     * Check to see if proposed path is still parameterized, and reject if the
     * parameterization wouldn't be sensible.
@ -730,10 +745,58 @@ static void try_mergejoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType
    }
 }

+/*
+ * try_partial_mergejoin_path
+ * 	  Consider a partial merge join path; if it appears useful, push it into
+ * 	  the joinrel's pathlist via add_partial_path().
+ *
+ * Nothing is built when inner_path is parameterized (its param_info lists
+ * any required outer rels).  Sort keys that the corresponding input path
+ * already satisfies are reset to NIL before costing.  The candidate is
+ * costed with initial_cost_mergejoin() and is only constructed when
+ * add_partial_path_precheck() accepts its total cost and pathkeys.
+ *
+ * 'joinrel' is the join relation the path is proposed for
+ * 'jointype' is the type of join to do
+ * 'sjinfo' is passed through to the costing and path-construction routines
+ * 'outer_path' and 'inner_path' are the two inputs of the join
+ * 'restrict_clauses' is passed through to create_mergejoin_path()
+ * 'pathkeys' is the ordering the resulting path is checked and built with
+ * 'mergeclauses' are the clauses to be used as merge clauses
+ * 'outersortkeys' and 'innersortkeys' are the orderings the inputs must
+ *		have; either may be NIL
+ *
+ * NOTE(review): the NULL passed after 'pathkeys' to create_mergejoin_path()
+ * is presumably the required_outer argument -- confirm against its
+ * declaration.
+ */
+static void try_partial_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
+    SpecialJoinInfo *sjinfo, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys,
+    List *mergeclauses, List *outersortkeys, List *innersortkeys)
+{
+    JoinCostWorkspace workspace;
+
+    /*
+     * See comments in try_partial_hashjoin_path().  An inner path that still
+     * requires outer rels (non-empty ppi_req_outer) is rejected here.
+     */
+    if (inner_path->param_info != NULL) {
+        Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
+
+        if (!bms_is_empty(inner_paramrels)) {
+            return;
+        }
+    }
+
+    /*
+     * If the given paths are already well enough ordered, we can skip doing
+     * an explicit sort.
+     */
+    if (outersortkeys && pathkeys_contained_in(outersortkeys, outer_path->pathkeys)) {
+        outersortkeys = NIL;
+    }
+    if (innersortkeys && pathkeys_contained_in(innersortkeys, inner_path->pathkeys)) {
+        innersortkeys = NIL;
+    }
+
+    /*
+     * See comments in try_partial_nestloop_path().  Compute a preliminary
+     * cost into 'workspace' and give up before building the path if
+     * add_partial_path_precheck() rejects that cost for these pathkeys.
+     */
+    initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, outer_path, inner_path, outersortkeys,
+        innersortkeys, sjinfo);
+
+    if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) {
+        return;
+    }
+
+    /* Might be good enough to be worth trying, so let's try it. */
+    add_partial_path(joinrel, (Path *)create_mergejoin_path(root, joinrel, jointype, &workspace, sjinfo, outer_path,
+        inner_path, restrict_clauses, pathkeys, NULL, mergeclauses, outersortkeys, innersortkeys));
+}
+
 /*
 * try_hashjoin_path
- *	  Consider a hash join path; if it appears useful, push it into
- *	  the joinrel's pathlist via add_path().
+ * 	  Consider a hash join path; if it appears useful, push it into
+ * 	  the joinrel's pathlist via add_path().
 */
 static void try_hashjoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinType jointype, JoinType save_jointype,
    SpecialJoinInfo* sjinfo, SemiAntiJoinFactors* semifactors, Relids param_source_rels, Path* outer_path,
@ -962,6 +1025,8 @@ static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
    ListCell* lc2 = NULL;
    int i, j;
    bool* join_used = NULL;
+    Path *cheapest_partial_outer = NULL;
+    Path *cheapest_safe_inner = NULL;
    int num_inner = list_length(innerrel->cheapest_total_path) - 1;

    /*
@ -1010,6 +1075,25 @@ static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
                jointype = JOIN_INNER;
            }

+            /*
+             * If the joinrel is parallel-safe, we may be able to consider a partial
+             * merge join.  However, we can't handle JOIN_UNIQUE_OUTER, because the
+             * outer path will be partial, and therefore we won't be able to properly
+             * guarantee uniqueness.  Similarly, we can't handle JOIN_FULL and
+             * JOIN_RIGHT, because they can produce false null extended rows.  Also,
+             * the resulting path must not be parameterized.
+             */
+            if (joinrel->consider_parallel && save_jointype != JOIN_UNIQUE_OUTER && save_jointype != JOIN_FULL &&
+                save_jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL) {
+                cheapest_partial_outer = (Path *)linitial(outerrel->partial_pathlist);
+
+                if (inner_path->parallel_safe) {
+                    cheapest_safe_inner = inner_path;
+                } else if (save_jointype != JOIN_UNIQUE_INNER) {
+                    cheapest_safe_inner = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
+                }
+            }
+
            /*
             * Each possible ordering of the available mergejoin clauses will generate
             * a differently-sorted result path at essentially the same cost.  We have
@ -1086,7 +1170,18 @@ static void sort_inner_and_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
                    merge_pathkeys,
                    cur_mergeclauses,
                    outerkeys,
-                    innerkeys);
+                    innerkeys,
+                    false);
+
+                /*
+                 * If we have partial outer and parallel safe inner path then try
+                 * partial mergejoin path.
+                 */
+                if (cheapest_partial_outer != NULL && cheapest_safe_inner != NULL) {
+                    try_partial_mergejoin_path(root, joinrel, jointype, extra->sjinfo, cheapest_partial_outer,
+                        cheapest_safe_inner, extra->restrictlist, merge_pathkeys, cur_mergeclauses, outerkeys,
+                        innerkeys);
+                }
            }
            j++;
        }
@ -1228,13 +1323,6 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
            foreach (l, outerrel->pathlist) {
                Path* outerpath = (Path*)lfirst(l);
                List* merge_pathkeys = NIL;
-                List* mergeclauses = NIL;
-                List* innersortkeys = NIL;
-                List* trialsortkeys = NIL;
-                Path* cheapest_startup_inner = NULL;
-                Path* cheapest_total_inner = NULL;
-                int num_sortkeys;
-                int sortkeycnt;

                /* for non-optimal inner, we only try outer path with the same distributed key */
                if (inner_cheapest_total_orig != linitial(innerrel->cheapest_total_path) &&
@ -1330,174 +1418,40 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
                if (save_jointype == JOIN_UNIQUE_OUTER)
                    continue;

-                /* Look for useful mergeclauses (if any) */
-                mergeclauses = find_mergeclauses_for_outer_pathkeys(root, outerpath->pathkeys, extra->mergeclause_list);
+                generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype, save_jointype, extra,
+                    useallclauses, inner_cheapest_total, merge_pathkeys, false);
+            }

+            /*
+            * Consider partial nestloop and mergejoin plan if outerrel has any
+            * partial path and the joinrel is parallel-safe.  However, we can't
+            * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
+            * therefore we won't be able to properly guarantee uniqueness.  Nor can
+            * we handle extra_lateral_rels, since partial paths must not be
+            * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
+            * because they can produce false null extended rows.
+            */
+            if (joinrel->consider_parallel && save_jointype != JOIN_UNIQUE_OUTER && save_jointype != JOIN_FULL &&
+                save_jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL) {
+                if (nestjoinOK) {
+                    consider_parallel_nestloop(root, joinrel, outerrel, innerrel, save_jointype, extra);
+                }
                /*
-                 * Done with this outer path if no chance for a mergejoin.
-                 *
-                 * Special corner case: for "x FULL JOIN y ON true", there will be no
-                 * join clauses at all.  Ordinarily we'd generate a clauseless
-                 * nestloop path, but since mergejoin is our only join type that
-                 * supports FULL JOIN without any join clauses, it's necessary to
-                 * generate a clauseless mergejoin path instead.
+                 * If inner_cheapest_total is NULL or non parallel-safe then find the
+                 * cheapest total parallel safe path.  If doing JOIN_UNIQUE_INNER, we
+                 * can't use any alternative inner path.
                 */
-                if (mergeclauses == NIL) {
-                    if (jointype == JOIN_FULL)
-                        /* okay to try for mergejoin */;
-                    else
+                if (inner_cheapest_total == NULL || !inner_cheapest_total->parallel_safe) {
+                    if (save_jointype == JOIN_UNIQUE_INNER) {
                        continue;
-                }
-                if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list))
-                    continue;
-
-                /* Compute the required ordering of the inner path */
-                innersortkeys = make_inner_pathkeys_for_merge(root, mergeclauses, outerpath->pathkeys);
-
-                /*
-                 * Generate a mergejoin on the basis of sorting the cheapest inner.
-                 * Since a sort will be needed, only cheapest total cost matters. (But
-                 * try_mergejoin_path will do the right thing if inner_cheapest_total
-                 * is already correctly sorted.)
-                 */
-                try_mergejoin_path(root,
-                    joinrel,
-                    jointype,
-                    save_jointype,
-                    extra->sjinfo,
-                    extra->param_source_rels,
-                    outerpath,
-                    inner_cheapest_total,
-                    extra->restrictlist,
-                    merge_pathkeys,
-                    mergeclauses,
-                    NIL,
-                    innersortkeys);
-
-                /* Can't do anything else if inner path needs to be unique'd */
-                if (save_jointype == JOIN_UNIQUE_INNER)
-                    continue;
-
-                /*
-                 * Look for presorted inner paths that satisfy the innersortkey list
-                 * --- or any truncation thereof, if we are allowed to build a
-                 * mergejoin using a subset of the merge clauses.  Here, we consider
-                 * both cheap startup cost and cheap total cost.
-                 *
-                 * Currently we do not consider parameterized inner paths here. This
-                 * interacts with decisions elsewhere that also discriminate against
-                 * mergejoins with parameterized inputs; see comments in
-                 * src/backend/optimizer/README.
-                 *
-                 * As we shorten the sortkey list, we should consider only paths that
-                 * are strictly cheaper than (in particular, not the same as) any path
-                 * found in an earlier iteration.  Otherwise we'd be intentionally
-                 * using fewer merge keys than a given path allows (treating the rest
-                 * as plain joinquals), which is unlikely to be a good idea.  Also,
-                 * eliminating paths here on the basis of compare_path_costs is a lot
-                 * cheaper than building the mergejoin path only to throw it away.
-                 *
-                 * If inner_cheapest_total is well enough sorted to have not required
-                 * a sort in the path made above, we shouldn't make a duplicate path
-                 * with it, either.  We handle that case with the same logic that
-                 * handles the previous consideration, by initializing the variables
-                 * that track cheapest-so-far properly.  Note that we do NOT reject
-                 * inner_cheapest_total if we find it matches some shorter set of
-                 * pathkeys.  That case corresponds to using fewer mergekeys to avoid
-                 * sorting inner_cheapest_total, whereas we did sort it above, so the
-                 * plans being considered are different.
-                 */
-                if (pathkeys_contained_in(innersortkeys, inner_cheapest_total->pathkeys)) {
-                    /* inner_cheapest_total didn't require a sort */
-                    cheapest_startup_inner = inner_cheapest_total;
-                    cheapest_total_inner = inner_cheapest_total;
-                } else {
-                    /* it did require a sort, at least for the full set of keys */
-                    cheapest_startup_inner = NULL;
-                    cheapest_total_inner = NULL;
-                }
-                num_sortkeys = list_length(innersortkeys);
-                if (num_sortkeys > 1 && !useallclauses)
-                    trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
-                else
-                    trialsortkeys = innersortkeys; /* won't really truncate */
-
-                for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) {
-                    Path* innerpath = NULL;
-                    List* newclauses = NIL;
-
-                    /*
-                     * Look for an inner path ordered well enough for the first
-                     * 'sortkeycnt' innersortkeys.	NB: trialsortkeys list is modified
-                     * destructively, which is why we made a copy...
-                     */
-                    trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
-                    innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, TOTAL_COST);
-                    if (innerpath != NULL && (cheapest_total_inner == NULL ||
-                                                 compare_path_costs(innerpath, cheapest_total_inner, TOTAL_COST) < 0)) {
-                        /* Found a cheap (or even-cheaper) sorted path */
-                        /* Select the right mergeclauses, if we didn't already */
-                        if (sortkeycnt < num_sortkeys) {
-                            newclauses = trim_mergeclauses_for_inner_pathkeys(root, mergeclauses, trialsortkeys);
-                            AssertEreport(newclauses != NIL, MOD_OPT_JOIN, "newclauses list is NIL");
-                        } else
-                            newclauses = mergeclauses;
-                        try_mergejoin_path(root,
-                            joinrel,
-                            jointype,
-                            save_jointype,
-                            extra->sjinfo,
-                            extra->param_source_rels,
-                            outerpath,
-                            innerpath,
-                            extra->restrictlist,
-                            merge_pathkeys,
-                            newclauses,
-                            NIL,
-                            NIL);
-                        cheapest_total_inner = innerpath;
-                    }
-                    /* Same on the basis of cheapest startup cost ... */
-                    innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, STARTUP_COST);
-                    if (innerpath != NULL &&
-                        (cheapest_startup_inner == NULL ||
-                            compare_path_costs(innerpath, cheapest_startup_inner, STARTUP_COST) < 0)) {
-                        /* Found a cheap (or even-cheaper) sorted path */
-                        if (innerpath != cheapest_total_inner) {
-                            /*
-                             * Avoid rebuilding clause list if we already made one;
-                             * saves memory in big join trees...
-                             */
-                            if (newclauses == NIL) {
-                                if (sortkeycnt < num_sortkeys) {
-                                    newclauses =
-                                        trim_mergeclauses_for_inner_pathkeys(root, mergeclauses, trialsortkeys);
-                                    AssertEreport(newclauses != NIL, MOD_OPT_JOIN, "newclauses list is NIL");
-                                } else
-                                    newclauses = mergeclauses;
-                            }
-                            try_mergejoin_path(root,
-                                joinrel,
-                                jointype,
-                                save_jointype,
-                                extra->sjinfo,
-                                extra->param_source_rels,
-                                outerpath,
-                                innerpath,
-                                extra->restrictlist,
-                                merge_pathkeys,
-                                newclauses,
-                                NIL,
-                                NIL);
-                        }
-                        cheapest_startup_inner = innerpath;
                    }

-                    /*
-                     * Don't consider truncated sortkeys if we need all clauses.
-                     */
-                    if (useallclauses)
-                        break;
+                    inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
+                }
+
+                if (inner_cheapest_total != NULL) {
+                    consider_parallel_mergejoin(root, joinrel, outerrel, innerrel, jointype, save_jointype, extra,
+                        inner_cheapest_total);
                }
            }
            j++;
@ -1505,20 +1459,41 @@ static void match_unsorted_outer(PlannerInfo* root, RelOptInfo* joinrel, RelOptI
        i++;
    }

-    /*
-     * If the joinrel is parallel-safe and the join type supports nested loops,
-     * we may be able to consider a partial nestloop plan.  However, we can't
-     * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
-     * therefore we won't be able to properly guarantee uniqueness.  Nor can
-     * we handle extra_lateral_rels, since partial paths must not be
-     * parameterized.
-     */
-    if (joinrel->consider_parallel && nestjoinOK && save_jointype != JOIN_UNIQUE_OUTER) {
-        consider_parallel_nestloop(root, joinrel, outerrel, innerrel, save_jointype, extra);
-    }
-
-    if (join_used != NULL)
+    if (join_used != NULL) {
        pfree_ext(join_used);
+    }
+}
+
+/*
+ * consider_parallel_mergejoin
+ * 	  Try to build partial paths for a joinrel by joining a partial path
+ * 	  for the outer relation to a complete path for the inner relation.
+ *
+ * Every path in outerrel->partial_pathlist is handed in turn to
+ * generate_mergejoin_paths() with useallclauses = false and
+ * is_partial = true.
+ *
+ * 'root' is the planner state, passed through to the helpers called here
+ * 'joinrel' is the join relation
+ * 'outerrel' is the outer join relation
+ * 'innerrel' is the inner join relation
+ * 'jointype' is the type of join to do; it is also used to derive the
+ *		result pathkeys for each outer path
+ * 'save_jointype' is forwarded unchanged to generate_mergejoin_paths()
+ * 'extra' contains additional input values
+ * 'inner_cheapest_total' cheapest total path for innerrel
+ */
+static void consider_parallel_mergejoin(PlannerInfo *root, RelOptInfo *joinrel, const RelOptInfo *outerrel,
+    RelOptInfo *innerrel, JoinType jointype, JoinType save_jointype, JoinPathExtraData *extra,
+    Path *inner_cheapest_total)
+{
+    ListCell *lc1 = NULL;
+
+    /* generate merge join path for each partial outer path */
+    foreach (lc1, outerrel->partial_pathlist) {
+        Path *outerpath = (Path *)lfirst(lc1);
+
+        /*
+         * Figure out what useful ordering any paths we create will have.
+         */
+        List *merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerpath->pathkeys);
+
+        generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype, save_jointype, extra, false,
+            inner_cheapest_total, merge_pathkeys, true);
+    }
 }

 /*
@ -1956,6 +1931,162 @@ static List* select_mergejoin_clauses(PlannerInfo* root, RelOptInfo* joinrel, Re
    return result_list;
 }

+/*
+ * generate_mergejoin_paths
+ * 	  Propose merge join paths for one outer path.
+ *
+ * The function first selects the mergeclauses that match the ordering of
+ * 'outerpath' and derives the inner sort keys they require.  It then
+ * proposes, via try_mergejoin_path():
+ *   1. a join against 'inner_cheapest_total', requesting a sort on the
+ *      inner sort keys; and
+ *   2. unless save_jointype is JOIN_UNIQUE_INNER, joins against paths from
+ *      innerrel->pathlist that already satisfy the inner sort keys, or a
+ *      leading prefix of them, picked once by total cost and once by
+ *      startup cost.
+ *
+ * 'joinrel' is the join relation
+ * 'innerrel' is the inner join relation
+ * 'outerpath' is the outer path to build joins on
+ * 'jointype' is the type of join to do; JOIN_UNIQUE_OUTER and
+ *		JOIN_UNIQUE_INNER are replaced by JOIN_INNER locally
+ * 'save_jointype' is the join type as originally requested
+ * 'extra' supplies mergeclause_list, sjinfo, param_source_rels and
+ *		restrictlist
+ * 'useallclauses' makes the function return without proposing anything
+ *		unless every clause in extra->mergeclause_list was selected, and
+ *		suppresses the search over truncated sort key lists
+ * 'inner_cheapest_total' is the inner path used for the sorted-inner
+ *		attempt; it is dereferenced without a NULL check, so callers must
+ *		pass a valid path
+ * 'merge_pathkeys' is the result ordering passed to try_mergejoin_path()
+ * 'is_partial' is forwarded to try_mergejoin_path() and is also passed as
+ *		the last argument of get_cheapest_path_for_pathkeys(), so that
+ *		only parallel-safe presorted inner paths are considered
+ */
+static void generate_mergejoin_paths(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *innerrel, Path *outerpath,
+    JoinType jointype, JoinType save_jointype, JoinPathExtraData *extra, bool useallclauses, Path *inner_cheapest_total,
+    List *merge_pathkeys, bool is_partial)
+{
+    List *trialsortkeys = NIL;
+    Path *cheapest_startup_inner = NULL;
+    Path *cheapest_total_inner = NULL;
+    int sortkeycnt;
+
+    /* Treat the unique-ified join types as a plain inner join from here on. */
+    if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER) {
+        jointype = JOIN_INNER;
+    }
+
+    /* Look for useful mergeclauses (if any) */
+    List *mergeclauses = find_mergeclauses_for_outer_pathkeys(root, outerpath->pathkeys, extra->mergeclause_list);
+
+    /*
+     * Done with this outer path if no chance for a mergejoin.
+     *
+     * Special corner case: for "x FULL JOIN y ON true", there will be no
+     * join clauses at all.  Ordinarily we'd generate a clauseless
+     * nestloop path, but since mergejoin is our only join type that
+     * supports FULL JOIN without any join clauses, it's necessary to
+     * generate a clauseless mergejoin path instead.
+     */
+    if (mergeclauses == NIL) {
+        if (jointype == JOIN_FULL) {
+            /* okay to try for mergejoin */;
+        } else {
+            return;
+        }
+    }
+    if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list)) {
+        return;
+    }
+
+    /* Compute the required ordering of the inner path */
+    List *innersortkeys = make_inner_pathkeys_for_merge(root, mergeclauses, outerpath->pathkeys);
+
+    /*
+     * Generate a mergejoin on the basis of sorting the cheapest inner.
+     * Since a sort will be needed, only cheapest total cost matters. (But
+     * try_mergejoin_path will do the right thing if inner_cheapest_total
+     * is already correctly sorted.)
+     */
+    try_mergejoin_path(root, joinrel, jointype, save_jointype, extra->sjinfo, extra->param_source_rels, outerpath,
+        inner_cheapest_total, extra->restrictlist, merge_pathkeys, mergeclauses, NIL, innersortkeys, is_partial);
+
+    /* Can't do anything else if inner path needs to be unique'd */
+    if (save_jointype == JOIN_UNIQUE_INNER) {
+        return;
+    }
+
+    /*
+     * Look for presorted inner paths that satisfy the innersortkey list
+     * --- or any truncation thereof, if we are allowed to build a
+     * mergejoin using a subset of the merge clauses.  Here, we consider
+     * both cheap startup cost and cheap total cost.
+     *
+     * Currently we do not consider parameterized inner paths here. This
+     * interacts with decisions elsewhere that also discriminate against
+     * mergejoins with parameterized inputs; see comments in
+     * src/backend/optimizer/README.
+     *
+     * As we shorten the sortkey list, we should consider only paths that
+     * are strictly cheaper than (in particular, not the same as) any path
+     * found in an earlier iteration.  Otherwise we'd be intentionally
+     * using fewer merge keys than a given path allows (treating the rest
+     * as plain joinquals), which is unlikely to be a good idea.  Also,
+     * eliminating paths here on the basis of compare_path_costs is a lot
+     * cheaper than building the mergejoin path only to throw it away.
+     *
+     * If inner_cheapest_total is well enough sorted to have not required
+     * a sort in the path made above, we shouldn't make a duplicate path
+     * with it, either.  We handle that case with the same logic that
+     * handles the previous consideration, by initializing the variables
+     * that track cheapest-so-far properly.  Note that we do NOT reject
+     * inner_cheapest_total if we find it matches some shorter set of
+     * pathkeys.  That case corresponds to using fewer mergekeys to avoid
+     * sorting inner_cheapest_total, whereas we did sort it above, so the
+     * plans being considered are different.
+     */
+    if (pathkeys_contained_in(innersortkeys, inner_cheapest_total->pathkeys)) {
+        /* inner_cheapest_total didn't require a sort */
+        cheapest_startup_inner = inner_cheapest_total;
+        cheapest_total_inner = inner_cheapest_total;
+    } else {
+        /* it did require a sort, at least for the full set of keys */
+        cheapest_startup_inner = NULL;
+        cheapest_total_inner = NULL;
+    }
+    int num_sortkeys = list_length(innersortkeys);
+    if (num_sortkeys > 1 && !useallclauses) {
+        trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
+    } else {
+        trialsortkeys = innersortkeys; /* won't really truncate */
+    }
+
+    for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) {
+        Path *innerpath = NULL;
+        List *newclauses = NIL;
+
+        /*
+         * Look for an inner path ordered well enough for the first
+         * 'sortkeycnt' innersortkeys.	NB: trialsortkeys list is modified
+         * destructively, which is why we made a copy...
+         */
+        trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
+        innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, TOTAL_COST, is_partial);
+        if (innerpath != NULL &&
+            (cheapest_total_inner == NULL || compare_path_costs(innerpath, cheapest_total_inner, TOTAL_COST) < 0)) {
+            /* Found a cheap (or even-cheaper) sorted path */
+            /* Select the right mergeclauses, if we didn't already */
+            if (sortkeycnt < num_sortkeys) {
+                newclauses = trim_mergeclauses_for_inner_pathkeys(root, mergeclauses, trialsortkeys);
+                AssertEreport(newclauses != NIL, MOD_OPT_JOIN, "newclauses list is NIL");
+            } else {
+                newclauses = mergeclauses;
+            }
+            try_mergejoin_path(root, joinrel, jointype, save_jointype, extra->sjinfo, extra->param_source_rels,
+                outerpath, innerpath, extra->restrictlist, merge_pathkeys, newclauses, NIL, NIL, is_partial);
+            cheapest_total_inner = innerpath;
+        }
+        /* Same on the basis of cheapest startup cost ... */
+        innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, STARTUP_COST, is_partial);
+        if (innerpath != NULL && (cheapest_startup_inner == NULL ||
+            compare_path_costs(innerpath, cheapest_startup_inner, STARTUP_COST) < 0)) {
+            /* Found a cheap (or even-cheaper) sorted path */
+            if (innerpath != cheapest_total_inner) {
+                /*
+                 * Avoid rebuilding clause list if we already made one;
+                 * saves memory in big join trees...
+                 */
+                if (newclauses == NIL) {
+                    if (sortkeycnt < num_sortkeys) {
+                        newclauses = trim_mergeclauses_for_inner_pathkeys(root, mergeclauses, trialsortkeys);
+                        AssertEreport(newclauses != NIL, MOD_OPT_JOIN, "newclauses list is NIL");
+                    } else {
+                        newclauses = mergeclauses;
+                    }
+                }
+                try_mergejoin_path(root, joinrel, jointype, save_jointype, extra->sjinfo, extra->param_source_rels,
+                    outerpath, innerpath, extra->restrictlist, merge_pathkeys, newclauses, NIL, NIL, is_partial);
+            }
+            cheapest_startup_inner = innerpath;
+        }
+
+        /*
+         * Don't consider truncated sortkeys if we need all clauses.
+         */
+        if (useallclauses) {
+            break;
+        }
+    }
+}
+
 static bool checkForPWJ(PlannerInfo* root, Path* outer_path, Path* inner_path, JoinType jointype, List* joinrestrict)
 {
    /* Validate configuration */
--- a/src/gausskernel/optimizer/path/pathkeys.cpp
+++ b/src/gausskernel/optimizer/path/pathkeys.cpp
@ -322,8 +322,10 @@ bool pathkeys_contained_in(List* keys1, List* keys2)
 * 'pathkeys' represents a required ordering (already canonicalized!)
 * 'required_outer' denotes allowable outer relations for parameterized paths
 * 'cost_criterion' is STARTUP_COST or TOTAL_COST
+ * 'require_parallel_safe' causes us to consider only parallel-safe paths
 */
-Path* get_cheapest_path_for_pathkeys(List* paths, List* pathkeys, Relids required_outer, CostSelector cost_criterion)
+Path* get_cheapest_path_for_pathkeys(List* paths, List* pathkeys, Relids required_outer,
+    CostSelector cost_criterion, bool require_parallel_safe)
 {
    Path* matched_path = NULL;
    ListCell* l = NULL;
@ -338,6 +340,10 @@ Path* get_cheapest_path_for_pathkeys(List* paths, List* pathkeys, Relids require
        if (matched_path != NULL && compare_path_costs(matched_path, path, cost_criterion) <= 0)
            continue;

+        if (require_parallel_safe && !path->parallel_safe) {
+            continue;
+        }
+
        if (pathkeys_contained_in(pathkeys, path->pathkeys) && bms_is_subset(PATH_REQ_OUTER(path), required_outer))
            matched_path = path;
    }
--- a/src/gausskernel/optimizer/util/pathnode.cpp
+++ b/src/gausskernel/optimizer/util/pathnode.cpp
@ -3816,7 +3816,8 @@ MergePath* create_mergejoin_path(PlannerInfo* root, RelOptInfo* joinrel, JoinTyp
    pathnode->jpath.path.parallel_aware = false;
    pathnode->jpath.path.parallel_safe =
        joinrel->consider_parallel && outer_path->parallel_safe && inner_path->parallel_safe;
-    pathnode->jpath.path.parallel_workers = 0;
+    /* Crude estimate for now: simply inherit the outer path's parallel_workers. */
+    pathnode->jpath.path.parallel_workers = outer_path->parallel_workers;
    pathnode->jpath.path.pathkeys = pathkeys;
    pathnode->jpath.jointype = jointype;
    pathnode->jpath.outerjoinpath = outer_path;
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@ -134,7 +134,7 @@ extern List* canonicalize_pathkeys(PlannerInfo* root, List* pathkeys);
 extern PathKeysComparison compare_pathkeys(List* keys1, List* keys2);
 extern bool pathkeys_contained_in(List* keys1, List* keys2);
 extern Path* get_cheapest_path_for_pathkeys(
-    List* paths, List* pathkeys, Relids required_outer, CostSelector cost_criterion);
+    List* paths, List* pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe);
 extern Path* get_cheapest_fractional_path_for_pathkeys(
    List* paths, List* pathkeys, Relids required_outer, double fraction);
 extern List* build_index_pathkeys(PlannerInfo* root, IndexOptInfo* index, ScanDirection scandir);
--- a/src/test/regress/expected/parallel_query.out
+++ b/src/test/regress/expected/parallel_query.out
@ -1,5 +1,6 @@
 create table parallel_t1(a int);
 insert into parallel_t1 values(generate_series(1,100000));
+analyze parallel_t1;
 --normal plan for seq scan
 explain (costs off) select count(*) from parallel_t1;
          QUERY PLAN           
@ -158,7 +159,6 @@ select count(*) from parallel_t1 where a <> 5000;
 (1 row)

 --clean up
-drop table parallel_t1;
 reset force_parallel_mode;
 reset parallel_setup_cost;
 reset parallel_tuple_cost;
@ -169,6 +169,34 @@ reset parallel_leader_participation;
 create table parallel_t2(a int, b int);
 insert into parallel_t2 values(generate_series(1,100000), generate_series(1,100000));
 create index t2_idx on parallel_t2 using btree(a);
+analyze parallel_t2;
+--normal plan for merge join
+set enable_hashjoin to off;
+set enable_nestloop to off;
+set enable_indexscan to off;
+explain (costs off) select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Aggregate
+   ->  Merge Join
+         Merge Cond: (parallel_t1.a = parallel_t2.a)
+         ->  Sort
+               Sort Key: parallel_t1.a
+               ->  Seq Scan on parallel_t1
+         ->  Sort
+               Sort Key: parallel_t2.a
+               ->  Seq Scan on parallel_t2
+(9 rows)
+
+select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
+ count  
+--------
+ 100000
+(1 row)
+
+reset enable_hashjoin;
+reset enable_nestloop;
+reset enable_indexscan;
 --set index scan parameter
 set enable_seqscan to off;
 set enable_bitmapscan to off;
@ -246,7 +274,7 @@ select count(b) from parallel_t2 where a < 5000;
 alter table parallel_t2 set (parallel_workers = 1);
 --parallel plan for index scan
 explain (costs off) select count(b) from parallel_t2 where a > 5000;
-                         QUERY PLAN
+                         QUERY PLAN                          
 -------------------------------------------------------------
 Aggregate
   ->  Gather
@ -256,7 +284,7 @@ explain (costs off) select count(b) from parallel_t2 where a > 5000;
 (5 rows)

 explain (costs off) select count(b) from parallel_t2 where a < 5000;
-                         QUERY PLAN
+                         QUERY PLAN                          
 -------------------------------------------------------------
 Aggregate
   ->  Gather
@ -266,18 +294,50 @@ explain (costs off) select count(b) from parallel_t2 where a < 5000;
 (5 rows)

 select count(b) from parallel_t2 where a > 5000;
- count
+ count 
 -------
 95000
 (1 row)

 select count(b) from parallel_t2 where a < 5000;
- count
+ count 
 -------
  4999
 (1 row)

+--parallel plan for merge join
+reset enable_seqscan;
+reset enable_bitmapscan;
+set enable_hashjoin to off;
+set enable_nestloop to off;
+set enable_indexscan to off;
+explain (costs off) select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Aggregate
+   ->  Gather
+         Number of Workers: 2
+         ->  Merge Join
+               Merge Cond: (parallel_t1.a = parallel_t2.a)
+               ->  Sort
+                     Sort Key: parallel_t1.a
+                     ->  Parallel Seq Scan on parallel_t1
+               ->  Sort
+                     Sort Key: parallel_t2.a
+                     ->  Seq Scan on parallel_t2
+(11 rows)
+
+select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
+ count  
+--------
+ 100000
+(1 row)
+
+reset enable_hashjoin;
+reset enable_nestloop;
+reset enable_indexscan;
 --clean up
+drop table parallel_t1;
 drop table parallel_t2;
 reset enable_seqscan;
 reset enable_bitmapscan;
--- a/src/test/regress/sql/parallel_query.sql
+++ b/src/test/regress/sql/parallel_query.sql
@ -1,5 +1,6 @@
 create table parallel_t1(a int);
 insert into parallel_t1 values(generate_series(1,100000));
+analyze parallel_t1;
 --normal plan for seq scan
 explain (costs off) select count(*) from parallel_t1;
 explain (costs off) select count(*) from parallel_t1 where a = 5000;
@ -33,7 +34,6 @@ select count(*) from parallel_t1 where a < 5000;
 select count(*) from parallel_t1 where a <> 5000;

 --clean up
-drop table parallel_t1;
 reset force_parallel_mode;
 reset parallel_setup_cost;
 reset parallel_tuple_cost;
@ -45,6 +45,17 @@ reset parallel_leader_participation;
 create table parallel_t2(a int, b int);
 insert into parallel_t2 values(generate_series(1,100000), generate_series(1,100000));
 create index t2_idx on parallel_t2 using btree(a);
+analyze parallel_t2;
+
+--normal plan for merge join
+set enable_hashjoin to off;
+set enable_nestloop to off;
+set enable_indexscan to off;
+explain (costs off) select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
+select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
+reset enable_hashjoin;
+reset enable_nestloop;
+reset enable_indexscan;

 --set index scan parameter
 set enable_seqscan to off;
@ -80,7 +91,20 @@ explain (costs off) select count(b) from parallel_t2 where a < 5000;
 select count(b) from parallel_t2 where a > 5000;
 select count(b) from parallel_t2 where a < 5000;

+--parallel plan for merge join
+reset enable_seqscan;
+reset enable_bitmapscan;
+set enable_hashjoin to off;
+set enable_nestloop to off;
+set enable_indexscan to off;
+explain (costs off) select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
+select count(*) from parallel_t1,parallel_t2 where parallel_t1.a=parallel_t2.a;
+reset enable_hashjoin;
+reset enable_nestloop;
+reset enable_indexscan;
+
 --clean up
+drop table parallel_t1;
 drop table parallel_t2;
 reset enable_seqscan;
 reset enable_bitmapscan;