From 5dfa9d8ed1ced7fcdee6b84dc16075a1a15efa1e Mon Sep 17 00:00:00 2001 From: TotaJ Date: Wed, 16 Sep 2020 20:10:45 +0800 Subject: [PATCH] Parallel query, sequence scan. --- src/bin/gs_guc/cluster_guc.conf | 7 + src/common/backend/catalog/index.cpp | 9 + src/common/backend/catalog/namespace.cpp | 52 +- src/common/backend/nodes/copyfuncs.cpp | 26 + src/common/backend/nodes/nodeFuncs.cpp | 114 ++ src/common/backend/nodes/nodes.cpp | 1 + src/common/backend/nodes/outfuncs.cpp | 32 + src/common/backend/nodes/params.cpp | 177 +++ src/common/backend/nodes/readfuncs.cpp | 2 + src/common/backend/utils/adt/datum.cpp | 126 ++ src/common/backend/utils/adt/lockfuncs.cpp | 37 +- src/common/backend/utils/cache/lsyscache.cpp | 10 + src/common/backend/utils/cache/relmapper.cpp | 7 + src/common/backend/utils/misc/guc.cpp | 150 +++ src/common/backend/utils/time/snapmgr.cpp | 221 +++- src/common/pl/plpgsql/src/pl_exec.cpp | 25 +- src/gausskernel/optimizer/commands/async.cpp | 5 + src/gausskernel/optimizer/commands/copy.cpp | 4 +- .../optimizer/commands/explain.cpp | 18 +- .../optimizer/commands/extension.cpp | 2 +- .../optimizer/commands/prepare.cpp | 1 + .../optimizer/commands/sequence.cpp | 15 + .../optimizer/commands/variable.cpp | 32 +- src/gausskernel/optimizer/path/allpaths.cpp | 166 ++- src/gausskernel/optimizer/path/costsize.cpp | 46 +- src/gausskernel/optimizer/plan/createplan.cpp | 50 + src/gausskernel/optimizer/plan/planmain.cpp | 4 + src/gausskernel/optimizer/plan/planner.cpp | 48 + src/gausskernel/optimizer/plan/setrefs.cpp | 7 +- src/gausskernel/optimizer/plan/subselect.cpp | 1 + src/gausskernel/optimizer/util/clauses.cpp | 192 ++- src/gausskernel/optimizer/util/optcommon.cpp | 3 + src/gausskernel/optimizer/util/pathnode.cpp | 66 +- src/gausskernel/optimizer/util/relnode.cpp | 22 + .../process/postmaster/bgworker.cpp | 5 + .../process/postmaster/postmaster.cpp | 128 +- src/gausskernel/process/tcop/dest.cpp | 8 + src/gausskernel/process/tcop/postgres.cpp | 18 +- src/gausskernel/process/tcop/utility.cpp | 20 +- .../process/threadpool/knl_thread.cpp | 6 + src/gausskernel/runtime/executor/Makefile | 6 +- src/gausskernel/runtime/executor/execAmi.cpp | 22 +- src/gausskernel/runtime/executor/execMain.cpp | 53 +- .../runtime/executor/execParallel.cpp | 589 +++++++++ .../runtime/executor/execProcnode.cpp | 35 + .../runtime/executor/functions.cpp | 6 +- .../runtime/executor/instrument.cpp | 77 +- .../runtime/executor/nodeGather.cpp | 434 +++++++ .../runtime/executor/nodeSamplescan.cpp | 14 +- .../runtime/executor/nodeSeqscan.cpp | 119 +- src/gausskernel/runtime/executor/spi.cpp | 41 +- src/gausskernel/runtime/executor/tqueue.cpp | 905 ++++++++++++++ .../storage/access/hbstore/hbucket_am.cpp | 6 +- .../storage/access/heap/heapam.cpp | 343 +++++- .../storage/access/transam/Makefile | 4 +- .../storage/access/transam/parallel.cpp | 1093 +++++++++++++++++ .../storage/access/transam/varsup.cpp | 9 + .../storage/access/transam/xact.cpp | 327 ++++- src/gausskernel/storage/buffer/localbuf.cpp | 14 + src/gausskernel/storage/ipc/Makefile | 2 +- src/gausskernel/storage/ipc/dsm.cpp | 63 + src/gausskernel/storage/ipc/procarray.cpp | 47 +- src/gausskernel/storage/ipc/procsignal.cpp | 4 + src/gausskernel/storage/ipc/shm_mq.cpp | 4 +- src/gausskernel/storage/lmgr/lock.cpp | 35 +- src/gausskernel/storage/lmgr/predicate.cpp | 10 + src/include/access/heapam.h | 6 + src/include/access/parallel.h | 68 + src/include/access/relscan.h | 33 +- src/include/access/tableam.h | 20 + src/include/access/xact.h | 7 + 
src/include/catalog/namespace.h | 4 +- src/include/catalog/pg_proc.h | 9 + src/include/executor/execParallel.h | 38 + src/include/executor/executor.h | 1 + src/include/executor/instrument.h | 10 + src/include/executor/nodeGather.h | 25 + src/include/executor/nodeSeqscan.h | 6 + src/include/executor/spi.h | 1 + src/include/executor/tqueue.h | 29 + src/include/gs_thread.h | 1 + .../knl/knl_guc/knl_instance_attr_common.h | 4 + .../knl/knl_guc/knl_session_attr_sql.h | 5 + src/include/knl/knl_session.h | 65 + src/include/knl/knl_thread.h | 44 + src/include/libpq/pqmq.h | 2 +- src/include/miscadmin.h | 4 + src/include/nodes/execnodes.h | 20 + src/include/nodes/nodeFuncs.h | 2 + src/include/nodes/nodes.h | 3 + src/include/nodes/params.h | 4 + src/include/nodes/parsenodes.h | 1 + src/include/nodes/plannodes.h | 17 + src/include/nodes/relation.h | 21 + src/include/optimizer/clauses.h | 1 + src/include/optimizer/cost.h | 6 +- src/include/optimizer/pathnode.h | 5 +- src/include/optimizer/planner.h | 7 + src/include/postmaster/bgworker.h | 1 + src/include/postmaster/postmaster.h | 2 + src/include/storage/dsm.h | 48 + src/include/storage/procarray.h | 2 + src/include/storage/shm_mq.h | 2 +- src/include/tcop/dest.h | 3 +- src/include/utils/datum.h | 8 + src/include/utils/lsyscache.h | 1 + src/include/utils/snapmgr.h | 7 + .../expected/bypass_simplequery_support.out | 1 + .../sql/bypass_simplequery_support.sql | 1 + 109 files changed, 6485 insertions(+), 185 deletions(-) create mode 100644 src/gausskernel/runtime/executor/execParallel.cpp create mode 100644 src/gausskernel/runtime/executor/nodeGather.cpp create mode 100644 src/gausskernel/runtime/executor/tqueue.cpp create mode 100644 src/gausskernel/storage/access/transam/parallel.cpp create mode 100644 src/gausskernel/storage/ipc/dsm.cpp create mode 100644 src/include/access/parallel.h create mode 100644 src/include/executor/execParallel.h create mode 100644 src/include/executor/nodeGather.h create mode 100644 src/include/executor/tqueue.h create mode 100644 src/include/storage/dsm.h diff --git a/src/bin/gs_guc/cluster_guc.conf b/src/bin/gs_guc/cluster_guc.conf index 00837318f..4b4d8bc4c 100644 --- a/src/bin/gs_guc/cluster_guc.conf +++ b/src/bin/gs_guc/cluster_guc.conf @@ -527,6 +527,13 @@ tcp_recv_timeout|int|0,86400|s|Specify the receiving timeouts until reporting an max_inner_tool_connections|int|1,8388607|NULL|NULL| max_keep_log_seg|int|0,2147483647|NULL|NULL| max_background_workers|int|0,262143|NULL|NULL| +min_parallel_table_scan_size|int|0,715827882|kB|NULL| +max_parallel_workers|int|0,1024|NULL|NULL| +max_parallel_workers_per_gather|int|0,1024|NULL|NULL| +parallel_tuple_cost|real|0,1.79769e+308|NULL|NULL| +parallel_setup_cost|real|0,1.79769e+308|NULL|NULL| +force_parallel_mode|enum|off,on,regress|NULL|NULL| +parallel_leader_participation|bool|0,0|NULL|NULL| [gtm] nodename|string|0,0|NULL|Name of this GTM/GTM-Standby.| port|int|1,65535|NULL|Listen Port of GTM or GTM standby server.| diff --git a/src/common/backend/catalog/index.cpp b/src/common/backend/catalog/index.cpp index 9a4f922ae..e9404b1b7 100755 --- a/src/common/backend/catalog/index.cpp +++ b/src/common/backend/catalog/index.cpp @@ -4334,6 +4334,11 @@ static void SetReindexPending(List* indexes) /* Reindexing is not re-entrant. 
*/ if (u_sess->catalog_cxt.pendingReindexedIndexes) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("cannot reindex while reindexing"))); + + if (IsInParallelMode()) { + ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot modify reindex state during a parallel operation"))); + } u_sess->catalog_cxt.pendingReindexedIndexes = list_copy(indexes); } @@ -4343,6 +4348,10 @@ static void SetReindexPending(List* indexes) */ static void RemoveReindexPending(Oid indexOid) { + if (IsInParallelMode()) { + ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot modify reindex state during a parallel operation"))); + } u_sess->catalog_cxt.pendingReindexedIndexes = list_delete_oid(u_sess->catalog_cxt.pendingReindexedIndexes, indexOid); } diff --git a/src/common/backend/catalog/namespace.cpp b/src/common/backend/catalog/namespace.cpp index 911aa4509..c4bc7a418 100755 --- a/src/common/backend/catalog/namespace.cpp +++ b/src/common/backend/catalog/namespace.cpp @@ -20,6 +20,7 @@ #include "postgres.h" #include "knl/knl_variable.h" +#include "access/parallel.h" #include "access/xact.h" #include "access/xlog.h" #ifdef PGXC @@ -2978,6 +2979,47 @@ Oid GetTempToastNamespace(void) return u_sess->catalog_cxt.myTempToastNamespace; } +/* + * GetTempNamespaceState - fetch status of session's temporary namespace + * + * This is used for conveying state to a parallel worker, and is not meant + * for general-purpose access. + */ +void GetTempNamespaceState(Oid *tempNamespaceId, Oid *tempToastNamespaceId) +{ + /* Return namespace OIDs, or 0 if session has not created temp namespace */ + *tempNamespaceId = u_sess->catalog_cxt.myTempNamespace; + *tempToastNamespaceId = u_sess->catalog_cxt.myTempToastNamespace; +} + +/* + * SetTempNamespaceState - set status of session's temporary namespace + * + * This is used for conveying state to a parallel worker, and is not meant for + * general-purpose access. By transferring these namespace OIDs to workers, + * we ensure they will have the same notion of the search path as their leader + * does. + */ +void SetTempNamespaceState(Oid tempNamespaceId, Oid tempToastNamespaceId) +{ + /* Worker should not have created its own namespaces ... */ + Assert(u_sess->catalog_cxt.myTempNamespace == InvalidOid); + Assert(u_sess->catalog_cxt.myTempToastNamespace == InvalidOid); + Assert(u_sess->catalog_cxt.myTempNamespaceSubID == InvalidSubTransactionId); + + /* Assign same namespace OIDs that leader has */ + u_sess->catalog_cxt.myTempNamespace = tempNamespaceId; + u_sess->catalog_cxt.myTempToastNamespace = tempToastNamespaceId; + + /* + * It's fine to leave myTempNamespaceSubID == InvalidSubTransactionId. + * Even if the namespace is new so far as the leader is concerned, it's + * not new to the worker, and we certainly wouldn't want the worker trying + * to destroy it. + */ + u_sess->catalog_cxt.baseSearchPathValid = false; /* may need to rebuild list */ +} + /* * GetOverrideSearchPath - fetch current search path definition in form * used by PushOverrideSearchPath. @@ -3622,6 +3664,12 @@ static void InitTempTableNamespace(void) ereport(ERROR, (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), errmsg("cannot create temporary tables during recovery"))); + /* Parallel workers can't create temporary tables, either. 
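/*
 * A minimal usage sketch, not from this patch: how leader-side code could
 * capture the temp-namespace state with GetTempNamespaceState() and how a
 * worker would adopt it with SetTempNamespaceState().  The struct and the
 * two helper names are hypothetical; the real transfer is done by
 * src/gausskernel/storage/access/transam/parallel.cpp.
 */
#include "postgres.h"
#include "catalog/namespace.h"

typedef struct TempNamespaceFixup {
    Oid tempNamespaceId;
    Oid tempToastNamespaceId;
} TempNamespaceFixup;

/* Leader: snapshot the session's temp-namespace OIDs for the worker. */
static void PackTempNamespaceState(TempNamespaceFixup *fixup)
{
    GetTempNamespaceState(&fixup->tempNamespaceId, &fixup->tempToastNamespaceId);
}

/* Worker: adopt the leader's notion of the temp namespace and its toast peer. */
static void ApplyTempNamespaceState(const TempNamespaceFixup *fixup)
{
    SetTempNamespaceState(fixup->tempNamespaceId, fixup->tempToastNamespaceId);
}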
*/ + if (IsParallelWorker()) { + ereport(ERROR, (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), + errmsg("cannot create temporary tables during a parallel operation"))); + } + timeLineId = get_controlfile_timeline(); tempID = __sync_add_and_fetch(>_tempID_seed, 1); @@ -3762,7 +3810,7 @@ static void InitTempTableNamespace(void) /* * End-of-transaction cleanup for namespaces. */ -void AtEOXact_Namespace(bool isCommit) +void AtEOXact_Namespace(bool isCommit, bool parallel) { /* * If we abort the transaction in which a temp namespace was selected, @@ -3772,7 +3820,7 @@ void AtEOXact_Namespace(bool isCommit) * at backend shutdown. (We only want to register the callback once per * session, so this is a good place to do it.) */ - if (u_sess->catalog_cxt.myTempNamespaceSubID != InvalidSubTransactionId) { + if (u_sess->catalog_cxt.myTempNamespaceSubID != InvalidSubTransactionId && !parallel) { //@Temp table. No need to register RemoveTempRelationsCallback here, // because we don't drop temp objects by porc_exit(); if (!isCommit) { diff --git a/src/common/backend/nodes/copyfuncs.cpp b/src/common/backend/nodes/copyfuncs.cpp index 5e2a4d631..64153f766 100644 --- a/src/common/backend/nodes/copyfuncs.cpp +++ b/src/common/backend/nodes/copyfuncs.cpp @@ -152,6 +152,7 @@ static PlannedStmt* _copyPlannedStmt(const PlannedStmt* from) COPY_SCALAR_FIELD(gather_count); COPY_SCALAR_FIELD(isRowTriggerShippable); COPY_SCALAR_FIELD(is_stream_plan); + COPY_SCALAR_FIELD(parallelModeNeeded); /* * Not copy ng_queryMem to avoid memory leak in CachedPlan context, * and dywlm_client_manager always calls CalculateQueryMemMain to generate it. @@ -175,6 +176,7 @@ static void CopyPlanFields(const Plan* from, Plan* newnode) COPY_SCALAR_FIELD(plan_rows); COPY_SCALAR_FIELD(multiple); COPY_SCALAR_FIELD(plan_width); + COPY_SCALAR_FIELD(parallel_aware); COPY_SCALAR_FIELD(dop); COPY_NODE_FIELD(targetlist); COPY_NODE_FIELD(qual); @@ -421,6 +423,27 @@ static BitmapAnd* _copyBitmapAnd(const BitmapAnd* from) return newnode; } +/* + * _copyGather + */ +static Gather *_copyGather(const Gather *from) +{ + Gather *newnode = makeNode(Gather); + + /* + * copy node superclass fields + */ + CopyPlanFields((const Plan *)from, (Plan *)newnode); + + /* + * copy remainder of node + */ + COPY_SCALAR_FIELD(num_workers); + COPY_SCALAR_FIELD(single_copy); + + return newnode; +} + /* * _copyBitmapOr */ @@ -5834,6 +5857,9 @@ void* copyObject(const void* from) case T_Scan: retval = _copyScan((Scan*)from); break; + case T_Gather: + retval = _copyGather((Gather*)from); + break; case T_BucketInfo: retval = _copyBucketInfo((BucketInfo*)from); break; diff --git a/src/common/backend/nodes/nodeFuncs.cpp b/src/common/backend/nodes/nodeFuncs.cpp index 698fa8ca1..38e0fb9e1 100755 --- a/src/common/backend/nodes/nodeFuncs.cpp +++ b/src/common/backend/nodes/nodeFuncs.cpp @@ -28,6 +28,8 @@ static bool expression_returns_set_walker(Node* node, void* context); static int leftmostLoc(int loc1, int loc2); +static bool planstate_walk_subplans(List *plans, bool (*walker)(), void *context); +static bool planstate_walk_members(List *plans, PlanState **planstates, bool (*walker)(), void *context); /* * exprType - @@ -3185,3 +3187,115 @@ bool lockNextvalWalker(Node* node, void* context) lockSeqForNextvalFunc(node); return expression_tree_walker(node, (bool (*)())lockNextvalWalker, context); } + +/* + * planstate_tree_walker --- walk plan state trees + * + * The walker has already visited the current node, and so we need only + * recurse into any sub-nodes it has. 
+ */ +bool planstate_tree_walker(PlanState *planstate, bool (*walker)(), void *context) +{ + Plan *plan = planstate->plan; + bool (*p2walker)(PlanState *, void *) = (bool (*)(PlanState *, void *))walker; + + /* initPlan-s */ + if (planstate_walk_subplans(planstate->initPlan, walker, context)) { + return true; + } + + /* lefttree */ + if (outerPlanState(planstate)) { + if (p2walker(outerPlanState(planstate), context)) { + return true; + } + } + + /* righttree */ + if (innerPlanState(planstate)) { + if (p2walker(innerPlanState(planstate), context)) { + return true; + } + } + + /* special child plans */ + switch (nodeTag(plan)) { + case T_ModifyTable: + if (planstate_walk_members(((ModifyTable *)plan)->plans, ((ModifyTableState *)planstate)->mt_plans, walker, + context)) + return true; + break; + case T_Append: + if (planstate_walk_members(((Append *)plan)->appendplans, ((AppendState *)planstate)->appendplans, walker, + context)) + return true; + break; + case T_MergeAppend: + if (planstate_walk_members(((MergeAppend *)plan)->mergeplans, ((MergeAppendState *)planstate)->mergeplans, + walker, context)) + return true; + break; + case T_BitmapAnd: + if (planstate_walk_members(((BitmapAnd *)plan)->bitmapplans, ((BitmapAndState *)planstate)->bitmapplans, + walker, context)) + return true; + break; + case T_BitmapOr: + if (planstate_walk_members(((BitmapOr *)plan)->bitmapplans, ((BitmapOrState *)planstate)->bitmapplans, + walker, context)) + return true; + break; + case T_SubqueryScan: + if (p2walker(((SubqueryScanState *)planstate)->subplan, context)) + return true; + break; + default: + break; + } + + /* subPlan-s */ + if (planstate_walk_subplans(planstate->subPlan, walker, context)) { + return true; + } + + return false; +} + +/* + * Walk a list of SubPlans (or initPlans, which also use SubPlan nodes). + */ +static bool planstate_walk_subplans(List *plans, bool (*walker)(), void *context) +{ + ListCell *lc = NULL; + bool (*p2walker)(PlanState *, void *) = (bool (*)(PlanState *, void *))walker; + + foreach (lc, plans) { + SubPlanState *sps = (SubPlanState *)lfirst(lc); + + Assert(IsA(sps, SubPlanState)); + if (p2walker(sps->planstate, context)) + return true; + } + + return false; +} + +/* + * Walk the constituent plans of a ModifyTable, Append, MergeAppend, + * BitmapAnd, or BitmapOr node. + * + * Note: we don't actually need to examine the Plan list members, but + * we need the list in order to determine the length of the PlanState array. 
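/*
 * A minimal usage sketch, not from this patch: a walker written against
 * planstate_tree_walker().  The walker visits the node it is handed first
 * and then asks planstate_tree_walker() to recurse; returning true would
 * stop the walk early.  count_planstate_walker/count_planstates are
 * hypothetical names used only for illustration.
 */
#include "postgres.h"
#include "nodes/execnodes.h"
#include "nodes/nodeFuncs.h"

static bool count_planstate_walker(PlanState *planstate, void *context)
{
    if (planstate == NULL)
        return false;

    /* "Visit" the current node: bump the counter passed as context. */
    (*(int *)context)++;

    /* Let planstate_tree_walker recurse into children, subplans and initplans. */
    return planstate_tree_walker(planstate, (bool (*)())count_planstate_walker, context);
}

static int count_planstates(PlanState *root)
{
    int count = 0;

    (void)count_planstate_walker(root, &count);
    return count;
}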
+ */ +static bool planstate_walk_members(List *plans, PlanState **planstates, bool (*walker)(), void *context) +{ + int nplans = list_length(plans); + bool (*p2walker)(PlanState *, void *) = (bool (*)(PlanState *, void *))walker; + + for (int j = 0; j < nplans; j++) { + if (p2walker(planstates[j], context)) + return true; + } + + return false; +} diff --git a/src/common/backend/nodes/nodes.cpp b/src/common/backend/nodes/nodes.cpp index 98d84cd08..0ea1b29b4 100755 --- a/src/common/backend/nodes/nodes.cpp +++ b/src/common/backend/nodes/nodes.cpp @@ -245,6 +245,7 @@ static const TagStr g_tagStrArr[] = {{T_Invalid, "Invalid"}, {T_ResultPath, "ResultPath"}, {T_MaterialPath, "MaterialPath"}, {T_UniquePath, "UniquePath"}, + {T_GatherPath, "Gather"}, {T_PartIteratorPath, "PartIteratorPath"}, {T_EquivalenceClass, "EquivalenceClass"}, {T_EquivalenceMember, "EquivalenceMember"}, diff --git a/src/common/backend/nodes/outfuncs.cpp b/src/common/backend/nodes/outfuncs.cpp index ab85460e0..1e935bb06 100755 --- a/src/common/backend/nodes/outfuncs.cpp +++ b/src/common/backend/nodes/outfuncs.cpp @@ -593,6 +593,7 @@ static void _outPlannedStmt(StringInfo str, PlannedStmt* node) } WRITE_BOOL_FIELD(isRowTriggerShippable); WRITE_BOOL_FIELD(is_stream_plan); + WRITE_BOOL_FIELD(parallelModeNeeded); } /* @@ -609,6 +610,7 @@ static void _outPlanInfo(StringInfo str, Plan* node) appendStringInfo(str, " :plan_rows %.0f", PLAN_LOCAL_ROWS(node)); WRITE_FLOAT_FIELD(multiple, "%.0f"); WRITE_INT_FIELD(plan_width); + WRITE_BOOL_FIELD(parallel_aware); WRITE_NODE_FIELD(targetlist); WRITE_NODE_FIELD(qual); WRITE_NODE_FIELD(lefttree); @@ -897,6 +899,16 @@ static void _outBucketInfo(StringInfo str, BucketInfo* node) WRITE_NODE_FIELD(buckets); } +static void _outGather(StringInfo str, Gather *node) +{ + WRITE_NODE_TYPE("GATHER"); + + _outPlanInfo(str, (Plan *)node); + + WRITE_INT_FIELD(num_workers); + WRITE_BOOL_FIELD(single_copy); +} + static void _outScan(StringInfo str, Scan* node) { WRITE_NODE_TYPE("SCAN"); @@ -2829,6 +2841,17 @@ static void _outUniquePath(StringInfo str, UniquePath* node) WRITE_BOOL_FIELD(hold_tlist); } +static void _outGatherPath(StringInfo str, GatherPath *node) +{ + WRITE_NODE_TYPE("GATHERPATH"); + + _outPathInfo(str, (Path *)node); + + WRITE_NODE_FIELD(subpath); + WRITE_INT_FIELD(num_workers); + WRITE_BOOL_FIELD(single_copy); +} + static void _outNestPath(StringInfo str, NestPath* node) { WRITE_NODE_TYPE("NESTPATH"); @@ -2877,6 +2900,8 @@ static void _outPlannerGlobal(StringInfo str, PlannerGlobal* node) WRITE_UINT_FIELD(lastRowMarkId); WRITE_BOOL_FIELD(transientPlan); WRITE_BOOL_FIELD(dependsOnRole); + WRITE_BOOL_FIELD(parallelModeOK); + WRITE_BOOL_FIELD(parallelModeNeeded); } /* @@ -2956,6 +2981,7 @@ static void _outRelOptInfo(StringInfo str, RelOptInfo* node) WRITE_ENUM_FIELD(partflag, PartitionFlag); WRITE_FLOAT_FIELD(rows, "%.0f"); WRITE_INT_FIELD(width); + WRITE_BOOL_FIELD(consider_parallel); WRITE_NODE_FIELD(reltargetlist); WRITE_NODE_FIELD(pathlist); WRITE_NODE_FIELD(ppilist); @@ -4860,6 +4886,9 @@ static void _outNode(StringInfo str, const void* obj) case T_BitmapOr: _outBitmapOr(str, (BitmapOr*)obj); break; + case T_Gather: + _outGather(str, (Gather*)obj); + break; case T_Scan: _outScan(str, (Scan*)obj); break; @@ -5189,6 +5218,9 @@ static void _outNode(StringInfo str, const void* obj) case T_UniquePath: _outUniquePath(str, (UniquePath*)obj); break; + case T_GatherPath: + _outGatherPath(str, (GatherPath*)obj); + break; case T_NestPath: _outNestPath(str, (NestPath*)obj); break; diff --git 
a/src/common/backend/nodes/params.cpp b/src/common/backend/nodes/params.cpp index 7f28bb5e9..cb9baf633 100644 --- a/src/common/backend/nodes/params.cpp +++ b/src/common/backend/nodes/params.cpp @@ -17,6 +17,7 @@ #include "knl/knl_variable.h" #include "nodes/params.h" +#include "storage/shmem.h" #include "utils/datum.h" #include "utils/lsyscache.h" @@ -49,6 +50,7 @@ ParamListInfo copyParamList(ParamListInfo from) retval->parserSetupArg = NULL; retval->params_need_process = false; retval->numParams = from->numParams; + retval->paramMask = NULL; for (i = 0; i < from->numParams; i++) { ParamExternData* oprm = &from->params[i]; @@ -56,6 +58,15 @@ ParamListInfo copyParamList(ParamListInfo from) int16 typLen; bool typByVal = false; + /* Ignore parameters we don't need, to save cycles and space. */ + if (retval->paramMask != NULL && !bms_is_member(i, retval->paramMask)) { + nprm->value = (Datum)0; + nprm->isnull = true; + nprm->pflags = 0; + nprm->ptype = InvalidOid; + continue; + } + /* give hook a chance in case parameter is dynamic */ if (!OidIsValid(oprm->ptype) && from->paramFetch != NULL) { (*from->paramFetch)(from, i + 1); @@ -74,3 +85,169 @@ ParamListInfo copyParamList(ParamListInfo from) return retval; } + +/* + * Estimate the amount of space required to serialize a ParamListInfo. + */ +Size EstimateParamListSpace(ParamListInfo paramLI) +{ + Size sz = sizeof(int); + + if (paramLI == NULL || paramLI->numParams <= 0) + return sz; + + for (int i = 0; i < paramLI->numParams; i++) { + ParamExternData *prm = ¶mLI->params[i]; + Oid typeOid; + int16 typLen; + bool typByVal = false; + + /* Ignore parameters we don't need, to save cycles and space. */ + if (paramLI->paramMask != NULL && !bms_is_member(i, paramLI->paramMask)) { + typeOid = InvalidOid; + } else { + /* give hook a chance in case parameter is dynamic */ + if (!OidIsValid(prm->ptype) && paramLI->paramFetch != NULL) + (*paramLI->paramFetch)(paramLI, i + 1); + typeOid = prm->ptype; + } + + sz = add_size(sz, sizeof(Oid)); /* space for type OID */ + sz = add_size(sz, sizeof(uint16)); /* space for pflags */ + + /* space for datum/isnull */ + if (OidIsValid(typeOid)) { + get_typlenbyval(typeOid, &typLen, &typByVal); + } else { + /* If no type OID, assume by-value, like copyParamList does. */ + typLen = sizeof(Datum); + typByVal = true; + } + sz = add_size(sz, datumEstimateSpace(prm->value, prm->isnull, typByVal, typLen)); + } + + return sz; +} + +/* + * Serialize a paramListInfo structure into caller-provided storage. + * + * We write the number of parameters first, as a 4-byte integer, and then + * write details for each parameter in turn. The details for each parameter + * consist of a 4-byte type OID, 2 bytes of flags, and then the datum as + * serialized by datumSerialize(). The caller is responsible for ensuring + * that there is enough storage to store the number of bytes that will be + * written; use EstimateParamListSpace to find out how many will be needed. + * *start_address is updated to point to the byte immediately following those + * written. + * + * RestoreParamList can be used to recreate a ParamListInfo based on the + * serialized representation; this will be a static, self-contained copy + * just as copyParamList would create. + */ +void SerializeParamList(ParamListInfo paramLI, char *start_address, Size len) +{ + int nparams; + + /* Write number of parameters. 
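/*
 * A minimal usage sketch, not from this patch: the estimate/serialize/restore
 * round trip described in the comment above.  In the patch the buffer is a
 * chunk of the parallel DSM segment built by execParallel.cpp; here it is
 * simply palloc'd so the flow stays self-contained.  The function name is
 * hypothetical.
 */
#include "postgres.h"
#include "nodes/params.h"

static ParamListInfo copy_params_via_serialization(ParamListInfo src)
{
    Size len = EstimateParamListSpace(src);
    char *buf = (char *)palloc(len);

    /* Leader side: flatten the parameters into the buffer. */
    SerializeParamList(src, buf, len);

    /* Worker side: rebuild a static, self-contained ParamListInfo. */
    ParamListInfo copy = RestoreParamList(buf, len);

    pfree(buf);
    return copy;
}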
*/ + if (paramLI == NULL || paramLI->numParams <= 0) { + nparams = 0; + } else { + nparams = paramLI->numParams; + } + int rc = memcpy_s(start_address, len, &nparams, sizeof(int)); + securec_check_c(rc, "", ""); + Size remainLen = len - sizeof(int); + start_address += sizeof(int); + + /* Write each parameter in turn. */ + for (int i = 0; i < nparams; i++) { + ParamExternData *prm = ¶mLI->params[i]; + Oid typeOid; + int16 typLen; + bool typByVal; + + /* Ignore parameters we don't need, to save cycles and space. */ + if (paramLI->paramMask != NULL && !bms_is_member(i, paramLI->paramMask)) { + typeOid = InvalidOid; + } else { + /* give hook a chance in case parameter is dynamic */ + if (!OidIsValid(prm->ptype) && paramLI->paramFetch != NULL) + (*paramLI->paramFetch)(paramLI, i + 1); + typeOid = prm->ptype; + } + + /* Write type OID. */ + rc = memcpy_s(start_address, remainLen, &typeOid, sizeof(Oid)); + securec_check_c(rc, "", ""); + remainLen -= sizeof(Oid); + start_address += sizeof(Oid); + + /* Write flags. */ + rc = memcpy_s(start_address, remainLen, &prm->pflags, sizeof(uint16)); + securec_check_c(rc, "", ""); + remainLen -= sizeof(uint16); + start_address += sizeof(uint16); + + /* Write datum/isnull. */ + if (OidIsValid(typeOid)) { + get_typlenbyval(typeOid, &typLen, &typByVal); + } else { + /* If no type OID, assume by-value, like copyParamList does. */ + typLen = sizeof(Datum); + typByVal = true; + } + datumSerialize(prm->value, prm->isnull, typByVal, typLen, &start_address, &remainLen); + } +} + +/* + * Copy a ParamListInfo structure. + * + * The result is allocated in CurrentMemoryContext. + * + * Note: the intent of this function is to make a static, self-contained + * set of parameter values. If dynamic parameter hooks are present, we + * intentionally do not copy them into the result. Rather, we forcibly + * instantiate all available parameter values and copy the datum values. + */ +ParamListInfo RestoreParamList(char *start_address, Size len) +{ + int nparams; + + int rc = memcpy_s(&nparams, len, start_address, sizeof(int)); + securec_check_c(rc, "", ""); + Size remainLen = len - sizeof(int); + start_address += sizeof(int); + + Size size = offsetof(ParamListInfoData, params) + nparams * sizeof(ParamExternData); + + ParamListInfo paramLI = (ParamListInfo)palloc(size); + paramLI->paramFetch = NULL; + paramLI->paramFetchArg = NULL; + paramLI->parserSetup = NULL; + paramLI->parserSetupArg = NULL; + paramLI->numParams = nparams; + paramLI->paramMask = NULL; + + for (int i = 0; i < nparams; i++) { + ParamExternData *prm = ¶mLI->params[i]; + + /* Read type OID. */ + rc = memcpy_s(&prm->ptype, remainLen, start_address, sizeof(Oid)); + securec_check_c(rc, "", ""); + remainLen -= sizeof(Oid); + start_address += sizeof(Oid); + + /* Read flags. */ + rc = memcpy_s(&prm->pflags, remainLen, start_address, sizeof(uint16)); + securec_check_c(rc, "", ""); + remainLen -= sizeof(uint16); + start_address += sizeof(uint16); + + /* Read datum/isnull. 
*/ + prm->value = datumRestore(&start_address, &remainLen, &prm->isnull); + } + + return paramLI; +} diff --git a/src/common/backend/nodes/readfuncs.cpp b/src/common/backend/nodes/readfuncs.cpp index ae1059864..3885ed18a 100644 --- a/src/common/backend/nodes/readfuncs.cpp +++ b/src/common/backend/nodes/readfuncs.cpp @@ -2684,6 +2684,7 @@ static Plan* _readPlan(Plan* local_node) READ_FLOAT_FIELD(plan_rows); READ_FLOAT_FIELD(multiple); READ_INT_FIELD(plan_width); + READ_BOOL_FIELD(parallel_aware); READ_NODE_FIELD(targetlist); READ_NODE_FIELD(qual); READ_NODE_FIELD(lefttree); @@ -3722,6 +3723,7 @@ static PlannedStmt* _readPlannedStmt(void) } READ_BOOL_FIELD(isRowTriggerShippable); READ_BOOL_FIELD(is_stream_plan); + READ_BOOL_FIELD(parallelModeNeeded); READ_DONE(); } diff --git a/src/common/backend/utils/adt/datum.cpp b/src/common/backend/utils/adt/datum.cpp index 84ec7fed8..fcfde6794 100755 --- a/src/common/backend/utils/adt/datum.cpp +++ b/src/common/backend/utils/adt/datum.cpp @@ -200,3 +200,129 @@ bool datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen) } return res; } + +/* ------------------------------------------------------------------------- + * datumEstimateSpace + * + * Compute the amount of space that datumSerialize will require for a + * particular Datum. + * ------------------------------------------------------------------------- + */ +Size datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen) +{ + Size sz = sizeof(int); + + if (!isnull) { + /* no need to use add_size, can't overflow */ + if (typByVal) + sz += sizeof(Datum); + else + sz += datumGetSize(value, typByVal, typLen); + } + + return sz; +} + +/* ------------------------------------------------------------------------- + * datumSerialize + * + * Serialize a possibly-NULL datum into caller-provided storage. + * + * Note: "expanded" objects are flattened so as to produce a self-contained + * representation, but other sorts of toast pointers are transferred as-is. + * This is because the intended use of this function is to pass the value + * to another process within the same database server. The other process + * could not access an "expanded" object within this process's memory, but + * we assume it can dereference the same TOAST pointers this one can. + * + * The format is as follows: first, we write a 4-byte header word, which + * is either the length of a pass-by-reference datum, -1 for a + * pass-by-value datum, or -2 for a NULL. If the value is NULL, nothing + * further is written. If it is pass-by-value, sizeof(Datum) bytes + * follow. Otherwise, the number of bytes indicated by the header word + * follow. The caller is responsible for ensuring that there is enough + * storage to store the number of bytes that will be written; use + * datumEstimateSpace() to find out how many will be needed. + * *start_address is updated to point to the byte immediately following + * those written. + * ------------------------------------------------------------------------- + */ +void datumSerialize(Datum value, bool isnull, bool typByVal, int typLen, char **start_address, Size *remainLen) +{ + int header; + + /* Write header word. */ + if (isnull) { + header = -2; + } else if (typByVal) { + header = -1; + } else { + header = datumGetSize(value, typByVal, typLen); + } + int rc = memcpy_s(*start_address, *remainLen, &header, sizeof(int)); + securec_check_c(rc, "", ""); + *remainLen -= sizeof(int); + *start_address += sizeof(int); + + /* If not null, write payload bytes. 
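/*
 * A minimal usage sketch, not from this patch: the header-word format for a
 * single by-value int4 datum.  The serialized image is the 4-byte header -1
 * (pass-by-value marker) followed by sizeof(Datum) payload bytes; a NULL
 * would be just the header word -2.  The function name is hypothetical.
 */
#include "postgres.h"
#include "utils/datum.h"

static Datum roundtrip_int4_datum(int32 value)
{
    Datum d = Int32GetDatum(value);
    bool isnull = false;

    Size len = datumEstimateSpace(d, isnull, true /* typByVal */, sizeof(int32));
    char *buf = (char *)palloc(len);

    /* Write side: datumSerialize advances the cursor and shrinks the remaining length. */
    char *writePtr = buf;
    Size writeRemain = len;
    datumSerialize(d, isnull, true, sizeof(int32), &writePtr, &writeRemain);

    /* Read side: consumes exactly what datumSerialize produced. */
    char *readPtr = buf;
    Size readRemain = len;
    bool restoredNull = false;
    Datum restored = datumRestore(&readPtr, &readRemain, &restoredNull);

    pfree(buf);
    Assert(!restoredNull);
    return restored;
}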
*/ + if (!isnull) { + if (typByVal) { + rc = memcpy_s(*start_address, *remainLen, &value, sizeof(Datum)); + securec_check_c(rc, "", ""); + *remainLen -= sizeof(Datum); + *start_address += sizeof(Datum); + } else { + rc = memcpy_s(*start_address, *remainLen, DatumGetPointer(value), (Size)header); + securec_check_c(rc, "", ""); + *remainLen -= header; + *start_address += header; + } + } +} + +/* ------------------------------------------------------------------------- + * datumRestore + * + * Restore a possibly-NULL datum previously serialized by datumSerialize. + * *start_address is updated according to the number of bytes consumed. + * ------------------------------------------------------------------------- + */ +Datum datumRestore(char **start_address, Size *remainLen, bool *isnull) +{ + int header; + + /* Read header word. */ + int rc = memcpy_s(&header, *remainLen, *start_address, sizeof(int)); + securec_check_c(rc, "", ""); + *remainLen -= sizeof(int); + *start_address += sizeof(int); + + /* If this datum is NULL, we can stop here. */ + if (header == -2) { + *isnull = true; + return (Datum)0; + } + + /* OK, datum is not null. */ + *isnull = false; + + /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */ + if (header == -1) { + Datum val; + + rc = memcpy_s(&val, *remainLen, *start_address, sizeof(Datum)); + securec_check_c(rc, "", ""); + *remainLen -= sizeof(Datum); + *start_address += sizeof(Datum); + return val; + } + + /* Pass-by-reference case; copy indicated number of bytes. */ + Assert(header > 0); + void *d = palloc((Size)header); + rc = memcpy_s(d, *remainLen, *start_address, header); + securec_check_c(rc, "", ""); + *remainLen -= header; + *start_address += header; + return PointerGetDatum(d); +} diff --git a/src/common/backend/utils/adt/lockfuncs.cpp b/src/common/backend/utils/adt/lockfuncs.cpp index 7b5aefd36..5ccc327ae 100755 --- a/src/common/backend/utils/adt/lockfuncs.cpp +++ b/src/common/backend/utils/adt/lockfuncs.cpp @@ -429,6 +429,14 @@ Datum pg_lock_status(PG_FUNCTION_ARGS) #define SET_LOCKTAG_INT32_DB(tag, databaseOid, key1, key2) SET_LOCKTAG_ADVISORY(tag, databaseOid, key1, key2, 2) +static void PreventAdvisoryLocksInParallelMode(void) +{ + if (IsInParallelMode()) + ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot use advisory locks during a parallel operation"))); +} + + #ifdef PGXC #define MAXINT8LEN 25 @@ -452,7 +460,8 @@ static bool pgxc_advisory_lock(int64 key64, int32 key1, int32 key2, bool iskeybi LockLevel locklevel, TryType locktry, Name databaseName) { LOCKTAG locktag; - Oid *coOids = NULL, *dnOids = NULL; + Oid *coOids = NULL; + Oid *dnOids = NULL; int numdnodes, numcoords; StringInfoData lock_cmd, unlock_cmd, lock_funcname, unlock_funcname, args; char str_key[MAXINT8LEN + 1]; @@ -576,6 +585,7 @@ Datum pg_advisory_lock_int8(PG_FUNCTION_ARGS) int64 key = PG_GETARG_INT64(0); LOCKTAG tag; + PreventAdvisoryLocksInParallelMode(); #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) { (void)pgxc_advisory_lock(key, 0, 0, true, ExclusiveLock, SESSION_LOCK, WAIT); @@ -599,6 +609,7 @@ Datum pg_advisory_xact_lock_int8(PG_FUNCTION_ARGS) int64 key = PG_GETARG_INT64(0); LOCKTAG tag; + PreventAdvisoryLocksInParallelMode(); #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) { (void)pgxc_advisory_lock(key, 0, 0, true, ExclusiveLock, TRANSACTION_LOCK, WAIT); @@ -621,6 +632,7 @@ Datum pg_advisory_lock_shared_int8(PG_FUNCTION_ARGS) int64 key = PG_GETARG_INT64(0); LOCKTAG tag; + PreventAdvisoryLocksInParallelMode(); 
#ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) { (void)pgxc_advisory_lock(key, 0, 0, true, ShareLock, SESSION_LOCK, WAIT); @@ -644,6 +656,7 @@ Datum pg_advisory_xact_lock_shared_int8(PG_FUNCTION_ARGS) int64 key = PG_GETARG_INT64(0); LOCKTAG tag; + PreventAdvisoryLocksInParallelMode(); #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) { (void)pgxc_advisory_lock(key, 0, 0, true, ShareLock, TRANSACTION_LOCK, WAIT); @@ -669,6 +682,7 @@ Datum pg_try_advisory_lock_int8(PG_FUNCTION_ARGS) LOCKTAG tag; LockAcquireResult res; + PreventAdvisoryLocksInParallelMode(); #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) PG_RETURN_BOOL(pgxc_advisory_lock(key, 0, 0, true, ExclusiveLock, SESSION_LOCK, DONT_WAIT)); @@ -693,6 +707,7 @@ Datum pg_try_advisory_xact_lock_int8(PG_FUNCTION_ARGS) LOCKTAG tag; LockAcquireResult res; + PreventAdvisoryLocksInParallelMode(); #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) PG_RETURN_BOOL(pgxc_advisory_lock(key, 0, 0, true, ExclusiveLock, TRANSACTION_LOCK, DONT_WAIT)); @@ -716,6 +731,7 @@ Datum pg_try_advisory_lock_shared_int8(PG_FUNCTION_ARGS) LOCKTAG tag; LockAcquireResult res; + PreventAdvisoryLocksInParallelMode(); #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) PG_RETURN_BOOL(pgxc_advisory_lock(key, 0, 0, true, ShareLock, SESSION_LOCK, DONT_WAIT)); @@ -740,6 +756,7 @@ Datum pg_try_advisory_xact_lock_shared_int8(PG_FUNCTION_ARGS) LOCKTAG tag; LockAcquireResult res; + PreventAdvisoryLocksInParallelMode(); #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) PG_RETURN_BOOL(pgxc_advisory_lock(key, 0, 0, true, ShareLock, TRANSACTION_LOCK, DONT_WAIT)); @@ -763,6 +780,7 @@ Datum pg_advisory_unlock_int8(PG_FUNCTION_ARGS) LOCKTAG tag; bool res = false; + PreventAdvisoryLocksInParallelMode(); SET_LOCKTAG_INT64(tag, key); res = LockRelease(&tag, ExclusiveLock, true); @@ -781,6 +799,7 @@ Datum pg_advisory_unlock_shared_int8(PG_FUNCTION_ARGS) LOCKTAG tag; bool res = false; + PreventAdvisoryLocksInParallelMode(); SET_LOCKTAG_INT64(tag, key); res = LockRelease(&tag, ShareLock, true); @@ -797,6 +816,7 @@ Datum pg_advisory_lock_int4(PG_FUNCTION_ARGS) int32 key2 = PG_GETARG_INT32(1); LOCKTAG tag; + PreventAdvisoryLocksInParallelMode(); if (key1 == XC_LOCK_FOR_BACKUP_KEY_1 && key2 == XC_LOCK_FOR_BACKUP_KEY_2 && !superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("Only system admin can lock the cluster."))); @@ -826,6 +846,7 @@ Datum pg_advisory_lock_sp_db_int4(PG_FUNCTION_ARGS) LOCKTAG tag; Oid database_oid = u_sess->proc_cxt.MyDatabaseId; + PreventAdvisoryLocksInParallelMode(); if (key1 == XC_LOCK_FOR_BACKUP_KEY_1 && key2 == XC_LOCK_FOR_BACKUP_KEY_2 && !superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("Only system admin can lock the cluster."))); @@ -858,6 +879,7 @@ Datum pg_advisory_xact_lock_int4(PG_FUNCTION_ARGS) int32 key2 = PG_GETARG_INT32(1); LOCKTAG tag; + PreventAdvisoryLocksInParallelMode(); if (key1 == XC_LOCK_FOR_BACKUP_KEY_1 && key2 == XC_LOCK_FOR_BACKUP_KEY_2 && !superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("Only system admin can lock the cluster."))); @@ -884,6 +906,7 @@ Datum pg_advisory_lock_shared_int4(PG_FUNCTION_ARGS) int32 key2 = PG_GETARG_INT32(1); LOCKTAG tag; + PreventAdvisoryLocksInParallelMode(); #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) { (void)pgxc_advisory_lock(0, key1, key2, false, ShareLock, SESSION_LOCK, WAIT); @@ -908,6 +931,7 @@ Datum pg_advisory_xact_lock_shared_int4(PG_FUNCTION_ARGS) int32 
key2 = PG_GETARG_INT32(1); LOCKTAG tag; + PreventAdvisoryLocksInParallelMode(); #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) { (void)pgxc_advisory_lock(0, key1, key2, false, ShareLock, TRANSACTION_LOCK, WAIT); @@ -934,6 +958,7 @@ Datum pg_try_advisory_lock_int4(PG_FUNCTION_ARGS) LOCKTAG tag; LockAcquireResult res; + PreventAdvisoryLocksInParallelMode(); if (key1 == XC_LOCK_FOR_BACKUP_KEY_1 && key2 == XC_LOCK_FOR_BACKUP_KEY_2 && !superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("Only system admin can lock the cluster."))); @@ -962,6 +987,7 @@ Datum pg_try_advisory_xact_lock_int4(PG_FUNCTION_ARGS) LOCKTAG tag; LockAcquireResult res; + PreventAdvisoryLocksInParallelMode(); if (key1 == XC_LOCK_FOR_BACKUP_KEY_1 && key2 == XC_LOCK_FOR_BACKUP_KEY_2 && !superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("Only system admin can lock the cluster."))); @@ -989,6 +1015,7 @@ Datum pg_try_advisory_lock_shared_int4(PG_FUNCTION_ARGS) LOCKTAG tag; LockAcquireResult res; + PreventAdvisoryLocksInParallelMode(); #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) PG_RETURN_BOOL(pgxc_advisory_lock(0, key1, key2, false, ShareLock, SESSION_LOCK, DONT_WAIT)); @@ -1014,6 +1041,7 @@ Datum pg_try_advisory_xact_lock_shared_int4(PG_FUNCTION_ARGS) LOCKTAG tag; LockAcquireResult res; + PreventAdvisoryLocksInParallelMode(); #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) PG_RETURN_BOOL(pgxc_advisory_lock(0, key1, key2, false, ShareLock, TRANSACTION_LOCK, DONT_WAIT)); @@ -1038,6 +1066,7 @@ Datum pg_advisory_unlock_int4(PG_FUNCTION_ARGS) LOCKTAG tag; bool res = false; + PreventAdvisoryLocksInParallelMode(); SET_LOCKTAG_INT32(tag, key1, key2); res = LockRelease(&tag, ExclusiveLock, true); @@ -1059,6 +1088,7 @@ Datum pg_advisory_unlock_sp_db_int4(PG_FUNCTION_ARGS) bool res = false; Oid database_oid = u_sess->proc_cxt.MyDatabaseId; + PreventAdvisoryLocksInParallelMode(); if (database_name != NULL) { database_oid = get_database_oid(database_name->data, false); } @@ -1082,6 +1112,7 @@ Datum pg_advisory_unlock_shared_int4(PG_FUNCTION_ARGS) LOCKTAG tag; bool res = false; + PreventAdvisoryLocksInParallelMode(); SET_LOCKTAG_INT32(tag, key1, key2); res = LockRelease(&tag, ShareLock, true); @@ -1113,6 +1144,7 @@ Datum pgxc_lock_for_backup(PG_FUNCTION_ARGS) { bool lockAcquired = false; + PreventAdvisoryLocksInParallelMode(); if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("only system admin can lock the cluster for backup"))); @@ -1194,6 +1226,7 @@ Datum pgxc_unlock_for_sp_database(PG_FUNCTION_ARGS) Name databaseName = PG_GETARG_NAME(0); bool result = false; + PreventAdvisoryLocksInParallelMode(); /* try to acquire the advisory lock in exclusive mode */ result = DatumGetBool(DirectFunctionCall3(pg_advisory_unlock_sp_db_int4, t_thrd.postmaster_cxt.xc_lockForBackupKey1, @@ -1220,6 +1253,7 @@ Datum pgxc_lock_for_sp_database(PG_FUNCTION_ARGS) int prepared_xact_count; Name databaseName = PG_GETARG_NAME(0); + PreventAdvisoryLocksInParallelMode(); if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("only system admin can lock the cluster for backup"))); @@ -1302,6 +1336,7 @@ void pgxc_lock_for_utility_stmt(Node* parsetree, bool is_temp) LOCKTAG tag; LockAcquireResult res; + PreventAdvisoryLocksInParallelMode(); /* * Reload configuration if we got SIGHUP from the postmaster, since we want to fetch * latest enable_online_ddl_waitlock values. 
diff --git a/src/common/backend/utils/cache/lsyscache.cpp b/src/common/backend/utils/cache/lsyscache.cpp index 11d2eaaf5..42bf45e53 100644 --- a/src/common/backend/utils/cache/lsyscache.cpp +++ b/src/common/backend/utils/cache/lsyscache.cpp @@ -1430,6 +1430,16 @@ char func_volatile(Oid funcid) return result; } +/* + * func_parallel + * Given procedure id, return the function's proparallel flag. + */ +char func_parallel(Oid funcid) +{ + /* Now we treat all func as parallel safe */ + return PROPARALLEL_SAFE; +} + /* * get_func_proshippable * Given procedure id, return the function's proshippable flag. diff --git a/src/common/backend/utils/cache/relmapper.cpp b/src/common/backend/utils/cache/relmapper.cpp index 38c34164b..1f0d902fb 100644 --- a/src/common/backend/utils/cache/relmapper.cpp +++ b/src/common/backend/utils/cache/relmapper.cpp @@ -195,6 +195,13 @@ void RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared, bool immedi (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot change relation mapping within subtransaction"))); } + + if (IsInParallelMode()) { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot change relation mapping in parallel mode"))); + } + if (immediate) { /* Make it active, but only locally */ if (shared) { diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp index d20bc1552..ab07a19aa 100644 --- a/src/common/backend/utils/misc/guc.cpp +++ b/src/common/backend/utils/misc/guc.cpp @@ -67,6 +67,7 @@ #include "parser/parser.h" #include "parser/scansup.h" #include "pgstat.h" +#include "postmaster/bgworker_internals.h" #include "workload/workload.h" #include "pgaudit.h" #include "instruments/instr_unique_sql.h" @@ -910,6 +911,19 @@ static const struct config_enum_entry synchronous_commit_options[] = {{"local", {"remote_apply", SYNCHRONOUS_COMMIT_REMOTE_REPLAY, false}, {NULL, 0, false}}; +static const struct config_enum_entry force_parallel_mode_options[] = { + {"off", FORCE_PARALLEL_OFF, false}, + {"on", FORCE_PARALLEL_ON, false}, + {"regress", FORCE_PARALLEL_REGRESS, false}, + {"true", FORCE_PARALLEL_ON, true}, + {"false", FORCE_PARALLEL_OFF, true}, + {"yes", FORCE_PARALLEL_ON, true}, + {"no", FORCE_PARALLEL_OFF, true}, + {"1", FORCE_PARALLEL_ON, true}, + {"0", FORCE_PARALLEL_OFF, true}, + {NULL, 0, false} +}; + static const struct config_enum_entry plan_cache_mode_options[] = { {"auto", PLAN_CACHE_MODE_AUTO, false}, {"force_generic_plan", PLAN_CACHE_MODE_FORCE_GENERIC_PLAN, false}, @@ -4549,6 +4563,20 @@ static void init_configure_names_bool() NULL, NULL }, + { + { + "parallel_leader_participation", + PGC_USERSET, + RESOURCES_ASYNCHRONOUS, + gettext_noop("Controls whether Gather and Gather Merge also run subplans."), + gettext_noop("Should gather nodes also run subplans, or just gather tuples?") + }, + &u_sess->attr.attr_sql.parallel_leader_participation, + true, + NULL, + NULL, + NULL + }, /* End-of-list marker */ { { @@ -7304,6 +7332,24 @@ static void init_configure_names_int() NULL, NULL }, + { + { + "min_parallel_table_scan_size", + PGC_USERSET, + QUERY_TUNING_COST, + gettext_noop("Sets the minimum amount of table data for a parallel scan."), + gettext_noop("If the planner estimates that it will read a number of table " + "pages too small to reach this limit, a parallel scan will not be considered."), + GUC_UNIT_BLOCKS, + }, + &u_sess->attr.attr_sql.min_parallel_table_scan_size, + (8 * 1024 * 1024) / BLCKSZ, + 0, + INT_MAX / 3, + NULL, + NULL, + NULL + }, { /* Can't be set in postgresql.conf 
*/ { @@ -9183,6 +9229,38 @@ static void init_configure_names_int() NULL, NULL }, + { + { + "max_parallel_workers", + PGC_USERSET, + RESOURCES_ASYNCHRONOUS, + gettext_noop("Sets the maximum number of parallel workers that can be active at one time."), + NULL + }, + &g_instance.attr.attr_common.max_parallel_workers, + 8, + 0, + MAX_PARALLEL_WORKER_LIMIT, + NULL, + NULL, + NULL + }, + { + { + "max_parallel_workers_per_gather", + PGC_USERSET, + RESOURCES_ASYNCHRONOUS, + gettext_noop("Sets the maximum number of parallel processes per executor node."), + NULL + }, + &g_instance.attr.attr_common.max_parallel_workers_per_gather, + 2, + 0, + MAX_PARALLEL_WORKER_LIMIT, + NULL, + NULL, + NULL + }, /* End-of-list marker */ { { @@ -9332,6 +9410,40 @@ static void init_configure_names_real() NULL }, #endif + { + { + "parallel_tuple_cost", + PGC_USERSET, + QUERY_TUNING_COST, + gettext_noop("Sets the planner's estimate of the cost of " + "passing each tuple (row) from worker to master backend."), + NULL + }, + &u_sess->attr.attr_sql.parallel_tuple_cost, + DEFAULT_PARALLEL_TUPLE_COST, + 0, + DBL_MAX, + NULL, + NULL, + NULL + }, + { + { + "parallel_setup_cost", + PGC_USERSET, + QUERY_TUNING_COST, + gettext_noop("Sets the planner's estimate of the cost of " + "starting up worker processes for parallel query."), + NULL + }, + &u_sess->attr.attr_sql.parallel_setup_cost, + DEFAULT_PARALLEL_SETUP_COST, + 0, + DBL_MAX, + NULL, + NULL, + NULL + }, { { "cursor_tuple_fraction", @@ -11735,6 +11847,21 @@ static void init_configure_names_enum() NULL }, #endif + { + { + "force_parallel_mode", + PGC_USERSET, + QUERY_TUNING_OTHER, + gettext_noop("Forces use of parallel query facilities."), + gettext_noop("If possible, run query using a parallel worker and with parallel restrictions.") + }, + &u_sess->attr.attr_sql.force_parallel_mode, + FORCE_PARALLEL_OFF, + force_parallel_mode_options, + NULL, + NULL, + NULL + }, { { "plan_cache_mode", @@ -14667,6 +14794,20 @@ int set_config_option(const char* name, const char* value, GucContext context, G } } + /* + * GUC_ACTION_SAVE changes are acceptable during a parallel operation, + * because the current worker will also pop the change. We're probably + * dealing with a function having a proconfig entry. Only the function's + * body should observe the change, and peer workers do not share in the + * execution of a function call started by this worker. + * + * Other changes might need to affect other workers, so forbid them. + */ + if (IsInParallelMode() && changeVal && action != GUC_ACTION_SAVE) { + ereport(elevel, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("cannot set parameters during a parallel operation"))); + } + record = find_option(name, true, elevel); if (record == NULL) { ereport( @@ -15901,6 +16042,15 @@ void ExecSetVariableStmt(VariableSetStmt* stmt) char* passwd = NULL; ListCell* phead = NULL; + /* + * Workers synchronize these parameters at the start of the parallel + * operation; then, we block SET during the operation. 
+ */ + if (IsInParallelMode()) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("cannot set parameters during a parallel operation"))); + } + switch (stmt->kind) { case VAR_SET_VALUE: case VAR_SET_CURRENT: diff --git a/src/common/backend/utils/time/snapmgr.cpp b/src/common/backend/utils/time/snapmgr.cpp index b7d53dad2..0fbca1687 100755 --- a/src/common/backend/utils/time/snapmgr.cpp +++ b/src/common/backend/utils/time/snapmgr.cpp @@ -101,6 +101,26 @@ static Snapshot CopySnapshot(Snapshot snapshot); static void FreeSnapshot(Snapshot snapshot); static void SnapshotResetXmin(void); +/* + * Snapshot fields to be serialized. + * + * Only these fields need to be sent to the cooperating backend; the + * remaining ones can (and must) set by the receiver upon restore. + */ +typedef struct SerializedSnapshotData { + TransactionId xmin; + TransactionId xmax; + uint32 xcnt; + int32 subxcnt; + bool suboverflowed; + bool takenDuringRecovery; + CommandId curcid; + GTM_Timeline timeline; + CommitSeqNo snapshotcsn; + SnapshotType snapshot_type; +} SerializedSnapshotData; + + /* * GetTransactionSnapshot * Get the appropriate snapshot for a new query in a transaction. @@ -128,6 +148,12 @@ Snapshot GetTransactionSnapshot(bool force_local_snapshot) Assert(u_sess->utils_cxt.RegisteredSnapshots == 0); Assert(u_sess->utils_cxt.FirstXactSnapshot == NULL); + if (IsInParallelMode()) { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot take query snapshot during a parallel operation"))); + } + /* * In transaction-snapshot mode, the first snapshot must live until * end of xact regardless of what the caller does with it, so we must @@ -230,6 +256,15 @@ void StreamTxnContextSetMyPgXactXmin(TransactionId xmin) */ Snapshot GetLatestSnapshot(void) { + /* + * We might be able to relax this, but nothing that could otherwise work + * needs it. + */ + if (IsInParallelMode()) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot update SecondarySnapshot during a parallel operation"))); + } + /* * So far there are no cases requiring support for GetLatestSnapshot() * during logical decoding, but it wouldn't be hard to add if @@ -324,7 +359,7 @@ void SnapshotSetCommandId(CommandId curcid) * must take care of all the same considerations as the first-snapshot case * in GetTransactionSnapshot. */ -static void SetTransactionSnapshot(Snapshot sourcesnap, TransactionId sourcexid) +static void SetTransactionSnapshot(Snapshot sourcesnap, TransactionId sourcexid, PGPROC *sourceproc) { /* Caller should have checked this already */ Assert(!u_sess->utils_cxt.FirstSnapshotSet); @@ -351,6 +386,28 @@ static void SetTransactionSnapshot(Snapshot sourcesnap, TransactionId sourcexid) u_sess->utils_cxt.CurrentSnapshot->timeline = sourcesnap->timeline; u_sess->utils_cxt.CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery; + /* + * Now we have to fix what GetSnapshotData did with MyPgXact->xmin and + * TransactionXmin. There is a race condition: to make sure we are not + * causing the global xmin to go backwards, we have to test that the + * source transaction is still running, and that has to be done + * atomically. So let procarray.c do it. + * + * Note: in serializable mode, predicate.c will do this a second time. It + * doesn't seem worth contorting the logic here to avoid two calls, + * especially since it's not clear that predicate.c *must* do this. 
+ */ + if (sourceproc != NULL) { + if (!ProcArrayInstallRestoredXmin(u_sess->utils_cxt.CurrentSnapshot->xmin, sourceproc)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("could not import the requested snapshot"), + errdetail("The source transaction is not running anymore."))); + } else if (!ProcArrayInstallImportedXmin(u_sess->utils_cxt.CurrentSnapshot->xmin, sourcexid)) { + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("could not import the requested snapshot"), + errdetail("The source transaction %lu is not running anymore.", sourcexid))); + } + /* * NB: curcid should NOT be copied, it's a local matter * Now we have to fix what GetSnapshotData did with MyPgXact->xmin and @@ -523,6 +580,21 @@ void UpdateActiveSnapshotCommandId(void) Assert(u_sess->utils_cxt.ActiveSnapshot->as_snap->active_count == 1); Assert(u_sess->utils_cxt.ActiveSnapshot->as_snap->regd_count == 0); + /* + * Don't allow modification of the active snapshot during parallel + * operation. We share the snapshot to worker backends at the beginning + * of parallel operation, so any change to the snapshot can lead to + * inconsistencies. We have other defenses against + * CommandCounterIncrement, but there are a few places that call this + * directly, so we put an additional guard here. + */ + CommandId save_curcid = u_sess->utils_cxt.ActiveSnapshot->as_snap->curcid; + CommandId curcid = GetCurrentCommandId(false); + if (IsInParallelMode() && save_curcid != curcid) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot modify commandid in active snapshot during a parallel operation"))); + } + u_sess->utils_cxt.ActiveSnapshot->as_snap->curcid = GetCurrentCommandId(false); } @@ -1267,7 +1339,7 @@ void ImportSnapshot(const char* idstr) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot import a snapshot from a different database"))); /* OK, install the snapshot */ - SetTransactionSnapshot(&snapshot, src_xid); + SetTransactionSnapshot(&snapshot, src_xid, NULL); } /* @@ -1380,3 +1452,148 @@ HTAB* HistoricSnapshotGetTupleCids(void) Assert(HistoricSnapshotActive()); return u_sess->utils_cxt.tuplecid_data; } + +/* + * EstimateSnapshotSpace + * Returns the size need to store the given snapshot. + * + * We are exporting only required fields from the Snapshot, stored in + * SerializedSnapshotData. + */ +Size EstimateSnapshotSpace(Snapshot snap) +{ + Assert(snap != InvalidSnapshot); + Assert(snap->satisfies == HeapTupleSatisfiesMVCC); + + /* We allocate any XID arrays needed in the same palloc block. */ + Size size = add_size(sizeof(SerializedSnapshotData), mul_size(snap->xcnt, sizeof(TransactionId))); + if (snap->subxcnt > 0 && (!snap->suboverflowed || snap->takenDuringRecovery)) { + size = add_size(size, mul_size((Size)snap->subxcnt, sizeof(TransactionId))); + } + + return size; +} + +/* + * SerializeSnapshot + * Dumps the serialized snapshot (extracted from given snapshot) onto the + * memory location at start_address. 
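/*
 * A minimal usage sketch, not from this patch: the snapshot
 * estimate/serialize/restore cycle.  In the patch the buffer lives in the
 * parallel DSM segment and a worker then installs the result with
 * RestoreTransactionSnapshot(snap, master_pgproc); here the buffer is simply
 * palloc'd so the flow stays visible.  The function name is hypothetical.
 */
#include "postgres.h"
#include "utils/snapmgr.h"

static Snapshot roundtrip_snapshot(Snapshot snap)
{
    Size len = EstimateSnapshotSpace(snap);
    char *buf = (char *)palloc(len);

    /* Leader side: copy only the fields listed in SerializedSnapshotData. */
    SerializeSnapshot(snap, buf, len);

    /* Worker side: rebuild a copied snapshot in TopTransactionContext. */
    Snapshot restored = RestoreSnapshot(buf, len);

    pfree(buf);
    return restored;
}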
+ */ +void SerializeSnapshot(Snapshot snapshot, char *start_address, Size len) +{ + Assert(snapshot->subxcnt >= 0); + + SerializedSnapshotData *serialized_snapshot = (SerializedSnapshotData *)start_address; + int rc; + + /* Copy all required fields */ + serialized_snapshot->xmin = snapshot->xmin; + serialized_snapshot->xmax = snapshot->xmax; + serialized_snapshot->xcnt = snapshot->xcnt; + serialized_snapshot->subxcnt = snapshot->subxcnt; + serialized_snapshot->suboverflowed = snapshot->suboverflowed; + serialized_snapshot->takenDuringRecovery = snapshot->takenDuringRecovery; + serialized_snapshot->curcid = snapshot->curcid; + serialized_snapshot->timeline = snapshot->timeline; + serialized_snapshot->snapshotcsn = snapshot->snapshotcsn; + serialized_snapshot->snapshot_type = snapshot->snapshot_type; + + /* + * Ignore the SubXID array if it has overflowed, unless the snapshot was + * taken during recovey - in that case, top-level XIDs are in subxip as + * well, and we mustn't lose them. + */ + if (serialized_snapshot->suboverflowed && !snapshot->takenDuringRecovery) + serialized_snapshot->subxcnt = 0; + + /* Copy XID array */ + if (snapshot->xcnt > 0) { + rc = memcpy_s((TransactionId *)(serialized_snapshot + 1), len - 1, + snapshot->xip, snapshot->xcnt * sizeof(TransactionId)); + securec_check_c(rc, "", ""); + } + + /* + * Copy SubXID array. Don't bother to copy it if it had overflowed, + * though, because it's not used anywhere in that case. Except if it's a + * snapshot taken during recovery; all the top-level XIDs are in subxip as + * well in that case, so we mustn't lose them. + */ + if (snapshot->subxcnt > 0) { + Size subxipoff = sizeof(SerializedSnapshotData) + snapshot->xcnt * sizeof(TransactionId); + + rc = memcpy_s(((char *)serialized_snapshot + subxipoff), len - subxipoff, snapshot->subxip, + snapshot->subxcnt * sizeof(TransactionId)); + securec_check_c(rc, "", ""); + } +} + +/* + * RestoreSnapshot + * Restore a serialized snapshot from the specified address. + * + * The copy is palloc'd in TopTransactionContext and has initial refcounts set + * to 0. The returned snapshot has the copied flag set. + */ +Snapshot RestoreSnapshot(char *start_address, Size len) +{ + SerializedSnapshotData *serialized_snapshot = (SerializedSnapshotData*)start_address; + TransactionId *serialized_xids = (TransactionId*)(start_address + sizeof(SerializedSnapshotData)); + + /* We allocate any XID arrays needed in the same palloc block. */ + Size size = sizeof(SnapshotData) + serialized_snapshot->xcnt * sizeof(TransactionId) + + serialized_snapshot->subxcnt * sizeof(TransactionId); + + /* Copy all required fields */ + Snapshot snapshot = (Snapshot)MemoryContextAlloc(u_sess->top_transaction_mem_cxt, size); + snapshot->satisfies = HeapTupleSatisfiesMVCC; + snapshot->xmin = serialized_snapshot->xmin; + snapshot->xmax = serialized_snapshot->xmax; + snapshot->xip = NULL; + snapshot->xcnt = serialized_snapshot->xcnt; + snapshot->subxip = NULL; + snapshot->subxcnt = serialized_snapshot->subxcnt; + snapshot->suboverflowed = serialized_snapshot->suboverflowed; + snapshot->takenDuringRecovery = serialized_snapshot->takenDuringRecovery; + snapshot->curcid = serialized_snapshot->curcid; + snapshot->user_data = NULL; + snapshot->timeline = serialized_snapshot->timeline; + snapshot->snapshotcsn = serialized_snapshot->snapshotcsn; + snapshot->snapshot_type = serialized_snapshot->snapshot_type; + + /* Copy XIDs, if present. 
*/ + int rc; + Size remainLen = len - sizeof(SerializedSnapshotData); + if (serialized_snapshot->xcnt > 0) { + snapshot->xip = (TransactionId *)(snapshot + 1); + rc = memcpy_s(snapshot->xip, remainLen, serialized_xids, serialized_snapshot->xcnt * sizeof(TransactionId)); + remainLen -= serialized_snapshot->xcnt * sizeof(TransactionId); + securec_check_c(rc, "", ""); + } + + /* Copy SubXIDs, if present. */ + if (serialized_snapshot->subxcnt > 0) { + snapshot->subxip = snapshot->xip + serialized_snapshot->xcnt; + rc = memcpy_s(snapshot->subxip, remainLen, serialized_xids + serialized_snapshot->xcnt, + serialized_snapshot->subxcnt * sizeof(TransactionId)); + securec_check_c(rc, "", ""); + } + + /* Set the copied flag so that the caller will set refcounts correctly. */ + snapshot->regd_count = 0; + snapshot->active_count = 0; + snapshot->copied = true; + + return snapshot; +} + +/* + * Install a restored snapshot as the transaction snapshot. + * + * The second argument is of type void * so that snapmgr.h need not include + * the declaration for PGPROC. + */ +void RestoreTransactionSnapshot(Snapshot snapshot, void *master_pgproc) +{ + SetTransactionSnapshot(snapshot, InvalidTransactionId, (PGPROC *)master_pgproc); +} diff --git a/src/common/pl/plpgsql/src/pl_exec.cpp b/src/common/pl/plpgsql/src/pl_exec.cpp index 082a16bfe..7f7e8b156 100755 --- a/src/common/pl/plpgsql/src/pl_exec.cpp +++ b/src/common/pl/plpgsql/src/pl_exec.cpp @@ -150,7 +150,8 @@ static void exec_eval_datum( static int exec_eval_integer(PLpgSQL_execstate* estate, PLpgSQL_expr* expr, bool* isNull); static bool exec_eval_boolean(PLpgSQL_execstate* estate, PLpgSQL_expr* expr, bool* isNull); static Datum exec_eval_expr(PLpgSQL_execstate* estate, PLpgSQL_expr* expr, bool* isNull, Oid* rettype); -static int exec_run_select(PLpgSQL_execstate* estate, PLpgSQL_expr* expr, long maxtuples, Portal* portalP); +static int exec_run_select(PLpgSQL_execstate *estate, PLpgSQL_expr *expr, long maxtuples, Portal *portalP, + bool parallelOK); static int exec_for_query(PLpgSQL_execstate* estate, PLpgSQL_stmt_forq* stmt, Portal portal, bool prefetch_ok, int dno); static ParamListInfo setup_param_list(PLpgSQL_execstate* estate, PLpgSQL_expr* expr); static void plpgsql_param_fetch(ParamListInfo params, int paramid); @@ -2164,7 +2165,7 @@ static int exec_stmt_perform(PLpgSQL_execstate* estate, PLpgSQL_stmt_perform* st if (!RecoveryInProgress()) oldTransactionId = GetTopTransactionId(); - rc = exec_run_select(estate, expr, 0, NULL); + rc = exec_run_select(estate, expr, 0, NULL, true); if (rc != SPI_OK_SELECT) { ereport(DEBUG1, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmodule(MOD_PLSQL), errmsg("exec_run_select returns %d", rc))); @@ -2708,7 +2709,7 @@ static int exec_stmt_fors(PLpgSQL_execstate* estate, PLpgSQL_stmt_fors* stmt) /* * Open the implicit cursor for the statement using exec_run_select */ - rc = exec_run_select(estate, stmt->query, 0, &portal); + rc = exec_run_select(estate, stmt->query, 0, &portal, false); if (rc != SPI_OK_SELECT) { ereport(DEBUG1, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmodule(MOD_PLSQL), errmsg("exec_run_select returns %d", rc))); @@ -3206,7 +3207,7 @@ static int exec_stmt_return(PLpgSQL_execstate* estate, PLpgSQL_stmt_return* stmt if (stmt->expr != NULL) { if (estate->retistuple) { - exec_run_select(estate, stmt->expr, 1, NULL); + exec_run_select(estate, stmt->expr, 1, NULL, true); if (estate->eval_processed > 0) { estate->retval = PointerGetDatum(estate->eval_tuptable->vals[0]); estate->rettupdesc = 
estate->eval_tuptable->tupdesc; @@ -3398,11 +3399,11 @@ static int exec_stmt_return_query(PLpgSQL_execstate* estate, PLpgSQL_stmt_return if (stmt->query != NULL) { /* static query */ - exec_run_select(estate, stmt->query, 0, &portal); + exec_run_select(estate, stmt->query, 0, &portal, true); } else { /* RETURN QUERY EXECUTE */ AssertEreport(stmt->dynquery != NULL, MOD_PLSQL, "stmt's dynamic query is required."); - portal = exec_dynquery_with_params(estate, stmt->dynquery, stmt->params, NULL, 0); + portal = exec_dynquery_with_params(estate, stmt->dynquery, stmt->params, NULL, CURSOR_OPT_PARALLEL_OK); } tupmap = convert_tuples_by_position( @@ -6222,7 +6223,7 @@ static Datum exec_eval_expr(PLpgSQL_execstate* estate, PLpgSQL_expr* expr, bool* /* * Else do it the hard way via exec_run_select */ - rc = exec_run_select(estate, expr, 2, NULL); + rc = exec_run_select(estate, expr, 2, NULL, false); if (rc != SPI_OK_SELECT) { ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -6279,7 +6280,8 @@ static Datum exec_eval_expr(PLpgSQL_execstate* estate, PLpgSQL_expr* expr, bool* * exec_run_select Execute a select query * ---------- */ -static int exec_run_select(PLpgSQL_execstate* estate, PLpgSQL_expr* expr, long maxtuples, Portal* portalP) +static int exec_run_select(PLpgSQL_execstate *estate, PLpgSQL_expr *expr, long maxtuples, Portal *portalP, + bool parallelOK) { ParamListInfo paramLI; int rc; @@ -6288,7 +6290,7 @@ static int exec_run_select(PLpgSQL_execstate* estate, PLpgSQL_expr* expr, long m * On the first call for this expression generate the plan */ if (expr->plan == NULL) { - exec_prepare_plan(estate, expr, 0); + exec_prepare_plan(estate, expr, parallelOK ? CURSOR_OPT_PARALLEL_OK : 0); } /* @@ -6711,6 +6713,11 @@ static ParamListInfo setup_param_list(PLpgSQL_execstate* estate, PLpgSQL_expr* e paramLI->parserSetupArg = (void*)expr; paramLI->params_need_process = false; paramLI->numParams = estate->ndatums; + /* + * Allow parameters that aren't needed by this expression to be + * ignored. 
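+ * Only the datums named in expr->paramnos are live for this expression;
+ * exposing that set through paramMask lets later consumers (for example,
+ * code that copies or serializes the parameter list for a parallel
+ * worker) skip the rest.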
+ */ + paramLI->paramMask = expr->paramnos; /* Instantiate values for "safe" parameters of the expression */ tmpset = bms_copy(expr->paramnos); diff --git a/src/gausskernel/optimizer/commands/async.cpp b/src/gausskernel/optimizer/commands/async.cpp index 8dad9eb7e..af623b780 100755 --- a/src/gausskernel/optimizer/commands/async.cpp +++ b/src/gausskernel/optimizer/commands/async.cpp @@ -115,6 +115,7 @@ #include #include +#include "access/parallel.h" #include "access/slru.h" #include "access/transam.h" #include "access/xact.h" @@ -446,6 +447,10 @@ void Async_Notify(const char* channel, const char* payload) Notification* n = NULL; MemoryContext oldcontext; + if (IsParallelWorker()) { + elog(ERROR, "cannot send notifications from a parallel worker"); + } + if (u_sess->attr.attr_common.Trace_notify) { elog(DEBUG1, "Async_Notify(%s)", channel); } diff --git a/src/gausskernel/optimizer/commands/copy.cpp b/src/gausskernel/optimizer/commands/copy.cpp index 5c0e74c12..a53689332 100644 --- a/src/gausskernel/optimizer/commands/copy.cpp +++ b/src/gausskernel/optimizer/commands/copy.cpp @@ -983,8 +983,10 @@ uint64 DoCopy(CopyStmt* stmt, const char* queryString) Assert(rel); /* check read-only transaction */ - if (u_sess->attr.attr_common.XactReadOnly && !RELATION_IS_TEMP(rel)) + if (u_sess->attr.attr_common.XactReadOnly && !RELATION_IS_TEMP(rel)) { PreventCommandIfReadOnly("COPY FROM"); + } + PreventCommandIfParallelMode("COPY FROM"); /* set write for backend status for the thread, we will use it to check default transaction readOnly */ pgstat_set_stmt_tag(STMTTAG_WRITE); diff --git a/src/gausskernel/optimizer/commands/explain.cpp b/src/gausskernel/optimizer/commands/explain.cpp index 1b78fd1e6..f3b0eb8b1 100755 --- a/src/gausskernel/optimizer/commands/explain.cpp +++ b/src/gausskernel/optimizer/commands/explain.cpp @@ -661,7 +661,7 @@ static void ExplainOneQuery( PlannedStmt* plan = NULL; /* plan the query */ - plan = pg_plan_query(query, 0, params, true); + plan = pg_plan_query(query, CURSOR_OPT_PARALLEL_OK, params, true); /* run it (if needed) and produce output */ ExplainOnePlan(plan, into, es, queryString, params); @@ -1790,6 +1790,9 @@ static void ExplainNode( appendStringInfoString(es->str, "-> "); es->indent += 2; } + if (plan->parallel_aware) { + appendStringInfoString(es->str, "Parallel "); + } appendStringInfoString(es->str, pname); es->indent++; @@ -1805,6 +1808,9 @@ static void ExplainNode( ExplainPropertyText("Parent Relationship", relationship, es); if (plan_name != NULL) ExplainPropertyText("Subplan Name", plan_name, es); + if (plan->parallel_aware) { + ExplainPropertyText("Parallel Aware", "true", es); + } } switch (nodeTag(plan)) { @@ -2379,6 +2385,16 @@ static void ExplainNode( show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); show_llvm_info(planstate, es); break; + case T_Gather: { + Gather *gather = (Gather *)plan; + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + ExplainPropertyInteger("Number of Workers", gather->num_workers, es); + if (gather->single_copy) + ExplainPropertyText("Single Copy", gather->single_copy ? 
"true" : "false", es); + break; + } case T_DfsScan: { show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); show_pushdown_qual(planstate, ancestors, es, PUSHDOWN_PREDICATE_FLAG); diff --git a/src/gausskernel/optimizer/commands/extension.cpp b/src/gausskernel/optimizer/commands/extension.cpp index 01a785876..ebed784e8 100755 --- a/src/gausskernel/optimizer/commands/extension.cpp +++ b/src/gausskernel/optimizer/commands/extension.cpp @@ -625,7 +625,7 @@ static void execute_sql_string(const char* sql, const char* filename) * We use a null string query_string here to avoid this. */ stmt_list = pg_analyze_and_rewrite(parsetree, query_string, NULL, 0); - stmt_list = pg_plan_queries(stmt_list, 0, NULL); + stmt_list = pg_plan_queries(stmt_list, CURSOR_OPT_PARALLEL_OK, NULL); foreach (lc2, stmt_list) { Node* stmt = (Node*)lfirst(lc2); diff --git a/src/gausskernel/optimizer/commands/prepare.cpp b/src/gausskernel/optimizer/commands/prepare.cpp index d37f3c3d4..a0a2f0599 100755 --- a/src/gausskernel/optimizer/commands/prepare.cpp +++ b/src/gausskernel/optimizer/commands/prepare.cpp @@ -400,6 +400,7 @@ static ParamListInfo EvaluateParams(PreparedStatement* pstmt, List* params, cons paramLI->parserSetupArg = NULL; paramLI->params_need_process = false; paramLI->numParams = num_params; + paramLI->paramMask = NULL; i = 0; foreach (l, exprstates) { diff --git a/src/gausskernel/optimizer/commands/sequence.cpp b/src/gausskernel/optimizer/commands/sequence.cpp index bf4950867..133126559 100755 --- a/src/gausskernel/optimizer/commands/sequence.cpp +++ b/src/gausskernel/optimizer/commands/sequence.cpp @@ -962,6 +962,14 @@ static int64 nextval_internal(Oid relid) /* read-only transactions may only modify temp sequences */ if (!is_use_local_seq) PreventCommandIfReadOnly("nextval()"); + + /* + * Forbid this during parallel operation because, to make it work, the + * cooperating backends would need to share the backend-local cached + * sequence information. Currently, we don't support that. + */ + PreventCommandIfParallelMode("nextval()"); + if (elm->last != elm->cached) { /* some numbers were cached */ Assert(elm->last_valid); @@ -1361,6 +1369,13 @@ static void do_setval(Oid relid, int64 next, bool iscalled) PreventCommandIfReadOnly("setval()"); #endif + /* + * Forbid this during parallel operation because, to make it work, the + * cooperating backends would need to share the backend-local cached + * sequence information. Currently, we don't support that. 
+ */ + PreventCommandIfParallelMode("setval()"); + /* lock page' buffer and read tuple */ GTM_UUID uuid; seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple, &uuid); diff --git a/src/gausskernel/optimizer/commands/variable.cpp b/src/gausskernel/optimizer/commands/variable.cpp index 62cfb3c5a..d088d757a 100755 --- a/src/gausskernel/optimizer/commands/variable.cpp +++ b/src/gausskernel/optimizer/commands/variable.cpp @@ -19,6 +19,7 @@ #include +#include "access/parallel.h" #include "access/xact.h" #include "access/xlog.h" #include "catalog/pg_authid.h" @@ -502,7 +503,8 @@ const char* show_log_timezone(void) */ bool check_transaction_read_only(bool* newval, void** extra, GucSource source) { - if (*newval == false && u_sess->attr.attr_common.XactReadOnly && IsTransactionState()) { + if (*newval == false && u_sess->attr.attr_common.XactReadOnly && IsTransactionState() && + !t_thrd.bgworker_cxt.InitializingParallelWorker) { /* Can't go to r/w mode inside a r/o transaction */ if (IsSubTransaction()) { GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); @@ -763,6 +765,28 @@ void assign_client_encoding(const char* newval, void* extra) { int encoding = *((int*)extra); + /* + * Parallel workers send data to the leader, not the client. They always + * send data using the database encoding. + */ + if (IsParallelWorker()) { + /* + * During parallel worker startup, we want to accept the leader's + * client_encoding setting so that anyone who looks at the value in + * the worker sees the same value that they would see in the leader. + */ + if (t_thrd.bgworker_cxt.InitializingParallelWorker) + return; + + /* + * A change other than during startup, for example due to a SET clause + * attached to a function definition, should be rejected, as there is + * nothing we can do inside the worker to make it take effect. + */ + ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot change client_encoding during a parallel operation"))); + } + /* We do not expect an error if PrepareClientEncoding succeeded */ if (SetClientEncoding(encoding) < 0) elog(LOG, "SetClientEncoding(%d) failed", encoding); @@ -895,9 +919,11 @@ bool check_role(char** newval, void** extra, GucSource source) } /* - * Verify that session user is allowed to become this role + * Verify that session user is allowed to become this role, but skip + * this in parallel mode, where we must blindly recreate the parallel + * leader's state. 
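+ * InitializingParallelWorker is only true while a worker is replaying the
+ * leader's GUC state during startup; a SET ROLE issued later in the worker
+ * is still subject to the normal membership check.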
*/ - if (!is_member_of_role(GetSessionUserId(), roleid)) { + if (!t_thrd.bgworker_cxt.InitializingParallelWorker && !is_member_of_role(GetSessionUserId(), roleid)) { GUC_check_errcode(ERRCODE_INSUFFICIENT_PRIVILEGE); GUC_check_errmsg("permission denied to set role \"%s\"", *newval); return false; diff --git a/src/gausskernel/optimizer/path/allpaths.cpp b/src/gausskernel/optimizer/path/allpaths.cpp index 9b1fab1cc..de14a87c0 100755 --- a/src/gausskernel/optimizer/path/allpaths.cpp +++ b/src/gausskernel/optimizer/path/allpaths.cpp @@ -22,6 +22,7 @@ #include "catalog/pg_class.h" #include "catalog/pg_partition.h" #include "catalog/pg_partition_fn.h" +#include "catalog/pg_proc.h" #include "foreign/fdwapi.h" #include "nodes/nodeFuncs.h" #include "nodes/pg_list.h" @@ -65,6 +66,7 @@ static void set_rel_pathlist(PlannerInfo* root, RelOptInfo* rel, Index rti, Rang static void set_plain_rel_size(PlannerInfo* root, RelOptInfo* rel, RangeTblEntry* rte); static void set_tablesample_rel_size(PlannerInfo* root, RelOptInfo* rel, RangeTblEntry* rte); static void set_plain_rel_pathlist(PlannerInfo* root, RelOptInfo* rel, RangeTblEntry* rte); +static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static void set_foreign_size(PlannerInfo* root, RelOptInfo* rel, RangeTblEntry* rte); static void set_foreign_pathlist(PlannerInfo* root, RelOptInfo* rel, RangeTblEntry* rte); static void set_append_rel_size(PlannerInfo* root, RelOptInfo* rel, Index rti, RangeTblEntry* rte); @@ -264,6 +266,16 @@ static void set_base_rel_sizes(PlannerInfo* root) if (rel->reloptkind != RELOPT_BASEREL) continue; + /* + * If parallelism is allowable for this query in general, see whether + * it's allowable for this rel in particular. We have to do this + * before set_rel_size, because that if this is an inheritance parent, + * set_append_rel_size will pass the consider_parallel flag down to + * inheritance children. + */ + if (root->glob->parallelModeOK) + set_rel_consider_parallel(root, rel, root->simple_rte_array[rti]); + set_rel_size(root, rel, (Index)rti, root->simple_rte_array[rti]); /* Try inlist2join optimization */ @@ -812,6 +824,7 @@ static void set_plain_rel_pathlist(PlannerInfo* root, RelOptInfo* rel, RangeTblE List* quals = NIL; bool has_vecengine_unsupport_expr = false; ListCell* lc = NULL; + int parallel_threshold = u_sess->attr.attr_sql.min_parallel_table_scan_size; #ifdef PGXC bool isrp = create_plainrel_rqpath(root, rel, rte); @@ -871,8 +884,39 @@ static void set_plain_rel_pathlist(PlannerInfo* root, RelOptInfo* rel, RangeTblE } case REL_ROW_ORIENTED: { add_path(root, rel, create_seqscan_path(root, rel, NULL)); - if (can_parallel) + if (can_parallel) { add_path(root, rel, create_seqscan_path(root, rel, NULL, u_sess->opt_cxt.query_dop)); + } + + /* Consider parallel sequential scan */ + if (rel->consider_parallel && rel->pages > parallel_threshold) { + Path *path; + int parallel_degree = 1; + + /* + * Limit the degree of parallelism logarithmically based on the size + * of the relation. This probably needs to be a good deal more + * sophisticated, but we need something here for now. 
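+ * Concretely, each additional worker requires the relation to be another
+ * factor of three larger than the current threshold, so the chosen degree
+ * grows logarithmically with table size until it reaches
+ * max_parallel_workers_per_gather. (A small standalone illustration of
+ * this rule appears after the executor Makefile hunk below.)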
+ */ + while (rel->pages > parallel_threshold * 3 && + parallel_degree < g_instance.attr.attr_common.max_parallel_workers_per_gather) { + parallel_degree++; + parallel_threshold *= 3; + if (parallel_threshold >= PG_INT32_MAX / 3) + break; + } + + /* + * Ideally we should consider postponing the gather operation until + * much later, after we've pushed joins and so on atop the parallel + * sequential scan path. But we don't have the infrastructure for + * that yet, so just do this for now. + */ + path = create_seqscan_path(root, rel, NULL, 1, parallel_degree); + path = (Path *)create_gather_path(root, rel, path, NULL, parallel_degree); + add_path(root, rel, path); + } + break; } default: { @@ -937,15 +981,123 @@ static void set_plain_rel_pathlist(PlannerInfo* root, RelOptInfo* rel, RangeTblE } } +/* + * If this relation could possibly be scanned from within a worker, then set + * the consider_parallel flag. The flag has previously been initialized to + * false, so we just bail out if it becomes clear that we can't safely set it. + */ +static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + /* Don't call this if parallelism is disallowed for the entire query. */ + Assert(root->glob->parallelModeOK); + + /* Don't call this for non-baserels. */ + Assert(rel->reloptkind == RELOPT_BASEREL); + + /* Assorted checks based on rtekind. */ + switch (rte->rtekind) { + case RTE_RELATION: + /* + * Currently, parallel workers can't access the leader's temporary + * tables. We could possibly relax this if the wrote all of its + * local buffers at the start of the query and made no changes + * thereafter (maybe we could allow hint bit changes), and if we + * taught the workers to read them. Writing a large number of + * temporary buffers could be expensive, though, and we don't have + * the rest of the necessary infrastructure right now anyway. So + * for now, bail out if we see a temporary table. + */ + if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP) + return; + + /* Don't support parallel for partitioned table. */ + if (rte->ispartrel) { + return; + } + + /* + * Table sampling can be pushed down to workers if the sample + * function and its arguments are safe. + */ + if (rte->tablesample != NULL) { + // TODO, try to use this: func_parallel(rte->tablesample->tsmhandler) + Oid proparallel = PROPARALLEL_SAFE; + + if (proparallel != PROPARALLEL_SAFE) + return; + if (has_parallel_hazard((Node *)rte->tablesample->args, false)) + return; + return; + } + break; + + case RTE_SUBQUERY: + /* + * Subplans currently aren't passed to workers. Even if they + * were, the subplan might be using parallelism internally, and + * we can't support nested Gather nodes at present. Finally, + * we don't have a good way of knowing whether the subplan + * involves any parallel-restricted operations. It would be + * nice to relax this restriction some day, but it's going to + * take a fair amount of work. + */ + return; + + case RTE_JOIN: + /* Shouldn't happen; we're only considering baserels here. */ + Assert(false); + return; + + case RTE_FUNCTION: + /* Check for parallel-restricted functions. */ + if (has_parallel_hazard(rte->funcexpr, false)) + return; + break; + + case RTE_VALUES: + /* + * The data for a VALUES clause is stored in the plan tree itself, + * so scanning it in a worker is fine. + */ + break; + + case RTE_CTE: + /* + * CTE tuplestores aren't shared among parallel workers, so we + * force all CTE scans to happen in the leader. 
Also, populating + * the CTE would require executing a subplan that's not available + * in the worker, might be parallel-restricted, and must get + * executed only once. + */ + return; + case RTE_REMOTE_DUMMY: + return; + } + + /* + * If there's anything in baserestrictinfo that's parallel-restricted, + * we give up on parallelizing access to this relation. We could consider + * instead postponing application of the restricted quals until we're + * above all the parallelism in the plan tree, but it's not clear that + * this would be a win in very many cases, and it might be tricky to make + * outer join clauses work correctly. + */ + if (has_parallel_hazard((Node *)rel->baserestrictinfo, false)) + return; + + /* We have a winner. */ + rel->consider_parallel = true; +} + /* * Description:add result operator over scan operator. And add * vector type scan's qual with unsupport expression in vector engine * to result operator * * Parameters: - * @in root: plannerinfo struct for current query level. - * @in rel: Per-relation information for planning/optimization. - * @in quals: filter condition + * @in root: plannerinfo struct for current query level. + * @in rel: Per-relation information for planning/optimization. + * @in quals: filter condition * * Return: void */ @@ -1147,6 +1299,9 @@ static void set_append_rel_size(PlannerInfo* root, RelOptInfo* rel, Index rti, R continue; } + /* Copy consider_parallel flag from parent. */ + childrel->consider_parallel = rel->consider_parallel; + /* * CE failed, so finish copying/modifying targetlist and join quals. * @@ -3025,6 +3180,9 @@ static void print_path(PlannerInfo* root, Path* path, int indent) case T_Unique: subpath = ((UniquePath*)path)->subpath; break; + case T_GatherPath: + subpath = ((GatherPath*)path)->subpath; + break; case T_NestLoop: join = true; break; diff --git a/src/gausskernel/optimizer/path/costsize.cpp b/src/gausskernel/optimizer/path/costsize.cpp index d38a1ee04..34907c8c1 100644 --- a/src/gausskernel/optimizer/path/costsize.cpp +++ b/src/gausskernel/optimizer/path/costsize.cpp @@ -11,6 +11,8 @@ * cpu_tuple_cost Cost of typical CPU time to process a tuple * cpu_index_tuple_cost Cost of typical CPU time to process an index tuple * cpu_operator_cost Cost of CPU time to execute an operator or function + * parallel_tuple_cost Cost of CPU time to pass a tuple from worker to master backend + * parallel_setup_cost Cost of setting up shared memory for parallelism * * We expect that the kernel will typically do some amount of read-ahead * optimization; this in conjunction with seek costs means that seq_page_cost @@ -157,6 +159,7 @@ void init_plan_cost(Plan* plan) plan->pred_startup_time = -1.0; plan->pred_total_time = -1.0; plan->pred_max_memory = -1; + plan->parallel_aware = false; } static inline void get_info_from_rel( @@ -669,7 +672,7 @@ static void set_parallel_path_rows(Path* path) * 'baserel' is the relation to be scanned * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL */ -void cost_seqscan(Path* path, PlannerInfo* root, RelOptInfo* baserel, ParamPathInfo* param_info) +void cost_seqscan(Path* path, PlannerInfo* root, RelOptInfo* baserel, ParamPathInfo* param_info, int nworkers) { Cost startup_cost = 0; Cost run_cost = 0; @@ -703,6 +706,17 @@ void cost_seqscan(Path* path, PlannerInfo* root, RelOptInfo* baserel, ParamPathI cpu_per_tuple = u_sess->attr.attr_sql.cpu_tuple_cost + qpqual_cost.per_tuple; run_cost += cpu_per_tuple * RELOPTINFO_LOCAL_FIELD(root, baserel, tuples) / dop; + /* + * Primitive 
parallel cost model. Assume the leader will do half as much + * work as a regular worker, because it will also need to read the tuples + * returned by the workers when they percolate up to the gather ndoe. + * This is almost certainly not exactly the right way to model this, so + * this will probably need to be changed at some point... + */ + if (nworkers > 0) { + run_cost = run_cost / (nworkers + 0.5); + } + path->startup_cost = startup_cost; path->total_cost = startup_cost + run_cost; path->stream_cost = 0; @@ -946,6 +960,36 @@ void cost_tsstorescan(Path *path, PlannerInfo *root, RelOptInfo *baserel) } } +/* + * cost_gather + * Determines and returns the cost of gather path. + * + * 'rel' is the relation to be operated upon + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void cost_gather(GatherPath *path, RelOptInfo *rel, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + + /* Mark the path with the correct row estimate */ + if (param_info) + path->path.rows = param_info->ppi_rows; + else + path->path.rows = rel->rows; + + startup_cost = path->subpath->startup_cost; + + run_cost = path->subpath->total_cost - path->subpath->startup_cost; + + /* Parallel setup and communication cost. */ + startup_cost += u_sess->attr.attr_sql.parallel_setup_cost; + run_cost += u_sess->attr.attr_sql.parallel_tuple_cost * path->path.rows; + + path->path.startup_cost = startup_cost; + path->path.total_cost = (startup_cost + run_cost); +} + /* * cost_index * Determines and returns the cost of scanning a relation using an index. diff --git a/src/gausskernel/optimizer/plan/createplan.cpp b/src/gausskernel/optimizer/plan/createplan.cpp index d45c1f38d..dd2f6a3a2 100644 --- a/src/gausskernel/optimizer/plan/createplan.cpp +++ b/src/gausskernel/optimizer/plan/createplan.cpp @@ -94,6 +94,7 @@ static CStoreScan* create_cstorescan_plan(PlannerInfo* root, Path* best_path, Li static DfsScan* create_dfsscan_plan(PlannerInfo* root, Path* best_path, List* tlist, List* scan_clauses, bool indexFlag = false, List* excludedCol = NIL, bool indexOnly = false); static TsStoreScan* create_tsstorescan_plan(PlannerInfo* root, Path* best_path, List* tlist, List* scan_clauses); +static Gather *create_gather_plan(PlannerInfo *root, GatherPath *best_path); static Scan* create_indexscan_plan( PlannerInfo* root, IndexPath* best_path, List* tlist, List* scan_clauses, bool indexonly); static BitmapHeapScan* create_bitmap_scan_plan( @@ -130,6 +131,7 @@ static Plan* setPartitionParam(PlannerInfo* root, Plan* plan, RelOptInfo* rel); static Plan* setBucketInfoParam(PlannerInfo* root, Plan* plan, RelOptInfo* rel); Plan* create_globalpartInterator_plan(PlannerInfo* root, PartIteratorPath* pIterpath); +static Gather *make_gather(List *qptlist, List *qpqual, int nworkers, bool single_copy, Plan *subplan); static IndexScan* make_indexscan(List* qptlist, List* qpqual, Index scanrelid, Oid indexid, List* indexqual, List* indexqualorig, List* indexorderby, List* indexorderbyorig, ScanDirection indexscandir); static IndexOnlyScan* make_indexonlyscan(List* qptlist, List* qpqual, Index scanrelid, Oid indexid, List* indexqual, @@ -385,6 +387,9 @@ static Plan* create_plan_recurse(PlannerInfo* root, Path* best_path) plan = create_stream_plan(root, (StreamPath*)best_path); break; #endif + case T_Gather: + plan = (Plan*)create_gather_plan(root, (GatherPath*)best_path); + break; default: { ereport(ERROR, (errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), @@ -1811,6 +1816,34 @@ static bool 
relIsDeltaNode(PlannerInfo* root, RelOptInfo* relOptInfo) return isDelta; } +/* + * create_gather_plan + * + * Create a Gather plan for 'best_path' and (recursively) plans + * for its subpaths. + */ +static Gather *create_gather_plan(PlannerInfo *root, GatherPath *best_path) +{ + Index scan_relid = best_path->path.parent->relid; + Plan *subplan = create_plan_recurse(root, best_path->subpath); + + disuse_physical_tlist(subplan, best_path->subpath); + + Gather *gather_plan = make_gather(subplan->targetlist, NIL, + best_path->num_workers, best_path->single_copy, subplan); + + copy_path_costsize(&gather_plan->plan, &best_path->path); + +#ifdef STREAMPLAN + add_distribute_info(root, &gather_plan->plan, scan_relid, &(best_path->path), NULL); +#endif + + /* use parallel mode for parallel plans. */ + root->glob->parallelModeNeeded = true; + + return gather_plan; +} + /* * create_seqscan_plan * Returns a seqscan plan for the base relation scanned by 'best_path' @@ -5192,6 +5225,7 @@ static void copy_path_costsize(Plan* dest, Path* src) dest->plan_width = src->parent->width; dest->innerdistinct = src->innerdistinct; dest->outerdistinct = src->outerdistinct; + dest->parallel_aware = src->parallel_aware; } else { /* init the cost field directly */ init_plan_cost(dest); @@ -7522,6 +7556,22 @@ Unique* make_unique(Plan* lefttree, List* distinctList) return node; } +static Gather *make_gather(List *qptlist, List *qpqual, int nworkers, bool single_copy, Plan *subplan) +{ + Gather *node = makeNode(Gather); + Plan *plan = &node->plan; + + /* cost should be inserted by caller */ + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = subplan; + plan->righttree = NULL; + node->num_workers = nworkers; + node->single_copy = single_copy; + + return node; +} + /* * distinctList is a list of SortGroupClauses, identifying the targetlist * items that should be considered by the SetOp filter. The input path must diff --git a/src/gausskernel/optimizer/plan/planmain.cpp b/src/gausskernel/optimizer/plan/planmain.cpp index 97de26a37..5af9d9967 100755 --- a/src/gausskernel/optimizer/plan/planmain.cpp +++ b/src/gausskernel/optimizer/plan/planmain.cpp @@ -31,6 +31,7 @@ #include "optimizer/paths.h" #include "optimizer/placeholder.h" #include "optimizer/planmain.h" +#include "optimizer/planner.h" #include "optimizer/randomplan.h" #include "optimizer/tlist.h" #include "utils/selfuncs.h" @@ -113,6 +114,9 @@ void query_planner(PlannerInfo* root, List* tlist, double tuple_fraction, double if (parse->jointree->fromlist == NIL) { /* We need a trivial path result */ *cheapest_path = (Path*)create_result_path((List*)parse->jointree->quals); + if (root->glob->parallelModeOK && u_sess->attr.attr_sql.force_parallel_mode != FORCE_PARALLEL_OFF) { + (*cheapest_path)->parallel_safe = !has_parallel_hazard(parse->jointree->quals, false); + } *sorted_path = NULL; /* diff --git a/src/gausskernel/optimizer/plan/planner.cpp b/src/gausskernel/optimizer/plan/planner.cpp index 415acf524..7e7718d96 100644 --- a/src/gausskernel/optimizer/plan/planner.cpp +++ b/src/gausskernel/optimizer/plan/planner.cpp @@ -19,6 +19,7 @@ #include #include +#include "access/parallel.h" #include "access/transam.h" #include "catalog/indexing.h" #include "catalog/pg_cast.h" @@ -438,6 +439,52 @@ PlannedStmt* standard_planner(Query* parse, int cursorOptions, ParamListInfo bou glob->bloomfilter.bloomfilter_index = -1; glob->bloomfilter.add_index = true; glob->estiopmem = esti_op_mem; + + /* + * Assess whether it's feasible to use parallel mode for this query. 
+ * We can't do this in a standalone backend, or if the command will + * try to modify any data, or if this is a cursor operation, or if + * GUCs are set to values that don't permit parallelism, or if + * parallel-unsafe functions are present in the query tree. + * + * For now, we don't try to use parallel mode if we're running inside + * a parallel worker. We might eventually be able to relax this + * restriction, but for now it seems best not to have parallel workers + * trying to create their own parallel workers. + * + * We can't use parallelism in serializable mode because the predicate + * locking code is not parallel-aware. It's not catastrophic if someone + * tries to run a parallel plan in serializable mode; it just won't get + * any workers and will run serially. But it seems like a good heuristic + * to assume that the same serialization level will be in effect at plan + * time and execution time, so don't generate a parallel plan if we're + * in serializable mode. + */ + glob->parallelModeOK = (cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 && IsUnderPostmaster && + parse->commandType == CMD_SELECT && !parse->hasModifyingCTE && parse->utilityStmt == NULL && + g_instance.attr.attr_common.max_parallel_workers_per_gather > 0 && !IsParallelWorker() && + !IsolationIsSerializable() && !has_parallel_hazard((Node *)parse, true); + + /* + * glob->parallelModeNeeded is normally set to false here and changed to + * true during plan creation if a Gather or Gather Merge plan is actually + * created (cf. create_gather_plan, create_gather_merge_plan). + * + * However, if force_parallel_mode = on or force_parallel_mode = regress, + * then we impose parallel mode whenever it's safe to do so, even if the + * final plan doesn't use parallelism. It's not safe to do so if the + * query contains anything parallel-unsafe; parallelModeOK will be false + * in that case. Note that parallelModeOK can't change after this point. + * Otherwise, everything in the query is either parallel-safe or + * parallel-restricted, and in either case it should be OK to impose + * parallel-mode restrictions. If that ends up breaking something, then + * either some function the user included in the query is incorrectly + * labelled as parallel-safe or parallel-restricted when in reality it's + * parallel-unsafe, or else the query planner itself has a bug. 
+ */ + glob->parallelModeNeeded = + glob->parallelModeOK && (u_sess->attr.attr_sql.force_parallel_mode != FORCE_PARALLEL_OFF); + if (IS_STREAM_PLAN) glob->vectorized = !vector_engine_preprocess_walker((Node*)parse, parse->rtable); else @@ -723,6 +770,7 @@ PlannedStmt* standard_planner(Query* parse, int cursorOptions, ParamListInfo bou result->canSetTag = parse->canSetTag; result->transientPlan = glob->transientPlan; result->dependsOnRole = glob->dependsOnRole; + result->parallelModeNeeded = glob->parallelModeNeeded; result->planTree = top_plan; result->rtable = glob->finalrtable; result->resultRelations = glob->resultRelations; diff --git a/src/gausskernel/optimizer/plan/setrefs.cpp b/src/gausskernel/optimizer/plan/setrefs.cpp index a64541b86..6a174f958 100755 --- a/src/gausskernel/optimizer/plan/setrefs.cpp +++ b/src/gausskernel/optimizer/plan/setrefs.cpp @@ -295,8 +295,9 @@ static Plan* set_plan_refs(PlannerInfo* root, Plan* plan, int rtoffset) { ListCell* l = NULL; - if (plan == NULL) + if (plan == NULL) { return NULL; + } /* * Plan-type-specific fixes @@ -572,6 +573,10 @@ static Plan* set_plan_refs(PlannerInfo* root, Plan* plan, int rtoffset) } } break; + case T_Gather: + set_upper_references(root, plan, rtoffset); + break; + case T_Hash: case T_Material: case T_VecMaterial: diff --git a/src/gausskernel/optimizer/plan/subselect.cpp b/src/gausskernel/optimizer/plan/subselect.cpp index 3ff05af07..4eab0d9de 100755 --- a/src/gausskernel/optimizer/plan/subselect.cpp +++ b/src/gausskernel/optimizer/plan/subselect.cpp @@ -2703,6 +2703,7 @@ static Bitmapset* finalize_plan(PlannerInfo* root, Plan* plan, Bitmapset* valid_ case T_Material: case T_Sort: case T_Unique: + case T_Gather: case T_SetOp: case T_Group: case T_Stream: diff --git a/src/gausskernel/optimizer/util/clauses.cpp b/src/gausskernel/optimizer/util/clauses.cpp index e23e379fc..f58b23255 100644 --- a/src/gausskernel/optimizer/util/clauses.cpp +++ b/src/gausskernel/optimizer/util/clauses.cpp @@ -89,6 +89,11 @@ typedef struct { char* prosrc; } inline_error_callback_arg; +typedef struct { + bool allow_restricted; +} has_parallel_hazard_arg; + + typedef enum { CONTAIN_FUNCTION_ID, CONTAIN_MUTABLE_FUNCTION, CONTAIN_VOLATILE_FUNTION } checkFuntionType; typedef struct { @@ -109,6 +114,9 @@ static bool expression_returns_set_rows_walker(Node* node, double* count); static bool contain_subplans_walker(Node* node, void* context); template static bool contain_specified_functions_walker(Node* node, check_function_context* context); +static bool has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context); +static bool parallel_too_dangerous(char proparallel, has_parallel_hazard_arg *context); +static bool typeid_is_temp(Oid type_id); static bool contain_nonstrict_functions_walker(Node* node, void* context); static bool contain_leaky_functions_walker(Node* node, void* context); static Relids find_nonnullable_rels_walker(Node* node, bool top_level); @@ -1145,11 +1153,191 @@ bool exec_simple_check_mutable_function(Node* clause) } /***************************************************************************** - * Check clauses for nonstrict functions + * Check queries for parallel unsafe and/or restricted constructs *****************************************************************************/ +/* + * Check whether a node tree contains parallel hazards. 
This is used both + * on the entire query tree, to see whether the query can be parallelized at + * all, and also to evaluate whether a particular expression is safe to + * run in a parallel worker. We could separate these concerns into two + * different functions, but there's enough overlap that it doesn't seem + * worthwhile. + */ +bool has_parallel_hazard(Node *node, bool allow_restricted) +{ + has_parallel_hazard_arg context; + + context.allow_restricted = allow_restricted; + return has_parallel_hazard_walker(node, &context); +} + +static bool has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context) +{ + if (node == NULL) + return false; + + /* + * When we're first invoked on a completely unplanned tree, we must + * recurse through Query objects to as to locate parallel-unsafe + * constructs anywhere in the tree. + * + * Later, we'll be called again for specific quals, possibly after + * some planning has been done, we may encounter SubPlan, SubLink, + * or AlternativeSubLink nodes. Currently, there's no need to recurse + * through these; they can't be unsafe, since we've already cleared + * the entire query of unsafe operations, and they're definitely + * parallel-restricted. + */ + if (IsA(node, Query)) { + Query *query = (Query *)node; + + if (query->rowMarks != NULL) + return true; + + /* Recurse into subselects */ + return query_tree_walker(query, (bool (*)())has_parallel_hazard_walker, context, 0); + } else if (IsA(node, SubPlan) || IsA(node, SubLink) || IsA(node, AlternativeSubPlan) || IsA(node, Param)) { + return true; + } + + /* This is just a notational convenience for callers. */ + if (IsA(node, RestrictInfo)) { + RestrictInfo *rinfo = (RestrictInfo *)node; + return has_parallel_hazard_walker((Node *)rinfo->clause, context); + } + + /* + * It is an error for a parallel worker to touch a temporary table in any + * way, so we can't handle nodes whose type is the rowtype of such a table. + */ + if (!context->allow_restricted) { + switch (nodeTag(node)) { + case T_Var: + case T_Const: + case T_Param: + case T_Aggref: + case T_WindowFunc: + case T_ArrayRef: + case T_FuncExpr: + case T_NamedArgExpr: + case T_OpExpr: + case T_DistinctExpr: + case T_NullIfExpr: + case T_FieldSelect: + case T_FieldStore: + case T_RelabelType: + case T_CoerceViaIO: + case T_ArrayCoerceExpr: + case T_ConvertRowtypeExpr: + case T_CaseExpr: + case T_CaseTestExpr: + case T_ArrayExpr: + case T_RowExpr: + case T_CoalesceExpr: + case T_MinMaxExpr: + case T_CoerceToDomain: + case T_CoerceToDomainValue: + case T_SetToDefault: + if (typeid_is_temp(exprType(node))) + return true; + break; + default: + break; + } + } + + /* + * For each node that might potentially call a function, we need to + * examine the pg_proc.proparallel marking for that function to see + * whether it's safe enough for the current value of allow_restricted. 
+ */ + if (IsA(node, FuncExpr)) { + FuncExpr *expr = (FuncExpr *)node; + + if (parallel_too_dangerous(func_parallel(expr->funcid), context)) + return true; + } else if (IsA(node, OpExpr)) { + OpExpr *expr = (OpExpr *)node; + + set_opfuncid(expr); + if (parallel_too_dangerous(func_parallel(expr->opfuncid), context)) + return true; + } else if (IsA(node, DistinctExpr)) { + DistinctExpr *expr = (DistinctExpr *)node; + + set_opfuncid((OpExpr *)expr); /* rely on struct equivalence */ + if (parallel_too_dangerous(func_parallel(expr->opfuncid), context)) + return true; + } else if (IsA(node, NullIfExpr)) { + NullIfExpr *expr = (NullIfExpr *)node; + + set_opfuncid((OpExpr *)expr); /* rely on struct equivalence */ + if (parallel_too_dangerous(func_parallel(expr->opfuncid), context)) + return true; + } else if (IsA(node, ScalarArrayOpExpr)) { + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *)node; + + set_sa_opfuncid(expr); + if (parallel_too_dangerous(func_parallel(expr->opfuncid), context)) + return true; + } else if (IsA(node, CoerceViaIO)) { + CoerceViaIO *expr = (CoerceViaIO *)node; + Oid iofunc; + Oid typioparam; + bool typisvarlena; + + /* check the result type's input function */ + getTypeInputInfo(expr->resulttype, &iofunc, &typioparam); + if (parallel_too_dangerous(func_parallel(iofunc), context)) + return true; + /* check the input type's output function */ + getTypeOutputInfo(exprType((Node *)expr->arg), &iofunc, &typisvarlena); + if (parallel_too_dangerous(func_parallel(iofunc), context)) + return true; + } else if (IsA(node, ArrayCoerceExpr)) { + ArrayCoerceExpr *expr = (ArrayCoerceExpr *)node; + + if (OidIsValid(expr->elemfuncid) && parallel_too_dangerous(func_parallel(expr->elemfuncid), context)) + return true; + } else if (IsA(node, RowCompareExpr)) { + RowCompareExpr *rcexpr = (RowCompareExpr *)node; + ListCell *opid; + + foreach (opid, rcexpr->opnos) { + Oid opfuncid = get_opcode(lfirst_oid(opid)); + if (parallel_too_dangerous(func_parallel(opfuncid), context)) + return true; + } + } + + /* ... and recurse to check substructure */ + return expression_tree_walker(node, (bool (*)())has_parallel_hazard_walker, context); +} + +static bool parallel_too_dangerous(char proparallel, has_parallel_hazard_arg *context) +{ + if (context->allow_restricted) + return proparallel == PROPARALLEL_UNSAFE; + else + return proparallel != PROPARALLEL_SAFE; +} + +static bool typeid_is_temp(Oid type_id) +{ + Oid relid = get_typ_typrelid(type_id); + + if (!OidIsValid(relid)) + return false; + + return (get_rel_persistence(relid) == RELPERSISTENCE_TEMP); +} + +/* **************************************************************************** + * Check clauses for nonstrict functions + * *************************************************************************** */ /* * contain_nonstrict_functions - * Recursively search for nonstrict functions within a clause. + * Recursively search for nonstrict functions within a clause. * * Returns true if any nonstrict construct is found --- ie, anything that * could produce non-NULL output with a NULL input. 
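/*
 * A minimal self-contained illustration of the parallel_too_dangerous()
 * rule shown above, using the conventional single-character
 * pg_proc.proparallel markings. This is a sketch only: the context struct
 * is reduced to a bare bool, and the markings and driver below are
 * assumptions for the example, not taken verbatim from the patch.
 */
#include <stdbool.h>
#include <stdio.h>

#define PROPARALLEL_SAFE       's'
#define PROPARALLEL_RESTRICTED 'r'
#define PROPARALLEL_UNSAFE     'u'

static bool parallel_too_dangerous(char proparallel, bool allow_restricted)
{
    if (allow_restricted)
        return proparallel == PROPARALLEL_UNSAFE;  /* whole-query check */
    else
        return proparallel != PROPARALLEL_SAFE;    /* may it run inside a worker? */
}

int main(void)
{
    /* restricted functions may appear somewhere in a parallel query ... */
    printf("%d\n", parallel_too_dangerous(PROPARALLEL_RESTRICTED, true));  /* prints 0 */
    /* ... but not in an expression that is pushed down to a worker */
    printf("%d\n", parallel_too_dangerous(PROPARALLEL_RESTRICTED, false)); /* prints 1 */
    return 0;
}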
diff --git a/src/gausskernel/optimizer/util/optcommon.cpp b/src/gausskernel/optimizer/util/optcommon.cpp index 5153dd797..28303d6d2 100755 --- a/src/gausskernel/optimizer/util/optcommon.cpp +++ b/src/gausskernel/optimizer/util/optcommon.cpp @@ -153,6 +153,9 @@ void GetPlanNodePlainText( } } break; + case T_Gather: + *pname = *sname = *pt_options = "Gather"; + break; case T_IndexScan: *pt_operation = "INDEX"; if (((IndexScan*)plan)->scan.isPartTbl) diff --git a/src/gausskernel/optimizer/util/pathnode.cpp b/src/gausskernel/optimizer/util/pathnode.cpp index 97d119544..ef633323b 100755 --- a/src/gausskernel/optimizer/util/pathnode.cpp +++ b/src/gausskernel/optimizer/util/pathnode.cpp @@ -1098,10 +1098,12 @@ void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) case COSTS_EQUAL: outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), PATH_REQ_OUTER(old_path)); if (keyscmp == PATHKEYS_BETTER1) { - if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET1) && new_path->rows <= old_path->rows) + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET1) && + new_path->rows <= old_path->rows && new_path->parallel_safe >= old_path->parallel_safe) remove_old = true; /* new dominates old */ } else if (keyscmp == PATHKEYS_BETTER2) { - if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET2) && new_path->rows >= old_path->rows) + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET2) && + new_path->rows >= old_path->rows && new_path->parallel_safe >= old_path->parallel_safe) accept_new = false; /* old dominates new */ } else { if (outercmp == BMS_EQUAL) { @@ -1120,7 +1122,11 @@ void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) * comparison decides the startup and total * costs compare differently. */ - if (new_path->rows < old_path->rows) + if (new_path->parallel_safe > old_path->parallel_safe) { + remove_old = true; + } else if (new_path->parallel_safe < old_path->parallel_safe) { + accept_new = false; + } else if (new_path->rows < old_path->rows) remove_old = true; /* new dominates old */ else if (new_path->rows > old_path->rows) accept_new = false; /* old dominates new */ @@ -1132,9 +1138,11 @@ void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) else accept_new = false; /* old equals or dominates new */ } - } else if (outercmp == BMS_SUBSET1 && new_path->rows <= old_path->rows) + } else if (outercmp == BMS_SUBSET1 && new_path->rows <= old_path->rows && + new_path->parallel_safe >= old_path->parallel_safe) remove_old = true; /* new dominates old */ - else if (outercmp == BMS_SUBSET2 && new_path->rows >= old_path->rows) + else if (outercmp == BMS_SUBSET2 && new_path->rows >= old_path->rows && + new_path->parallel_safe <= old_path->parallel_safe) accept_new = false; /* old dominates new */ /* else different parameterizations, keep both */ } @@ -1142,14 +1150,16 @@ void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path) case COSTS_BETTER1: if (keyscmp != PATHKEYS_BETTER2) { outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), PATH_REQ_OUTER(old_path)); - if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET1) && new_path->rows <= old_path->rows) + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET1) && + new_path->rows <= old_path->rows && new_path->parallel_safe >= old_path->parallel_safe) remove_old = true; /* new dominates old */ } break; case COSTS_BETTER2: if (keyscmp != PATHKEYS_BETTER1) { outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), PATH_REQ_OUTER(old_path)); - if ((outercmp == BMS_EQUAL || outercmp == 
BMS_SUBSET2) && new_path->rows >= old_path->rows) + if ((outercmp == BMS_EQUAL || outercmp == BMS_SUBSET2) && + new_path->rows >= old_path->rows && new_path->parallel_safe <= old_path->parallel_safe) accept_new = false; /* old dominates new */ } break; @@ -1433,7 +1443,7 @@ static void add_parameterized_path(RelOptInfo* parent_rel, Path* new_path) * Creates a path corresponding to a sequential scan, returning the * pathnode. */ -Path* create_seqscan_path(PlannerInfo* root, RelOptInfo* rel, Relids required_outer, int dop) +Path* create_seqscan_path(PlannerInfo* root, RelOptInfo* rel, Relids required_outer, int dop, int nworkers) { Path* pathnode = makeNode(Path); @@ -1442,11 +1452,14 @@ Path* create_seqscan_path(PlannerInfo* root, RelOptInfo* rel, Relids required_ou pathnode->param_info = get_baserel_parampathinfo(root, rel, required_outer); pathnode->pathkeys = NIL; /* seqscan has unordered result */ pathnode->dop = dop; + pathnode->parallel_aware = nworkers > 0 ? true : false; + pathnode->parallel_safe = rel->consider_parallel; #ifdef STREAMPLAN + /* We need to set locator_type for parallel query, cause we may send this value to bg worker */ + pathnode->locator_type = rel->locator_type; if (IS_STREAM_PLAN) { pathnode->distribute_keys = rel->distribute_keys; - pathnode->locator_type = rel->locator_type; /* add location information for seqscan path */ RangeTblEntry* rte = root->simple_rte_array[rel->relid]; @@ -1460,7 +1473,7 @@ Path* create_seqscan_path(PlannerInfo* root, RelOptInfo* rel, Relids required_ou RangeTblEntry* rte = planner_rt_fetch(rel->relid, root); if (NULL == rte->tablesample) { - cost_seqscan(pathnode, root, rel, pathnode->param_info); + cost_seqscan(pathnode, root, rel, pathnode->param_info, nworkers); } else { AssertEreport(rte->rtekind == RTE_RELATION, MOD_OPT_JOIN, "Rel should be base relation"); cost_samplescan(pathnode, root, rel, pathnode->param_info); @@ -2277,6 +2290,8 @@ ResultPath* create_result_path(List* quals, Path* subpath) pathnode->path.total_cost = subpath->total_cost; pathnode->path.dop = subpath->dop; pathnode->path.stream_cost = subpath->stream_cost; + pathnode->path.parallel_aware = subpath->parallel_aware; + pathnode->path.parallel_safe = subpath->parallel_safe; #ifdef STREAMPLAN /* result path will inherit node group and distribute information from it's child node */ inherit_path_locator_info((Path*)pathnode, subpath); @@ -2707,6 +2722,37 @@ no_unique_path: /* failure exit */ return NULL; } +/* + * create_gather_path + * + * Creates a path corresponding to a gather scan, returning the + * pathnode. 
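+ * If nworkers is zero the path degenerates into a single-copy Gather:
+ * exactly one worker runs the whole subplan and the leader only collects
+ * its output, which is why the subpath's pathkeys can be kept in that case.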
+ */ +GatherPath *create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, Relids required_outer, int nworkers) +{ + GatherPath *pathnode = makeNode(GatherPath); + + pathnode->path.pathtype = T_Gather; + pathnode->path.parent = rel; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, required_outer); + pathnode->path.parallel_aware = false; + pathnode->path.pathkeys = NIL; /* Gather has unordered result */ + + pathnode->subpath = subpath; + pathnode->num_workers = nworkers; + pathnode->single_copy = false; + + if (pathnode->num_workers == 0) { + pathnode->path.pathkeys = subpath->pathkeys; + pathnode->num_workers = 1; + pathnode->single_copy = true; + } + + cost_gather(pathnode, rel, pathnode->path.param_info); + + return pathnode; +} + /* * translate_sub_tlist - get subquery column numbers represented by tlist * diff --git a/src/gausskernel/optimizer/util/relnode.cpp b/src/gausskernel/optimizer/util/relnode.cpp index cf47879eb..33be36f47 100755 --- a/src/gausskernel/optimizer/util/relnode.cpp +++ b/src/gausskernel/optimizer/util/relnode.cpp @@ -20,6 +20,7 @@ #include "nodes/nodeFuncs.h" #include "nodes/print.h" #include "parser/parse_hint.h" +#include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -169,6 +170,7 @@ RelOptInfo* build_simple_rel(PlannerInfo* root, int relid, RelOptKind reloptkind rel->partflag = PARTITION_NONE; rel->rows = 0; rel->width = 0; + rel->consider_parallel = false; /* might get changed later */ rel->encodedwidth = 0; rel->encodednum = 0; rel->reltargetlist = NIL; @@ -549,6 +551,7 @@ RelOptInfo* build_join_rel(PlannerInfo* root, Relids joinrelids, RelOptInfo* out joinrel->partflag = PARTITION_NONE; joinrel->rows = 0; joinrel->width = 0; + joinrel->consider_parallel = false; joinrel->encodedwidth = 0; joinrel->encodednum = 0; joinrel->reltargetlist = NIL; @@ -624,6 +627,25 @@ RelOptInfo* build_join_rel(PlannerInfo* root, Relids joinrelids, RelOptInfo* out */ set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, sjinfo, restrictlist); + /* + * Set the consider_parallel flag if this joinrel could potentially be + * scanned within a parallel worker. If this flag is false for either + * inner_rel or outer_rel, then it must be false for the joinrel also. + * Even if both are true, there might be parallel-restricted quals at our + * level. + * + * Note that if there are more than two rels in this relation, they could + * be divided between inner_rel and outer_rel in any arbitary way. We + * assume this doesn't matter, because we should hit all the same baserels + * and joinclauses while building up to this joinrel no matter which we + * take; therefore, we should make the same decision here however we get + * here. + */ + if (inner_rel->consider_parallel && outer_rel->consider_parallel && + !has_parallel_hazard((Node *)restrictlist, false)) { + joinrel->consider_parallel = true; + } + /* * Add the joinrel to the query's joinrel list, and store it into the * auxiliary hashtable if there is one. 
NB: GEQO requires us to append diff --git a/src/gausskernel/process/postmaster/bgworker.cpp b/src/gausskernel/process/postmaster/bgworker.cpp index 02eeaefa4..898268537 100644 --- a/src/gausskernel/process/postmaster/bgworker.cpp +++ b/src/gausskernel/process/postmaster/bgworker.cpp @@ -14,6 +14,7 @@ #include +#include "access/parallel.h" #include "libpq/pqsignal.h" #include "miscadmin.h" #include "pgstat.h" @@ -109,6 +110,10 @@ static const struct { { "autonomous_worker_main", autonomous_worker_main + }, + { + "ParallelWorkerMain", + ParallelWorkerMain } }; diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index 886dc4892..64fac6422 100755 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -152,6 +152,7 @@ #include "utils/datetime.h" #include "utils/guc.h" #include "utils/memutils.h" +#include "utils/postinit.h" #include "utils/ps_status.h" #include "utils/plog.h" #include "utils/zfiles.h" @@ -384,8 +385,8 @@ bool PMstateIsRun(void); #define BACKEND_TYPE_TEMPBACKEND \ 0x0010 /* temp thread processing cancel signal \ or stream connection */ - -#define BACKEND_TYPE_ALL 0x001F /* OR of all the above */ +#define BACKEND_TYPE_BGWORKER 0x0020 +#define BACKEND_TYPE_ALL 0x003F /* OR of all the above */ static int CountChildren(int target); static bool CreateOptsFile(int argc, const char* argv[], const char* fullprogname); @@ -1029,8 +1030,8 @@ void SetShmemCxt(void) AV_LAUNCHER_PROCS; g_instance.shmem_cxt.MaxReserveBackendId = g_instance.attr.attr_sql.job_queue_processes + 1 + g_instance.attr.attr_storage.autovacuum_max_workers + - (thread_pool_worker_num * STREAM_RESERVE_PROC_TIMES) + - AUXILIARY_BACKENDS + + (thread_pool_worker_num * STREAM_RESERVE_PROC_TIMES) + + AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS; g_instance.shmem_cxt.ThreadPoolGroupNum = thread_pool_group_num; @@ -2543,6 +2544,85 @@ static bool save_backend_variables_for_callback_thread() return save_backend_variables(&backend_save_para, &port); } +/* + * Determine how long should we let ServerLoop sleep. + * + * In normal conditions we wait at most one minute, to ensure that the other + * background tasks handled by ServerLoop get done even when no requests are + * arriving. However, if there are background workers waiting to be started, + * we don't actually sleep so that they are quickly serviced. Other exception + * cases are as shown in the code. + */ +static void DetermineSleepTime(struct timeval *timeout) +{ + TimestampTz next_wakeup = 0; + + /* + * Normal case: either there are no background workers at all, or we're in + * a shutdown sequence (during which we ignore bgworkers altogether). + */ + if (Shutdown > NoShutdown || + (!g_instance.bgworker_cxt.start_worker_needed && !g_instance.bgworker_cxt.have_crashed_worker)) { + timeout->tv_sec = PM_POLL_TIMEOUT_SECOND; + timeout->tv_usec = 0; + return; + } + + if (g_instance.bgworker_cxt.start_worker_needed) { + timeout->tv_sec = 0; + timeout->tv_usec = 0; + return; + } + + if (g_instance.bgworker_cxt.have_crashed_worker) { + slist_mutable_iter siter; + + /* + * When there are crashed bgworkers, we sleep just long enough that + * they are restarted when they request to be. Scan the list to + * determine the minimum of all wakeup times according to most recent + * crash time and requested restart interval. 
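+ * The wait computed from that minimum is still clamped to
+ * PM_POLL_TIMEOUT_SECOND further down, so the postmaster never sleeps
+ * longer than its normal poll interval while waiting for a restart time.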
+ */ + slist_foreach_modify(siter, &t_thrd.bgworker_cxt.background_worker_list) + { + RegisteredBgWorker *rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); + + if (rw->rw_crashed_at == 0) { + continue; + } + + if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART || rw->rw_terminate) { + ForgetBackgroundWorker(&siter); + continue; + } + + TimestampTz this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at, + 1000L * rw->rw_worker.bgw_restart_time); + if (next_wakeup == 0 || this_wakeup < next_wakeup) { + next_wakeup = this_wakeup; + } + } + } + + if (next_wakeup != 0) { + long secs; + int microsecs; + + TimestampDifference(GetCurrentTimestamp(), next_wakeup, &secs, µsecs); + timeout->tv_sec = secs; + timeout->tv_usec = microsecs; + + /* Ensure we don't exceed PM_POLL_TIMEOUT_SECOND */ + if (timeout->tv_sec > PM_POLL_TIMEOUT_SECOND) { + timeout->tv_sec = PM_POLL_TIMEOUT_SECOND; + timeout->tv_usec = 0; + } + } else { + timeout->tv_sec = PM_POLL_TIMEOUT_SECOND; + timeout->tv_usec = 0; + } +} + /* * Main idle loop of postmaster */ @@ -2651,8 +2731,7 @@ static int ServerLoop(void) /* must set timeout each time; some OSes change it! */ struct timeval timeout; - timeout.tv_sec = PM_POLL_TIMEOUT_SECOND; - timeout.tv_usec = 0; + DetermineSleepTime(&timeout); #ifdef HAVE_POLL selres = poll(ufds, nSockets, timeout.tv_sec * 1000); @@ -3807,7 +3886,8 @@ CAC_state canAcceptConnections(bool isSession) /* * Can't start backends when in startup/shutdown/inconsistent recovery - * state. + * state. bgworkers are excluded from this test; we expect + * bgworker_should_start_now() decided whether the DB state allows them. * * In state PM_WAIT_BACKUP only superusers can connect (this must be * allowed so that a superuser can end online backup mode); we return @@ -4513,10 +4593,11 @@ static void pmdie(SIGNAL_ARGS) } if (pmState == PM_RECOVERY) { + (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_BGWORKER); /* - * Only startup, bgwriter, walreceiver, and/or checkpointer - * should be active in this state; we just signaled the first - * three, and we don't want to kill checkpointer yet. + * Only startup, bgwriter, walreceiver, possibly bgworkers, + * and/or checkpointer should be active in this state; we just + * signaled the first four, and we don't want to kill checkpointer yet. */ pmState = PM_WAIT_BACKENDS; } else if (pmState == PM_RUN || pmState == PM_WAIT_BACKUP || pmState == PM_WAIT_READONLY || @@ -4527,8 +4608,8 @@ static void pmdie(SIGNAL_ARGS) g_threadPoolControler->CloseAllSessions(); g_threadPoolControler->ShutDownWorker(); } - /* shut down all backends and autovac workers */ - (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC); + /* shut down all backends and bgworkers and autovac workers */ + (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER); /* and the autovac launcher too */ if (g_instance.pid_cxt.AutoVacPID != 0) @@ -5680,6 +5761,8 @@ static bool CleanupBackgroundWorker(ThreadId pid, * CleanupBackend -- cleanup after terminated backend. * * Remove all local state associated with backend. + * + * If you change this, see also CleanupBackgroundWorker. */ static void CleanupBackend(ThreadId pid, int exitstatus) /* child's exit status. */ { @@ -5725,8 +5808,8 @@ static void CleanupBackend(ThreadId pid, int exitstatus) /* child's exit status. 
for (curr = DLGetTail(g_instance.backend_list); curr; curr = DLGetPred(curr)) { Backend* bp = (Backend*)DLE_VAL(curr); - if (bp->pid == pid && bp->dead_end) { - { + if (bp->pid == pid) { + if (bp->dead_end) { if (!ReleasePostmasterChildSlot(bp->child_slot)) { /* * Uh-oh, the child failed to clean itself up. Treat as a @@ -5739,8 +5822,7 @@ static void CleanupBackend(ThreadId pid, int exitstatus) /* child's exit status. BackendArrayRemove(bp); } - if (bp->bgworker_notify) - { + if (bp->bgworker_notify) { /* * This backend may have been slated to receive SIGUSR1 when * some background worker started or stopped. Cancel those @@ -5930,7 +6012,8 @@ static void PostmasterStateMachine(void) if (pmState == PM_WAIT_BACKENDS) { /* * PM_WAIT_BACKENDS state ends when we have no regular backends - * (including autovac workers) and no walwriter, autovac launcher or + * (including autovac workers), no bgworkers (including + * unconnected ones), and no walwriter, autovac launcher or * bgwriter. If we are doing crash recovery then we expect the * checkpointer to exit as well, otherwise not. The archiver, stats, * and syslogger processes are disregarded since they are not @@ -5939,7 +6022,8 @@ static void PostmasterStateMachine(void) * later after writing the checkpoint record, like the archiver * process. */ - if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 && g_instance.pid_cxt.StartupPID == 0 && + if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER) == 0 && + g_instance.pid_cxt.StartupPID == 0 && g_instance.pid_cxt.TwoPhaseCleanerPID == 0 && g_instance.pid_cxt.FaultMonitorPID == 0 && g_instance.pid_cxt.WalReceiverPID == 0 && g_instance.pid_cxt.WalRcvWriterPID == 0 && g_instance.pid_cxt.DataReceiverPID == 0 && g_instance.pid_cxt.DataRcvWriterPID == 0 && @@ -6162,6 +6246,10 @@ static void PostmasterStateMachine(void) */ if (g_instance.demotion > NoDemote && pmState == PM_NO_CHILDREN) { ereport(LOG, (errmsg("all server processes terminated; reinitializing"))); + + /* allow background workers to immediately restart */ + ResetBackgroundWorkerCrashTimes(); + shmem_exit(1); reset_shared(g_instance.attr.attr_network.PostPortNumber); @@ -6346,6 +6434,8 @@ static int BackendStartup(Port* port) * Unless it's a dead_end child, assign it a child slot number */ bn->child_slot = t_thrd.proc_cxt.MyPMChildSlot = childSlot; + /* Hasn't asked to be notified about any bgworkers yet */ + bn->bgworker_notify = false; pid = initialize_worker_thread(WORKER, port); t_thrd.proc_cxt.MyPMChildSlot = 0; @@ -7360,6 +7450,7 @@ static void StartAutovacuumWorker(void) /* Autovac workers are not dead_end and need a child slot */ bn->child_slot = t_thrd.proc_cxt.MyPMChildSlot = slot; + bn->bgworker_notify = false; bn->pid = initialize_util_thread(AUTOVACUUM_WORKER, bn); t_thrd.proc_cxt.MyPMChildSlot = 0; if (bn->pid > 0) { @@ -8405,6 +8496,7 @@ static void BackendArrayRemove(Backend* bn) g_instance.backend_array[i].flag = 0; g_instance.backend_array[i].cancel_key = 0; g_instance.backend_array[i].dead_end = false; + g_instance.backend_array[i].bgworker_notify = false; } #ifdef WIN32 diff --git a/src/gausskernel/process/tcop/dest.cpp b/src/gausskernel/process/tcop/dest.cpp index 7ceea9744..c8cac990d 100755 --- a/src/gausskernel/process/tcop/dest.cpp +++ b/src/gausskernel/process/tcop/dest.cpp @@ -36,6 +36,7 @@ #include "commands/matview.h" #include "executor/functions.h" #include "executor/spi.h" +#include "executor/tqueue.h" #include "executor/tstoreReceiver.h" #include "libpq/libpq.h" 
#include "libpq/pqformat.h" @@ -149,6 +150,8 @@ DestReceiver* CreateDestReceiver(CommandDest dest) case DestBatchLocalRoundRobin: case DestBatchHybrid: return createStreamDestReceiver(dest); + case DestTupleQueue: + return CreateTupleQueueDestReceiver(NULL); default: break; } @@ -187,6 +190,7 @@ void EndCommand(const char* commandTag, CommandDest dest) case DestCopyOut: case DestSQLFunction: case DestTransientRel: + case DestTupleQueue: default: break; } @@ -217,6 +221,7 @@ void EndCommand_noblock(const char* commandTag, CommandDest dest) case DestIntoRel: case DestCopyOut: case DestSQLFunction: + case DestTupleQueue: default: break; } @@ -264,6 +269,7 @@ void NullCommand(CommandDest dest) case DestCopyOut: case DestSQLFunction: case DestTransientRel: + case DestTupleQueue: default: break; } @@ -312,6 +318,7 @@ void ReadyForQuery(CommandDest dest) case DestIntoRel: case DestCopyOut: case DestSQLFunction: + case DestTupleQueue: default: break; } @@ -346,6 +353,7 @@ void ReadyForQuery_noblock(CommandDest dest, int timeout) case DestIntoRel: case DestCopyOut: case DestSQLFunction: + case DestTupleQueue: default: break; } diff --git a/src/gausskernel/process/tcop/postgres.cpp b/src/gausskernel/process/tcop/postgres.cpp index b326a4c05..d68c37353 100755 --- a/src/gausskernel/process/tcop/postgres.cpp +++ b/src/gausskernel/process/tcop/postgres.cpp @@ -36,6 +36,7 @@ #include #endif +#include "access/parallel.h" #include "access/printtup.h" #include "access/xact.h" #include "access/dfs/dfs_am.h" @@ -256,6 +257,16 @@ bool StreamTopConsumerAmI() return (t_thrd.subrole == TOP_CONSUMER); } +bool ParallelWorkerAmI() +{ + return t_thrd.role == BACKGROUND_WORKER; +} + +bool ParallelLeaderAmI() +{ + return t_thrd.subrole == BACKGROUND_LEADER; +} + void EnableDoingCommandRead() { t_thrd.postgres_cxt.DoingCommandRead = true; @@ -2275,7 +2286,7 @@ void exec_simple_query(const char* query_string, MessageType messageType, String FreeExecNodes(&single_exec_node); } - plantree_list = pg_plan_queries(querytree_list, 0, NULL); + plantree_list = pg_plan_queries(querytree_list, CURSOR_OPT_PARALLEL_OK, NULL); randomPlanInfo = get_random_plan_string(); if (was_logged != false && randomPlanInfo != NULL) { @@ -4072,6 +4083,7 @@ void exec_bind_message(StringInfo input_message) params->parserSetupArg = NULL; params->params_need_process = false; params->numParams = numParams; + params->paramMask = NULL; for (paramno = 0; paramno < numParams; paramno++) { Oid ptype = psrc->param_types[paramno]; @@ -5694,6 +5706,10 @@ void ProcessInterrupts(void) ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), errmsg("canceling snapshot task"))); } } + + if (t_thrd.bgworker_cxt.ParallelMessagePending) { + HandleParallelMessages(); + } /* If we get here, do nothing (probably, t_thrd.int_cxt.QueryCancelPending was reset) */ } diff --git a/src/gausskernel/process/tcop/utility.cpp b/src/gausskernel/process/tcop/utility.cpp index a4083fe39..cc9fa61e1 100644 --- a/src/gausskernel/process/tcop/utility.cpp +++ b/src/gausskernel/process/tcop/utility.cpp @@ -308,7 +308,7 @@ bool CommandIsReadOnly(Node* parse_tree) */ static void check_xact_readonly(Node* parse_tree) { - if (!u_sess->attr.attr_common.XactReadOnly) + if (!u_sess->attr.attr_common.XactReadOnly && !IsInParallelMode()) return; /* @@ -410,12 +410,14 @@ static void check_xact_readonly(Node* parse_tree) case T_CreateSynonymStmt: case T_DropSynonymStmt: PreventCommandIfReadOnly(CreateCommandTag(parse_tree)); + PreventCommandIfParallelMode(CreateCommandTag(parse_tree)); break; case 
T_VacuumStmt: { VacuumStmt* stmt = (VacuumStmt*)parse_tree; /* on verify mode, do nothing */ if (!(stmt->options & VACOPT_VERIFY)) { PreventCommandIfReadOnly(CreateCommandTag(parse_tree)); + PreventCommandIfParallelMode(CreateCommandTag(parse_tree)); } break; } @@ -423,6 +425,7 @@ static void check_xact_readonly(Node* parse_tree) AlterRoleStmt* stmt = (AlterRoleStmt*)parse_tree; if (!(DO_NOTHING != stmt->lockstatus && t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE)) { PreventCommandIfReadOnly(CreateCommandTag(parse_tree)); + PreventCommandIfParallelMode(CreateCommandTag(parse_tree)); } break; } @@ -447,6 +450,21 @@ void PreventCommandIfReadOnly(const char* cmd_name) errmsg("cannot execute %s in a read-only transaction", cmd_name))); } +/* + * PreventCommandIfParallelMode: throw error if current (sub)transaction is + * in parallel mode. + * + * This is useful mainly to ensure consistency of the error message wording; + * most callers have checked IsInParallelMode() for themselves. + */ +void PreventCommandIfParallelMode(const char *cmdname) +{ + if (IsInParallelMode()) + ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + /* translator: %s is name of a SQL command, eg CREATE */ + errmsg("cannot execute %s during a parallel operation", cmdname))); +} + /* * PreventCommandDuringRecovery: throw error if RecoveryInProgress * diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp index 23ea560aa..337d94e7c 100755 --- a/src/gausskernel/process/threadpool/knl_thread.cpp +++ b/src/gausskernel/process/threadpool/knl_thread.cpp @@ -1422,6 +1422,12 @@ void knl_t_bgworker_init(knl_t_bgworker_context* bgworker_cxt) bgworker_cxt->my_bgworker_entry = NULL; bgworker_cxt->is_background_worker = false; bgworker_cxt->background_worker_list = SLIST_STATIC_INIT(background_worker_list); + bgworker_cxt->ParallelMessagePending = false; + bgworker_cxt->InitializingParallelWorker = false; + bgworker_cxt->ParallelWorkerNumber = -1; + bgworker_cxt->pcxt_list = DLIST_STATIC_INIT(bgworker_cxt->pcxt_list); + bgworker_cxt->save_pgBufferUsage = NULL; + bgworker_cxt->hpm_context = NULL; } void knl_t_msqueue_init(knl_t_msqueue_context* msqueue_cxt) diff --git a/src/gausskernel/runtime/executor/Makefile b/src/gausskernel/runtime/executor/Makefile index 729cd22ed..bb72890ec 100755 --- a/src/gausskernel/runtime/executor/Makefile +++ b/src/gausskernel/runtime/executor/Makefile @@ -21,10 +21,10 @@ ifneq "$(MAKECMDGOALS)" "clean" endif OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \ - execProcnode.o execQual.o execScan.o execTuples.o \ + execParallel.o execProcnode.o execQual.o execScan.o execTuples.o \ execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \ nodeBitmapAnd.o nodeBitmapOr.o \ - nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \ + nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeGather.o nodeHash.o \ nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \ nodeLimit.o nodeLockRows.o \ nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \ @@ -32,7 +32,7 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \ nodeSamplescan.o nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \ nodeValuesscan.o nodeCtescan.o nodeWorktablescan.o \ nodeGroup.o nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o \ - nodeForeignscan.o nodeWindowAgg.o tstoreReceiver.o spi.o \ + nodeForeignscan.o nodeWindowAgg.o tstoreReceiver.o tqueue.o spi.o \ nodePartIterator.o nodeStub.o execClusterResize.o 
lightProxy.o execMerge.o \ nodeExtensible.o opfusion.o opfusion_scan.o opfusion_util.o diff --git a/src/gausskernel/runtime/executor/execAmi.cpp b/src/gausskernel/runtime/executor/execAmi.cpp index e30bb139e..7c3c812ae 100755 --- a/src/gausskernel/runtime/executor/execAmi.cpp +++ b/src/gausskernel/runtime/executor/execAmi.cpp @@ -26,7 +26,7 @@ #include "executor/nodeExtensible.h" #include "executor/nodeForeignscan.h" #include "executor/nodeFunctionscan.h" -#include "executor/nodeGroup.h" +#include "executor/nodeGather.h" #include "executor/nodeGroup.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" @@ -131,6 +131,10 @@ void ExecReScanByType(PlanState* node) ExecReScanSeqScan((SeqScanState*)node); break; + case T_GatherState: + ExecReScanGather((GatherState*)node); + break; + case T_IndexScanState: ExecReScanIndexScan((IndexScanState*)node); break; @@ -492,8 +496,19 @@ bool ExecSupportsMarkRestore(NodeTag plantype) */ bool ExecSupportsBackwardScan(Plan* node) { - if (node == NULL) + if (node == NULL) { return false; + } + + /* + * Parallel-aware nodes return a subset of the tuples in each worker, + * and in general we can't expect to have enough bookkeeping state to + * know which ones we returned in this worker as opposed to some other + * worker. + */ + if (node->parallel_aware) { + return false; + } switch (nodeTag(node)) { case T_BaseResult: @@ -527,6 +542,9 @@ bool ExecSupportsBackwardScan(Plan* node) case T_CteScan: return target_list_supports_backward_scan(node->targetlist); + case T_Gather: + return false; + case T_IndexScan: return index_supports_backward_scan(((IndexScan*)node)->indexid) && target_list_supports_backward_scan(node->targetlist); diff --git a/src/gausskernel/runtime/executor/execMain.cpp b/src/gausskernel/runtime/executor/execMain.cpp index 7c314ab2b..c0993b2ea 100644 --- a/src/gausskernel/runtime/executor/execMain.cpp +++ b/src/gausskernel/runtime/executor/execMain.cpp @@ -107,8 +107,9 @@ static void CheckValidRowMarkRel(Relation rel, RowMarkType markType); static void ExecPostprocessPlan(EState *estate); static void ExecEndPlan(PlanState *planstate, EState *estate); static void ExecCollectMaterialForSubplan(EState *estate); -static void ExecutePlan(EState *estate, PlanState *planstate, CmdType operation, bool sendTuples, long numberTuples, - ScanDirection direction, DestReceiver *dest, JitExec::JitContext* mot_jit_context); +static void ExecutePlan(EState *estate, PlanState *planstate, bool use_parallel_mode, + CmdType operation, bool sendTuples, long numberTuples, ScanDirection direction, + DestReceiver *dest, JitExec::JitContext* mot_jit_context); static void ExecuteVectorizedPlan(EState *estate, PlanState *planstate, CmdType operation, bool sendTuples, long numberTuples, ScanDirection direction, DestReceiver *dest); static bool ExecCheckRTEPerms(RangeTblEntry *rte); @@ -241,8 +242,20 @@ void standard_ExecutorStart(QueryDesc *queryDesc, int eflags) /* * If the transaction is read-only, we need to check if any writes are * planned to non-temporary tables. EXPLAIN is considered read-only. + * + * Don't allow writes in parallel mode. Supporting UPDATE and DELETE + * would require (a) storing the combocid hash in shared memory, rather + * than synchronizing it just once at the start of parallelism, and (b) an + * alternative to heap_update()'s reliance on xmax for mutual exclusion. + * INSERT may have no such troubles, but we forbid it to simplify the + * checks. 
+ *
+ * We have lower-level defenses in CommandCounterIncrement and elsewhere
+ * against performing unsafe operations in parallel mode, but this gives a
+ * more user-friendly error message.
 */
- if (u_sess->attr.attr_common.XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY)) {
+ if ((u_sess->attr.attr_common.XactReadOnly || IsInParallelMode()) &&
+ !(eflags & EXEC_FLAG_EXPLAIN_ONLY)) {
 ExecCheckXactReadOnly(queryDesc->plannedstmt);
 }
@@ -572,8 +585,8 @@ void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long co
 if (queryDesc->planstate->vectorized) {
 ExecuteVectorizedPlan(estate, queryDesc->planstate, operation, send_tuples, count, direction, dest);
 } else {
- ExecutePlan(estate, queryDesc->planstate, operation, send_tuples,
- count, direction, dest, queryDesc->mot_jit_context);
+ ExecutePlan(estate, queryDesc->planstate, queryDesc->plannedstmt->parallelModeNeeded, operation,
+ send_tuples, count, direction, dest, queryDesc->mot_jit_context);
 }
 }
@@ -1058,6 +1071,10 @@ void ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
 PreventCommandIfReadOnly(CreateCommandTag((Node *)plannedstmt));
 }
+
+ if (plannedstmt->commandType != CMD_SELECT || plannedstmt->hasModifyingCTE) {
+ PreventCommandIfParallelMode(CreateCommandTag((Node*)plannedstmt));
+ }
 }
 /* ----------------------------------------------------------------
@@ -1940,8 +1957,9 @@ static void ExecCollectMaterialForSubplan(EState *estate)
 * user can see it
 * ----------------------------------------------------------------
 */
-static void ExecutePlan(EState *estate, PlanState *planstate, CmdType operation, bool sendTuples, long numberTuples,
- ScanDirection direction, DestReceiver *dest, JitExec::JitContext* mot_jit_context)
+static void ExecutePlan(EState *estate, PlanState *planstate, bool use_parallel_mode,
+ CmdType operation, bool sendTuples, long numberTuples, ScanDirection direction,
+ DestReceiver *dest, JitExec::JitContext* mot_jit_context)
 {
 TupleTableSlot *slot = NULL;
 long current_tuple_count = 0;
@@ -1968,6 +1986,22 @@ static void ExecutePlan(EState *estate, PlanState *planstate, CmdType operation,
 */
 estate->es_direction = direction;
+ /*
+ * If a tuple count was supplied, we must force the plan to run without
+ * parallelism, because we might exit early.
+ */
+ if (numberTuples != 0) {
+ use_parallel_mode = false;
+ }
+
+ /*
+ * Enter parallel mode if the plan is to run with parallel workers and
+ * that was not just disabled above.
+ */
+ if (use_parallel_mode) {
+ EnterParallelMode();
+ }
+
 if (IS_PGXC_DATANODE) {
 /* Collect Material for Subplan first */
 ExecCollectMaterialForSubplan(estate);
@@ -2035,6 +2069,7 @@ static void ExecutePlan(EState *estate, PlanState *planstate, CmdType operation,
 * process so we just end the loop...
*/ if (TupIsNull(slot)) { + (void)ExecShutdownNode(planstate); ExecEarlyFree(planstate); break; } @@ -2103,6 +2138,10 @@ static void ExecutePlan(EState *estate, PlanState *planstate, CmdType operation, << (chunkSizeInBits - BITS_IN_MB); u_sess->instr_cxt.global_instr->SetPeakNodeMemory(planstate->plan->plan_node_id, peak_memory); } + + if (use_parallel_mode) { + ExitParallelMode(); + } } /* ---------------------------------------------------------------- diff --git a/src/gausskernel/runtime/executor/execParallel.cpp b/src/gausskernel/runtime/executor/execParallel.cpp new file mode 100644 index 000000000..4e0289a26 --- /dev/null +++ b/src/gausskernel/runtime/executor/execParallel.cpp @@ -0,0 +1,589 @@ +/* ------------------------------------------------------------------------- + * + * execParallel.c + * Support routines for parallel execution. + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * This file contains routines that are intended to support setting up, + * using, and tearing down a ParallelContext from within the PostgreSQL + * executor. The ParallelContext machinery will handle starting the + * workers and ensuring that their state generally matches that of the + * leader; see src/backend/access/transam/README.parallel for details. + * However, we must save and restore relevant executor state, such as + * any ParamListInfo associated with the query, buffer usage info, and + * the actual plan to be passed down to the worker. + * + * IDENTIFICATION + * src/backend/executor/execParallel.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "executor/execParallel.h" +#include "executor/executor.h" +#include "executor/nodeSeqscan.h" +#include "executor/tqueue.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "storage/spin.h" +#include "tcop/tcopprot.h" +#include "utils/memutils.h" +#include "utils/snapmgr.h" + +#define PARALLEL_TUPLE_QUEUE_SIZE 65536 + +/* DSM structure for accumulating per-PlanState instrumentation. */ +struct SharedExecutorInstrumentation { + int instrument_options; + uint32 instrument_offset; /* offset of first Instrumentation struct */ + int num_workers; /* # of workers */ + int num_plan_nodes; /* # of plan nodes */ + int plan_node_id[FLEXIBLE_ARRAY_MEMBER]; /* array of plan node IDs */ + /* array of num_plan_nodes * num_workers Instrumentation objects follows */ +}; +#define GetInstrumentationArray(sei) (AssertVariableIsOfTypeMacro(sei, SharedExecutorInstrumentation *), \ + (Instrumentation *)(((char *)sei) + sei->instrument_offset)) + + +/* Context object for ExecParallelEstimate. */ +typedef struct ExecParallelEstimateContext { + ParallelContext *pcxt; + int nnodes; +} ExecParallelEstimateContext; + +/* Context object for ExecParallelEstimate. */ +typedef struct ExecParallelInitializeDSMContext { + ParallelContext *pcxt; + SharedExecutorInstrumentation *instrumentation; + int nnodes; +} ExecParallelInitializeDSMContext; + +/* Helper functions that run in the parallel leader. 
*/ +static char *ExecSerializePlan(Plan *plan, EState *estate); +static bool ExecParallelEstimate(PlanState *node, ExecParallelEstimateContext *e); +static bool ExecParallelInitializeDSM(PlanState *node, ExecParallelInitializeDSMContext *d); +static shm_mq_handle **ExecParallelSetupTupleQueues(ParallelContext *pcxt, bool reinitialize); +static bool ExecParallelRetrieveInstrumentation(PlanState *planstate, SharedExecutorInstrumentation *instrumentation); + +/* Helper functions that run in the parallel worker. */ +static DestReceiver *ExecParallelGetReceiver(void *seg); + +/* + * Create a serialized representation of the plan to be sent to each worker. + */ +static char *ExecSerializePlan(Plan *plan, EState *estate) +{ + ListCell *tlist = NULL; + + /* We can't scribble on the original plan, so make a copy. */ + plan = (Plan *)copyObject(plan); + + /* + * The worker will start its own copy of the executor, and that copy will + * insert a junk filter if the toplevel node has any resjunk entries. We + * don't want that to happen, because while resjunk columns shouldn't be + * sent back to the user, here the tuples are coming back to another + * backend which may very well need them. So mutate the target list + * accordingly. This is sort of a hack; there might be better ways to do + * this... + */ + foreach (tlist, plan->targetlist) { + TargetEntry *tle = (TargetEntry *)lfirst(tlist); + + tle->resjunk = false; + } + + /* + * Create a dummy PlannedStmt. Most of the fields don't need to be valid + * for our purposes, but the worker will need at least a minimal + * PlannedStmt to start the executor. + */ + PlannedStmt *pstmt = makeNode(PlannedStmt); + pstmt->commandType = CMD_SELECT; + pstmt->queryId = 0; + pstmt->hasReturning = 0; + pstmt->hasModifyingCTE = 0; + pstmt->canSetTag = 1; + pstmt->transientPlan = 0; + pstmt->planTree = plan; + pstmt->rtable = estate->es_range_table; + pstmt->resultRelations = NIL; + pstmt->utilityStmt = NULL; + pstmt->subplans = NIL; + pstmt->rewindPlanIDs = NULL; + pstmt->rowMarks = NIL; + pstmt->nParamExec = estate->es_plannedstmt->nParamExec; + pstmt->relationOids = NIL; + pstmt->invalItems = NIL; /* workers can't replan anyway... */ + pstmt->num_plannodes = estate->es_plannedstmt->num_plannodes; + + /* Return serialized copy of our dummy PlannedStmt. */ + return nodeToString(pstmt); +} + +/* + * Ordinary plan nodes won't do anything here, but parallel-aware plan nodes + * may need some state which is shared across all parallel workers. Before + * we size the DSM, give them a chance to call shm_toc_estimate_chunk or + * shm_toc_estimate_keys on &pcxt->estimator. + * + * While we're at it, count the number of PlanState nodes in the tree, so + * we know how many SharedPlanStateInstrumentation structures we need. + */ +static bool ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e) +{ + if (planstate == NULL) + return false; + + /* Count this node. */ + e->nnodes++; + + /* Call estimators for parallel-aware nodes. */ + switch (nodeTag(planstate)) { + case T_SeqScanState: + ExecSeqScanEstimate((SeqScanState *)planstate, e->pcxt); + break; + default: + break; + } + + return planstate_tree_walker(planstate, (bool (*)())ExecParallelEstimate, e); +} + +/* + * Ordinary plan nodes won't do anything here, but parallel-aware plan nodes + * may need to initialize shared state in the DSM before parallel workers + * are available. 
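
The flattened plan built by ExecSerializePlan() is just the ordinary node-serializer output, which the worker later turns back into a node tree with stringToNode(). A minimal sketch of that round trip (the variable names below are illustrative; only nodeToString() and stringToNode() come from the patch):

    char *flat_plan = nodeToString(pstmt);                          /* leader: flatten the dummy PlannedStmt */
    PlannedStmt *rebuilt = (PlannedStmt *)stringToNode(flat_plan);  /* worker: rebuild an equivalent tree */
    Assert(rebuilt->planTree != NULL);                              /* same shape as the plan that was sent */
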
They can allocate the space they previous estimated using + * shm_toc_allocate, and add the keys they previously estimated using + * shm_toc_insert, in each case targeting pcxt->toc. + */ +static bool ExecParallelInitializeDSM(PlanState *planstate, ExecParallelInitializeDSMContext *d) +{ + if (planstate == NULL) + return false; + + /* If instrumentation is enabled, initialize slot for this node. */ + if (d->instrumentation != NULL) { + d->instrumentation->plan_node_id[d->nnodes] = planstate->plan->plan_node_id; + } + + /* Count this node. */ + d->nnodes++; + knl_u_parallel_context *cxt = (knl_u_parallel_context *)d->pcxt->seg; + + /* Call initializers for parallel-aware plan nodes. */ + switch (nodeTag(planstate)) { + case T_SeqScanState: + ExecSeqScanInitializeDSM((SeqScanState *)planstate, d->pcxt, cxt->pwCtx->pscan_num); + cxt->pwCtx->pscan_num++; + break; + default: + break; + } + + return planstate_tree_walker(planstate, (bool (*)())ExecParallelInitializeDSM, d); +} + +/* + * It sets up the response queues for backend workers to return tuples + * to the main backend and start the workers. + */ +static shm_mq_handle **ExecParallelSetupTupleQueues(ParallelContext *pcxt, bool reinitialize) +{ + /* Skip this if no workers. */ + if (pcxt->nworkers <= 0) + return NULL; + + /* Allocate memory for shared memory queue handles. */ + shm_mq_handle **responseq = (shm_mq_handle **)palloc(pcxt->nworkers * sizeof(shm_mq_handle *)); + knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg; + /* + * If not reinitializing, allocate space from the DSM for the queues; + * otherwise, find the already allocated space. + */ + if (!reinitialize) { + cxt->pwCtx->tupleQueue = (char *)palloc0(PARALLEL_TUPLE_QUEUE_SIZE * (Size)pcxt->nworkers); + } + Assert(cxt->pwCtx->tupleQueue != NULL); + char *tqueuespace = cxt->pwCtx->tupleQueue; + + /* Create the queues, and become the receiver for each. */ + for (int i = 0; i < pcxt->nworkers; ++i) { + shm_mq *mq = shm_mq_create(tqueuespace + i * PARALLEL_TUPLE_QUEUE_SIZE, (Size)PARALLEL_TUPLE_QUEUE_SIZE); + shm_mq_set_receiver(mq, t_thrd.proc); + responseq[i] = shm_mq_attach(mq, pcxt->seg, NULL); + } + + /* Return array of handles. */ + return responseq; +} + +/* + * Re-initialize the parallel executor info such that it can be reused by + * workers. + */ +void ExecParallelReinitialize(ParallelExecutorInfo *pei) +{ + ReinitializeParallelDSM(pei->pcxt); + pei->tqueue = ExecParallelSetupTupleQueues(pei->pcxt, true); + pei->finished = false; +} + +/* + * Sets up the required infrastructure for backend workers to perform + * execution and return results to the main backend. + */ +ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers) +{ + ExecParallelEstimateContext e; + ExecParallelInitializeDSMContext d; + uint32 instrumentation_len = 0; + uint32 instrument_offset = 0; + + /* Allocate object for return value. */ + ParallelExecutorInfo *pei = (ParallelExecutorInfo *)palloc0(sizeof(ParallelExecutorInfo)); + pei->finished = false; + pei->planstate = planstate; + + /* Fix up and serialize plan to be sent to workers. */ + char *pstmt_data = ExecSerializePlan(planstate->plan, estate); + + /* Create a parallel context. */ + ParallelContext *pcxt = CreateParallelContext("postgres", "ParallelQueryMain", nworkers); + pei->pcxt = pcxt; + + /* Estimate space for serialized PlannedStmt. */ + Size pstmt_len = strlen(pstmt_data) + 1; + /* Estimate space for serialized ParamListInfo. 
*/ + Size param_len = EstimateParamListSpace(estate->es_param_list_info); + + /* + * Give parallel-aware nodes a chance to add to the estimates, and get + * a count of how many PlanState nodes there are. + */ + e.pcxt = pcxt; + e.nnodes = 0; + (void)ExecParallelEstimate(planstate, &e); + + /* Estimate space for instrumentation, if required. */ + if (estate->es_instrument) { + instrumentation_len = offsetof(SharedExecutorInstrumentation, plan_node_id) + sizeof(int) * e.nnodes; + instrumentation_len = MAXALIGN(instrumentation_len); + instrument_offset = instrumentation_len; + instrumentation_len += sizeof(Instrumentation) * e.nnodes * nworkers; + } + + /* Everyone's had a chance to ask for space, so now create the DSM. */ + InitializeParallelDSM(pcxt); + knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg; + + /* + * OK, now we have a dynamic shared memory segment, and it should be big + * enough to store all of the data we estimated we would want to put into + * it, plus whatever general stuff (not specifically executor-related) the + * ParallelContext itself needs to store there. None of the space we + * asked for has been allocated or initialized yet, though, so do that. + */ + MemoryContext oldcontext = MemoryContextSwitchTo(cxt->memCtx); + + /* Store serialized PlannedStmt. */ + cxt->pwCtx->pstmt_space = (char *)palloc0(pstmt_len); + int rc = memcpy_s(cxt->pwCtx->pstmt_space, pstmt_len, pstmt_data, pstmt_len); + securec_check(rc, "", ""); + + /* Store serialized ParamListInfo. */ + cxt->pwCtx->param_space = (char *)palloc0(param_len); + cxt->pwCtx->param_len = param_len; + SerializeParamList(estate->es_param_list_info, cxt->pwCtx->param_space, param_len); + + /* Allocate space for each worker's BufferUsage; no need to initialize. */ + cxt->pwCtx->bufUsage = (BufferUsage *)palloc0(sizeof(BufferUsage) * pcxt->nworkers); + pei->buffer_usage = cxt->pwCtx->bufUsage; + + /* Set up tuple queues. */ + pei->tqueue = ExecParallelSetupTupleQueues(pcxt, false); + + /* + * If instrumentation options were supplied, allocate space for the + * data. It only gets partially initialized here; the rest happens + * during ExecParallelInitializeDSM. + */ + if (estate->es_instrument) { + cxt->pwCtx->instrumentation = (SharedExecutorInstrumentation *)palloc0(instrumentation_len); + cxt->pwCtx->instrumentation->instrument_options = estate->es_instrument; + cxt->pwCtx->instrumentation->instrument_offset = instrument_offset; + cxt->pwCtx->instrumentation->num_workers = nworkers; + cxt->pwCtx->instrumentation->num_plan_nodes = e.nnodes; + Instrumentation *instrument = GetInstrumentationArray(cxt->pwCtx->instrumentation); + for (int i = 0; i < nworkers * e.nnodes; ++i) { + InstrInit(&instrument[i], estate->es_instrument); + } + pei->instrumentation = cxt->pwCtx->instrumentation; + } + + cxt->pwCtx->pscan = (ParallelHeapScanDesc *)palloc0(sizeof(ParallelHeapScanDesc) * e.nnodes); + + /* + * Give parallel-aware nodes a chance to initialize their shared data. + * This also initializes the elements of instrumentation->ps_instrument, + * if it exists. + */ + d.pcxt = pcxt; + d.instrumentation = cxt->pwCtx->instrumentation; + d.nnodes = 0; + + /* Here we switch to old context, cause heap_beginscan_parallel need malloc memory */ + (void)MemoryContextSwitchTo(oldcontext); + (void)ExecParallelInitializeDSM(planstate, &d); + + /* + * Make sure that the world hasn't shifted under our feat. This could + * probably just be an Assert(), but let's be conservative for now. 
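
For concreteness, the instrumentation sizing above can be read as a small worked example (the node and worker counts are made up; everything else follows the struct defined earlier in this file): with e.nnodes = 3 and nworkers = 2, the shared block is a fixed header plus a 3-entry plan_node_id[] array, MAXALIGN'ed to give instrument_offset, followed by 3 * 2 Instrumentation slots.

    Size header_len = MAXALIGN(offsetof(SharedExecutorInstrumentation, plan_node_id) + 3 * sizeof(int));
    Size total_len = header_len + 3 * 2 * sizeof(Instrumentation);
    /* GetInstrumentationArray() then resolves to (char *)sei + sei->instrument_offset */
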
+ */ + if (e.nnodes != d.nnodes) { + ereport(ERROR, (errmsg("inconsistent count of PlanState nodes"))); + } + + /* OK, we're ready to rock and roll. */ + return pei; +} + +/* + * Copy instrumentation information about this node and its descendents from + * dynamic shared memory. + */ +static bool ExecParallelRetrieveInstrumentation(PlanState *planstate, SharedExecutorInstrumentation *instrumentation) +{ + int i; + int plan_node_id = planstate->plan->plan_node_id; + + /* Find the instumentation for this node. */ + for (i = 0; i < instrumentation->num_plan_nodes; ++i) { + if (instrumentation->plan_node_id[i] == plan_node_id) { + break; + } + } + if (i >= instrumentation->num_plan_nodes) { + ereport(ERROR, (errmsg("plan node %d not found", plan_node_id))); + } + + /* Accumulate the statistics from all workers. */ + Instrumentation *instrument = GetInstrumentationArray(instrumentation); + instrument += i * instrumentation->num_workers; + for (i = 0; i < instrumentation->num_workers; ++i) { + InstrAggNode(planstate->instrument, &instrument[i]); + } + + /* Also store the per-worker detail. */ + Size ibytes = instrumentation->num_workers * sizeof(Instrumentation); + planstate->worker_instrument = + (WorkerInstrumentation *)palloc(offsetof(WorkerInstrumentation, instrument) + ibytes); + planstate->worker_instrument->num_workers = instrumentation->num_workers; + int rc = memcpy_s(&planstate->worker_instrument->instrument, ibytes, instrument, ibytes); + securec_check(rc, "", ""); + + return planstate_tree_walker(planstate, (bool (*)())ExecParallelRetrieveInstrumentation, instrumentation); +} + + +/* + * Finish parallel execution. We wait for parallel workers to finish, and + * accumulate their buffer usage and instrumentation. + */ +void ExecParallelFinish(ParallelExecutorInfo *pei) +{ + if (pei->finished) + return; + + /* First, wait for the workers to finish. */ + WaitForParallelWorkersToFinish(pei->pcxt); + + /* Next, accumulate buffer usage. */ + for (int i = 0; i < pei->pcxt->nworkers; ++i) + InstrAccumParallelQuery(&pei->buffer_usage[i]); + + /* Finally, accumulate instrumentation, if any. */ + if (pei->instrumentation) { + (void)ExecParallelRetrieveInstrumentation(pei->planstate, pei->instrumentation); + } + + pei->finished = true; +} + +/* + * Clean up whatever ParallelExecutreInfo resources still exist after + * ExecParallelFinish. We separate these routines because someone might + * want to examine the contents of the DSM after ExecParallelFinish and + * before calling this routine. + */ +void ExecParallelCleanup(ParallelExecutorInfo *pei) +{ + if (pei->pcxt != NULL) { + DestroyParallelContext(pei->pcxt); + pei->pcxt = NULL; + } + pfree(pei); +} + +/* + * Create a DestReceiver to write tuples we produce to the shm_mq designated + * for that purpose. + */ +static DestReceiver *ExecParallelGetReceiver(void *seg) +{ + Assert(seg != NULL); + knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg; + + char *mqspace = cxt->pwCtx->tupleQueue; + mqspace += t_thrd.bgworker_cxt.ParallelWorkerNumber * PARALLEL_TUPLE_QUEUE_SIZE; + shm_mq *mq = (shm_mq *)mqspace; + shm_mq_set_sender(mq, t_thrd.proc); + return CreateTupleQueueDestReceiver(shm_mq_attach(mq, seg, NULL)); +} + +/* + * Create a QueryDesc for the PlannedStmt we are to execute, and return it. + */ +static QueryDesc *ExecParallelGetQueryDesc(void *seg, DestReceiver *receiver, int instrument_options) +{ + knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg; + + /* Reconstruct leader-supplied PlannedStmt. 
*/ + PlannedStmt *pstmt = (PlannedStmt *)stringToNode(cxt->pwCtx->pstmt_space); + + /* Reconstruct ParamListInfo. */ + ParamListInfo paramLI = RestoreParamList(cxt->pwCtx->param_space, cxt->pwCtx->param_len); + + /* + * Create a QueryDesc for the query. + * + * It's not obvious how to obtain the query string from here; and even if + * we could copying it would take more cycles than not copying it. But + * it's a bit unsatisfying to just use a dummy string here, so consider + * revising this someday. + */ + return CreateQueryDesc(pstmt, "", GetActiveSnapshot(), InvalidSnapshot, receiver, paramLI, + instrument_options); +} + +/* + * Copy instrumentation information from this node and its descendents into + * dynamic shared memory, so that the parallel leader can retrieve it. + */ +static bool ExecParallelReportInstrumentation(PlanState *planstate, SharedExecutorInstrumentation *instrumentation) +{ + int i; + int plan_node_id = planstate->plan->plan_node_id; + + InstrEndLoop(planstate->instrument); + + /* + * If we shuffled the plan_node_id values in ps_instrument into sorted + * order, we could use binary search here. This might matter someday + * if we're pushing down sufficiently large plan trees. For now, do it + * the slow, dumb way. + */ + for (i = 0; i < instrumentation->num_plan_nodes; ++i) { + if (instrumentation->plan_node_id[i] == plan_node_id) { + break; + } + } + if (i >= instrumentation->num_plan_nodes) { + ereport(ERROR, (errmsg("plan node %d not found", plan_node_id))); + } + + /* + * Add our statistics to the per-node, per-worker totals. It's possible + * that this could happen more than once if we relaunched workers. + */ + Instrumentation *instrument = GetInstrumentationArray(instrumentation); + instrument += i * instrumentation->num_workers; + Assert(IsParallelWorker()); + Assert(t_thrd.bgworker_cxt.ParallelWorkerNumber < instrumentation->num_workers); + InstrAggNode(&instrument[t_thrd.bgworker_cxt.ParallelWorkerNumber], planstate->instrument); + + return planstate_tree_walker(planstate, (bool (*)())ExecParallelReportInstrumentation, instrumentation); +} + +/* + * Initialize the PlanState and its descendents with the information + * retrieved from shared memory. This has to be done once the PlanState + * is allocated and initialized by executor; that is, after ExecutorStart(). + */ +static bool ExecParallelInitializeWorker(PlanState *planstate, void *context) +{ + if (planstate == NULL) + return false; + + /* Call initializers for parallel-aware plan nodes. */ + if (planstate->plan->parallel_aware) { + switch (nodeTag(planstate)) { + case T_SeqScanState: + ExecSeqScanInitializeWorker((SeqScanState *)planstate, context); + break; + default: + break; + } + } + + return planstate_tree_walker(planstate, (bool (*)())ExecParallelInitializeWorker, context); +} + +/* + * Main entrypoint for parallel query worker processes. + * + * We reach this function from ParallelMain, so the setup necessary to create + * a sensible parallel environment has already been done; ParallelMain worries + * about stuff like the transaction state, combo CID mappings, and GUC values, + * so we don't need to deal with any of that here. + * + * Our job is to deal with concerns specific to the executor. The parallel + * group leader will have stored a serialized PlannedStmt, and it's our job + * to execute that plan and write the resulting tuples to the appropriate + * tuple queue. 
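
The worker-side ExecParallelReportInstrumentation() above and the leader-side ExecParallelRetrieveInstrumentation() address the shared array the same way: one row per plan node (in plan_node_id[] order), one column per worker. A one-line sketch of that addressing, where i is the node's row index and w the worker number (both placeholders):

    Instrumentation *slot = GetInstrumentationArray(instrumentation) + i * instrumentation->num_workers + w;
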
Various bits of supporting information that we need in order + * to do this are also stored in the dsm_segment and can be accessed through + * the shm_toc. + */ +void ParallelQueryMain(void *seg) +{ + int instrument_options = 0; + + /* Set up DestReceiver, SharedExecutorInstrumentation, and QueryDesc. */ + knl_u_parallel_context *cxt = (knl_u_parallel_context *)seg; + DestReceiver *receiver = ExecParallelGetReceiver(seg); + SharedExecutorInstrumentation *instrumentation = cxt->pwCtx->instrumentation; + if (instrumentation != NULL) + instrument_options = instrumentation->instrument_options; + QueryDesc *queryDesc = ExecParallelGetQueryDesc(seg, receiver, instrument_options); + + /* Prepare to track buffer usage during query execution. */ + InstrStartParallelQuery(); + + /* Start up the executor, have it run the plan, and then shut it down. */ + (void)ExecutorStart(queryDesc, 0); + ExecParallelInitializeWorker(queryDesc->planstate, seg); + ExecutorRun(queryDesc, ForwardScanDirection, 0L); + ExecutorFinish(queryDesc); + + /* Report buffer usage during parallel execution. */ + BufferUsage *buffer_usage = cxt->pwCtx->bufUsage; + InstrEndParallelQuery(&buffer_usage[t_thrd.bgworker_cxt.ParallelWorkerNumber]); + + /* Report instrumentation data if any instrumentation options are set. */ + if (instrumentation != NULL) { + (void)ExecParallelReportInstrumentation(queryDesc->planstate, instrumentation); + } + + /* Must do this after capturing instrumentation. */ + ExecutorEnd(queryDesc); + + /* Cleanup. */ + FreeQueryDesc(queryDesc); + (*receiver->rDestroy)(receiver); +} + diff --git a/src/gausskernel/runtime/executor/execProcnode.cpp b/src/gausskernel/runtime/executor/execProcnode.cpp index a448f0d15..77f86b765 100755 --- a/src/gausskernel/runtime/executor/execProcnode.cpp +++ b/src/gausskernel/runtime/executor/execProcnode.cpp @@ -89,6 +89,7 @@ #include "executor/nodeExtensible.h" #include "executor/nodeForeignscan.h" #include "executor/nodeFunctionscan.h" +#include "executor/nodeGather.h" #include "executor/nodeGroup.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" @@ -116,6 +117,7 @@ #include "executor/nodeWindowAgg.h" #include "executor/nodeWorktablescan.h" #include "executor/execStream.h" +#include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/encoding.h" #include "optimizer/ml_model.h" @@ -297,6 +299,8 @@ PlanState* ExecInitNodeByType(Plan* node, EState* e_state, int e_flags) return (PlanState*)ExecInitWindowAgg((WindowAgg*)node, e_state, e_flags); case T_Unique: return (PlanState*)ExecInitUnique((Unique*)node, e_state, e_flags); + case T_Gather: + return (PlanState*)ExecInitGather((Gather*)node, e_state, e_flags); case T_Hash: return (PlanState*)ExecInitHash((Hash*)node, e_state, e_flags); case T_SetOp: @@ -635,6 +639,8 @@ TupleTableSlot* ExecProcNodeByType(PlanState* node) return ExecWindowAgg((WindowAggState*)node); case T_UniqueState: return ExecUnique((UniqueState*)node); + case T_GatherState: + return ExecGather((GatherState*)node); case T_HashState: return ExecHash(); case T_SetOpState: @@ -1085,6 +1091,9 @@ static void ExecEndNodeByType(PlanState* node) case T_TsStoreScanState: ExecEndCStoreScan((CStoreScanState*)node, false); break; + case T_GatherState: + ExecEndGather((GatherState *)node); + break; case T_IndexScanState: ExecEndIndexScan((IndexScanState*)node); break; @@ -1340,3 +1349,29 @@ void ExecEndNode(PlanState* node) } ExecEndNodeByType(node); } + +/* + * ExecShutdownNode + * + * Give execution nodes a chance to stop asynchronous 
resource consumption + * and release any resources still held. Currently, this is only used for + * parallel query, but we might want to extend it to other cases also (e.g. + * FDW). We might also want to call it sooner, as soon as it's evident that + * no more rows will be needed (e.g. when a Limit is filled) rather than only + * at the end of ExecutorRun. + */ +bool ExecShutdownNode(PlanState *node) +{ + if (node == NULL) + return false; + + switch (nodeTag(node)) { + case T_GatherState: + ExecShutdownGather((GatherState *)node); + break; + default: + break; + } + + return planstate_tree_walker(node, (bool (*)())ExecShutdownNode, NULL); +} diff --git a/src/gausskernel/runtime/executor/functions.cpp b/src/gausskernel/runtime/executor/functions.cpp index 1af5d06dd..7bfa62049 100755 --- a/src/gausskernel/runtime/executor/functions.cpp +++ b/src/gausskernel/runtime/executor/functions.cpp @@ -456,7 +456,7 @@ static List* init_execution_state(List* query_tree_list, SQLFunctionCachePtr f_c if (query_tree->commandType == CMD_UTILITY) stmt = query_tree->utilityStmt; else - stmt = (Node*)pg_plan_query(query_tree, 0, NULL); + stmt = (Node*)pg_plan_query(query_tree, f_cache->readonly_func ? CURSOR_OPT_PARALLEL_OK : 0, NULL); /* Precheck all commands for validity in a function */ if (IsA(stmt, TransactionStmt)) @@ -471,6 +471,9 @@ static List* init_execution_state(List* query_tree_list, SQLFunctionCachePtr f_c /* translator: %s is a SQL statement name */ errmsg("%s is not allowed in a non-volatile function", CreateCommandTag(stmt)))); + if (IsInParallelMode() && !CommandIsReadOnly(stmt)) + PreventCommandIfParallelMode(CreateCommandTag((Node *)stmt)); + /* OK, build the execution_state for this query */ new_es = (execution_state*)palloc(sizeof(execution_state)); if (prev_es != NULL) @@ -882,6 +885,7 @@ static void postquel_sub_params(SQLFunctionCachePtr f_cache, FunctionCallInfo fc param_li->parserSetupArg = NULL; param_li->params_need_process = false; param_li->numParams = n_args; + param_li->paramMask = NULL; f_cache->paramLI = param_li; } else { param_li = f_cache->paramLI; diff --git a/src/gausskernel/runtime/executor/instrument.cpp b/src/gausskernel/runtime/executor/instrument.cpp index 6ffb916eb..c7d5a3024 100644 --- a/src/gausskernel/runtime/executor/instrument.cpp +++ b/src/gausskernel/runtime/executor/instrument.cpp @@ -59,6 +59,7 @@ extern const char* GetStreamType(Stream* node); extern void insert_obsscaninfo( uint64 queryid, const char* rel_name, int64 file_count, double scan_data_size, double total_time, int format); +static void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add); static void BufferUsageAccumDiff(BufferUsage* dst, const BufferUsage* add, const BufferUsage* sub); static void CPUUsageGetCurrent(CPUUsage* cur); static void CPUUsageAccumDiff(CPUUsage* dst, const CPUUsage* add, const CPUUsage* sub); @@ -454,6 +455,15 @@ Instrumentation* InstrAlloc(int n, int instrument_options) return instr; } +/* Initialize an pre-allocated instrumentation structure. 
*/ +void InstrInit(Instrumentation *instr, int instrument_options) +{ + int rc = memset_s(instr, sizeof(Instrumentation), 0, sizeof(Instrumentation)); + securec_check(rc, "", ""); + instr->need_bufusage = (instrument_options & INSTRUMENT_BUFFERS) != 0; + instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; +} + /* Entry to a plan node */ void InstrStartNode(Instrumentation* instr) { @@ -689,11 +699,72 @@ void StreamEndLoop(StreamTime* instr) instr->tuplecount = 0; } -/* +/* aggregate instrumentation information */ +void InstrAggNode(Instrumentation *dst, Instrumentation *add) +{ + if (!dst->running && add->running) { + dst->running = true; + dst->firsttuple = add->firsttuple; + } else if (dst->running && add->running && dst->firsttuple > add->firsttuple) { + dst->firsttuple = add->firsttuple; + } + + INSTR_TIME_ADD(dst->counter, add->counter); + + dst->tuplecount += add->tuplecount; + dst->startup += add->startup; + dst->total += add->total; + dst->ntuples += add->ntuples; + dst->nloops += add->nloops; + dst->nfiltered1 += add->nfiltered1; + dst->nfiltered2 += add->nfiltered2; + + /* Add delta of buffer usage since entry to node's totals */ + if (dst->need_bufusage) + BufferUsageAdd(&dst->bufusage, &add->bufusage); +} + +/* note current values during parallel executor startup */ +void InstrStartParallelQuery(void) +{ + t_thrd.bgworker_cxt.save_pgBufferUsage = u_sess->instr_cxt.pg_buffer_usage; +} + +/* report usage after parallel executor shutdown */ +void InstrEndParallelQuery(BufferUsage *result) +{ + int rc = memset_s(result, sizeof(BufferUsage), 0, sizeof(BufferUsage)); + securec_check(rc, "", ""); + BufferUsageAccumDiff(result, u_sess->instr_cxt.pg_buffer_usage, t_thrd.bgworker_cxt.save_pgBufferUsage); +} + +/* accumulate work done by workers in leader's stats */ +void InstrAccumParallelQuery(BufferUsage *result) +{ + BufferUsageAdd(u_sess->instr_cxt.pg_buffer_usage, result); +} + +static void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add) +{ + dst->shared_blks_hit += add->shared_blks_hit; + dst->shared_blks_read += add->shared_blks_read; + dst->shared_blks_dirtied += add->shared_blks_dirtied; + dst->shared_blks_written += add->shared_blks_written; + dst->local_blks_hit += add->local_blks_hit; + dst->local_blks_read += add->local_blks_read; + dst->local_blks_dirtied += add->local_blks_dirtied; + dst->local_blks_written += add->local_blks_written; + dst->temp_blks_read += add->temp_blks_read; + dst->temp_blks_written += add->temp_blks_written; + INSTR_TIME_ADD(dst->blk_read_time, add->blk_read_time); + INSTR_TIME_ADD(dst->blk_write_time, add->blk_write_time); +} + +/* * BufferUsageAccumDiff - * calculate every element of dst like: dst += add - sub + * calculate every element of dst like: dst += add - sub */ -static void BufferUsageAccumDiff(BufferUsage* dst, const BufferUsage* add, const BufferUsage* sub) +static void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub) { dst->shared_blks_hit += add->shared_blks_hit - sub->shared_blks_hit; dst->shared_blks_read += add->shared_blks_read - sub->shared_blks_read; diff --git a/src/gausskernel/runtime/executor/nodeGather.cpp b/src/gausskernel/runtime/executor/nodeGather.cpp new file mode 100644 index 000000000..e9f6f6b8e --- /dev/null +++ b/src/gausskernel/runtime/executor/nodeGather.cpp @@ -0,0 +1,434 @@ +/* ------------------------------------------------------------------------- + * + * nodeGather.c + * Support routines for scanning a plan via multiple workers. 
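
The three parallel-query helpers added above form a small protocol, mirrored by execParallel.cpp: each worker brackets its run with InstrStartParallelQuery()/InstrEndParallelQuery(), writing its buffer-usage delta into its slot of the shared array, and the leader folds every slot back in with InstrAccumParallelQuery() once the workers are done. A sketch of the call order (shared_bufusage, my_worker_number and nworkers are placeholders for the state set up in ExecInitParallelPlan()):

    /* worker side */
    InstrStartParallelQuery();
    /* ... ExecutorRun() ... */
    InstrEndParallelQuery(&shared_bufusage[my_worker_number]);

    /* leader side, after WaitForParallelWorkersToFinish() */
    for (int i = 0; i < nworkers; i++)
        InstrAccumParallelQuery(&shared_bufusage[i]);
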
+ * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * A Gather executor launches parallel workers to run multiple copies of a + * plan. It can also run the plan itself, if the workers are not available + * or have not started up yet. It then merges all of the results it produces + * and the results from the workers into a single output stream. Therefore, + * it will normally be used with a plan where running multiple copies of the + * same plan does not produce duplicate output, such as parallel-aware + * SeqScan. + * + * Alternatively, a Gather node can be configured to use just one worker + * and the single-copy flag can be set. In this case, the Gather node will + * run the plan in one worker and will not execute the plan itself. In + * this case, it simply returns whatever tuples were returned by the worker. + * If a worker cannot be obtained, then it will run the plan itself and + * return the results. Therefore, a plan used with a single-copy Gather + * node need not be parallel-aware. + * + * IDENTIFICATION + * src/backend/executor/nodeGather.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/relscan.h" +#include "access/xact.h" +#include "executor/execdebug.h" +#include "executor/execParallel.h" +#include "executor/nodeGather.h" +#include "executor/nodeSubplan.h" +#include "executor/tqueue.h" +#include "miscadmin.h" +#include "utils/memutils.h" +#include "utils/rel.h" + + +static TupleTableSlot *gather_getnext(GatherState *gatherstate); +static HeapTuple gather_readnext(GatherState *gatherstate); +static void ExecShutdownGatherWorkers(GatherState *node); + + +/* ---------------------------------------------------------------- + * ExecInitGather + * ---------------------------------------------------------------- + */ +GatherState *ExecInitGather(Gather *node, EState *estate, int eflags) +{ + bool hasoid = false; + + /* Gather node doesn't have innerPlan node. */ + Assert(innerPlan(node) == NULL); + + /* + * create state structure + */ + GatherState *gatherstate = makeNode(GatherState); + gatherstate->ps.plan = (Plan *)node; + gatherstate->ps.state = estate; + gatherstate->need_to_scan_locally = !node->single_copy && + u_sess->attr.attr_sql.parallel_leader_participation; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &gatherstate->ps); + + /* + * initialize child expressions + */ + gatherstate->ps.targetlist = (List *)ExecInitExpr((Expr *)node->plan.targetlist, (PlanState *)gatherstate); + gatherstate->ps.qual = (List *)ExecInitExpr((Expr *)node->plan.qual, (PlanState *)gatherstate); + + /* + * tuple table initialization + */ + gatherstate->funnel_slot = ExecInitExtraTupleSlot(estate); + ExecInitResultTupleSlot(estate, &gatherstate->ps); + + /* + * now initialize outer plan + */ + Plan *outerNode = outerPlan(node); + outerPlanState(gatherstate) = ExecInitNode(outerNode, estate, eflags); + + gatherstate->ps.ps_TupFromTlist = false; + + /* + * Initialize result tuple type and projection info. + */ + ExecAssignResultTypeFromTL(&gatherstate->ps); + ExecAssignProjectionInfo(&gatherstate->ps, NULL); + + /* + * Initialize funnel slot to same tuple descriptor as outer plan. 
+ */ + if (!ExecContextForcesOids(&gatherstate->ps, &hasoid)) + hasoid = false; + TupleDesc tupDesc = ExecTypeFromTL(outerNode->targetlist, hasoid); + ExecSetSlotDescriptor(gatherstate->funnel_slot, tupDesc); + + return gatherstate; +} + +/* ---------------------------------------------------------------- + * ExecGather(node) + * + * Scans the relation via multiple workers and returns + * the next qualifying tuple. + * ---------------------------------------------------------------- + */ +TupleTableSlot *ExecGather(GatherState *node) +{ + TupleTableSlot *fslot = node->funnel_slot; + int i; + TupleTableSlot *slot = NULL; + TupleTableSlot *resultSlot = NULL; + ExprDoneCond isDone; + + CHECK_FOR_INTERRUPTS(); + + /* + * Initialize the parallel context and workers on first execution. We do + * this on first execution rather than during node initialization, as it + * needs to allocate large dynamic segement, so it is better to do if it + * is really needed. + */ + if (!node->initialized) { + EState *estate = node->ps.state; + Gather *gather = (Gather *)node->ps.plan; + t_thrd.subrole = BACKGROUND_LEADER; + + /* + * Sometimes we might have to run without parallelism; but if + * parallel mode is active then we can try to fire up some workers. + */ + if (gather->num_workers > 0 && IsInParallelMode()) { + bool got_any_worker = false; + + /* Initialize the workers required to execute Gather node. */ + if (!node->pei) + node->pei = ExecInitParallelPlan(node->ps.lefttree, estate, gather->num_workers); + + /* + * Register backend workers. We might not get as many as we + * requested, or indeed any at all. + */ + ParallelContext *pcxt = node->pei->pcxt; + LaunchParallelWorkers(pcxt); + + /* Set up tuple queue readers to read the results. */ + if (pcxt->nworkers > 0) { + node->nreaders = 0; + node->reader = (TupleQueueReader **)palloc(pcxt->nworkers * sizeof(TupleQueueReader *)); + + for (i = 0; i < pcxt->nworkers; ++i) { + if (pcxt->worker[i].bgwhandle == NULL) + continue; + + shm_mq_set_handle(node->pei->tqueue[i], pcxt->worker[i].bgwhandle); + node->reader[node->nreaders++] = + CreateTupleQueueReader(node->pei->tqueue[i], fslot->tts_tupleDescriptor); + got_any_worker = true; + } + } + + /* No workers? Then never mind. */ + if (!got_any_worker) + ExecShutdownGatherWorkers(node); + } + + /* Run plan locally if no workers or not single-copy. */ + node->need_to_scan_locally = (node->reader == NULL) || + (!gather->single_copy && u_sess->attr.attr_sql.parallel_leader_participation); + node->initialized = true; + } + + /* + * Check to see if we're still projecting out tuples from a previous scan + * tuple (because there is a function-returning-set in the projection + * expressions). If so, try to project another one. + */ + if (node->ps.ps_TupFromTlist) { + resultSlot = ExecProject(node->ps.ps_ProjInfo, &isDone); + if (isDone == ExprMultipleResult) + return resultSlot; + /* Done with that source tuple... */ + node->ps.ps_TupFromTlist = false; + } + + /* + * Reset per-tuple memory context to free any expression evaluation + * storage allocated in the previous tuple cycle. Note we can't do this + * until we're done projecting. This will also clear any previous tuple + * returned by a TupleQueueReader; to make sure we don't leave a dangling + * pointer around, clear the working slot first. + */ + (void)ExecClearTuple(node->funnel_slot); + ExprContext *econtext = node->ps.ps_ExprContext; + ResetExprContext(econtext); + + /* Get and return the next tuple, projecting if necessary. 
*/ + for (;;) { + /* + * Get next tuple, either from one of our workers, or by running the + * plan ourselves. + */ + slot = gather_getnext(node); + if (TupIsNull(slot)) + return NULL; + + /* + * form the result tuple using ExecProject(), and return it --- unless + * the projection produces an empty set, in which case we must loop + * back around for another tuple + */ + econtext->ecxt_outertuple = slot; + resultSlot = ExecProject(node->ps.ps_ProjInfo, &isDone); + + if (isDone != ExprEndResult) { + node->ps.ps_TupFromTlist = (isDone == ExprMultipleResult); + return resultSlot; + } + } + + return slot; +} + +/* ---------------------------------------------------------------- + * ExecEndGather + * + * frees any storage allocated through C routines. + * ---------------------------------------------------------------- + */ +void ExecEndGather(GatherState *node) +{ + ExecShutdownGather(node); + ExecFreeExprContext(&node->ps); + (void)ExecClearTuple(node->ps.ps_ResultTupleSlot); + ExecEndNode(outerPlanState(node)); +} + +/* + * Read the next tuple. We might fetch a tuple from one of the tuple queues + * using gather_readnext, or if no tuple queue contains a tuple and the + * single_copy flag is not set, we might generate one locally instead. + */ +static TupleTableSlot *gather_getnext(GatherState *gatherstate) +{ + PlanState *outerPlan = outerPlanState(gatherstate); + TupleTableSlot *fslot = gatherstate->funnel_slot; + + while (gatherstate->reader != NULL || gatherstate->need_to_scan_locally) { + CHECK_FOR_INTERRUPTS(); + + if (gatherstate->reader != NULL) { + HeapTuple tup = gather_readnext(gatherstate); + if (HeapTupleIsValid(tup)) { + (void)ExecStoreTuple(tup, /* tuple to store */ + fslot, /* slot in which to store the tuple */ + InvalidBuffer, /* buffer associated with this tuple */ + true); /* pfree this pointer if not from heap */ + return fslot; + } + } + + if (gatherstate->need_to_scan_locally) { + TupleTableSlot *outerTupleSlot = ExecProcNode(outerPlan); + + if (!TupIsNull(outerTupleSlot)) + return outerTupleSlot; + + gatherstate->need_to_scan_locally = false; + } + } + + return ExecClearTuple(fslot); +} + +/* + * Attempt to read a tuple from one of our parallel workers. + */ +static HeapTuple gather_readnext(GatherState *gatherstate) +{ + int nvisited = 0; + + for (;;) { + bool readerdone = false; + + /* Check for async events, particularly messages from workers. */ + CHECK_FOR_INTERRUPTS(); + + /* Attempt to read a tuple, but don't block if none is available. */ + TupleQueueReader *reader = gatherstate->reader[gatherstate->nextreader]; + HeapTuple tup = TupleQueueReaderNext(reader, true, &readerdone); + + /* + * If this reader is done, remove it. If all readers are done, + * clean up remaining worker state. + */ + if (readerdone) { + Assert(!tup); + DestroyTupleQueueReader(reader); + --gatherstate->nreaders; + if (gatherstate->nreaders == 0) { + ExecShutdownGatherWorkers(gatherstate); + return NULL; + } + Size remainSize = sizeof(TupleQueueReader *) * (gatherstate->nreaders - gatherstate->nextreader); + if (remainSize != 0) { + int rc = memmove_s(&gatherstate->reader[gatherstate->nextreader], remainSize, + &gatherstate->reader[gatherstate->nextreader + 1], remainSize); + securec_check(rc, "", ""); + } + if (gatherstate->nextreader >= gatherstate->nreaders) { + gatherstate->nextreader = 0; + } + continue; + } + + /* If we got a tuple, return it. */ + if (tup) + return tup; + + /* + * Advance nextreader pointer in round-robin fashion. 
Note that we + * only reach this code if we weren't able to get a tuple from the + * current worker. We used to advance the nextreader pointer after + * every tuple, but it turns out to be much more efficient to keep + * reading from the same queue until that would require blocking. + */ + gatherstate->nextreader++; + if (gatherstate->nextreader >= gatherstate->nreaders) + gatherstate->nextreader = 0; + + /* Have we visited every (surviving) TupleQueueReader? */ + nvisited++; + if (nvisited >= gatherstate->nreaders) { + /* + * If (still) running plan locally, return NULL so caller can + * generate another tuple from the local copy of the plan. + */ + if (gatherstate->need_to_scan_locally) + return NULL; + + /* Nothing to do except wait for developments. */ + (void)WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET, 0); + CHECK_FOR_INTERRUPTS(); + ResetLatch(&t_thrd.proc->procLatch); + nvisited = 0; + } + } +} + +/* ---------------------------------------------------------------- + * ExecShutdownGatherWorkers + * + * Destroy the parallel workers. Collect all the stats after + * workers are stopped, else some work done by workers won't be + * accounted. + * ---------------------------------------------------------------- + */ +static void ExecShutdownGatherWorkers(GatherState *node) +{ + /* Shut down tuple queue readers before shutting down workers. */ + if (node->reader != NULL) { + for (int i = 0; i < node->nreaders; ++i) + DestroyTupleQueueReader(node->reader[i]); + + pfree(node->reader); + node->reader = NULL; + } + + /* Now shut down the workers. */ + if (node->pei != NULL) + ExecParallelFinish(node->pei); +} + +/* ---------------------------------------------------------------- + * ExecShutdownGather + * + * Destroy the setup for parallel workers including parallel context. + * Collect all the stats after workers are stopped, else some work + * done by workers won't be accounted. + * ---------------------------------------------------------------- + */ +void ExecShutdownGather(GatherState *node) +{ + ExecShutdownGatherWorkers(node); + + /* Now destroy the parallel context. */ + if (node->pei != NULL) { + ExecParallelCleanup(node->pei); + node->pei = NULL; + } +} + +/* ---------------------------------------------------------------- + * Join Support + * ---------------------------------------------------------------- + */ +/* ---------------------------------------------------------------- + * ExecReScanGather + * + * Re-initialize the workers and rescans a relation via them. + * ---------------------------------------------------------------- + */ +void ExecReScanGather(GatherState *node) +{ + /* + * Re-initialize the parallel workers to perform rescan of relation. + * We want to gracefully shutdown all the workers so that they + * should be able to propagate any error or other information to master + * backend before dying. Parallel context will be reused for rescan. 
+ */ + ExecShutdownGatherWorkers(node); + + node->initialized = false; + + if (node->pei) + ExecParallelReinitialize(node->pei); + + ExecReScan(node->ps.lefttree); +} + diff --git a/src/gausskernel/runtime/executor/nodeSamplescan.cpp b/src/gausskernel/runtime/executor/nodeSamplescan.cpp index 6fb9e0e18..2e2b644fa 100755 --- a/src/gausskernel/runtime/executor/nodeSamplescan.cpp +++ b/src/gausskernel/runtime/executor/nodeSamplescan.cpp @@ -90,6 +90,16 @@ AbsTblScanDesc InitSampleScanDesc(ScanState* scanstate, Relation currentRelation static inline HeapTuple SampleFetchNextTuple(SeqScanState* node) { HeapScanDesc heapScanDesc = GetHeapScanDesc(node->ss_currentScanDesc); + if (heapScanDesc == NULL) { + /* + * We reach here if the scan is not parallel, or if we're executing + * a scan that was intended to be parallel serially. + * It must be a non-partitioned table. + */ + Assert(!node->isPartTbl); + heapScanDesc = (HeapScanDesc)InitSampleScanDesc(node, node->ss_currentRelation); + node->ss_currentScanDesc = (AbsTblScanDesc)heapScanDesc; + } heapScanDesc->rs_ss_accessor = node->ss_scanaccessor; /* @@ -419,7 +429,7 @@ void RowTableSample::getMaxOffset() { HeapScanDesc heapscan = NULL; AbsTblScanDesc scan = sampleScanState->ss_currentScanDesc; - bool pagemode = GetHeapScanDesc(scan)->rs_pageatatime; + bool pagemode = (GetHeapScanDesc(scan)->rs_flags) & SO_ALLOW_PAGEMODE; Page page; Assert(BlockNumberIsValid(currentBlock)); @@ -456,7 +466,7 @@ void RowTableSample::getMaxOffset() ScanValid RowTableSample::scanTup() { HeapScanDesc scan = GetHeapScanDesc(sampleScanState->ss_currentScanDesc); - bool pagemode = scan->rs_pageatatime; + bool pagemode = scan->rs_flags & SO_ALLOW_PAGEMODE; HeapTuple tuple = &(scan->rs_ctup); Snapshot snapshot = scan->rs_snapshot; ItemId itemid; diff --git a/src/gausskernel/runtime/executor/nodeSeqscan.cpp b/src/gausskernel/runtime/executor/nodeSeqscan.cpp index a7611dbac..51ad0a980 100755 --- a/src/gausskernel/runtime/executor/nodeSeqscan.cpp +++ b/src/gausskernel/runtime/executor/nodeSeqscan.cpp @@ -39,6 +39,8 @@ #include "utils/rel_gs.h" #include "nodes/execnodes.h" +static AbsTblScanDesc InitBeginScan(SeqScanState* node, Relation current_relation); + extern void StrategyGetRingPrefetchQuantityAndTrigger(BufferAccessStrategy strategy, int* quantity, int* trigger); /* ---------------------------------------------------------------- * prefetch_pages @@ -202,6 +204,18 @@ static TupleTableSlot* SeqNext(SeqScanState* node) estate = node->ps.state; direction = estate->es_direction; slot = node->ss_ScanTupleSlot; + + if (scanDesc == NULL) { + /* + * We reach here if the scan is not parallel, or if we're executing + * a scan that was intended to be parallel serially. + * It must be a non-partitioned table. + */ + Assert(!node->isPartTbl); + scanDesc = InitBeginScan(node, node->ss_currentRelation); + node->ss_currentScanDesc = scanDesc; + } + GetHeapScanDesc(scanDesc)->rs_ss_accessor = node->ss_scanaccessor; /* @@ -341,10 +355,13 @@ void InitScanRelation(SeqScanState* node, EState* estate) * open that relation and acquire appropriate lock on it. */ current_relation = ExecOpenScanRelation(estate, ((SeqScan*)node->ps.plan)->scanrelid); - if (!node->isPartTbl) { - /* add qual for redis */ - current_scan_desc = InitBeginScan(node, current_relation); + /* + * For non-partitioned table, we will do InitBeginScan later to check whether we can do + * parallel scan or not. Check ExecInitSeqScan and SeqNext for details. 
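+ * (If the scan ends up not running in parallel, SeqNext notices the missing scan
+ * descriptor and starts an ordinary serial scan instead.)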
+ * But we still need to set up the qual here; otherwise ExecScan would have no qual to apply.
+ */
+ (void)reset_scan_qual(current_relation, node);
 } else {
 plan = (SeqScan*)node->ps.plan;
@@ -507,7 +524,15 @@ SeqScanState* ExecInitSeqScan(SeqScan* node, EState* estate, int eflags)
 abs_tbl_init_parallel_seqscan(
 scanstate->ss_currentScanDesc, scanstate->ps.plan->dop, scanstate->partScanDirection);
 } else {
- scanstate->ps.stubType = PST_Scan;
+ /*
+ * For a non-partitioned table, ss_currentScanDesc may still be NULL here, because we
+ * defer starting the scan until we know whether it can run in parallel.
+ * Check InitScanRelation and SeqNext for details.
+ */
+ if (!node->isPartTbl) {
+ scanstate->ps.stubType = PST_None;
+ } else {
+ scanstate->ps.stubType = PST_Scan;
+ }
 }
 scanstate->ps.ps_TupFromTlist = false;
@@ -602,28 +627,88 @@ void ExecReScanSeqScan(SeqScanState* node)
 }
 scan = node->ss_currentScanDesc;
- if (node->isPartTbl) {
- if (PointerIsValid(node->partitions)) {
- /* end scan the prev partition first, */
- abs_tbl_endscan(scan);
- /* finally init Scan for the next partition */
- ExecInitNextPartitionForSeqScan(node);
+ if (scan != NULL) {
+ if (node->isPartTbl) {
+ if (PointerIsValid(node->partitions)) {
+ /* end scan the prev partition first, */
+ abs_tbl_endscan(scan);
- scan = node->ss_currentScanDesc;
+ /* finally init Scan for the next partition */
+ ExecInitNextPartitionForSeqScan(node);
+
+ scan = node->ss_currentScanDesc;
+ }
+ } else {
+ abs_tbl_rescan(scan, NULL);
 }
- } else {
- abs_tbl_rescan(scan, NULL);
- }
- abs_tbl_init_parallel_seqscan(scan, node->ps.plan->dop, node->partScanDirection);
+ abs_tbl_init_parallel_seqscan(scan, node->ps.plan->dop, node->partScanDirection);
+ }
 ExecScanReScan((ScanState*)node);
 }
 /* ----------------------------------------------------------------
- * ExecSeqMarkPos(node)
+ * ExecSeqScanEstimate
 *
- * Marks scan position.
+ * estimates the shared-memory space needed for this node's parallel heap scan descriptor.
+ * ----------------------------------------------------------------
+ */
+void ExecSeqScanEstimate(SeqScanState *node, ParallelContext *pcxt)
+{
+ EState *estate = node->ps.state;
+ node->pscan_len = heap_parallelscan_estimate(estate->es_snapshot);
+}
+
+/* ----------------------------------------------------------------
+ * ExecSeqScanInitializeDSM
+ *
+ * Set up a parallel heap scan descriptor.
+ * ----------------------------------------------------------------
+ */
+void ExecSeqScanInitializeDSM(SeqScanState *node, ParallelContext *pcxt, int nodeid)
+{
+ EState *estate = node->ps.state;
+ knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg;
+
+ /* Can't use palloc here: ExecInitParallelPlan has already switched back to the old memory context. */
+ cxt->pwCtx->pscan[nodeid] = (ParallelHeapScanDesc)MemoryContextAllocZero(cxt->memCtx, node->pscan_len);
+ heap_parallelscan_initialize(cxt->pwCtx->pscan[nodeid], node->pscan_len, node->ss_currentRelation,
+ estate->es_snapshot);
+ cxt->pwCtx->pscan[nodeid]->plan_node_id = node->ps.plan->plan_node_id;
+ node->ss_currentScanDesc =
+ (AbsTblScanDesc)heap_beginscan_parallel(node->ss_currentRelation, cxt->pwCtx->pscan[nodeid]);
+}
+
+/* ----------------------------------------------------------------
+ * ExecSeqScanInitializeWorker
+ *
+ * Copy relevant information from TOC into planstate.
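+ * Here the "TOC" is the knl_u_parallel_context populated by the leader; we locate
+ * our ParallelHeapScanDesc by matching plan_node_id and then join the parallel scan.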
+ * ---------------------------------------------------------------- + */ +void ExecSeqScanInitializeWorker(SeqScanState *node, void *context) +{ + ParallelHeapScanDesc pscan = NULL; + knl_u_parallel_context *cxt = (knl_u_parallel_context *)context; + + for (int i = 0; i < cxt->pwCtx->pscan_num; i++) { + if (node->ps.plan->plan_node_id == cxt->pwCtx->pscan[i]->plan_node_id) { + pscan = cxt->pwCtx->pscan[i]; + break; + } + } + + if (pscan == NULL) { + ereport(ERROR, (errmsg("could not find plan info, plan node id:%d", node->ps.plan->plan_node_id))); + } + + node->ss_currentScanDesc = (AbsTblScanDesc)heap_beginscan_parallel(node->ss_currentRelation, pscan); +} + +/* ---------------------------------------------------------------- + * ExecSeqMarkPos(node) + * + * Marks scan position. * ---------------------------------------------------------------- */ void ExecSeqMarkPos(SeqScanState* node) diff --git a/src/gausskernel/runtime/executor/spi.cpp b/src/gausskernel/runtime/executor/spi.cpp index 98343c7d4..8643e1245 100755 --- a/src/gausskernel/runtime/executor/spi.cpp +++ b/src/gausskernel/runtime/executor/spi.cpp @@ -421,6 +421,18 @@ void AtEOSubXact_SPI(bool isCommit, SubTransactionId mySubid, bool stpRollback, } } +/* + * Are we executing inside a procedure (that is, a nonatomic SPI context)? + */ +bool SPI_inside_nonatomic_context(void) +{ + if (u_sess->SPI_cxt._current == NULL) + return false; /* not in any SPI context at all */ + if (u_sess->SPI_cxt._current->atomic) + return false; /* it's atomic (ie function not procedure) */ + return true; +} + /* Pushes SPI stack to allow recursive SPI calls */ void SPI_push(void) { @@ -1382,23 +1394,27 @@ static Portal SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, ParamL } /* - * If told to be read-only, we'd better check for read-only queries. This - * can't be done earlier because we need to look at the finished, planned - * queries. (In particular, we don't want to do it between GetCachedPlan - * and PortalDefineQuery, because throwing an error between those steps - * would result in leaking our plancache refcount.) + * If told to be read-only, or in parallel mode, verify that this query is + * in fact read-only. This can't be done earlier because we need to look + * at the finished, planned queries. (In particular, we don't want to do + * it between GetCachedPlan and PortalDefineQuery, because throwing an + * error between those steps would result in leaking our plancache refcount.) 
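+ * When the check fires because of parallel mode rather than read-only-ness, the
+ * statement is rejected through PreventCommandIfParallelMode instead.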
*/ - if (read_only) { + if (read_only || IsInParallelMode()) { ListCell *lc = NULL; foreach (lc, stmt_list) { Node *pstmt = (Node *)lfirst(lc); if (!CommandIsReadOnly(pstmt)) { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - /* translator: %s is a SQL statement name */ - errmsg("%s is not allowed in a non-volatile function", CreateCommandTag(pstmt)), - errhint("You can change function definition."))); + if (read_only) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /* translator: %s is a SQL statement name */ + errmsg("%s is not allowed in a non-volatile function", CreateCommandTag(pstmt)), + errhint("You can change function definition."))); + } else { + PreventCommandIfParallelMode(CreateCommandTag((Node *) pstmt)); + } } } } @@ -2153,6 +2169,10 @@ static int _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI, Snapshot sn errmsg("%s is not allowed in a non-volatile function", CreateCommandTag(stmt)))); } + if (IsInParallelMode() && !CommandIsReadOnly(stmt)) { + PreventCommandIfParallelMode(CreateCommandTag((Node *) stmt)); + } + /* * If not read-only mode, advance the command counter before each * command and update the snapshot. @@ -2360,6 +2380,7 @@ static ParamListInfo _SPI_convert_params(int nargs, Oid *argtypes, Datum *Values param_list_info->parserSetupArg = NULL; param_list_info->params_need_process = false; param_list_info->numParams = nargs; + param_list_info->paramMask = NULL; for (i = 0; i < nargs; i++) { ParamExternData *prm = ¶m_list_info->params[i]; diff --git a/src/gausskernel/runtime/executor/tqueue.cpp b/src/gausskernel/runtime/executor/tqueue.cpp new file mode 100644 index 000000000..96545a9c5 --- /dev/null +++ b/src/gausskernel/runtime/executor/tqueue.cpp @@ -0,0 +1,905 @@ +/* ------------------------------------------------------------------------- + * + * tqueue.c + * Use shm_mq to send & receive tuples between parallel backends + * + * A DestReceiver of type DestTupleQueue, which is a TQueueDestReceiver + * under the hood, writes tuples from the executor to a shm_mq. + * + * A TupleQueueReader reads tuples from a shm_mq and returns the tuples. 
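+ *
+ * Tuples travel through the queue in ordinary HeapTuple format. A one-byte message
+ * switches the stream between "data" and "control" mode; control messages carry
+ * tuple descriptors so that transient record typmods can be remapped by the reader.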
+ * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/executor/tqueue.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup.h" +#include "catalog/pg_type.h" +#include "executor/tqueue.h" +#include "funcapi.h" +#include "lib/stringinfo.h" +#include "miscadmin.h" +#include "utils/array.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rangetypes.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +typedef enum { + TQUEUE_REMAP_NONE, /* no special processing required */ + TQUEUE_REMAP_ARRAY, /* array */ + TQUEUE_REMAP_RANGE, /* range */ + TQUEUE_REMAP_RECORD /* composite type, named or anonymous */ +} RemapClass; + +typedef struct { + int natts; + RemapClass mapping[FLEXIBLE_ARRAY_MEMBER]; +} RemapInfo; + +typedef struct { + DestReceiver pub; + shm_mq_handle *handle; + MemoryContext tmpcontext; + HTAB *recordhtab; + char mode; + TupleDesc tupledesc; + RemapInfo *remapinfo; +} TQueueDestReceiver; + +typedef struct RecordTypemodMap { + int remotetypmod; + int localtypmod; +} RecordTypemodMap; + +struct TupleQueueReader { + shm_mq_handle *queue; + char mode; + TupleDesc tupledesc; + RemapInfo *remapinfo; + HTAB *typmodmap; +}; + +#define TUPLE_QUEUE_MODE_CONTROL 'c' +#define TUPLE_QUEUE_MODE_DATA 'd' + +static void tqueueWalk(TQueueDestReceiver *tqueue, RemapClass walktype, Datum value); +static void tqueueWalkRecord(TQueueDestReceiver *tqueue, Datum value); +static void tqueueWalkArray(TQueueDestReceiver *tqueue, Datum value); +static void tqueueWalkRange(TQueueDestReceiver *tqueue, Datum value); +static void tqueueSendTypmodInfo(TQueueDestReceiver *tqueue, int typmod, TupleDesc tupledesc); +static void TupleQueueHandleControlMessage(TupleQueueReader *reader, Size nbytes, char *data); +static HeapTuple TupleQueueHandleDataMessage(TupleQueueReader *reader, Size nbytes, HeapTupleHeader data); +static HeapTuple TupleQueueRemapTuple(TupleQueueReader *reader, TupleDesc tupledesc, RemapInfo *remapinfo, + HeapTuple tuple); +static Datum TupleQueueRemap(TupleQueueReader *reader, RemapClass remapclass, Datum value); +static Datum TupleQueueRemapArray(TupleQueueReader *reader, Datum value); +static Datum TupleQueueRemapRange(TupleQueueReader *reader, Datum value); +static Datum TupleQueueRemapRecord(TupleQueueReader *reader, Datum value); +static RemapClass GetRemapClass(Oid type_id); +static RemapInfo *BuildRemapInfo(TupleDesc tupledesc); + + +/* + * Receive a tuple from a query, and send it to the designated shm_mq. + * + * Returns true if successful, false if shm_mq has been detached. + */ +static void tqueueReceiveSlot(TupleTableSlot *slot, DestReceiver *self) +{ + TQueueDestReceiver *tqueue = (TQueueDestReceiver *)self; + TupleDesc tupledesc = slot->tts_tupleDescriptor; + + /* + * Test to see whether the tupledesc has changed; if so, set up for the + * new tupledesc. This is a strange test both because the executor really + * shouldn't change the tupledesc, and also because it would be unsafe if + * the old tupledesc could be freed and a new one allocated at the same + * address. But since some very old code in printtup.c uses a similar + * test, we adopt it here as well. 
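+ * If the descriptor does change, we simply discard the old remap info and rebuild
+ * it for the new descriptor.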
+ */ + if (tqueue->tupledesc != tupledesc) { + if (tqueue->remapinfo != NULL) + pfree(tqueue->remapinfo); + tqueue->remapinfo = BuildRemapInfo(tupledesc); + tqueue->tupledesc = tupledesc; + } + + HeapTuple tuple = ExecMaterializeSlot(slot); + + /* + * When, because of the types being transmitted, no record typemod mapping + * can be needed, we can skip a good deal of work. + */ + if (tqueue->remapinfo != NULL) { + RemapInfo *remapinfo = tqueue->remapinfo; + MemoryContext oldcontext = NULL; + + /* Deform the tuple so we can examine it, if not done already. */ + slot_getallattrs(slot); + + /* Iterate over each attribute and search it for transient typemods. */ + Assert(slot->tts_tupleDescriptor->natts == remapinfo->natts); + for (AttrNumber i = 0; i < remapinfo->natts; ++i) { + /* Ignore nulls and types that don't need special handling. */ + if (slot->tts_isnull[i] || remapinfo->mapping[i] == TQUEUE_REMAP_NONE) + continue; + + /* Switch to temporary memory context to avoid leaking. */ + if (oldcontext == NULL) { + if (tqueue->tmpcontext == NULL) + tqueue->tmpcontext = AllocSetContextCreate(TopMemoryContext, "tqueue temporary context", + ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); + oldcontext = MemoryContextSwitchTo(tqueue->tmpcontext); + } + + /* Invoke the appropriate walker function. */ + tqueueWalk(tqueue, remapinfo->mapping[i], slot->tts_values[i]); + } + + /* If we used the temp context, reset it and restore prior context. */ + if (oldcontext != NULL) { + (void)MemoryContextSwitchTo(oldcontext); + MemoryContextReset(tqueue->tmpcontext); + } + + /* If we entered control mode, switch back to data mode. */ + if (tqueue->mode != TUPLE_QUEUE_MODE_DATA) { + tqueue->mode = TUPLE_QUEUE_MODE_DATA; + (void)shm_mq_send(tqueue->handle, sizeof(char), &tqueue->mode, false); + } + } + + /* Send the tuple itself. */ + (void)shm_mq_send(tqueue->handle, tuple->t_len, tuple->t_data, false); +} + +/* + * Invoke the appropriate walker function based on the given RemapClass. + */ +static void tqueueWalk(TQueueDestReceiver *tqueue, RemapClass walktype, Datum value) +{ + check_stack_depth(); + + switch (walktype) { + case TQUEUE_REMAP_NONE: + break; + case TQUEUE_REMAP_ARRAY: + tqueueWalkArray(tqueue, value); + break; + case TQUEUE_REMAP_RANGE: + tqueueWalkRange(tqueue, value); + break; + case TQUEUE_REMAP_RECORD: + tqueueWalkRecord(tqueue, value); + break; + } +} + +/* + * Walk a record and send control messages for transient record types + * contained therein. + */ +static void tqueueWalkRecord(TQueueDestReceiver *tqueue, Datum value) +{ + /* Extract typmod from tuple. */ + HeapTupleHeader tup = DatumGetHeapTupleHeader(value); + Oid type_id = HeapTupleHeaderGetTypeId(tup); + int32 typmod = HeapTupleHeaderGetTypMod(tup); + + /* Look up tuple descriptor in typecache. */ + TupleDesc tupledesc = lookup_rowtype_tupdesc(type_id, typmod); + + /* + * If this is a transient record time, send its TupleDesc as a control + * message. (tqueueSendTypemodInfo is smart enough to do this only once + * per typmod.) + */ + if (type_id == RECORDOID) + tqueueSendTypmodInfo(tqueue, typmod, tupledesc); + + /* + * Build the remap information for this tupledesc. We might want to think + * about keeping a cache of this information keyed by typeid and typemod, + * but let's keep it simple for now. + */ + RemapInfo *remapinfo = BuildRemapInfo(tupledesc); + + /* + * If remapping is required, deform the tuple and process each field. 
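+ * Each non-null field is fed back through tqueueWalk, so nested arrays, ranges,
+ * and records are handled recursively.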
When + * BuildRemapInfo is null, the data types are such that there can be no + * transient record types here, so we can skip all this work. + */ + if (remapinfo != NULL) { + HeapTupleData tdata; + + /* Deform the tuple so we can check each column within. */ + Datum *values = (Datum *)palloc(tupledesc->natts * sizeof(Datum)); + bool *isnull = (bool *)palloc(tupledesc->natts * sizeof(bool)); + tdata.t_len = HeapTupleHeaderGetDatumLength(tup); + ItemPointerSetInvalid(&(tdata.t_self)); + tdata.t_tableOid = InvalidOid; + tdata.t_data = tup; + heap_deform_tuple(&tdata, tupledesc, values, isnull); + + /* Recursively check each non-NULL attribute. */ + for (AttrNumber i = 0; i < tupledesc->natts; ++i) { + if (!isnull[i]) { + tqueueWalk(tqueue, remapinfo->mapping[i], values[i]); + } + } + } + + /* Release reference count acquired by lookup_rowtype_tupdesc. */ + DecrTupleDescRefCount(tupledesc); +} + +/* + * Walk a record and send control messages for transient record types + * contained therein. + */ +static void tqueueWalkArray(TQueueDestReceiver *tqueue, Datum value) +{ + ArrayType *arr = DatumGetArrayTypeP(value); + Oid type_id = ARR_ELEMTYPE(arr); + int16 typlen; + bool typbyval = false; + char typalign; + Datum *elem_values = NULL; + bool *elem_nulls = NULL; + int num_elems; + RemapClass remapclass = GetRemapClass(type_id); + + /* + * If the elements of the array don't need to be walked, we shouldn't have + * been called in the first place: GetRemapClass should have returned NULL + * when asked about this array type. + */ + Assert(remapclass != TQUEUE_REMAP_NONE); + + /* Deconstruct the array. */ + get_typlenbyvalalign(type_id, &typlen, &typbyval, &typalign); + deconstruct_array(arr, type_id, typlen, typbyval, typalign, &elem_values, &elem_nulls, &num_elems); + + /* Walk each element. */ + for (int i = 0; i < num_elems; ++i) { + if (!elem_nulls[i]) { + tqueueWalk(tqueue, remapclass, elem_values[i]); + } + } +} + +/* + * Walk a range type and send control messages for transient record types + * contained therein. + */ +static void tqueueWalkRange(TQueueDestReceiver *tqueue, Datum value) +{ + RangeType *range = DatumGetRangeType(value); + Oid type_id = RangeTypeGetOid(range); + RangeBound lower; + RangeBound upper; + bool empty = false; + + /* + * Extract the lower and upper bounds. It might be worth implementing + * some caching scheme here so that we don't look up the same typeids in + * the type cache repeatedly, but for now let's keep it simple. + */ + TypeCacheEntry *typcache = lookup_type_cache(type_id, TYPECACHE_RANGE_INFO); + if (typcache->rngelemtype == NULL) + ereport(ERROR, (errmsg("type %u is not a range type", type_id))); + range_deserialize(typcache, range, &lower, &upper, &empty); + + /* Nothing to do for an empty range. */ + if (empty) { + return; + } + + /* + * If the range bounds don't need to be walked, we shouldn't have been + * called in the first place: GetRemapClass should have returned NULL when + * asked about this range type. + */ + RemapClass remapclass = GetRemapClass(type_id); + Assert(remapclass != TQUEUE_REMAP_NONE); + + /* Walk each bound, if present. */ + if (!upper.infinite) + tqueueWalk(tqueue, remapclass, upper.val); + if (!lower.infinite) + tqueueWalk(tqueue, remapclass, lower.val); +} + +/* + * Send tuple descriptor information for a transient typemod, unless we've + * already done so previously. 
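+ * The recordhtab hash table, keyed by typmod, remembers which descriptors have
+ * already been sent over this queue.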
+ */ +static void tqueueSendTypmodInfo(TQueueDestReceiver *tqueue, int typmod, TupleDesc tupledesc) +{ + StringInfoData buf; + bool found = false; + AttrNumber i; + + /* Initialize hash table if not done yet. */ + if (tqueue->recordhtab == NULL) { + HASHCTL ctl; + + ctl.keysize = sizeof(int); + ctl.entrysize = sizeof(int); + ctl.hcxt = TopMemoryContext; + tqueue->recordhtab = hash_create("tqueue record hashtable", 100, &ctl, HASH_ELEM | HASH_CONTEXT); + } + + /* Have we already seen this record type? If not, must report it. */ + (void)hash_search(tqueue->recordhtab, &typmod, HASH_ENTER, &found); + if (found) { + return; + } + + /* If message queue is in data mode, switch to control mode. */ + if (tqueue->mode != TUPLE_QUEUE_MODE_CONTROL) { + tqueue->mode = TUPLE_QUEUE_MODE_CONTROL; + (void)shm_mq_send(tqueue->handle, sizeof(char), &tqueue->mode, false); + } + + /* Assemble a control message. */ + initStringInfo(&buf); + appendBinaryStringInfo(&buf, (char *)&typmod, sizeof(int)); + appendBinaryStringInfo(&buf, (char *)&tupledesc->natts, sizeof(int)); + appendBinaryStringInfo(&buf, (char *)&tupledesc->tdhasoid, sizeof(bool)); + for (i = 0; i < tupledesc->natts; ++i) + appendBinaryStringInfo(&buf, (char *)tupledesc->attrs[i], sizeof(FormData_pg_attribute)); + + /* Send control message. */ + (void)shm_mq_send(tqueue->handle, buf.len, buf.data, false); +} + + +/* + * Prepare to receive tuples from executor. + */ +static void tqueueStartupReceiver(DestReceiver *self, int operation, TupleDesc typeinfo) +{ + /* do nothing */ +} + +/* + * Clean up at end of an executor run + */ +static void tqueueShutdownReceiver(DestReceiver *self) +{ + TQueueDestReceiver *tqueue = (TQueueDestReceiver *)self; + + if (tqueue->handle != NULL) { + shm_mq_detach(tqueue->handle); + tqueue->handle = NULL; + } +} + +/* + * Destroy receiver when done with it + */ +static void tqueueDestroyReceiver(DestReceiver *self) +{ + TQueueDestReceiver *tqueue = (TQueueDestReceiver *)self; + + if (tqueue->tmpcontext != NULL) + MemoryContextDelete(tqueue->tmpcontext); + if (tqueue->recordhtab != NULL) + hash_destroy(tqueue->recordhtab); + if (tqueue->remapinfo != NULL) + pfree(tqueue->remapinfo); + pfree(self); +} + +/* + * Create a DestReceiver that writes tuples to a tuple queue. + */ +DestReceiver *CreateTupleQueueDestReceiver(shm_mq_handle *handle) +{ + TQueueDestReceiver *self = (TQueueDestReceiver *)palloc0(sizeof(TQueueDestReceiver)); + + self->pub.receiveSlot = tqueueReceiveSlot; + self->pub.rStartup = tqueueStartupReceiver; + self->pub.rShutdown = tqueueShutdownReceiver; + self->pub.rDestroy = tqueueDestroyReceiver; + self->pub.mydest = DestTupleQueue; + self->handle = handle; + self->tmpcontext = NULL; + self->recordhtab = NULL; + self->mode = TUPLE_QUEUE_MODE_DATA; + self->remapinfo = NULL; + + return (DestReceiver *)self; +} + +/* + * Create a tuple queue reader. + */ +TupleQueueReader *CreateTupleQueueReader(shm_mq_handle *handle, TupleDesc tupledesc) +{ + TupleQueueReader *reader = (TupleQueueReader *)palloc0(sizeof(TupleQueueReader)); + + reader->queue = handle; + reader->mode = TUPLE_QUEUE_MODE_DATA; + reader->tupledesc = tupledesc; + reader->remapinfo = BuildRemapInfo(tupledesc); + + return reader; +} + +/* + * Destroy a tuple queue reader. + * + * Note: cleaning up the underlying shm_mq is the caller's responsibility. + * We won't access it here, as it may be detached already. 
+ */ +void DestroyTupleQueueReader(TupleQueueReader *reader) +{ + if (reader->queue != NULL) { + shm_mq_detach(reader->queue); + reader->queue = NULL; + } + if (reader->remapinfo != NULL) + pfree(reader->remapinfo); + pfree(reader); +} + +/* + * Fetch a tuple from a tuple queue reader. + * + * The return value is NULL if there are no remaining tuples or if + * nowait = true and no tuple is ready to return. *done, if not NULL, + * is set to true when there are no remaining tuples and otherwise to false. + * + * The returned tuple, if any, is allocated in CurrentMemoryContext. + * Note that this routine must not leak memory! (We used to allow that, + * but not any more.) + * + * Even when shm_mq_receive() returns SHM_MQ_WOULD_BLOCK, this can still + * accumulate bytes from a partially-read message, so it's useful to call + * this with nowait = true even if nothing is returned. + */ +HeapTuple TupleQueueReaderNext(TupleQueueReader *reader, bool nowait, bool *done) +{ + if (done != NULL) + *done = false; + + for (;;) { + Size nbytes; + void *data = NULL; + /* Attempt to read a message. */ + shm_mq_result result = shm_mq_receive(reader->queue, &nbytes, &data, nowait); + /* If queue is detached, set *done and return NULL. */ + if (result == SHM_MQ_DETACHED) { + if (done != NULL) + *done = true; + return NULL; + } + + /* In non-blocking mode, bail out if no message ready yet. */ + if (result == SHM_MQ_WOULD_BLOCK) + return NULL; + Assert(result == SHM_MQ_SUCCESS); + + /* + * OK, we got a message. Process it. + * + * One-byte messages are mode switch messages, so that we can switch + * between "control" and "data" mode. When in "data" mode, each + * message (unless exactly one byte) is a tuple. When in "control" + * mode, each message provides a transient-typmod-to-tupledesc mapping + * so we can interpret future tuples. + */ + if (nbytes == 1) { + /* Mode switch message. */ + reader->mode = ((char *)data)[0]; + } else if (reader->mode == TUPLE_QUEUE_MODE_DATA) { + /* Tuple data. */ + return TupleQueueHandleDataMessage(reader, nbytes, (HeapTupleHeader)data); + } else if (reader->mode == TUPLE_QUEUE_MODE_CONTROL) { + /* Control message, describing a transient record type. */ + TupleQueueHandleControlMessage(reader, nbytes, (char *)data); + } else { + ereport(ERROR, (errmsg("invalid mode: %d", (int)reader->mode))); + } + } +} + +/* + * Handle a data message - that is, a tuple - from the remote side. + */ +static HeapTuple TupleQueueHandleDataMessage(TupleQueueReader *reader, Size nbytes, HeapTupleHeader data) +{ + HeapTupleData htup; + + ItemPointerSetInvalid(&htup.t_self); + htup.t_tableOid = InvalidOid; + htup.t_len = (uint32)nbytes; + htup.t_data = data; + + return TupleQueueRemapTuple(reader, reader->tupledesc, reader->remapinfo, &htup); +} + +/* + * Remap tuple typmods per control information received from remote side. + */ +static HeapTuple TupleQueueRemapTuple(TupleQueueReader *reader, TupleDesc tupledesc, RemapInfo *remapinfo, + HeapTuple tuple) +{ + /* + * If no remapping is necessary, just copy the tuple into a single + * palloc'd chunk, as caller will expect. + */ + if (remapinfo == NULL) + return heap_copytuple(tuple); + + /* Deform tuple so we can remap record typmods for individual attrs. */ + Datum *values = (Datum *)palloc(tupledesc->natts * sizeof(Datum)); + bool *isnull = (bool *)palloc(tupledesc->natts * sizeof(bool)); + heap_deform_tuple(tuple, tupledesc, values, isnull); + Assert(tupledesc->natts == remapinfo->natts); + + /* Recursively check each non-NULL attribute. 
*/ + for (int i = 0; i < tupledesc->natts; ++i) { + if (isnull[i] || remapinfo->mapping[i] == TQUEUE_REMAP_NONE) + continue; + values[i] = TupleQueueRemap(reader, remapinfo->mapping[i], values[i]); + } + + /* Reform the modified tuple. */ + return heap_form_tuple(tupledesc, values, isnull); +} + +/* + * Remap a value based on the specified remap class. + */ +static Datum TupleQueueRemap(TupleQueueReader *reader, RemapClass remapclass, Datum value) +{ + check_stack_depth(); + + switch (remapclass) { + case TQUEUE_REMAP_NONE: + /* caller probably shouldn't have called us at all, but... */ + return value; + + case TQUEUE_REMAP_ARRAY: + return TupleQueueRemapArray(reader, value); + + case TQUEUE_REMAP_RANGE: + return TupleQueueRemapRange(reader, value); + + case TQUEUE_REMAP_RECORD: + return TupleQueueRemapRecord(reader, value); + } + + ereport(ERROR, (errmsg("unknown remap class: %d", (int)remapclass))); + return (Datum)0; +} + +/* + * Remap an array. + */ +static Datum TupleQueueRemapArray(TupleQueueReader *reader, Datum value) +{ + ArrayType *arr = DatumGetArrayTypeP(value); + Oid type_id = ARR_ELEMTYPE(arr); + int16 typlen; + bool typbyval; + char typalign; + Datum *elem_values = NULL; + bool *elem_nulls = NULL; + int num_elems; + RemapClass remapclass = GetRemapClass(type_id); + + /* + * If the elements of the array don't need to be walked, we shouldn't have + * been called in the first place: GetRemapClass should have returned NULL + * when asked about this array type. + */ + Assert(remapclass != TQUEUE_REMAP_NONE); + + /* Deconstruct the array. */ + get_typlenbyvalalign(type_id, &typlen, &typbyval, &typalign); + deconstruct_array(arr, type_id, typlen, typbyval, typalign, &elem_values, &elem_nulls, &num_elems); + + /* Remap each element. */ + for (int i = 0; i < num_elems; ++i) { + if (!elem_nulls[i]) { + elem_values[i] = TupleQueueRemap(reader, remapclass, elem_values[i]); + } + } + + /* Reconstruct and return the array. */ + arr = construct_md_array(elem_values, elem_nulls, ARR_NDIM(arr), ARR_DIMS(arr), ARR_LBOUND(arr), type_id, typlen, + typbyval, typalign); + return PointerGetDatum(arr); +} + +/* + * Remap a range type. + */ +static Datum TupleQueueRemapRange(TupleQueueReader *reader, Datum value) +{ + RangeType *range = DatumGetRangeType(value); + Oid type_id = RangeTypeGetOid(range); + RangeBound lower; + RangeBound upper; + bool empty = false; + + /* + * Extract the lower and upper bounds. As in tqueueWalkRange, some + * caching might be a good idea here. + */ + TypeCacheEntry *typcache = lookup_type_cache(type_id, TYPECACHE_RANGE_INFO); + if (typcache->rngelemtype == NULL) + ereport(ERROR, (errmsg("type %u is not a range type", type_id))); + range_deserialize(typcache, range, &lower, &upper, &empty); + + /* Nothing to do for an empty range. */ + if (empty) + return value; + + /* + * If the range bounds don't need to be walked, we shouldn't have been + * called in the first place: GetRemapClass should have returned NULL when + * asked about this range type. + */ + RemapClass remapclass = GetRemapClass(type_id); + Assert(remapclass != TQUEUE_REMAP_NONE); + + /* Remap each bound, if present. */ + if (!upper.infinite) + upper.val = TupleQueueRemap(reader, remapclass, upper.val); + if (!lower.infinite) + lower.val = TupleQueueRemap(reader, remapclass, lower.val); + + /* And reserialize. */ + range = range_serialize(typcache, &lower, &upper, empty); + return RangeTypeGetDatum(range); +} + +/* + * Remap a record. 
+ */
+static Datum TupleQueueRemapRecord(TupleQueueReader *reader, Datum value)
+{
+ HeapTupleData htup;
+
+ /* Fetch type OID and typemod. */
+ HeapTupleHeader tup = DatumGetHeapTupleHeader(value);
+ Oid type_id = HeapTupleHeaderGetTypeId(tup);
+ int typmod = HeapTupleHeaderGetTypMod(tup);
+
+ /* If transient record, replace remote typmod with local typmod. */
+ if (type_id == RECORDOID) {
+ Assert(reader->typmodmap != NULL);
+ RecordTypemodMap *mapent = (RecordTypemodMap *)hash_search(reader->typmodmap, &typmod, HASH_FIND, NULL);
+ if (mapent == NULL)
+ ereport(ERROR, (errmsg("found unrecognized remote typmod %d", typmod)));
+ typmod = mapent->localtypmod;
+ }
+
+ /*
+ * Fetch tupledesc and compute remap info. We should probably cache this
+ * so that we don't have to keep recomputing it.
+ */
+ TupleDesc tupledesc = lookup_rowtype_tupdesc(type_id, typmod);
+ RemapInfo *remapinfo = BuildRemapInfo(tupledesc);
+ DecrTupleDescRefCount(tupledesc);
+
+ /* Remap tuple. */
+ ItemPointerSetInvalid(&htup.t_self);
+ htup.t_tableOid = InvalidOid;
+ htup.t_len = HeapTupleHeaderGetDatumLength(tup);
+ htup.t_data = tup;
+ HeapTuple atup = TupleQueueRemapTuple(reader, tupledesc, remapinfo, &htup);
+ HeapTupleHeaderSetTypeId(atup->t_data, type_id);
+ HeapTupleHeaderSetTypMod(atup->t_data, typmod);
+ HeapTupleHeaderSetDatumLength(atup->t_data, htup.t_len);
+
+ /* And return the results. */
+ return HeapTupleGetDatum(atup);
+}
+
+/*
+ * Handle a control message from the tuple queue reader.
+ *
+ * Control messages are sent when the remote side is sending tuples that
+ * contain transient record types. We need to arrange to bless those
+ * record types locally and translate between remote and local typmods.
+ */
+static void TupleQueueHandleControlMessage(TupleQueueReader *reader, Size nbytes, char *data)
+{
+ int natts;
+ int remotetypmod;
+ bool hasoid = false;
+ char *buf = data;
+ Size rc = 0;
+ int i;
+ Form_pg_attribute *attrs;
+ MemoryContext oldcontext;
+ TupleDesc tupledesc;
+ RecordTypemodMap *mapent;
+ bool found;
+
+ /* Extract remote typmod. */
+ int errorno = memcpy_s(&remotetypmod, nbytes, &buf[rc], sizeof(int));
+ securec_check_c(errorno, "", "");
+ nbytes -= sizeof(int);
+ rc += sizeof(int);
+
+ /* Extract attribute count. */
+ errorno = memcpy_s(&natts, nbytes, &buf[rc], sizeof(int));
+ securec_check_c(errorno, "", "");
+ nbytes -= sizeof(int);
+ rc += sizeof(int);
+
+ /* Extract hasoid flag. */
+ errorno = memcpy_s(&hasoid, nbytes, &buf[rc], sizeof(bool));
+ securec_check_c(errorno, "", "");
+ nbytes -= sizeof(bool);
+ rc += sizeof(bool);
+
+ /* Extract attribute details. */
+ oldcontext = MemoryContextSwitchTo(t_thrd.mem_cxt.cur_transaction_mem_cxt);
+ attrs = (Form_pg_attribute *)palloc(natts * sizeof(Form_pg_attribute));
+ for (i = 0; i < natts; ++i) {
+ attrs[i] = (Form_pg_attribute)palloc(sizeof(FormData_pg_attribute));
+ errorno = memcpy_s(attrs[i], nbytes, &buf[rc], sizeof(FormData_pg_attribute));
+ securec_check_c(errorno, "", "");
+ nbytes -= sizeof(FormData_pg_attribute);
+ rc += sizeof(FormData_pg_attribute);
+ }
+ (void)MemoryContextSwitchTo(oldcontext);
+
+ /* We should have consumed the whole message; nbytes was counted down as we read. */
+ Assert(nbytes == 0);
+
+ /* Construct TupleDesc. */
+ tupledesc = CreateTupleDesc(natts, hasoid, attrs);
+ tupledesc = BlessTupleDesc(tupledesc);
+
+ /* Create map if it doesn't exist already. */
+ if (reader->typmodmap == NULL) {
+ HASHCTL ctl;
+
+ ctl.keysize = sizeof(int);
+ ctl.entrysize = sizeof(RecordTypemodMap);
+ ctl.hcxt = t_thrd.mem_cxt.cur_transaction_mem_cxt;
+ reader->typmodmap = hash_create("typmodmap hashtable", 100, &ctl, HASH_ELEM | HASH_CONTEXT);
+ }
+
+ /* Create map entry. */
+ mapent = (RecordTypemodMap *)hash_search(reader->typmodmap, &remotetypmod, HASH_ENTER, &found);
+ if (found)
+ ereport(ERROR, (errmsg("duplicate message for typmod %d", remotetypmod)));
+ mapent->localtypmod = tupledesc->tdtypmod;
+ ereport(DEBUG3, (errmsg("mapping remote typmod %d to local typmod %d", remotetypmod, tupledesc->tdtypmod)));
+}
+
+/*
+ * Build a mapping indicating what remapping class applies to each attribute
+ * described by a tupledesc.
+ */
+static RemapInfo *BuildRemapInfo(TupleDesc tupledesc)
+{
+ Size size;
+ AttrNumber i;
+ bool noop = true;
+
+ size = offsetof(RemapInfo, mapping) + sizeof(RemapClass) * tupledesc->natts;
+ RemapInfo *remapinfo = (RemapInfo *)MemoryContextAllocZero(TopMemoryContext, size);
+ remapinfo->natts = tupledesc->natts;
+ for (i = 0; i < tupledesc->natts; ++i) {
+ Form_pg_attribute attr = tupledesc->attrs[i];
+
+ if (attr->attisdropped) {
+ remapinfo->mapping[i] = TQUEUE_REMAP_NONE;
+ continue;
+ }
+
+ remapinfo->mapping[i] = GetRemapClass(attr->atttypid);
+ if (remapinfo->mapping[i] != TQUEUE_REMAP_NONE)
+ noop = false;
+ }
+
+ if (noop) {
+ pfree(remapinfo);
+ remapinfo = NULL;
+ }
+
+ return remapinfo;
+}
+
+/*
+ * Determine the remap class associated with a particular data type.
+ *
+ * Transient record types need to have the typmod applied on the sending side
+ * replaced with a value on the receiving side that has the same meaning.
+ *
+ * Arrays, range types, and all record types (including named composite types)
+ * need to be searched for transient record values buried within them.
+ * Surprisingly, a walker is required even when the indicated type is a
+ * composite type, because the actual value may be a compatible transient
+ * record type.
+ */
+static RemapClass GetRemapClass(Oid type_id)
+{
+ RemapClass forceResult = TQUEUE_REMAP_NONE;
+ RemapClass innerResult = TQUEUE_REMAP_NONE;
+
+ for (;;) {
+ /* Simple cases. */
+ if (type_id == RECORDOID) {
+ innerResult = TQUEUE_REMAP_RECORD;
+ break;
+ }
+ if (type_id == RECORDARRAYOID) {
+ innerResult = TQUEUE_REMAP_ARRAY;
+ break;
+ }
+
+ /* Otherwise, we need a syscache lookup to figure it out. */
+ HeapTuple tup = SearchSysCache1((int)TYPEOID, ObjectIdGetDatum(type_id));
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR, (errmsg("cache lookup failed for type %u", type_id)));
+ Form_pg_type typ = (Form_pg_type)GETSTRUCT(tup);
+ /* Look through domains to underlying base type. */
+ if (typ->typtype == TYPTYPE_DOMAIN) {
+ type_id = typ->typbasetype;
+ ReleaseSysCache(tup);
+ continue;
+ }
+
+ /*
+ * Look through arrays to underlying base type, but the final return
+ * value must be either TQUEUE_REMAP_ARRAY or TQUEUE_REMAP_NONE. (If
+ * this is an array of integers, for example, we don't need to walk
+ * it.)
+ */
+ if (OidIsValid(typ->typelem) && typ->typlen == -1) {
+ type_id = typ->typelem;
+ ReleaseSysCache(tup);
+ if (forceResult == TQUEUE_REMAP_NONE) {
+ forceResult = TQUEUE_REMAP_ARRAY;
+ }
+ continue;
+ }
+
+ /*
+ * Similarly, look through ranges to the underlying base type, but the
+ * final return value must be either TQUEUE_REMAP_RANGE or
+ * TQUEUE_REMAP_NONE.
+ */ + if (typ->typtype == TYPTYPE_RANGE) { + ReleaseSysCache(tup); + if (forceResult == TQUEUE_REMAP_NONE) { + forceResult = TQUEUE_REMAP_RANGE; + } + type_id = get_range_subtype(type_id); + continue; + } + + /* Walk composite types. Nothing else needs special handling. */ + if (typ->typtype == TYPTYPE_COMPOSITE) { + innerResult = TQUEUE_REMAP_RECORD; + } + ReleaseSysCache(tup); + break; + } + + if (innerResult != TQUEUE_REMAP_NONE && forceResult != TQUEUE_REMAP_NONE) { + return forceResult; + } + return innerResult; +} + diff --git a/src/gausskernel/storage/access/hbstore/hbucket_am.cpp b/src/gausskernel/storage/access/hbstore/hbucket_am.cpp index ccfc8e232..baec56740 100755 --- a/src/gausskernel/storage/access/hbstore/hbucket_am.cpp +++ b/src/gausskernel/storage/access/hbstore/hbucket_am.cpp @@ -285,7 +285,7 @@ static HeapTuple switch_and_scan_next_tbl_hbkt(HBktTblScanDesc hp_scan, ScanDire (void)reset_scan_qual(next_bkt_rel, hp_scan->scanState); next_bkt_scan = heap_beginscan(next_bkt_rel, curr_bkt_scan->rs_snapshot, curr_bkt_scan->rs_nkeys, curr_bkt_scan->rs_key, - curr_bkt_scan->rs_isRangeScanInRedis); + curr_bkt_scan->rs_flags & SO_TYPE_RANGESCAN); try_init_bucket_parallel(next_bkt_scan, hp_scan->scanState); @@ -347,8 +347,8 @@ bool hbkt_sampling_scan_nextbucket(HBktTblScanDesc hp_scan) /* Step 3. Build a HeapScan for new bucket */ next_bkt_scan = heap_beginscan_sampling(next_bkt_rel, curr_bkt_scan->rs_snapshot, curr_bkt_scan->rs_nkeys, curr_bkt_scan->rs_key, - curr_bkt_scan->rs_allow_strat, curr_bkt_scan->rs_allow_sync, - curr_bkt_scan->rs_isRangeScanInRedis); + curr_bkt_scan->rs_flags & SO_ALLOW_STRAT, curr_bkt_scan->rs_flags & SO_ALLOW_SYNC, + curr_bkt_scan->rs_flags & SO_TYPE_RANGESCAN); /* Step 4. Set the parallel scan parameter */ ScanState* sstate = hp_scan->scanState; diff --git a/src/gausskernel/storage/access/heap/heapam.cpp b/src/gausskernel/storage/access/heap/heapam.cpp index 4a8f5e457..c0f81499c 100644 --- a/src/gausskernel/storage/access/heap/heapam.cpp +++ b/src/gausskernel/storage/access/heap/heapam.cpp @@ -44,6 +44,7 @@ #include "access/heapam.h" #include "access/hio.h" #include "access/multixact.h" +#include "access/parallel.h" #include "access/relscan.h" #include "access/sysattr.h" #include "access/tableam.h" @@ -124,7 +125,9 @@ const TableAm g_HeapTblAm = {.table_endscan = (table_endscan_t)heap_endscan, .table_init_parallel_seqscan = (table_init_parallel_seqscan_t)heap_init_parallel_seqscan}; static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, - bool allow_strat, bool allow_sync, bool is_bitmapscan, bool is_range_scan_in_redis = false, bool is_samplescan = false); + ParallelHeapScanDesc parallel_scan, uint32 flag); +static void heap_parallelscan_startblock_init(HeapScanDesc scan); +static BlockNumber heap_parallelscan_nextpage(HeapScanDesc scan); static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, const ItemPointer from, Buffer newbuf, HeapTuple newtup, HeapTuple old_key_tup, bool all_visible_cleared, bool new_all_visible_cleared); @@ -148,7 +151,7 @@ static void initscan(HeapScanDesc scan, ScanKey key, bool is_rescan) bool allow_strat = false; bool allow_sync = false; BlockNumber nblocks; - bool is_range_scan_in_redis = scan->rs_isRangeScanInRedis; + bool is_range_scan_in_redis = scan->rs_flags & SO_TYPE_RANGESCAN; /* * Determine the number of blocks we have to scan. 
@@ -161,7 +164,9 @@ static void initscan(HeapScanDesc scan, ScanKey key, bool is_rescan) * results for a non-MVCC snapshot, the caller must hold some higher-level * lock that ensures the interesting tuple(s) won't change.) */ - if (RelationIsPartitioned(scan->rs_rd)) { + if (scan->rs_parallel != NULL) { + nblocks = scan->rs_parallel->phs_nblocks; + } else if (RelationIsPartitioned(scan->rs_rd)) { /* partition table just set Initial Value, in BitmapHeapTblNext will update */ nblocks = InvalidBlockNumber; } else { @@ -189,8 +194,8 @@ static void initscan(HeapScanDesc scan, ScanKey key, bool is_rescan) * During a rescan, don't make a new strategy object if we don't have to. */ if (scan->rs_nblocks > (uint32)(g_instance.attr.attr_storage.NBuffers / 4)) { - allow_strat = scan->rs_allow_strat; - allow_sync = scan->rs_allow_sync; + allow_strat = scan->rs_flags & SO_ALLOW_STRAT; + allow_sync = scan->rs_flags & SO_ALLOW_SYNC; } else allow_strat = allow_sync = false; @@ -203,7 +208,10 @@ static void initscan(HeapScanDesc scan, ScanKey key, bool is_rescan) scan->rs_strategy = NULL; } - if (is_rescan) { + if (scan->rs_parallel != NULL) { + /* For parallel scan, believe whatever ParallelHeapScanDesc says. */ + scan->rs_syncscan = scan->rs_parallel->phs_syncscan; + } else if (is_rescan) { /* * If rescan, keep the previous startblock setting so that rewinding a * cursor doesn't generate surprising results. Reset the syncscan @@ -246,7 +254,7 @@ static void initscan(HeapScanDesc scan, ScanKey key, bool is_rescan) * underlying bitmap index scans will be counted) or sample scans (we only * update stats for tuple fetches there). */ - if (!scan->rs_bitmapscan && !scan->rs_samplescan) { + if (!(scan->rs_flags & (SO_TYPE_BITMAPSCAN | SO_TYPE_SAMPLESCAN))) { pgstat_count_heap_scan(scan->rs_rd); } } @@ -269,7 +277,7 @@ void heapgetpage(HeapScanDesc scan, BlockNumber page) ItemId lpp; bool all_visible = false; - if (!scan->rs_isRangeScanInRedis) { + if (!(scan->rs_flags & SO_TYPE_RANGESCAN)) { Assert(page < scan->rs_nblocks); } else { Assert(page < scan->rs_nblocks + scan->rs_startblock); @@ -297,7 +305,7 @@ void heapgetpage(HeapScanDesc scan, BlockNumber page) /* We've pinned the buffer, nobody can prune this buffer, check whether snapshot is valid. */ CheckSnapshotIsValidException(scan->rs_snapshot, "heapgetpage"); - if (!scan->rs_pageatatime) { + if (!(scan->rs_flags & SO_ALLOW_PAGEMODE)) { gstrace_exit(GS_TRC_ID_heapgetpage); return; } @@ -311,7 +319,7 @@ void heapgetpage(HeapScanDesc scan, BlockNumber page) * since we use append mode and never look back holes in previous pages * anyway. */ - if (!scan->rs_isRangeScanInRedis) { + if (!(scan->rs_flags & SO_TYPE_RANGESCAN)) { heap_page_prune_opt(scan->rs_rd, buffer); } @@ -417,7 +425,7 @@ bool next_page(HeapScanDesc scan, ScanDirection dir, BlockNumber& page) page += (scan->dop - 1) * PARALLEL_SCAN_GAP; } - if (scan->rs_isRangeScanInRedis) { + if (scan->rs_flags & SO_TYPE_RANGESCAN) { /* Parallel workers start from different point. 
*/ finished = (page >= scan->rs_startblock + scan->rs_nblocks - PARALLEL_SCAN_GAP * u_sess->stream_cxt.smp_id); @@ -432,10 +440,13 @@ bool next_page(HeapScanDesc scan, ScanDirection dir, BlockNumber& page) page = scan->rs_nblocks; } page--; + } else if (scan->rs_parallel != NULL) { + page = heap_parallelscan_nextpage(scan); + finished = (page == InvalidBlockNumber); } else { page++; - if (scan->rs_isRangeScanInRedis) { + if (scan->rs_flags & SO_TYPE_RANGESCAN) { if (page >= scan->rs_startblock + scan->rs_nblocks) { page = 0; } @@ -742,7 +753,19 @@ static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey gstrace_exit(GS_TRC_ID_heapgettup); return; } - page = scan->rs_startblock; /* first page */ + + if (scan->rs_parallel != NULL) { + heap_parallelscan_startblock_init(scan); + page = heap_parallelscan_nextpage(scan); + /* Other processes might have already finished the scan. */ + if (page == InvalidBlockNumber) { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return; + } + } else { + page = scan->rs_startblock; /* first page */ + } heapgetpage(scan, page); line_off = FirstOffsetNumber; /* first offnum */ scan->rs_inited = true; @@ -759,6 +782,9 @@ static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey /* page and line_off now reference the physically next tid */ lines_left = lines - line_off + 1; } else if (backward) { + /* backward parallel scan not supported */ + Assert(scan->rs_parallel == NULL); + if (!scan->rs_inited) { /* return null immediately if relation is empty */ if (scan->rs_nblocks == 0) { @@ -952,7 +978,7 @@ static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, { HeapTuple tuple = &(scan->rs_ctup); bool backward = ScanDirectionIsBackward(dir); - bool is_range_scan_in_redis = scan->rs_isRangeScanInRedis; + bool is_range_scan_in_redis = scan->rs_flags & SO_TYPE_RANGESCAN; BlockNumber page; bool finished = false; Page dp; @@ -983,7 +1009,20 @@ static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, gstrace_exit(GS_TRC_ID_heapgettup_pagemode); return; } - page = scan->rs_startblock; /* first page */ + + if (scan->rs_parallel != NULL) { + heap_parallelscan_startblock_init(scan); + page = heap_parallelscan_nextpage(scan); + + /* Other processes might have already finished the scan. 
*/ + if (page == InvalidBlockNumber) { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return; + } + } else { + page = scan->rs_startblock; /* first page */ + } heapgetpage(scan, page); line_index = 0; scan->rs_inited = true; @@ -998,6 +1037,9 @@ static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, /* page and line_index now reference the next visible tid */ lines_left = lines - line_index; } else if (backward) { + /* backward parallel scan not supported */ + Assert(scan->rs_parallel == NULL); + if (!scan->rs_inited) { /* return null immediately if relation is empty */ if (scan->rs_nblocks == 0) { @@ -1558,19 +1600,32 @@ Relation heap_openrv_extended( HeapScanDesc heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, bool is_range_scan_in_redis) { /* We don't allow sync buffer read if it is a range scan in redis */ - return heap_beginscan_internal( - relation, snapshot, nkeys, key, !is_range_scan_in_redis, !is_range_scan_in_redis, false, is_range_scan_in_redis); + uint32 flag; + if (is_range_scan_in_redis) { + flag = SO_TYPE_RANGESCAN; + } else { + flag = SO_ALLOW_STRAT | SO_ALLOW_SYNC; + } + return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, flag); } HeapScanDesc heap_beginscan_strat( Relation relation, Snapshot snapshot, int nkeys, ScanKey key, bool allow_strat, bool allow_sync) { - return heap_beginscan_internal(relation, snapshot, nkeys, key, allow_strat, allow_sync, false); + uint32 flag = 0; + if (allow_strat) { + flag |= SO_ALLOW_STRAT; + } + if (allow_sync) { + flag |= SO_ALLOW_SYNC; + } + return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, flag); } HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot, int nkeys, ScanKey key) { - return heap_beginscan_internal(relation, snapshot, nkeys, key, false, false, true); + uint32 flag = SO_TYPE_BITMAPSCAN; + return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, flag); } /* @@ -1590,12 +1645,21 @@ HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot, int nkeys, HeapScanDesc heap_beginscan_sampling(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, bool allow_strat, bool allow_sync, bool is_range_scan_in_redis) { - return heap_beginscan_internal( - relation, snapshot, nkeys, key, allow_strat, allow_sync, false, is_range_scan_in_redis, true); + uint32 flag = SO_TYPE_SAMPLESCAN; + if (allow_strat) { + flag |= SO_ALLOW_STRAT; + } + if (allow_sync) { + flag |= SO_ALLOW_SYNC; + } + if (is_range_scan_in_redis) { + flag |= SO_TYPE_RANGESCAN; + } + return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, flag); } static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, - bool allow_strat, bool allow_sync, bool is_bitmapscan, bool is_range_scan_in_redis, bool is_samplescan) + ParallelHeapScanDesc parallel_scan, uint32 flag) { HeapScanDesc scan; @@ -1614,7 +1678,7 @@ static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot * bitmapscan to scan tuples using GPI. Therefore, * the value of rs_rd in the scan is used to store partition-fake-relation. 
*/ - Assert(is_bitmapscan); + Assert(flag & SO_TYPE_BITMAPSCAN); } /* @@ -1628,17 +1692,16 @@ static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot scan->rs_tupdesc = RelationGetDescr(relation); scan->rs_snapshot = snapshot; scan->rs_nkeys = nkeys; - scan->rs_bitmapscan = is_bitmapscan; - scan->rs_samplescan = is_samplescan; + scan->rs_flags = flag; scan->rs_strategy = NULL; /* set in initscan */ - scan->rs_allow_strat = allow_strat; - scan->rs_allow_sync = allow_sync; - scan->rs_isRangeScanInRedis = is_range_scan_in_redis; + scan->rs_parallel = parallel_scan; /* * we can use page-at-a-time mode if it's an MVCC-safe snapshot */ - scan->rs_pageatatime = IsMVCCSnapshot(snapshot); + if (IsMVCCSnapshot(snapshot)) { + scan->rs_flags |= SO_ALLOW_PAGEMODE; + } /* * For a seqscan in a serializable transaction, acquire a predicate lock @@ -1651,7 +1714,7 @@ static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot * covering the predicate. But in that case we still have to lock any * matching heap tuples. */ - if (!is_bitmapscan) { + if (!(flag & SO_TYPE_BITMAPSCAN)) { PredicateLockRelation(relation, snapshot); } @@ -1694,6 +1757,20 @@ void heap_rescan(HeapScanDesc scan, ScanKey key) * reinitialize scan descriptor */ initscan(scan, key, true); + + /* + * reset parallel scan, if present + */ + if (scan->rs_parallel != NULL) { + ParallelHeapScanDesc parallel_scan; + + /* + * Caller is responsible for making sure that all workers have + * finished the scan before calling this. + */ + parallel_scan = scan->rs_parallel; + pg_atomic_write_u64(¶llel_scan->phs_nallocated, 0); + } } /* ---------------- @@ -1727,6 +1804,10 @@ void heap_endscan(HeapScanDesc scan) FreeAccessStrategy(scan->rs_strategy); } + if (scan->rs_flags & SO_TEMP_SNAPSHOT) { + UnregisterSnapshot(scan->rs_snapshot); + } + pfree(scan); scan = NULL; } @@ -1788,12 +1869,159 @@ HeapTuple heapGetNextForVerify(HeapScanDesc scan, ScanDirection direction, bool& return &(scan->rs_ctup); } +/* ---------------- + * heap_parallelscan_estimate - estimate storage for ParallelHeapScanDesc + * + * Sadly, this doesn't reduce to a constant, because the size required + * to serialize the snapshot can vary. + * ---------------- + */ +Size heap_parallelscan_estimate(Snapshot snapshot) +{ + return add_size(offsetof(ParallelHeapScanDescData, phs_snapshot_data), EstimateSnapshotSpace(snapshot)); +} + +/* ---------------- + * heap_parallelscan_initialize - initialize ParallelHeapScanDesc + * + * Must allow as many bytes of shared memory as returned by + * heap_parallelscan_estimate. Call this just once in the leader + * process; then, individual workers attach via heap_beginscan_parallel. 
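+ * pscan_len must be the value previously returned by heap_parallelscan_estimate
+ * for the same snapshot, since the serialized snapshot is stored in the space
+ * following the descriptor's fixed-size fields.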
+ * ---------------- + */ +void heap_parallelscan_initialize(ParallelHeapScanDesc target, Size pscan_len, Relation relation, Snapshot snapshot) +{ + target->phs_relid = RelationGetRelid(relation); + target->phs_nblocks = RelationGetNumberOfBlocks(relation); + /* compare phs_syncscan initialization to similar logic in initscan */ + target->phs_syncscan = u_sess->attr.attr_storage.synchronize_seqscans && !RelationUsesLocalBuffers(relation) && + target->phs_nblocks > (uint)g_instance.attr.attr_storage.NBuffers / 4; + SpinLockInit(&target->phs_mutex); + target->phs_startblock = InvalidBlockNumber; + target->pscan_len = pscan_len; + pg_atomic_write_u64(&target->phs_nallocated, 0); + SerializeSnapshot(snapshot, target->phs_snapshot_data, + pscan_len - offsetof(ParallelHeapScanDescData, phs_snapshot_data)); +} + +/* ---------------- + * heap_beginscan_parallel - join a parallel scan + * + * Caller must hold a suitable lock on the correct relation. + * ---------------- + */ +HeapScanDesc heap_beginscan_parallel(Relation relation, ParallelHeapScanDesc parallel_scan) +{ + Assert(RelationGetRelid(relation) == parallel_scan->phs_relid); + Snapshot snapshot = RestoreSnapshot(parallel_scan->phs_snapshot_data, + parallel_scan->pscan_len - offsetof(ParallelHeapScanDescData, phs_snapshot_data)); + RegisterSnapshot(snapshot); + + uint32 flag = SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_TEMP_SNAPSHOT; + return heap_beginscan_internal(relation, snapshot, 0, NULL, parallel_scan, flag); +} + +/* ---------------- + * heap_parallelscan_startblock_init - find and set the scan's startblock + * + * Determine where the parallel seq scan should start. This function may + * be called many times, once by each parallel worker. We must be careful + * only to set the startblock once. + * ---------------- + */ +static void heap_parallelscan_startblock_init(HeapScanDesc scan) +{ + Assert(scan->rs_parallel); + BlockNumber sync_startpage = InvalidBlockNumber; + ParallelHeapScanDesc parallel_scan = scan->rs_parallel; + +retry: + /* Grab the spinlock. */ + SpinLockAcquire(¶llel_scan->phs_mutex); + + /* + * If the scan's startblock has not yet been initialized, we must do so + * now. If this is not a synchronized scan, we just start at block 0, but + * if it is a synchronized scan, we must get the starting position from + * the synchronized scan machinery. We can't hold the spinlock while + * doing that, though, so release the spinlock, get the information we + * need, and retry. If nobody else has initialized the scan in the + * meantime, we'll fill in the value we fetched on the second time + * through. + */ + if (parallel_scan->phs_startblock == InvalidBlockNumber) { + if (!parallel_scan->phs_syncscan) + parallel_scan->phs_startblock = 0; + else if (sync_startpage != InvalidBlockNumber) + parallel_scan->phs_startblock = sync_startpage; + else { + SpinLockRelease(¶llel_scan->phs_mutex); + sync_startpage = ss_get_location(scan->rs_rd, scan->rs_nblocks); + goto retry; + } + } + SpinLockRelease(¶llel_scan->phs_mutex); +} + +/* ---------------- + * heap_parallelscan_nextpage - get the next page to scan + * + * Get the next page to scan. Even if there are no pages left to scan, + * another backend could have grabbed a page to scan and not yet finished + * looking at it, so it doesn't follow that the scan is done when the + * first backend gets an InvalidBlockNumber return. 
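+ * Callers simply treat an InvalidBlockNumber return as "no more blocks for this
+ * backend" and finish their own part of the scan.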
+ * ---------------- + */ +static BlockNumber heap_parallelscan_nextpage(HeapScanDesc scan) +{ + Assert(scan->rs_parallel); + BlockNumber page; + ParallelHeapScanDesc parallel_scan = scan->rs_parallel; + + /* + * phs_nallocated tracks how many pages have been allocated to workers + * already. When phs_nallocated >= rs_nblocks, all blocks have been + * allocated. + * + * Because we use an atomic fetch-and-add to fetch the current value, the + * phs_nallocated counter will exceed rs_nblocks, because workers will + * still increment the value, when they try to allocate the next block but + * all blocks have been allocated already. The counter must be 64 bits + * wide because of that, to avoid wrapping around when rs_nblocks is close + * to 2^32. + * + * The actual page to return is calculated by adding the counter to the + * starting block number, modulo nblocks. + */ + uint64 nallocated = pg_atomic_fetch_add_u64(¶llel_scan->phs_nallocated, 1); + if (nallocated >= scan->rs_nblocks) + page = InvalidBlockNumber; /* all blocks have been allocated */ + else + page = (nallocated + parallel_scan->phs_startblock) % scan->rs_nblocks; + + /* + * Report scan location. Normally, we report the current page number. + * When we reach the end of the scan, though, we report the starting page, + * not the ending page, just so the starting positions for later scans + * doesn't slew backwards. We only report the position at the end of the + * scan once, though: subsequent callers will report nothing. + */ + if (scan->rs_syncscan) { + if (page != InvalidBlockNumber) + ss_report_location(scan->rs_rd, page); + else if (nallocated == scan->rs_nblocks) + ss_report_location(scan->rs_rd, parallel_scan->phs_startblock); + } + + return page; +} + HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction) { /* Note: no locking manipulations needed */ HEAPDEBUG_1; /* heap_getnext( info ) */ - if (scan->rs_pageatatime) { + if (scan->rs_flags & SO_ALLOW_PAGEMODE) { heapgettup_pagemode(scan, direction, scan->rs_nkeys, scan->rs_key); } else { heapgettup(scan, direction, scan->rs_nkeys, scan->rs_key); @@ -3276,7 +3504,7 @@ void heap_markpos(HeapScanDesc scan) /* Note: no locking manipulations needed */ if (scan->rs_ctup.t_data != NULL) { scan->rs_mctid = scan->rs_ctup.t_self; - if (scan->rs_pageatatime) { + if (scan->rs_flags & SO_ALLOW_PAGEMODE) { scan->rs_mindex = scan->rs_cindex; } } else @@ -3292,6 +3520,19 @@ void heap_markpos(HeapScanDesc scan) */ static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, CommandId cid, int options) { + /* + * Parallel operations are required to be strictly read-only in a parallel + * worker. Parallel inserts are not safe even in the leader in the + * general case, because group locking means that heavyweight locks for + * relation extension or GIN page locks will not conflict between members + * of a lock group, but we don't prohibit that case here because there are + * useful special cases that we can safely allow, such as CREATE TABLE AS. + */ + if (IsParallelWorker()) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("cannot insert tuples in a parallel worker"))); + } + if (relation->rd_rel->relhasoids) { #ifdef NOT_USED /* this is redundant with an Assert in HeapTupleSetOid */ @@ -3797,6 +4038,16 @@ HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid, Tr /* Don't allow any write/lock operator in stream. */ Assert(!StreamThreadAmI()); + /* + * Forbid this during a parallel operation, lest it allocate a combocid. 
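A self-contained illustration of the allocation arithmetic in heap_parallelscan_nextpage above (the constants are invented for the example, and UINT32_MAX stands in for InvalidBlockNumber):

    #include <stdint.h>
    #include <stdio.h>

    /* counter value handed out by fetch-and-add  ->  block number to scan */
    static uint32_t next_block(uint64_t nallocated, uint32_t startblock, uint32_t nblocks)
    {
        if (nallocated >= nblocks)
            return UINT32_MAX;                          /* no blocks left */
        return (uint32_t)((nallocated + startblock) % nblocks);
    }

    int main(void)
    {
        /* 10 blocks, synchronized scan started at block 7: prints 7 8 9 0 1 2 3 4 5 6 done done */
        for (uint64_t i = 0; i < 12; i++) {
            uint32_t blk = next_block(i, 7, 10);
            if (blk == UINT32_MAX)
                printf("done ");
            else
                printf("%u ", (unsigned)blk);
        }
        printf("\n");
        return 0;
    }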
+ * Other workers might need that combocid for visibility checks, and we + * have no provision for broadcasting it to them. + */ + if (IsInParallelMode()) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("cannot delete tuples during a parallel operation"))); + } + block = ItemPointerGetBlockNumber(tid); buffer = ReadBuffer(relation, block); page = BufferGetPage(buffer); @@ -4242,6 +4493,16 @@ HTSU_Result heap_update(Relation relation, Relation parentRelation, ItemPointer /* Don't allow any write/lock operator in stream. */ Assert(!StreamThreadAmI()); + /* + * Forbid this during a parallel operation, lest it allocate a combocid. + * Other workers might need that combocid for visibility checks, and we + * have no provision for broadcasting it to them. + */ + if (IsInParallelMode()) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("cannot update tuples during a parallel operation"))); + } + /* * Fetch the list of attributes to be checked for HOT update. This is * wasted effort if we fail to update or have to put the new tuple on a @@ -5688,6 +5949,17 @@ void heap_inplace_update(Relation relation, HeapTuple tuple) uint32 newlen; errno_t rc; + /* + * For now, parallel operations are required to be strictly read-only. + * Unlike a regular update, this should never create a combo CID, so it + * might be possible to relax this restriction, but not without more + * thought and testing. It's not clear that it would be useful, anyway. + */ + if (IsInParallelMode()) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("cannot update tuples during a parallel operation"))); + } + buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self))); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); page = (Page)BufferGetPage(buffer); @@ -5907,7 +6179,7 @@ void heap_restrpos(HeapScanDesc scan) */ scan->rs_inited = true; scan->rs_ctup.t_self = scan->rs_mctid; - if (scan->rs_pageatatime) { + if (scan->rs_flags & SO_ALLOW_PAGEMODE) { scan->rs_cindex = scan->rs_mindex; heapgettup_pagemode(scan, NoMovementScanDirection, @@ -6885,7 +7157,6 @@ static void heap_xlog_newpage(XLogReaderState* record) inline static void heap_xlog_allvisiblecleared(RelFileNode target_node, BlockNumber blkno) { - Relation reln = CreateFakeRelcacheEntry(target_node); Buffer vmbuffer = InvalidBuffer; @@ -7786,7 +8057,7 @@ void heap_init_parallel_seqscan(HeapScanDesc scan, int32 dop, ScanDirection dir) if (ScanDirectionIsBackward(dir)) { paral_blocks = (scan->rs_nblocks - 1) - paral_blocks; - if (scan->rs_isRangeScanInRedis) { + if (scan->rs_flags & SO_TYPE_RANGESCAN) { scan->rs_startblock = paral_blocks; } else { scan->rs_startblock += paral_blocks; @@ -7795,7 +8066,7 @@ void heap_init_parallel_seqscan(HeapScanDesc scan, int32 dop, ScanDirection dir) } /* If not range scan in redistribute, just start from 0. 
*/ - if (scan->rs_isRangeScanInRedis) { + if (scan->rs_flags & SO_TYPE_RANGESCAN) { scan->rs_startblock += paral_blocks; } else { scan->rs_startblock = paral_blocks; diff --git a/src/gausskernel/storage/access/transam/Makefile b/src/gausskernel/storage/access/transam/Makefile index 1c33520d3..349d27079 100755 --- a/src/gausskernel/storage/access/transam/Makefile +++ b/src/gausskernel/storage/access/transam/Makefile @@ -10,12 +10,12 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif ifeq ($(enable_multiple_nodes), yes) -OBJS = clog.o multixact.o rmgr.o slru.o csnlog.o transam.o twophase.o \ +OBJS = clog.o multixact.o parallel.o rmgr.o slru.o csnlog.o transam.o twophase.o \ twophase_rmgr.o varsup.o double_write.o redo_statistic.o multi_redo_api.o multi_redo_settings.o \ xact.o xlog.o xlogfuncs.o \ xloginsert.o xlogreader.o xlogutils.o cbmparsexlog.o cbmfuncs.o else -OBJS = clog.o gtm_single.o multixact.o rmgr.o slru.o csnlog.o transam.o twophase.o \ +OBJS = clog.o gtm_single.o multixact.o parallel.o rmgr.o slru.o csnlog.o transam.o twophase.o \ twophase_rmgr.o varsup.o double_write.o redo_statistic.o multi_redo_api.o multi_redo_settings.o \ xact.o xlog.o xlogfuncs.o \ xloginsert.o xlogreader.o xlogutils.o cbmparsexlog.o cbmfuncs.o diff --git a/src/gausskernel/storage/access/transam/parallel.cpp b/src/gausskernel/storage/access/transam/parallel.cpp new file mode 100644 index 000000000..a6e95823a --- /dev/null +++ b/src/gausskernel/storage/access/transam/parallel.cpp @@ -0,0 +1,1093 @@ +/* ------------------------------------------------------------------------- + * + * parallel.c + * Infrastructure for launching parallel workers + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/transam/parallel.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/nbtree.h" +#include "access/parallel.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/pg_enum.h" +#include "catalog/index.h" +#include "catalog/namespace.h" +#include "commands/async.h" +#include "executor/execParallel.h" +#include "libpq/libpq.h" +#include "libpq/pqsignal.h" +#include "libpq/pqformat.h" +#include "libpq/pqmq.h" +#include "miscadmin.h" +#include "optimizer/planner.h" +#include "pgstat.h" +#include "storage/ipc.h" +#include "storage/predicate.h" +#include "storage/sinval.h" +#include "storage/spin.h" +#include "tcop/tcopprot.h" +#include "utils/combocid.h" +#include "utils/guc.h" +#include "utils/inval.h" +#include "utils/memutils.h" +#include "utils/relmapper.h" +#include "utils/snapmgr.h" +#include "utils/typcache.h" + + +/* + * We don't want to waste a lot of memory on an error queue which, most of + * the time, will process only a handful of small messages. However, it is + * desirable to make it large enough that a typical ErrorResponse can be sent + * without blocking. That way, a worker that errors out can write the whole + * message into the queue and terminate without waiting for the user backend. + */ +#define PARALLEL_ERROR_QUEUE_SIZE 16384 + +/* + * List of internal parallel worker entry points. We need this for + * reasons explained in LookupParallelWorkerFunction(), below. + */ +static const struct { + const char *fn_name; + parallel_worker_main_type fn_addr; +} InternalParallelWorkers[] = { + { + "ParallelQueryMain", ParallelQueryMain + } +}; + +/* Private functions. 
*/ +static void HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg); +static void WaitForParallelWorkersToExit(ParallelContext *pcxt); +static parallel_worker_main_type LookupParallelWorkerFunction(const char *libraryname, const char *funcname); +static void ParallelWorkerShutdown(int code, Datum arg); + +/* + * Establish a new parallel context. This should be done after entering + * parallel mode, and (unless there is an error) the context should be + * destroyed before exiting the current subtransaction. + */ +ParallelContext *CreateParallelContext(const char *library_name, const char *function_name, int nworkers) +{ + /* It is unsafe to create a parallel context if not in parallel mode. */ + Assert(IsInParallelMode()); + + /* Number of workers should be non-negative. */ + Assert(nworkers >= 0); + + /* We might be running in a short-lived memory context. */ + MemoryContext oldcontext = MemoryContextSwitchTo(u_sess->top_transaction_mem_cxt); + + /* Initialize a new ParallelContext. */ + ParallelContext *pcxt = (ParallelContext *)palloc0(sizeof(ParallelContext)); + pcxt->subid = GetCurrentSubTransactionId(); + pcxt->nworkers = nworkers; + pcxt->library_name = pstrdup(library_name); + pcxt->function_name = pstrdup(function_name); + pcxt->error_context_stack = t_thrd.log_cxt.error_context_stack; + dlist_push_head(&t_thrd.bgworker_cxt.pcxt_list, &pcxt->node); + + /* Restore previous memory context. */ + (void)MemoryContextSwitchTo(oldcontext); + + return pcxt; +} + +/* + * Establish the dynamic shared memory segment for a parallel context and + * copy state and other bookkeeping information that will be needed by + * parallel workers into it. + */ +void InitializeParallelDSM(ParallelContext *pcxt) +{ + int i; + Snapshot transaction_snapshot = GetTransactionSnapshot(); + Snapshot active_snapshot = GetActiveSnapshot(); + + /* + * Create DSM and initialize with new table of contents. But if the user + * didn't request any workers, then don't bother creating a dynamic shared + * memory segment; instead, just use backend-private memory. + * + * Also, if we can't create a dynamic shared memory segment because the + * maximum number of segments have already been created, then fall back to + * backend-private memory, and plan not to use any workers. We hope this + * won't happen very often, but it's better to abandon the use of + * parallelism than to fail outright. + */ + pcxt->seg = dsm_create(); + + knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg; + MemoryContext oldcontext = MemoryContextSwitchTo(cxt->memCtx); + + /* Initialize fixed-size state in shared memory. */ + cxt->pwCtx->database_id = u_sess->proc_cxt.MyDatabaseId; + cxt->pwCtx->authenticated_user_id = GetAuthenticatedUserId(); + cxt->pwCtx->outer_user_id = GetCurrentRoleId(); + cxt->pwCtx->is_superuser = u_sess->attr.attr_common.session_auth_is_superuser; + GetUserIdAndSecContext(&cxt->pwCtx->current_user_id, &cxt->pwCtx->sec_context); + GetTempNamespaceState(&cxt->pwCtx->temp_namespace_id, &cxt->pwCtx->temp_toast_namespace_id); + cxt->pwCtx->parallel_master_pgproc = t_thrd.proc; + cxt->pwCtx->parallel_master_pid = t_thrd.proc_cxt.MyProcPid; + cxt->pwCtx->parallel_master_backend_id = t_thrd.proc_cxt.MyBackendId; + cxt->pwCtx->xact_ts = GetCurrentTransactionStartTimestamp(); + cxt->pwCtx->stmt_ts = GetCurrentStatementStartTimestamp(); + SpinLockInit(&cxt->pwCtx->mutex); + cxt->pwCtx->last_xlog_end = 0; + + /* We can skip the rest of this if we're not budgeting for any workers. 
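Putting the pieces of this file together, the leader-side lifecycle the comments above describe looks roughly like this (a sketch only: nworkers is a placeholder, error handling and the executor-specific shared-memory layout are omitted, and the launch/wait/destroy routines appear later in this file):

    EnterParallelMode();                  /* must precede CreateParallelContext */

    ParallelContext *pcxt = CreateParallelContext("postgres", "ParallelQueryMain", nworkers);
    InitializeParallelDSM(pcxt);          /* serialize snapshots, combo CIDs, xact state, ... */
    LaunchParallelWorkers(pcxt);          /* may start fewer workers than requested */

    /* ... leader does its own share of the work and gathers results ... */

    WaitForParallelWorkersToFinish(pcxt); /* absorbs worker errors and XactLastRecEnd feedback */
    DestroyParallelContext(pcxt);

    ExitParallelMode();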
*/ + if (pcxt->nworkers > 0) { + /* Serialize combo CID state. */ + cxt->pwCtx->usedComboCids = u_sess->utils_cxt.usedComboCids; + cxt->pwCtx->comboCids = u_sess->utils_cxt.comboCids; + cxt->pwCtx->sizeComboCids = u_sess->utils_cxt.sizeComboCids; + cxt->pwCtx->comboHash = u_sess->utils_cxt.comboHash; + + /* Serialize transaction snapshot and active snapshot. */ + Size tsnaplen = EstimateSnapshotSpace(transaction_snapshot); + Size asnaplen = EstimateSnapshotSpace(active_snapshot); + + cxt->pwCtx->tsnapspace = (char *)palloc0(tsnaplen); + cxt->pwCtx->tsnapspace_len = tsnaplen; + SerializeSnapshot(transaction_snapshot, cxt->pwCtx->tsnapspace, tsnaplen); + cxt->pwCtx->asnapspace = (char *)palloc0(asnaplen); + cxt->pwCtx->asnapspace_len = asnaplen; + SerializeSnapshot(active_snapshot, cxt->pwCtx->asnapspace, asnaplen); + + Size searchPathLen = strlen(u_sess->attr.attr_common.namespace_search_path); + cxt->pwCtx->namespace_search_path = (char *)palloc(searchPathLen + 1); + int rc = strcpy_s(cxt->pwCtx->namespace_search_path, searchPathLen + 1, + u_sess->attr.attr_common.namespace_search_path); + securec_check_c(rc, "", ""); + + /* Serialize transaction state. */ + cxt->pwCtx->xactIsoLevel = u_sess->utils_cxt.XactIsoLevel; + cxt->pwCtx->xactDeferrable = u_sess->attr.attr_storage.XactDeferrable; + cxt->pwCtx->topTransactionId = GetTopTransactionIdIfAny(); + cxt->pwCtx->currentTransactionId = GetCurrentTransactionIdIfAny(); + cxt->pwCtx->currentCommandId = t_thrd.xact_cxt.currentCommandId; + cxt->pwCtx->nParallelCurrentXids = t_thrd.xact_cxt.nParallelCurrentXids; + cxt->pwCtx->ParallelCurrentXids = t_thrd.xact_cxt.ParallelCurrentXids; + + /* Serialize relmapper state. */ + cxt->pwCtx->active_shared_updates = u_sess->relmap_cxt.active_shared_updates; + cxt->pwCtx->active_local_updates = u_sess->relmap_cxt.active_local_updates; + + /* Allocate space for worker information. */ + pcxt->worker = (ParallelWorkerInfo *)palloc0(sizeof(ParallelWorkerInfo) * pcxt->nworkers); + + /* + * Establish error queues in dynamic shared memory. + * + * These queues should be used only for transmitting ErrorResponse, + * NoticeResponse, and NotifyResponse protocol messages. Tuple data + * should be transmitted via separate (possibly larger?) queues. + */ + cxt->pwCtx->errorQueue = (char *)palloc0(mul_size(pcxt->nworkers, PARALLEL_ERROR_QUEUE_SIZE)); + for (i = 0; i < pcxt->nworkers; ++i) { + shm_mq *mq = + shm_mq_create(cxt->pwCtx->errorQueue + i * PARALLEL_ERROR_QUEUE_SIZE, PARALLEL_ERROR_QUEUE_SIZE); + shm_mq_set_receiver(mq, t_thrd.proc); + pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL); + } + + /* + * Serialize entrypoint information. It's unsafe to pass function + * pointers across processes, as the function pointer may be different + * in each process in EXEC_BACKEND builds, so we always pass library + * and function name. (We use library name "postgres" for functions + * in the core backend.) + */ + Size lnamelen = strlen(pcxt->library_name); + cxt->pwCtx->library_name = (char *)palloc(lnamelen + 1); + rc = strcpy_s(cxt->pwCtx->library_name, lnamelen + 1, pcxt->library_name); + securec_check_c(rc, "", ""); + + Size fnamelen = strlen(pcxt->function_name); + cxt->pwCtx->function_name = (char *)palloc(fnamelen + 1); + rc = strcpy_s(cxt->pwCtx->function_name, fnamelen + 1, pcxt->function_name); + securec_check_c(rc, "", ""); + } + + /* Restore previous memory context. 
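For reference, the two ends of a worker's error queue are wired up symmetrically; condensing the leader-side loop above and the worker-side attach in ParallelWorkerMain (later in this file):

    /* Leader (InitializeParallelDSM): create queue i and attach as receiver. */
    shm_mq *mq = shm_mq_create(cxt->pwCtx->errorQueue + i * PARALLEL_ERROR_QUEUE_SIZE,
                               PARALLEL_ERROR_QUEUE_SIZE);
    shm_mq_set_receiver(mq, t_thrd.proc);
    pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);

    /* Worker (ParallelWorkerMain): locate the same slot and attach as sender. */
    shm_mq *my_mq = (shm_mq *)(ctx->pwCtx->errorQueue +
                               t_thrd.bgworker_cxt.ParallelWorkerNumber * PARALLEL_ERROR_QUEUE_SIZE);
    shm_mq_set_sender(my_mq, t_thrd.proc);
    pq_redirect_to_shm_mq(shm_mq_attach(my_mq, ctx, NULL));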
*/ + (void)MemoryContextSwitchTo(oldcontext); +} + +/* + * Reinitialize the dynamic shared memory segment for a parallel context such + * that we could launch workers for it again. + */ +void ReinitializeParallelDSM(ParallelContext *pcxt) +{ + /* Wait for any old workers to exit. */ + if (pcxt->nworkers_launched > 0) { + WaitForParallelWorkersToFinish(pcxt); + WaitForParallelWorkersToExit(pcxt); + pcxt->nworkers_launched = 0; + if (pcxt->known_attached_workers) { + pfree(pcxt->known_attached_workers); + pcxt->known_attached_workers = NULL; + pcxt->nknown_attached_workers = 0; + } + } + + knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg; + + /* Reset a few bits of fixed parallel state to a clean state. */ + cxt->pwCtx->last_xlog_end = 0; + + /* Recreate error queues (if they exist). */ + if (pcxt->nworkers > 0) { + for (int i = 0; i < pcxt->nworkers; ++i) { + char *start = cxt->pwCtx->errorQueue + i * PARALLEL_ERROR_QUEUE_SIZE; + shm_mq *mq = shm_mq_create(start, PARALLEL_ERROR_QUEUE_SIZE); + shm_mq_set_receiver(mq, t_thrd.proc); + pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL); + } + } +} + +/* + * Launch parallel workers. + */ +void LaunchParallelWorkers(ParallelContext *pcxt) +{ + BackgroundWorker worker; + int i; + bool any_registrations_failed = false; + + /* Skip this if we have no workers. */ + if (pcxt->nworkers == 0) + return; + + /* If we do have workers, we'd better have a DSM segment. */ + Assert(pcxt->seg != NULL); + + /* We might be running in a short-lived memory context. */ + MemoryContext oldcontext = MemoryContextSwitchTo(u_sess->top_transaction_mem_cxt); + + /* Configure a worker. */ + int rc = memset_s(&worker, sizeof(worker), 0, sizeof(worker)); + securec_check(rc, "", ""); + rc = sprintf_s(worker.bgw_name, BGW_MAXLEN, "parallel worker for PID %lu", t_thrd.proc_cxt.MyProcPid); + securec_check_ss(rc, "", ""); + rc = sprintf_s(worker.bgw_type, BGW_MAXLEN, "parallel worker"); + securec_check_ss(rc, "", ""); + worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION | BGWORKER_CLASS_PARALLEL; + worker.bgw_start_time = BgWorkerStart_ConsistentState; + worker.bgw_restart_time = BGW_NEVER_RESTART; + rc = strcpy_s(worker.bgw_library_name, BGW_MAXLEN, "postgres"); + securec_check(rc, "", ""); + rc = strcpy_s(worker.bgw_function_name, BGW_MAXLEN, "ParallelWorkerMain"); + securec_check(rc, "", ""); + worker.bgw_main_arg = PointerGetDatum(pcxt->seg); + worker.bgw_notify_pid = t_thrd.proc_cxt.MyProcPid; + worker.bgw_parallel_context = pcxt->seg; + + /* + * Start workers. + * + * The caller must be able to tolerate ending up with fewer workers than + * expected, so there is no need to throw an error here if registration + * fails. It wouldn't help much anyway, because registering the worker in + * no way guarantees that it will start up and initialize successfully. + */ + for (i = 0; i < pcxt->nworkers; ++i) { + rc = memcpy_s(worker.bgw_extra, BGW_EXTRALEN, &i, sizeof(int)); + securec_check(rc, "", ""); + if (!any_registrations_failed && RegisterDynamicBackgroundWorker(&worker, &pcxt->worker[i].bgwhandle)) { + shm_mq_set_handle(pcxt->worker[i].error_mqh, pcxt->worker[i].bgwhandle); + pcxt->nworkers_launched++; + } else { + /* + * If we weren't able to register the worker, then we've bumped up + * against the max_worker_processes limit, and future + * registrations will probably fail too, so arrange to skip them. 
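The re-initialization path is used when the same parallel context must be executed again, for example when the executor needs a fresh set of workers for a rescan; a minimal sketch:

    /* Re-run the same parallel operation with new workers. */
    ReinitializeParallelDSM(pcxt);   /* waits for the previous workers, resets error queues */
    LaunchParallelWorkers(pcxt);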
+ * But we still have to execute this code for the remaining slots + * to make sure that we forget about the error queues we budgeted + * for those workers. Otherwise, we'll wait for them to start, + * but they never will. + */ + any_registrations_failed = true; + pcxt->worker[i].bgwhandle = NULL; + shm_mq_detach(pcxt->worker[i].error_mqh); + pcxt->worker[i].error_mqh = NULL; + } + } + + /* + * Now that nworkers_launched has taken its final value, we can initialize + * known_attached_workers. + */ + if (pcxt->nworkers_launched > 0) { + pcxt->known_attached_workers = (bool *)palloc0(sizeof(bool) * pcxt->nworkers_launched); + pcxt->nknown_attached_workers = 0; + } + + /* Restore previous memory context. */ + (void)MemoryContextSwitchTo(oldcontext); +} + +/* + * Wait for all workers to attach to their error queues, and throw an error if + * any worker fails to do this. + * + * Callers can assume that if this function returns successfully, then the + * number of workers given by pcxt->nworkers_launched have initialized and + * attached to their error queues. Whether or not these workers are guaranteed + * to still be running depends on what code the caller asked them to run; + * this function does not guarantee that they have not exited. However, it + * does guarantee that any workers which exited must have done so cleanly and + * after successfully performing the work with which they were tasked. + * + * If this function is not called, then some of the workers that were launched + * may not have been started due to a fork() failure, or may have exited during + * early startup prior to attaching to the error queue, so nworkers_launched + * cannot be viewed as completely reliable. It will never be less than the + * number of workers which actually started, but it might be more. Any workers + * that failed to start will still be discovered by + * WaitForParallelWorkersToFinish and an error will be thrown at that time, + * provided that function is eventually reached. + * + * In general, the leader process should do as much work as possible before + * calling this function. fork() failures and other early-startup failures + * are very uncommon, and having the leader sit idle when it could be doing + * useful work is undesirable. However, if the leader needs to wait for + * all of its workers or for a specific worker, it may want to call this + * function before doing so. If not, it must make some other provision for + * the failure-to-start case, lest it wait forever. On the other hand, a + * leader which never waits for a worker that might not be started yet, or + * at least never does so prior to WaitForParallelWorkersToFinish(), need not + * call this function at all. + */ +void WaitForParallelWorkersToAttach(ParallelContext *pcxt) +{ + int i; + + /* Skip this if we have no launched workers. */ + if (pcxt->nworkers_launched == 0) + return; + + for (;;) { + /* + * This will process any parallel messages that are pending and it may + * also throw an error propagated from a worker. + */ + CHECK_FOR_INTERRUPTS(); + + for (i = 0; i < pcxt->nworkers_launched; ++i) { + shm_mq *mq = NULL; + int rc; + ThreadId pid; + + if (pcxt->known_attached_workers[i]) + continue; + + /* + * If error_mqh is NULL, then the worker has already exited + * cleanly. 
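Because registration can fail, callers are expected to cope with getting fewer workers than requested; a sketch of the pattern the comments above describe (run_serially is a hypothetical fallback, not part of the patch):

    LaunchParallelWorkers(pcxt);
    if (pcxt->nworkers_launched == 0) {
        /* No workers could be started; do all of the work in the leader. */
        run_serially();                        /* hypothetical */
    } else {
        /* Optional: block until every launched worker has attached to its error queue. */
        WaitForParallelWorkersToAttach(pcxt);
    }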
+ */ + if (pcxt->worker[i].error_mqh == NULL) { + pcxt->known_attached_workers[i] = true; + ++pcxt->nknown_attached_workers; + continue; + } + + BgwHandleStatus status = GetBackgroundWorkerPid(pcxt->worker[i].bgwhandle, &pid); + if (status == BGWH_STARTED) { + /* Has the worker attached to the error queue? */ + mq = shm_mq_get_queue(pcxt->worker[i].error_mqh); + if (shm_mq_get_sender(mq) != NULL) { + /* Yes, so it is known to be attached. */ + pcxt->known_attached_workers[i] = true; + ++pcxt->nknown_attached_workers; + } + } else if (status == BGWH_STOPPED) { + /* + * If the worker stopped without attaching to the error queue, + * throw an error. + */ + mq = shm_mq_get_queue(pcxt->worker[i].error_mqh); + if (shm_mq_get_sender(mq) == NULL) + ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("parallel worker failed to initialize"), + errhint("More details may be available in the server log."))); + + pcxt->known_attached_workers[i] = true; + ++pcxt->nknown_attached_workers; + } else { + /* + * Worker not yet started, so we must wait. The postmaster + * will notify us if the worker's state changes. Our latch + * might also get set for some other reason, but if so we'll + * just end up waiting for the same worker again. + */ + rc = WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET, -1); + if (rc & WL_LATCH_SET) { + ResetLatch(&t_thrd.proc->procLatch); + } + } + } + + /* If all workers are known to have started, we're done. */ + if (pcxt->nknown_attached_workers >= pcxt->nworkers_launched) { + Assert(pcxt->nknown_attached_workers == pcxt->nworkers_launched); + break; + } + } +} + +/* + * Wait for all workers to finish computing. + * + * Even if the parallel operation seems to have completed successfully, it's + * important to call this function afterwards. We must not miss any errors + * the workers may have thrown during the parallel operation, or any that they + * may yet throw while shutting down. + * + * Also, we want to update our notion of XactLastRecEnd based on worker + * feedback. + */ +void WaitForParallelWorkersToFinish(ParallelContext *pcxt) +{ + for (;;) { + bool anyone_alive = false; + int nfinished = 0; + int i; + + /* + * This will process any parallel messages that are pending, which may + * change the outcome of the loop that follows. It may also throw an + * error propagated from a worker. + */ + CHECK_FOR_INTERRUPTS(); + + for (i = 0; i < pcxt->nworkers_launched; ++i) { + /* + * If error_mqh is NULL, then the worker has already exited + * cleanly. If we have received a message through error_mqh from + * the worker, we know it started up cleanly, and therefore we're + * certain to be notified when it exits. + */ + if (pcxt->worker[i].error_mqh == NULL) + ++nfinished; + else if (pcxt->known_attached_workers[i]) { + anyone_alive = true; + break; + } + } + + if (!anyone_alive) { + /* If all workers are known to have finished, we're done. */ + if (nfinished >= pcxt->nworkers_launched) { + Assert(nfinished == pcxt->nworkers_launched); + break; + } + + /* + * We didn't detect any living workers, but not all workers are + * known to have exited cleanly. Either not all workers have + * launched yet, or maybe some of them failed to start or + * terminated abnormally. + */ + for (i = 0; i < pcxt->nworkers_launched; ++i) { + ThreadId pid; + + /* + * If the worker is BGWH_NOT_YET_STARTED or BGWH_STARTED, we + * should just keep waiting. If it is BGWH_STOPPED, then + * further investigation is needed. 
+ */ + if (pcxt->worker[i].error_mqh == NULL || pcxt->worker[i].bgwhandle == NULL || + GetBackgroundWorkerPid(pcxt->worker[i].bgwhandle, &pid) != BGWH_STOPPED) + continue; + + /* + * Check whether the worker ended up stopped without ever + * attaching to the error queue. If so, the postmaster was + * unable to fork the worker or it exited without initializing + * properly. We must throw an error, since the caller may + * have been expecting the worker to do some work before + * exiting. + */ + shm_mq *mq = shm_mq_get_queue(pcxt->worker[i].error_mqh); + if (shm_mq_get_sender(mq) == NULL) + ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("parallel worker failed to initialize"), + errhint("More details may be available in the server log."))); + + /* + * The worker is stopped, but is attached to the error queue. + * Unless there's a bug somewhere, this will only happen when + * the worker writes messages and terminates after the + * CHECK_FOR_INTERRUPTS() near the top of this function and + * before the call to GetBackgroundWorkerPid(). In that case, + * or latch should have been set as well and the right things + * will happen on the next pass through the loop. + */ + } + } + + (void)WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET, -1); + ResetLatch(&t_thrd.proc->procLatch); + } + + knl_u_parallel_context *cxt = (knl_u_parallel_context *)pcxt->seg; + if (cxt->pwCtx->last_xlog_end > t_thrd.xlog_cxt.XactLastRecEnd) + t_thrd.xlog_cxt.XactLastRecEnd = cxt->pwCtx->last_xlog_end; +} + +/* + * Wait for all workers to exit. + * + * This function ensures that workers have been completely shutdown. The + * difference between WaitForParallelWorkersToFinish and this function is + * that former just ensures that last message sent by worker backend is + * received by master backend whereas this ensures the complete shutdown. + */ +static void WaitForParallelWorkersToExit(ParallelContext *pcxt) +{ + /* Wait until the workers actually die. */ + for (int i = 0; i < pcxt->nworkers_launched; ++i) { + if (pcxt->worker == NULL || pcxt->worker[i].bgwhandle == NULL) { + continue; + } + + BgwHandleStatus status = WaitForBackgroundWorkerShutdown(pcxt->worker[i].bgwhandle); + /* + * If the postmaster kicked the bucket, we have no chance of cleaning + * up safely -- we won't be able to tell when our workers are actually + * dead. This doesn't necessitate a PANIC since they will all abort + * eventually, but we can't safely continue this session. + */ + if (status == BGWH_POSTMASTER_DIED) + ereport(FATAL, + (errcode(ERRCODE_ADMIN_SHUTDOWN), errmsg("postmaster exited during a parallel transaction"))); + + /* Release memory. */ + pfree(pcxt->worker[i].bgwhandle); + pcxt->worker[i].bgwhandle = NULL; + } +} + +/* + * Destroy a parallel context. + * + * If expecting a clean exit, you should use WaitForParallelWorkersToFinish() + * first, before calling this function. When this function is invoked, any + * remaining workers are forcibly killed; the dynamic shared memory segment + * is unmapped; and we then wait (uninterruptibly) for the workers to exit. + */ +void DestroyParallelContext(ParallelContext *pcxt) +{ + int i; + + /* + * Be careful about order of operations here! We remove the parallel + * context from the list before we do anything else; otherwise, if an + * error occurs during a subsequent step, we might try to nuke it again + * from AtEOXact_Parallel or AtEOSubXact_Parallel. + */ + dlist_delete(&pcxt->node); + + /* Kill each worker in turn, and forget their error queues. 
*/ + if (pcxt->worker != NULL) { + for (i = 0; i < pcxt->nworkers_launched; ++i) { + if (pcxt->worker[i].error_mqh != NULL) { + TerminateBackgroundWorker(pcxt->worker[i].bgwhandle); + + shm_mq_detach(pcxt->worker[i].error_mqh); + pcxt->worker[i].error_mqh = NULL; + } + } + } + + /* + * If this parallel context is actually in backend-private memory rather + * than shared memory, free that memory instead. + */ + if (pcxt->private_memory != NULL) { + pfree(pcxt->private_memory); + pcxt->private_memory = NULL; + } + + /* + * We can't finish transaction commit or abort until all of the workers + * have exited. This means, in particular, that we can't respond to + * interrupts at this stage. + */ + HOLD_INTERRUPTS(); + WaitForParallelWorkersToExit(pcxt); + RESUME_INTERRUPTS(); + + /* Free the worker array itself. */ + if (pcxt->worker != NULL) { + pfree(pcxt->worker); + pcxt->worker = NULL; + } + + /* + * If we have allocated a shared memory segment, detach it. This will + * implicitly detach the error queues, and any other shared memory queues, + * stored there. + */ + if (pcxt->seg != NULL) { + dsm_detach(&(pcxt->seg)); + pcxt->seg = NULL; + } + + /* Free memory. */ + pfree(pcxt->library_name); + pfree(pcxt->function_name); + pfree(pcxt); +} + +/* + * Are there any parallel contexts currently active? + */ +bool ParallelContextActive(void) +{ + return !dlist_is_empty(&t_thrd.bgworker_cxt.pcxt_list); +} + +/* + * Handle receipt of an interrupt indicating a parallel worker message. + * + * Note: this is called within a signal handler! All we can do is set a flag + * that will cause the next CHECK_FOR_INTERRUPTS() to invoke HandleParallelMessages(). + */ +void HandleParallelMessageInterrupt(void) +{ + InterruptPending = true; + t_thrd.bgworker_cxt.ParallelMessagePending = true; + SetLatch(&t_thrd.proc->procLatch); +} + +/* + * Handle any queued protocol messages received from parallel workers. + */ +void HandleParallelMessages(void) +{ + dlist_iter iter; + + /* + * This is invoked from ProcessInterrupts(), and since some of the + * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential + * for recursive calls if more signals are received while this runs. It's + * unclear that recursive entry would be safe, and it doesn't seem useful + * even if it is safe, so let's block interrupts until done. + */ + HOLD_INTERRUPTS(); + + /* + * Moreover, CurrentMemoryContext might be pointing almost anywhere. We + * don't want to risk leaking data into long-lived contexts, so let's do + * our work here in a private context that we can reset on each use. + */ + if (t_thrd.bgworker_cxt.hpm_context == NULL) /* first time through? */ + t_thrd.bgworker_cxt.hpm_context = + AllocSetContextCreate(TopMemoryContext, "HandleParallelMessages", ALLOCSET_DEFAULT_SIZES); + else + MemoryContextReset(t_thrd.bgworker_cxt.hpm_context); + + MemoryContext oldcontext = MemoryContextSwitchTo(t_thrd.bgworker_cxt.hpm_context); + + /* OK to process messages. Reset the flag saying there are more to do. 
*/ + t_thrd.bgworker_cxt.ParallelMessagePending = false; + + dlist_foreach(iter, &t_thrd.bgworker_cxt.pcxt_list) + { + ParallelContext *pcxt = dlist_container(ParallelContext, node, iter.cur); + if (pcxt->worker == NULL) + continue; + + for (int i = 0; i < pcxt->nworkers_launched; ++i) { + /* + * Read as many messages as we can from each worker, but stop when + * either (1) the worker's error queue goes away, which can happen + * if we receive a Terminate message from the worker; or (2) no + * more messages can be read from the worker without blocking. + */ + while (pcxt->worker[i].error_mqh != NULL) { + Size nbytes; + void *data = NULL; + + shm_mq_result res = shm_mq_receive(pcxt->worker[i].error_mqh, &nbytes, &data, true); + if (res == SHM_MQ_WOULD_BLOCK) { + break; + } else if (res == SHM_MQ_SUCCESS) { + StringInfoData msg; + + initStringInfo(&msg); + appendBinaryStringInfo(&msg, (const char *)data, nbytes); + HandleParallelMessage(pcxt, i, &msg); + pfree(msg.data); + } else { + ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("lost connection to parallel worker"))); + } + } + } + } + + (void)MemoryContextSwitchTo(oldcontext); + + /* Might as well clear the context on our way out */ + MemoryContextReset(t_thrd.bgworker_cxt.hpm_context); + + RESUME_INTERRUPTS(); +} + +/* + * Handle a single protocol message received from a single parallel worker. + */ +static void HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg) +{ + if (pcxt->known_attached_workers != NULL && !pcxt->known_attached_workers[i]) { + pcxt->known_attached_workers[i] = true; + pcxt->nknown_attached_workers++; + } + + char msgtype = (char)pq_getmsgbyte(msg); + + switch (msgtype) { + case 'K': /* BackendKeyData */ + { + ThreadId pid = pq_getmsgint64(msg); + + (void)pq_getmsgint64(msg); /* discard cancel key */ + pq_getmsgend(msg); + pcxt->worker[i].pid = pid; + break; + } + + case 'E': /* ErrorResponse */ + case 'N': /* NoticeResponse */ + { + ErrorData edata; + + /* Parse ErrorResponse or NoticeResponse. */ + pq_parse_errornotice(msg, &edata); + + /* Death of a worker isn't enough justification for suicide. */ + edata.elevel = Min(edata.elevel, ERROR); + + /* + * If desired, add a context line to show that this is a + * message propagated from a parallel worker. Otherwise, it + * can sometimes be confusing to understand what actually + * happened. (We don't do this in FORCE_PARALLEL_REGRESS mode + * because it causes test-result instability depending on + * whether a parallel worker is actually used or not.) + */ + if (u_sess->attr.attr_sql.force_parallel_mode != FORCE_PARALLEL_REGRESS) { + if (edata.context) { + /* 1 for '\0', 1 for '\n' */ + Size len = strlen(edata.context) + strlen("parallel worker") + 2; + edata.context = (char *)palloc(len); + int rc = sprintf_s(edata.context, len, "%s\n%s", edata.context, "parallel worker"); + securec_check_ss(rc, "", ""); + } else { + edata.context = pstrdup(_("parallel worker")); + } + } + + /* + * Context beyond that should use the error context callbacks + * that were in effect when the ParallelContext was created, + * not the current ones. + */ + ErrorContextCallback *save_error_context_stack = t_thrd.log_cxt.error_context_stack; + t_thrd.log_cxt.error_context_stack = pcxt->error_context_stack; + + /* Rethrow error or print notice. */ + ThrowErrorData(&edata); + + /* Not an error, so restore previous context stack. 
*/ + t_thrd.log_cxt.error_context_stack = save_error_context_stack; + + break; + } + + case 'A': /* NotifyResponse */ + { + /* Propagate NotifyResponse. */ + uint32 pid = pq_getmsgint(msg, 4); + const char *channel = pq_getmsgrawstring(msg); + const char *payload = pq_getmsgrawstring(msg); + pq_endmessage(msg); + + NotifyMyFrontEnd(channel, payload, pid); + + break; + } + + case 'X': /* Terminate, indicating clean exit */ + { + shm_mq_detach(pcxt->worker[i].error_mqh); + pcxt->worker[i].error_mqh = NULL; + break; + } + + default: { + ereport(ERROR, + (errmsg("unrecognized message type received from parallel worker: %c (message length %d bytes)", + msgtype, msg->len))); + } + } +} + +/* + * End-of-subtransaction cleanup for parallel contexts. + * + * Currently, it's forbidden to enter or leave a subtransaction while + * parallel mode is in effect, so we could just blow away everything. But + * we may want to relax that restriction in the future, so this code + * contemplates that there may be multiple subtransaction IDs in pcxt_list. + */ +void AtEOSubXact_Parallel(bool isCommit, SubTransactionId mySubId) +{ + while (!dlist_is_empty(&t_thrd.bgworker_cxt.pcxt_list)) { + ParallelContext *pcxt = dlist_head_element(ParallelContext, node, &t_thrd.bgworker_cxt.pcxt_list); + if (pcxt->subid != mySubId) + break; + if (isCommit) + ereport(WARNING, (errmsg("leaked parallel context"))); + DestroyParallelContext(pcxt); + } +} + +/* + * End-of-transaction cleanup for parallel contexts. + */ +void AtEOXact_Parallel(bool isCommit) +{ + while (!dlist_is_empty(&t_thrd.bgworker_cxt.pcxt_list)) { + ParallelContext *pcxt = dlist_head_element(ParallelContext, node, &t_thrd.bgworker_cxt.pcxt_list); + if (isCommit) + ereport(WARNING, (errmsg("leaked parallel context"))); + DestroyParallelContext(pcxt); + } +} + +/* + * Main entrypoint for parallel workers. + */ +void ParallelWorkerMain(Datum main_arg) +{ + StringInfoData msgbuf; + + knl_u_parallel_context *ctx = (knl_u_parallel_context *)DatumGetPointer(main_arg); + + /* Set flag to indicate that we're initializing a parallel worker. */ + t_thrd.bgworker_cxt.InitializingParallelWorker = true; + + /* Establish signal handlers. */ + gspqsignal(SIGTERM, die); + BackgroundWorkerUnblockSignals(); + + /* Determine and set our parallel worker number. */ + Assert(t_thrd.bgworker_cxt.ParallelWorkerNumber == -1); + int rc = memcpy_s(&t_thrd.bgworker_cxt.ParallelWorkerNumber, sizeof(int), + t_thrd.bgworker_cxt.my_bgworker_entry->bgw_extra, sizeof(int)); + securec_check(rc, "", ""); + + /* Set up a memory context to work in, just for cleanliness. */ + CurrentMemoryContext = AllocSetContextCreate(TopMemoryContext, "Parallel worker", ALLOCSET_DEFAULT_SIZES); + + /* Arrange to signal the leader if we exit. */ + on_shmem_exit(ParallelWorkerShutdown, (Datum)0); + + /* + * Now we can find and attach to the error queue provided for us. That's + * good, because until we do that, any errors that happen here will not be + * reported back to the process that requested that this worker be + * launched. 
+ */ + char *error_queue_space = ctx->pwCtx->errorQueue; + shm_mq *mq = (shm_mq *)(error_queue_space + t_thrd.bgworker_cxt.ParallelWorkerNumber * PARALLEL_ERROR_QUEUE_SIZE); + shm_mq_set_sender(mq, t_thrd.proc); + shm_mq_handle *mqh = shm_mq_attach(mq, ctx, NULL); + pq_redirect_to_shm_mq(mqh); + pq_set_parallel_master(ctx->pwCtx->parallel_master_pid, ctx->pwCtx->parallel_master_backend_id); + + /* + * Send a BackendKeyData message to the process that initiated parallelism + * so that it has access to our PID before it receives any other messages + * from us. Our cancel key is sent, too, since that's the way the + * protocol message is defined, but it won't actually be used for anything + * in this case. + */ + pq_beginmessage(&msgbuf, 'K'); + pq_sendint64(&msgbuf, t_thrd.proc_cxt.MyProcPid); + pq_sendint64(&msgbuf, t_thrd.proc_cxt.MyCancelKey); + pq_endmessage(&msgbuf); + + /* + * Hooray! Primary initialization is complete. Now, we need to set up our + * backend-local state to match the original backend. + */ + /* + * Restore transaction and statement start-time timestamps. This must + * happen before anything that would start a transaction, else asserts in + * xact.c will fire. + */ + SetParallelStartTimestamps(ctx->pwCtx->xact_ts, ctx->pwCtx->stmt_ts); + + /* + * Identify the entry point to be called. In theory this could result in + * loading an additional library, though most likely the entry point is in + * the core backend or in a library we just loaded. + */ + parallel_worker_main_type entrypt = + LookupParallelWorkerFunction(ctx->pwCtx->library_name, ctx->pwCtx->function_name); + + /* Restore database connection. */ + BackgroundWorkerInitializeConnectionByOid(ctx->pwCtx->database_id, ctx->pwCtx->authenticated_user_id, 0); + + /* + * Set the client encoding to the database encoding, since that is what + * the leader will expect. + */ + (void)SetClientEncoding(GetDatabaseEncoding()); + + /* Crank up a transaction state appropriate to a parallel worker. */ + StartParallelWorkerTransaction(ctx->pwCtx); + + /* Restore combo CID state. */ + u_sess->utils_cxt.usedComboCids = ctx->pwCtx->usedComboCids; + u_sess->utils_cxt.comboCids = ctx->pwCtx->comboCids; + u_sess->utils_cxt.sizeComboCids = ctx->pwCtx->sizeComboCids; + u_sess->utils_cxt.comboHash = ctx->pwCtx->comboHash; + + /* Restore namespace search path */ + u_sess->attr.attr_common.namespace_search_path = ctx->pwCtx->namespace_search_path; + + /* Restore transaction snapshot. */ + RestoreTransactionSnapshot(RestoreSnapshot(ctx->pwCtx->tsnapspace, ctx->pwCtx->tsnapspace_len), + ctx->pwCtx->parallel_master_pgproc); + /* Restore active snapshot. */ + PushActiveSnapshot(RestoreSnapshot(ctx->pwCtx->asnapspace, ctx->pwCtx->asnapspace_len)); + + /* + * We've changed which tuples we can see, and must therefore invalidate + * system caches. + */ + InvalidateSystemCaches(); + + /* + * Restore current role id. Skip verifying whether session user is + * allowed to become this role and blindly restore the leader's state for + * current role. + */ + SetCurrentRoleId(ctx->pwCtx->outer_user_id, ctx->pwCtx->is_superuser); + + /* Restore user ID and security context. */ + SetUserIdAndSecContext(ctx->pwCtx->current_user_id, ctx->pwCtx->sec_context); + + /* Restore temp-namespace state to ensure search path matches leader's. */ + SetTempNamespaceState(ctx->pwCtx->temp_namespace_id, ctx->pwCtx->temp_toast_namespace_id); + + /* Restore relmapper state. 
*/ + u_sess->relmap_cxt.active_shared_updates = ctx->pwCtx->active_shared_updates; + u_sess->relmap_cxt.active_local_updates = ctx->pwCtx->active_local_updates; + + /* + * We've initialized all of our state now; nothing should change + * hereafter. + */ + t_thrd.bgworker_cxt.InitializingParallelWorker = false; + EnterParallelMode(); + + /* + * Time to do the real work: invoke the caller-supplied code. + */ + entrypt(ctx); + + /* Must exit parallel mode to pop active snapshot. */ + ExitParallelMode(); + + /* Must pop active snapshot so snapmgr.c doesn't complain. */ + PopActiveSnapshot(); + + /* Shut down the parallel-worker transaction. */ + EndParallelWorkerTransaction(); + + /* Report success. */ + pq_putmessage('X', NULL, 0); +} + +/* + * Update shared memory with the ending location of the last WAL record we + * wrote, if it's greater than the value already stored there. + */ +void ParallelWorkerReportLastRecEnd(XLogRecPtr last_xlog_end) +{ + knl_u_parallel_context *ctx = (knl_u_parallel_context *)t_thrd.bgworker_cxt.my_bgworker_entry->bgw_parallel_context; + Assert(ctx->pwCtx != NULL); + SpinLockAcquire(&ctx->pwCtx->mutex); + if (ctx->pwCtx->last_xlog_end < last_xlog_end) { + ctx->pwCtx->last_xlog_end = last_xlog_end; + } + SpinLockRelease(&ctx->pwCtx->mutex); +} + +/* + * Make sure the leader tries to read from our error queue one more time. + * This guards against the case where we exit uncleanly without sending an + * ErrorResponse to the leader, for example because some code calls proc_exit + * directly. + */ +static void ParallelWorkerShutdown(int code, Datum arg) +{ + (void)SendProcSignal(t_thrd.msqueue_cxt.pq_mq_parallel_master_pid, PROCSIG_PARALLEL_MESSAGE, + t_thrd.msqueue_cxt.pq_mq_parallel_master_backend_id); +} + +/* + * Look up (and possibly load) a parallel worker entry point function. + * + * For functions contained in the core code, we use library name "postgres" + * and consult the InternalParallelWorkers array. External functions are + * looked up, and loaded if necessary, using load_external_function(). + * + * The point of this is to pass function names as strings across process + * boundaries. We can't pass actual function addresses because of the + * possibility that the function has been loaded at a different address + * in a different process. This is obviously a hazard for functions in + * loadable libraries, but it can happen even for functions in the core code + * on platforms using EXEC_BACKEND (e.g., Windows). + * + * At some point it might be worthwhile to get rid of InternalParallelWorkers[] + * in favor of applying load_external_function() for core functions too; + * but that raises portability issues that are not worth addressing now. + */ +static parallel_worker_main_type LookupParallelWorkerFunction(const char *libraryname, const char *funcname) +{ + /* + * If the function is to be loaded from postgres itself, search the + * InternalParallelWorkers array. 
+ */ + if (strcmp(libraryname, "postgres") == 0) { + for (size_t i = 0; i < lengthof(InternalParallelWorkers); i++) { + if (strcmp(InternalParallelWorkers[i].fn_name, funcname) == 0) + return InternalParallelWorkers[i].fn_addr; + } + + ereport(ERROR, (errmsg("internal function \"%s\" not found", funcname))); + } + + ereport(ERROR, (errmsg("library\"%s\" function \"%s\" not supported", libraryname, funcname))); + return NULL; +} + diff --git a/src/gausskernel/storage/access/transam/varsup.cpp b/src/gausskernel/storage/access/transam/varsup.cpp index b96a1a8ea..f33fec74f 100755 --- a/src/gausskernel/storage/access/transam/varsup.cpp +++ b/src/gausskernel/storage/access/transam/varsup.cpp @@ -117,6 +117,15 @@ TransactionId GetNewTransactionId(bool isSubXact) bool incrementXid = true; #endif + /* + * Workers synchronize transaction state at the beginning of each parallel + * operation, so we can't account for new XIDs after that point. + */ + if (IsInParallelMode()) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot assign TransactionIds during a parallel operation"))); + } + /* * During bootstrap initialization, we return the special bootstrap * transaction id. diff --git a/src/gausskernel/storage/access/transam/xact.cpp b/src/gausskernel/storage/access/transam/xact.cpp index 60900c8e4..007d299da 100644 --- a/src/gausskernel/storage/access/transam/xact.cpp +++ b/src/gausskernel/storage/access/transam/xact.cpp @@ -36,6 +36,7 @@ #include "access/cstore_am.h" #include "access/cstore_rewrite.h" #include "access/multixact.h" +#include "access/parallel.h" #include "access/subtrans.h" #include "access/transam.h" #include "access/twophase.h" @@ -141,6 +142,7 @@ typedef enum TBlockState { /* transaction block states */ TBLOCK_BEGIN, /* starting transaction block */ TBLOCK_INPROGRESS, /* live transaction */ + TBLOCK_PARALLEL_INPROGRESS, /* live transaction inside parallel worker */ TBLOCK_END, /* COMMIT received */ TBLOCK_ABORT, /* failed xact, awaiting ROLLBACK */ TBLOCK_ABORT_END, /* failed xact, ROLLBACK received */ @@ -189,6 +191,7 @@ struct TransactionStateData { bool prevXactReadOnly; /* entry-time xact r/o state */ bool startedInRecovery; /* did we start in recovery? */ bool didLogXid; /* has xid been included in WAL record? */ + int parallelModeLevel; /* Enter/ExitParallelMode counter */ struct TransactionStateData* parent; /* back link to parent */ /* which storage engine tables are used in current transaction for D/I/U/S statements */ @@ -225,6 +228,7 @@ static THR_LOCAL TransactionStateData TopTransactionStateData = { false, /* entry-time xact r/o state */ false, /* startedInRecovery */ false, /* didLogXid */ + 0, /* parallelModeLevel */ NULL, /* link to parent state block */ SE_TYPE_UNSPECIFIED /* storage engine used in transaction */ }; @@ -717,6 +721,15 @@ static void AssignTransactionId(TransactionState s) Assert(!TransactionIdIsValid(s->transactionId)); Assert(s->state == TRANS_INPROGRESS); + /* + * Workers synchronize transaction state at the beginning of each parallel + * operation, so we can't account for new XIDs at this point. + */ + if (IsInParallelMode() || IsParallelWorker()) { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot assign XIDs during a parallel operation"))); + } + /* * Ensure parent(s) have XIDs, so that a child always has an XID later * than its parent. 
Musn't recurse here, or we might get a stack overflow @@ -918,8 +931,16 @@ CommandId GetCurrentCommandId(bool used) #endif /* this is global to a transaction, not subtransaction-local */ - if (used) + if (used) { + /* + * Forbid setting currentCommandIdUsed in a parallel worker, because + * we have no provision for communicating this back to the master. We + * could relax this restriction when currentCommandIdUsed was already + * true at the start of the parallel operation. + */ + Assert(!IsParallelWorker()); t_thrd.xact_cxt.currentCommandIdUsed = true; + } return t_thrd.xact_cxt.currentCommandId; } @@ -1007,12 +1028,18 @@ TimestampTz GetCurrentStmtsysTimestamp(void) /* * SetCurrentStatementStartTimestamp * - * The time on the DN is obtained from the CN. If the CN does not deliver the time, - * the time of the current DN is used. + * The time on the DN is obtained from the CN. If the CN does not deliver the time, + * the time of the current DN is used. + * In a parallel worker, this should already have been provided by a call + * to SetParallelStartTimestamps(). */ void SetCurrentStatementStartTimestamp(void) { - t_thrd.xact_cxt.stmtStartTimestamp = GetCurrentTimestamp(); + if (!IsParallelWorker()) { + t_thrd.xact_cxt.stmtStartTimestamp = GetCurrentTimestamp(); + } else { + Assert(t_thrd.xact_cxt.stmtStartTimestamp != 0); + } } void SetStatementStartTimestamp(TimestampTz timestamp) @@ -1169,7 +1196,42 @@ bool TransactionStartedDuringRecovery(void) } /* - * CommandCounterIncrement + * EnterParallelMode + */ +void EnterParallelMode(void) +{ + TransactionState s = CurrentTransactionState; + Assert(s->parallelModeLevel >= 0); + ++s->parallelModeLevel; +} + +/* + * ExitParallelMode + */ +void ExitParallelMode(void) +{ + TransactionState s = CurrentTransactionState; + Assert(s->parallelModeLevel > 0); + Assert(s->parallelModeLevel > 1 || !ParallelContextActive()); + --s->parallelModeLevel; +} + +/* + * IsInParallelMode + * + * Are we in a parallel operation, as either the master or a worker? Check + * this to prohibit operations that change backend-local state expected to + * match across all workers. Mere caches usually don't require such a + * restriction. State modified in a strict push/pop fashion, such as the + * active snapshot stack, is often fine. + */ +bool IsInParallelMode(void) +{ + return CurrentTransactionState->parallelModeLevel != 0; +} + +/* + * CommandCounterIncrement */ void CommandCounterIncrement(void) { @@ -1180,6 +1242,16 @@ void CommandCounterIncrement(void) * overflow, and keeps no-op CommandCounterIncrement operations cheap. */ if (t_thrd.xact_cxt.currentCommandIdUsed) { + /* + * Workers synchronize transaction state at the beginning of each + * parallel operation, so we can't account for new commands after that + * point. + */ + if (IsInParallelMode() || IsParallelWorker()) { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot start commands during a parallel operation"))); + } + t_thrd.xact_cxt.currentCommandId += 1; if (t_thrd.xact_cxt.currentCommandId == InvalidCommandId) { /* check for overflow */ t_thrd.xact_cxt.currentCommandId -= 1; @@ -1790,7 +1862,6 @@ static void AtSubCommit_childXids(void) * here or in the calculation of new_nChildXids.) 
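Since parallelModeLevel is a counter rather than a flag, parallel-mode sections nest; a sketch of the contract the three functions above enforce:

    Assert(!IsInParallelMode());
    EnterParallelMode();               /* level 1 */
    EnterParallelMode();               /* level 2: nesting is allowed */
    Assert(IsInParallelMode());
    ExitParallelMode();                /* back to level 1, still in parallel mode */
    Assert(IsInParallelMode());
    ExitParallelMode();                /* level 0; asserts no ParallelContext is still active */
    Assert(!IsInParallelMode());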
*/ new_maxChildXids = Min(new_nChildXids * 2, (int)(MaxAllocSize / sizeof(TransactionId))); - if (new_maxChildXids < new_nChildXids) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), @@ -2272,12 +2343,22 @@ static void StartTransaction(bool begin_on_gtm) Assert(s->prevSecContext == 0); /* - * set transaction_timestamp() (a/k/a now()). We want this to be the same - * as the first command's statement_timestamp(), so don't do a fresh - * GetCurrentTimestamp() call (which'd be expensive anyway). Also, mark - * xactStopTimestamp as unset. + * set transaction_timestamp() (a/k/a now()). Normally, we want this to + * be the same as the first command's statement_timestamp(), so don't do a + * fresh GetCurrentTimestamp() call (which'd be expensive anyway). But + * for transactions started inside procedures (i.e., nonatomic SPI + * contexts), we do need to advance the timestamp. Also, in a parallel + * worker, the timestamp should already have been provided by a call to + * SetParallelStartTimestamps. */ - t_thrd.xact_cxt.xactStartTimestamp = t_thrd.xact_cxt.stmtStartTimestamp; + if (!IsParallelWorker()) { + if (!SPI_inside_nonatomic_context()) + t_thrd.xact_cxt.xactStartTimestamp = t_thrd.xact_cxt.stmtStartTimestamp; + else + t_thrd.xact_cxt.xactStartTimestamp = GetCurrentTimestamp(); + } else { + Assert(t_thrd.xact_cxt.xactStartTimestamp != 0); + } t_thrd.xact_cxt.xactStopTimestamp = 0; s->txnKey.txnHandle = InvalidTransactionHandle; @@ -2372,6 +2453,12 @@ static void CommitTransaction(bool stpCommit) TransactionId latestXid; bool barrierLockHeld = false; bool use_old_version_gid = GTM_MODE || (t_thrd.proc->workingVersionNum <= GTM_OLD_VERSION_NUM); + bool is_parallel_worker = (s->blockState == TBLOCK_PARALLEL_INPROGRESS); + + /* Enforce parallel mode restrictions during parallel worker commit. */ + if (is_parallel_worker) { + EnterParallelMode(); + } ShowTransactionState("CommitTransaction"); @@ -2537,6 +2624,10 @@ static void CommitTransaction(bool stpCommit) * of this stuff could still throw an error, which would switch us into * the transaction-abort path. */ + /* If we might have parallel workers, clean them up now. */ + if (IsInParallelMode()) { + AtEOXact_Parallel(true); + } /* Shut down the deferred-trigger manager */ AfterTriggerEndXact(true); @@ -2662,6 +2753,7 @@ static void CommitTransaction(bool stpCommit) */ TransState oldstate = s->state; s->state = TRANS_COMMIT; + s->parallelModeLevel = 0; /* Wait data replicate */ if (!IsInitdb && !g_instance.attr.attr_storage.enable_mix_replication) { @@ -2683,7 +2775,21 @@ static void CommitTransaction(bool stpCommit) /* * Here is where we really truly local commit. */ - latestXid = RecordTransactionCommit(); + if (!is_parallel_worker) { + latestXid = RecordTransactionCommit(); + } else { + /* + * We must not mark our XID committed; the parallel master is + * responsible for that. + */ + latestXid = InvalidTransactionId; + + /* + * Make sure the master will know about any WAL we wrote before it + * commits. 
+ */ + ParallelWorkerReportLastRecEnd(t_thrd.xlog_cxt.XactLastRecEnd); + } if (TwoPhaseCommit) StmtRetrySetTransactionCommitFlag(true); @@ -2859,7 +2965,7 @@ static void CommitTransaction(bool stpCommit) AtEOXact_SPI(true, false, stpCommit); AtEOXact_on_commit_actions(true); if (!stpCommit){ - AtEOXact_Namespace(true); + AtEOXact_Namespace(true, is_parallel_worker); } AtEOXact_SMgr(); AtEOXact_Files(); @@ -2893,6 +2999,9 @@ static void CommitTransaction(bool stpCommit) s->maxChildXids = 0; s->storageEngineType = SE_TYPE_UNSPECIFIED; + t_thrd.xact_cxt.XactTopTransactionId = InvalidTransactionId; + t_thrd.xact_cxt.nParallelCurrentXids = 0; + #ifdef PGXC s->isLocalParameterUsed = false; ForgetTransactionLocalNode(); @@ -3116,6 +3225,8 @@ static void PrepareTransaction(bool stpCommit) char* nodestring = NULL; #endif + Assert(!IsInParallelMode()); + ShowTransactionState("PrepareTransaction"); /* @@ -3410,7 +3521,7 @@ static void PrepareTransaction(bool stpCommit) * since we push the search path hasn't pop yet. */ if (!stpCommit) { - AtEOXact_Namespace(true); + AtEOXact_Namespace(true, false); } AtEOXact_SMgr(); AtEOXact_Files(); @@ -3703,6 +3814,7 @@ static void AbortTransaction(bool PerfectRollback, bool stpRollback) t_thrd.xact_cxt.needRemoveTwophaseState = false; /* check the current transaction state */ + bool is_parallel_worker = (s->blockState == TBLOCK_PARALLEL_INPROGRESS); if (s->state != TRANS_INPROGRESS && s->state != TRANS_PREPARE) ereport(WARNING, (errcode(ERRCODE_WARNING), errmsg("AbortTransaction while in %s state", TransStateAsString(s->state)))); @@ -3735,6 +3847,12 @@ static void AbortTransaction(bool PerfectRollback, bool stpRollback) SetUserIdAndSecContext(s->prevUser, s->prevSecContext); u_sess->exec_cxt.is_exec_trigger_func = false; + /* If in parallel mode, clean up workers and exit parallel mode. */ + if (IsInParallelMode()) { + AtEOXact_Parallel(false); + s->parallelModeLevel = 0; + } + /* * do abort processing */ @@ -3753,9 +3871,22 @@ static void AbortTransaction(bool PerfectRollback, bool stpRollback) /* * Advertise the fact that we aborted in pg_clog (assuming that we got as - * far as assigning an XID to advertise). + * far as assigning an XID to advertise). But if we're inside a parallel + * worker, skip this; the user backend must be the one to write the abort + * record. */ - latestXid = RecordTransactionAbort(false); + if (!is_parallel_worker) { + latestXid = RecordTransactionAbort(false); + } else { + latestXid = InvalidTransactionId; + + /* + * Since the parallel master won't get our value of XactLastRecEnd in + * this case, we nudge WAL-writer ourselves in this case. See related + * comments in RecordTransactionAbort for why this matters. + */ + XLogSetAsyncXactLSN(t_thrd.xlog_cxt.XactLastRecEnd); + } t_thrd.pgxact->prepare_xid = InvalidTransactionId; @@ -3807,7 +3938,7 @@ static void AbortTransaction(bool PerfectRollback, bool stpRollback) AtEOXact_SPI(false, stpRollback, false); AtEOXact_on_commit_actions(false); if (!stpRollback) { - AtEOXact_Namespace(false); + AtEOXact_Namespace(false, is_parallel_worker); } AtEOXact_SMgr(); AtEOXact_Files(); @@ -3942,9 +4073,10 @@ void StartTransactionCommand(bool stpRollback) } break; - /* These cases are invalid. */ + /* These cases are invalid. 
*/ case TBLOCK_STARTED: case TBLOCK_BEGIN: + case TBLOCK_PARALLEL_INPROGRESS: case TBLOCK_SUBBEGIN: case TBLOCK_END: case TBLOCK_SUBRELEASE: @@ -3981,9 +4113,12 @@ void CommitTransactionCommand(bool stpCommit) /* * This shouldn't happen, because it means the previous * StartTransactionCommand didn't set the STARTED state - * appropriately. + * appropriately, while TBLOCK_PARALLEL_INPROGRESS + * should be ended by EndParallelWorkerTranaction(), + * not this function. */ case TBLOCK_DEFAULT: + case TBLOCK_PARALLEL_INPROGRESS: ereport(FATAL, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("CommitTransactionCommand: unexpected state %s", BlockStateAsString(s->blockState)))); @@ -4299,6 +4434,7 @@ void AbortCurrentTransaction(bool stpRollback) * ABORT state. We will stay in ABORT until we get a ROLLBACK. */ case TBLOCK_INPROGRESS: + case TBLOCK_PARALLEL_INPROGRESS: AbortTransaction(false, stpRollback); s->blockState = TBLOCK_ABORT; /* CleanupTransaction happens when we exit TBLOCK_ABORT_END */ @@ -4795,6 +4931,7 @@ void BeginTransactionBlock(void) /* Already a transaction block in progress. */ case TBLOCK_INPROGRESS: + case TBLOCK_PARALLEL_INPROGRESS: case TBLOCK_SUBINPROGRESS: case TBLOCK_ABORT: case TBLOCK_SUBABORT: @@ -5023,6 +5160,15 @@ bool EndTransactionBlock(void) result = true; break; + /* + * The user issued a COMMIT that somehow ran inside a parallel + * worker. We can't cope with that. + */ + case TBLOCK_PARALLEL_INPROGRESS: + ereport(FATAL, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("cannot commit during a parallel operation"))); + break; + /* These cases are invalid. */ case TBLOCK_DEFAULT: case TBLOCK_BEGIN: @@ -5125,6 +5271,15 @@ void UserAbortTransactionBlock(void) s->blockState = TBLOCK_ABORT_PENDING; break; + /* + * The user issued an ABORT that somehow ran inside a parallel + * worker. We can't cope with that. + */ + case TBLOCK_PARALLEL_INPROGRESS: + ereport(FATAL, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("cannot abort during a parallel operation"))); + break; + /* These cases are invalid. */ case TBLOCK_DEFAULT: case TBLOCK_BEGIN: @@ -5160,6 +5315,18 @@ void DefineSavepoint(const char* name) { TransactionState s = CurrentTransactionState; + /* + * Workers synchronize transaction state at the beginning of each parallel + * operation, so we can't account for new subtransactions after that + * point. (Note that this check will certainly error out if s->blockState + * is TBLOCK_PARALLEL_INPROGRESS, so we can treat that as an invalid case + * below.) + */ + if (IsInParallelMode()) { + ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot define savepoints during a parallel operation"))); + } + switch (s->blockState) { case TBLOCK_INPROGRESS: case TBLOCK_SUBINPROGRESS: @@ -5180,6 +5347,7 @@ void DefineSavepoint(const char* name) case TBLOCK_DEFAULT: case TBLOCK_STARTED: case TBLOCK_BEGIN: + case TBLOCK_PARALLEL_INPROGRESS: case TBLOCK_SUBBEGIN: case TBLOCK_END: case TBLOCK_SUBRELEASE: @@ -5230,6 +5398,18 @@ void ReleaseSavepoint(List* options) ListCell* cell = NULL; char* name = NULL; + /* + * Workers synchronize transaction state at the beginning of each parallel + * operation, so we can't account for transaction state change after that + * point. (Note that this check will certainly error out if s->blockState + * is TBLOCK_PARALLEL_INPROGRESS, so we can treat that as an invalid case + * below.) 
+ */ + if (IsInParallelMode()) { + ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot release savepoints during a parallel operation"))); + } + switch (s->blockState) { /* * We can't rollback to a savepoint if there is no savepoint @@ -5250,6 +5430,7 @@ void ReleaseSavepoint(List* options) case TBLOCK_DEFAULT: case TBLOCK_STARTED: case TBLOCK_BEGIN: + case TBLOCK_PARALLEL_INPROGRESS: case TBLOCK_SUBBEGIN: case TBLOCK_END: case TBLOCK_SUBRELEASE: @@ -5325,6 +5506,19 @@ void RollbackToSavepoint(List* options) ListCell* cell = NULL; char* name = NULL; + /* + * Workers synchronize transaction state at the beginning of each parallel + * operation, so we can't account for transaction state change after that + * point. (Note that this check will certainly error out if s->blockState + * is TBLOCK_PARALLEL_INPROGRESS, so we can treat that as an invalid case + * below.) + */ + if (IsInParallelMode()) { + ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot rollback to savepoints during a parallel operation"))); + } + + switch (s->blockState) { /* * We can't rollback to a savepoint if there is no savepoint @@ -5344,6 +5538,7 @@ void RollbackToSavepoint(List* options) case TBLOCK_DEFAULT: case TBLOCK_STARTED: case TBLOCK_BEGIN: + case TBLOCK_PARALLEL_INPROGRESS: case TBLOCK_SUBBEGIN: case TBLOCK_END: case TBLOCK_SUBRELEASE: @@ -5435,6 +5630,21 @@ void BeginInternalSubTransaction(const char* name) { TransactionState s = CurrentTransactionState; + /* + * Workers synchronize transaction state at the beginning of each parallel + * operation, so we can't account for new subtransactions after that + * point. We might be able to make an exception for the type of + * subtransaction established by this function, which is typically used in + * contexts where we're going to release or roll back the subtransaction + * before proceeding further, so that no enduring change to the + * transaction state occurs. For now, however, we prohibit this case along + * with all the others. + */ + if (IsInParallelMode()) { + ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot start subtransactions during a parallel operation"))); + } + switch (s->blockState) { case TBLOCK_STARTED: case TBLOCK_INPROGRESS: @@ -5456,6 +5666,7 @@ void BeginInternalSubTransaction(const char* name) /* These cases are invalid. */ case TBLOCK_DEFAULT: case TBLOCK_BEGIN: + case TBLOCK_PARALLEL_INPROGRESS: case TBLOCK_SUBBEGIN: case TBLOCK_SUBRELEASE: case TBLOCK_SUBCOMMIT: @@ -5487,6 +5698,18 @@ void ReleaseCurrentSubTransaction(void) { TransactionState s = CurrentTransactionState; + /* + * Workers synchronize transaction state at the beginning of each parallel + * operation, so we can't account for commit of subtransactions after that + * point. This should not happen anyway. Code calling this would + * typically have called BeginInternalSubTransaction() first, failing + * there. 
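All of these guards rest on a simple parallel-mode counter kept in the transaction state. A minimal sketch of that bookkeeping, mirroring the EnterParallelMode()/ExitParallelMode()/IsInParallelMode() functions this patch declares in xact.h (the real bodies live in xact.cpp and may differ in detail):

void EnterParallelModeSketch(void)
{
    TransactionState s = CurrentTransactionState;

    Assert(s->parallelModeLevel >= 0);
    ++s->parallelModeLevel;
}

void ExitParallelModeSketch(void)
{
    TransactionState s = CurrentTransactionState;

    Assert(s->parallelModeLevel > 0);
    --s->parallelModeLevel;
}

bool IsInParallelModeSketch(void)
{
    return CurrentTransactionState->parallelModeLevel != 0;
}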
+ */ + if (IsInParallelMode()) { + ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot commit subtransactions during a parallel operation"))); + } + if (s->blockState != TBLOCK_SUBINPROGRESS) { ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), @@ -5520,6 +5743,7 @@ void RollbackAndReleaseCurrentSubTransaction(void) case TBLOCK_DEFAULT: case TBLOCK_STARTED: case TBLOCK_BEGIN: + case TBLOCK_PARALLEL_INPROGRESS: case TBLOCK_SUBBEGIN: case TBLOCK_INPROGRESS: case TBLOCK_END: @@ -5620,6 +5844,7 @@ void AbortOutOfAnyTransaction(bool reserve_topxact_abort) break; case TBLOCK_INPROGRESS: + case TBLOCK_PARALLEL_INPROGRESS: AbortTransaction(); if (reserve_topxact_abort) { s->blockState = TBLOCK_ABORT; @@ -5740,6 +5965,7 @@ char TransactionBlockStatusCode(void) case TBLOCK_BEGIN: case TBLOCK_SUBBEGIN: case TBLOCK_INPROGRESS: + case TBLOCK_PARALLEL_INPROGRESS: case TBLOCK_SUBINPROGRESS: case TBLOCK_END: case TBLOCK_SUBRELEASE: @@ -5854,6 +6080,12 @@ static void CommitSubTransaction(bool stpCommit) ereport(WARNING, (errmsg("CommitSubTransaction while in %s state", TransStateAsString(s->state)))); } + /* If in parallel mode, clean up workers and exit parallel mode. */ + if (IsInParallelMode()) { + AtEOSubXact_Parallel(true, s->subTransactionId); + s->parallelModeLevel = 0; + } + /* Pre-commit processing goes here -- nothing to do at the moment */ s->state = TRANS_COMMIT; @@ -6085,6 +6317,12 @@ static void AbortSubTransaction(bool stpRollback) SetUserIdAndSecContext(s->prevUser, s->prevSecContext); u_sess->exec_cxt.is_exec_trigger_func = false; + /* Exit from parallel mode, if necessary. */ + if (IsInParallelMode()) { + AtEOSubXact_Parallel(false, s->subTransactionId); + s->parallelModeLevel = 0; + } + /* * We can skip all this stuff if the subxact failed before creating a * ResourceOwner... @@ -6316,9 +6554,56 @@ static void PopTransaction(void) pfree(s); } +/* + * SetParallelStartTimestamps + * + * In a parallel worker, we should inherit the parent transaction's + * timestamps rather than setting our own. The parallel worker + * infrastructure must call this to provide those values before + * calling StartTransaction() or SetCurrentStatementStartTimestamp(). + */ +void SetParallelStartTimestamps(TimestampTz xact_ts, TimestampTz stmt_ts) +{ + Assert(IsParallelWorker()); + t_thrd.xact_cxt.xactStartTimestamp = xact_ts; + t_thrd.xact_cxt.stmtStartTimestamp = stmt_ts; +} + +/* + * StartParallelWorkerTransaction + * Start a parallel worker transaction, restoring the relevant + * transaction state serialized by SerializeTransactionState. + */ +void StartParallelWorkerTransaction(ParallelInfoContext *cxt) +{ + Assert(CurrentTransactionState->blockState == TBLOCK_DEFAULT); + StartTransaction(true); + + u_sess->utils_cxt.XactIsoLevel = cxt->xactIsoLevel; + u_sess->attr.attr_storage.XactDeferrable = cxt->xactDeferrable; + TopTransactionStateData.transactionId = cxt->topTransactionId; + CurrentTransactionState->transactionId = cxt->currentTransactionId; + t_thrd.xact_cxt.currentCommandId = cxt->currentCommandId; + t_thrd.xact_cxt.nParallelCurrentXids = cxt->nParallelCurrentXids; + t_thrd.xact_cxt.ParallelCurrentXids = cxt->ParallelCurrentXids; + + CurrentTransactionState->blockState = TBLOCK_PARALLEL_INPROGRESS; +} + +/* + * EndParallelWorkerTransaction + * End a parallel worker transaction. 
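Taken together, the entry points above give a worker its transaction environment. A hedged sketch of the order a worker might call them in; the real sequence lives in ParallelWorkerMain() and also restores GUCs, snapshots, user identity and more:

static void WorkerTransactionBootstrapSketch(ParallelInfoContext *cxt)
{
    /* Timestamps must be inherited from the leader before StartTransaction(). */
    SetParallelStartTimestamps(cxt->xact_ts, cxt->stmt_ts);

    /* Enter a TBLOCK_PARALLEL_INPROGRESS transaction mirroring the leader's state. */
    StartParallelWorkerTransaction(cxt);

    /* ... restore snapshot, combo CIDs and remaining state, then run the subplan ... */

    /* Commit locally; the worker writes no commit record, the leader does that. */
    EndParallelWorkerTransaction();
}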
+ */ +void EndParallelWorkerTransaction(void) +{ + Assert(CurrentTransactionState->blockState == TBLOCK_PARALLEL_INPROGRESS); + CommitTransaction(); + CurrentTransactionState->blockState = TBLOCK_DEFAULT; +} + /* * ShowTransactionState - * Debug support + * Debug support */ static void ShowTransactionState(const char* str) { @@ -6384,6 +6669,8 @@ static const char* BlockStateAsString(TBlockState blockState) return "BEGIN"; case TBLOCK_INPROGRESS: return "INPROGRESS"; + case TBLOCK_PARALLEL_INPROGRESS: + return "PARALLEL_INPROGRESS"; case TBLOCK_END: return "END"; case TBLOCK_ABORT: diff --git a/src/gausskernel/storage/buffer/localbuf.cpp b/src/gausskernel/storage/buffer/localbuf.cpp index 4169f98b8..8bc222a81 100644 --- a/src/gausskernel/storage/buffer/localbuf.cpp +++ b/src/gausskernel/storage/buffer/localbuf.cpp @@ -18,6 +18,7 @@ #include "knl/knl_variable.h" #include "catalog/catalog.h" +#include "access/parallel.h" #include "access/double_write.h" #include "executor/instrument.h" #include "storage/buf_internals.h" @@ -435,6 +436,19 @@ static void InitLocalBuffers(void) HASHCTL info; int i; + /* + * Parallel workers can't access data in temporary tables, because they + * have no visibility into the local buffers of their leader. This is a + * convenient, low-cost place to provide a backstop check for that. Note + * that we don't wish to prevent a parallel worker from accessing catalog + * metadata about a temp table, so checks at higher levels would be + * inappropriate. + */ + if (IsParallelWorker()) { + ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot access temporary tables during a parallel operation"))); + } + /* Allocate and zero buffer headers and auxiliary arrays */ u_sess->storage_cxt.LocalBufferDescriptors = (BufferDesc*)MemoryContextAllocZero(u_sess->top_mem_cxt, (unsigned int)nbufs * sizeof(BufferDesc)); diff --git a/src/gausskernel/storage/ipc/Makefile b/src/gausskernel/storage/ipc/Makefile index 4b09ca2ce..8bdc7c560 100644 --- a/src/gausskernel/storage/ipc/Makefile +++ b/src/gausskernel/storage/ipc/Makefile @@ -17,6 +17,6 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif OBJS = ipc.o ipci.o pmsignal.o procarray.o procsignal.o shmem.o shmqueue.o \ - sinval.o sinvaladt.o standby.o shm_mq.o shm_toc.o + sinval.o sinvaladt.o standby.o shm_mq.o shm_toc.o dsm.o include $(top_srcdir)/src/gausskernel/common.mk \ No newline at end of file diff --git a/src/gausskernel/storage/ipc/dsm.cpp b/src/gausskernel/storage/ipc/dsm.cpp new file mode 100644 index 000000000..9571d82bd --- /dev/null +++ b/src/gausskernel/storage/ipc/dsm.cpp @@ -0,0 +1,63 @@ +/* ------------------------------------------------------------------------- + * + * dsm.c + * manage dynamic shared memory segments + * + * This file provides a set of services to make programming with dynamic + * shared memory segments more convenient. Unlike the low-level + * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments + * created using this module will be cleaned up automatically. Mappings + * will be removed when the resource owner under which they were created + * is cleaned up, unless dsm_pin_mapping() is used, in which case they + * have session lifespan. Segments will be removed when there are no + * remaining mappings, or at postmaster shutdown in any case. After a + * hard postmaster crash, remaining segments will be removed, if they + * still exist, at the next postmaster startup. 
+ * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/gausskernel/storage/ipc/dsm.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "storage/dsm.h" +#include "knl/knl_session.h" +#include "utils/memutils.h" +#include "postmaster/bgworker_internals.h" + +void dsm_detach(void **seg) +{ + Assert(*seg != NULL); + knl_u_parallel_context *ctx = (knl_u_parallel_context *)*seg; + MemoryContextDelete(ctx->memCtx); + ctx->memCtx = NULL; + ctx->pwCtx = NULL; + ctx->used = false; +} + +void *dsm_create(void) +{ + for (int i = 0; i < DSM_MAX_ITEM_PER_QUERY; i++) { + if (u_sess->parallel_ctx[i].used == false) { + u_sess->parallel_ctx[i].memCtx = AllocSetContextCreate(u_sess->top_mem_cxt, "parallel query", + ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE, SHARED_CONTEXT); + + MemoryContext oldContext = MemoryContextSwitchTo(u_sess->parallel_ctx[i].memCtx); + u_sess->parallel_ctx[i].pwCtx = (ParallelInfoContext *)palloc0(sizeof(ParallelInfoContext)); + (void)MemoryContextSwitchTo(oldContext); + + u_sess->parallel_ctx[i].used = true; + return &(u_sess->parallel_ctx[i]); + } + } + + ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("too many dynamic shared memory segments"))); + return NULL; +} + diff --git a/src/gausskernel/storage/ipc/procarray.cpp b/src/gausskernel/storage/ipc/procarray.cpp index 0877e6a19..4e97936cf 100644 --- a/src/gausskernel/storage/ipc/procarray.cpp +++ b/src/gausskernel/storage/ipc/procarray.cpp @@ -819,6 +819,45 @@ void ProcArrayInitRecovery(TransactionId initializedUptoXID) TransactionIdRetreat(t_thrd.storage_cxt.latestObservedXid); } +/* + * ProcArrayInstallRestoredXmin -- install restored xmin into MyPgXact->xmin + * + * This is like ProcArrayInstallImportedXmin, but we have a pointer to the + * PGPROC of the transaction from which we imported the snapshot, rather than + * an XID. + * + * Returns TRUE if successful, FALSE if source xact is no longer running. + */ +bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc) +{ + bool result = false; + + Assert(TransactionIdIsNormal(xmin)); + Assert(proc != NULL); + + /* Get lock so source xact can't end while we're doing this */ + LWLockAcquire(ProcArrayLock, LW_SHARED); + + volatile PGXACT *pgxact = &g_instance.proc_base->allPgXact[proc->pgprocno]; + + /* + * Be certain that the referenced PGPROC has an advertised xmin which is + * no later than the one we're installing, so that the system-wide xmin + * can't go backwards. Also, make sure it's running in the same database, + * so that the per-database xmin cannot go backwards. + */ + TransactionId xid = pgxact->xmin; /* fetch just once */ + if (proc->databaseId == u_sess->proc_cxt.MyDatabaseId && TransactionIdIsNormal(xid) && + TransactionIdPrecedesOrEquals(xid, xmin)) { + t_thrd.pgxact->xmin = u_sess->utils_cxt.TransactionXmin = xmin; + result = true; + } + + LWLockRelease(ProcArrayLock); + + return result; +} + /* * GetRunningTransactionData -- returns information about running transactions. 
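The dsm_create()/dsm_detach() pair above replaces true dynamic shared memory with per-session slots. A usage sketch based only on those definitions; the field touched here is just an example:

static void ParallelSlotUsageSketch(void)
{
    void *seg = dsm_create();   /* claims a free u_sess->parallel_ctx[] slot */
    knl_u_parallel_context *ctx = (knl_u_parallel_context *)seg;

    /* Per-query shared state is allocated inside the slot's memory context. */
    MemoryContext oldcxt = MemoryContextSwitchTo(ctx->memCtx);
    ctx->pwCtx->pscan_num = 0;
    (void)MemoryContextSwitchTo(oldcxt);

    /* Deleting the context releases everything and frees the slot for reuse. */
    dsm_detach(&seg);
}

Because the slots live in u_sess rather than in a real shared segment, the limit of DSM_MAX_ITEM_PER_QUERY concurrent parallel contexts per session is enforced directly in dsm_create().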
* @@ -2658,8 +2697,8 @@ int CountDBBackends(Oid databaseid) int pgprocno = arrayP->pgprocnos[index]; volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno]; - if (proc->pid == 0) - continue; /* do not count prepared xacts */ + if (proc->pid == 0 || t_thrd.bgworker_cxt.is_background_worker) + continue; /* do not count prepared xacts and background workers */ if (!OidIsValid(databaseid) || proc->databaseId == databaseid) count++; @@ -2721,8 +2760,8 @@ int CountUserBackends(Oid roleid) int pgprocno = arrayP->pgprocnos[index]; volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno]; - if (proc->pid == 0) - continue; /* do not count prepared xacts */ + if (proc->pid == 0 || t_thrd.bgworker_cxt.is_background_worker) + continue; /* do not count prepared xacts and background workers */ if (proc->roleId == roleid) count++; diff --git a/src/gausskernel/storage/ipc/procsignal.cpp b/src/gausskernel/storage/ipc/procsignal.cpp index 3308fe14e..4b16f0aeb 100644 --- a/src/gausskernel/storage/ipc/procsignal.cpp +++ b/src/gausskernel/storage/ipc/procsignal.cpp @@ -19,6 +19,7 @@ #include #include +#include "access/parallel.h" #include "commands/async.h" #include "miscadmin.h" #include "storage/latch.h" @@ -272,6 +273,9 @@ void procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_NOTIFY_INTERRUPT)) HandleNotifyInterrupt(); + if (CheckProcSignal(PROCSIG_PARALLEL_MESSAGE)) + HandleParallelMessageInterrupt(); + #ifdef PGXC if (CheckProcSignal(PROCSIG_PGXCPOOL_RELOAD)) diff --git a/src/gausskernel/storage/ipc/shm_mq.cpp b/src/gausskernel/storage/ipc/shm_mq.cpp index 7c731336b..9daa949d5 100644 --- a/src/gausskernel/storage/ipc/shm_mq.cpp +++ b/src/gausskernel/storage/ipc/shm_mq.cpp @@ -127,7 +127,7 @@ struct shm_mq { */ struct shm_mq_handle { shm_mq *mqh_queue; - char *mqh_segment; + void *mqh_segment; BackgroundWorkerHandle *mqh_handle; char *mqh_buffer; Size mqh_buflen; @@ -265,7 +265,7 @@ PGPROC *shm_mq_get_sender(shm_mq *mq) * counterpart won't get stuck waiting for us to fill or drain the queue * after we've already lost interest. */ -shm_mq_handle *shm_mq_attach(shm_mq *mq, char *seg, BackgroundWorkerHandle *handle) +shm_mq_handle *shm_mq_attach(shm_mq *mq, void *seg, BackgroundWorkerHandle *handle) { shm_mq_handle *mqh = (shm_mq_handle*)palloc(sizeof(shm_mq_handle)); diff --git a/src/gausskernel/storage/lmgr/lock.cpp b/src/gausskernel/storage/lmgr/lock.cpp index 614681944..cb6f363ea 100755 --- a/src/gausskernel/storage/lmgr/lock.cpp +++ b/src/gausskernel/storage/lmgr/lock.cpp @@ -1196,6 +1196,36 @@ bool IsInSameTransaction(PGPROC *proc1, PGPROC *proc2) : u_sess->stream_cxt.global_obj->inNodeGroup(proc1->pid, proc2->pid); } +/* + * When a query runs in parallel mode, the parallel leader and worker threads hold different + * Procs, but we treat them as one transaction. + */ +static bool IsInSameParallelQuery(PGPROC *proc1, PGPROC *proc2) +{ + if (!IsInParallelMode()) { + return false; + } + + /* Which proc is me?
*/ + PGPROC *otherProc = NULL; + if (proc1 == t_thrd.proc) { + otherProc = proc2; + } else if (proc2 == t_thrd.proc) { + otherProc = proc1; + } else { + return false; + } + + if (ParallelWorkerAmI()) { + /* I'm worker, so check whether other proc is my master or not */ + return t_thrd.msqueue_cxt.pq_mq_parallel_master_pid == otherProc->pid; + } else if (ParallelLeaderAmI()) { + /* I'm leader, so check whether other proc is a worker of mine or not */ + return GetBackgroundWorkerTypeByPid(otherProc->pid) != NULL; + } + return false; +} + /* * LockCheckConflicts -- test whether requested lock conflicts * with those already granted @@ -1246,12 +1276,13 @@ int LockCheckConflicts(LockMethod lockMethodTable, LOCKMODE lockmode, LOCK *lock * thread is in one transaction, but these threads use differnt procs. * We need treat these procs as one proc */ - if (StreamTopConsumerAmI() || StreamThreadAmI()) { + if (StreamTopConsumerAmI() || StreamThreadAmI() || ParallelWorkerAmI() || ParallelLeaderAmI()) { SHM_QUEUE *otherProcLocks = &(lock->procLocks); PROCLOCK *otherProcLock = (PROCLOCK *)SHMQueueNext(otherProcLocks, otherProcLocks, offsetof(PROCLOCK, lockLink)); while (otherProcLock != NULL) { - if (IsInSameTransaction(otherProcLock->tag.myProc, proc)) { + if (IsInSameParallelQuery(otherProcLock->tag.myProc, proc) || + IsInSameTransaction(otherProcLock->tag.myProc, proc)) { if (otherProcLock->holdMask & LOCKBIT_ON((unsigned int)i)) ++myHolding; } diff --git a/src/gausskernel/storage/lmgr/predicate.cpp b/src/gausskernel/storage/lmgr/predicate.cpp index 5b9832923..421b0159e 100755 --- a/src/gausskernel/storage/lmgr/predicate.cpp +++ b/src/gausskernel/storage/lmgr/predicate.cpp @@ -1425,6 +1425,16 @@ static Snapshot GetSerializableTransactionSnapshotInt(Snapshot snapshot, Transac Assert(!RecoveryInProgress()); + /* + * Since all parts of a serializable transaction must use the same + * snapshot, it is too late to establish one after a parallel operation + * has begun. 
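One level up, the PROCSIG_PARALLEL_MESSAGE branch added to procsignal_sigusr1_handler() dispatches to HandleParallelMessageInterrupt(). A plausible minimal form of that handler; the exact t_thrd field spellings are assumptions, apart from ParallelMessagePending, which this patch adds to knl_t_bgworker_context:

static void HandleParallelMessageInterruptSketch(void)
{
    t_thrd.int_cxt.InterruptPending = true;
    t_thrd.bgworker_cxt.ParallelMessagePending = true;
    SetLatch(&t_thrd.proc->procLatch);
}

The heavy lifting happens later, outside the signal handler, when HandleParallelMessages() drains the workers' error queues.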
+ */ + if (IsInParallelMode()) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot establish serializable snapshot during a parallel operation"))); + } + proc = t_thrd.proc; Assert(proc != NULL); GET_VXID_FROM_PGPROC(vxid, *proc); diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 86b9bc49b..aac08d89b 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -80,6 +80,7 @@ extern void bucketClosePartition(Partition bucket); /* struct definition appears in relscan.h */ typedef struct HeapScanDescData* HeapScanDesc; +typedef struct ParallelHeapScanDescData *ParallelHeapScanDesc; /* * HeapScanIsValid @@ -101,6 +102,11 @@ extern void heap_rescan(HeapScanDesc scan, ScanKey key); extern void heap_endscan(HeapScanDesc scan); extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction); +extern Size heap_parallelscan_estimate(Snapshot snapshot); +extern void heap_parallelscan_initialize(ParallelHeapScanDesc target, Size pscan_len, Relation relation, + Snapshot snapshot); +extern HeapScanDesc heap_beginscan_parallel(Relation relation, ParallelHeapScanDesc parallel_scan); + extern void heap_init_parallel_seqscan(HeapScanDesc scan, int32 dop, ScanDirection dir); extern HeapTuple heapGetNextForVerify(HeapScanDesc scan, ScanDirection direction, bool& isValidRelationPage); diff --git a/src/include/access/parallel.h b/src/include/access/parallel.h new file mode 100644 index 000000000..13b3f52f8 --- /dev/null +++ b/src/include/access/parallel.h @@ -0,0 +1,68 @@ +/* ------------------------------------------------------------------------- + * + * parallel.h + * Infrastructure for launching parallel workers + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/parallel.h + * + * ------------------------------------------------------------------------- + */ + +#ifndef PARALLEL_H +#define PARALLEL_H + +#include "access/xlogdefs.h" +#include "lib/ilist.h" +#include "postmaster/bgworker.h" +#include "storage/shm_mq.h" + +typedef void (*parallel_worker_main_type)(void *seg); + +typedef struct ParallelWorkerInfo { + BackgroundWorkerHandle *bgwhandle; + shm_mq_handle *error_mqh; + ThreadId pid; +} ParallelWorkerInfo; + +typedef struct ParallelContext { + dlist_node node; + SubTransactionId subid; + int nworkers; + int nworkers_launched; + char *library_name; + char *function_name; + ErrorContextCallback *error_context_stack; + void *seg; + void *private_memory; + ParallelWorkerInfo *worker; + int nknown_attached_workers; + bool *known_attached_workers; +} ParallelContext; + +typedef struct ParallelWorkerContext { + void *seg; +} ParallelWorkerContext; + +#define IsParallelWorker() (t_thrd.bgworker_cxt.ParallelWorkerNumber >= 0) + +extern ParallelContext *CreateParallelContext(const char *library_name, const char *function_name, int nworkers); +extern void InitializeParallelDSM(ParallelContext *pcxt); +extern void ReinitializeParallelDSM(ParallelContext *pcxt); +extern void LaunchParallelWorkers(ParallelContext *pcxt); +extern void WaitForParallelWorkersToAttach(ParallelContext *pcxt); +extern void WaitForParallelWorkersToFinish(ParallelContext *pcxt); +extern void DestroyParallelContext(ParallelContext *pcxt); +extern bool ParallelContextActive(void); + +extern void HandleParallelMessageInterrupt(void); +extern void HandleParallelMessages(void); +extern void AtEOXact_Parallel(bool isCommit); +extern void 
AtEOSubXact_Parallel(bool isCommit, SubTransactionId mySubId); +extern void ParallelWorkerReportLastRecEnd(XLogRecPtr last_xlog_end); + +extern void ParallelWorkerMain(Datum main_arg); + +#endif /* PARALLEL_H */ diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index a88a34e56..3ae8208c5 100755 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -21,6 +21,27 @@ #define PARALLEL_SCAN_GAP 100 +/* + * Shared state for parallel heap scan. + * + * Each backend participating in a parallel heap scan has its own + * HeapScanDesc in backend-private memory, and those objects all contain + * a pointer to this structure. The information here must be sufficient + * to properly initialize each new HeapScanDesc as workers join the scan, + * and it must act as a font of block numbers for those workers. + */ +typedef struct ParallelHeapScanDescData { + int plan_node_id; /* used to identify speicific plan */ + Oid phs_relid; /* OID of relation to scan */ + bool phs_syncscan; /* report location to syncscan logic? */ + BlockNumber phs_nblocks; /* # blocks in relation at start of scan */ + slock_t phs_mutex; /* mutual exclusion for setting startblock */ + BlockNumber phs_startblock; /* starting block number */ + pg_atomic_uint64 phs_nallocated; /* number of blocks allocated to workers so far. */ + uint32 pscan_len; /* total size of this struct, including phs_snapshot_data */ + char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]; +} ParallelHeapScanDescData; + /* ---------------------------------------------------------------- * Scan State Information * ---------------------------------------------------------------- @@ -62,18 +83,17 @@ typedef struct HeapScanDescData { Snapshot rs_snapshot; /* snapshot to see */ int rs_nkeys; /* number of scan keys */ ScanKey rs_key; /* array of scan key descriptors */ - bool rs_bitmapscan; /* true if this is really a bitmap scan */ - bool rs_samplescan; /* true if this is really a sample scan */ - bool rs_pageatatime; /* verify visibility page-at-a-time? */ - bool rs_allow_strat; /* allow or disallow use of access strategy */ - bool rs_allow_sync; /* allow or disallow use of syncscan */ + /* + * Information about type and behaviour of the scan, a bitmask of members + * of the ScanOptions enum (see tableam.h). + */ + uint32 rs_flags; /* state set up at initscan time */ BlockNumber rs_nblocks; /* number of blocks to scan */ BlockNumber rs_startblock; /* block # to start at */ BufferAccessStrategy rs_strategy; /* access strategy for reads */ bool rs_syncscan; /* report location to syncscan logic? */ - bool rs_isRangeScanInRedis; /* if it is a range scan in redistribution */ /* scan current state */ bool rs_inited; /* false = scan not init'd yet */ @@ -82,6 +102,7 @@ typedef struct HeapScanDescData { Buffer rs_cbuf; /* current buffer in scan, if any */ /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ ItemPointerData rs_mctid; /* marked scan position, if any */ + ParallelHeapScanDesc rs_parallel; /* parallel scan information */ /* these fields only used in page-at-a-time mode and for bitmap scans */ int rs_cindex; /* current tuple's index in vistuples */ diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 8c42d02ba..8a53128e2 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -28,6 +28,26 @@ #include "utils/snapshot.h" #include "nodes/execnodes.h" +/* + * Bitmask values for the flags argument to the scan_begin callback. 
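The ParallelHeapScanDescData above is what the new heapam.h entry points operate on. A sketch of how a scan gets shared; where the descriptor is placed (here a caller-supplied shared memory context) and snapshot serialization are simplified:

static HeapScanDesc BeginSharedScanSketch(Relation rel, Snapshot snapshot, MemoryContext shared_cxt)
{
    Size len = heap_parallelscan_estimate(snapshot);
    ParallelHeapScanDesc pscan = (ParallelHeapScanDesc)MemoryContextAllocZero(shared_cxt, len);

    /* The leader fills in relid, block count, start block and the snapshot data. */
    heap_parallelscan_initialize(pscan, len, rel, snapshot);

    /* The leader and each worker then build a private HeapScanDesc on top of it. */
    return heap_beginscan_parallel(rel, pscan);
}

Block handout then goes through the phs_nallocated counter, so participants claim disjoint block ranges without any further coordination.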
+ */ +typedef enum ScanOptions { + /* true if this is really a bitmap scan */ + SO_TYPE_BITMAPSCAN = 1 << 1, + /* true if this is really a sample scan */ + SO_TYPE_SAMPLESCAN = 1 << 2, + /* if it is a range scan in redistribution */ + SO_TYPE_RANGESCAN = 1 << 3, + /* verify visibility page-at-a-time? */ + SO_ALLOW_PAGEMODE = 1 << 4, + /* allow or disallow use of access strategy */ + SO_ALLOW_STRAT = 1 << 5, + /* allow or disallow use of syncscan */ + SO_ALLOW_SYNC = 1 << 6, + /* unregister snapshot at scan end? */ + SO_TEMP_SNAPSHOT = 1 << 7 +} ScanOptions; + extern bool reset_scan_qual(Relation currHeapRel, ScanState * node); diff --git a/src/include/access/xact.h b/src/include/access/xact.h index be5e6b94c..c263ead49 100755 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -328,6 +328,9 @@ extern void BeginInternalSubTransaction(const char* name); extern void ReleaseCurrentSubTransaction(void); extern void RollbackAndReleaseCurrentSubTransaction(void); extern bool IsSubTransaction(void); +extern void StartParallelWorkerTransaction(ParallelInfoContext *cxt); +extern void EndParallelWorkerTransaction(void); +extern void SetParallelStartTimestamps(TimestampTz xact_ts, TimestampTz stmt_ts); extern void SetCurrentTransactionId(TransactionId tid); extern bool IsTransactionBlock(void); extern bool IsTransactionOrTransactionBlock(void); @@ -390,4 +393,8 @@ extern bool IsMixedEngineUsed(); extern void SetCurrentTransactionStorageEngine(StorageEngineType storageEngineType); extern void CallXactCallbacks(XactEvent event); +extern void EnterParallelMode(void); +extern void ExitParallelMode(void); +extern bool IsInParallelMode(void); + #endif /* XACT_H */ diff --git a/src/include/catalog/namespace.h b/src/include/catalog/namespace.h index ddd32daff..2da5f5a3a 100755 --- a/src/include/catalog/namespace.h +++ b/src/include/catalog/namespace.h @@ -157,6 +157,8 @@ extern bool isTempOrToastNamespace(Oid namespaceId); extern bool isAnyTempNamespace(Oid namespaceId); extern bool isOtherTempNamespace(Oid namespaceId); extern Oid GetTempToastNamespace(void); +extern void GetTempNamespaceState(Oid *tempNamespaceId, Oid *tempToastNamespaceId); +extern void SetTempNamespaceState(Oid tempNamespaceId, Oid tempToastNamespaceId); extern void ResetTempTableNamespace(void); extern OverrideSearchPath *GetOverrideSearchPath(MemoryContext context); @@ -173,7 +175,7 @@ extern Oid FindDefaultConversionProc(int4 for_encoding, int4 to_encoding); /* initialization & transaction cleanup code */ extern void InitializeSearchPath(void); -extern void AtEOXact_Namespace(bool isCommit); +extern void AtEOXact_Namespace(bool isCommit, bool parallel); extern void AtEOSubXact_Namespace(bool isCommit, SubTransactionId mySubid, SubTransactionId parentSubid); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 8ee216c4c..97b53c8d0 100755 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -413,6 +413,15 @@ typedef FormData_pg_proc *Form_pg_proc; #define PROVOLATILE_STABLE 's' /* does not change within a scan */ #define PROVOLATILE_VOLATILE 'v' /* can change even within a scan */ +/* + * Symbolic values for proparallel column: these indicate whether a function + * can be safely be run in a parallel backend, during parallelism but + * necessarily in the master, or only in non-parallel mode. 
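Tying the xact.h and access/parallel.h additions together, a leader drives a parallel operation roughly as follows. The "postgres" library name, the ParallelQueryMain entry point (declared in execParallel.h) and the worker count are illustrative; for queries the real driver is ExecInitParallelPlan():

static void LeaderLifecycleSketch(void)
{
    EnterParallelMode();

    ParallelContext *pcxt = CreateParallelContext("postgres", "ParallelQueryMain", 2);
    InitializeParallelDSM(pcxt);            /* size and fill the shared state */
    LaunchParallelWorkers(pcxt);

    /* ... run the leader's copy of the plan and drain the tuple queues ... */

    WaitForParallelWorkersToFinish(pcxt);
    DestroyParallelContext(pcxt);

    ExitParallelMode();
}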
+ */ +#define PROPARALLEL_SAFE 's' /* can run in worker or master */ +#define PROPARALLEL_RESTRICTED 'r' /* can run in parallel master only */ +#define PROPARALLEL_UNSAFE 'u' /* banned while in parallel mode */ + /* * Symbolic values for proargmodes column. Note that these must agree with * the FunctionParameterMode enum in parsenodes.h; we declare them here to diff --git a/src/include/executor/execParallel.h b/src/include/executor/execParallel.h new file mode 100644 index 000000000..947d57067 --- /dev/null +++ b/src/include/executor/execParallel.h @@ -0,0 +1,38 @@ +/* -------------------------------------------------------------------- + * execParallel.h + * POSTGRES parallel execution interface + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/executor/execParallel.h + * -------------------------------------------------------------------- + */ + +#ifndef EXECPARALLEL_H +#define EXECPARALLEL_H + +#include "access/parallel.h" +#include "nodes/execnodes.h" +#include "nodes/parsenodes.h" +#include "nodes/plannodes.h" + +typedef struct SharedExecutorInstrumentation SharedExecutorInstrumentation; + +typedef struct ParallelExecutorInfo { + PlanState *planstate; + ParallelContext *pcxt; + BufferUsage *buffer_usage; + SharedExecutorInstrumentation *instrumentation; + shm_mq_handle **tqueue; + bool finished; +} ParallelExecutorInfo; + +extern ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers); +extern void ExecParallelFinish(ParallelExecutorInfo *pei); +extern void ExecParallelCleanup(ParallelExecutorInfo *pei); +extern void ExecParallelReinitialize(ParallelExecutorInfo *pei); + +extern void ParallelQueryMain(void *seg); +#endif /* EXECPARALLEL_H */ diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index e329385e0..2a0aa45d6 100755 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -247,6 +247,7 @@ extern PlanState* ExecInitNode(Plan* node, EState* estate, int eflags); extern TupleTableSlot* ExecProcNode(PlanState* node); extern Node* MultiExecProcNode(PlanState* node); extern void ExecEndNode(PlanState* node); +extern bool ExecShutdownNode(PlanState *node); extern long ExecGetPlanMemCost(Plan* node); diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index 938b97848..c160d39f9 100755 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -320,6 +320,11 @@ typedef struct Instrumentation { RecursiveInfo recursiveInfo; } Instrumentation; +typedef struct WorkerInstrumentation { + int num_workers; /* # of structures that follow */ + Instrumentation instrument[FLEXIBLE_ARRAY_MEMBER]; +} WorkerInstrumentation; + /* instrumentation data */ typedef struct InstrStreamPlanData { /* whether the plannode is valid */ @@ -1000,9 +1005,14 @@ typedef struct size_info { extern OperatorProfileTable g_operator_table; extern Instrumentation* InstrAlloc(int n, int instrument_options); +extern void InstrInit(Instrumentation *instr, int instrument_options); extern void InstrStartNode(Instrumentation* instr); extern void InstrStopNode(Instrumentation* instr, double nTuples); extern void InstrEndLoop(Instrumentation* instr); +extern void InstrAggNode(Instrumentation *dst, Instrumentation *add); +extern void InstrStartParallelQuery(void); +extern void InstrEndParallelQuery(BufferUsage *result); +extern void 
InstrAccumParallelQuery(BufferUsage *result); extern void StreamEndLoop(StreamTime* instr); extern void AddControlMemoryContext(Instrumentation* instr, MemoryContext context); extern void CalculateContextSize(MemoryContext ctx, int64* memorySize); diff --git a/src/include/executor/nodeGather.h b/src/include/executor/nodeGather.h new file mode 100644 index 000000000..d161df59d --- /dev/null +++ b/src/include/executor/nodeGather.h @@ -0,0 +1,25 @@ +/* ------------------------------------------------------------------------- + * + * nodeGather.h + * prototypes for nodeGather.c + * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/nodeGather.h + * + * ------------------------------------------------------------------------- + */ +#ifndef NODEGATHER_H +#define NODEGATHER_H + +#include "nodes/execnodes.h" + +extern GatherState *ExecInitGather(Gather *node, EState *estate, int eflags); +extern TupleTableSlot *ExecGather(GatherState *node); +extern void ExecEndGather(GatherState *node); +extern void ExecShutdownGather(GatherState *node); +extern void ExecReScanGather(GatherState *node); + +#endif /* NODEGATHER_H */ diff --git a/src/include/executor/nodeSeqscan.h b/src/include/executor/nodeSeqscan.h index fa5ddf20f..2860fdb88 100644 --- a/src/include/executor/nodeSeqscan.h +++ b/src/include/executor/nodeSeqscan.h @@ -14,6 +14,7 @@ #ifndef NODESEQSCAN_H #define NODESEQSCAN_H +#include "access/parallel.h" #include "nodes/execnodes.h" extern SeqScanState* ExecInitSeqScan(SeqScan* node, EState* estate, int eflags); @@ -25,4 +26,9 @@ extern void ExecReScanSeqScan(SeqScanState* node); extern void InitScanRelation(SeqScanState* node, EState* estate); +/* parallel scan support */ +extern void ExecSeqScanEstimate(SeqScanState *node, ParallelContext *pcxt); +extern void ExecSeqScanInitializeDSM(SeqScanState *node, ParallelContext *pcxt, int nodeid); +extern void ExecSeqScanInitializeWorker(SeqScanState *node, void *context); + #endif /* NODESEQSCAN_H */ diff --git a/src/include/executor/spi.h b/src/include/executor/spi.h index 962cc56b8..722336ef9 100755 --- a/src/include/executor/spi.h +++ b/src/include/executor/spi.h @@ -133,6 +133,7 @@ extern void SPICleanup(void); extern void AtEOXact_SPI(bool isCommit, bool stpRollback, bool stpCommit); extern void AtEOSubXact_SPI(bool isCommit, SubTransactionId mySubid, bool stpRollback, bool stpCommit); +extern bool SPI_inside_nonatomic_context(void); extern DestReceiver* createAnalyzeSPIDestReceiver(CommandDest dest); /* SPI execution helpers */ extern void spi_exec_with_callback(CommandDest dest, const char* src, bool read_only, long tcount, bool direct_call, diff --git a/src/include/executor/tqueue.h b/src/include/executor/tqueue.h new file mode 100644 index 000000000..e09840bb8 --- /dev/null +++ b/src/include/executor/tqueue.h @@ -0,0 +1,29 @@ +/*------------------------------------------------------------------------- + * + * tqueue.h + * Use shm_mq to send & receive tuples between parallel backends + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/tqueue.h + * + *------------------------------------------------------------------------- + */ + +#ifndef TQUEUE_H +#define TQUEUE_H + +#include "storage/shm_mq.h" +#include "tcop/dest.h" + +/* Use this to send tuples to a shm_mq. 
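In practice the two ends of a tuple queue pair up as sketched below, based on the declarations that follow; where the shm_mq_handle values come from (the per-worker queues set up by execParallel.cpp) is assumed:

/* Worker side: point the executor's DestReceiver at the queue. */
static DestReceiver *WorkerDestSketch(shm_mq_handle *mqh)
{
    return CreateTupleQueueDestReceiver(mqh);
}

/* Leader side: pull tuples until the worker detaches. */
static void DrainQueueSketch(shm_mq_handle *mqh, TupleDesc tupdesc)
{
    TupleQueueReader *reader = CreateTupleQueueReader(mqh, tupdesc);
    bool done = false;

    while (!done) {
        HeapTuple tup = TupleQueueReaderNext(reader, false, &done);
        if (tup != NULL) {
            /* ... store the tuple into a slot and hand it to the caller ... */
        }
    }
    DestroyTupleQueueReader(reader);
}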
*/ +extern DestReceiver *CreateTupleQueueDestReceiver(shm_mq_handle *handle); + +/* Use these to receive tuples from a shm_mq. */ +typedef struct TupleQueueReader TupleQueueReader; +extern TupleQueueReader *CreateTupleQueueReader(shm_mq_handle *handle, TupleDesc tupledesc); +extern void DestroyTupleQueueReader(TupleQueueReader *funnel); +extern HeapTuple TupleQueueReaderNext(TupleQueueReader *, bool nowait, bool *done); + +#endif /* TQUEUE_H */ diff --git a/src/include/gs_thread.h b/src/include/gs_thread.h index dc0bfaf1b..47cdecbd4 100755 --- a/src/include/gs_thread.h +++ b/src/include/gs_thread.h @@ -107,6 +107,7 @@ typedef enum knl_thread_role { WAL_STANDBY_SENDER, /* Am I cascading WAL to another standby ? */ WAL_DB_SENDER, TOP_CONSUMER, + BACKGROUND_LEADER, } knl_thread_role; /* diff --git a/src/include/knl/knl_guc/knl_instance_attr_common.h b/src/include/knl/knl_guc/knl_instance_attr_common.h index a4b64a2aa..914de7f28 100755 --- a/src/include/knl/knl_guc/knl_instance_attr_common.h +++ b/src/include/knl/knl_guc/knl_instance_attr_common.h @@ -76,6 +76,10 @@ typedef struct knl_instance_attr_common { bool enable_alarm; char* Alarm_component; char* MOTConfigFileName; + + int max_worker_processes; + int max_parallel_workers; + int max_parallel_workers_per_gather; } knl_instance_attr_common; #endif /* SRC_INCLUDE_KNL_KNL_INSTANCE_ATTR_COMMON_H_ */ diff --git a/src/include/knl/knl_guc/knl_session_attr_sql.h b/src/include/knl/knl_guc/knl_session_attr_sql.h index 6527cc665..212f853e5 100644 --- a/src/include/knl/knl_guc/knl_session_attr_sql.h +++ b/src/include/knl/knl_guc/knl_session_attr_sql.h @@ -154,6 +154,7 @@ typedef struct knl_session_attr_sql { int acce_min_datasize_per_thread; int max_cn_temp_file_size; int default_statistics_target; + int min_parallel_table_scan_size; /* Memory Limit user could set in session */ int FencedUDFMemoryLimit; int64 g_default_expthresh; @@ -163,6 +164,8 @@ typedef struct knl_session_attr_sql { double allocate_mem_cost; double cpu_index_tuple_cost; double cpu_operator_cost; + double parallel_tuple_cost; + double parallel_setup_cost; double stream_multiple; double cursor_tuple_fraction; double Geqo_selection_bias; @@ -198,8 +201,10 @@ typedef struct knl_session_attr_sql { bool enable_opfusion; bool enable_beta_opfusion; bool enable_beta_nestloop_fusion; + bool parallel_leader_participation; int opfusion_debug_mode; int single_shard_stmt; + int force_parallel_mode; } knl_session_attr_sql; #endif /* SRC_INCLUDE_KNL_KNL_SESSION_ATTR_SQL */ diff --git a/src/include/knl/knl_session.h b/src/include/knl/knl_session.h index 130161cc3..1e5e1ae45 100644 --- a/src/include/knl/knl_session.h +++ b/src/include/knl/knl_session.h @@ -45,6 +45,7 @@ #include #include "c.h" +#include "access/heapam.h" #include "datatype/timestamp.h" #include "gs_thread.h" #include "knl/knl_guc.h" @@ -59,10 +60,14 @@ #include "storage/backendid.h" #include "storage/s_lock.h" #include "storage/shmem.h" +#include "storage/predicate.h" +#include "postmaster/bgworker.h" +#include "storage/dsm.h" #include "utils/palloc.h" typedef void (*pg_on_exit_callback)(int code, Datum arg); + /* all session level attribute which expose to user. 
*/ typedef struct knl_session_attr { knl_session_attr_sql attr_sql; @@ -2036,6 +2041,63 @@ typedef struct knl_u_ext_fdw_context { pg_on_exit_callback fdwExitFunc; /* Exit callback, will be called when session exit */ } knl_u_ext_fdw_context; +/* Info need to pass from leader to worker */ +typedef struct ParallelInfoContext { + Oid database_id; + Oid authenticated_user_id; + Oid current_user_id; + Oid outer_user_id; + Oid temp_namespace_id; + Oid temp_toast_namespace_id; + int sec_context; + bool is_superuser; + void *parallel_master_pgproc; /* PGPROC */ + ThreadId parallel_master_pid; + BackendId parallel_master_backend_id; + TimestampTz xact_ts; + TimestampTz stmt_ts; + char *pstmt_space; + char *param_space; + Size param_len; + int pscan_num; + ParallelHeapScanDesc *pscan; + int usedComboCids; + int sizeComboCids; + HTAB *comboHash; + struct ComboCidKeyData *comboCids; + char *tsnapspace; + Size tsnapspace_len; + char *asnapspace; + Size asnapspace_len; + struct RelMapFile *active_shared_updates; + struct RelMapFile *active_local_updates; + char *errorQueue; + int xactIsoLevel; + bool xactDeferrable; + TransactionId topTransactionId; + TransactionId currentTransactionId; + CommandId currentCommandId; + int nParallelCurrentXids; + TransactionId *ParallelCurrentXids; + char *library_name; + char *function_name; + BufferUsage *bufUsage; + char *tupleQueue; + struct SharedExecutorInstrumentation *instrumentation; + char *namespace_search_path; + + /* Mutex protects remaining fields. */ + slock_t mutex; + /* Maximum XactLastRecEnd of any worker. */ + XLogRecPtr last_xlog_end; +} ParallelInfoContext; + +typedef struct knl_u_parallel_context { + ParallelInfoContext *pwCtx; + MemoryContext memCtx; + bool used; +} knl_u_parallel_context; + enum knl_session_status { KNL_SESS_FAKE, KNL_SESS_UNINIT, @@ -2131,6 +2193,9 @@ typedef struct knl_session_context { /* external FDW */ knl_u_ext_fdw_context ext_fdw_ctx[MAX_TYPE_FDW]; + + /* parallel query context */ + knl_u_parallel_context parallel_ctx[DSM_MAX_ITEM_PER_QUERY]; } knl_session_context; extern knl_session_context* create_session_context(MemoryContext parent, uint64 id); diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h index a1325f9be..e9f58d0cb 100644 --- a/src/include/knl/knl_thread.h +++ b/src/include/knl/knl_thread.h @@ -346,6 +346,33 @@ typedef struct knl_t_xact_context { struct SERIALIZABLEXACT* MySerializableXact; bool MyXactDidWrite; + /* + * When running as a parallel worker, we place only a single + * TransactionStateData on the parallel worker's state stack, and the XID + * reflected there will be that of the *innermost* currently-active + * subtransaction in the backend that initiated parallelism. However, + * GetTopTransactionId and TransactionIdIsCurrentTransactionId + * need to return the same answers in the parallel worker as they would have + * in the user backend, so we need some additional bookkeeping. + * + * XactTopTransactionId stores the XID of our toplevel transaction, which + * will be the same as TopTransactionState.transactionId in an ordinary + * backend; but in a parallel backend, which does not have the entire + * transaction state, it will instead be copied from the backend that started + * the parallel operation. + * + * nParallelCurrentXids will be 0 and ParallelCurrentXids NULL in an ordinary + * backend, but in a parallel backend, nParallelCurrentXids will contain the + * number of XIDs that need to be considered current, and ParallelCurrentXids + * will contain the XIDs themselves. 
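Most of these fields are filled in by the leader while it builds the per-query parallel context. A small, hedged sample of that step; the accessor spellings below are assumptions, and the full list (snapshots, combo CIDs, GUC state, serialized plan, queues) is much longer:

static void FillParallelInfoSketch(ParallelInfoContext *cxt)
{
    cxt->database_id = u_sess->proc_cxt.MyDatabaseId;
    GetUserIdAndSecContext(&cxt->current_user_id, &cxt->sec_context);
    cxt->parallel_master_pid = t_thrd.proc_cxt.MyProcPid;
    cxt->xact_ts = GetCurrentTransactionStartTimestamp();
    cxt->stmt_ts = GetCurrentStatementStartTimestamp();

    SpinLockInit(&cxt->mutex);
    cxt->last_xlog_end = 0;
}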
This includes all XIDs that were current + * or sub-committed in the parent at the time the parallel operation began. + * The XIDs are stored sorted in numerical order (not logical order) to make + * lookups as fast as possible. + */ + TransactionId XactTopTransactionId; + int nParallelCurrentXids; + TransactionId *ParallelCurrentXids; + #ifdef PGXC bool useLocalSnapshot; /* @@ -2724,6 +2751,23 @@ typedef struct knl_t_bgworker_context { * The postmaster's list of registered background workers, in private memory. */ slist_head background_worker_list; + + /* Is there a parallel message pending which we need to receive? */ + volatile bool ParallelMessagePending; + /* Are we initializing a parallel worker? */ + bool InitializingParallelWorker; + /* + * Our parallel worker number. We initialize this to -1, meaning that we are + * not a parallel worker. In parallel workers, it will be set to a value >= 0 + * and < the number of workers before any user code is invoked; each parallel + * worker will get a different parallel worker number. + */ + int ParallelWorkerNumber; + /* List of active parallel contexts. */ + dlist_head pcxt_list; + + BufferUsage *save_pgBufferUsage; + MemoryContext hpm_context; } knl_t_bgworker_context; struct shm_mq; diff --git a/src/include/libpq/pqmq.h b/src/include/libpq/pqmq.h index 2a749790e..a760249c3 100644 --- a/src/include/libpq/pqmq.h +++ b/src/include/libpq/pqmq.h @@ -18,7 +18,7 @@ extern void pq_redirect_to_shm_mq(shm_mq_handle* mqh); extern void pq_stop_redirect_to_shm_mq(void); -extern void pq_set_parallel_master(pid_t pid, BackendId backend_id); +extern void pq_set_parallel_master(ThreadId pid, BackendId backend_id); extern void pq_parse_errornotice(StringInfo str, ErrorData* edata); diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 4b0a3c9be..cbf3ab312 100755 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -207,6 +207,7 @@ extern bool stack_is_too_deep(void); /* in tcop/utility.c */ extern void PreventCommandIfReadOnly(const char* cmdname); +extern void PreventCommandIfParallelMode(const char *cmdname); extern void PreventCommandDuringRecovery(const char* cmdname); extern int trace_recovery(int trace_level); @@ -410,6 +411,9 @@ extern void EarlyBindingTLSVariables(void); extern bool StreamThreadAmI(); extern void StreamTopConsumerReset(); extern bool StreamTopConsumerAmI(); +extern bool ParallelWorkerAmI(); +extern bool ParallelLeaderAmI(); + /* * converts the 64 bits unsigned integer between host byte order and network byte order. diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 5a755f491..25485356a 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1192,6 +1192,7 @@ typedef struct PlanState { * top-level plan */ Instrumentation* instrument; /* Optional runtime stats for this node */ + WorkerInstrumentation *worker_instrument; /* per-worker instrumentation */ /* * Common structural data for all Plan types. These links to subsidiary @@ -1546,6 +1547,7 @@ typedef struct ScanState { bool isSampleScan; /* identify is it table sample scan or not. */ SampleScanParams sampleScanInfo; /* TABLESAMPLE params include type/seed/repeatable. 
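Because ParallelCurrentXids is kept sorted in plain numerical order, the worker-side check behind TransactionIdIsCurrentTransactionId() can be a straight binary search over that array; a sketch, with the real lookup living in xact.cpp:

static bool XidIsCurrentInWorkerSketch(TransactionId xid)
{
    int low = 0;
    int high = t_thrd.xact_cxt.nParallelCurrentXids - 1;

    while (low <= high) {
        int middle = low + (high - low) / 2;
        TransactionId probe = t_thrd.xact_cxt.ParallelCurrentXids[middle];

        if (probe == xid)
            return true;
        else if (probe < xid)
            low = middle + 1;
        else
            high = middle - 1;
    }
    return false;
}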
*/ ExecScanAccessMtd ScanNextMtd; + Size pscan_len; /* size of parallel heap scan descriptor */ } ScanState; /* @@ -2285,6 +2287,24 @@ typedef struct UniqueState { MemoryContext tempContext; /* short-term context for comparisons */ } UniqueState; +/* ---------------- + * GatherState information + * + * Gather nodes launch 1 or more parallel workers, run a subplan + * in those workers, and collect the results. + * ---------------- + */ +typedef struct GatherState { + PlanState ps; /* its first field is NodeTag */ + bool initialized; + struct ParallelExecutorInfo *pei; + int nreaders; + int nextreader; + struct TupleQueueReader **reader; + TupleTableSlot *funnel_slot; + bool need_to_scan_locally; +} GatherState; + /* ---------------- * HashState information * ---------------- diff --git a/src/include/nodes/nodeFuncs.h b/src/include/nodes/nodeFuncs.h index bc9ca374a..6043b6bf0 100755 --- a/src/include/nodes/nodeFuncs.h +++ b/src/include/nodes/nodeFuncs.h @@ -56,4 +56,6 @@ extern bool is_func_distinct_unshippable(Oid funcid); extern bool lockNextvalWalker(Node* node, void* context); +struct PlanState; +extern bool planstate_tree_walker(struct PlanState *planstate, bool (*walker)(), void *context); #endif /* NODEFUNCS_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 1c900fe0d..1aab61316 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -74,6 +74,7 @@ typedef enum NodeTag { T_Agg, T_WindowAgg, T_Unique, + T_Gather, T_Hash, T_SetOp, T_LockRows, @@ -151,6 +152,7 @@ typedef enum NodeTag { T_AggState, T_WindowAggState, T_UniqueState, + T_GatherState, T_HashState, T_SetOpState, T_LockRowsState, @@ -289,6 +291,7 @@ typedef enum NodeTag { T_ResultPath, T_MaterialPath, T_UniquePath, + T_GatherPath, T_PartIteratorPath, T_EquivalenceClass, T_EquivalenceMember, diff --git a/src/include/nodes/params.h b/src/include/nodes/params.h index 9d17ddafc..a0f035dc9 100755 --- a/src/include/nodes/params.h +++ b/src/include/nodes/params.h @@ -79,6 +79,7 @@ typedef struct ParamListInfoData { void* parserSetupArg; int numParams; /* number of ParamExternDatas following */ bool params_need_process; + struct Bitmapset *paramMask; /* if non-NULL, can ignore omitted params */ ParamExternData params[FLEXIBLE_ARRAY_MEMBER]; } ParamListInfoData; @@ -112,5 +113,8 @@ enum { CURSOR_ISOPEN = 1, CURSOR_FOUND, CURSOR_NOTFOUND, CURSOR_ROWCOUNT }; /* Functions found in src/backend/nodes/params.c */ extern ParamListInfo copyParamList(ParamListInfo from); +extern Size EstimateParamListSpace(ParamListInfo paramLI); +extern void SerializeParamList(ParamListInfo paramLI, char *start_address, Size len); +extern ParamListInfo RestoreParamList(char *start_address, Size len); #endif /* PARAMS_H */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 2da4118aa..fc2bf3e19 100755 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2539,6 +2539,7 @@ typedef struct SecLabelStmt { #define CURSOR_OPT_FAST_PLAN 0x0020 /* prefer fast-start plan */ #define CURSOR_OPT_GENERIC_PLAN 0x0040 /* force use of generic plan */ #define CURSOR_OPT_CUSTOM_PLAN 0x0080 /* force use of custom plan */ +#define CURSOR_OPT_PARALLEL_OK 0x0100 /* parallel mode OK */ typedef struct DeclareCursorStmt { NodeTag type; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index f62ecc5aa..5f4fca027 100755 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -85,6 +85,8 @@ typedef struct PlannedStmt { bool dependsOnRole; /* is 
plan specific to current role? */ + bool parallelModeNeeded; /* parallel mode required to execute? */ + Plan* planTree; /* tree of Plan nodes */ List* rtable; /* list of RangeTblEntry nodes */ @@ -242,6 +244,11 @@ typedef struct Plan { int plan_width; /* average row width in bytes */ int dop; /* degree of parallelism of current plan */ + /* + * information needed for parallel query + */ + bool parallel_aware; /* engage parallel-aware logic? */ + /* * machine learning model estimations */ @@ -1144,6 +1151,16 @@ typedef struct Unique { Oid* uniqOperators; /* equality operators to compare with */ } Unique; +/* ------------ + * gather node + * ------------ + */ +typedef struct Gather { + Plan plan; + int num_workers; + bool single_copy; +} Gather; + /* ---------------- * hash build node * diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index b9ab9a64f..67fb1b0f0 100755 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -157,6 +157,10 @@ typedef struct PlannerGlobal { bool dependsOnRole; /* is plan specific to current role? */ + bool parallelModeOK; /* parallel mode potentially OK? */ + + bool parallelModeNeeded; /* parallel mode actually required? */ + /* Added post-release, will be in a saner place in 9.3: */ int nParamExec; /* number of PARAM_EXEC Params used */ bool insideRecursion; /* For sql on hdfs, internal flag. */ @@ -548,6 +552,8 @@ typedef struct RelOptInfo { int encodedwidth; /* estimated avg width of encoded columns in result tuples */ AttrNumber encodednum; /* number of encoded column */ + bool consider_parallel; /* consider parallel paths? */ + /* materialization information */ List* reltargetlist; /* Vars to be output by scan of relation */ List* distribute_keys; /* distribute key */ @@ -878,6 +884,9 @@ typedef struct Path { RelOptInfo* parent; /* the relation this path can build */ ParamPathInfo* param_info; /* parameterization info, or NULL if none */ + bool parallel_aware; /* engage parallel-aware logic? */ + bool parallel_safe; /* OK to use as part of parallel plan? */ + /* estimated size/costs for path (see costsize.c for more info) */ double rows; /* estimated number of global result tuples */ double multiple; @@ -1173,6 +1182,18 @@ typedef struct UniquePath { OpMemInfo mem_info; /* Memory info for hashagg or sort */ } UniquePath; +/* + * GatherPath runs several copies of a plan in parallel and collects the + * results. The parallel leader may also execute the plan, unless the + * single_copy flag is set. + */ +typedef struct GatherPath { + Path path; + Path *subpath; /* path for each worker */ + int num_workers; /* number of workers sought to help */ + bool single_copy; /* don't execute path more than once */ +} GatherPath; + /* * All join-type paths share these fields. 
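The Gather plan node and the GatherState above meet in ExecGather(). A hedged sketch of its first call, pieced together from the structures in this patch; setting up the TupleQueueReaders from pei->tqueue and the single_copy/local-scan fallback are omitted:

static void GatherStartWorkersSketch(GatherState *node, EState *estate)
{
    Gather *gather = (Gather *)node->ps.plan;

    if (!node->initialized) {
        /* Build shared executor state and start the background workers. */
        node->pei = ExecInitParallelPlan(outerPlanState(node), estate, gather->num_workers);
        LaunchParallelWorkers(node->pei->pcxt);
        node->nreaders = node->pei->pcxt->nworkers_launched;
        node->nextreader = 0;
        node->initialized = true;
    }
}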
*/ diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index ce19295b1..a7fbc402e 100755 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -65,6 +65,7 @@ extern bool contain_subplans(Node* clause); extern bool contain_mutable_functions(Node* clause); extern bool contain_volatile_functions(Node* clause); extern bool contain_specified_function(Node* clause, Oid funcid); +extern bool has_parallel_hazard(Node *node, bool allow_restricted); extern bool contain_nonstrict_functions(Node* clause, bool check_agg = false); extern bool contain_leaky_functions(Node* clause); extern bool exec_simple_check_mutable_function(Node* clause); diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 080a5b57a..72116787c 100755 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -51,6 +51,8 @@ #define LOCAL_RECEIVE_KDATA_COST 1.3 /* The receive cost for local stream */ #define DEFAULT_SMP_THREAD_COST 1000 /* The cost for add a new thread */ #define DEFAULT_STREAM_MULTIPLE 1.0 +#define DEFAULT_PARALLEL_TUPLE_COST 0.1 +#define DEFAULT_PARALLEL_SETUP_COST 1000.0 #define DEFAULT_EFFECTIVE_CACHE_SIZE 16384 /* measured in pages */ @@ -80,7 +82,8 @@ extern void cost_update(Path* path, bool vectorized, Cost input_cost, double tup extern double clamp_row_est(double nrows); extern double index_pages_fetched( double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo* root, bool ispartitionedindex); -extern void cost_seqscan(Path* path, PlannerInfo* root, RelOptInfo* baserel, ParamPathInfo* param_info); +extern void cost_seqscan(Path* path, PlannerInfo* root, RelOptInfo* baserel, + ParamPathInfo* param_info, int nworkers = 0); extern void cost_samplescan(Path* path, PlannerInfo* root, RelOptInfo* baserel, ParamPathInfo* param_info); extern void cost_cstorescan(Path* path, PlannerInfo* root, RelOptInfo* baserel); extern void cost_dfsscan(Path* path, PlannerInfo* root, RelOptInfo* baserel); @@ -130,6 +133,7 @@ extern void final_cost_hashjoin(PlannerInfo* root, HashPath* path, JoinCostWorks extern void cost_rescan(PlannerInfo* root, Path* path, Cost* rescan_startup_cost, /* output parameters */ Cost* rescan_total_cost, OpMemInfo* mem_info); extern Cost cost_rescan_material(double rows, int width, OpMemInfo* mem_info, bool vectorized, int dop); +extern void cost_gather(GatherPath *path, RelOptInfo *baserel, ParamPathInfo *param_info); extern void cost_subplan(PlannerInfo* root, SubPlan* subplan, Plan* plan); extern void cost_qual_eval(QualCost* cost, List* quals, PlannerInfo* root); extern void cost_qual_eval_node(QualCost* cost, Node* qual, PlannerInfo* root); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 9dcdef176..cc8f78415 100755 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -50,7 +50,8 @@ extern void add_path(PlannerInfo* root, RelOptInfo* parent_rel, Path* new_path); extern bool add_path_precheck( RelOptInfo* parent_rel, Cost startup_cost, Cost total_cost, List* pathkeys, Relids required_outer); -extern Path* create_seqscan_path(PlannerInfo* root, RelOptInfo* rel, Relids required_outer, int dop = 1); +extern Path* create_seqscan_path(PlannerInfo* root, RelOptInfo* rel, Relids required_outer, + int dop = 1, int nworkers = 0); extern Path* create_cstorescan_path(PlannerInfo* root, RelOptInfo* rel, int dop = 1); extern Path *create_tsstorescan_path(PlannerInfo* root, RelOptInfo* rel, int dop = 1); extern IndexPath* 
create_index_path(PlannerInfo* root, IndexOptInfo* index, List* indexclauses, List* indexclausecols, @@ -73,6 +74,8 @@ extern MergeAppendPath* create_merge_append_path( extern ResultPath* create_result_path(List* quals, Path* subpath = NULL); extern MaterialPath* create_material_path(Path* subpath, bool materialize_all = false); extern UniquePath* create_unique_path(PlannerInfo* root, RelOptInfo* rel, Path* subpath, SpecialJoinInfo* sjinfo); +extern GatherPath *create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, Relids required_outer, + int nworkers); extern Path* create_subqueryscan_path(PlannerInfo* root, RelOptInfo* rel, List* pathkeys, Relids required_outer); extern Path* create_functionscan_path(PlannerInfo* root, RelOptInfo* rel); extern Path* create_valuesscan_path(PlannerInfo* root, RelOptInfo* rel); diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index 8fc4dd034..4b3e7894a 100755 --- a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -44,6 +44,13 @@ typedef struct { bool has_denserank; } DenseRank_context; +/* possible values for force_parallel_mode */ +typedef enum { + FORCE_PARALLEL_OFF, + FORCE_PARALLEL_ON, + FORCE_PARALLEL_REGRESS +} ForceParallelMode; + extern ExecNodes* getExecNodesByGroupName(const char* gname); extern PlannedStmt* planner(Query* parse, int cursorOptions, ParamListInfo boundParams); extern PlannedStmt* standard_planner(Query* parse, int cursorOptions, ParamListInfo boundParams); diff --git a/src/include/postmaster/bgworker.h b/src/include/postmaster/bgworker.h index 6d529e0ff..0634a9f18 100644 --- a/src/include/postmaster/bgworker.h +++ b/src/include/postmaster/bgworker.h @@ -96,6 +96,7 @@ typedef struct BackgroundWorker { Datum bgw_main_arg; char bgw_extra[BGW_EXTRALEN]; ThreadId bgw_notify_pid; /* SIGUSR1 this backend on start/stop */ + void *bgw_parallel_context; } BackgroundWorker; typedef enum BgwHandleStatus { diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index 5070a7996..a20a4c0bc 100644 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -140,6 +140,8 @@ extern void ClosePostmasterPorts(bool am_syslogger); extern int MaxLivePostmasterChildren(void); +extern bool PostmasterMarkPIDForWorkerNotify(ThreadId pid); + extern Size CBMShmemSize(void); extern void CBMShmemInit(void); diff --git a/src/include/storage/dsm.h b/src/include/storage/dsm.h new file mode 100644 index 000000000..f07111d78 --- /dev/null +++ b/src/include/storage/dsm.h @@ -0,0 +1,48 @@ +/* ------------------------------------------------------------------------- + * + * dsm.h + * manage dynamic shared memory segments + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/dsm.h + * + * ------------------------------------------------------------------------- + */ +#ifndef DSM_H +#define DSM_H + +#define DSM_MAX_ITEM_PER_QUERY 8 + +/* Startup and shutdown functions. */ +#define dsm_cleanup_using_control_segment(oldControlHandle) +#define dsm_postmaster_startup(shmemHeader) +#define dsm_backend_shutdown +#define dsm_detach_all +#define dsm_set_control_handle(dsmHandle) + +/* Functions that create or remove mappings. */ +extern void *dsm_create(void); +#define dsm_attach(dsmHandle) +extern void dsm_detach(void **seg); + +/* Resource management functions. 
*/ +#define dsm_pin_mapping(dsmSegment) +#define dsm_unpin_mapping(dsmSegment) +#define dsm_pin_segment(dsmSegment) +#define dsm_unpin_segment(dsmHandle) +#define dsm_find_mapping(dsmHandle) + +/* Informational functions. */ +#define dsm_segment_address(dsmSegment) +#define dsm_segment_map_length(dsmSegment) +#define dsm_segment_handle(dsmSegment) + +/* Cleanup hooks. */ +#define on_dsm_detach(dsmSegment, callbackFunc, arg) +#define cancel_on_dsm_detach(dsmSegment, callbackFunc, arg) +#define reset_on_dsm_detach + +#endif /* DSM_H */ \ No newline at end of file diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index bcd291932..858ce5d3e 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -55,6 +55,8 @@ extern Snapshot GetLocalSnapshotData(Snapshot snapshot); extern void ReleaseSnapshotData(Snapshot snapshot); extern bool ProcArrayInstallImportedXmin(TransactionId xmin, TransactionId sourcexid); +extern bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc); + extern void set_proc_csn_and_check(const char* func, CommitSeqNo csn_min, SnapshotType snapshot_type); extern RunningTransactions GetRunningTransactionData(void); diff --git a/src/include/storage/shm_mq.h b/src/include/storage/shm_mq.h index 27f98af44..8d00ede30 100644 --- a/src/include/storage/shm_mq.h +++ b/src/include/storage/shm_mq.h @@ -53,7 +53,7 @@ extern PGPROC *shm_mq_get_receiver(shm_mq *); extern PGPROC *shm_mq_get_sender(shm_mq *); /* Set up backend-local queue state. */ -extern shm_mq_handle *shm_mq_attach(shm_mq *mq, char *seg, +extern shm_mq_handle *shm_mq_attach(shm_mq *mq, void *seg, BackgroundWorkerHandle *handle); /* Associate worker handle with shm_mq. */ diff --git a/src/include/tcop/dest.h b/src/include/tcop/dest.h index 464324e08..653ea1d2d 100755 --- a/src/include/tcop/dest.h +++ b/src/include/tcop/dest.h @@ -108,7 +108,8 @@ typedef enum { DestBatchLocalRedistribute, /* results send to consumer thread in a local redistribute way */ DestBatchLocalRoundRobin, /* results send to consumer thread in a local roundrobin way */ - DestBatchHybrid + DestBatchHybrid, + DestTupleQueue /* results sent to tuple queue */ } CommandDest; diff --git a/src/include/utils/datum.h b/src/include/utils/datum.h index 087f904d0..c818045df 100644 --- a/src/include/utils/datum.h +++ b/src/include/utils/datum.h @@ -45,4 +45,12 @@ extern void datumFree(Datum value, bool typByVal, int typLen); */ extern bool datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen); +/* + * Serialize and restore datums so that we can transfer them to parallel + * workers. 
+ */ +extern Size datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen); +extern void datumSerialize(Datum value, bool isnull, bool typByVal, int typLen, char **start_address, Size *remainLen); +extern Datum datumRestore(char **start_address, Size *remainLen, bool *isnull); + #endif /* DATUM_H */ diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 2c83af000..99a8d2562 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -79,6 +79,7 @@ extern Oid get_func_variadictype(Oid funcid); extern bool get_func_retset(Oid funcid); extern bool func_strict(Oid funcid); extern char func_volatile(Oid funcid); +extern char func_parallel(Oid funcid); extern bool get_func_proshippable(Oid funcid); extern bool get_func_leakproof(Oid funcid); extern float4 get_func_cost(Oid funcid); diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index 0fa155283..2b2b2fd90 100755 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -14,6 +14,7 @@ #define SNAPMGR_H #include "utils/resowner.h" +#include "utils/snapshot.h" extern Snapshot GetTransactionSnapshot(bool force_local_snapshot = false); extern Snapshot GetLatestSnapshot(void); @@ -59,4 +60,10 @@ extern struct HTAB* HistoricSnapshotGetTupleCids(void); extern void SetupHistoricSnapshot(Snapshot snapshot_now, struct HTAB* tuplecids); extern void TeardownHistoricSnapshot(bool is_error); extern bool HistoricSnapshotActive(void); + +extern Size EstimateSnapshotSpace(Snapshot snapshot); +extern void SerializeSnapshot(Snapshot snapshot, char *start_address, Size len); +extern Snapshot RestoreSnapshot(char *start_address, Size len); +extern void RestoreTransactionSnapshot(Snapshot snapshot, void *master_pgproc); + #endif /* SNAPMGR_H */ diff --git a/src/test/regress/expected/bypass_simplequery_support.out b/src/test/regress/expected/bypass_simplequery_support.out index 900262ef4..68771fa29 100644 --- a/src/test/regress/expected/bypass_simplequery_support.out +++ b/src/test/regress/expected/bypass_simplequery_support.out @@ -7,6 +7,7 @@ set enable_seqscan=off; set opfusion_debug_mode = 'log'; set log_min_messages=debug; set logging_module = 'on(OPFUSION)'; +set max_parallel_workers_per_gather=0; -- create table drop table if exists test_bypass_sq1; NOTICE: table "test_bypass_sq1" does not exist, skipping diff --git a/src/test/regress/sql/bypass_simplequery_support.sql b/src/test/regress/sql/bypass_simplequery_support.sql index 156ba7c64..7922d127b 100644 --- a/src/test/regress/sql/bypass_simplequery_support.sql +++ b/src/test/regress/sql/bypass_simplequery_support.sql @@ -7,6 +7,7 @@ set enable_seqscan=off; set opfusion_debug_mode = 'log'; set log_min_messages=debug; set logging_module = 'on(OPFUSION)'; +set max_parallel_workers_per_gather=0; -- create table drop table if exists test_bypass_sq1;
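Editorial note, not part of the patch: the utils/datum.h hunk above adds datumEstimateSpace()/datumSerialize()/datumRestore() so parameter values can be shipped to parallel workers. The sketch below is illustrative only. It assumes the new remainLen argument tracks the unconsumed portion of the buffer and that both functions advance *start_address as they go; the helper names serialize_one_datum and restore_one_datum are invented for this example and do not exist in the patch.

    /* Illustrative sketch; not part of this patch. */
    #include "postgres.h"
    #include "utils/datum.h"

    /* Leader side: flatten one datum into a palloc'd buffer. */
    static char *serialize_one_datum(Datum value, bool isnull, bool typByVal, int typLen, Size *len_out)
    {
        Size len = datumEstimateSpace(value, isnull, typByVal, typLen);
        char *buf = (char *)palloc(len);
        char *cursor = buf;
        Size remain = len;

        /* assumed to advance cursor and decrement remain as bytes are written */
        datumSerialize(value, isnull, typByVal, typLen, &cursor, &remain);

        *len_out = len;
        return buf;
    }

    /* Worker side: rebuild the datum from the transferred bytes. */
    static Datum restore_one_datum(char *buf, Size len, bool *isnull)
    {
        char *cursor = buf;
        Size remain = len;

        return datumRestore(&cursor, &remain, isnull);
    }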