Files
openGauss-server/src/gausskernel/storage/ipc/procarray.cpp
2023-12-27 11:05:52 +08:00

5539 lines
201 KiB
C++
Executable File

/* -------------------------------------------------------------------------
*
* procarray.cpp
* openGauss process array code.
*
*
* This module maintains arrays of the PGPROC and PGXACT structures for all
* active backends. Although there are several uses for this, the principal
* one is as a means of determining the set of currently running transactions.
*
* Because of various subtle race conditions it is critical that a backend
* hold the correct locks while setting or clearing its MyPgXact->xid field.
* See notes in src/backend/access/transam/README.
*
* The process arrays now also include structures representing prepared
* transactions. The xid and subxids fields of these are valid, as are the
* myProcLocks lists. They can be distinguished from regular backend PGPROCs
* at need by checking for pid == 0.
*
#ifdef PGXC
* Vanilla PostgreSQL assumes maximum TransactionIds in any snapshot is
* arrayP->maxProcs. It does not apply to XC because XC's snapshot
* should include XIDs running in other node, which may come at any
* time. This means that needed size of xip varies from time to time.
*
* This must be handled properly in all the functions in this module.
*
* The member max_xcnt was added as SnapshotData member to indicate the
* real size of xip array.
*
* Here, the following assumption is made for SnapshotData struct throughout
* this module.
*
* 1. xip member physical size is indicated by max_xcnt member.
* 2. If max_xcnt == 0, it means that the xip member is NULL, and vice versa.
* 3. xip (and subxip) are allocated using malloc() or realloc() directly.
*
* For Postgres-XC, there is some special handling for ANALYZE.
* An XID for a local ANALYZE command will never involve other nodes.
* Also, ANALYZE may run for a long time, affecting snapshot xmin values
* on other nodes unnecessarily. We want to exclude the XID
* in global snapshots, but include it in local ones. As a result,
* these are tracked in shared memory separately.
#endif
*
* During hot standby, we also keep a list of XIDs representing transactions
* that are known to be running in the master (or more precisely, were running
* as of the current point in the WAL stream). This list is kept in the
* KnownAssignedXids array, and is updated by watching the sequence of
* arriving XIDs. This is necessary because if we leave those XIDs out of
* snapshots taken for standby queries, then they will appear to be already
* complete, leading to MVCC failures. Note that in hot standby, the PGPROC
* array represents standby processes, which by definition are not running
* transactions that have XIDs.
*
* It is perhaps possible for a backend on the master to terminate without
* writing an abort record for its transaction. While that shouldn't really
* happen, it would tie up KnownAssignedXids indefinitely, so we protect
* ourselves by pruning the array when a valid list of running XIDs arrives.
*
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
* Portions Copyright (c) 2010-2012 Postgres-XC Development Group
*
*
* IDENTIFICATION
* src/gausskernel/storage/ipc/procarray.cpp
*
* -------------------------------------------------------------------------
*/
#include "postgres.h"
#include "knl/knl_variable.h"
#include <signal.h>
#include "access/clog.h"
#include "access/csnlog.h"
#include "access/extreme_rto/page_redo.h"
#include "access/subtrans.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "catalog/catalog.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "catalog/pg_type.h"
#include "commands/vacuum.h"
#include "funcapi.h"
#include "gtm/gtm_txn.h"
#include "miscadmin.h"
#include "postmaster/snapcapturer.h"
#include "postmaster/cfs_shrinker.h"
#include "storage/lmgr.h"
#include "storage/spin.h"
#include "threadpool/threadpool.h"
#include "utils/builtins.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/timestamp.h"
#include "utils/memutils.h"
#include "utils/atomic.h"
#include "utils/distribute_test.h"
#include "access/heapam.h"
#ifdef PGXC
#include "pgxc/pgxc.h"
#include "access/gtm.h"
#include "storage/ipc.h"
#include "pgxc/nodemgr.h"
#include"replication/walreceiver.h"
/* PGXC_DATANODE */
#include "postmaster/autovacuum.h"
#include "postmaster/postmaster.h"
#include "postmaster/twophasecleaner.h"
#endif
#include "gssignal/gs_signal.h"
#include "catalog/pg_control.h"
#include "pgstat.h"
#include "storage/lock/lwlock.h"
#include "threadpool/threadpool_sessctl.h"
#include "access/parallel_recovery/dispatcher.h"
#include "access/multi_redo_api.h"
#include "gstrace/gstrace_infra.h"
#include "gstrace/storage_gstrace.h"
#include "ddes/dms/ss_common_attr.h"
#include "ddes/dms/ss_transaction.h"
#include "ddes/dms/ss_reform_common.h"
#include "replication/ss_cluster_replication.h"
#ifdef ENABLE_UT
#define static
#endif /* ENABLE_UT */
#ifdef XIDCACHE_DEBUG
/* counters for XidCache measurement */
static long xc_by_recent_xmin = 0;
static long xc_by_known_xact = 0;
static long xc_by_my_xact = 0;
static long xc_by_latest_xid = 0;
static long xc_by_main_xid = 0;
static long xc_by_child_xid = 0;
static long xc_by_known_assigned = 0;
static long xc_no_overflow = 0;
static long xc_slow_answer = 0;
#define xc_by_recent_xmin_inc() (xc_by_recent_xmin++)
#define xc_by_known_xact_inc() (xc_by_known_xact++)
#define xc_by_my_xact_inc() (xc_by_my_xact++)
#define xc_by_latest_xid_inc() (xc_by_latest_xid++)
#define xc_by_main_xid_inc() (xc_by_main_xid++)
#define xc_by_child_xid_inc() (xc_by_child_xid++)
static void DisplayXidCache(void);
#else /* !XIDCACHE_DEBUG */
#define xc_by_recent_xmin_inc() ((void)0)
#define xc_by_known_xact_inc() ((void)0)
#define xc_by_my_xact_inc() ((void)0)
#define xc_by_latest_xid_inc() ((void)0)
#define xc_by_main_xid_inc() ((void)0)
#define xc_by_child_xid_inc() ((void)0)
#endif /* XIDCACHE_DEBUG */
#ifdef PGXC /* PGXC_DATANODE */
#define ADD_XMIN_TO_ARRAY(xmin) \
if (xminArray != NULL) \
(xminArray)[count] = xmin
void SetGlobalSnapshotData(
TransactionId xmin, TransactionId xmax, uint64 csn, GTM_Timeline timeline, bool ss_need_sync_wait_all);
void UnsetGlobalSnapshotData(void);
static bool GetPGXCSnapshotData(Snapshot snapshot);
#ifdef ENABLE_MULTIPLE_NODES
static bool GetSnapshotDataDataNode(Snapshot snapshot);
static bool GetSnapshotDataCoordinator(Snapshot snapshot);
#endif
static void cleanSnapshot(Snapshot snapshot);
static void ResetProcXidCache(PGPROC* proc, bool needlock);
#endif
/* for local multi version snapshot */
void CalculateLocalLatestSnapshot(bool forceCalc);
static TransactionId GetMultiSnapshotOldestXmin();
static inline void ProcArrayEndTransactionInternal(PGPROC* proc, PGXACT* pgxact, TransactionId latestXid,
TransactionId* xid, uint32* nsubxids);
void XidCacheRemoveRunningXids(PGPROC* proc, PGXACT* pgxact);
void ProcArrayGroupClearXid(bool isSubTransaction, PGPROC* proc, TransactionId latestXid,
TransactionId subTranactionXid, int nSubTransactionXids,
TransactionId* subTransactionXids, TransactionId subTransactionLatestXid);
extern bool StreamTopConsumerAmI();
#define PROCARRAY_MAXPROCS (g_instance.shmem_cxt.MaxBackends + \
g_instance.attr.attr_storage.max_prepared_xacts * NUM_TWOPHASE_PARTITIONS)
/*
* Report shared-memory space needed by CreateProcXactHashTable
*/
Size ProcXactHashTableShmemSize(void)
{
    /* One ProcXactLookupEntry per backend plus per potential prepared transaction. */
    Size numEntries = PROCARRAY_MAXPROCS;
    return hash_estimate_size(numEntries, sizeof(ProcXactLookupEntry));
}
void CreateProcXactHashTable(void)
{
HASHCTL info;
info.keysize = sizeof(TransactionId);
info.entrysize = sizeof(ProcXactLookupEntry);
info.hash = tag_hash;
info.num_partitions = NUM_PROCXACT_PARTITIONS; /* We only have 812 threads in current configuration */
g_instance.ProcXactTable = ShmemInitHash("Proc Xact Lookup Table", PROCARRAY_MAXPROCS, PROCARRAY_MAXPROCS,
&info, HASH_ELEM | HASH_FUNCTION | HASH_PARTITION);
}
/* @Return partition lock id. */
/*
 * Acquire (in 'mode') the partition lock covering 'xid' in the ProcXact
 * hash table, and return it so the caller can release it later.
 */
static LWLock *LockProcXactHashTablePartition(TransactionId xid, LWLockMode mode)
{
    /* Map the xid's hash value onto one of the NUM_PROCXACT_PARTITIONS locks. */
    uint32 hashValue = get_hash_value(g_instance.ProcXactTable, &xid);
    uint32 partitionId = hashValue & (NUM_PROCXACT_PARTITIONS - 1);
    LWLock *partitionLock =
        &t_thrd.shemem_ptr_cxt.mainLWLockArray[(uint32)(FirstProcXactMappingLock + partitionId)].lock;
    LWLockAcquire(partitionLock, mode);
    return partitionLock;
}
/*
 * Look up the pgprocno advertising 'xid', or InvalidProcessId if none.
 *
 * Takes the partition lock in shared mode for the duration of the probe.
 */
int
ProcXactHashTableLookup(TransactionId xid)
{
    /* Caller should make sure ProcArrayLock is held by it in share mode */
    ProcXactLookupEntry *result = NULL;
    bool found = false;
    int procId = InvalidProcessId;

    LWLock *lock = LockProcXactHashTablePartition(xid, LW_SHARED);
    result = (ProcXactLookupEntry *)hash_search(g_instance.ProcXactTable, &xid, HASH_FIND, &found);

    /*
     * Copy the payload while still holding the partition lock: once the lock
     * is released, a concurrent ProcXactHashTableRemove() may delete the
     * entry, turning the dereference into a use-after-free.
     */
    if (found) {
        procId = result->proc_id;
    }
    LWLockRelease(lock);

    return procId;
}
/*
 * Insert (or overwrite) the xid -> pgprocno mapping for 'xid'.
 *
 * Takes the partition lock in exclusive mode around both the HASH_ENTER and
 * the payload store.
 */
void
ProcXactHashTableAdd(TransactionId xid, int procId)
{
    /* Caller should make sure ProcArrayLock is held by it in exclusive mode */
    bool found = false;

    LWLock *lock = LockProcXactHashTablePartition(xid, LW_EXCLUSIVE);
    ProcXactLookupEntry *result =
        (ProcXactLookupEntry *)hash_search(g_instance.ProcXactTable, &xid, HASH_ENTER, &found);

    /*
     * Fill in the payload BEFORE releasing the partition lock. The original
     * code stored proc_id after LWLockRelease(), which let a concurrent
     * lookup observe a freshly-entered entry with an uninitialized (or a
     * re-entered entry with a stale) proc_id.
     */
    result->proc_id = procId;
    LWLockRelease(lock);
}
/*
 * Remove the xid -> pgprocno mapping for 'xid'.
 *
 * Logs a WARNING (it is not treated as fatal) if no entry exists.
 */
void
ProcXactHashTableRemove(TransactionId xid)
{
    bool found = false;

    LWLock *lock = LockProcXactHashTablePartition(xid, LW_EXCLUSIVE);
    (void)hash_search(g_instance.ProcXactTable, &xid, HASH_REMOVE, &found);
    LWLockRelease(lock);

    if (!found) {
        /*
         * A missing key is an "undefined object" condition; the original
         * code reported ERRCODE_DUPLICATE_OBJECT, which is the opposite.
         */
        ereport(WARNING, (errcode(ERRCODE_UNDEFINED_OBJECT),
            errmsg("transaction identifier %lu not exists in ProcXact hash table", xid)));
    }
}
/*
* Report shared-memory space needed by CreateSharedProcArray.
*/
Size ProcArrayShmemSize(void)
{
    Size size;

    /* Size of the ProcArray structure itself */
/*
 * Identical redefinition of the macro defined earlier in this file; benign
 * per the C/C++ preprocessor rules, kept for readability of this function.
 */
#define PROCARRAY_MAXPROCS (g_instance.shmem_cxt.MaxBackends + \
    g_instance.attr.attr_storage.max_prepared_xacts * NUM_TWOPHASE_PARTITIONS)

    /* Fixed header plus one pgprocno slot per potential array member. */
    size = offsetof(ProcArrayStruct, pgprocnos);
    size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));

    /*
     * During Hot Standby processing we have a data structure called
     * KnownAssignedXids, created in shared memory. Local data structures are
     * also created in various backends during GetSnapshotData(),
     * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
     * main structures created in those functions must be identically sized,
     * since we may at times copy the whole of the data structures around. We
     * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
     *
     * Ideally we'd only create this structure if we were actually doing hot
     * standby in the current run, but we don't know that yet at the time
     * shared memory is being set up.
     */
#define TOTAL_MAX_CACHED_SUBXIDS ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
    if (g_instance.attr.attr_storage.EnableHotStandby) {
        /* KnownAssignedXids array plus its per-slot validity flags. */
        size = add_size(size, mul_size(sizeof(TransactionId), TOTAL_MAX_CACHED_SUBXIDS));
        size = add_size(size, mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
    }

    return size;
}
/*
* Initialize the shared PGPROC array during postmaster startup.
*/
void CreateSharedProcArray(void)
{
    /* Create or attach to the ProcArray shared structure */
    MemoryContext oldcontext = MemoryContextSwitchTo(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_CBB));

    /* Over-allocate by one cache line so the struct can be cache-line aligned. */
    size_t array_size = offsetof(ProcArrayStruct, pgprocnos) + PROCARRAY_MAXPROCS * sizeof(int) + PG_CACHE_LINE_SIZE;
    if (g_instance.proc_array_idx == NULL) {
        g_instance.proc_array_idx = (ProcArrayStruct*)CACHELINEALIGN(palloc(array_size));
    }
    {
        /*
         * Initialize the array header to an empty state.
         * NOTE(review): this runs unconditionally, even when proc_array_idx
         * already existed — presumably this function is only called once per
         * instance lifetime; confirm before relying on re-entry safety.
         */
        g_instance.proc_array_idx->numProcs = 0;
        g_instance.proc_array_idx->maxProcs = PROCARRAY_MAXPROCS;
        g_instance.proc_array_idx->replication_slot_xmin = InvalidTransactionId;
        g_instance.proc_array_idx->replication_slot_catalog_xmin = InvalidTransactionId;
    }

    /* Cache base pointers to the shared PGPROC/PGXACT arrays for fast access. */
    g_instance.proc_base_all_procs = g_instance.proc_base->allProcs;
    g_instance.proc_base_all_xacts = g_instance.proc_base->allPgXact;

    MemoryContextSwitchTo(oldcontext);
}
/*
* Add the specified PGPROC to the shared array.
*/
void ProcArrayAdd(PGPROC* proc)
{
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    int index = 0;
    errno_t rc;

    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    PGXACT* pgxact = &g_instance.proc_base_all_xacts[proc->pgprocno];

    if (arrayP->numProcs >= arrayP->maxProcs) {
        /*
         * Ooops, no room. (This really shouldn't happen, since there is a
         * fixed supply of PGPROC structs too, and so we should have failed
         * earlier.)
         */
        LWLockRelease(ProcArrayLock);
        ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("sorry, too many clients already")));
    }

    /*
     * Keep the procs array sorted by (PGPROC *) so that we can utilize
     * locality of references much better. This is useful while traversing the
     * ProcArray because there is a increased likelihood of finding the next
     * PGPROC structure in the cache.
     *
     * Since the occurrence of adding/removing a proc is much lower than the
     * access to the ProcArray itself, the overhead should be marginal
     */
    for (index = 0; index < arrayP->numProcs; index++) {
        /*
         * If we are the first PGPROC or if we have found our right position
         * in the array, break
         */
        if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno))
            break;
    }

    /*
     * Shift the tail [index, numProcs) up one slot to open a hole at 'index';
     * memmove_s because the source and destination regions overlap.
     */
    rc = memmove_s(&arrayP->pgprocnos[index + 1],
        PROCARRAY_MAXPROCS * sizeof(int),
        &arrayP->pgprocnos[index],
        (arrayP->numProcs - index) * sizeof(int));
    securec_check(rc, "\0", "\0");
    arrayP->pgprocnos[index] = proc->pgprocno;
    arrayP->numProcs++;

    /* Mirror any already-assigned XID into the xid -> proc lookup table. */
    if (TransactionIdIsValid(pgxact->xid)) {
        ProcXactHashTableAdd(pgxact->xid, proc->pgprocno);
    }
    LWLockRelease(ProcArrayLock);
}
/*
* Remove the specified PGPROC from the shared array.
*
* When latestXid is a valid XID, we are removing a live 2PC gxact from the
* array, and thus causing it to appear as "not running" anymore. In this
* case we must advance latestCompletedXid. (This is essentially the same
* as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
* the ProcArrayLock only once, and don't damage the content of the PGPROC;
* twophase.c depends on the latter.)
*/
void ProcArrayRemove(PGPROC* proc, TransactionId latestXid)
{
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    PGXACT* pgxact = &g_instance.proc_base_all_xacts[proc->pgprocno];
    int index = 0;

    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    if (TransactionIdIsValid(latestXid)) {
        /* Removing a live 2PC gxact: its xid must still be advertised. */
        Assert(TransactionIdIsValid(pgxact->xid));

        /* Advance global latestCompletedXid while holding the lock */
        if (TransactionIdPrecedes(t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid, latestXid))
            t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid = latestXid;
    } else {
        if (IS_PGXC_DATANODE || !IsConnFromCoord()) {
            /* Shouldn't be trying to remove a live transaction here */
            Assert(!TransactionIdIsValid(pgxact->xid));
        }
    }

    /* Clear xid from ProcXactHashTable. We can ignore BootstrapTransactionId */
    if (TransactionIdIsNormal(pgxact->xid)) {
        ProcXactHashTableRemove(pgxact->xid);
    }

    /* Find our slot, close the gap, and keep the array sorted. */
    for (index = 0; index < arrayP->numProcs; index++) {
        if (arrayP->pgprocnos[index] == proc->pgprocno) {
            /* Keep the PGPROC array sorted. See notes above */
            errno_t rc;
            rc = memmove_s(&arrayP->pgprocnos[index],
                arrayP->numProcs * sizeof(int),
                &arrayP->pgprocnos[index + 1],
                (arrayP->numProcs - index - 1) * sizeof(int));
            securec_check(rc, "\0", "\0");
            arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */
            arrayP->numProcs--;

            /* Calc new snapshot, since a running xid just disappeared. */
            if (TransactionIdIsValid(latestXid))
                CalculateLocalLatestSnapshot(false);

            LWLockRelease(ProcArrayLock);

            /* Free xid cache memory if needed, must after procarray remove */
            ResetProcXidCache(proc, true);
            proc->commitCSN = 0;
            pgxact->needToSyncXid = 0;
            return;
        }
    }

    /* Ooops: the proc was not in the array at all; log and carry on. */
    LWLockRelease(ProcArrayLock);
    ereport(LOG, (errmsg("failed to find proc in ProcArray")));
}
/*
 * On a multi-node datanode, drop the autovacuum marker from vacuumFlags
 * unless this process really is an autovacuum worker.
 */
static void inline ProcArrayClearAutovacuum(PGXACT* pgxact)
{
    bool onDatanodeCluster = IS_PGXC_DATANODE && !IS_SINGLE_NODE;
    if (onDatanodeCluster && !IsAutoVacuumWorkerProcess()) {
        pgxact->vacuumFlags &= ~PROC_IS_AUTOVACUUM;
    }
}
/*
* ProcArrayEndTransaction -- mark a transaction as no longer running
*
* This is used interchangeably for commit and abort cases. The transaction
* commit/abort must already be reported to WAL and pg_clog.
*
* proc is currently always t_thrd.proc, but we pass it explicitly for flexibility.
* latestXid is the latest Xid among the transaction's main XID and
* subtransactions, or InvalidTransactionId if it has no XID. (We must ask
* the caller to pass latestXid, instead of computing it from the PGPROC's
* contents, because the subxid information in the PGPROC might be
* incomplete.)
*/
void ProcArrayEndTransaction(PGPROC* proc, TransactionId latestXid, bool isCommit)
{
    PGXACT* pgxact = &g_instance.proc_base_all_xacts[proc->pgprocno];

#ifndef ENABLE_DISTRIBUTE_TEST
    /* Flush or reset workload-management catalog info according to outcome. */
    if (ENABLE_WORKLOAD_CONTROL && WLMIsInfoInit()) {
        if (isCommit) {
            UpdateWlmCatalogInfoHash();
        } else {
            ResetWlmCatalogFlag();
        }
    }
#endif
    if (TransactionIdIsValid(latestXid)) {
        /*
         * We must lock ProcArrayLock while clearing our advertised XID, so
         * that we do not exit the set of "running" transactions while someone
         * else is taking a snapshot. See discussion in
         * src/backend/access/transam/README.
         */
#ifdef PGXC
        /*
         * Remove this assertion. We have seen this failing because a ROLLBACK
         * statement may get canceled by a Coordinator, leading to recursive
         * abort of a transaction. This must be a openGauss issue, highlighted
         * by XC. See thread on hackers with subject "Canceling ROLLBACK
         * statement"
         */
#else
        Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
#endif
        /*
         * If we can immediately acquire ProcArrayLock, we clear our own XID
         * and release the lock. If not, use group XID clearing to improve
         * efficiency.
         */
        if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE)) {
            TransactionId xid;
            uint32 nsubxids;

            ProcArrayEndTransactionInternal(proc, pgxact, latestXid, &xid, &nsubxids);
            /* Our XID just left the running set; refresh the local snapshot. */
            CalculateLocalLatestSnapshot(false);
            LWLockRelease(ProcArrayLock);
        } else
            ProcArrayGroupClearXid(false, proc, latestXid, InvalidTransactionId, 0, NULL, InvalidTransactionId);
    } else {
        /*
         * If we have no XID, we don't need to lock, since we won't affect
         * anyone else's calculation of a snapshot. We might change their
         * estimate of global xmin, but that's OK.
         */
        Assert(!TransactionIdIsValid(pgxact->xid));

        pgxact->handle = InvalidTransactionHandle;
        proc->lxid = InvalidLocalTransactionId;
        pgxact->next_xid = InvalidTransactionId;
        pgxact->xmin = InvalidTransactionId;
        proc->snapXmax = InvalidTransactionId;
        proc->snapCSN = InvalidCommitSeqNo;
        /* extreme-RTO standby-read snapshot bookkeeping */
        proc->exrto_read_lsn = 0;
        proc->exrto_min = 0;
        proc->exrto_gen_snap_time = 0;
        pgxact->csn_min = InvalidCommitSeqNo;
        pgxact->csn_dr = InvalidCommitSeqNo;
        /* must be cleared with xid/xmin: */
        pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
        ProcArrayClearAutovacuum(pgxact);
        pgxact->delayChkpt = false; /* be sure this is cleared in abort */
        proc->recoveryConflictPending = false;
        proc->commitCSN = 0;
        pgxact->needToSyncXid = 0;

        Assert(pgxact->nxids == 0);
    }

    /*
     * Reset isInResetUserName to false. isInResetUserName is set true in case 'O' so as to mask the log
     * in GetPGXCSnapshotData and GetSnapshotData.
     */
    t_thrd.postgres_cxt.isInResetUserName = false;
}
/*
* Mark a write transaction as no longer running.
*
* We don't do any locking here; caller must handle that.
*/
static inline void ProcArrayEndTransactionInternal(PGPROC* proc, PGXACT* pgxact, TransactionId latestXid,
    TransactionId* xid, uint32* nsubxids)
{
    /* Store xid and nsubxids to update csnlog */
    *xid = pgxact->xid;
    *nsubxids = pgxact->nxids;

    /* Clear xid from ProcXactHashTable. We can ignore BootstrapTransactionId */
    if (TransactionIdIsNormal(*xid)) {
        ProcXactHashTableRemove(*xid);
    }

    /* Reset all advertised transaction state; caller holds ProcArrayLock. */
    pgxact->handle = InvalidTransactionHandle;
    pgxact->xid = InvalidTransactionId;
    pgxact->next_xid = InvalidTransactionId;
    proc->lxid = InvalidLocalTransactionId;
    pgxact->xmin = InvalidTransactionId;
    proc->snapXmax = InvalidTransactionId;
    proc->snapCSN = InvalidCommitSeqNo;
    /* extreme-RTO standby-read snapshot bookkeeping */
    proc->exrto_read_lsn = 0;
    proc->exrto_min = 0;
    proc->exrto_gen_snap_time = 0;
    pgxact->csn_min = InvalidCommitSeqNo;
    pgxact->csn_dr = InvalidCommitSeqNo;
    /* must be cleared with xid/xmin: */
    pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
    ProcArrayClearAutovacuum(pgxact);
    pgxact->delayChkpt = false; /* be sure this is cleared in abort */
    proc->recoveryConflictPending = false;

    /* Clear the subtransaction-XID cache too while holding the lock */
    pgxact->nxids = 0;

    /* Also advance global latestCompletedXid while holding the lock */
    if (TransactionIdPrecedes(t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid, latestXid))
        t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid = latestXid;

    /* Clear commit csn after csn update */
    proc->commitCSN = 0;
    pgxact->needToSyncXid = 0;

    ResetProcXidCache(proc, true);
}
static inline void ProcInsertIntoGroup(PGPROC* proc, uint32* nextidx) {
    /*
     * Lock-free push of 'proc' onto the head of the group-clear list.
     * On CAS failure we simply retry (the loop re-reads the head).  On
     * success, *nextidx holds the previous head — INVALID_PGPROCNO if the
     * list was empty — which the caller uses to decide whether it became
     * the group leader.
     */
    while (true) {
        *nextidx = pg_atomic_read_u32(&g_instance.proc_base->procArrayGroupFirst);
        pg_atomic_write_u32(&proc->procArrayGroupNext, *nextidx);
        if (pg_atomic_compare_exchange_u32(
            &g_instance.proc_base->procArrayGroupFirst, nextidx, (uint32)proc->pgprocno))
            break;
    }
}
/* Reset every group-commit bookkeeping field of 'proc' back to its idle state. */
static inline void ClearProcArrayGroupCache(PGPROC* proc) {
    proc->procArrayGroupSubXactLatestXid = InvalidTransactionId;
    proc->procArrayGroupSubXactXids = NULL;
    proc->procArrayGroupSubXactNXids = 0;
    proc->procArrayGroupMemberXid = InvalidTransactionId;
    proc->procArrayGroupMember = false;
}
/*
 * Stash this group member's XID bookkeeping into its PGPROC so the group
 * leader can later clear the member's state on its behalf.
 */
static inline void SetProcArrayGroupCache(PGPROC* proc, TransactionId xid, int nxids,
    TransactionId* xids, TransactionId latestXid)
{
    proc->procArrayGroupSubXactLatestXid = latestXid;
    proc->procArrayGroupSubXactXids = xids;
    proc->procArrayGroupSubXactNXids = nxids;
    proc->procArrayGroupMemberXid = xid;
}
/*
* ProcArrayGroupClearXid -- group XID clearing
*
* When we cannot immediately acquire ProcArrayLock in exclusive mode at
* commit time, add ourselves to a list of processes that need their XIDs
* cleared. The first process to add itself to the list will acquire
* ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
* for transaction group members and XidCacheRemoveRunningXids
* for subtransaction group members. This avoids a great deal of contention
* around ProcArrayLock when many processes are trying to commit at once,
* since the lock need not be repeatedly handed off from one committing
* process to the next.
*/
void ProcArrayGroupClearXid(bool isSubTransaction, PGPROC* proc,
    TransactionId latestXid, TransactionId subTranactionXid,
    int nSubTransactionXids, TransactionId* subTransactionXids,
    TransactionId subTransactionLatestXid)
{
    uint32 nextidx;
    uint32 wakeidx;
    /*
     * NOTE(review): PROCARRAY_MAXPROCS expands to a runtime expression, so
     * these are variable-length arrays on the stack — presumably MaxBackends
     * is bounded enough that this cannot overflow the stack; confirm.
     */
    TransactionId xid[PROCARRAY_MAXPROCS];
    uint32 nsubxids[PROCARRAY_MAXPROCS];
    uint32 index = 0;
    bool groupMemberHasTransaction = false;

    /* We should definitely have an XID to clear. */
    /* Add ourselves to the list of processes needing a group XID clear. */
    proc->procArrayGroupMember = true;
    if (isSubTransaction) {
        SetProcArrayGroupCache(proc, subTranactionXid, nSubTransactionXids, subTransactionXids, subTransactionLatestXid);
    } else {
        SetProcArrayGroupCache(proc, latestXid, 0, NULL, InvalidTransactionId);
    }

    /* add current proc into ProcArrayGroup */
    ProcInsertIntoGroup(proc, &nextidx);

    /*
     * If the list was not empty, the leader will clear our XID. It is
     * impossible to have followers without a leader because the first process
     * that has added itself to the list will always have nextidx as
     * INVALID_PGPROCNO.
     */
    if (nextidx != INVALID_PGPROCNO) {
        int extraWaits = 0;

        /* Sleep until the leader clears our XID. */
        for (;;) {
            /* acts as a read barrier */
            PGSemaphoreLock(&proc->sem, false);
            if (!proc->procArrayGroupMember)
                break;
            extraWaits++;
        }

        Assert(pg_atomic_read_u32(&proc->procArrayGroupNext) == INVALID_PGPROCNO);

        /* Fix semaphore count for any absorbed wakeups */
        while (extraWaits-- > 0)
            PGSemaphoreUnlock(&proc->sem);
        return;
    }

    /* We are the leader. Acquire the lock on behalf of everyone. */
    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

    /*
     * Now that we've got the lock, clear the list of processes waiting for
     * group XID clearing, saving a pointer to the head of the list. Trying
     * to pop elements one at a time could lead to an ABA problem.
     */
    while (true) {
        nextidx = pg_atomic_read_u32(&g_instance.proc_base->procArrayGroupFirst);
        if (pg_atomic_compare_exchange_u32(&g_instance.proc_base->procArrayGroupFirst, &nextidx, INVALID_PGPROCNO))
            break;
    }

    /* Remember head of list so we can perform wakeups after dropping lock. */
    wakeidx = nextidx;

    /* Walk the list and clear all XIDs. */
    while (nextidx != INVALID_PGPROCNO) {
        PGPROC* proc_member = g_instance.proc_base_all_procs[nextidx];
        PGXACT* pgxact = &g_instance.proc_base_all_xacts[nextidx];

        ereport(DEBUG2, (errmsg("handle group member from procArrayGroup, procno = %u, "
            "procArrayGroupMemberXid = " XID_FMT ", "
            "procArrayGroupSubXactNXids = %d, "
            "procArrayGroupSubXactLatestXid = " XID_FMT ", "
            "procArrayGroupNext = %u",
            proc_member->pgprocno,
            proc_member->procArrayGroupMemberXid,
            proc_member->procArrayGroupSubXactNXids,
            proc_member->procArrayGroupSubXactLatestXid,
            proc_member->procArrayGroupNext)));

        /*
         * If the proc_member is a transaction, perform ProcArrayEndTransactionInternal
         * to clear its XID. If the proc_member is a subtransaction,
         * perform XidCacheRemoveRunningXids to clear its XIDs and
         * its committed subtransaction's XIDS.
         *
         * proc_member->procArrayGroupSubXactLatestXid !=0 when the group member
         * is a subtransaction.
         */
        if (proc_member->procArrayGroupSubXactLatestXid != InvalidTransactionId) {
            XidCacheRemoveRunningXids(proc_member, pgxact);
        } else {
            groupMemberHasTransaction = true;
            ProcArrayEndTransactionInternal(
                proc_member, pgxact, proc_member->procArrayGroupMemberXid, &xid[index], &nsubxids[index]);
        }

        /* Move to next proc in list. */
        nextidx = pg_atomic_read_u32(&proc_member->procArrayGroupNext);
        index++;
    }

    /* Already hold lock, calculate snapshot after last invocation,
     * if there is at least one transaction in group.
     */
    if (groupMemberHasTransaction) {
        CalculateLocalLatestSnapshot(false);
    }

    /* We're done with the lock now. */
    LWLockRelease(ProcArrayLock);

    /*
     * Now that we've released the lock, go back and wake everybody up. We
     * don't do this under the lock so as to keep lock hold times to a
     * minimum. The system calls we need to perform to wake other processes
     * up are probably much slower than the simple memory writes we did while
     * holding the lock.
     */
    index = 0;
    while (wakeidx != INVALID_PGPROCNO) {
        PGPROC* proc_member = g_instance.proc_base_all_procs[wakeidx];

        /* Capture the next link before clearing it on the member. */
        wakeidx = pg_atomic_read_u32(&proc_member->procArrayGroupNext);
        pg_atomic_write_u32(&proc_member->procArrayGroupNext, INVALID_PGPROCNO);

        /* ensure all previous writes are visible before follower continues. */
        pg_write_barrier();

        ClearProcArrayGroupCache(proc_member);
        if (proc_member != t_thrd.proc)
            PGSemaphoreUnlock(&proc_member->sem);
        index++;
    }
}
/*
* ProcArrayClearTransaction -- clear the transaction fields
*
* This is used after successfully preparing a 2-phase transaction. We are
* not actually reporting the transaction's XID as no longer running --- it
* will still appear as running because the 2PC's gxact is in the ProcArray
* too. We just have to clear out our own PGXACT.
*/
void ProcArrayClearTransaction(PGPROC* proc)
{
    PGXACT* pgxact = &g_instance.proc_base_all_xacts[proc->pgprocno];

    /*
     * We can skip locking ProcArrayLock here, because this action does not
     * actually change anyone's view of the set of running XIDs: our entry is
     * duplicate with the gxact that has already been inserted into the
     * ProcArray.
     *
     * NOTE(review): the xid -> proc hash entry is NOT removed here;
     * presumably the 2PC gxact's entry now owns that mapping — confirm.
     */
    pgxact->handle = InvalidTransactionHandle;
    pgxact->xid = InvalidTransactionId;
    pgxact->next_xid = InvalidTransactionId;
    proc->lxid = InvalidLocalTransactionId;
    pgxact->xmin = InvalidTransactionId;
    proc->snapXmax = InvalidTransactionId;
    proc->snapCSN = InvalidCommitSeqNo;
    /* extreme-RTO standby-read snapshot bookkeeping */
    proc->exrto_read_lsn = 0;
    proc->exrto_gen_snap_time = 0;
    pgxact->csn_min = InvalidCommitSeqNo;
    pgxact->csn_dr = InvalidCommitSeqNo;
    proc->recoveryConflictPending = false;

    /* redundant, but just in case */
    pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
    ProcArrayClearAutovacuum(pgxact);
    pgxact->delayChkpt = false;
    pgxact->needToSyncXid = 0;

    /* Clear the subtransaction-XID cache too */
    pgxact->nxids = 0;
    proc->exrto_min = 0;

    /* Free xid cache memory if needed */
    ResetProcXidCache(proc, true);
}
/*
 * Stamp a committing transaction (and its subtransactions) with its CSN in
 * the CSN log. Aborted or xid-less transactions get no CSN log entry.
 */
void UpdateCSNLogAtTransactionEND(
    TransactionId xid, int nsubxids, TransactionId* subXids, CommitSeqNo csn, bool isCommit)
{
    if (TransactionIdIsNormal(xid) && isCommit) {
        Assert(csn >= COMMITSEQNO_FROZEN);

        /* Update CSN log, stamp this XID (and sub-XIDs) with the CSN */
#ifdef ENABLE_MULTIPLE_NODES
        CSNLogSetCommitSeqNo(xid, nsubxids, subXids, csn);
#else
        /* Single-node build: strip the commit-in-progress marker bit first. */
        CSNLogSetCommitSeqNo(xid, nsubxids, subXids, csn & ~COMMITSEQNO_COMMIT_INPROGRESS);
#endif
    }
}
/*
* This is called during the recovery stage to extend the CSN log page while
* doing xact_redo if needed, after the CSN log is initialized to latestObservedXid.
*/
void CSNLogRecordAssignedTransactionId(TransactionId newXid)
{
    if (TransactionIdFollows(newXid, t_thrd.storage_cxt.latestObservedXid)) {
        TransactionId next_expected_xid = t_thrd.storage_cxt.latestObservedXid;

        /* Extend the CSN log for every XID between the old high-water mark and newXid. */
        while (TransactionIdPrecedes(next_expected_xid, newXid)) {
            TransactionIdAdvance(next_expected_xid);
            ExtendCSNLOG(next_expected_xid);
        }
        Assert(next_expected_xid == newXid);

        /*
         * Now we can advance latestObservedXid
         */
        t_thrd.storage_cxt.latestObservedXid = newXid;

        /* Before leaving STANDBY_INITIALIZED, nextXid maintenance is skipped. */
        if (t_thrd.xlog_cxt.standbyState <= STANDBY_INITIALIZED) {
            return;
        }

        /* Keep nextXid ahead of every XID observed in the WAL stream. */
        LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
        if (TransactionIdFollowsOrEquals(next_expected_xid, t_thrd.xact_cxt.ShmemVariableCache->nextXid)) {
            t_thrd.xact_cxt.ShmemVariableCache->nextXid = next_expected_xid;
            TransactionIdAdvance(t_thrd.xact_cxt.ShmemVariableCache->nextXid);
        }
        LWLockRelease(XidGenLock);
    }
}
/*
* ProcArrayInitRecovery -- initialize recovery xid mgmt environment
*
* Remember up to where the startup process initialized the CLOG and subtrans
* so we can ensure it's initialized gaplessly up to the point where necessary
* while in recovery.
*/
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
{
    Assert(t_thrd.xlog_cxt.standbyState == STANDBY_INITIALIZED);
    Assert(TransactionIdIsNormal(initializedUptoXID));

    /*
     * Record one XID before the point up to which SUBTRANS has been
     * initialized, so RecordKnownAssignedTransactionIds (and later
     * ProcArrayApplyRecoveryInfo) can extend the logs gaplessly from there.
     */
    TransactionId observed = initializedUptoXID;
    TransactionIdRetreat(observed);
    t_thrd.storage_cxt.latestObservedXid = observed;
}
/*
 * GetRunningTransactionData -- returns information about running transactions.
 *
 * Similar to GetSnapshotData but returns more information. We include
 * all PGXACTs with an assigned TransactionId, even VACUUM processes.
 *
 * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
 * releasing them. Holding XidGenLock ensures that no new XIDs enter the proc
 * array until the caller has WAL-logged this snapshot and releases the
 * lock. Holding ProcArrayLock ensures that no transactions commit until the
 * lock is released.
 *
 * The returned data structure is statically allocated; caller should not
 * modify it, and must not assume it is valid past the next call.
 *
 * This is never executed during recovery so there is no need to look at
 * KnownAssignedXids.
 *
 * We don't worry about updating other counters, we want to keep this as
 * simple as possible and leave GetSnapshotData() as the primary code for
 * that bookkeeping.
 *
 * Note that if any transaction has overflowed its cached subtransactions
 * then there is no real need to include any subtransactions. That isn't a
 * common enough case to worry about optimising the size of the WAL record,
 * and we may wish to see that data for diagnostic purposes anyway.
 */
RunningTransactions GetRunningTransactionData(void)
{
    /* result workspace: a thread-local buffer reused across calls */
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    RunningTransactions CurrentRunningXacts = t_thrd.storage_cxt.CurrentRunningXacts;
    TransactionId latestCompletedXid;
    TransactionId oldestRunningXid;
    TransactionId* xids = NULL;
    int index;
    int count = 0;    /* total entries collected in xids[] (top-level + subxids) */
    int subcount = 0; /* portion of count that are cached subtransaction xids */
    bool suboverflowed = false;
    int rc = 0;
    Assert(!RecoveryInProgress());
    /*
     * Allocating space for maxProcs xids is usually overkill; numProcs would
     * be sufficient. But it seems better to do the malloc while not holding
     * the lock, so we can't look at numProcs. Likewise, we allocate much
     * more subxip storage than is probably needed.
     *
     * Should only be allocated in bgwriter, since only ever executed during
     * checkpoints.
     */
    if (CurrentRunningXacts->xids == NULL) {
        /*
         * First call: allocate once in the storage memory context; the buffer
         * is retained for the lifetime of the thread.
         */
        CurrentRunningXacts->xids = (TransactionId*)MemoryContextAlloc(
            THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE),
            (unsigned int)TOTAL_MAX_CACHED_SUBXIDS * sizeof(TransactionId));
        if (CurrentRunningXacts->xids == NULL)
            ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
    }
    xids = CurrentRunningXacts->xids;
    /*
     * Ensure that no xids enter or leave the procarray while we obtain
     * snapshot.  Both locks are intentionally left held on return; the
     * caller releases them after WAL-logging the result.
     */
    LWLockAcquire(XidGenLock, LW_SHARED);
    LWLockAcquire(ProcArrayLock, LW_SHARED);
    latestCompletedXid = t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid;
    /* start the running-xid horizon at nextXid; lowered by the scan below */
    oldestRunningXid = t_thrd.xact_cxt.ShmemVariableCache->nextXid;
    /* xmax is always latestCompletedXid + 1 */
    TransactionId xmax = latestCompletedXid;
    TransactionIdAdvance(xmax);
    TransactionId globalXmin = xmax;
    /*
     * Spin over procArray collecting all xids and subxids.
     */
    for (index = 0; index < arrayP->numProcs; index++) {
        int pgprocno = arrayP->pgprocnos[index];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
        TransactionId xid;
        int nxids;
        /* Update globalxmin to be the smallest valid xmin */
        xid = pgxact->xmin; /* fetch just once */
        if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, globalXmin)) {
            globalXmin = xid;
        }
        /* Fetch xid just once - see GetNewTransactionId */
        xid = pgxact->xid;
        /*
         * We don't need to store transactions that don't have a TransactionId
         * yet because they will not show as running on a standby server.
         */
        if (!TransactionIdIsValid(xid))
            continue;
        xids[count++] = xid;
        if (TransactionIdPrecedes(xid, oldestRunningXid))
            oldestRunningXid = xid;
        /*
         * Save subtransaction XIDs. Other backends can't add or remove
         * entries while we're holding XidGenLock.
         */
        nxids = pgxact->nxids;
        if (nxids > 0) {
            /* copy at most the cached portion; overflow is flagged below */
            if (nxids > PGPROC_MAX_CACHED_SUBXIDS)
                nxids = PGPROC_MAX_CACHED_SUBXIDS;
            /*
             * NOTE(review): destMax passed to memcpy_s equals the copy size,
             * not the remaining capacity of xids[]; this is safe only because
             * count + nxids can never exceed TOTAL_MAX_CACHED_SUBXIDS -- confirm
             * that invariant against the definition of TOTAL_MAX_CACHED_SUBXIDS.
             */
            rc = memcpy_s(&xids[count], nxids * sizeof(TransactionId), (void *)proc->subxids.xids,
                nxids * sizeof(TransactionId));
            securec_check(rc, "\0", "\0");
            count += nxids;
            subcount += nxids;
            if (pgxact->nxids > PGPROC_MAX_CACHED_SUBXIDS)
                suboverflowed = true;
            /*
             * Top-level XID of a transaction is always less than any of its
             * subxids, so we don't need to check if any of the subxids are
             * smaller than oldestRunningXid
             */
        }
    }
    /*
     * Update globalxmin to include actual process xids. This is a slightly
     * different way of computing it than GetOldestXmin uses, but should give
     * the same result.
     */
    if (TransactionIdPrecedes(oldestRunningXid, globalXmin)) {
        globalXmin = oldestRunningXid;
    }
    /*
     * It's important *not* to include the limits set by slots here because
     * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
     * were to be included here the initial value could never increase because
     * of a circular dependency where slots only increase their limits when
     * running xacts increases oldestRunningXid and running xacts only
     * increases if slots do.
     */
    CurrentRunningXacts->xcnt = count;
    CurrentRunningXacts->subxid_overflow = suboverflowed;
    CurrentRunningXacts->nextXid = t_thrd.xact_cxt.ShmemVariableCache->nextXid;
    CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
    CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
    CurrentRunningXacts->globalXmin = globalXmin;
    Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
    Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
    Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
    /* We don't release the locks here, the caller is responsible for that */
    return CurrentRunningXacts;
}
/*
 * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
 *
 * Takes us through 3 states: Initialized, Pending and Ready.
 * Normal case is to go all the way to Ready straight away, though there
 * are atypical cases where we need to take it in steps.
 *
 * Use the data about running transactions on master to create the initial
 * state of KnownAssignedXids. We also use these records to regularly prune
 * KnownAssignedXids because we know it is possible that some transactions
 * with FATAL errors fail to write abort records, which could cause eventual
 * overflow.
 *
 * See comments for LogStandbySnapshot().
 */
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
{
    TransactionId nextXid;
    Assert(t_thrd.xlog_cxt.standbyState >= STANDBY_INITIALIZED);
    Assert(TransactionIdIsValid(running->nextXid));
    Assert(TransactionIdIsValid(running->oldestRunningXid));
    Assert(TransactionIdIsNormal(running->latestCompletedXid));
    /*
     * Remove stale locks, if any.
     *
     * Locks are always assigned to the toplevel xid so we don't need to care
     * about subxcnt/subxids (and by extension not about ->suboverflowed).
     */
    StandbyReleaseOldLocks(running->oldestRunningXid);
    /*
     * If our snapshot is already valid, nothing else to do...
     */
    if (t_thrd.xlog_cxt.standbyState == STANDBY_SNAPSHOT_READY)
        return;
    Assert(t_thrd.xlog_cxt.standbyState == STANDBY_INITIALIZED);
    /*
     * latestObservedXid is at least set to the point where CSNLOG was
     * started up to (c.f. ProcArrayInitRecovery()) or to the biggest xid
     * RecordKnownAssignedTransactionIds() was called for. Initialize
     * subtrans from thereon, up to nextXid - 1.
     *
     * We need to duplicate parts of RecordKnownAssignedTransactionId() here,
     * because we've just added xids to the known assigned xids machinery that
     * haven't gone through RecordKnownAssignedTransactionId().
     */
    Assert(TransactionIdIsNormal(t_thrd.storage_cxt.latestObservedXid));
    TransactionIdAdvance(t_thrd.storage_cxt.latestObservedXid);
    /* extend CSNLOG pages for every xid in (latestObservedXid, nextXid) */
    while (TransactionIdPrecedes(t_thrd.storage_cxt.latestObservedXid, running->nextXid)) {
        ExtendCSNLOG(t_thrd.storage_cxt.latestObservedXid);
        TransactionIdAdvance(t_thrd.storage_cxt.latestObservedXid);
    }
    TransactionIdRetreat(t_thrd.storage_cxt.latestObservedXid); /* = running->nextXid - 1 */
    /* From this point the standby can serve snapshots. */
    t_thrd.xlog_cxt.standbyState = STANDBY_SNAPSHOT_READY;
    MultiRedoUpdateStandbyState((HotStandbyState)t_thrd.xlog_cxt.standbyState);
    /*
     * If a transaction wrote a commit record in the gap between taking and
     * logging the snapshot then latestCompletedXid may already be higher than
     * the value from the snapshot, so check before we use the incoming value.
     */
    if (TransactionIdPrecedes(t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid, running->latestCompletedXid))
        t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid = running->latestCompletedXid;
    Assert(TransactionIdIsNormal(t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid));
    /*
     * ShmemVariableCache->nextXid must be beyond any observed xid.
     *
     * We don't expect anyone else to modify nextXid, hence we don't need to
     * hold a lock while examining it. We still acquire the lock to modify
     * it, though (and re-check under the lock before writing).
     */
    nextXid = t_thrd.storage_cxt.latestObservedXid;
    TransactionIdAdvance(nextXid);
    if (TransactionIdFollows(nextXid, t_thrd.xact_cxt.ShmemVariableCache->nextXid)) {
        LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
        if (TransactionIdFollows(nextXid, t_thrd.xact_cxt.ShmemVariableCache->nextXid)) {
            t_thrd.xact_cxt.ShmemVariableCache->nextXid = nextXid;
        }
        LWLockRelease(XidGenLock);
    }
    Assert(TransactionIdIsValid(t_thrd.xact_cxt.ShmemVariableCache->nextXid));
    ereport(trace_recovery(DEBUG1), (errmsg("recovery snapshots are now enabled")));
}
/*
 * TransactionIdIsActive -- is xid the top-level XID of an active backend?
 *
 * This differs from TransactionIdIsInProgress in that it ignores prepared
 * transactions, as well as transactions running on the master if we're in
 * hot standby. Also, we ignore subtransactions since that's not needed
 * for current uses.
 */
bool TransactionIdIsActive(TransactionId xid)
{
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    bool found = false;

    /*
     * A transaction older than RecentXmin cannot possibly still be running,
     * so skip the proc array scan entirely in that case.
     */
    if (TransactionIdPrecedes(xid, u_sess->utils_cxt.RecentXmin))
        return false;

    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (int idx = 0; idx < arrayP->numProcs; idx++) {
        int procno = arrayP->pgprocnos[idx];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[procno];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[procno];

        /* Fetch xid just once - see GetNewTransactionId */
        TransactionId pxid = pgxact->xid;
        /* skip idle slots and prepared transactions (pid == 0) */
        if (!TransactionIdIsValid(pxid) || proc->pid == 0)
            continue;
        if (TransactionIdEquals(pxid, xid)) {
            found = true;
            break;
        }
    }
    LWLockRelease(ProcArrayLock);

    return found;
}
/*
 * Free the sub-xid cache memory of a proc if it has grown beyond the
 * initial cached size (PGPROC_INIT_CACHED_SUBXIDS).  With needlock the
 * cache is protected by the proc's subxidsLock; otherwise interrupts are
 * held off while the pointer is swapped out.
 */
static void ResetProcXidCache(PGPROC* proc, bool needlock)
{
    if (proc->subxids.maxNumber <= PGPROC_INIT_CACHED_SUBXIDS)
        return;

    /* Use subxidsLock to protect subxids */
    if (needlock) {
        LWLockAcquire(proc->subxidsLock, LW_EXCLUSIVE);
    } else {
        HOLD_INTERRUPTS();
    }

    proc->subxids.maxNumber = 0;
    pfree(proc->subxids.xids);
    proc->subxids.xids = NULL;

    if (needlock) {
        LWLockRelease(proc->subxidsLock);
    } else {
        RESUME_INTERRUPTS();
    }
}
/* Free xidcache before proc exit */
void ProcSubXidCacheClean()
{
if (t_thrd.proc && t_thrd.proc->subxids.maxNumber > PGPROC_INIT_CACHED_SUBXIDS) {
/* Use subxidsLock to protect subxids */
LWLockAcquire(t_thrd.proc->subxidsLock, LW_EXCLUSIVE);
t_thrd.pgxact->nxids = 0;
t_thrd.proc->subxids.maxNumber = 0;
pfree(t_thrd.proc->subxids.xids);
t_thrd.proc->subxids.xids = NULL;
LWLockRelease(t_thrd.proc->subxidsLock);
}
}
/*
 * Lazily create the shared memory context backing the per-proc sub-xid
 * caches.  Idempotent: subsequent calls are no-ops.
 */
void InitProcSubXidCacheContext()
{
    if (ProcSubXidCacheContext != NULL)
        return;

    ProcSubXidCacheContext = AllocSetContextCreate(g_instance.instance_context,
                                                   "ProcSubXidCacheContext",
                                                   ALLOCSET_DEFAULT_MINSIZE,
                                                   ALLOCSET_DEFAULT_INITSIZE,
                                                   ALLOCSET_DEFAULT_MAXSIZE,
                                                   SHARED_CONTEXT);
}
/*
 * TransactionIdIsInProgress -- is given transaction running in some backend
 *
 * Aside from some shortcuts such as checking RecentXmin and our own Xid,
 * there are four possibilities for finding a running transaction:
 *
 * 1. The given Xid is a main transaction Id. We will find this out cheaply
 * by looking at the PGXACT struct for each backend.
 *
 * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
 * We can find this out cheaply too.
 *
 * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
 * if the Xid is running on the master.
 *
 * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
 * if that is running according to PGXACT or KnownAssignedXids. This is the
 * slowest way, but sadly it has to be done always if the others failed,
 * unless we see that the cached subxact sets are complete (none have
 * overflowed).
 *
 * ProcArrayLock has to be held while we do 1, 2, 3. If we save the top Xids
 * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
 * This buys back some concurrency (and we can't retrieve the main Xids from
 * PGXACT again anyway; see GetNewTransactionId).
 *
 * In MPPDB cluster environment, RecentXmin might not be the minimum xid, e.g.
 * 1. T1 starts at CN
 * 2. T2 starts at DN, gets RecentXmin from GTM, larger than T1 if GTM is
 *    cleared up
 * 3. CN sends T1 to DN
 * 4. T2 may set wrong tuple hints of T1 if it considered T1 older than
 *    RecentXmin and therefore not in progress.
 * In this scenario the RecentXmin shortcut could report a wrong status for T1
 * and hence a wrong infomask for tuples T1 touched, so we do not shortcut by
 * RecentXmin by default. But when using an MVCC snapshot, the local snapshot
 * is synced with GTM and RecentXmin is guaranteed to be the minimum xid, so we
 * do shortcut by checking RecentXmin in HeapTupleSatisfiesMVCC. Assert-enabled
 * builds still recheck every path for data consistency.
 */
bool TransactionIdIsInProgress(TransactionId xid, uint32* needSync, bool shortcutByRecentXmin,
    bool bCareNextxid, bool isTopXact, bool checkLatestCompletedXid)
{
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
#ifdef USE_ASSERT_CHECKING
    bool shortCutCheckRes = true;
#endif
    volatile int i = 0;
    volatile int j = 0;
    /*
     * Don't bother checking a transaction older than RecentXmin; it could not
     * possibly still be running. (Note: in particular, this guarantees that
     * we reject InvalidTransactionId, FrozenTransactionId, etc as not
     * running.)
     *
     * Notes: our principle for distributed transactions is:
     * We should treat gtm xact state as the global xact state; when local
     * xact state does not match the gtm xact, we block until they match
     * (SyncLocalXactsWithGTM).
     *
     * So, the shortcut `RecentXmin' is not worth worrying about, because when
     * it is assigned a value the local node must be in sync with gtm.
     */
    uint64 recycle_xid = pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid);
    /* in hot standby mode, the proc may still be running */
    if (RecoveryInProgress()) {
        recycle_xid = InvalidTransactionId;
    }
    if (shortcutByRecentXmin && TransactionIdPrecedes(xid, recycle_xid)) {
        xc_by_recent_xmin_inc();
        /*
         * As xc_maintenance_mode does not sync local xacts with GTM for
         * consistency, here we just check not in xc_maintenance_mode.
         */
        if (!u_sess->attr.attr_common.xc_maintenance_mode) {
#ifdef USE_ASSERT_CHECKING
            shortCutCheckRes = false;
#endif
        }
#ifdef USE_ASSERT_CHECKING
        /* fall through to do recheck */
#else
        return false;
#endif
    }
    /*
     * We may have just checked the status of this transaction, so if it is
     * already known to be completed, we can fall out without any access to
     * shared memory.
     */
    if (TransactionIdIsKnownCompleted(xid)) {
        xc_by_known_xact_inc();
        return false;
    }
    if (ENABLE_DMS) {
        /* fetch TXN info locally if either reformer, original primary, or normal primary */
        bool local_fetch = SSCanFetchLocalSnapshotTxnRelatedInfo();
        if (!local_fetch) {
            bool in_progress = true;
            SSTransactionIdIsInProgress(xid, &in_progress);
            return in_progress;
        }
    }
    /*
     * Also, we can handle our own transaction (and subtransactions) without
     * any access to shared memory.
     */
    if (TransactionIdIsCurrentTransactionId(xid)) {
        xc_by_my_xact_inc();
        Assert(shortCutCheckRes == true);
        return true;
    }
    if (!RecoveryInProgress()) {
        LWLockAcquire(ProcArrayLock, LW_SHARED);
        /*
         * Now that we have the lock, we can check latestCompletedXid; if the
         * target Xid is after that, it's surely still running.
         */
        if (checkLatestCompletedXid &&
            TransactionIdPrecedes(t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid, xid)) {
            LWLockRelease(ProcArrayLock);
            xc_by_latest_xid_inc();
            /*
             * If xid < RecentXmin, xid should be smaller than latestCompletedXid,
             * so shortCutCheckRes should be false. But for data replication,
             * pages may arrive faster than xlog, and a tuple xid can exceed
             * latestCompletedXid after a standby promotes to primary. So the assert
             * cannot always hold and was removed. This does not affect MVCC:
             * the xid should be aborted. Assert(shortCutCheckRes == true);
             */
            return true;
        }
        if (isTopXact && !bCareNextxid) {
            /* Fast path: top-level xids are indexed in the proc-xact hash table. */
            int procId = ProcXactHashTableLookup(xid);
            if (procId != InvalidProcessId) {
                /*
                 * Only index proc_base_all_xacts after procId is known valid;
                 * computing the address for an invalid slot id is out of bounds.
                 */
                volatile PGXACT *pgxact = &g_instance.proc_base_all_xacts[procId];
                if (needSync != NULL) {
                    *needSync = pgxact->needToSyncXid;
                }
                LWLockRelease(ProcArrayLock);
                return true;
            }
            LWLockRelease(ProcArrayLock);
        } else {
            /* No shortcuts, gotta grovel through the array */
            for (i = 0; i < arrayP->numProcs; i++) {
                int pgprocno = arrayP->pgprocnos[i];
                volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
                volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
                TransactionId pxid;
                /* Ignore my own proc --- dealt with it above */
                if (proc == t_thrd.proc)
                    continue;
                /* Fetch xid just once - see GetNewTransactionId */
                pxid = pgxact->xid;
                if (!TransactionIdIsValid(pxid)) {
                    /* optionally treat a reserved-but-unassigned next_xid as running */
                    if (bCareNextxid && TransactionIdIsValid(pgxact->next_xid))
                        pxid = pgxact->next_xid;
                    else
                        continue;
                }
                /*
                 * Step 1: check the main Xid
                 */
                if (TransactionIdEquals(pxid, xid)) {
                    if (needSync != NULL)
                        *needSync = pgxact->needToSyncXid;
                    LWLockRelease(ProcArrayLock);
                    xc_by_main_xid_inc();
                    Assert(shortCutCheckRes == true);
                    return true;
                }
                /*
                 * We can ignore main Xids that are younger than the target Xid, since
                 * the target could not possibly be their child.
                 */
                if (TransactionIdPrecedes(xid, pxid))
                    continue;
                /*
                 * Step 2: check the cached child-Xids arrays
                 */
                if (pgxact->nxids > 0) {
                    /* Use subxidsLock to protect subxids */
                    LWLockAcquire(proc->subxidsLock, LW_SHARED);
                    for (j = pgxact->nxids - 1; j >= 0; j--) {
                        /* Fetch xid just once - see GetNewTransactionId */
                        TransactionId cxid = proc->subxids.xids[j];
                        if (TransactionIdEquals(cxid, xid)) {
                            if (needSync != NULL)
                                *needSync = pgxact->needToSyncXid;
                            LWLockRelease(proc->subxidsLock);
                            LWLockRelease(ProcArrayLock);
                            xc_by_child_xid_inc();
                            Assert(shortCutCheckRes == true);
                            return true;
                        }
                    }
                    LWLockRelease(proc->subxidsLock);
                }
            }
            LWLockRelease(ProcArrayLock);
        }
    }
    /*
     * Step 3: in hot standby mode, check the CSN log.
     */
    if (RecoveryInProgress()) {
        CommitSeqNo csn;
        csn = TransactionIdGetCommitSeqNo(xid, false, false, true, NULL);
        if (COMMITSEQNO_IS_COMMITTED(csn) || COMMITSEQNO_IS_ABORTED(csn))
            return false;
        else
            return true;
    }
    return false;
}
/*
 * Called by GetOldestXmin(): advance the shared recentGlobalXmin to `result`
 * via a compare-and-swap loop.  Gives up silently if another backend has
 * already pushed the value at or beyond `result`.
 */
static void UpdateRecentGlobalXmin(TransactionId currGlobalXmin, TransactionId result)
{
    if (module_logging_is_on(MOD_TRANS_SNAPSHOT)) {
        ereport(LOG, (errmodule(MOD_TRANS_SNAPSHOT), errmsg("recentGlobalXmin before update: currGlobalXmin = %lu",
            currGlobalXmin)));
    }

    for (;;) {
        /* Stop once the shared value has caught up with (or passed) ours. */
        if (!TransactionIdFollows(result, currGlobalXmin))
            break;
        if (pg_atomic_compare_exchange_u64(
                &t_thrd.xact_cxt.ShmemVariableCache->recentGlobalXmin, &currGlobalXmin, result)) {
            if (module_logging_is_on(MOD_TRANS_SNAPSHOT)) {
                ereport(LOG,
                    (errmodule(MOD_TRANS_SNAPSHOT), errmsg("recentGlobalXmin after update: %lu.", result)));
            }
            break;
        }
        /* CAS failed: currGlobalXmin now holds the freshly observed value; retry. */
    }
}
/*
 * GetOldestXmin -- returns oldest transaction that was running
 * when any current transaction was started.
 *
 * If rel is NULL or a shared relation, all backends are considered, otherwise
 * only backends running in this database are considered.
 *
 * If ignoreVacuum is TRUE then backends with the PROC_IN_VACUUM flag set are
 * ignored.
 *
 * This is used by VACUUM to decide which deleted tuples must be preserved in
 * the passed in table. For shared relations backends in all databases must be
 * considered, but for non-shared relations that's not required, since only
 * backends in my own database could ever see the tuples in them. Also, we can
 * ignore concurrently running lazy VACUUMs because (a) they must be working
 * on other tables, and (b) they don't need to do snapshot-based lookups.
 *
 * This is also used to determine where to truncate pg_subtrans. For that
 * backends in all databases have to be considered, so rel = NULL has to be
 * passed in.
 *
 * Note: we include all currently running xids in the set of considered xids.
 * This ensures that if a just-started xact has not yet set its snapshot,
 * when it does set the snapshot it cannot set xmin less than what we compute.
 * See notes in src/backend/access/transam/README.
 *
 * Note: despite the above, it's possible for the calculated value to move
 * backwards on repeated calls. The calculated value is conservative, so that
 * anything older is definitely not considered as running by anyone anymore,
 * but the exact value calculated depends on a number of things. For example,
 * if rel = NULL and there are no transactions running in the current
 * database, GetOldestXmin() returns latestCompletedXid. If a transaction
 * begins after that, its xmin will include in-progress transactions in other
 * databases that started earlier, so another call will return a lower value.
 * Nonetheless it is safe to vacuum a table in the current database with the
 * first result. There are also replication-related effects: a walsender
 * process can set its xmin based on transactions that are no longer running
 * in the master but are still being replayed on the standby, thus possibly
 * making the GetOldestXmin reading go backwards. In this case there is a
 * possibility that we lose data that the standby would like to have, but
 * there is little we can do about that --- data is only protected if the
 * walsender runs continuously while queries are executed on the standby.
 * (The Hot Standby code deals with such cases by failing standby queries
 * that needed to access already-removed data, so there's no integrity bug.)
 * The return value is also adjusted with vacuum_defer_cleanup_age, so
 * increasing that setting on the fly is another easy way to make
 * GetOldestXmin() move backwards, with no consequences for data integrity.
 */
TransactionId GetOldestXmin(Relation rel, bool bFixRecentGlobalXmin, bool bRecentGlobalXminNoCheck)
{
    TransactionId result = InvalidTransactionId;
    TransactionId currGlobalXmin;
    TransactionId replication_slot_xmin;
    volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
    /* Fast exit: reuse the session's cached RecentGlobalXmin when allowed. */
    if (!bFixRecentGlobalXmin && TransactionIdIsNormal(u_sess->utils_cxt.RecentGlobalXmin) && !bRecentGlobalXminNoCheck)
        return u_sess->utils_cxt.RecentGlobalXmin;
    /* Fetch into local variable, don't need to hold ProcArrayLock */
    replication_slot_xmin = g_instance.proc_array_idx->replication_slot_xmin;
    if (!GTM_LITE_MODE) {
        /* Get recentLocalXmin from the latest snapshot */
        result = GetMultiSnapshotOldestXmin();
        if (bFixRecentGlobalXmin) {
            /* Fix recentGlobalXmin: never let it move past the session's value */
            if (!TransactionIdIsNormal(result) || TransactionIdFollows(result, u_sess->utils_cxt.RecentGlobalXmin))
                result = u_sess->utils_cxt.RecentGlobalXmin;
            /* Update recentGlobalXmin if needed */
            if (!u_sess->attr.attr_common.xc_maintenance_mode && !u_sess->utils_cxt.cn_xc_maintain_mode) {
                currGlobalXmin = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->recentGlobalXmin);
                UpdateRecentGlobalXmin(currGlobalXmin, result);
            }
        } else if (!bRecentGlobalXminNoCheck) {
            /* Get recentGlobalXmin from ShmemVariableCache and take the older of the two */
            currGlobalXmin = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->recentGlobalXmin);
            if (TransactionIdIsNormal(currGlobalXmin) &&
                (!TransactionIdIsValid(result) || TransactionIdPrecedes(currGlobalXmin, result)))
                result = currGlobalXmin;
        }
    } else {
        /* directly fetch recentGlobalXmin from ShmemVariableCache */
        result = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->recentGlobalXmin);
    }
    /* Update by vacuum_defer_cleanup_age, clamping at the first normal xid */
    if (TransactionIdPrecedes(result, (uint64)u_sess->attr.attr_storage.vacuum_defer_cleanup_age)) {
        result = FirstNormalTransactionId;
    } else {
        result -= u_sess->attr.attr_storage.vacuum_defer_cleanup_age;
    }
    /* Check whether there's a replication slot requiring an older xmin. */
    if (TransactionIdIsNormal(replication_slot_xmin) && TransactionIdPrecedes(replication_slot_xmin, result))
        result = replication_slot_xmin;
    if (!TransactionIdIsNormal(result))
        result = FirstNormalTransactionId;
    /*
     * NOTE(review): the comment below is inherited; no ProcArrayLock is
     * actually held at this point -- these are unlocked reads.  Also, the
     * check that follows re-applies replication_slot_xmin, which was already
     * applied above; redundant but harmless.
     */
    /* fetch into volatile var while ProcArrayLock is held */
    replication_slot_xmin = g_instance.proc_array_idx->replication_slot_xmin;
    replication_slot_catalog_xmin = g_instance.proc_array_idx->replication_slot_catalog_xmin;
    /*
     * Check whether there are replication slots requiring an older xmin.
     */
    if (TransactionIdIsValid(replication_slot_xmin) &&
        NormalTransactionIdPrecedes(replication_slot_xmin, result)) {
        result = replication_slot_xmin;
    }
    /*
     * After locks have been released and defer_cleanup_age has been applied,
     * check whether we need to back up further to make logical decoding
     * possible. We need to do so if we're computing the global limit (rel =
     * NULL) or if the passed relation is a catalog relation of some kind.
     *
     * NOTE(review): the condition requires rel != NULL, so the rel = NULL
     * (global limit) case described above never backs up to the catalog
     * xmin here; upstream PostgreSQL uses `rel == NULL ||` instead.  Confirm
     * whether this divergence is intentional in openGauss.
     */
    if ((rel != NULL && RelationIsAccessibleInLogicalDecoding(rel)) &&
        TransactionIdIsValid(replication_slot_catalog_xmin) &&
        NormalTransactionIdPrecedes(replication_slot_catalog_xmin, result))
        result = replication_slot_catalog_xmin;
    return result;
}
/*
 * GetGlobalOldestXmin -- oldest xmin across the recent snapshots, pushed
 * back by vacuum_defer_cleanup_age and clamped at FirstNormalTransactionId.
 */
TransactionId GetGlobalOldestXmin()
{
    /* directly fetch Global OldestXmin */
    TransactionId result = GetMultiSnapshotOldestXmin();
    uint64 deferAge = (uint64)u_sess->attr.attr_storage.vacuum_defer_cleanup_age;

    /* Update by vacuum_defer_cleanup_age */
    result = TransactionIdPrecedes(result, deferAge) ? FirstNormalTransactionId : (result - deferAge);

    return result;
}
/*
 * GetOldestXminForUndo -- return the oldest snapshot xmin and, through
 * *recycleXmin, the horizon below which undo may be recycled.  With
 * flashback enabled the recycle horizon also respects the flashback
 * oldest-xmin and never drops below globalRecycleXid.
 */
TransactionId GetOldestXminForUndo(TransactionId *recycleXmin)
{
    TransactionId oldestXmin = GetMultiSnapshotOldestXmin();
    TransactionId globalRecycleXid = pg_atomic_read_u64(&g_instance.undo_cxt.globalRecycleXid);

    *recycleXmin = oldestXmin;
    if (ENABLE_TCAP_VERSION) {
        /* Flashback may need undo kept longer than the snapshot horizon. */
        TransactionId flashbackXmin = g_instance.flashback_cxt.globalOldestXminInFlashback;
        if (TransactionIdIsValid(flashbackXmin)) {
            *recycleXmin = (oldestXmin < flashbackXmin) ? oldestXmin : flashbackXmin;
        }
        /* Never move the horizon below what has already been recycled. */
        if (unlikely(TransactionIdPrecedes(*recycleXmin, globalRecycleXid))) {
            *recycleXmin = globalRecycleXid;
        }
    }

    ereport(DEBUG1, (errmodule(MOD_UNDO),
        errmsg("recycleXmin is %lu, globalOldestXminInFlashback is %lu, oldestXmin is %lu.",
            *recycleXmin, g_instance.flashback_cxt.globalOldestXminInFlashback, oldestXmin)));
    return oldestXmin;
}
/*
* GetMaxSnapshotXidCount -- get max size for snapshot XID array
*
* We have to export this for use by snapmgr.c.
*/
int GetMaxSnapshotXidCount(void)
{
return g_instance.proc_array_idx->maxProcs;
}
/*
 * GetMaxSnapshotSubxidCount -- max size for a snapshot's sub-XID array.
 *
 * Exported for use by snapmgr.c.
 */
int GetMaxSnapshotSubxidCount(void)
{
    /* upper bound on cached subxids any snapshot can carry */
    return TOTAL_MAX_CACHED_SUBXIDS;
}
/*
 * Oldest transaction for catalog access that was running when any current
 * transaction started, taking replication slots into account.  Callers must
 * ensure it is safe to read replication_slot_catalog_xmin before calling.
 */
TransactionId GetOldestCatalogXmin()
{
    TransactionId result = u_sess->utils_cxt.RecentGlobalXmin;
    TransactionId slotCatalogXmin = g_instance.proc_array_idx->replication_slot_catalog_xmin;

    /* A logical replication slot may pin an older catalog horizon. */
    if (TransactionIdIsNormal(slotCatalogXmin) && TransactionIdPrecedes(slotCatalogXmin, result))
        result = slotCatalogXmin;

    return result;
}
#ifndef ENABLE_MULTIPLE_NODES
/*
 * Fill in one group member's snapshot fields while the leader holds
 * ProcArrayLock: publish the group-computed xmin into the member's PGXACT
 * (clamped by standbyXmin during recovery) and stamp the snapshot CSN.
 * *xmin may be lowered as a side effect and is carried to later members.
 */
static void GroupGetSnapshotInternal(PGXACT* pgxact, Snapshot snapshot, TransactionId *xmin)
{
    /* Publish the group xmin if this member has none yet. */
    if (!TransactionIdIsValid(pgxact->xmin)) {
        pgxact->xmin = *xmin;
    }
    /*
     * During recovery, clamp the group xmin down to the replayed standbyXmin
     * and (re)publish it.  NOTE(review): this second write can overwrite the
     * xmin set just above -- presumably intended; confirm.
     */
    if (snapshot->takenDuringRecovery && TransactionIdIsValid(t_thrd.xact_cxt.ShmemVariableCache->standbyXmin)) {
        if (TransactionIdPrecedes(t_thrd.xact_cxt.ShmemVariableCache->standbyXmin, *xmin)) {
            *xmin = t_thrd.xact_cxt.ShmemVariableCache->standbyXmin;
        }
        pgxact->xmin = *xmin;
    }
    /* Snapshot CSN is the next commit sequence number at this instant. */
    snapshot->snapshotcsn = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo);
}
/*
 * GroupGetSnapshot -- group snapshot getting
 *
 * When we cannot immediately acquire ProcArrayLock in exclusive mode, add
 * ourselves to a list of processes that need to get snapshot.
 * The first process to add itself to the list will acquire ProcArrayLock
 * in exclusive mode and perform GroupGetSnapshotInternal on behalf of all
 * group members. This avoids a great deal of contention around
 * ProcArrayLock when many processes are trying to get snapshot at once,
 * since the lock need not be repeatedly handed off from one process to the next.
 */
static void GroupGetSnapshot(PGPROC* proc)
{
    uint32 nextidx;
    uint32 wakeidx;
    TransactionId xmin;
    TransactionId xmax;
    TransactionId globalxmin;
    volatile TransactionId replication_slot_xmin = InvalidTransactionId;
    volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
    bool clearGroup = false;
    HOLD_INTERRUPTS();
    /* Add ourselves to the list of processes needing to get snapshot. */
    proc->snapshotGroupMember = true;
    /* CAS-push ourselves onto the lock-free waiter stack. */
    while (true) {
        nextidx = pg_atomic_read_u32(&g_instance.proc_base->snapshotGroupFirst);
        pg_atomic_write_u32(&proc->snapshotGroupNext, nextidx);
        /* Ensure all previous writes are visible before follower continues. */
        pg_memory_barrier();
        if (pg_atomic_compare_exchange_u32(
                &g_instance.proc_base->snapshotGroupFirst, &nextidx, (uint32)proc->pgprocno))
            break;
    }
    /*
     * If the list was not empty, the leader will get our snapshot. It is
     * impossible to have followers without a leader because the first process
     * that has added itself to the list will always have nextidx as
     * INVALID_PGPROCNO.
     */
    if (nextidx != INVALID_PGPROCNO) {
        int extraWaits = 0;
        /* Sleep until the leader gets our snapshot. */
        for (;;) {
            /* acts as a read barrier */
            PGSemaphoreLock(&proc->sem, false);
            if (!proc->snapshotGroupMember)
                break;
            extraWaits++;
        }
        Assert(pg_atomic_read_u32(&proc->snapshotGroupNext) == INVALID_PGPROCNO);
        /* Fix semaphore count for any absorbed wakeups */
        while (extraWaits-- > 0)
            PGSemaphoreUnlock(&proc->sem);
        /* in case of memory reordering in relaxed memory model like ARM */
        pg_memory_barrier();
        RESUME_INTERRUPTS();
        return;
    }
    RESUME_INTERRUPTS();
    /* We are the leader. Acquire the lock on behalf of everyone. */
    bool retryGet = false;
RETRY_GET:
    if (retryGet) {
        /* If interrupted while spinning, abandon the attempt and just wake the group. */
        if (InterruptPending) {
            clearGroup = true;
        }
        pg_usleep(100L);
    }
    if (!clearGroup) {
        XLogRecPtr redoEndLsn = GetXLogReplayRecPtr(NULL, NULL);
        LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
        /*
         * On a standby: wait (retry) until standbyXmin has advanced past the
         * redo-cleanup xmin, or until replay catches up to the cleanup LSN,
         * so the group snapshot cannot see data about to be cleaned up.
         */
        bool condition = (t_thrd.xact_cxt.ShmemVariableCache->standbyXmin <=
                          t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXmin) &&
                         (t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXminLsn > redoEndLsn);
        if (condition) {
            LWLockRelease(ProcArrayLock);
            retryGet = true;
            goto RETRY_GET;
        }
        /*
         * Now that we've got the lock, clear the list of processes waiting for
         * group snapshot getting, saving a pointer to the head of the list. Trying
         * to pop elements one at a time could lead to an ABA problem.
         */
        while (true) {
            nextidx = pg_atomic_read_u32(&g_instance.proc_base->snapshotGroupFirst);
            if (pg_atomic_compare_exchange_u32(&g_instance.proc_base->snapshotGroupFirst, &nextidx, INVALID_PGPROCNO))
                break;
        }
        /* Remember head of list so we can perform wakeups after dropping lock. */
        wakeidx = nextidx;
        /* calculate the following infos after we have got ProcArrayLock. */
        /* xmax is always latestCompletedXid + 1 */
        xmax = t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid;
        Assert(TransactionIdIsNormal(xmax));
        TransactionIdAdvance(xmax);
        /* initialize xmin calculation with xmax */
        globalxmin = xmin = xmax;
        /* fetch into volatile var while ProcArrayLock is held */
        replication_slot_xmin = g_instance.proc_array_idx->replication_slot_xmin;
        replication_slot_catalog_xmin = g_instance.proc_array_idx->replication_slot_catalog_xmin;
        /* Walk the list and get all snapshots. */
        while (nextidx != INVALID_PGPROCNO) {
            PGPROC* procMember = g_instance.proc_base_all_procs[nextidx];
            PGXACT* pgxact = &g_instance.proc_base_all_xacts[nextidx];
            pg_memory_barrier();
            GroupGetSnapshotInternal(pgxact, procMember->snapshotGroup, &xmin);
            procMember->xminGroup = xmin;
            procMember->xmaxGroup = xmax;
            procMember->globalxminGroup = globalxmin;
            procMember->replicationSlotXminGroup = replication_slot_xmin;
            procMember->replicationSlotCatalogXminGroup = replication_slot_catalog_xmin;
            /* Move to next proc in list. */
            nextidx = pg_atomic_read_u32(&procMember->snapshotGroupNext);
        }
        /* We're done with the lock now. */
        LWLockRelease(ProcArrayLock);
    } else {
        /* clear the group, then process interrupt */
        while (true) {
            nextidx = pg_atomic_read_u32(&g_instance.proc_base->snapshotGroupFirst);
            if (pg_atomic_compare_exchange_u32(&g_instance.proc_base->snapshotGroupFirst, &nextidx, INVALID_PGPROCNO))
                break;
        }
        wakeidx = nextidx;
    }
    /*
     * Now that we've released the lock, go back and wake everybody up. We
     * don't do this under the lock so as to keep lock hold times to a
     * minimum. The system calls we need to perform to wake other processes
     * up are probably much slower than the simple memory writes we did while
     * holding the lock.
     */
    while (wakeidx != INVALID_PGPROCNO) {
        PGPROC* procMember = g_instance.proc_base_all_procs[wakeidx];
        wakeidx = pg_atomic_read_u32(&procMember->snapshotGroupNext);
        pg_atomic_write_u32(&procMember->snapshotGroupNext, INVALID_PGPROCNO);
        /* ensure all previous writes are visible before follower continues. */
        pg_memory_barrier();
        procMember->snapshotGroupMember = false;
        if (procMember != t_thrd.proc)
            PGSemaphoreUnlock(&procMember->sem);
    }
    if (clearGroup) {
        CHECK_FOR_INTERRUPTS();
    }
}
/*
 * AgentCopySnapshot
 *    Borrow snapshot bounds from a running backend so that a CM agent (or
 *    gsql fallback) can obtain a usable snapshot without waiting for
 *    recovery to produce a consistent one.
 *
 * Scans the procarray for the backend whose snapshot xmin is the largest
 * (to reduce recovery conflicts) and returns that backend's cached
 * xmin / snapXmax / snapCSN triple.  Outputs are set to invalid values
 * when no candidate backend exists.
 *
 * The caller (GetSnapshotData) holds ProcArrayLock in shared mode, so
 * procarray membership cannot change during the scan; individual PGXACT
 * fields may still be updated concurrently, hence the cached reads below.
 */
void AgentCopySnapshot(TransactionId *xmin, TransactionId *xmax, CommitSeqNo *snapcsn)
{
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    int* pgprocnos = arrayP->pgprocnos;
    int numProcs = arrayP->numProcs;
    volatile PGXACT* pgxact = NULL;
    PGPROC* proc = NULL;
    int pgprocno;
    /* initialize so the value can never be read undefined */
    int maxPgprocno = -1;
    TransactionId pgprocXmin;
    TransactionId maxpgprocXmin = InvalidTransactionId;
    for (int index = 0; index < numProcs; index++) {
        pgprocno = pgprocnos[index];
        pgxact = &g_instance.proc_base_all_xacts[pgprocno];
        /*
         * Backend is doing logical decoding which manages snapshot
         * separately; skip it.
         */
        if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING) {
            continue;
        }
        /* Never copy from ourselves. */
        if (pgxact == t_thrd.pgxact) {
            continue;
        }
        pgprocXmin = pgxact->xmin; /* fetch just once */
        /* remember the proc with the maximal xmin to reduce recovery conflict. */
        if (TransactionIdIsNormal(pgprocXmin) && TransactionIdPrecedes(maxpgprocXmin, pgprocXmin)) {
            maxpgprocXmin = pgprocXmin;
            maxPgprocno = pgprocno;
        }
    }
    if (TransactionIdIsValid(maxpgprocXmin)) {
        proc = g_instance.proc_base_all_procs[maxPgprocno];
        /*
         * Use the xmin cached during the scan instead of re-reading
         * PGXACT->xmin: the owning backend may have cleared or advanced it
         * meanwhile, which would break the pairing with snapXmax/snapCSN.
         */
        *xmin = maxpgprocXmin;
        *xmax = proc->snapXmax;
        *snapcsn = proc->snapCSN;
    } else {
        *xmin = InvalidTransactionId;
        *xmax = InvalidTransactionId;
        *snapcsn = InvalidCommitSeqNo;
    }
}
#endif
/*
* GetSnapshotData -- returns information about running transactions.
*
* The returned snapshot includes xmin (lowest still-running xact ID),
* xmax (highest completed xact ID + 1), and a list of running xact IDs
* in the range xmin <= xid < xmax. It is used as follows:
* All xact IDs < xmin are considered finished.
* All xact IDs >= xmax are considered still running.
* For an xact ID xmin <= xid < xmax, consult list to see whether
* it is considered running or not.
* This ensures that the set of transactions seen as "running" by the
* current xact will not change after it takes the snapshot.
*
* All running top-level XIDs are included in the snapshot, except for lazy
* VACUUM processes. We also try to include running subtransaction XIDs,
* but since PGPROC has only a limited cache area for subxact XIDs, full
* information may not be available. If we find any overflowed subxid arrays,
* we have to mark the snapshot's subxid data as overflowed, and extra work
* *may* need to be done to determine what's running (see XidInMVCCSnapshot()
* in heapam_visibility.c).
*
* We also update the following backend-global variables:
* TransactionXmin: the oldest xmin of any snapshot in use in the
* current transaction (this is the same as MyPgXact->xmin).
* RecentXmin: the xmin computed for the most recent snapshot. XIDs
* older than this are known not running any more.
* RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
* running transactions, except those running LAZY VACUUM). This is
* the same computation done by GetOldestXmin(true, true).
* RecentGlobalDataXmin: the global xmin for non-catalog tables
* >= RecentGlobalXmin
*
* Note: this function should probably not be called with an argument that's
* not statically allocated (see xip allocation below).
*/
#ifndef ENABLE_MULTIPLE_NODES
Snapshot GetSnapshotData(Snapshot snapshot, bool force_local_snapshot, bool forHSFeedBack)
#else
Snapshot GetSnapshotData(Snapshot snapshot, bool force_local_snapshot)
#endif
{
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    TransactionId xmin;
    TransactionId xmax;
    TransactionId globalxmin;
    int index;
    volatile TransactionId replication_slot_xmin = InvalidTransactionId;
    volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
    bool is_exec_cn = IS_PGXC_COORDINATOR && !IsConnFromCoord();
    bool is_exec_dn = IS_PGXC_DATANODE && !IsConnFromCoord() && !IsConnFromDatanode();
    WaitState oldStatus = STATE_WAIT_UNDEFINED;
    Assert(snapshot != NULL);
#ifdef PGXC /* PGXC_DATANODE */
    /* Assume a local snapshot until a global one is successfully obtained. */
    t_thrd.xact_cxt.useLocalSnapshot = false;
    if ((IS_MULTI_DISASTER_RECOVER_MODE && !is_exec_dn) ||
        (GTM_LITE_MODE &&
        ((is_exec_cn && !force_local_snapshot) || /* GTM_LITE exec cn */
        (!is_exec_cn && u_sess->utils_cxt.snapshot_source == SNAPSHOT_COORDINATOR)))) { /* GTM_LITE other node */
        /*
         * Obtain a global snapshot for a openGauss session
         * if possible. When not in postmaster environment, get local snapshot, --single mode e.g.
         */
        if (!useLocalXid) {
            if (!u_sess->attr.attr_common.xc_maintenance_mode && IsPostmasterEnvironment &&
                GetPGXCSnapshotData(snapshot)) {
                return snapshot;
            }
        }
    }
    /* first we try to get multiversion snapshot */
    if (t_thrd.postmaster_cxt.HaShmData->current_mode == PRIMARY_MODE ||
        t_thrd.postmaster_cxt.HaShmData->current_mode == NORMAL_MODE) {
RETRY:
        if (GTM_LITE_MODE) {
            /* local snapshot, setup preplist array, must construct preplist before getting local snapshot */
            SetLocalSnapshotPreparedArray(snapshot);
            snapshot->gtm_snapshot_type = GTM_SNAPSHOT_TYPE_LOCAL;
        }
        Snapshot result;
        if (ENABLE_DMS) {
            /* fetch TXN info locally if either reformer, original primary, or normal primary */
            if (SSCanFetchLocalSnapshotTxnRelatedInfo()) {
                result = GetLocalSnapshotData(snapshot);
                snapshot->snapshotcsn = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo);
            } else {
                result = SSGetSnapshotData(snapshot);
                if (result == NULL) {
                    ereport(ERROR, (errmsg("failed to request snapshot as current node is in reform!")));
                }
            }
        } else {
            result = GetLocalSnapshotData(snapshot);
            snapshot->snapshotcsn = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo);
        }
        if (result) {
            if (GTM_LITE_MODE) {
                /* gtm lite check csn, if not pass, try to get local snapshot form multiversion again */
                CommitSeqNo return_csn = set_proc_csn_and_check("GetLocalSnapshotData", snapshot->snapshotcsn,
                    snapshot->gtm_snapshot_type, SNAPSHOT_DATANODE);
                if (!COMMITSEQNO_IS_COMMITTED(return_csn)) {
                    ereport(LOG,
                        (errcode(ERRCODE_SNAPSHOT_INVALID), errmsg("Retry to get local multiversion snapshot")));
                    goto RETRY;
                }
                u_sess->utils_cxt.RecentGlobalXmin = GetOldestXmin(NULL, true);
                u_sess->utils_cxt.RecentGlobalCatalogXmin = GetOldestCatalogXmin();
            }
            return result;
        }
    }
    /* For gtm-lite and gtm-free, use local snapshot */
    t_thrd.xact_cxt.useLocalSnapshot = true;
    /*
     * The codes below run when GetPGXCSnapshotData() couldn't get snapshot from
     * GTM. So no data in snapshot will be used.
     */
    cleanSnapshot(snapshot);
#endif
    /* By here no available version for local snapshot
     *
     * It is sufficient to get shared lock on ProcArrayLock, even if we are
     * going to set MyPgXact->xmin.
     */
    snapshot->takenDuringRecovery = RecoveryInProgress();
    if (snapshot->takenDuringRecovery) {
        oldStatus = pgstat_report_waitstatus(STATE_STANDBY_GET_SNAPSHOT);
    }
    /* Retry bookkeeping for the standby (recovery) snapshot path below. */
    bool retry_get = false;
    uint64 retry_count = 0;
    const static uint64 WAIT_COUNT = 0x7FFFF;
    /* reset xmin before acquiring lwlock, in case blocking redo */
    t_thrd.pgxact->xmin = InvalidTransactionId;
RETRY_GET:
    /*
     * Standby path: ordinary backends (not stream workers, not extreme-RTO
     * readers, not the CM agent) may have to wait for redo to advance far
     * enough that the snapshot xmin is safe against standby cleanup.
     */
    if (snapshot->takenDuringRecovery && !StreamThreadAmI() && !IS_EXRTO_READ &&
        !u_sess->proc_cxt.clientIsCMAgent) {
        if (InterruptPending) {
            (void)pgstat_report_waitstatus(oldStatus);
        }
        if (retry_get) {
            CHECK_FOR_INTERRUPTS();
            pg_usleep(100L);
        }
        XLogRecPtr redoEndLsn = GetXLogReplayRecPtr(NULL, NULL);
        retry_count++;
        /* Periodically log progress so a stuck standby snapshot is diagnosable. */
        if ((retry_count & WAIT_COUNT) == WAIT_COUNT) {
            ereport(LOG, (errmsg("standbyRedoCleanupXmin = %ld, "
                "standbyRedoCleanupXminLsn = %ld, "
                "standbyXmin = %ld, redoEndLsn = %ld",
                t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXmin,
                t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXminLsn,
                t_thrd.xact_cxt.ShmemVariableCache->standbyXmin,
                redoEndLsn)));
        }
        /* gsql fallback: copy a snapshot from another backend instead of waiting. */
        if ((u_sess->proc_cxt.gsqlRemainCopyNum > 0 && retry_get)) {
            LWLockAcquire(ProcArrayLock, LW_SHARED);
            if ((t_thrd.xact_cxt.ShmemVariableCache->standbyXmin
                <= t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXmin)
                && (t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXminLsn > redoEndLsn)) {
                /*
                 * If CM agent cannot get consistency snapshot immediately, try
                 * getting snapshot from other backends.
                 */
                AgentCopySnapshot(&xmin, &xmax, &snapshot->snapshotcsn);
                bool obtained = TransactionIdIsValid(xmin) && TransactionIdIsValid(xmax) &&
                    snapshot->snapshotcsn != InvalidCommitSeqNo;
                if (obtained) {
                    globalxmin = xmin;
                    /* fetch into volatile var while ProcArrayLock is held */
                    replication_slot_xmin = g_instance.proc_array_idx->replication_slot_xmin;
                    replication_slot_catalog_xmin = g_instance.proc_array_idx->replication_slot_catalog_xmin;
                    if (!TransactionIdIsValid(t_thrd.pgxact->xmin)) {
                        t_thrd.pgxact->handle = GetCurrentTransactionHandleIfAny();
                    }
                    t_thrd.pgxact->xmin = u_sess->utils_cxt.TransactionXmin = xmin;
                    LWLockRelease(ProcArrayLock);
                    u_sess->proc_cxt.gsqlRemainCopyNum--;
                    /* reuse the groupgetsnapshot logic to set snapshot and thread information. */
                    goto GROUP_GET_SNAPSHOT;
                }
                LWLockRelease(ProcArrayLock);
                retry_get = true;
                goto RETRY_GET;
            }
        /*
         * Try the exclusive lock without blocking; if redo cleanup has
         * outrun replay while full-sync dispatch is active, back off
         * and retry instead of handing out an unsafe xmin.
         */
        } else if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE)) {
            if ((t_thrd.xact_cxt.ShmemVariableCache->standbyXmin <=
                t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXmin) &&
                (t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXminLsn > redoEndLsn) &&
                parallel_recovery::in_full_sync_dispatch()) {
                LWLockRelease(ProcArrayLock);
                retry_get = true;
                goto RETRY_GET;
            }
#ifndef ENABLE_MULTIPLE_NODES
        /* Hot-standby feedback callers block on the lock rather than joining a group. */
        } else if (forHSFeedBack) {
            LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
            if ((t_thrd.xact_cxt.ShmemVariableCache->standbyXmin
                <= t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXmin)
                && (t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXminLsn > redoEndLsn)) {
                LWLockRelease(ProcArrayLock);
                retry_get = true;
                goto RETRY_GET;
            }
        }
#endif
        /*
         * Lock not immediately available: join the group-snapshot
         * mechanism and let the current ProcArrayLock holder compute a
         * snapshot on our behalf (see GroupGetSnapshot).
         */
        else {
            if (!retry_get) {
                retry_get = true;
                goto RETRY_GET;
            }
            if (!TransactionIdIsValid(t_thrd.pgxact->xmin)) {
                t_thrd.pgxact->handle = GetCurrentTransactionHandleIfAny();
            }
            t_thrd.proc->snapshotGroup = snapshot;
            /* ensure all previous writes are visible before setting snapshotGroup. */
            pg_memory_barrier();
            GroupGetSnapshot(t_thrd.proc);
            xmin = t_thrd.proc->xminGroup;
            xmax = t_thrd.proc->xmaxGroup;
            globalxmin = t_thrd.proc->globalxminGroup;
            replication_slot_xmin = t_thrd.proc->replicationSlotXminGroup;
            replication_slot_catalog_xmin = t_thrd.proc->replicationSlotCatalogXminGroup;
            u_sess->utils_cxt.TransactionXmin = t_thrd.pgxact->xmin;
            /* Clear our group slots so the next snapshot starts clean. */
            t_thrd.proc->snapshotGroup = NULL;
            t_thrd.proc->xminGroup = InvalidTransactionId;
            t_thrd.proc->xmaxGroup = InvalidTransactionId;
            t_thrd.proc->globalxminGroup = InvalidTransactionId;
            t_thrd.proc->replicationSlotXminGroup = InvalidTransactionId;
            t_thrd.proc->replicationSlotCatalogXminGroup = InvalidTransactionId;
            /* csn == 0 means the group leader aborted without filling us in; retry. */
            if (snapshot->snapshotcsn == 0) {
                retry_get = true;
                goto RETRY_GET;
            }
            goto GROUP_GET_SNAPSHOT;
        }
    } else {
        LWLockAcquire(ProcArrayLock, LW_SHARED);
    }
    /* xmax is always latestCompletedXid + 1 */
    xmax = t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid;
    Assert(TransactionIdIsNormal(xmax));
    TransactionIdAdvance(xmax);
    /* initialize xmin calculation with xmax */
    globalxmin = xmin = xmax;
    /*
     * If we're in recovery then snapshot data comes from a different place,
     * so decide which route we take before grab the lock. It is possible for
     * recovery to end before we finish taking snapshot, and for newly
     * assigned transaction ids to be added to the procarray. Xmax cannot
     * change while we hold ProcArrayLock, so those newly added transaction
     * ids would be filtered away, so we need not be concerned about them.
     */
#ifndef ENABLE_MULTIPLE_NODES
    if (!snapshot->takenDuringRecovery || forHSFeedBack) {
#else
    if (!snapshot->takenDuringRecovery) {
#endif
        int* pgprocnos = arrayP->pgprocnos;
        int numProcs;
        /*
         * Spin over procArray checking xid, xmin, and subxids. The goal is
         * to gather all active xids, find the lowest xmin, and try to record
         * subxids.
         */
        numProcs = arrayP->numProcs;
        for (index = 0; index < numProcs; index++) {
            int pgprocno = pgprocnos[index];
            volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
            TransactionId xid = InvalidTransactionId;
            /*
             * Backend is doing logical decoding which manages xmin
             * separately, check below.
             */
            if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING)
                continue;
            /* Update globalxmin to be the smallest valid xmin, only Ignore procs running LAZY VACUUM xmin */
            if (!(pgxact->vacuumFlags & PROC_IN_VACUUM)) {
                xid = pgxact->xmin; /* fetch just once */
            }
            if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, globalxmin))
                globalxmin = xid;
            /* Fetch xid just once - see GetNewTransactionId */
            xid = pgxact->xid;
            /* If no XID assigned, use xid passed down from CN */
            if (!TransactionIdIsNormal(xid))
                xid = pgxact->next_xid;
            /*
             * If the transaction has no XID assigned, we can skip it; it
             * won't have sub-XIDs either. If the XID is >= xmax, we can also
             * skip it; such transactions will be treated as running anyway
             * (and any sub-XIDs will also be >= xmax).
             */
            if (!TransactionIdIsNormal(xid) || !TransactionIdPrecedes(xid, xmax))
                continue;
            /*
             * We don't include our own XIDs (if any) in the snapshot, but we
             * must include them in xmin.
             */
            if (TransactionIdPrecedes(xid, xmin))
                xmin = xid;
            /*
             * NOTE(review): this self-skip is a no-op at the end of the loop
             * body -- presumably a remnant of removed xip-collection code;
             * confirm before relying on it.
             */
            if (pgxact == t_thrd.pgxact)
                continue;
        }
    }
    /* fetch into volatile var while ProcArrayLock is held */
    replication_slot_xmin = g_instance.proc_array_idx->replication_slot_xmin;
    replication_slot_catalog_xmin = g_instance.proc_array_idx->replication_slot_catalog_xmin;
    if (!TransactionIdIsValid(t_thrd.pgxact->xmin)) {
        t_thrd.pgxact->xmin = u_sess->utils_cxt.TransactionXmin = xmin;
        t_thrd.pgxact->handle = GetCurrentTransactionHandleIfAny();
    }
#ifndef ENABLE_MULTIPLE_NODES
    if (snapshot->takenDuringRecovery && TransactionIdIsValid(t_thrd.xact_cxt.ShmemVariableCache->standbyXmin)) {
        if (TransactionIdPrecedes(t_thrd.xact_cxt.ShmemVariableCache->standbyXmin, xmin)) {
            xmin = t_thrd.xact_cxt.ShmemVariableCache->standbyXmin;
        }
        t_thrd.pgxact->xmin = u_sess->utils_cxt.TransactionXmin = xmin;
    }
#endif
    snapshot->snapshotcsn = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo);
    if (GTM_LITE_MODE) { /* gtm lite check csn, should always pass the check */
        (void)set_proc_csn_and_check("GetLocalSnapshotDataFromProc", snapshot->snapshotcsn,
            snapshot->gtm_snapshot_type, SNAPSHOT_LOCAL);
    }
    LWLockRelease(ProcArrayLock);
#ifndef ENABLE_MULTIPLE_NODES
/*
 * NOTE(review): this label is only compiled #ifndef ENABLE_MULTIPLE_NODES,
 * while the gotos that target it above are not guarded -- verify the
 * ENABLE_MULTIPLE_NODES build of this path.
 */
GROUP_GET_SNAPSHOT:
#endif
    /* Save the xmax and csn, so that the CM agent can obtain them. */
    t_thrd.proc->snapXmax = xmax;
    t_thrd.proc->snapCSN = snapshot->snapshotcsn;
    /*
     * Update globalxmin to include actual process xids. This is a slightly
     * different way of computing it than GetOldestXmin uses, but should give
     * the same result.
     */
    if (TransactionIdPrecedes(xmin, globalxmin)) {
        globalxmin = xmin;
    }
    /* When initdb we set vacuum_defer_cleanup_age to zero, so we can vacuum
       freeze three default database to avoid that localxid larger than GTM next_xid. */
    if (isSingleMode) {
        u_sess->attr.attr_storage.vacuum_defer_cleanup_age = 0;
    }
    /* Update global variables too */
    if (TransactionIdPrecedes(globalxmin, (uint64)u_sess->attr.attr_storage.vacuum_defer_cleanup_age)) {
        u_sess->utils_cxt.RecentGlobalXmin = FirstNormalTransactionId;
    } else {
        u_sess->utils_cxt.RecentGlobalXmin = globalxmin - u_sess->attr.attr_storage.vacuum_defer_cleanup_age;
    }
    if (!TransactionIdIsNormal(u_sess->utils_cxt.RecentGlobalXmin)) {
        u_sess->utils_cxt.RecentGlobalXmin = FirstNormalTransactionId;
    }
    /* Check whether there's a replication slot requiring an older xmin. */
    if (TransactionIdIsValid(replication_slot_xmin) &&
        TransactionIdPrecedes(replication_slot_xmin, u_sess->utils_cxt.RecentGlobalXmin)) {
        u_sess->utils_cxt.RecentGlobalXmin = replication_slot_xmin;
    }
    /* Check whether there's a standby requiring an older xmin when dms is enabled. */
    if (SS_NORMAL_PRIMARY && SS_REPLICATION_MAIN_STANBY_NODE) {
        uint64 global_xmin = SSGetGlobalOldestXmin(u_sess->utils_cxt.RecentGlobalXmin);
        u_sess->utils_cxt.RecentGlobalXmin = global_xmin;
    }
    /* Non-catalog tables can be vacuumed if older than this xid */
    u_sess->utils_cxt.RecentGlobalDataXmin = u_sess->utils_cxt.RecentGlobalXmin;
    /*
     * Check whether there's a replication slot requiring an older catalog
     * xmin.
     */
    if (TransactionIdIsNormal(replication_slot_catalog_xmin) &&
        NormalTransactionIdPrecedes(replication_slot_catalog_xmin, u_sess->utils_cxt.RecentGlobalXmin)) {
        u_sess->utils_cxt.RecentGlobalXmin = replication_slot_catalog_xmin;
    }
    u_sess->utils_cxt.RecentXmin = xmin;
#ifndef ENABLE_MULTIPLE_NODES
    if (forHSFeedBack) {
        u_sess->utils_cxt.RecentGlobalXmin = globalxmin;
    }
#endif
    snapshot->xmin = xmin;
    snapshot->xmax = xmax;
    snapshot->curcid = GetCurrentCommandId(false);
#ifdef PGXC
    if (!RecoveryInProgress()) {
        int errlevel = LOG;
        if (u_sess->attr.attr_common.xc_maintenance_mode || IsAutoVacuumLauncherProcess() || !IsNormalProcessingMode())
            errlevel = DEBUG1;
        /* Just ForeignScan runs in the compute pool, the snapshot and gxid is
         * not necessary. To avoid too much log, we set errlevel to DEBUG1. */
        if (IS_PGXC_COORDINATOR && (StreamTopConsumerAmI() || t_thrd.wlm_cxt.wlmalarm_dump_active))
            errlevel = DEBUG1;
        if (!GTM_FREE_MODE && !t_thrd.postgres_cxt.isInResetUserName)
            ereport(errlevel,
                (errmsg("Local snapshot is built, xmin: %lu, xmax: %lu, "
                    "RecentGlobalXmin: %lu",
                    xmin,
                    xmax,
                    globalxmin)));
    }
#endif
    /*
     * This is a new snapshot, so set both refcounts to zero, and mark it as
     * not copied in persistent memory.
     */
    snapshot->active_count = 0;
    snapshot->regd_count = 0;
    snapshot->copied = false;
    if (snapshot->takenDuringRecovery) {
        /* Extreme-RTO standby read: refresh the read snapshot and caches. */
        if (IsDefaultExtremeRtoMode() && IS_EXRTO_STANDBY_READ) {
            exrto_read_snapshot(snapshot);
            if (t_thrd.proc->exrto_reload_cache) {
                t_thrd.proc->exrto_reload_cache = false;
                reset_invalidation_cache();
            }
            AcceptInvalidationMessages();
        }
        (void)pgstat_report_waitstatus(oldStatus);
    }
    return snapshot;
}
/*
 * exrto_get_snapshot_data
 *    Produce minimal snapshot bounds for extreme-RTO standby reads:
 *    xmax = latestCompletedXid + 1, xmin pulled back to standbyXmin when
 *    that horizon is valid and older, plus the current nextCommitSeqNo.
 */
void exrto_get_snapshot_data(TransactionId &xmin, TransactionId &xmax, CommitSeqNo &snapshot_csn)
{
    LWLockAcquire(ProcArrayLock, LW_SHARED);
    /* xmax is always latest_completed_xid + 1 */
    xmax = t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid;
    Assert(TransactionIdIsNormal(xmax));
    TransactionIdAdvance(xmax);
    /* start xmin from xmax, then clamp it down to the standby horizon */
    xmin = xmax;
    TransactionId standby_xmin = t_thrd.xact_cxt.ShmemVariableCache->standbyXmin;
    if (TransactionIdIsValid(standby_xmin) && TransactionIdPrecedes(standby_xmin, xmin)) {
        xmin = standby_xmin;
    }
    LWLockRelease(ProcArrayLock);
    snapshot_csn = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo);
}
/*
 * ProcArrayInstallImportedXmin -- install imported xmin into MyPgXact->xmin
 *
 * Called when adopting a snapshot exported by another transaction.  To keep
 * OldestXmin from moving backwards we must verify -- atomically with the
 * installation -- that the source transaction is still running and that its
 * xmin still covers the imported one.
 *
 * Returns TRUE on success, FALSE if the source xact is no longer running.
 */
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
{
    bool installed = false;
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    Assert(TransactionIdIsNormal(xmin));
    if (sourcevxid == NULL)
        return false;
    /* Hold the lock so the source xact cannot end underneath us. */
    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (int i = 0; i < arrayP->numProcs; i++) {
        int procno = arrayP->pgprocnos[i];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[procno];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[procno];
        /* Only the specific virtual transaction is of interest. */
        if (proc->backendId != sourcevxid->backendId)
            continue;
        if (proc->lxid != sourcevxid->localTransactionId)
            continue;
        /*
         * Paranoia: a source transaction in another database cannot cover
         * us.  The caller should have detected that already, so any funny
         * case is simply treated as "transaction not found".
         */
        if (proc->databaseId != u_sess->proc_cxt.MyDatabaseId)
            continue;
        /* Make really sure its xmin covers the one being imported. */
        TransactionId src_xmin = pgxact->xmin; /* fetch just once */
        if (!TransactionIdIsNormal(src_xmin) || !TransactionIdPrecedesOrEquals(src_xmin, xmin))
            continue;
        /*
         * Good to go.  Install the new xmin and TransactionXmin, just as
         * GetSnapshotData does.  (snapmgr.c already called GetSnapshotData,
         * so we knowingly overwrite a valid xmin here and skip that check.)
         */
        t_thrd.pgxact->xmin = u_sess->utils_cxt.TransactionXmin = xmin;
        installed = true;
        break;
    }
    LWLockRelease(ProcArrayLock);
    return installed;
}
/* Cross-call state for pg_get_running_xacts(): next procarray slot to scan. */
typedef struct GTM_RunningXacts {
    int cur_index; /* index into arrayP->pgprocnos for the next call */
} GTM_RunningXacts;
/*
 * pg_get_running_xacts -- SRF returning one row per running backend xact.
 *
 * Each row carries: transaction handle, xid, coarse GTM state (prepared vs.
 * starting), local node name, xmin, whether the backend runs VACUUM, the
 * control-file timeline, prepare_xid, backend pid and next_xid.  The calling
 * backend's own entry is skipped.
 */
Datum pg_get_running_xacts(PG_FUNCTION_ARGS)
{
    FuncCallContext* funcctx = NULL;
    GTM_RunningXacts* status = NULL;
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    if (SRF_IS_FIRSTCALL()) {
        TupleDesc tupdesc;
        MemoryContext oldcontext;
        /* create a function context for cross-call persistence */
        funcctx = SRF_FIRSTCALL_INIT();
        /*
         * Switch to memory context appropriate for multiple function calls
         */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
        /* build tupdesc for result tuples */
        /* this had better match pg_prepared_xacts view in system_views.sql */
        tupdesc = CreateTemplateTupleDesc(10, false);
        TupleDescInitEntry(tupdesc, (AttrNumber)1, "handle", INT4OID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber)2, "gxid", XIDOID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber)3, "state", INT1OID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber)4, "node", TEXTOID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber)5, "xmin", XIDOID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber)6, "vacuum", BOOLOID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber)7, "timeline", INT8OID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber)8, "prepare_xid", XIDOID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber)9, "pid", INT8OID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber)10, "next_xid", XIDOID, -1, 0);
        funcctx->tuple_desc = BlessTupleDesc(tupdesc);
        /*
         * Collect all the 2PC status information that we will format and send
         * out as a result set.
         */
        status = (GTM_RunningXacts*)palloc(sizeof(GTM_RunningXacts));
        status->cur_index = 0;
        funcctx->user_fctx = (void*)status;
        MemoryContextSwitchTo(oldcontext);
        /*
         * Ensure that no xids enter or leave the procarray while we obtain
         * snapshot.
         *
         * NOTE(review): this lock stays held across SRF calls until
         * SRF_RETURN_DONE; presumably an aborted query relies on lwlock
         * cleanup to release it -- confirm.
         */
        LWLockAcquire(ProcArrayLock, LW_SHARED);
    }
    funcctx = SRF_PERCALL_SETUP();
    status = (GTM_RunningXacts*)funcctx->user_fctx;
    /* Resume the scan where the previous call left off. */
    while (status->cur_index < arrayP->numProcs) {
        int pgprocno = arrayP->pgprocnos[status->cur_index++];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
        Datum values[10];
        bool nulls[10];
        HeapTuple tuple;
        Datum result;
        /* Skip self */
        if (pgxact == t_thrd.pgxact)
            continue;
        /*
         * Form tuple with appropriate data.
         */
        errno_t ret = memset_s(values, sizeof(values), 0, sizeof(values));
        securec_check(ret, "\0", "\0");
        ret = memset_s(nulls, sizeof(nulls), 0, sizeof(nulls));
        securec_check(ret, "\0", "\0");
        values[0] = Int32GetDatum(pgxact->handle);
        values[1] = TransactionIdGetDatum(pgxact->xid);
        if (TransactionIdIsPrepared(pgxact->xid))
            values[2] = Int8GetDatum(GTM_TXN_PREPARED);
        else
            values[2] = Int8GetDatum(GTM_TXN_STARTING);
        values[3] = CStringGetTextDatum(g_instance.attr.attr_common.PGXCNodeName);
        values[4] = TransactionIdGetDatum(pgxact->xmin);
        if (pgxact->vacuumFlags & PROC_IN_VACUUM)
            values[5] = BoolGetDatum(true);
        else
            values[5] = BoolGetDatum(false);
        values[6] = Int64GetDatum(get_controlfile_timeline());
        values[7] = TransactionIdGetDatum(pgxact->prepare_xid);
        values[8] = Int64GetDatum(proc->pid);
        values[9] = TransactionIdGetDatum(pgxact->next_xid);
        tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
        result = HeapTupleGetDatum(tuple);
        SRF_RETURN_NEXT(funcctx, result);
    }
    /* Scan complete: release the lock taken on the first call. */
    LWLockRelease(ProcArrayLock);
    SRF_RETURN_DONE(funcctx);
}
/*
 * Similar to GetSnapshotData but returns just oldestActiveXid.  All PGXACTs
 * with an assigned TransactionId are considered, VACUUM workers included.
 * Every database is scanned; WALSenders need no special handling since they
 * cannot cause hot-standby conflicts.
 *
 * Never executed during recovery, so KnownAssignedXids is irrelevant here.
 *
 * No other counters are maintained -- this stays as simple as possible and
 * GetSnapshotData() remains the primary bookkeeping code.
 *
 * On return, *globalXmin holds the smaller of the minimum valid PGXACT xmin
 * and the returned oldest running xid.
 */
TransactionId GetOldestActiveTransactionId(TransactionId *globalXmin)
{
    ProcArrayStruct* procArray = g_instance.proc_array_idx;
    /* xmax is always latestCompletedXid + 1 */
    TransactionId xmax = t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid;
    Assert(TransactionIdIsNormal(xmax));
    TransactionIdAdvance(xmax);
    TransactionId xmin = xmax;
    LWLockAcquire(ProcArrayLock, LW_SHARED);
    /*
     * Reading nextXid without XidGenLock is fine: (1) TransactionIds are
     * assumed to be read atomically and (2) a slightly stale value is
     * harmless -- it cannot be very stale, since the LWLockAcquire above
     * already performed any necessary memory interlocking.
     */
    TransactionId oldestRunningXid = t_thrd.xact_cxt.ShmemVariableCache->nextXid;
    /* Scan the procarray for the minimum xmin and the minimum assigned xid. */
    for (int i = 0; i < procArray->numProcs; i++) {
        int procno = procArray->pgprocnos[i];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[procno];
        TransactionId candidate = pgxact->xmin; /* fetch just once */
        if (TransactionIdIsNormal(candidate) && TransactionIdPrecedes(candidate, xmin))
            xmin = candidate;
        /* Fetch xid just once - see GetNewTransactionId */
        candidate = pgxact->xid;
        if (!TransactionIdIsNormal(candidate))
            continue;
        if (TransactionIdPrecedes(candidate, oldestRunningXid))
            oldestRunningXid = candidate;
        /*
         * A transaction's top-level XID is always smaller than any of its
         * subxids, so the subxid arrays need not be examined here.
         */
    }
    LWLockRelease(ProcArrayLock);
    /*
     * Fold the actual running xids into the xmin horizon.  Slightly
     * different from how GetOldestXmin computes it, but the result is the
     * same.
     */
    if (TransactionIdPrecedes(oldestRunningXid, xmin)) {
        xmin = oldestRunningXid;
    }
    *globalXmin = xmin;
    if (IS_EXRTO_STANDBY_READ) {
        ereport(LOG, (errmsg("proc_array_get_oldest_active_transaction_id: global_xmin = %lu", *globalXmin)));
    }
    return oldestRunningXid;
}
/*
 * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
 *
 * Returns the oldest xid guaranteed not to have been touched by vacuum,
 * i.e. no rows >= that xid have been vacuumed away unless their transaction
 * aborted.  The answer can be (and usually is) far more conservative than
 * what vacuum really affected, but better data is not currently available.
 *
 * Useful to initialize the cutoff xid after which a new changeset
 * extraction replication slot can start decoding changes.
 *
 * Must be called with ProcArrayLock held, shared or exclusive; most callers
 * want exclusive mode since they immediately use the xid to peg the xmin
 * horizon.
 */
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
{
    ProcArrayStruct* procArray = g_instance.proc_array_idx;
    bool inRecovery = RecoveryInProgress();
    Assert(LWLockHeldByMe(ProcArrayLock));
    /*
     * Take XidGenLock so no transaction can obtain an xid while we run; a
     * concurrently assigned xid could otherwise influence RecentXmin et al.
     *
     * Start the computation from nextXid, which is guaranteed to be a safe,
     * albeit pessimal, value.
     */
    LWLockAcquire(XidGenLock, LW_SHARED);
    TransactionId oldestSafeXid = t_thrd.xact_cxt.ShmemVariableCache->nextXid;
    /*
     * An existing slot already pegging the xmin horizon gives a usable
     * starting point: that value was computed by this routine originally
     * and has been enforced ever since.  The slot's general xmin horizon is
     * always usable; its catalog horizon only when solely catalog data is
     * going to be looked at.
     */
    TransactionId slotXmin = g_instance.proc_array_idx->replication_slot_xmin;
    if (TransactionIdIsValid(slotXmin) && TransactionIdPrecedes(slotXmin, oldestSafeXid))
        oldestSafeXid = slotXmin;
    TransactionId slotCatalogXmin = g_instance.proc_array_idx->replication_slot_catalog_xmin;
    if (catalogOnly && TransactionIdIsValid(slotCatalogXmin) &&
        TransactionIdPrecedes(slotCatalogXmin, oldestSafeXid))
        oldestSafeXid = slotCatalogXmin;
    /*
     * Outside recovery, walk the procarray for the lowest assigned xid.
     * With ProcArrayLock held by the caller and XidGenLock held here, no
     * entry can vanish concurrently: PGXACT->xid is only set under
     * XidGenLock and only cleared under ProcArrayLock.
     *
     * During recovery the value computed above cannot be lowered any
     * further, so decoding simply has to wait a bit longer there.
     * KnownAssignedXidsGetOldestXmin() unfortunately can *not* be used,
     * since the KnownAssignedXids machinery can miss values and return an
     * older value than is safe.
     */
    if (!inRecovery) {
        /* Spin over procArray collecting min(PGXACT->xid) */
        for (int i = 0; i < procArray->numProcs; i++) {
            int procno = procArray->pgprocnos[i];
            volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[procno];
            /* Fetch xid just once - see GetNewTransactionId */
            TransactionId xid = pgxact->xid;
            if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, oldestSafeXid))
                oldestSafeXid = xid;
        }
    }
    LWLockRelease(XidGenLock);
    return oldestSafeXid;
}
/*
 * GetVirtualXIDsDelayingChkpt -- Get the XIDs of transactions that are
 * delaying checkpoint because they have critical actions in progress.
 *
 * Builds an array of the VXIDs of all transactions currently inside a
 * commit-critical section, identified by delayChkpt being set in their
 * PGXACT.
 *
 * Returns a palloc'd array that the caller must free; *nvxids receives the
 * number of valid entries.
 *
 * Backends set and clear delayChkpt without taking any lock, so the result
 * is somewhat indeterminate -- which is fine.  Even on a multiprocessor
 * with delayed writes to shared memory, setting delayChkpt is certain to
 * propagate once the backend takes a lock, so a virtual xact that has
 * already inserted its commit record cannot be missed.  Whether the
 * *clearing* of delayChkpt takes a while to propagate does not matter for
 * correctness.
 */
VirtualTransactionId* GetVirtualXIDsDelayingChkpt(int* nvxids)
{
    ProcArrayStruct* procArray = g_instance.proc_array_idx;
    int found = 0;
    /* maxProcs entries are certainly enough */
    VirtualTransactionId* result = (VirtualTransactionId*)palloc(sizeof(VirtualTransactionId) * procArray->maxProcs);
    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (int i = 0; i < procArray->numProcs; i++) {
        int procno = procArray->pgprocnos[i];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[procno];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[procno];
        if (!pgxact->delayChkpt)
            continue;
        VirtualTransactionId vxid;
        GET_VXID_FROM_PGPROC(vxid, *proc);
        if (VirtualTransactionIdIsValid(vxid))
            result[found++] = vxid;
    }
    LWLockRelease(ProcArrayLock);
    *nvxids = found;
    return result;
}
/*
 * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
 *
 * Used with the result of GetVirtualXIDsDelayingChkpt to test whether any
 * of the listed VXIDs are still inside a commit-critical code section.
 *
 * Note: O(N^2) in the number of vxacts that are/were delaying, but that
 * count is expected to stay small enough not to matter.
 */
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId* vxids, int nvxids)
{
    bool found = false;
    ProcArrayStruct* procArray = g_instance.proc_array_idx;
    LWLockAcquire(ProcArrayLock, LW_SHARED);
    /* stop scanning as soon as one match has been located */
    for (int i = 0; i < procArray->numProcs && !found; i++) {
        int procno = procArray->pgprocnos[i];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[procno];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[procno];
        VirtualTransactionId vxid;
        GET_VXID_FROM_PGPROC(vxid, *proc);
        if (!pgxact->delayChkpt || !VirtualTransactionIdIsValid(vxid))
            continue;
        for (int j = 0; j < nvxids; j++) {
            if (VirtualTransactionIdEquals(vxid, vxids[j])) {
                found = true;
                break;
            }
        }
    }
    LWLockRelease(ProcArrayLock);
    return found;
}
/*
* BackendPidGetProc -- get a backend's PGPROC given its PID
*
* Returns NULL if not found. Note that it is up to the caller to be
* sure that the question remains meaningful for long enough for the
* answer to be used ...
*/
/*
 * BackendPidGetProc -- look up a backend's PGPROC by its PID.
 *
 * Returns NULL if no live backend has that PID. It is up to the caller
 * to ensure the answer is still meaningful by the time it is used.
 */
PGPROC* BackendPidGetProc(ThreadId pid)
{
    PGPROC* found = NULL;
    ProcArrayStruct* procArray = g_instance.proc_array_idx;

    /* PID 0 marks dummy (prepared-xact) PGPROCs; never match those. */
    if (pid == 0)
        return NULL;

    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (int slot = 0; slot < procArray->numProcs; slot++) {
        PGPROC* candidate = g_instance.proc_base_all_procs[procArray->pgprocnos[slot]];

        if (candidate->pid == pid) {
            found = candidate;
            break;
        }
    }
    LWLockRelease(ProcArrayLock);

    return found;
}
/*
* BackendXidGetPid -- get a backend's pid given its XID
*
* Returns 0 if not found or it's a prepared transaction. Note that
* it is up to the caller to be sure that the question remains
* meaningful for long enough for the answer to be used ...
*
* Only main transaction Ids are considered. This function is mainly
* useful for determining what backend owns a lock.
*
* Beware that not every xact has an XID assigned. However, as long as you
* only call this using an XID found on disk, you're safe.
*/
/*
 * BackendXidGetPid -- look up the pid of the backend running the given
 * top-level XID.
 *
 * Returns 0 if no backend matches, or if the xid belongs to a prepared
 * transaction (whose dummy PGPROC has pid 0). Only main transaction ids
 * are examined; mainly useful for determining which backend owns a lock.
 */
int BackendXidGetPid(TransactionId xid)
{
    int foundPid = 0;
    ProcArrayStruct* procArray = g_instance.proc_array_idx;

    /* An invalid xid can never match a running transaction. */
    if (xid == InvalidTransactionId)
        return 0;

    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (int slot = 0; slot < procArray->numProcs; slot++) {
        int procno = procArray->pgprocnos[slot];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[procno];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[procno];

        if (pgxact->xid == xid) {
            foundPid = proc->pid;
            break;
        }
    }
    LWLockRelease(ProcArrayLock);

    return foundPid;
}
/*
* IsBackendPid -- is a given pid a running backend
*/
/*
 * IsBackendPid -- is the given pid a currently running backend?
 */
bool IsBackendPid(ThreadId pid)
{
    return BackendPidGetProc(pid) != NULL;
}
/*
* GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
*
* The array is palloc'd. The number of valid entries is returned into *nvxids.
*
* The arguments allow filtering the set of VXIDs returned. Our own process
* is always skipped. In addition:
* If limitXmin is not InvalidTransactionId, skip processes with
* xmin > limitXmin.
* If excludeXmin0 is true, skip processes with xmin = 0.
* If allDbs is false, skip processes attached to other databases.
* If excludeVacuum isn't zero, skip processes for which
* (vacuumFlags & excludeVacuum) is not zero.
*
* Note: the purpose of the limitXmin and excludeXmin0 parameters is to
* allow skipping backends whose oldest live snapshot is no older than
* some snapshot we have. Since we examine the procarray with only shared
* lock, there are race conditions: a backend could set its xmin just after
* we look. Indeed, on multiprocessors with weak memory ordering, the
* other backend could have set its xmin *before* we look. We know however
* that such a backend must have held shared ProcArrayLock overlapping our
* own hold of ProcArrayLock, else we would see its xmin update. Therefore,
* any snapshot the other backend is taking concurrently with our scan cannot
* consider any transactions as still running that we think are committed
* (since backends must hold ProcArrayLock exclusive to commit).
*/
VirtualTransactionId* GetCurrentVirtualXIDs(
    TransactionId limitXmin, bool excludeXmin0, bool allDbs, int excludeVacuum, int* nvxids)
{
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    int count = 0;
    /* allocate what's certainly enough result space */
    VirtualTransactionId* vxids = (VirtualTransactionId*)palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (int index = 0; index < arrayP->numProcs; index++) {
        int pgprocno = arrayP->pgprocnos[index];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
        if (proc == t_thrd.proc)
            continue; /* our own process is always skipped */
        if (excludeVacuum & pgxact->vacuumFlags)
            continue; /* skip procs with any of the caller-excluded vacuum flags */
        /* unless allDbs, only consider procs attached to our database */
        if (allDbs || proc->databaseId == u_sess->proc_cxt.MyDatabaseId) {
            /* Fetch xmin just once - might change on us */
            TransactionId pxmin = pgxact->xmin;
            if (excludeXmin0 && !TransactionIdIsValid(pxmin))
                continue; /* caller asked to ignore procs with no xmin yet */
            /*
             * InvalidTransactionId precedes all other XIDs, so a proc that
             * hasn't set xmin yet will not be rejected by this test.
             */
            if (!TransactionIdIsValid(limitXmin) || TransactionIdPrecedesOrEquals(pxmin, limitXmin)) {
                VirtualTransactionId vxid;
                GET_VXID_FROM_PGPROC(vxid, *proc);
                if (VirtualTransactionIdIsValid(vxid))
                    vxids[count++] = vxid;
            }
        }
    }
    LWLockRelease(ProcArrayLock);
    *nvxids = count;
    return vxids;
}
/*
 * UpdateCleanUpInfo -- advance the standby redo-cleanup bookkeeping.
 *
 * Monotonically raises ShmemVariableCache->standbyRedoCleanupXmin to
 * limitXmin and standbyRedoCleanupXminLsn to lsn (values are never moved
 * backwards). Logs a diagnostic when limitXmin has run far (> 10M xids)
 * ahead of the recorded standbyXmin.
 *
 * Called while ProcArrayLock is held (from GetConflictingVirtualXIDs).
 */
void UpdateCleanUpInfo(TransactionId limitXmin, XLogRecPtr lsn)
{
    if (t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXmin < limitXmin) {
        t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXmin = limitXmin;
        const int xid_gap = 10000000;
        if (limitXmin > t_thrd.xact_cxt.ShmemVariableCache->standbyXmin + xid_gap) {
            /*
             * TransactionId and XLogRecPtr are unsigned 64-bit quantities;
             * print them with %lu (as elsewhere in this file), not %ld,
             * to avoid negative output for large values.
             */
            ereport(LOG, (errmsg("limitXmin = %lu, standbyRedoCleanupXmin = %lu, "
                "lsn = %lu, standbyRedoCleanupXminLsn = %lu, "
                "standbyXmin = %lu",
                limitXmin, t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXmin,
                lsn, t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXminLsn,
                t_thrd.xact_cxt.ShmemVariableCache->standbyXmin)));
        }
    }
    if (t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXminLsn < lsn) {
        t_thrd.xact_cxt.ShmemVariableCache->standbyRedoCleanupXminLsn = lsn;
    }
}
/*
* GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
*
* Usage is limited to conflict resolution during recovery on standby servers.
* limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId
* in cases where we cannot accurately determine a value for latestRemovedXid.
*
* If limitXmin is InvalidTransactionId then we want to kill everybody,
* so we're not worried if they have a snapshot or not, nor does it really
* matter what type of lock we hold.
*
* All callers that are checking xmins always now supply a valid and useful
* value for limitXmin. The limitXmin is always lower than the lowest
* numbered KnownAssignedXid that is not already a FATAL error. This is
* because we only care about cleanup records that are cleaning up tuple
* versions from committed transactions. In that case they will only occur
* at the point where the record is less than the lowest running xid. That
* allows us to say that if any backend takes a snapshot concurrently with
* us then the conflict assessment made here would never include the snapshot
* that is being derived. So we take LW_SHARED on the ProcArray and allow
* concurrent snapshots when limitXmin is valid. We might
* think about adding Assert(limitXmin < lowest(KnownAssignedXids))
* but that would not be true in the case of FATAL errors lagging in array,
* but we already know those are bogus anyway, so we skip that test.
*
* If dbOid is valid we skip backends attached to other databases.
*
* Be careful to *not* pfree the result from this function. We reuse
* this array sufficiently often that we use malloc for the result.
*/
VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid, XLogRecPtr lsn,
    CommitSeqNo limitXminCSN, TransactionId* xminArray)
{
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    int count = 0;
    int index;
    /*
     * If first time through, get workspace to remember main XIDs in. We
     * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
     * result space, remembering room for a terminator.
     */
    if (t_thrd.storage_cxt.proc_vxids == NULL) {
        t_thrd.storage_cxt.proc_vxids = (VirtualTransactionId*)MemoryContextAlloc(
            THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE),
            sizeof(VirtualTransactionId) * (unsigned int)(arrayP->maxProcs + 1));
        if (t_thrd.storage_cxt.proc_vxids == NULL)
            ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
    }
    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (index = 0; index < arrayP->numProcs; index++) {
        int pgprocno = arrayP->pgprocnos[index];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
        /* Exclude prepared transactions and Statement flush thread */
        if (proc->pid == 0 || (OidIsValid(dbOid) && proc->databaseId != dbOid) ||
            strcmp((const char*)(proc->myProgName), "Statement flush thread") == 0) {
            continue;
        }
#ifndef ENABLE_MULTIPLE_NODES
        /* Fetch xmin just once - can't change on us, but good coding */
        TransactionId pxmin = pgxact->xmin;
        /*
         * We ignore an invalid pxmin because this means that backend has
         * no snapshot and cannot get another one while we hold exclusive
         * lock.
         */
        if (!TransactionIdIsValid(limitXmin) ||
            (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin))) {
            VirtualTransactionId vxid;
            GET_VXID_FROM_PGPROC(vxid, *proc);
            if (VirtualTransactionIdIsValid(vxid)) {
                /*
                 * ADD_XMIN_TO_ARRAY presumably records pxmin into the
                 * caller-supplied xminArray -- macro defined elsewhere;
                 * TODO(review) confirm.
                 */
                ADD_XMIN_TO_ARRAY(pxmin);
                t_thrd.storage_cxt.proc_vxids[count++] = vxid;
            }
        }
#else
        /* Conflicts are only tracked here in multi-disaster-recover mode. */
        if (!IS_MULTI_DISASTER_RECOVER_MODE) {
            break;
        }
        /* A backend conflicts when its snapshot CSN is at or below the limit. */
        CommitSeqNo xact_csn = pgxact->csn_dr;
        if (!TransactionIdIsValid(limitXmin) || (limitXminCSN >= xact_csn && xact_csn != InvalidCommitSeqNo)) {
            VirtualTransactionId vxid;
            GET_VXID_FROM_PGPROC(vxid, *proc);
            if (VirtualTransactionIdIsValid(vxid)) {
                t_thrd.storage_cxt.proc_vxids[count++] = vxid;
            }
        }
#endif
    }
#ifndef ENABLE_MULTIPLE_NODES
    UpdateCleanUpInfo(limitXmin, lsn);
#endif
    LWLockRelease(ProcArrayLock);
    /* add the terminator */
    t_thrd.storage_cxt.proc_vxids[count].backendId = InvalidBackendId;
    t_thrd.storage_cxt.proc_vxids[count].localTransactionId = InvalidLocalTransactionId;
    ADD_XMIN_TO_ARRAY(InvalidTransactionId);
    return t_thrd.storage_cxt.proc_vxids;
}
/*
* CancelVirtualTransaction - used in recovery conflict processing
*
* Returns pid of the process signaled, or 0 if not found.
*/
/*
 * CancelVirtualTransaction - used in recovery conflict processing
 *
 * Marks the backend running the given virtual transaction as having a
 * recovery conflict pending and signals it with sigmode.
 * Returns the pid of the process signaled, or 0 if not found.
 */
ThreadId CancelVirtualTransaction(const VirtualTransactionId& vxid, ProcSignalReason sigmode)
{
    ProcArrayStruct* procArray = g_instance.proc_array_idx;
    ThreadId targetPid = 0;

    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (int slot = 0; slot < procArray->numProcs; slot++) {
        volatile PGPROC* proc = g_instance.proc_base_all_procs[procArray->pgprocnos[slot]];
        VirtualTransactionId procvxid;

        GET_VXID_FROM_PGPROC(procvxid, *proc);
        if (procvxid.backendId != vxid.backendId || procvxid.localTransactionId != vxid.localTransactionId)
            continue;

        proc->recoveryConflictPending = true;
        targetPid = proc->pid;
        if (targetPid != 0) {
            /*
             * Kill the pid if it's still here. If not, that's what we
             * wanted so ignore any errors.
             */
            (void)SendProcSignal(targetPid, sigmode, vxid.backendId);
        }
        break;
    }
    LWLockRelease(ProcArrayLock);

    return targetPid;
}
/*
 * proc_array_cancel_conflicting_proc
 *     Cancel standby backends whose snapshots conflict with redo progress.
 *
 * A backend conflicts when it has a valid exrto_min read LSN and either its
 * xmin is <= latest_removed_xid, or its read LSN lies before
 * truncate_redo_lsn. Conflicting backends get recoveryConflictPending set
 * and are sent PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, followed by a short
 * sleep to avoid flooding an unresponsive backend.
 *
 * Returns true if at least one conflicting backend was found.
 */
bool proc_array_cancel_conflicting_proc(
    TransactionId latest_removed_xid, XLogRecPtr truncate_redo_lsn, bool reach_max_check_times)
{
    ProcArrayStruct* proc_array = g_instance.proc_array_idx;
    bool conflict = false;
    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (int index = 0; index < proc_array->numProcs; index++) {
        int pg_proc_no = proc_array->pgprocnos[index];
        PGPROC* pg_proc = g_instance.proc_base_all_procs[pg_proc_no];
        PGXACT* pg_xact = &g_instance.proc_base_all_xacts[pg_proc_no];
        XLogRecPtr read_lsn = pg_proc->exrto_min;
        TransactionId pxmin = pg_xact->xmin;
        /* skip prepared xacts (pid == 0) and backends with no read LSN */
        if (pg_proc->pid == 0 || XLogRecPtrIsInvalid(read_lsn)) {
            continue;
        }
        Assert(!(pg_xact->vacuumFlags & PROC_IN_VACUUM));
        /*
         * Backend is doing logical decoding which manages xmin
         * separately, check below.
         */
        if (pg_xact->vacuumFlags & PROC_IN_LOGICAL_DECODING) {
            continue;
        }
        /* cancel query when its xmin < latest_removed_xid */
        if (TransactionIdPrecedesOrEquals(pxmin, latest_removed_xid) ||
            (truncate_redo_lsn != InvalidXLogRecPtr && XLByteLT(read_lsn, truncate_redo_lsn))) {
            conflict = true;
            pg_proc->recoveryConflictPending = true;
            if (pg_proc->pid != 0) {
                /*
                 * Kill the pid if it's still here. If not, that's what we
                 * wanted so ignore any errors.
                 */
                (void)SendProcSignal(pg_proc->pid, PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, pg_proc->backendId);
                /*
                 * Wait a little bit for it to die so that we avoid flooding
                 * an unresponsive backend when system is heavily loaded.
                 */
                ereport(LOG,
                    (errmsg(EXRTOFORMAT("cancel thread while "
                        "redo truncate (lsn: %08X/%08X, latest_removed_xid: %lu), thread id = %lu, "
                        "read_lsn: %08X/%08X, xmin: %lu"),
                        (uint32)(truncate_redo_lsn >> UINT64_HALF),
                        (uint32)truncate_redo_lsn,
                        latest_removed_xid,
                        pg_proc->pid,
                        (uint32)(read_lsn >> UINT64_HALF),
                        (uint32)read_lsn,
                        pxmin)));
                pg_usleep(5000L);
            }
        }
        /*
         * NOTE(review): this warning is emitted for EVERY scanned backend
         * (not only conflicting ones) whenever reach_max_check_times is set;
         * confirm whether it was meant to live inside the conflict branch.
         */
        if (reach_max_check_times) {
            ereport(WARNING, (
                errmsg("can not cancel thread while redo truncate, thread id = %lu", pg_proc->pid)));
        }
    }
    LWLockRelease(ProcArrayLock);
    return conflict;
}
/*
* MinimumActiveBackends --- count backends (other than myself) that are
* in active transactions. Return true if the count exceeds the
* minimum threshold passed. This is used as a heuristic to decide if
* a pre-XLOG-flush delay is worthwhile during commit.
*
* Do not count backends that are blocked waiting for locks, since they are
* not going to get to run until someone else commits.
*/
bool MinimumActiveBackends(int min)
{
ProcArrayStruct* arrayP = g_instance.proc_array_idx;
int count = 0;
int index;
/* Quick short-circuit if no minimum is specified */
if (min == 0) {
return true;
}
/*
* Note: for speed, we don't acquire ProcArrayLock. This is a little bit
* bogus, but since we are only testing fields for zero or nonzero, it
* should be OK. The result is only used for heuristic purposes anyway...
*/
for (index = 0; index < arrayP->numProcs; index++) {
int pgprocno = arrayP->pgprocnos[index];
/*
* Since we're not holding a lock, need to check that the pointer is
* valid. Someone holding the lock could have incremented numProcs
* already, but not yet inserted a valid pointer to the array.
*
* If someone just decremented numProcs, 'proc' could also point to a
* PGPROC entry that's no longer in the array. It still points to a
* PGPROC struct, though, because freed PGPROC entries just go to the
* free list and are recycled. Its contents are nonsense in that case,
* but that's acceptable for this function.
*/
if (pgprocno == -1) {
continue; /* do not count deleted entries */
}
volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
if (proc == t_thrd.proc) {
continue; /* do not count myself */
}
if (pgxact->xid == InvalidTransactionId) {
continue; /* do not count if no XID assigned */
}
if (proc->pid == 0) {
continue; /* do not count prepared xacts */
}
if (proc->waitLock != NULL) {
continue; /* do not count if blocked on a lock */
}
count++;
if (count >= min) {
break;
}
}
return count >= min;
}
/*
* CountDBBackends:
* The purpose is to collect statistics on the number of current connections.
*
* 1.In case of thread pool mode, active and inactive threads are counted through interface CountDBSessions.
* 2.In case of none thread pool mode, the number of threads to be prepared and the number of active backend threads
* need to be collected with the help of global variable g_instance.proc_base_all_procs.
*/
int CountDBBackends(Oid database_oid)
{
    const int MAXAUTOVACPIDS = 10; /* max autovacs to SIGTERM per iteration */
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    int index, pgprocno, num_connections;
    int num_autovacs = 0;
    int num_wdrxdbs = 0;
    int num_backends = 0;
    int num_prepared = 0;
    CHECK_FOR_INTERRUPTS();
    /* Under thread pool mode, active and inactive threads are counted. */
    if (ENABLE_THREAD_POOL) {
        num_connections = g_threadPoolControler->GetSessionCtrl()->CountDBSessions(database_oid);
    } else {
        LWLockAcquire(ProcArrayLock, LW_SHARED);
        for (index = 0; index < arrayP->numProcs; index++) {
            pgprocno = arrayP->pgprocnos[index];
            volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
            volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
            volatile PgBackendStatus* beentry = pgstat_get_backend_single_entry(proc->sessionid);
            if (proc->databaseId != database_oid) {
                continue; /* only procs attached to the target database count */
            }
            if (proc->pid == 0) {
                /* dummy PGPROC: a prepared transaction, counted separately */
                num_prepared++;
            } else {
                /* Internal threads are not counted. Function: autovacuum */
                if ((pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) && num_autovacs < MAXAUTOVACPIDS) {
                    num_autovacs++;
                    continue;
                }
                /* Internal threads are not counted. Function: cross-database query */
                if (beentry != NULL && strcmp(beentry->st_appname, "WDRXdb") == 0 &&
                    num_wdrxdbs < MAXAUTOVACPIDS) {
                    num_wdrxdbs++;
                    continue;
                }
                num_backends++;
            }
        }
        LWLockRelease(ProcArrayLock);
        num_connections = num_backends + num_prepared;
    }
    if (ENABLE_THREAD_POOL) {
        ereport(DEBUG5, (errmsg("count backend connections in threadpool mode, num_connections[%d].)",
            num_connections)));
    } else {
        ereport(DEBUG5, (errmsg("count backend connections in none-threadpool mode, num_backends[%d], "
            "num_prepared[%d], num_autovacs[%d], num_wdrxdbs[%d].)",
            num_backends, num_prepared, num_autovacs, num_wdrxdbs)));
    }
    return num_connections;
}
/*
* CountDBActiveBackends
* The purpose is to count active backends that are using specified database which used for clearing links
* in the redo scenario.
*/
/*
 * CountDBActiveBackends
 *     Count live (non-prepared) backends attached to the given database;
 *     an invalid database_oid counts live backends of every database.
 *     Used for clearing links in the redo scenario.
 */
int CountDBActiveBackends(Oid database_oid)
{
    ProcArrayStruct* procArray = g_instance.proc_array_idx;
    int nactive = 0;

    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (int slot = 0; slot < procArray->numProcs; slot++) {
        volatile PGPROC* proc = g_instance.proc_base_all_procs[procArray->pgprocnos[slot]];

        /* prepared transactions have pid == 0 and are not counted */
        if (proc->pid == 0)
            continue;
        if (!OidIsValid(database_oid) || proc->databaseId == database_oid)
            nactive++;
    }
    LWLockRelease(ProcArrayLock);

    return nactive;
}
/*
* CancelDBBackends --- cancel backends that are using specified database
*/
/*
 * CancelDBBackends --- signal backends that are using the specified database
 *
 * An InvalidOid database targets every backend. Each matching proc has its
 * recoveryConflictPending flag set to conflictPending before being signaled
 * with sigmode.
 */
void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
{
    ProcArrayStruct* procArray = g_instance.proc_array_idx;

    /* tell all backends to die */
    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    for (int slot = 0; slot < procArray->numProcs; slot++) {
        volatile PGPROC* proc = g_instance.proc_base_all_procs[procArray->pgprocnos[slot]];

        if (databaseid != InvalidOid && proc->databaseId != databaseid)
            continue;

        VirtualTransactionId procvxid;
        GET_VXID_FROM_PGPROC(procvxid, *proc);
        proc->recoveryConflictPending = conflictPending;

        ThreadId targetPid = proc->pid;
        if (targetPid != 0) {
            /*
             * Kill the pid if it's still here. If not, that's what we
             * wanted so ignore any errors.
             */
            (void)SendProcSignal(targetPid, sigmode, procvxid.backendId);
        }
    }
    LWLockRelease(ProcArrayLock);
}
/*
 * ValidDBoidAndUseroid -- does this proc match the CLEAN CONNECTION target?
 *
 * Three forms are supported:
 * 1. Only database:          CLEAN CONNECTION TO ALL FORCE FOR DATABASE xxx;
 * 2. Only user:              CLEAN CONNECTION TO ALL FORCE TO USER xxx;
 * 3. Both database and user: CLEAN CONNECTION TO ALL FORCE FOR DATABASE xxx TO USER xxx;
 */
static bool ValidDBoidAndUseroid(Oid databaseOid, Oid userOid, volatile PGPROC* proc)
{
    /* database-only form */
    if ((databaseOid != InvalidOid) && (userOid == InvalidOid) && (proc->databaseId == databaseOid)) {
        return true;
    }
    /* user-only form */
    if ((databaseOid == InvalidOid) && (userOid != InvalidOid) && (proc->roleId == userOid)) {
        return true;
    }
    /* combined form: both fields must match */
    if ((proc->databaseId == databaseOid) && (proc->roleId == userOid)) {
        return true;
    }
    return false;
}
/*
 * CountSingleNodeActiveBackends -- count live backends matching the given
 * database oid and/or user oid (CLEAN CONNECTION support).
 *
 * Prepared transactions are ignored. Returns 0 (with a WARNING) when both
 * oids are invalid.
 */
int CountSingleNodeActiveBackends(Oid databaseOid, Oid userOid)
{
    if ((databaseOid == InvalidOid) && (userOid == InvalidOid)) {
        ereport(WARNING,
            (errmsg("DB oid and user oid are all Invalid (may be NULL). Shut down clean activite sessions.")));
        return 0;
    }

    ProcArrayStruct* procArray = g_instance.proc_array_idx;
    int nmatched = 0;

    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (int slot = 0; slot < procArray->numProcs; slot++) {
        volatile PGPROC* proc = g_instance.proc_base_all_procs[procArray->pgprocnos[slot]];

        if (proc->pid == 0)
            continue; /* do not count prepared xacts */
        if (ValidDBoidAndUseroid(databaseOid, userOid, proc))
            nmatched++;
    }
    LWLockRelease(ProcArrayLock);

    return nmatched;
}
/*
* CancelSingleNodeBackends --- cancel backends in single node by database oid or user oid
*/
/*
 * CancelSingleNodeBackends --- signal backends in single node that match the
 * given database oid and/or user oid (CLEAN CONNECTION support).
 *
 * Each matching proc has its recoveryConflictPending flag set to
 * conflictPending before being signaled with sigmode. Emits a WARNING and
 * does nothing when both oids are invalid.
 */
void CancelSingleNodeBackends(Oid databaseOid, Oid userOid, ProcSignalReason sigmode, bool conflictPending)
{
    if ((databaseOid == InvalidOid) && (userOid == InvalidOid)) {
        ereport(WARNING,
            (errmsg("DB oid and user oid are all Invalid (may be NULL). Shut down clean activite sessions.")));
        return;
    }

    ProcArrayStruct* procArray = g_instance.proc_array_idx;

    /* tell all backends to die */
    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    for (int slot = 0; slot < procArray->numProcs; slot++) {
        volatile PGPROC* proc = g_instance.proc_base_all_procs[procArray->pgprocnos[slot]];

        if (!ValidDBoidAndUseroid(databaseOid, userOid, proc))
            continue;

        VirtualTransactionId procvxid;
        GET_VXID_FROM_PGPROC(procvxid, *proc);
        proc->recoveryConflictPending = conflictPending;

        ThreadId targetPid = proc->pid;
        if (targetPid != 0) {
            /*
             * Kill the pid if it's still here. If not, that's what we
             * wanted so ignore any errors.
             */
            (void)SendProcSignal(targetPid, sigmode, procvxid.backendId);
        }
    }
    LWLockRelease(ProcArrayLock);
}
/*
 * RoleidPartitionLock -- map a roleid hash code onto one of the
 * NUM_SESSION_ROLEID_PARTITIONS session-roleid partition LWLocks.
 */
LWLock *RoleidPartitionLock(uint32 hashCode)
{
    int lockId = FirstSessRoleIdLock + (int)(hashCode % NUM_SESSION_ROLEID_PARTITIONS);

    return GetMainLWLockByIndex(lockId);
}
/* Init RoleId HashTable */
void InitRoleIdHashTable()
{
    HASHCTL hctl;
    errno_t rc = 0;
    /* hash-table storage lives in a dedicated child of the instance context */
    MemoryContext context = AllocSetContextCreate(g_instance.instance_context,
        "RoleIdHashtblContext",
        ALLOCSET_SMALL_MINSIZE,
        ALLOCSET_SMALL_INITSIZE,
        ALLOCSET_DEFAULT_MAXSIZE);
    rc = memset_s(&hctl, sizeof(HASHCTL), 0, sizeof(HASHCTL));
    securec_check(rc, "", "");
    hctl.keysize = sizeof(Oid);
    hctl.entrysize = sizeof(RoleIdHashEntry);
    hctl.hash = oid_hash;
    hctl.hcxt = context;
    /* partitioned so lookups contend only on a per-partition LWLock
     * (see RoleidPartitionLock) */
    hctl.num_partitions = NUM_SESSION_ROLEID_PARTITIONS;
    g_instance.roleid_cxt.roleid_table = HeapMemInitHash("Roleid map",
        INIT_ROLEID_HASHTBL,
        MAX_ROLEID_HASHTBL,
        &hctl,
        HASH_ELEM | HASH_FUNCTION | HASH_PARTITION);
}
/* get the RoleId Count. */
/*
 * GetRoleIdCount -- return the recorded session count for a role.
 *
 * Looks the role up in the partitioned roleid hash table under the
 * partition's shared lock; a role with no entry counts as 0.
 */
int GetRoleIdCount(Oid roleoid)
{
    bool found = false;
    uint32 hashCode = 0;
    volatile int64 roleNum = 0;
    RoleIdHashEntry *entry = NULL;
    /* pick the partition lock guarding this roleid's hash partition */
    hashCode = oid_hash(&roleoid, sizeof(Oid));
    LWLock *lock = RoleidPartitionLock(hashCode);
    (void)LWLockAcquire(lock, LW_SHARED);
    entry = (RoleIdHashEntry *)hash_search(g_instance.roleid_cxt.roleid_table, (void*)&roleoid, HASH_FIND, &found);
    if (!found) {
        roleNum = 0; /* no sessions recorded for this role */
    } else {
        roleNum = entry->roleNum;
    }
    LWLockRelease(lock);
    /* NOTE(review): roleNum is int64 but is narrowed to int on return */
    return roleNum;
}
/*
 * IncreaseUserCount -- bump the per-role session counter.
 *
 * Creates the hash entry on first use. A roleoid of 0 is ignored and
 * returns 0. Returns the new count for the role.
 */
int IncreaseUserCount(Oid roleoid)
{
    bool found = false;
    uint32 hashCode = 0;
    volatile int64 roleNum = 0;
    RoleIdHashEntry *entry = NULL;
    if(roleoid == 0) {
        return 0;
    }
    hashCode = oid_hash(&roleoid, sizeof(Oid));
    LWLock *lock = RoleidPartitionLock(hashCode);
    /* exclusive lock: we may insert a new entry or mutate the counter */
    (void)LWLockAcquire(lock, LW_EXCLUSIVE);
    entry = (RoleIdHashEntry *)hash_search(g_instance.roleid_cxt.roleid_table, (void*)&roleoid, HASH_ENTER, &found);
    if (!found) {
        entry->roleNum = 1; /* first session for this role */
    } else {
        entry->roleNum++;
    }
    roleNum = entry->roleNum;
    LWLockRelease(lock);
    return roleNum;
}
/*
 * DecreaseUserCount -- decrement the per-role session counter.
 *
 * Removes the hash entry once its count drops to zero. A roleoid of 0 is
 * ignored. Returns the remaining count, or 0 when the role has no entry.
 */
int DecreaseUserCount(Oid roleoid)
{
    bool found = false;
    uint32 hashCode = 0;
    volatile int64 roleNum = 0;
    RoleIdHashEntry *entry = NULL;
    if(roleoid == 0) {
        return 0;
    }
    hashCode = oid_hash(&roleoid, sizeof(Oid));
    LWLock *lock = RoleidPartitionLock(hashCode);
    (void)LWLockAcquire(lock, LW_EXCLUSIVE);
    entry = (RoleIdHashEntry *)hash_search(g_instance.roleid_cxt.roleid_table, (void*)&roleoid, HASH_FIND, &found);
    if (found) {
        entry->roleNum--;
        /* read the value before a possible HASH_REMOVE frees the entry */
        roleNum = entry->roleNum;
        if (entry->roleNum == 0) {
            (void)hash_search(g_instance.roleid_cxt.roleid_table, (void*)&roleoid, HASH_REMOVE, &found);
        }
    }
    LWLockRelease(lock);
    return roleNum;
}
/*
* CountUserBackends --- count backends that are used by specified user
*/
int CountUserBackends(Oid roleid)
{
    int count = 0;
    if (!ENABLE_THREAD_POOL) {
        ProcArrayStruct* arrayP = g_instance.proc_array_idx;
        LWLockAcquire(ProcArrayLock, LW_SHARED);
        for (int index = 0; index < arrayP->numProcs; index++) {
            int pgprocno = arrayP->pgprocnos[index];
            volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
            if (proc->pid == 0)
                continue; /* do not count prepared xacts */
            /*
             * NOTE(review): t_thrd.role is the CURRENT thread's role and is
             * loop-invariant, so a STREAM_WORKER caller always counts zero
             * backends -- confirm this is the intended behavior.
             */
            if (proc->roleId == roleid && (t_thrd.role != STREAM_WORKER))
                count++;
        }
        LWLockRelease(ProcArrayLock);
    } else {
        /* thread pool mode tracks per-role session counts in the roleid hash table */
        count = GetRoleIdCount(roleid);
    }
    return count;
}
/*
* CountOtherDBBackends -- check for other backends running in the given DB
*
* If there are other backends in the DB, we will wait a maximum of 5 seconds
* for them to exit. Autovacuum backends are encouraged to exit early by
* sending them SIGTERM, but normal user backends are just waited for.
*
* The current backend is always ignored; it is caller's responsibility to
* check whether the current backend uses the given DB, if it's important.
*
* Returns TRUE if there are (still) other backends in the DB, FALSE if not.
* Also, *nbackends and *nprepared are set to the number of other backends
* and prepared transactions in the DB, respectively.
*
* This function is used to interlock DROP DATABASE and related commands
* against there being any active backends in the target DB --- dropping the
* DB while active backends remain would be a Bad Thing. Note that we cannot
* detect here the possibility of a newly-started backend that is trying to
* connect to the doomed database, so additional interlocking is needed during
* backend startup. The caller should normally hold an exclusive lock on the
* target DB before calling this, which is one reason we mustn't wait
* indefinitely.
*/
bool CountOtherDBBackends(Oid databaseId, int* nbackends, int* nprepared)
{
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
#define MAXAUTOVACPIDS 10 /* max autovacs to SIGTERM per iteration */
    ThreadId autovac_pids[MAXAUTOVACPIDS];
    ThreadId wdrxdb_pids[MAXAUTOVACPIDS];
    int tries;
    /* In DMS primary mode the standbys must be free of backends too. */
    if (ENABLE_DMS && SS_PRIMARY_MODE) {
        bool ret = SSCheckDbBackendsFromAllStandby(databaseId);
        if (ret) {
            *nbackends = *nprepared = 0;
            return true;
        }
    }
    /* 50 tries with 100ms sleep between tries makes 5 sec total wait */
    for (tries = 0; tries < 50; tries++) {
        int nworkers = 0;
        int nautovacs = 0;
        int nwdrxdbs = 0;
        int index;
        CHECK_FOR_INTERRUPTS();
        *nbackends = *nprepared = 0;
        LWLockAcquire(ProcArrayLock, LW_SHARED);
        for (index = 0; index < arrayP->numProcs; index++) {
            int pgprocno = arrayP->pgprocnos[index];
            volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
            volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
            volatile PgBackendStatus* beentry = pgstat_get_backend_single_entry(proc->sessionid);
            if (proc->databaseId != databaseId)
                continue;
            if (proc == t_thrd.proc)
                continue; /* the current backend is always ignored */
            if (proc->pid == 0)
                (*nprepared)++; /* dummy PGPROC: prepared transaction */
            else {
                (*nbackends)++;
                /* thread-pool workers are re-counted via CountDBSessions below */
                if (ENABLE_THREAD_POOL && proc->sessionid > 0) {
                    nworkers++;
                }
                /* remember autovacuum pids so we can SIGTERM them after unlocking */
                if ((pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) && nautovacs < MAXAUTOVACPIDS) {
                    autovac_pids[nautovacs++] = proc->pid;
                }
                /* likewise remember WDRXdb cross-database query threads */
                if (!ENABLE_THREAD_POOL && beentry != NULL && strcmp(beentry->st_appname, "WDRXdb") == 0 &&
                    nwdrxdbs < MAXAUTOVACPIDS) {
                    wdrxdb_pids[nwdrxdbs++] = proc->pid;
                    ereport(LOG, (errmsg("WDRXdb sessionid (beentry sessionid): %lu", beentry->st_sessionid)));
                    ereport(LOG, (errmsg("WDRXdb thread id (beentry st_tid): %d", beentry->st_tid)));
                }
            }
        }
        /* Under thread pool mode, we also need to count inactive sessions that are detached from worker threads */
        if (ENABLE_THREAD_POOL) {
            *nbackends -= nworkers;
            *nbackends += g_threadPoolControler->GetSessionCtrl()->CountDBSessions(databaseId);
        }
        LWLockRelease(ProcArrayLock);
        if (*nbackends == 0 && *nprepared == 0) {
            return false; /* no conflicting backends, so done */
        }
        /*
         * Send SIGTERM to any conflicting autovacuums before sleeping. We
         * postpone this step until after the loop because we don't want to
         * hold ProcArrayLock while issuing kill(). We have no idea what might
         * block kill() inside the kernel...
         */
        for (index = 0; index < nautovacs; index++) {
            gs_signal_send(autovac_pids[index], SIGTERM); /* ignore any error */
        }
        /* WDRXdb threads additionally receive SIGUSR2 */
        for (index = 0; index < nwdrxdbs; index++) {
            gs_signal_send(wdrxdb_pids[index], SIGTERM);
            gs_signal_send(wdrxdb_pids[index], SIGUSR2);
            ereport(LOG, (errmsg("WDRXdb thread pid: %lu is killed(proc->pid)", wdrxdb_pids[index])));
        }
        /* sleep, then try again */
        pg_usleep(100 * 1000L); /* 100ms */
    }
    return true; /* timed out, still conflicts */
}
#ifdef PGXC
/*
* ReloadConnInfoOnBackends -- reload connection information for all the backends
*/
void ReloadConnInfoOnBackends(void)
{
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    int index;
    ThreadId pid = 0;
    /* tell all backends to reload except this one who already reloaded */
    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    for (index = 0; index < arrayP->numProcs; index++) {
        int pgprocno = arrayP->pgprocnos[index];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
        VirtualTransactionId vxid;
        GET_VXID_FROM_PGPROC(vxid, *proc);
        if (proc == t_thrd.proc)
            continue; /* do not do that on myself */
        if (proc->pid == 0)
            continue; /* useless on prepared xacts */
        if (pgxact->vacuumFlags & PROC_IN_VACUUM)
            continue; /* ignore vacuum processes */
        if (EnableGlobalSysCache()) {
            /* syscache is on thread in gsc mode. when enable thread pool,
             * even the thread does not connect to a database, we still need send signal to it */
            if (!OidIsValid(proc->databaseId) && !ENABLE_THREAD_POOL) {
                continue;
            }
        } else {
            /* skip threads that are not connected to any database */
            if (!OidIsValid(proc->databaseId)) {
                continue;
            }
            /* thread-pool sessions are reloaded via HandlePoolerReload below */
            if (ENABLE_THREAD_POOL && proc->sessionid > 0) {
                continue;
            }
        }
        pid = proc->pid;
        /*
         * Send the reload signal if backend still exists
         */
        (void)SendProcSignal(pid, PROCSIG_PGXCPOOL_RELOAD, vxid.backendId);
    }
    LWLockRelease(ProcArrayLock);
    /* in thread pool mode, detached sessions are handled by the session controller */
    if (ENABLE_THREAD_POOL) {
        g_threadPoolControler->GetSessionCtrl()->HandlePoolerReload();
    }
}
#endif
char dump_memory_context_name[MEMORY_CONTEXT_NAME_LEN];
/*
* DumpMemoryCtxOnBackend -- dump memory context on some backend
*/
/*
 * DumpMemoryCtxOnBackend -- ask backend 'tid' to dump one of its memory
 * contexts.
 *
 * The requested context name is passed through the file-level
 * dump_memory_context_name buffer, then PROCSIG_MEMORYCONTEXT_DUMP is sent
 * to the target thread. Raises ERROR if the name does not fit the buffer
 * or if the signal cannot be delivered.
 */
void DumpMemoryCtxOnBackend(ThreadId tid, const char* mem_ctx)
{
    int ret;
    errno_t ss_rc = EOK;

    /* Reject names that cannot fit (with NUL terminator) into the buffer. */
    if (strlen(mem_ctx) >= MEMORY_CONTEXT_NAME_LEN) {
        /*
         * This is a bad argument, not an allocation failure, so report it
         * with an invalid-parameter error code rather than OUT_OF_MEMORY.
         */
        ereport(ERROR,
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("The name of memory context is too long(>=%dbytes)", MEMORY_CONTEXT_NAME_LEN)));
        return; /* not reached; ereport(ERROR) does not return */
    }

    ss_rc = memset_s(dump_memory_context_name, MEMORY_CONTEXT_NAME_LEN, 0, MEMORY_CONTEXT_NAME_LEN);
    securec_check(ss_rc, "\0", "\0");
    ss_rc = strcpy_s(dump_memory_context_name, MEMORY_CONTEXT_NAME_LEN, mem_ctx);
    securec_check(ss_rc, "\0", "\0");

    LWLockAcquire(ProcArrayLock, LW_SHARED);
    ret = SendProcSignal(tid, PROCSIG_MEMORYCONTEXT_DUMP, InvalidBackendId);
    LWLockRelease(ProcArrayLock);

    if (ret)
        ereport(ERROR,
            (errcode(ERRCODE_CONNECTION_FAILURE),
                errmsg("Fail to send signal to backend(tid:%lu).", (unsigned long)tid)));
}
/*
* ProcArraySetReplicationSlotXmin
*
* Install limits to future computations of the xmin horizon to prevent vacuum
* and HOT pruning from removing affected rows still needed by clients with
* replicaton slots.
*/
/*
 * ProcArraySetReplicationSlotXmin
 *
 * Install limits to future computations of the xmin horizon to prevent vacuum
 * and HOT pruning from removing affected rows still needed by clients with
 * replicaton slots. An invalid xmin/catalog_xmin is installed directly;
 * otherwise the stored value is replaced only when it precedes the new one.
 */
void ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, bool already_locked)
{
    ProcArrayStruct* procArray = g_instance.proc_array_idx;

    Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));

    if (!already_locked) {
        LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    }

    if (xmin == InvalidTransactionId || TransactionIdPrecedes(procArray->replication_slot_xmin, xmin)) {
        procArray->replication_slot_xmin = xmin;
    }
    if (catalog_xmin == InvalidTransactionId ||
        TransactionIdPrecedes(procArray->replication_slot_catalog_xmin, catalog_xmin)) {
        procArray->replication_slot_catalog_xmin = catalog_xmin;
    }

    if (!already_locked) {
        LWLockRelease(ProcArrayLock);
    }
}
/*
* GetReplicationSlotCatalogXmin
*
* Return replication_slot_catalog_xmin.
*/
/*
 * GetReplicationSlotCatalogXmin
 *
 * Return the current replication_slot_catalog_xmin (read without taking
 * ProcArrayLock).
 */
TransactionId GetReplicationSlotCatalogXmin()
{
    return g_instance.proc_array_idx->replication_slot_catalog_xmin;
}
/*
* ProcArrayGetReplicationSlotXmin
*
* Return the current slot xmin limits. That's useful to be able to remove
* data that's older than those limits.
*/
void ProcArrayGetReplicationSlotXmin(TransactionId* xmin, TransactionId* catalog_xmin)
{
    ProcArrayStruct* procArray = g_instance.proc_array_idx;

    /* Read both limits under the shared lock; either out-parameter may be NULL. */
    LWLockAcquire(ProcArrayLock, LW_SHARED);

    if (xmin != NULL) {
        *xmin = procArray->replication_slot_xmin;
    }
    if (catalog_xmin != NULL) {
        *catalog_xmin = procArray->replication_slot_catalog_xmin;
    }

    LWLockRelease(ProcArrayLock);
}
/*
* XidCacheRemoveRunningXids
*
* Remove a bunch of TransactionIds from the list of known-running
* subtransactions for my backend. Both the specified xid and those in
* the xids[] array (of length nxids) are removed from the subxids cache.
* latestXid must be the latest XID among the group. We should store the
* required parameters into proc before performing XidCacheRemoveRunningXids,
* including subtransaction xid, the number of committed subtransaction,
* committed substransaction list, the latestXid between its xid and its
* committed subtransactions'.
*
* We don't do any locking here; caller must get the procArrayLock before
* perform XidCacheRemoveRunningXids.
*/
void XidCacheRemoveRunningXids(PGPROC* proc, PGXACT* pgxact)
{
    int i, j;
    /* All inputs were stashed in the PGPROC by the caller (see header comment). */
    TransactionId xid = proc->procArrayGroupMemberXid;
    int nxids = proc->procArrayGroupSubXactNXids;
    TransactionId* xids = proc->procArrayGroupSubXactXids;
    TransactionId latestXid = proc->procArrayGroupSubXactLatestXid;

    Assert(TransactionIdIsValid(xid));

    /*
     * Under normal circumstances xid and xids[] will be in increasing order,
     * as will be the entries in subxids. Scan backwards to avoid O(N^2)
     * behavior when removing a lot of xids.
     */
    for (i = nxids - 1; i >= 0; i--) {
        TransactionId anxid = xids[i];

        /* Remove anxid from the subxids cache by swapping in the last entry. */
        for (j = pgxact->nxids - 1; j >= 0; j--) {
            if (TransactionIdEquals(proc->subxids.xids[j], anxid)) {
                proc->subxids.xids[j] = proc->subxids.xids[pgxact->nxids - 1];
                pgxact->nxids--;
                break;
            }
        }
        /*
         * Ordinarily we should have found it, unless the cache has
         * overflowed. However it's also possible for this routine to be
         * invoked multiple times for the same subtransaction, in case of an
         * error during AbortSubTransaction. So instead of Assert, emit a
         * debug warning.
         */
        if (j < 0)
            ereport(WARNING, (errmsg("did not find subXID " XID_FMT " in t_thrd.proc", anxid)));
    }

    /* Likewise remove the top-level xid itself from the subxids cache. */
    for (j = pgxact->nxids - 1; j >= 0; j--) {
        if (TransactionIdEquals(proc->subxids.xids[j], xid)) {
            proc->subxids.xids[j] = proc->subxids.xids[pgxact->nxids - 1];
            pgxact->nxids--;
            break;
        }
    }
    /* Ordinarily we should have found it, unless the cache has overflowed */
    if (j < 0)
        ereport(WARNING, (errmsg("did not find subXID " XID_FMT " in t_thrd.proc", xid)));

    /* Also advance global latestCompletedXid while holding the lock */
    if (TransactionIdPrecedes(t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid, latestXid))
        t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid = latestXid;
}
#ifdef XIDCACHE_DEBUG
/*
* Print stats about effectiveness of XID cache
*/
static void DisplayXidCache(void)
{
    /* One-line dump of the XID-cache lookup counters to stderr. */
    fprintf(stderr,
        "XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, "
        "nooflo: %ld, slow: %ld\n",
        xc_by_recent_xmin,
        xc_by_known_xact,
        xc_by_my_xact,
        xc_by_latest_xid,
        xc_by_main_xid,
        xc_by_child_xid,
        xc_by_known_assigned,
        xc_no_overflow,
        xc_slow_answer);
}
#endif /* XIDCACHE_DEBUG */
#ifdef PGXC
/*
* Store snapshot data received from the Coordinator
*/
void SetGlobalSnapshotData(
    TransactionId xmin, TransactionId xmax, uint64 csn, GTM_Timeline timeline, bool ss_need_sync_wait_all)
{
    /* Record that the active snapshot now comes from the local Coordinator. */
    u_sess->utils_cxt.snapshot_source = SNAPSHOT_COORDINATOR;
    u_sess->utils_cxt.gxmin = xmin;
    u_sess->utils_cxt.gxmax = xmax;
    u_sess->utils_cxt.g_snapshotcsn = csn;
    u_sess->utils_cxt.GtmTimeline = timeline;
    u_sess->utils_cxt.snapshot_need_sync_wait_all = ss_need_sync_wait_all;

    /* Mirror the same values into the session's GTM snapshot struct. */
    GTM_Snapshot gtmSnap = u_sess->utils_cxt.g_GTM_Snapshot;
    gtmSnap->sn_xmin = xmin;
    gtmSnap->sn_xmax = xmax;
    gtmSnap->sn_recent_global_xmin = u_sess->utils_cxt.RecentGlobalXmin;
    gtmSnap->csn = csn;

    if (module_logging_is_on(MOD_TRANS_SNAPSHOT)) {
        ereport(LOG,
            (errmodule(MOD_TRANS_SNAPSHOT),
                errmsg("global snapshot info from CN: gxmin: " XID_FMT ", gxmax: " XID_FMT ", gscn: %lu,"
                       "RecentGlobalXmin: %lu, cn_xc_maintain_mode: %s.",
                    u_sess->utils_cxt.gxmin,
                    u_sess->utils_cxt.gxmax,
                    u_sess->utils_cxt.g_snapshotcsn,
                    u_sess->utils_cxt.RecentGlobalXmin,
                    u_sess->utils_cxt.cn_xc_maintain_mode ? "on" : "off")));
    }
}
/*
 * Store snapshot data on the Datanode; marks the snapshot source
 * as SNAPSHOT_DATANODE.
 */
void SetGlobalSnapshotDataNode(TransactionId xmin, TransactionId xmax, uint64 csn, GTM_Timeline timeline)
{
    /* Remember where this snapshot came from, then stash its components. */
    u_sess->utils_cxt.snapshot_source = SNAPSHOT_DATANODE;
    u_sess->utils_cxt.GtmTimeline = timeline;
    u_sess->utils_cxt.gxmin = xmin;
    u_sess->utils_cxt.gxmax = xmax;
    u_sess->utils_cxt.g_snapshotcsn = csn;

    ereport(DEBUG1,
        (errmsg("global snapshot info: gxmin: " XID_FMT ", gxmax: " XID_FMT ", gscn: %lu",
            xmin,
            xmax,
            csn)));
}
/*
* Force Datanode to use local snapshot data
*/
void UnsetGlobalSnapshotData(void)
{
    /* Clear every component of the global snapshot and its origin marker. */
    u_sess->utils_cxt.snapshot_source = SNAPSHOT_UNDEFINED;
    u_sess->utils_cxt.is_autovacuum_snapshot = false;
    u_sess->utils_cxt.gxmin = InvalidTransactionId;
    u_sess->utils_cxt.gxmax = InvalidTransactionId;
    u_sess->utils_cxt.g_snapshotcsn = 0;
    u_sess->utils_cxt.GtmTimeline = InvalidTransactionTimeline;

    ereport(DEBUG1, (errmsg("unset snapshot info")));
}
/*
* Entry of snapshot obtention for openGauss node
* returns information about running transactions.
* The returned snapshot includes xmin (lowest still-running xact ID),
* xmax (highest completed xact ID + 1), and a list of running xact IDs
* in the range xmin <= xid < xmax. It is used as follows:
* All xact IDs < xmin are considered finished.
* All xact IDs >= xmax are considered still running.
* For an xact ID xmin <= xid < xmax, consult list to see whether
* it is considered running or not.
* This ensures that the set of transactions seen as "running" by the
* current xact will not change after it takes the snapshot.
*
* We also update the following backend-global variables:
* TransactionXmin: the oldest xmin of any snapshot in use in the
* current transaction (this is the same as MyPgXact->xmin).
* RecentXmin: the xmin computed for the most recent snapshot. XIDs
* older than this are known not running any more.
* RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
* running transactions). This is
* the same computation done by GetOldestXmin(true, true).
*/
static bool GetPGXCSnapshotData(Snapshot snapshot)
{
#ifdef ENABLE_MULTIPLE_NODES
    /*
     * If this node is in recovery phase,
     * snapshot has to be taken directly from WAL information.
     */
    if (!IS_MULTI_DISASTER_RECOVER_MODE && RecoveryInProgress())
        return false;

    /*
     * The typical case is that the local Coordinator passes down the snapshot to the
     * remote nodes to use, while it itself obtains it from GTM. Autovacuum processes
     * need however to connect directly to GTM themselves to obtain XID and snapshot
     * information for autovacuum worker threads.
     * A vacuum analyze uses a special function to get a transaction ID and signal
     * GTM not to include this transaction ID in snapshot.
     * A vacuum worker starts as a normal transaction would.
     */
    if ((IS_PGXC_DATANODE || IsConnFromCoord() || IsAutoVacuumWorkerProcess() || GetForceXidFromGTM()) &&
        IsNormalProcessingMode()) {
        if (GetSnapshotDataDataNode(snapshot))
            return true;
        /* else fallthrough */
    } else if (IS_PGXC_COORDINATOR && !IsConnFromCoord() && IsNormalProcessingMode()) {
        /* Snapshot has ever been received from remote Coordinator */
        if (GetSnapshotDataCoordinator(snapshot))
            return true;
        /* else fallthrough */
    }

    /*
     * If we have no snapshot, we will use a local one.
     * If we are in normal mode, we output a warning though.
     * We currently fallback and use a local one at initdb time,
     * as well as when a new connection occurs.
     * This is also the case for autovacuum launcher.
     *
     * IsPostmasterEnvironment - checks for initdb
     * IsNormalProcessingMode() - checks for new connections
     * IsAutoVacuumLauncherProcess - checks for autovacuum launcher process
     */
    if (IS_PGXC_DATANODE && !isRestoreMode && u_sess->utils_cxt.snapshot_source == SNAPSHOT_UNDEFINED &&
        IsPostmasterEnvironment && IsNormalProcessingMode() && !IsAutoVacuumLauncherProcess()) {
        if (!t_thrd.postgres_cxt.isInResetUserName)
            ereport(WARNING, (errmsg("Do not have a GTM snapshot available")));
    }

    /* Returning false tells the caller to fall back to a locally computed snapshot. */
    return false;
#else
    DISTRIBUTED_FEATURE_NOT_SUPPORTED();
    return false;
#endif /* ENABLE_MULTIPLE_NODES */
}
#ifdef ENABLE_MULTIPLE_NODES
/*
* Get snapshot data for Datanode
* This is usually passed down from the Coordinator
*
* returns whether or not to return immediately with snapshot
*/
static bool GetSnapshotDataDataNode(Snapshot snapshot)
{
    Assert(IS_PGXC_DATANODE || IsConnFromCoord() || IsAutoVacuumWorkerProcess() || GetForceXidFromGTM());

    /*
     * Fallback to general case if Datanode is accessed directly by an application
     */
    if (IsPGXCNodeXactDatanodeDirect())
        return GetSnapshotDataCoordinator(snapshot);

    /* Autovacuum (or forced-from-GTM) sessions fetch their snapshot from GTM themselves. */
    if (IsAutoVacuumWorkerProcess() || GetForceXidFromGTM()) {
        GTM_Snapshot gtm_snapshot;
        ereport(DEBUG1,
            (errmsg("Getting snapshot for autovacuum. Current XID = " XID_FMT, GetCurrentTransactionIdIfAny())));
        gtm_snapshot = IS_MULTI_DISASTER_RECOVER_MODE ? GetSnapshotGTMDR() : GetSnapshotGTMLite();
        if (!gtm_snapshot) {
            if (g_instance.status > NoShutdown) {
                /* During shutdown, just let the caller fall back to a local snapshot. */
                if (module_logging_is_on(MOD_TRANS_SNAPSHOT)) {
                    ereport(LOG, (errmodule(MOD_TRANS_SNAPSHOT), errmsg("Shut down, could not obtain snapshot")));
                }
                return false;
            } else {
                ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE), errmsg("GTM error, could not obtain snapshot")));
            }
        } else {
            /* Remember the GTM snapshot, then overlay its CSN on a locally built snapshot. */
            *u_sess->utils_cxt.g_GTM_Snapshot = *gtm_snapshot;
            u_sess->utils_cxt.snapshot_source = SNAPSHOT_DIRECT;
            snapshot->gtm_snapshot_type = IsAutoVacuumWorkerProcess() ? GTM_SNAPSHOT_TYPE_AUTOVACUUM : GTM_SNAPSHOT_TYPE_GLOBAL;
            /* only use gtm csn */
            Snapshot ret;
            ret = GetLocalSnapshotData(snapshot);
            Assert(ret != NULL);
            snapshot->snapshotcsn = set_proc_csn_and_check("GetSnapshotDataDataNodeDirectGTM",
                gtm_snapshot->csn, snapshot->gtm_snapshot_type, SNAPSHOT_DIRECT);
            u_sess->utils_cxt.g_GTM_Snapshot->csn = snapshot->snapshotcsn;
            u_sess->utils_cxt.RecentGlobalXmin = GetOldestXmin(NULL, true);
            u_sess->utils_cxt.RecentGlobalCatalogXmin = GetOldestCatalogXmin();
            return true;
        }
    }

    /* Normal case: reuse the snapshot the Coordinator shipped with the query. */
    if (GTM_LITE_MODE && u_sess->utils_cxt.snapshot_source == SNAPSHOT_COORDINATOR) {
        TransactionId save_recentglobalxmin = u_sess->utils_cxt.RecentGlobalXmin;
        snapshot->gtm_snapshot_type =
            u_sess->utils_cxt.is_autovacuum_snapshot ? GTM_SNAPSHOT_TYPE_AUTOVACUUM : GTM_SNAPSHOT_TYPE_GLOBAL;
        if (IS_MULTI_DISASTER_RECOVER_MODE) {
            /* DR mode: take the CN's CSN and sanity-check it against local replay state. */
            snapshot->snapshotcsn = u_sess->utils_cxt.g_snapshotcsn;
            t_thrd.pgxact->csn_dr = snapshot->snapshotcsn;
            pg_memory_barrier();
            CommitSeqNo lastReplayedConflictCSN = (CommitSeqNo)pg_atomic_read_u64(
                &(g_instance.comm_cxt.predo_cxt.last_replayed_conflict_csn));
            if (lastReplayedConflictCSN != 0 && snapshot->snapshotcsn - 1 <= lastReplayedConflictCSN) {
                ereport(ERROR, (errmsg("gtm csn small: gtm csn %lu, lastReplayedConflictCSN %lu",
                    snapshot->snapshotcsn, lastReplayedConflictCSN)));
            }
            LWLockAcquire(XLogMaxCSNLock, LW_SHARED);
            if (t_thrd.xact_cxt.ShmemVariableCache->xlogMaxCSN + 1 < snapshot->snapshotcsn) {
                ereport(ERROR, (errmsg("dn data invisible: local csn %lu, gtm snapshotcsn %lu",
                    t_thrd.xact_cxt.ShmemVariableCache->xlogMaxCSN, snapshot->snapshotcsn)));
            }
            LWLockRelease(XLogMaxCSNLock);
        } else {
            /* only use gtm csn */
            Snapshot ret;
            ret = GetLocalSnapshotData(snapshot);
            Assert(ret != NULL);
            snapshot->snapshotcsn = u_sess->utils_cxt.g_snapshotcsn;
            (void)set_proc_csn_and_check("GetSnapshotDataDataNodeFromCN", snapshot->snapshotcsn,
                snapshot->gtm_snapshot_type, SNAPSHOT_COORDINATOR);
            /* reset RecentGlobalXmin */
            u_sess->utils_cxt.RecentGlobalXmin = save_recentglobalxmin;
            /* too late to check and set */
        }
        return true;
    }
    /* No usable global snapshot: caller falls back. */
    return false;
}
/*
* Get snapshot data for Coordinator
* It will later be passed down to Datanodes
*
* returns whether or not to return immediately with snapshot
*/
static bool GetSnapshotDataCoordinator(Snapshot snapshot)
{
    GTM_Snapshot gtm_snapshot;

    Assert(IS_PGXC_COORDINATOR || IsPGXCNodeXactDatanodeDirect());

    /* Log some information about snapshot obtention */
    if (IsAutoVacuumWorkerProcess()) {
        ereport(DEBUG1,
            (errmsg("Getting snapshot for autovacuum. Current XID = " XID_FMT, GetCurrentTransactionIdIfAny())));
    } else {
        ereport(DEBUG1, (errmsg("Getting snapshot. Current XID = " XID_FMT, GetCurrentTransactionIdIfAny())));
    }

    gtm_snapshot = IS_MULTI_DISASTER_RECOVER_MODE ? GetSnapshotGTMDR() : GetSnapshotGTMLite();

    if (!gtm_snapshot) {
        if (g_instance.status > NoShutdown) {
            /* During shutdown, let the caller fall back to a local snapshot. */
            return false;
        } else {
            /* error level degrade when in AbortTransaction procedure */
            ereport(t_thrd.xact_cxt.bInAbortTransaction ? WARNING : ERROR,
                (errcode(ERRCODE_CONNECTION_FAILURE),
                    errmsg("GTM error, could not obtain snapshot XID = " XID_FMT, GetCurrentTransactionIdIfAny())));
        }
    } else {
        snapshot->gtm_snapshot_type = GTM_SNAPSHOT_TYPE_GLOBAL;
        *u_sess->utils_cxt.g_GTM_Snapshot = *gtm_snapshot;
        if (IS_MULTI_DISASTER_RECOVER_MODE) {
            /* DR mode: take the GTM CSN directly and sanity-check it against local replay. */
            snapshot->snapshotcsn = gtm_snapshot->csn;
            t_thrd.pgxact->csn_dr = snapshot->snapshotcsn;
            LWLockAcquire(XLogMaxCSNLock, LW_SHARED);
            if (t_thrd.xact_cxt.ShmemVariableCache->xlogMaxCSN + 1 < snapshot->snapshotcsn) {
                ereport(ERROR, (errmsg("cn data invisible: local csn %lu, gtm snapshotcsn %lu", t_thrd.xact_cxt.ShmemVariableCache->xlogMaxCSN, snapshot->snapshotcsn)));
            }
            LWLockRelease(XLogMaxCSNLock);
        } else {
            /* only use gtm csn */
            Snapshot ret;
            ret = GetLocalSnapshotData(snapshot);
            Assert(ret != NULL);
            snapshot->snapshotcsn = set_proc_csn_and_check("GetSnapshotDataCoordinator", gtm_snapshot->csn,
                snapshot->gtm_snapshot_type, SNAPSHOT_DIRECT);
            u_sess->utils_cxt.g_GTM_Snapshot->csn = snapshot->snapshotcsn;
            u_sess->utils_cxt.RecentGlobalXmin = GetOldestXmin(NULL, true);
            u_sess->utils_cxt.RecentGlobalCatalogXmin = GetOldestCatalogXmin();
        }
        if (module_logging_is_on(MOD_TRANS_SNAPSHOT)) {
            ereport(LOG, (errmodule(MOD_TRANS_SNAPSHOT),
                errmsg("CN gets snapshot from gtm_snapshot, csn = %lu.", snapshot->snapshotcsn)));
        }
        return true;
    }
    return false;
}
/*
 * proc_cancel_invalid_gtm_lite_conn
 *
 * After reconnecting to GTM, walk the proc array and signal (SIGUSR2) every
 * postgres worker thread still attached to a different GTM host so it can
 * cancel its stale GTM connection.
 */
void proc_cancel_invalid_gtm_lite_conn()
{
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    int i;
    GtmHostIndex hostindex = GTM_HOST_INVAILD;
    /* (Re)connect to GTM to learn which GTM host index is currently valid. */
    GtmHostIndex my_gtmhost = InitGTM(false);

    ereport(LOG, (errmsg("GTMLite: canceling stale GTM connections, new GTM host index: %d.", my_gtmhost)));
    Assert(my_gtmhost == t_thrd.proc->my_gtmhost);

    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (i = 0; i < arrayP->numProcs; i++) {
        int pgprocno = arrayP->pgprocnos[i];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
        if (proc == NULL || (proc->pid == 0)) {
            continue;
        }
        Assert(proc->myProgName != NULL);
        /* skip non-postgres threads */
        if (strcmp((const char*)(proc->myProgName), "postgres") != 0) {
            continue;
        }
        /* Atomic read (add-zero) of the thread's current GTM host index. */
        hostindex = (GtmHostIndex)pg_atomic_fetch_add_u32((volatile uint32*)&proc->my_gtmhost, 0);
        ereport(DEBUG1, (errmsg("current GTM hostindex %d, thread id: %lu, thread GTM hostindex: %d", my_gtmhost,
            proc->pid, hostindex)));
        if (hostindex == GTM_HOST_INVAILD) {
            continue;
        }
        /* Thread is attached to a stale GTM host: flag it and wake it with SIGUSR2. */
        if (my_gtmhost != hostindex) {
            (void)pg_atomic_exchange_u32(&proc->signal_cancel_gtm_conn_flag, HOST2FLAG(my_gtmhost));
            if (gs_signal_send(proc->pid, SIGUSR2)) {
                /* Delivery failed: clear the flag again so the thread is not confused later. */
                ereport(WARNING, (errmsg("GTMLite: could not send signal to thread %lu: %m", proc->pid)));
                (void)pg_atomic_exchange_u32(&proc->signal_cancel_gtm_conn_flag, 0);
            } else {
                ereport(LOG, (errmsg("GTMLite: success to send SIGUSR2 to openGauss thread: %lu.", proc->pid)));
            }
        }
    }
    LWLockRelease(ProcArrayLock);
}
#endif /* ENABLE_MULTIPLE_NODES */
/* Cleanup the snapshot */
static void cleanSnapshot(Snapshot snapshot)
{
    /* Reset the snapshot fields to their "no snapshot" values. */
    snapshot->xmin = InvalidTransactionId;
    snapshot->xmax = InvalidTransactionId;
    snapshot->snapshotcsn = 0;
    snapshot->timeline = InvalidTransactionTimeline;
}
#endif /* PGXC */
TransactionId GetGlobal2pcXmin()
{
    /* Smallest xid/prepare_xid among live backends; start from nextXid. */
    TransactionId global2pcXmin = t_thrd.xact_cxt.ShmemVariableCache->nextXid;
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    int* pgprocnos = arrayP->pgprocnos;

    LWLockAcquire(ProcArrayLock, LW_SHARED);
    int numProcs = arrayP->numProcs;

    for (int idx = 0; idx < numProcs; idx++) {
        int pgprocno = pgprocnos[idx];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[pgprocno];
        TransactionId xid = pgxact->xid;
        TransactionId prepare_xid = pgxact->prepare_xid;

        /* ignore prepared transactions */
        if (proc->pid == 0) {
            continue;
        }

        ereport(DEBUG5, (errmsg("Active transaction: xid: " XID_FMT " ,prepare_xid: " XID_FMT, xid, prepare_xid)));

        if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, global2pcXmin)) {
            global2pcXmin = xid;
        }
        if (TransactionIdIsNormal(prepare_xid) && TransactionIdPrecedes(prepare_xid, global2pcXmin)) {
            global2pcXmin = prepare_xid;
        }
    }
    LWLockRelease(ProcArrayLock);

    return global2pcXmin;
}
/*
* Wait for the transaction which modify the tuple to finish.
* First release the buffer lock. After waiting, re-acquire the buffer lock.
*/
void SyncWaitXidEnd(TransactionId xid, Buffer buffer, const Snapshot snapshot)
{
    if (!BufferIsValid(buffer)) {
        /* No buffer lock held: just wait for the local transaction. */
        SyncLocalXidWait(xid, snapshot);
        return;
    }

    /* Remember how the content lock is held so it can be re-taken afterwards. */
    BufferDesc* bufDesc = GetBufferDescriptor(buffer - 1);
    LWLockMode heldMode = GetHeldLWLockMode(bufDesc->content_lock);
    Assert(heldMode == LW_EXCLUSIVE || heldMode == LW_SHARED);

    /* Drop the buffer lock while waiting for the transaction... */
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    SyncLocalXidWait(xid, snapshot);

    /* ...then re-acquire it in the equivalent buffer-lock mode. */
    LockBuffer(buffer, (heldMode == LW_EXCLUSIVE) ? BUFFER_LOCK_EXCLUSIVE : BUFFER_LOCK_SHARE);
}
/*
* Wait local transaction finish, if transaction wait time exceed transaction_sync_naptime, call gs_clean.
*/
void SyncLocalXidWait(TransactionId xid, const Snapshot snapshot)
{
    ReleaseAllGSCRdConcurrentLock();
    int64 remainingNapTime = (int64)u_sess->attr.attr_common.transaction_sync_naptime * 1000000; /* us */
    int64 remainingTimeout = (int64)u_sess->attr.attr_common.transaction_sync_timeout * 1000000; /* us */
    const int64 sleepTime = 1000;
    WaitState oldStatus = pgstat_report_waitstatus(STATE_WAIT_UNDEFINED, true);

    gstrace_entry(GS_TRC_ID_SyncLocalXidWait);
    /* Poll until the xact lock for 'xid' can be taken, i.e. the transaction ended. */
    while (!ConditionalXactLockTableWait(xid, snapshot)) {
        /* type of transaction id is same as node id, reuse the second param for waited transaction id */
        pgstat_report_waitstatus_xid(STATE_WAIT_XACTSYNC, xid);
        /* Nap budget exhausted: wake the twophase cleaner so it runs gs_clean. */
        if (u_sess->attr.attr_common.transaction_sync_naptime && remainingNapTime <= 0 && twoPhaseCleanerProc) {
            ereport(LOG,
                (errcode(ERRCODE_SUCCESSFUL_COMPLETION),
                    errmsg("wait transaction sync time would exceed %d s, "
                           "call gs_clean to clean reserved prepared transactions.",
                        u_sess->attr.attr_common.transaction_sync_naptime)));
            CHECK_FOR_INTERRUPTS();
            /* call gs_clean */
            bSyncXactsCallGsclean = true;
            SetLatch(&twoPhaseCleanerProc->procLatch);
            /* sleep 0.1s, wait gs_clean process */
            pg_usleep(100 * sleepTime);
            remainingNapTime = (int64)u_sess->attr.attr_common.transaction_sync_naptime * 1000000; /* us */
        }
        /* Hard timeout: restore the wait status and error out. */
        if (u_sess->attr.attr_common.transaction_sync_timeout && remainingTimeout <= 0) {
            (void)pgstat_report_waitstatus(oldStatus);
            ereport(ERROR,
                (errcode(ERRCODE_LOCK_WAIT_TIMEOUT),
                    errmsg("wait transaction %lu sync time exceed %d s.",
                        xid,
                        u_sess->attr.attr_common.transaction_sync_timeout)));
        }
        /* Bail out promptly when the instance is shutting down or demoting. */
        if (g_instance.status > NoShutdown || g_instance.demotion > NoDemote) {
            ereport(FATAL,
                (errcode(ERRCODE_ADMIN_SHUTDOWN),
                    errmsg("terminating SyncLocalXactsWithGTM process due to administrator command")));
        }
        CHECK_FOR_INTERRUPTS();
        pg_usleep(sleepTime); /* 1ms */
        remainingNapTime = remainingNapTime - sleepTime;
        remainingTimeout = remainingTimeout - sleepTime;
    }
    (void)pgstat_report_waitstatus(oldStatus);
    gstrace_exit(GS_TRC_ID_SyncLocalXidWait);
}
/*
 * PrintCurrentSnapshotInfo
 *
 * Emit, at log level 'logelevel', the tuple xid being examined plus (when a
 * snapshot is supplied) that snapshot's xmin/xmax/csn and the shared
 * recentGlobalXmin.  'action' tags the log line; NULL is allowed.
 *
 * Fix: corrected typos in the emitted log text ("no aciton" -> "no action",
 * "MVCCSanpshot" -> "MVCCSnapshot").
 */
void PrintCurrentSnapshotInfo(int logelevel, TransactionId xid, Snapshot snapshot, const char* action)
{
    if (snapshot) {
        StringInfoData snapshot_str;
        initStringInfo(&snapshot_str);

        appendStringInfo(&snapshot_str,
            "snapshot xmin: %lu, xmax: %lu, csn: %lu, "
            "recentGlobalXmin: %lu",
            snapshot->xmin,
            snapshot->xmax,
            snapshot->snapshotcsn,
            pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->recentGlobalXmin));

        ereport(logelevel,
            (errmsg("[%s] xtuplexid= %lu, [MVCCSnapshot] %s", action ? action : "no action", xid, snapshot_str.data)));
        pfree(snapshot_str.data);
        snapshot_str.data = NULL;
    } else
        ereport(logelevel, (errmsg("[%s] tuplexid = %lu", action ? action : "no action", xid)));
}
/*
 * cache line size in bytes
 */
#define CACHE_LINE_SZ 64
/*
 * partition reference count to groups of threads to reduce contention
 */
#define NREFCNT 1
/*
 * atomic increment
 */
#define atomic_inc(ptr) __sync_add_and_fetch(ptr, 1)
/*
 * atomic decrement
 */
#define atomic_dec(ptr) __sync_sub_and_fetch(ptr, 1)
/*
 * cache-line aligned reference counter
 */
typedef struct _ref_cnt {
    unsigned count;
    /* NOTE(review): to pad the struct to exactly CACHE_LINE_SZ bytes the element
     * count would presumably be CACHE_LINE_SZ / sizeof(unsigned) - 1; with
     * "- sizeof(unsigned)" the struct is smaller than one cache line — confirm
     * whether this is intentional. */
    unsigned pad[CACHE_LINE_SZ / sizeof(unsigned) - sizeof(unsigned)];
} ref_cnt_t;
/* snapxid structure to hold the values computed at a commit time */
#ifdef __aarch64__
/* the offset of ref_cnt in the struct _snapxid. */
/* NOTE(review): REF_CNT_OFFSET (36) appears hand-computed from the fields
 * above the padding; verify it still matches if fields change. */
#define REF_CNT_OFFSET 36
typedef struct _snapxid {
    TransactionId xmin;
    TransactionId xmax;
    CommitSeqNo snapshotcsn;
    TransactionId localxmin; /* the latest xmin in local node, update at transaction end. */
    bool takenDuringRecovery;
    char padding[PG_CACHE_LINE_SIZE - REF_CNT_OFFSET]; /* pad the slot out to a cache line */
} snapxid_t;
#else
typedef struct _snapxid {
    TransactionId xmin;
    TransactionId xmax;
    CommitSeqNo snapshotcsn;
    TransactionId localxmin; /* the latest xmin in local node, update at transaction end. */
    bool takenDuringRecovery;
    ref_cnt_t ref_cnt[NREFCNT]; /* per-group reference counters pinning this slot */
} snapxid_t;
#endif
/*
 * the snapshot ring buffer
 */
static snapxid_t* g_snap_buffer = NULL; /* the ring buffer for snapxids */
static snapxid_t* g_snap_buffer_copy = NULL; /* the ring buffer for AtProcExit */
static size_t g_bufsz = 0; /* number of slots in the ring buffer */
static bool g_snap_assigned = false; /* true if current snap valid */
#define SNAP_SZ sizeof(snapxid_t) /* size of snapxid_t */
#define MaxNumSnapVersion 64 /* max version number */
/*
* get pointer to snapxid_t entry in specified index in ring buffer
*/
static inline snapxid_t* SNAPXID_AT(size_t i)
{
    /* Entry i lives i * SNAP_SZ bytes past the start of the ring buffer. */
    char* base = (char*)g_snap_buffer;
    return (snapxid_t*)(base + SNAP_SZ * i);
}
/*
* get offset in bytes of snapxid_t entry in ring buffer
*/
static inline size_t SNAPXID_OFFSET(snapxid_t* x)
{
    /* Byte distance from the start of the ring buffer to entry x. */
    return (size_t)((char*)x - (char*)g_snap_buffer);
}
/*
* get index of snapxid_t entry in ring buffer
*/
static inline size_t SNAPXID_INDEX(snapxid_t* x)
{
    /* Convert the byte offset of x back into a slot number. */
    size_t offset = SNAPXID_OFFSET(x);
    return offset / SNAP_SZ;
}
/*
 * points to most recently computed snapshot (an entry inside g_snap_buffer)
 */
static volatile snapxid_t* g_snap_current = NULL;
/*
 * points to next available slot in snapshot ring buffer
 */
static volatile snapxid_t* g_snap_next = NULL;
/*
* Report shared-memory space needed by CreateSharedRingBuffer.
*/
Size RingBufferShmemSize(void)
{
    /* Space for every snapshot slot in the ring. */
    Size sz = mul_size(MaxNumSnapVersion, SNAP_SZ);
#ifdef __aarch64__
    /* Extra slack so the buffer can be cache-line aligned on ARM. */
    sz += PG_CACHE_LINE_SIZE;
#endif
    return sz;
}
/*
* Initialize the shared Snapshot Ring Buffer during postmaster startup.
*/
void CreateSharedRingBuffer(void)
{
    bool found = false;
#ifdef __aarch64__
    /* Create or attach to the ProcArray shared structure. */
    g_snap_buffer = (snapxid_t*)CACHELINEALIGN(ShmemInitStruct("Snapshot Ring Buffer", RingBufferShmemSize(), &found));
#else
    /* Create or attach to the ProcArray shared structure. */
    g_snap_buffer = (snapxid_t*)ShmemInitStruct("Snapshot Ring Buffer", RingBufferShmemSize(), &found);
#endif
    if (!found) {
        /* Initialize if we're the first. */
        g_bufsz = MaxNumSnapVersion;
        g_snap_current = SNAPXID_AT(0);
        g_snap_next = SNAPXID_AT(1);
        g_snap_buffer_copy = g_snap_buffer;
        /* NOTE(review): on __aarch64__ this memset starts at the cache-line-
         * aligned pointer yet still covers the full RingBufferShmemSize(),
         * which could extend past the ShmemInitStruct allocation by up to
         * PG_CACHE_LINE_SIZE - 1 bytes — confirm against shmem alignment
         * guarantees. */
        errno_t rc = memset_s(g_snap_buffer, RingBufferShmemSize(), 0, RingBufferShmemSize());
        securec_check(rc, "\0", "\0");
    }
}
#ifdef __aarch64__
/*
* increment reference count of snapshot
*/
/*
 * increment reference count of snapshot (aarch64 variant)
 *
 * Each backend records the ring-buffer slots it references as bits in its
 * own PGPROC snap_refcnt_bitmap.
 *
 * Fix: shift a 64-bit one, not the int literal 1 — SNAPXID_INDEX(s) % 64 can
 * reach 63 (MaxNumSnapVersion is 64), and shifting an int by >= 32 bits is
 * undefined behavior / loses the high bits.
 */
static void IncrRefCount(snapxid_t* s)
{
    t_thrd.proc->snap_refcnt_bitmap |= ((uint64)1) << (SNAPXID_INDEX(s) % 64);
    pg_write_barrier();
}
/*
* decrement reference count of snapshot
*/
/*
 * decrement reference count of snapshot (aarch64 variant)
 *
 * Clears this backend's bit for the slot in its PGPROC snap_refcnt_bitmap.
 *
 * Fix: build the mask from a 64-bit one — slot indexes go up to 63, and
 * shifting the int literal 1 by >= 32 bits is undefined behavior, so the
 * wrong bit (or none) could be cleared for high slots.
 */
static void DecrRefCount(snapxid_t* s)
{
    t_thrd.proc->snap_refcnt_bitmap &= ~(((uint64)1) << (SNAPXID_INDEX(s) % 64));
    pg_write_barrier();
}
/*
* test for zero reference count of snapshot
*/
/*
 * test for zero reference count of snapshot (aarch64 variant)
 *
 * Returns 1 when no backend in the proc array has the slot's bit set in its
 * snap_refcnt_bitmap, 0 otherwise.
 *
 * Fix: compute the probe mask with a 64-bit shift — "1 << n" is an int shift
 * and is undefined for n >= 32, while slot indexes range up to 63.
 */
static int IsZeroRefCount(snapxid_t* s)
{
    uint64 bitmap = ((uint64)1) << (SNAPXID_INDEX(s) % 64);
    for (int i = 0; i < g_instance.proc_array_idx->numProcs; i++) {
        if (g_instance.proc_base_all_procs[g_instance.proc_array_idx->pgprocnos[i]]->snap_refcnt_bitmap & bitmap) {
            return 0;
        }
    }
    return 1;
}
#else
/*
* increment reference count of snapshot
*/
static void IncrRefCount(snapxid_t* s)
{
    /* Only one counter group is configured (NREFCNT == 1), so pin group 0. */
    atomic_inc(&s->ref_cnt[0].count);
}
/*
* decrement reference count of snapshot
*/
static void DecrRefCount(snapxid_t* s)
{
    /* Mirror of IncrRefCount: release the pin on counter group 0. */
    atomic_dec(&s->ref_cnt[0].count);
}
/*
* test for zero reference count of snapshot
*/
static int IsZeroRefCount(snapxid_t* s)
{
    /* The slot is reusable only once every counter group has dropped to zero. */
    for (int grp = 0; grp < NREFCNT; grp++) {
        if (s->ref_cnt[grp].count != 0) {
            return 0;
        }
    }
    return 1;
}
#endif
/* snapxid to be held off to the next commit */
static inline snapxid_t* GetNextSnapXid()
{
    /* NULL when the ring buffer has not been created (or is being torn down). */
    if (g_snap_buffer == NULL) {
        return NULL;
    }
    return (snapxid_t*)g_snap_next;
}
const static int SNAP_ERROR_COUNT = 256; /* full ring scans tolerated before PANIC */
/*
 * update the current snapshot pointer find the next available slot for the next pointer
 */
static void SetNextSnapXid()
{
    if (g_snap_buffer != NULL) {
        /* Publish the slot prepared by the committer as the current snapshot. */
        g_snap_current = g_snap_next;
        pg_write_barrier();
        g_snap_assigned = true;
        snapxid_t* ret = (snapxid_t*)g_snap_current;
        size_t idx = SNAPXID_INDEX(ret);
        int nofindCount = 0;
    loop:
        /* Scan forward from the just-published slot for an unreferenced one. */
        do {
            ++idx;
            /* if wrap-around, take start from head to find free slot */
            if (idx == g_bufsz)
                idx = 0;
            ret = SNAPXID_AT(idx);
            if (IsZeroRefCount(ret)) {
                g_snap_next = ret;
                return;
            }
            nofindCount++;
        } while (ret != g_snap_next);
        /* we alloc sufficient space for local snapshot , overflow should not happen here */
        ereport(WARNING, (errmsg("snapshot ring buffer overflow.")));
        if (nofindCount >= SNAP_ERROR_COUNT) {
            ereport(PANIC, (errcode(ERRCODE_LOG), errmsg("Can not get an available snapshot slot")));
        }
        /* try to find available slot */
        goto loop;
    }
}
/*
* just a wrapper to pass __snap_current to GetSnapshotData
*/
static snapxid_t* GetCurrentSnapXid()
{
    /* Pin the latest published snapshot before handing it out. */
    snapxid_t* current = (snapxid_t*)g_snap_current;
    IncrRefCount(current);
    return current;
}
/*
* release snapshot data (decrement reference count)
*/
static void ReleaseSnapXid(snapxid_t* snapshot)
{
    /* Drop the pin taken by GetCurrentSnapXid(). */
    DecrRefCount(snapshot);
}
#ifdef USE_ASSERT_CHECKING
/* add assert information for refcount of snapshot */
/*
 * Assertion-build guard: PANICs at scope exit if the snapshot pin taken in
 * GetLocalSnapshotData was not released via decr().
 */
class AutoSnapId {
public:
    int m_count;

    AutoSnapId() : m_count(1)
    {}

    ~AutoSnapId()
    {
        if (m_count > 0) {
            ereport(PANIC, (errcode(ERRCODE_LOG),
                errmsg("snapshot refcount leak, must be zero")));
        }
    }

    /* Mark the tracked reference as released. */
    void decr()
    {
        m_count = 0;
    }
};
#endif
Snapshot GetLocalSnapshotData(Snapshot snapshot)
{
    /* if first here, fallback to original code */
    if (!g_snap_assigned || (g_snap_buffer == NULL)) {
        ereport(DEBUG1, (errmsg("Falling back to origin GetSnapshotData: not assigned yet or during shutdown\n")));
        return NULL;
    }
    pg_read_barrier();
    /* Keep interrupts off while we hold a pin on a ring-buffer slot. */
    HOLD_INTERRUPTS();
    /* 1. increase ref-count of current snapshot in ring buffer */
    snapxid_t* snapxid = GetCurrentSnapXid();
#ifdef USE_ASSERT_CHECKING
    AutoSnapId snapid;
#endif
    /* save use_data for release */
    snapshot->user_data = snapxid;
    /* 2. copy from pre-computed snapshot arrays into return param snapshot */
    snapshot->takenDuringRecovery = snapxid->takenDuringRecovery;
    TransactionId replication_slot_xmin = g_instance.proc_array_idx->replication_slot_xmin;
    /* First snapshot of this transaction: advertise our xmin in the PGXACT. */
    if (!TransactionIdIsValid(t_thrd.pgxact->xmin)) {
        t_thrd.pgxact->xmin = u_sess->utils_cxt.TransactionXmin = snapxid->xmin;
        t_thrd.pgxact->handle = GetCurrentTransactionHandleIfAny();
    }
    /* Derive RecentGlobalXmin from the slot's localxmin minus the configured slack. */
    if (TransactionIdPrecedes(snapxid->localxmin, (uint64)u_sess->attr.attr_storage.vacuum_defer_cleanup_age)) {
        u_sess->utils_cxt.RecentGlobalXmin = FirstNormalTransactionId;
    } else {
        u_sess->utils_cxt.RecentGlobalXmin = snapxid->localxmin - u_sess->attr.attr_storage.vacuum_defer_cleanup_age;
    }
    if (!TransactionIdIsNormal(u_sess->utils_cxt.RecentGlobalXmin)) {
        u_sess->utils_cxt.RecentGlobalXmin = FirstNormalTransactionId;
    }
    /* Replication slots may hold the horizon back further. */
    if (TransactionIdIsNormal(replication_slot_xmin) &&
        TransactionIdPrecedes(replication_slot_xmin, u_sess->utils_cxt.RecentGlobalXmin)) {
        u_sess->utils_cxt.RecentGlobalXmin = replication_slot_xmin;
    }
    u_sess->utils_cxt.RecentGlobalCatalogXmin = GetOldestCatalogXmin();
    u_sess->utils_cxt.RecentXmin = snapxid->xmin;
    snapshot->xmin = snapxid->xmin;
    snapshot->xmax = snapxid->xmax;
    snapshot->snapshotcsn = snapxid->snapshotcsn;
    snapshot->curcid = GetCurrentCommandId(false);
    snapshot->active_count = 0;
    snapshot->regd_count = 0;
    snapshot->copied = false;
    /* Non-catalog tables can be vacuumed if older than this xid */
    u_sess->utils_cxt.RecentGlobalDataXmin = u_sess->utils_cxt.RecentGlobalXmin;
    /* 3. done copying: release the pin taken in step 1 */
    ReleaseSnapXid(snapxid);
    snapshot->user_data = NULL;
#ifdef USE_ASSERT_CHECKING
    snapid.decr();
#endif
    RESUME_INTERRUPTS();
    return snapshot;
}
#define MAX_PENDING_SNAPSHOT_CNT 1000 /* deferred snapshots before a forced xmin recompute */
#define CALC_SNAPSHOT_TIMEOUT (1 * 1000) /* recompute interval passed to TimestampDifferenceExceeds() */
void forward_recent_global_xmin(void)
{
    (void)LWLockAcquire(CsnMinLock, LW_EXCLUSIVE);
    /*
     * check and update recentGlobalXmin, get a snapshot,
     * the csn of xid preceed recentLocalXmin, must smaller than nextCommitSeqNo.
     */
    if (t_thrd.xact_cxt.ShmemVariableCache->keep_csn <= t_thrd.xact_cxt.ShmemVariableCache->cutoff_csn_min) {
        if (module_logging_is_on(MOD_TRANS_SNAPSHOT))
            ereport(LOG, (errmodule(MOD_TRANS_SNAPSHOT),
                errmsg("update recentGlobalXmin, from %lu to %lu. keep_xmin from %lu to %lu, "
                       "keep_csn from %lu to %lu.",
                    t_thrd.xact_cxt.ShmemVariableCache->recentGlobalXmin, t_thrd.xact_cxt.ShmemVariableCache->keep_xmin,
                    t_thrd.xact_cxt.ShmemVariableCache->keep_xmin, t_thrd.xact_cxt.ShmemVariableCache->recentLocalXmin,
                    t_thrd.xact_cxt.ShmemVariableCache->keep_csn,
                    t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo)));
        /* Advance the kept (xmin, csn) pair one generation forward. */
        t_thrd.xact_cxt.ShmemVariableCache->recentGlobalXmin = t_thrd.xact_cxt.ShmemVariableCache->keep_xmin;
        t_thrd.xact_cxt.ShmemVariableCache->keep_xmin = t_thrd.xact_cxt.ShmemVariableCache->recentLocalXmin;
        t_thrd.xact_cxt.ShmemVariableCache->keep_csn = t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo;
    }
    LWLockRelease(CsnMinLock);
}
/* Reset the CSN-cleanup bookkeeping in shared memory (taken under CsnMinLock). */
static void init_shmem_csn_cleanup_instr(void)
{
    (void)LWLockAcquire(CsnMinLock, LW_EXCLUSIVE);
    /* make sure cutoff_csn_min is small enough when after redo to avoid false positive invalid snapshot */
    t_thrd.xact_cxt.ShmemVariableCache->cutoff_csn_min = COMMITSEQNO_FIRST_NORMAL + 1;
    t_thrd.xact_cxt.ShmemVariableCache->keep_csn = COMMITSEQNO_FIRST_NORMAL + 1;
    t_thrd.xact_cxt.ShmemVariableCache->keep_xmin = t_thrd.xact_cxt.ShmemVariableCache->recentGlobalXmin;
    t_thrd.xact_cxt.ShmemVariableCache->local_csn_min = t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo;
    LWLockRelease(CsnMinLock);
}
static bool ForceCalculateSnapshotXmin(bool forceCalc)
{
    /* Recompute when explicitly forced, or when deferred calculation is disabled. */
    if (forceCalc) {
        return true;
    }
    return !u_sess->attr.attr_storage.enable_defer_calculate_snapshot;
}
/*
 * CalculateLocalLatestSnapshot
 *    Build a new local snapshot version and publish it into the lock-free
 *    snapshot ring buffer.
 *
 * forceCalc forces a full xmin recalculation; in GTM-Lite mode it also marks
 * the first call, triggering one-time CSN-cleanup initialization (see below).
 * Without it the expensive proc-array xmin scan is deferred and only done
 * every MAX_PENDING_SNAPSHOT_CNT calls or every CALC_SNAPSHOT_TIMEOUT.
 */
void CalculateLocalLatestSnapshot(bool forceCalc)
{
    /*
     * 1. copy current snapshot data to next
     * 2. follow same line as original ProcArrayEndTransactionInternal
     * 3. generate new snapshot, based on code in GetSnapshotData_Orig()
     * 4. add new snapshot to ring buffer (lock-free)
     * 5. advance ring-buffer current snapshot pointer.
     */
    ProcArrayStruct* arrayP = g_instance.proc_array_idx;
    TransactionId xmin;
    TransactionId xmax;
    TransactionId globalxmin;
    int index;
    Timestamp currentTimeStamp;
    /* static: deferral state must survive across calls within this process */
    static Timestamp snapshotTimeStamp = 0;
    static uint32 snapshotPendingCnt = 0;

    snapxid_t* snapxid = GetNextSnapXid();
    if (snapxid == NULL) {
        ereport(LOG, (errmsg("Skipping generation of new snapshot: ring buffer not active (during shutdown)\n")));
        return;
    }

    /* xmax is always latestCompletedXid + 1 */
    xmax = t_thrd.xact_cxt.ShmemVariableCache->latestCompletedXid;
    Assert(TransactionIdIsNormal(xmax));
    TransactionIdAdvance(xmax);

    /*
     * We calculate xmin under the following conditions:
     * 1. we always calculate snapshot if enable_defer_calculate_snapshot is off.
     * 2. we didn't calculate snapshot for MAX_PENDING_SNAPSHOT_CNT times.
     * 3. we didn't calculate snapshot for CALC_SNAPSHOT_TIMEOUT seconds.
     */
    currentTimeStamp = GetCurrentTimestamp();
    if (ForceCalculateSnapshotXmin(forceCalc) || ((++snapshotPendingCnt == MAX_PENDING_SNAPSHOT_CNT) ||
        (TimestampDifferenceExceeds(snapshotTimeStamp, currentTimeStamp, CALC_SNAPSHOT_TIMEOUT)))) {
        /* NOTE(review): presumably pairs with the pg_write_barrier() of the last publisher — confirm */
        pg_read_barrier();
        snapshotPendingCnt = 0;
        snapshotTimeStamp = currentTimeStamp;

        /* initialize xmin calculation with xmax */
        globalxmin = xmin = xmax;

        /* Also need to include the xmin of every ring-buffer snapshot version still in use */
        if (g_snap_buffer != NULL) {
            TransactionId minXmin = ((snapxid_t*)g_snap_current)->xmin;
            if (!TransactionIdIsValid(minXmin))
                minXmin = globalxmin;
            for (size_t idx = 0; idx < g_bufsz; idx++) {
                snapxid_t* ret = NULL;
                ret = SNAPXID_AT(idx);
                /* only slots still referenced by some backend can hold back xmin */
                if (!IsZeroRefCount(ret) && TransactionIdIsValid(ret->xmin)) {
                    if (TransactionIdPrecedes(ret->xmin, minXmin)) {
                        minXmin = ret->xmin;
                    }
                }
            }
            if (TransactionIdPrecedes(minXmin, globalxmin))
                globalxmin = minXmin;
        }

        int* pgprocnos = arrayP->pgprocnos;
        int numProcs;

        /*
         * Spin over procArray checking xid, xmin, and subxids. The goal is
         * to gather all active xids, find the lowest xmin, and try to record
         * subxids. Also need include myself.
         */
        numProcs = arrayP->numProcs;
        for (index = 0; index < numProcs; index++) {
            int pgprocno = pgprocnos[index];
            volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
            TransactionId xid = InvalidTransactionId;

            /*
             * Backend is doing logical decoding which manages xmin
             * separately, check below.
             */
            if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING)
                continue;

            /* Update globalxmin to be the smallest valid xmin, only ignore procs running LAZY VACUUM */
            if (!(pgxact->vacuumFlags & PROC_IN_VACUUM)) {
                xid = pgxact->xmin; /* fetch just once */
            }
            if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, globalxmin))
                globalxmin = xid;

            /* Fetch xid just once - see GetNewTransactionId */
            xid = pgxact->xid;

            /* If no XID assigned, use xid passed down from CN */
            if (!TransactionIdIsNormal(xid))
                xid = pgxact->next_xid;

            /*
             * If the transaction has no XID assigned, we can skip it; it
             * won't have sub-XIDs either. If the XID is >= xmax, we can also
             * skip it; such transactions will be treated as running anyway
             * (and any sub-XIDs will also be >= xmax).
             */
            if (!TransactionIdIsNormal(xid) || !TransactionIdPrecedes(xid, xmax))
                continue;

            /*
             * We don't include our own XIDs (if any) in the snapshot, but we
             * must include them in xmin.
             * Not true any more in this function.
             */
            if (TransactionIdPrecedes(xid, xmin))
                xmin = xid;
        }

        /*
         * Update globalxmin to include actual process xids. This is a slightly
         * different way of computing it than GetOldestXmin uses, but should give
         * the same result. GTM-FREE-MODE always use recentLocalXmin.
         */
        if (TransactionIdPrecedes(xmin, globalxmin))
            globalxmin = xmin;

        if (ENABLE_DMS && SS_PRIMARY_MODE) {
            /* shared-storage primary: merge our oldest xmin with the cluster-wide minimum */
            SSUpdateNodeOldestXmin(SS_MY_INST_ID, globalxmin);
            globalxmin = SSGetGlobalOldestXmin(globalxmin);
            if (ENABLE_SS_BCAST_SNAPSHOT) {
                SSSendLatestSnapshotToStandby(xmin, xmax, t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo);
            }
        }

        t_thrd.xact_cxt.ShmemVariableCache->xmin = xmin;
        t_thrd.xact_cxt.ShmemVariableCache->recentLocalXmin = globalxmin;
        if (GTM_FREE_MODE) {
            t_thrd.xact_cxt.ShmemVariableCache->recentGlobalXmin = globalxmin;
        }
    } else if (ENABLE_SS_BCAST_SNAPSHOT && SS_PRIMARY_MODE) {
        /* xmin recalculation deferred: broadcast the previously computed xmin */
        SSSendLatestSnapshotToStandby(t_thrd.xact_cxt.ShmemVariableCache->xmin, xmax,
            t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo);
    }

    if (GTM_LITE_MODE) {
        currentTimeStamp = GetCurrentTimestamp();
        if (forceCalc) { /* means first time here; */
            init_shmem_csn_cleanup_instr();
            forward_recent_global_xmin();
        }
    }

    /* fill the new snapshot version ... */
    snapxid->xmin = t_thrd.xact_cxt.ShmemVariableCache->xmin;
    snapxid->xmax = xmax;
    snapxid->localxmin = t_thrd.xact_cxt.ShmemVariableCache->recentLocalXmin;
    snapxid->snapshotcsn = t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo;
    snapxid->takenDuringRecovery = RecoveryInProgress();
    /* ... and make its contents visible before advancing the current pointer */
    pg_write_barrier();
    ereport(DEBUG1, (errmsg("Generated snapshot in ring buffer slot %lu\n", SNAPXID_INDEX(snapxid))));
    SetNextSnapXid();
}
/*
 * Return the minimal xmin in all the valid snapshot versions.
 */
static TransactionId GetMultiSnapshotOldestXmin()
{
    const snapxid_t* currentSnap = (snapxid_t*)g_snap_current;
    return currentSnap->localxmin;
}
/*
 * Clear the cached snapshot xmin of the given backend.
 */
void ProcArrayResetXmin(PGPROC* proc)
{
    /*
     * No locking needed: an Xid store is assumed to be atomic, so readers
     * never observe a torn value.
     */
    g_instance.proc_base_all_xacts[proc->pgprocno].xmin = InvalidTransactionId;
}
/* return global csn from GTM (as recorded on the current backend's PGPROC) */
CommitSeqNo GetCommitCsn()
{
    const CommitSeqNo commitCsn = t_thrd.proc->commitCSN;
    return commitCsn;
}
/* Record the commit CSN on the current backend's PGPROC (read back via GetCommitCsn). */
void setCommitCsn(uint64 commit_csn)
{
    t_thrd.proc->commitCSN = commit_csn;
}
/**
 * @Description: Look up the top-level parent xid of a given subtransaction
 * xid by scanning the sub-xid caches of all live backends.
 *
 * @in xid - the sub transaction id
 * @return - InvalidTransactionId if no running backend has the sub xid
 * cached, otherwise the owning backend's top-level xid.
 */
TransactionId SubTransGetTopParentXidFromProcs(TransactionId xid)
{
    ProcArrayStruct* procArray = g_instance.proc_array_idx;
    TransactionId result = InvalidTransactionId;

    LWLockAcquire(ProcArrayLock, LW_SHARED);
    for (int idx = 0; idx < procArray->numProcs; idx++) {
        int procno = procArray->pgprocnos[idx];
        volatile PGPROC* proc = g_instance.proc_base_all_procs[procno];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[procno];
        /* Fetch the top-level xid just once - see GetNewTransactionId */
        TransactionId parentXid = pgxact->xid;

        if (pgxact->nxids <= 0) {
            continue;
        }

        /* The subxids array is protected by the per-proc subxidsLock. */
        LWLockAcquire(proc->subxidsLock, LW_SHARED);
        bool found = false;
        for (int sub = 0; sub < pgxact->nxids; sub++) {
            if (TransactionIdEquals(proc->subxids.xids[sub], xid)) {
                found = true;
                break;
            }
        }
        LWLockRelease(proc->subxidsLock);

        if (found) {
            result = parentXid;
            break;
        }
    }
    LWLockRelease(ProcArrayLock);
    return result;
}
/*
 * pgxc_gtm_snapshot_status - SQL-callable stub for the GTM snapshot status
 * view. Both build variants unconditionally raise FEATURE_NOT_SUPPORTED
 * before returning any rows; the SRF_RETURN_DONE after ereport(ERROR) is
 * unreachable and only satisfies the SRF calling convention.
 */
Datum pgxc_gtm_snapshot_status(PG_FUNCTION_ARGS)
{
#ifndef ENABLE_MULTIPLE_NODES
    FuncCallContext* funcctx = NULL;
    ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("unsupported view in single node mode.")));
    SRF_RETURN_DONE(funcctx);
#else
    FuncCallContext* funcctx = NULL;
    /* check gtm mode, only gtm support this function */
    ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
        errmsg("unsupported function or view in %s mode.", GTM_LITE_MODE ? "GTM-Lite" : "GTM-Free")));
    SRF_RETURN_DONE(funcctx);
#endif
}
/*
 * @Description: check whether csn is valid, if valid, set it to pgxact.
 *
 * Maintenance-mode sessions, autovacuum workers and autovacuum-type
 * snapshots bypass all checks and keep their csn unchanged.
 *
 * @in func - name of the calling function, used in the error detail
 * @in csn_min - the csn to check
 * @in gtm_snapshot_type - the type of snapshot
 * @in from - where the snapshot came from
 *
 * @return - return csn_min set in pgxact,
 * or InvalidCommitSeqNo if csn_min is invalid and from the local multi snapshot
 */
CommitSeqNo
set_proc_csn_and_check(const char* func, CommitSeqNo csn_min, GTM_SnapshotType gtm_snapshot_type, SnapshotSource from)
{
    if (u_sess->attr.attr_common.xc_maintenance_mode || u_sess->utils_cxt.cn_xc_maintain_mode ||
        IsAutoVacuumWorkerProcess() || gtm_snapshot_type == GTM_SNAPSHOT_TYPE_AUTOVACUUM) {
        return csn_min;
    }
    /* a csn that never reached COMMITTED state is unusable as a snapshot bound */
    if (!COMMITSEQNO_IS_COMMITTED(csn_min))
        ereport(ERROR, (errcode(ERRCODE_SNAPSHOT_INVALID),
            errmsg("Snapshot is invalid, snaphot type %s, snapshot csn: %lu.",
                transfer_snapshot_type(gtm_snapshot_type), csn_min)));
    LWLockAcquire(CsnMinLock, LW_SHARED);
    /* make sure the received csn from gtm is not small than local_csn_min */
    CommitSeqNo local_csn_min = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->local_csn_min);
    if (from == SNAPSHOT_DIRECT && csn_min < local_csn_min) {
        csn_min = local_csn_min;
    }
    CommitSeqNo cutoff_csn_min = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->cutoff_csn_min);
    if (csn_min < cutoff_csn_min) {
        /* a too-old snapshot from a datanode is reported back to the caller, not raised */
        if (from == SNAPSHOT_DATANODE) {
            LWLockRelease(CsnMinLock);
            return InvalidCommitSeqNo;
        }
        /* release the lock explicitly before raising */
        LWLockRelease(CsnMinLock);
        ereport(ERROR,
            (errcode(ERRCODE_SNAPSHOT_INVALID),
                errmsg("Snapshot is invalid, this is a safe error, snapshot too old."),
                errdetail("Snaphot type %s csn %lu is lower than cutoff_csn_min %lu in %s.",
                    transfer_snapshot_type(gtm_snapshot_type), csn_min, cutoff_csn_min, func),
                errhint("This is a safe error report, will not impact "
                        "data consistency, retry your query if needed.")));
    } else {
        ereport(DEBUG1, (errmsg("try to set my proc csn from %lu to %lu.",
            t_thrd.pgxact->csn_min, csn_min)));
    }
    /* publish the (possibly clamped) csn_min while still holding CsnMinLock */
    t_thrd.pgxact->csn_min = csn_min;
    LWLockRelease(CsnMinLock);
    return t_thrd.pgxact->csn_min;
}
/*
 * get_gtm_lite_status - SQL-callable SRF returning the GTM-Lite status
 * (backup_xid, csn) as exactly one row.
 *
 * Unsupported in single-node builds and in GTM-Free mode; both cases raise
 * ERRCODE_FEATURE_NOT_SUPPORTED.
 */
Datum get_gtm_lite_status(PG_FUNCTION_ARGS)
{
#ifndef ENABLE_MULTIPLE_NODES
    FuncCallContext* funcctx = NULL;
    ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("unsupported view in single node mode.")));
    SRF_RETURN_DONE(funcctx);
#else
#define GTM_LITE_STATUS_ATTRS 2
    /* check gtm mode, gtm-free unsupport this function */
    if (GTM_FREE_MODE) {
        ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
            errmsg("unsupported function or view in GTM-FREE mode.")));
    }
    FuncCallContext* funcctx = NULL;
    GTMLite_Status gtm_status = NULL;
    if (SRF_IS_FIRSTCALL()) {
        /* first call: build the two-attribute tuple descriptor in the SRF's memory context */
        MemoryContext oldcontext;
        TupleDesc tupdesc;
        funcctx = SRF_FIRSTCALL_INIT();
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
        tupdesc = CreateTemplateTupleDesc(GTM_LITE_STATUS_ATTRS, false);
        TupleDescInitEntry(tupdesc, (AttrNumber)1, "backup_xid", XIDOID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber)2, "csn", XIDOID, -1, 0);
        funcctx->tuple_desc = BlessTupleDesc(tupdesc);
        funcctx->max_calls = 1; /* exactly one result row */
        MemoryContextSwitchTo(oldcontext);
    }
    /* stuff done on every call of the function */
    funcctx = SRF_PERCALL_SETUP();
    if (funcctx->call_cntr < funcctx->max_calls) {
        Datum values[GTM_LITE_STATUS_ATTRS];
        bool nulls[GTM_LITE_STATUS_ATTRS];
        HeapTuple tuple;
        errno_t rc = 0;
        rc = memset_s(values, sizeof(values), 0, sizeof(values));
        securec_check_c(rc, "\0", "\0");
        rc = memset_s(nulls, sizeof(nulls), 0, sizeof(nulls));
        securec_check_c(rc, "\0", "\0");
        /* query the GTM; NULL means it is down or failing over */
        gtm_status = GetGTMLiteStatus();
        if (!gtm_status) {
            ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE),
                errmsg("GTM error, could not obtain snapshot_status, please check GTM is running or failovering.")));
        }
        values[0] = TransactionIdGetDatum(gtm_status->backup_xid);
        values[1] = TransactionIdGetDatum(gtm_status->csn);
        tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
        SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
    } else {
        SRF_RETURN_DONE(funcctx);
    }
#endif
}
/* Map a GTM_SnapshotType value to a human-readable name for log/error text. */
const char* transfer_snapshot_type(GTM_SnapshotType gtm_snap_type)
{
    switch (gtm_snap_type) {
        case GTM_SNAPSHOT_TYPE_UNDEFINED:
            return "UNDEFINED";
        case GTM_SNAPSHOT_TYPE_LOCAL:
            return "LOCAL";
        case GTM_SNAPSHOT_TYPE_GLOBAL:
            return "GLOBAL";
        case GTM_SNAPSHOT_TYPE_AUTOVACUUM:
            return "AUTOVACUUM";
        default:
            return "UnKnown";
    }
}
/*
 * search all active backend to get oldest frozenxid
 * for global temp table.
 *
 * Returns the oldest gtt_session_frozenxid among backends attached to the
 * current database, or InvalidTransactionId if there is none (or global temp
 * tables are disabled, or we are in recovery / read-only shared-storage mode).
 *
 * When maxSize > 0, also fills pids[]/xids[] with every backend that holds a
 * gtt frozenxid and stores the count in *n. *n is initialized to 0 before any
 * early return so callers never read an indeterminate count.
 */
TransactionId ListAllThreadGttFrozenxids(int maxSize, ThreadId *pids, TransactionId *xids, int *n)
{
    ProcArrayStruct *arrayP = g_instance.proc_array_idx;
    TransactionId result = InvalidTransactionId;
    int index;
    int flags = 0;
    int i = 0;

    if (maxSize > 0) {
        Assert(pids);
        Assert(xids);
        Assert(n);
        /* initialize the output count up front, before any early return */
        *n = 0;
    }
    /* global temp tables disabled: nothing to report */
    if (g_instance.attr.attr_storage.max_active_gtt <= 0)
        return InvalidTransactionId;
    if (RecoveryInProgress() || SSIsServerModeReadOnly())
        return InvalidTransactionId;

    /* skip autovacuum workers and logical decoding backends */
    flags |= PROC_IS_AUTOVACUUM;
    flags |= PROC_IN_LOGICAL_DECODING;

    LWLockAcquire(ProcArrayLock, LW_SHARED);
    if (maxSize > 0 && maxSize < arrayP->numProcs) {
        LWLockRelease(ProcArrayLock);
        elog(ERROR, "pids, xids array size is not enough for list all gtt frozenxids.");
    }
    for (index = 0; index < arrayP->numProcs; index++) {
        int pgprocno = arrayP->pgprocnos[index];
        volatile PGPROC *proc = g_instance.proc_base_all_procs[pgprocno];
        volatile PGXACT *pgxact = &g_instance.proc_base_all_xacts[pgprocno];

        if (pgxact->vacuumFlags & flags)
            continue;
        if (proc->databaseId == u_sess->proc_cxt.MyDatabaseId &&
            TransactionIdIsNormal(proc->gtt_session_frozenxid)) {
            /* track the minimum (oldest) frozenxid seen so far */
            if (result == InvalidTransactionId ||
                TransactionIdPrecedes(proc->gtt_session_frozenxid, result))
                result = proc->gtt_session_frozenxid;
            if (maxSize > 0) {
                pids[i] = proc->pid;
                xids[i] = proc->gtt_session_frozenxid;
                i++;
            }
        }
    }
    LWLockRelease(ProcArrayLock);

    if (maxSize > 0) {
        *n = i;
    }
    return result;
}
/*
 * Compute the minimum committed csn_min across all live backends, starting
 * from the current nextCommitSeqNo.
 */
CommitSeqNo
calculate_local_csn_min()
{
    /* acquire procarray and csnmin lock, and there is no deadlock */
    LWLockAcquire(ProcArrayLock, LW_SHARED);
    ProcArrayStruct *procArray = g_instance.proc_array_idx;
    CommitSeqNo result = t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo;
    LWLockAcquire(CsnMinLock, LW_EXCLUSIVE);
    for (int i = 0; i < procArray->numProcs; i++) {
        int procno = procArray->pgprocnos[i];
        volatile PGXACT *pgxact = &g_instance.proc_base_all_xacts[procno];
        /*
         * Ignore procs doing logical decoding (which manage xmin separately)
         * and procs running LAZY VACUUM.
         */
        if (pgxact->vacuumFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM)) {
            continue;
        }
        CommitSeqNo procCsn = pgxact->csn_min; /* fetch the csn min just once */
        if (COMMITSEQNO_IS_COMMITTED(procCsn) && procCsn < result) {
            result = procCsn;
        }
    }
    LWLockRelease(CsnMinLock);
    LWLockRelease(ProcArrayLock);
    return result;
}
/*
 * Updates the maximum value for CSN read from XLog.
 */
void UpdateXLogMaxCSN(CommitSeqNo xlogCSN)
{
    /* cheap unlocked pre-check; the condition is re-verified under the lock */
    if (xlogCSN <= t_thrd.xact_cxt.ShmemVariableCache->xlogMaxCSN) {
        return;
    }
    LWLockAcquire(XLogMaxCSNLock, LW_EXCLUSIVE);
    if (t_thrd.xact_cxt.ShmemVariableCache->xlogMaxCSN < xlogCSN) {
        t_thrd.xact_cxt.ShmemVariableCache->xlogMaxCSN = xlogCSN;
    }
    LWLockRelease(XLogMaxCSNLock);
}
/*
 * Get the current oldest xmin among active backends' snapshots. *globalProcXmin
 * is written only when at least one backend holds a normal xmin (there may be
 * no transaction, or none that has published an xmin yet).
 */
void GetOldestGlobalProcXmin(TransactionId *globalProcXmin)
{
    TransactionId globalxmin = MaxTransactionId;
    ProcArrayStruct *arrayP = g_instance.proc_array_idx;

    (void)LWLockAcquire(ProcArrayLock, LW_SHARED);
    /*
     * Read numProcs/pgprocnos only while holding ProcArrayLock: they change
     * as backends attach and detach (previously read unlocked — a race).
     */
    int *pgprocnos = arrayP->pgprocnos;
    int numProcs = arrayP->numProcs;
    for (int index = 0; index < numProcs; index++) {
        int pgprocno = pgprocnos[index];
        volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno];
        TransactionId xid;

        /* a LAZY VACUUM's xmin must not hold back the global xmin */
        if (pgxact->vacuumFlags & PROC_IN_VACUUM)
            continue;
        xid = pgxact->xmin; /* fetch just once */
        if (TransactionIdIsNormal(xid) && TransactionIdPrecedesOrEquals(xid, globalxmin)) {
            globalxmin = xid;
            *globalProcXmin = globalxmin;
        }
    }
    LWLockRelease(ProcArrayLock);
}