1599 lines
57 KiB
C++
1599 lines
57 KiB
C++
/* -------------------------------------------------------------------------
|
|
*
|
|
* syncrep.cpp
|
|
*
|
|
* Synchronous replication is new as of PostgreSQL 9.1.
|
|
*
|
|
* If requested, transaction commits wait until their commit LSN is
|
|
* acknowledged by the sync standby.
|
|
*
|
|
* This module contains the code for waiting and release of backends.
|
|
* All code in this module executes on the primary. The core streaming
|
|
* replication transport remains within WALreceiver/WALsender modules.
|
|
*
|
|
* The essence of this design is that it isolates all logic about
|
|
* waiting/releasing onto the primary. The primary defines which standbys
|
|
* it wishes to wait for. The standby is completely unaware of the
|
|
* durability requirements of transactions on the primary, reducing the
|
|
* complexity of the code and streamlining both standby operations and
|
|
* network bandwidth because there is no requirement to ship
|
|
* per-transaction state information.
|
|
*
|
|
* Replication is either synchronous or not synchronous (async). If it is
|
|
* async, we just fastpath out of here. If it is sync, then we wait for
|
|
* the write or flush location on the standby before releasing the waiting backend.
|
|
* Further complexity in that interaction is expected in later releases.
|
|
*
|
|
* The best performing way to manage the waiting backends is to have a
|
|
* single ordered queue of waiting backends, so that we can avoid
|
|
 * searching through all waiters each time we receive a reply.
|
|
*
|
|
* In 9.1 we support only a single synchronous standby, chosen from a
|
|
* priority list of synchronous_standby_names. Before it can become the
|
|
* synchronous standby it must have caught up with the primary; that may
|
|
* take some time. Once caught up, the current highest priority standby
|
|
* will release waiters from the queue.
|
|
*
|
|
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
|
|
* Portions Copyright (c) 2010-2012, PostgreSQL Global Development Group
|
|
*
|
|
* IDENTIFICATION
|
|
* src/gausskernel/storage/replication/syncrep.cpp
|
|
*
|
|
* -------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
#include "knl/knl_variable.h"
|
|
|
|
#include <unistd.h>
|
|
|
|
#include "access/transam.h"
|
|
#include "access/xact.h"
|
|
#include "access/xlog.h"
|
|
#include "miscadmin.h"
|
|
#include "pgstat.h"
|
|
#include "replication/syncrep_gramparse.h"
|
|
#include "replication/walsender.h"
|
|
#include "replication/walsender_private.h"
|
|
#include "storage/pmsignal.h"
|
|
#include "storage/proc.h"
|
|
#include "tcop/tcopprot.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/ps_status.h"
|
|
#include "utils/distribute_test.h"
|
|
#include "common/config/cm_config.h"
|
|
|
|
/*
|
|
* To control whether a master configured with synchronous commit is
|
|
* allowed to stop waiting for standby WAL sync when there is synchronous
|
|
* standby WAL senders are disconnected.
|
|
*/
|
|
volatile bool most_available_sync = false;
|
|
|
|
static volatile int SyncRepWaitMode = SYNC_REP_WAIT_FLUSH;
|
|
const int MAX_SYNC_REP_RETRY_COUNT = 1000;
|
|
const int SYNC_REP_SLEEP_DELAY = 1000;
|
|
|
|
static void SyncRepQueueInsert(int mode);
|
|
static bool SyncRepCancelWait(void);
|
|
static int SyncRepWakeQueue(bool all, int mode);
|
|
static void SyncRepWaitCompletionQueue();
|
|
static void SyncRepNotifyComplete();
|
|
|
|
static int SyncRepGetStandbyPriority(void);
|
|
#ifndef ENABLE_MULTIPLE_NODES
|
|
static bool SyncRepGetSyncLeftTime(XLogRecPtr XactCommitLSN, TimestampTz* leftTime);
|
|
#endif
|
|
static void SyncRepGetOldestSyncRecPtr(XLogRecPtr* receivePtr, XLogRecPtr* writePtr, XLogRecPtr* flushPtr,
|
|
XLogRecPtr* replayPtr, List* sync_standbys);
|
|
static void SyncRepGetNthLatestSyncRecPtr(XLogRecPtr* receivePtr, XLogRecPtr* writePtr, XLogRecPtr* flushPtr,
|
|
XLogRecPtr* replayPtr, List* sync_standbys, uint8 nth);
|
|
|
|
|
|
#ifdef USE_ASSERT_CHECKING
|
|
static bool SyncRepQueueIsOrderedByLSN(int mode);
|
|
#endif
|
|
|
|
static List *SyncRepGetSyncStandbysPriority(bool *am_sync, List** catchup_standbys = NULL);
|
|
static List *SyncRepGetSyncStandbysQuorum(bool *am_sync, List** catchup_standbys = NULL);
|
|
static int cmp_lsn(const void *a, const void *b);
|
|
|
|
#define CATCHUP_XLOG_DIFF(ptr1, ptr2, amount) \
|
|
XLogRecPtrIsInvalid(ptr1) ? false : (XLByteDifference(ptr2, ptr1) < amount)
|
|
|
|
/*
 * Determine whether to wait for standby catching up, if requested by user.
 *
 * Return true if we need to wait for catch-up (synchronous replication),
 * return false if we should not wait for catch-up.
 *
 * Only meaningful in single-node (non-multiple-nodes) builds; otherwise
 * always returns true.
 */
bool SynRepWaitCatchup(XLogRecPtr XactCommitLSN)
{
#ifndef ENABLE_MULTIPLE_NODES
    /*
     * When most_available_sync is off or catchup2normal_wait_time is not set
     * (negative), catch-up-based skipping is disabled: always wait.
     */
    if (!t_thrd.walsender_cxt.WalSndCtl->most_available_sync ||
        g_instance.attr.attr_storage.catchup2normal_wait_time < 0) {
        return true;
    }

    /*
     * catchup2normal_wait_time is in milliseconds; convert to microseconds
     * (TimestampTz units). NOTE(review): captured once on first call because
     * of the function-local static — assumes the GUC cannot change at
     * runtime; confirm it is PGC_POSTMASTER.
     */
    static const TimestampTz catchup2normalWaitTime = g_instance.attr.attr_storage.catchup2normal_wait_time * 1000;
    TimestampTz syncLeftTime = 0;

    /*
     * SyncRepGetSyncLeftTime() returning false means that there is at least
     * one sync standby, or no standby in catchup, so we must wait for
     * synchronous replication as usual.
     */
    if (!SyncRepGetSyncLeftTime(XactCommitLSN, &syncLeftTime)) {
        return true;
    }
    /*
     * Skip the wait when the estimated remaining catch-up time is unknown
     * (0) or exceeds the configured threshold.
     */
    if (syncLeftTime == 0 || syncLeftTime > catchup2normalWaitTime) {
        return false;
    }
#endif
    return true;
}
|
|
|
|
/*
 * Wait for synchronous replication, if requested by user.
 *
 * Initially backends start in state SYNC_REP_NOT_WAITING and then
 * change that state to SYNC_REP_WAITING before adding ourselves
 * to the wait queue. During SyncRepWakeQueue() a WALSender changes
 * the state to SYNC_REP_WAIT_COMPLETE once replication is confirmed.
 * This backend then resets its state to SYNC_REP_NOT_WAITING.
 *
 * XactCommitLSN: the commit record's end LSN that must be confirmed by the
 * sync standby(s) before this backend is released.
 */
void SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
{
    char *new_status = NULL;
    const char *old_status = NULL;
    int mode = SyncRepWaitMode;

    /*
     * Fast exit if user has not requested sync replication, or there are no
     * sync replication standby names defined. Note that those standbys don't
     * need to be connected.
     */
    if (!u_sess->attr.attr_storage.enable_stream_replication || !SyncRepRequested() || !SyncStandbysDefined() ||
        (t_thrd.postmaster_cxt.HaShmData->current_mode == NORMAL_MODE))
        return;

    Assert(SHMQueueIsDetached(&(t_thrd.proc->syncRepLinks)));
    Assert(t_thrd.walsender_cxt.WalSndCtl != NULL);

    /* Prevent the queue cleanups from being influenced by external interruptions */
    HOLD_INTERRUPTS();

    (void)LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
    Assert(t_thrd.proc->syncRepState == SYNC_REP_NOT_WAITING);

    /*
     * We don't wait for sync rep if WalSndCtl->sync_standbys_defined is not
     * set. See SyncRepUpdateSyncStandbysDefined.
     *
     * Also check that the standby hasn't already replied. Unlikely race
     * condition but we'll be fetching that cache line anyway so it's likely to
     * be a low cost check. We don't wait for sync rep if no sync standbys alive,
     * and SynRepWaitCatchup() may let us skip the wait while standbys are
     * still catching up (most-available-sync mode).
     */
    if (!t_thrd.walsender_cxt.WalSndCtl->sync_standbys_defined ||
        XLByteLE(XactCommitLSN, t_thrd.walsender_cxt.WalSndCtl->lsn[mode]) ||
        t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone ||
        !SynRepWaitCatchup(XactCommitLSN)) {
        LWLockRelease(SyncRepLock);
        RESUME_INTERRUPTS();
        return;
    }

    /*
     * Set our waitLSN so WALSender will know when to wake us, and add
     * ourselves to the queue.
     */
    t_thrd.proc->waitLSN = XactCommitLSN;
    t_thrd.proc->syncRepState = SYNC_REP_WAITING;
    SyncRepQueueInsert(mode);
    Assert(SyncRepQueueIsOrderedByLSN(mode));
    LWLockRelease(SyncRepLock);

    /* Alter ps display to show waiting for sync rep. */
    if (u_sess->attr.attr_common.update_process_title) {
        int len;
        errno_t ret = EOK;
        int rc = 0;
#define NEW_STATUS_LEN 33
        old_status = get_ps_display(&len);
        /* Room for the old title plus " waiting for %X/%X" suffix. */
        new_status = (char *)palloc(len + NEW_STATUS_LEN);
        if (len > 0) {
            ret = memcpy_s(new_status, len + NEW_STATUS_LEN, old_status, len);
            securec_check(ret, "\0", "\0");
        }

        rc = snprintf_s(new_status + len, NEW_STATUS_LEN, NEW_STATUS_LEN - 1, " waiting for %X/%X",
            (uint32)(XactCommitLSN >> 32), (uint32)XactCommitLSN);
        securec_check_ss(rc, "", "");

        set_ps_display(new_status, false);
        new_status[len] = '\0'; /* truncate off " waiting ..." */
    }

    WaitState oldStatus = pgstat_report_waitstatus(STATE_WAIT_WALSYNC);

    /*
     * Wait for specified LSN to be confirmed.
     *
     * Each proc has its own wait latch, so we perform a normal latch
     * check/wait loop here.
     */
    for (;;) {
        /* Must reset the latch before testing state. */
        ResetLatch(&t_thrd.proc->procLatch);

#ifdef ENABLE_DISTRIBUTE_TEST
        /* Test hook: injected sleep during the sync-commit wait. */
        if (TEST_STUB(DN_STANDBY_SLEEPIN_SYNCCOMMIT, stub_sleep_emit)) {
            ereport(get_distribute_test_param()->elevel,
                (errmsg("sleep_emit happen during SyncRepWaitForLSN time:%ds, stub_name:%s",
                    get_distribute_test_param()->sleep_time, get_distribute_test_param()->test_stub_name)));
        }
#endif

        /*
         * Acquiring the lock is not needed, the latch ensures proper barriers.
         * If it looks like we're done, we must really be done, because once
         * walsender changes the state to SYNC_REP_WAIT_COMPLETE, it will never
         * update it again, so we can't be seeing a stale value in that case.
         */
        if (t_thrd.proc->syncRepState == SYNC_REP_WAIT_COMPLETE)
            break;

        /*
         * If a wait for synchronous replication is pending, we can neither
         * acknowledge the commit nor raise ERROR or FATAL. The latter would
         * lead the client to believe that the transaction aborted, which
         * is not true: it's already committed locally. The former is no good
         * either: the client has requested synchronous replication, and is
         * entitled to assume that an acknowledged commit is also replicated,
         * which might not be true. So in this case we issue a WARNING (which
         * some clients may be able to interpret) and shut off further output.
         * We do NOT reset t_thrd.int_cxt.ProcDiePending, so that the process will die after
         * the commit is cleaned up.
         */
        if (t_thrd.int_cxt.ProcDiePending || t_thrd.proc_cxt.proc_exit_inprogress) {
            ereport(WARNING,
                (errcode(ERRCODE_ADMIN_SHUTDOWN),
                    errmsg("canceling the wait for synchronous replication and terminating connection due to "
                           "administrator command"),
                    errdetail("The transaction has already committed locally, but might not have been replicated to "
                              "the standby.")));
            t_thrd.postgres_cxt.whereToSendOutput = DestNone;
            if (SyncRepCancelWait()) {
                break;
            }
        }

        /*
         * It's unclear what to do if a query cancel interrupt arrives. We
         * can't actually abort at this point, but ignoring the interrupt
         * altogether is not helpful, so we just terminate the wait with a
         * suitable warning.
         */
        if (t_thrd.int_cxt.QueryCancelPending) {
            /* reset query cancel signal after vacuum. */
            if (!t_thrd.vacuum_cxt.in_vacuum) {
                t_thrd.int_cxt.QueryCancelPending = false;
            }
            ereport(WARNING,
                (errmsg("canceling wait for synchronous replication due to user request"),
                    errdetail("The transaction has already committed locally, but might not have been replicated to "
                              "the standby.")));
            if (SyncRepCancelWait()) {
                break;
            }
        }

        /*
         * If the postmaster dies, we'll probably never get an
         * acknowledgement, because all the wal sender processes will exit. So
         * just bail out.
         */
        if (!PostmasterIsAlive()) {
            t_thrd.int_cxt.ProcDiePending = true;
            t_thrd.postgres_cxt.whereToSendOutput = DestNone;
            if (SyncRepCancelWait()) {
                break;
            }
        }

        /*
         * If the syncmode was modified dynamically (standalone master, or
         * synchronous_commit lowered below remote levels), stop waiting.
         */
        if (t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone ||
            synchronous_commit <= SYNCHRONOUS_COMMIT_LOCAL_FLUSH) {
            ereport(WARNING,
                (errmsg("canceling wait for synchronous replication due to syncmaster standalone."),
                    errdetail("The transaction has already committed locally, but might not have been replicated to "
                              "the standby.")));
            if (SyncRepCancelWait()) {
                break;
            }
        }

        /*
         * For gs_rewind, if standby or secondary is not connected, stop waiting.
         */
        if (strcmp(u_sess->attr.attr_common.application_name, "gs_rewind") == 0) {
            if (IS_DN_MULTI_STANDYS_MODE() ||
                (IS_DN_DUMMY_STANDYS_MODE() &&
                    !(WalSndInProgress(SNDROLE_PRIMARY_STANDBY | SNDROLE_PRIMARY_DUMMYSTANDBY)))) {
                ereport(WARNING,
                    (errmsg("canceling wait for synchronous replication due to client is gs_rewind and "
                            "secondary is not connected."),
                        errdetail("The transaction has already committed locally, but might not have been replicated "
                                  "to the standby.")));
                if (SyncRepCancelWait()) {
                    break;
                }
            }
        }

        /*
         * For the case that a query cancel or proc die signal has not arrived:
         * if the current session is being closed, stop waiting.
         */
        if (u_sess->status == KNL_SESS_CLOSE) {
            ereport(WARNING,
                (errmsg("canceling wait for synchronous replication due to session close."),
                    errdetail("The transaction has already committed locally, but might not have been replicated to "
                              "the standby.")));
            if (SyncRepCancelWait()) {
                break;
            }
        }

        /*
         * Wait on latch. Any condition that should wake us up will set the
         * latch; the 3s timeout lets us re-check the exit conditions above.
         */
        WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, 3000L);
    }

    /* Make sure that syncRepLinks is read after syncRepState */
    pg_read_barrier();

    /* Leader informs following procs (group-release optimization). */
    if (t_thrd.proc->syncRepLinks.next != NULL) {
        SyncRepNotifyComplete();
    }

    (void)pgstat_report_waitstatus(oldStatus);

    /*
     * WalSender has checked our LSN and has removed us from queue. Clean up
     * state and leave. It's OK to reset these shared memory fields without
     * holding SyncRepLock, because any walsenders will ignore us anyway when
     * we're not on the queue. pg_read_barrier() has been invoked after the
     * loop to make sure the changes to the queue link are visible.
     */
    Assert(SHMQueueIsDetached(&(t_thrd.proc->syncRepLinks)));
    t_thrd.proc->syncRepState = SYNC_REP_NOT_WAITING;
    t_thrd.proc->syncRepInCompleteQueue = false;
    t_thrd.proc->waitLSN = 0;

    if (new_status != NULL) {
        /* Reset ps display (suffix was truncated off above). */
        set_ps_display(new_status, false);
        pfree(new_status);
        new_status = NULL;
    }

    RESUME_INTERRUPTS();
}
|
|
|
|
/*
 * Insert t_thrd.proc into the specified SyncRepQueue, maintaining sorted invariant.
 *
 * Usually we will go at tail of queue, though it's possible that we arrive
 * here out of order, so start at tail and work back to insertion point.
 *
 * Caller must hold SyncRepLock (see SyncRepWaitForLSN).
 */
static void SyncRepQueueInsert(int mode)
{
    PGPROC *proc = NULL;

    Assert(mode >= 0 && mode < NUM_SYNC_REP_WAIT_MODE);
    /* Start scanning from the tail of the queue. */
    proc = (PGPROC *)SHMQueuePrev(&(t_thrd.walsender_cxt.WalSndCtl->SyncRepQueue[mode]),
        &(t_thrd.walsender_cxt.WalSndCtl->SyncRepQueue[mode]),
        offsetof(PGPROC, syncRepLinks));

    while (proc != NULL) {
        /*
         * Stop at the queue element that we should insert after, to ensure
         * the queue is ordered by LSN. Equal LSNs are allowed in the sync
         * queue (hence <=, not <).
         */
        if (XLByteLE(proc->waitLSN, t_thrd.proc->waitLSN))
            break;

        proc = (PGPROC *)SHMQueuePrev(&(t_thrd.walsender_cxt.WalSndCtl->SyncRepQueue[mode]), &(proc->syncRepLinks),
            offsetof(PGPROC, syncRepLinks));
    }

    /* Insert after the found element, or at the head if none qualifies. */
    if (proc != NULL)
        SHMQueueInsertAfter(&(proc->syncRepLinks), &(t_thrd.proc->syncRepLinks));
    else
        SHMQueueInsertAfter(&(t_thrd.walsender_cxt.WalSndCtl->SyncRepQueue[mode]), &(t_thrd.proc->syncRepLinks));
}
|
|
|
|
/*
 * Acquire SyncRepLock and cancel any wait currently not in the completion queue.
 *
 * Returns true if the wait was successfully cancelled (we removed ourselves
 * from the wait queue). Returns false if a walsender has already moved us to
 * the completion queue, in which case the caller must keep waiting for the
 * leader's notification rather than tearing down its queue links.
 */
static bool SyncRepCancelWait(void)
{
    bool success = false;

    LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
    if (!t_thrd.proc->syncRepInCompleteQueue) {
        if (!SHMQueueIsDetached(&(t_thrd.proc->syncRepLinks))) {
            SHMQueueDelete(&(t_thrd.proc->syncRepLinks));
        }
        t_thrd.proc->syncRepState = SYNC_REP_NOT_WAITING;
        success = true;
    }
    LWLockRelease(SyncRepLock);

    return success;
}
|
|
|
|
/*
 * Detach this backend from the sync-rep wait queue at process exit.
 *
 * If the proc is still on the ordinary wait queue it is simply removed.
 * If a walsender already moved it into a completion queue, we must not
 * unlink it ourselves; instead wait until the completion-queue leader has
 * notified us (SyncRepWaitCompletionQueue) so the queue links stay sane.
 */
void SyncRepCleanupAtProcExit(void)
{
    if (t_thrd.proc->syncRepLinks.prev || t_thrd.proc->syncRepLinks.next ||
        t_thrd.proc->syncRepState != SYNC_REP_NOT_WAITING) {
        LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
        if (!t_thrd.proc->syncRepInCompleteQueue) {
            /* Still on the wait queue: safe to unlink under the lock. */
            if (!SHMQueueIsDetached(&(t_thrd.proc->syncRepLinks))) {
                SHMQueueDelete(&(t_thrd.proc->syncRepLinks));
            }
            LWLockRelease(SyncRepLock);
            return;
        }
        LWLockRelease(SyncRepLock);
        /* In a completion queue: wait for the leader to finish notifying us. */
        SyncRepWaitCompletionQueue();
    }
}
|
|
|
|
/*
 * ===========================================================
 * Synchronous Replication functions for wal sender processes
 * ===========================================================
 *
 *
 * Take any action required to initialise sync rep state from config
 * data. Called at WALSender startup and after each SIGHUP.
 */
void SyncRepInitConfig(void)
{
    int priority;

    /*
     * Determine if we are a potential sync standby and remember the result
     * for handling replies from standby.
     */
    priority = SyncRepGetStandbyPriority();
    if (t_thrd.walsender_cxt.MyWalSnd->sync_standby_priority != priority) {
        LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
        t_thrd.walsender_cxt.MyWalSnd->sync_standby_priority = priority;

        /*
         * A synchronous standby is starting, so we should leave standalone
         * mode (sync_master_standalone), if required.
         */
        SyncRepCheckSyncStandbyAlive();

        LWLockRelease(SyncRepLock);
        ereport(DEBUG1, (errmsg("standby \"%s\" now has synchronous standby priority %d",
            u_sess->attr.attr_common.application_name, priority)));
    }
}
|
|
|
|
/*
|
|
* Update the LSNs on each queue based upon our latest state. This
|
|
* implements a simple policy of first-valid-standby-releases-waiter.
|
|
*
|
|
* Other policies are possible, which would change what we do here and what
|
|
* perhaps also which information we store as well.
|
|
*/
|
|
void SyncRepReleaseWaiters(void)
|
|
{
|
|
volatile WalSndCtlData *walsndctl = t_thrd.walsender_cxt.WalSndCtl;
|
|
XLogRecPtr receivePtr;
|
|
XLogRecPtr writePtr;
|
|
XLogRecPtr flushPtr;
|
|
XLogRecPtr replayPtr;
|
|
int numreceive = 0;
|
|
int numwrite = 0;
|
|
int numflush = 0;
|
|
bool got_recptr = false;
|
|
bool am_sync = false;
|
|
|
|
/*
|
|
* If this WALSender is serving a standby that is not on the list of
|
|
* potential standbys then we have nothing to do. If we are still starting
|
|
* up, still running base backup or the current flush position is still
|
|
* invalid, then leave quickly also.
|
|
*/
|
|
if (t_thrd.walsender_cxt.MyWalSnd->sync_standby_priority == 0 ||
|
|
t_thrd.walsender_cxt.MyWalSnd->state < WALSNDSTATE_STREAMING ||
|
|
XLByteEQ(t_thrd.walsender_cxt.MyWalSnd->flush, InvalidXLogRecPtr)) {
|
|
t_thrd.syncrep_cxt.announce_next_takeover = true;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* We're a potential sync standby. Release waiters if we are the highest
|
|
* priority standby. If there are multiple standbys with same priorities
|
|
* then we use the first mentioned standby. If you change this, also
|
|
* change pg_stat_get_wal_senders().
|
|
*/
|
|
(void)LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
|
|
|
|
/*
|
|
* Check whether we are a sync standby or not, and calculate the synced
|
|
* positions among all sync standbys.
|
|
*/
|
|
got_recptr = SyncRepGetSyncRecPtr(&receivePtr, &writePtr, &flushPtr, &replayPtr, &am_sync);
|
|
|
|
/*
|
|
* If we are managing a sync standby, though we weren't prior to this,
|
|
* then announce we are now a sync standby.
|
|
*/
|
|
if (t_thrd.syncrep_cxt.announce_next_takeover && am_sync) {
|
|
t_thrd.syncrep_cxt.announce_next_takeover = false;
|
|
if (t_thrd.syncrep_cxt.SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY) {
|
|
ereport(LOG, (errmsg("standby \"%s\" is now a synchronous standby with priority %d",
|
|
u_sess->attr.attr_common.application_name,
|
|
t_thrd.walsender_cxt.MyWalSnd->sync_standby_priority)));
|
|
} else {
|
|
ereport(LOG, (errmsg("standby \"%s\" is now a candidate for quorum synchronous standby",
|
|
u_sess->attr.attr_common.application_name)));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If the number of sync standbys is less than requested or we aren't
|
|
* managing a sync standby then just leave.
|
|
*/
|
|
if (!got_recptr || !am_sync) {
|
|
LWLockRelease(SyncRepLock);
|
|
t_thrd.syncrep_cxt.announce_next_takeover = !am_sync;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Set the lsn first so that when we wake backends they will release up to
|
|
* this location.
|
|
*/
|
|
if (XLByteLT(walsndctl->lsn[SYNC_REP_WAIT_RECEIVE], receivePtr)) {
|
|
walsndctl->lsn[SYNC_REP_WAIT_RECEIVE] = t_thrd.walsender_cxt.MyWalSnd->receive;
|
|
numreceive = SyncRepWakeQueue(false, SYNC_REP_WAIT_RECEIVE);
|
|
}
|
|
if (XLByteLT(walsndctl->lsn[SYNC_REP_WAIT_WRITE], writePtr)) {
|
|
walsndctl->lsn[SYNC_REP_WAIT_WRITE] = t_thrd.walsender_cxt.MyWalSnd->write;
|
|
numwrite = SyncRepWakeQueue(false, SYNC_REP_WAIT_WRITE);
|
|
}
|
|
if (XLByteLT(walsndctl->lsn[SYNC_REP_WAIT_FLUSH], flushPtr)) {
|
|
walsndctl->lsn[SYNC_REP_WAIT_FLUSH] = t_thrd.walsender_cxt.MyWalSnd->flush;
|
|
numflush = SyncRepWakeQueue(false, SYNC_REP_WAIT_FLUSH);
|
|
}
|
|
if (XLByteLT(walsndctl->lsn[SYNC_REP_WAIT_APPLY], replayPtr)) {
|
|
walsndctl->lsn[SYNC_REP_WAIT_APPLY] = t_thrd.walsender_cxt.MyWalSnd->apply;
|
|
numflush = SyncRepWakeQueue(false, SYNC_REP_WAIT_APPLY);
|
|
}
|
|
|
|
LWLockRelease(SyncRepLock);
|
|
|
|
ereport(DEBUG3,
|
|
(errmsg("released %d procs up to receive %X/%X, %d procs up to write %X/%X, %d procs up to flush %X/%X",
|
|
numreceive, (uint32)(receivePtr >> 32), (uint32)receivePtr, numwrite, (uint32)(writePtr >> 32),
|
|
(uint32)writePtr, numflush, (uint32)(flushPtr >> 32), (uint32)flushPtr)));
|
|
}
|
|
|
|
/*
 * Calculate the synced Receive, Write, Flush and Apply positions among sync standbys.
 *
 * Return false if the number of sync standbys is less than
 * synchronous_standby_names specifies. Otherwise return true and
 * store the positions into *receivePtr, *writePtr, *flushPtr and *replayPtr.
 *
 * On return, *am_sync is set to true if this walsender is connecting to
 * a sync standby. Otherwise it's set to false.
 *
 * check_am_sync: when false, the "am I a sync standby" test is skipped so
 * callers can obtain the synced positions regardless of which walsender
 * they run in.
 */
bool SyncRepGetSyncRecPtr(XLogRecPtr *receivePtr, XLogRecPtr *writePtr, XLogRecPtr *flushPtr, XLogRecPtr* replayPtr, bool *am_sync, bool check_am_sync)
{
    List *sync_standbys = NIL;

    *receivePtr = InvalidXLogRecPtr;
    *writePtr = InvalidXLogRecPtr;
    *flushPtr = InvalidXLogRecPtr;
    *replayPtr = InvalidXLogRecPtr;
    *am_sync = false;

    /* Get standbys that are considered as synchronous at this moment */
    sync_standbys = SyncRepGetSyncStandbys(am_sync);
    /*
     * Quick exit if we are not managing a sync standby (when check_am_sync
     * is true) or there are not enough synchronous standbys.
     * In a particular scenario, when most_available_sync is true, the primary
     * only waits on the alive sync standbys even if list_length(sync_standbys)
     * doesn't satisfy t_thrd.syncrep_cxt.SyncRepConfig->num_sync.
     *
     * All synchronous standbys are allowed to disconnect from the host
     * only when the maximum available mode is on.
     */
    if ((!(*am_sync) && check_am_sync) || t_thrd.syncrep_cxt.SyncRepConfig == NULL ||
        (!t_thrd.walsender_cxt.WalSndCtl->most_available_sync &&
        list_length(sync_standbys) < t_thrd.syncrep_cxt.SyncRepConfig->num_sync) ||
        (t_thrd.walsender_cxt.WalSndCtl->most_available_sync && list_length(sync_standbys) == 0)) {
        list_free(sync_standbys);
        return false;
    }

    /*
     * In a priority-based sync replication, the synced positions are the
     * oldest ones among sync standbys. In a quorum-based, they are the Nth
     * latest ones.
     *
     * SyncRepGetNthLatestSyncRecPtr() also can calculate the oldest positions.
     * But we use SyncRepGetOldestSyncRecPtr() for that calculation because
     * it's a bit more efficient.
     *
     * XXX If the numbers of current and requested sync standbys are the same,
     * we can use SyncRepGetOldestSyncRecPtr() to calculate the synced
     * positions even in a quorum-based sync replication.
     */
    if (t_thrd.syncrep_cxt.SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY) {
        SyncRepGetOldestSyncRecPtr(receivePtr, writePtr, flushPtr, replayPtr, sync_standbys);
    } else {
        SyncRepGetNthLatestSyncRecPtr(receivePtr, writePtr, flushPtr, replayPtr, sync_standbys,
            t_thrd.syncrep_cxt.SyncRepConfig->num_sync);
    }

    list_free(sync_standbys);
    return true;
}
|
|
|
|
#ifndef ENABLE_MULTIPLE_NODES
/*
 * Obtain the estimated remaining time for a catching-up standby to reach
 * XactCommitLSN.
 *
 * If there is at least one sync standby, or no standby in catchup, there is
 * no need to consider catch-up at all: return false. Otherwise *leftTime is
 * set to the smallest estimate across all catchup standbys (0 meaning "no
 * estimate available") and true is returned.
 */
static bool SyncRepGetSyncLeftTime(XLogRecPtr XactCommitLSN, TimestampTz* leftTime)
{
    List* sync_standbys = NIL;
    List* catchup_standbys = NIL;
    bool am_sync = false;
    ListCell* cell = NULL;
    *leftTime = 0;

    /* Get standbys that are considered as synchronous at this moment. */
    sync_standbys = SyncRepGetSyncStandbys(&am_sync, &catchup_standbys);

    /* Skip here if there is at least one sync standby, or no standby in catchup. */
    if (list_length(sync_standbys) > 0 || list_length(catchup_standbys) == 0) {
        list_free(sync_standbys);
        list_free(catchup_standbys);
        return false;
    }

    /*
     * Scan through all catchup standbys and compute the minimum estimated
     * time left to sync, using each walsender's measured catch-up rate.
     */
    foreach (cell, catchup_standbys) {
        WalSnd* walsnd = &t_thrd.walsender_cxt.WalSndCtl->walsnds[lfirst_int(cell)];
        SpinLockAcquire(&walsnd->mutex);
        TimestampTz syncNeededTime;
        /* Snapshot receive position and catch-up rate under the spinlock. */
        XLogRecPtr xrp = walsnd->receive;
        double rate = walsnd->catchupRate;
        SpinLockRelease(&walsnd->mutex);

        /* rate is time-per-byte; multiply by the remaining WAL distance. */
        syncNeededTime = (TimestampTz)(rate * XLByteDifference(XactCommitLSN, xrp));
        if (*leftTime == 0 || *leftTime > syncNeededTime) {
            *leftTime = syncNeededTime;
        }
    }

    list_free(sync_standbys);
    list_free(catchup_standbys);
    return true;
}
#endif
|
|
|
|
/*
|
|
* Calculate the oldest Write, Flush and Apply positions among sync standbys.
|
|
*/
|
|
static void SyncRepGetOldestSyncRecPtr(XLogRecPtr* receivePtr, XLogRecPtr* writePtr, XLogRecPtr* flushPtr,
|
|
XLogRecPtr* replayPtr, List* sync_standbys)
|
|
{
|
|
ListCell *cell = NULL;
|
|
|
|
/*
|
|
* Scan through all sync standbys and calculate the oldest
|
|
* Write, Flush and Apply positions.
|
|
*/
|
|
foreach (cell, sync_standbys) {
|
|
WalSnd* walsnd = &t_thrd.walsender_cxt.WalSndCtl->walsnds[lfirst_int(cell)];
|
|
XLogRecPtr receive;
|
|
XLogRecPtr write;
|
|
XLogRecPtr flush;
|
|
XLogRecPtr apply;
|
|
|
|
SpinLockAcquire(&walsnd->mutex);
|
|
receive = walsnd->receive;
|
|
write = walsnd->write;
|
|
flush = walsnd->flush;
|
|
apply = walsnd->apply;
|
|
SpinLockRelease(&walsnd->mutex);
|
|
|
|
if (XLogRecPtrIsInvalid(*writePtr) || !XLByteLE(*writePtr, write))
|
|
*writePtr = write;
|
|
if (XLogRecPtrIsInvalid(*flushPtr) || !XLByteLE(*flushPtr, flush))
|
|
*flushPtr = flush;
|
|
if (XLogRecPtrIsInvalid(*receivePtr) || !XLByteLE(*receivePtr, receive))
|
|
*receivePtr = receive;
|
|
if (XLogRecPtrIsInvalid(*replayPtr) || !XLByteLE(*replayPtr, apply))
|
|
*replayPtr = apply;
|
|
}
|
|
}
|
|
|
|
/*
 * Calculate the Nth latest Receive, Write, Flush and Apply positions among
 * sync standbys (quorum-based sync replication).
 *
 * NOTE(review): assumes 1 <= nth and, unless most_available_sync clamps it
 * below, nth <= list_length(sync_standbys); the caller
 * (SyncRepGetSyncRecPtr) rejects the empty-list case before calling here —
 * confirm if new callers are added, since nth == 0 would index array[-1].
 */
static void SyncRepGetNthLatestSyncRecPtr(XLogRecPtr* receivePtr, XLogRecPtr* writePtr, XLogRecPtr* flushPtr,
    XLogRecPtr* replayPtr, List* sync_standbys, uint8 nth)
{
    ListCell *cell = NULL;
    XLogRecPtr *receive_array = NULL;
    XLogRecPtr *write_array = NULL;
    XLogRecPtr *flush_array = NULL;
    XLogRecPtr* apply_array = NULL;
    int len;
    int i = 0;

    len = list_length(sync_standbys);
    receive_array = (XLogRecPtr*)palloc(sizeof(XLogRecPtr) * len);
    write_array = (XLogRecPtr*)palloc(sizeof(XLogRecPtr) * len);
    flush_array = (XLogRecPtr*)palloc(sizeof(XLogRecPtr) * len);
    apply_array = (XLogRecPtr*)palloc(sizeof(XLogRecPtr) * len);

    /* Snapshot every sync standby's positions under its spinlock. */
    foreach (cell, sync_standbys) {
        WalSnd* walsnd = &t_thrd.walsender_cxt.WalSndCtl->walsnds[lfirst_int(cell)];

        SpinLockAcquire(&walsnd->mutex);
        receive_array[i] = walsnd->receive;
        write_array[i] = walsnd->write;
        flush_array[i] = walsnd->flush;
        apply_array[i] = walsnd->apply;
        SpinLockRelease(&walsnd->mutex);

        i++;
    }

    /* Sort descending so index nth-1 holds the Nth latest position. */
    qsort(receive_array, len, sizeof(XLogRecPtr), cmp_lsn);
    qsort(write_array, len, sizeof(XLogRecPtr), cmp_lsn);
    qsort(flush_array, len, sizeof(XLogRecPtr), cmp_lsn);
    qsort(apply_array, len, sizeof(XLogRecPtr), cmp_lsn);

    /*
     * Rewrite nth if the current sync standby count < nth: when
     * most_available_sync is true, the primary only waits on the alive sync
     * standbys even if list_length(sync_standbys) doesn't satisfy num_sync in
     * the quorum.
     */
    if (t_thrd.walsender_cxt.WalSndCtl->most_available_sync && list_length(sync_standbys) < nth) {
        nth = (uint8)list_length(sync_standbys);
    }

    /* Get Nth latest Receive, Write, Flush, Apply positions */
    *receivePtr = receive_array[nth - 1];
    *writePtr = write_array[nth - 1];
    *flushPtr = flush_array[nth - 1];
    *replayPtr = apply_array[nth - 1];

    pfree(receive_array);
    receive_array = NULL;
    pfree(write_array);
    write_array = NULL;
    pfree(flush_array);
    flush_array = NULL;
    pfree(apply_array);
    apply_array = NULL;
}
|
|
|
|
/*
|
|
* Compare lsn in order to sort array in descending order.
|
|
*/
|
|
static int cmp_lsn(const void *a, const void *b)
|
|
{
|
|
XLogRecPtr lsn1 = *((const XLogRecPtr *)a);
|
|
XLogRecPtr lsn2 = *((const XLogRecPtr *)b);
|
|
|
|
if (!XLByteLE(lsn1, lsn2))
|
|
return -1;
|
|
else if (XLByteEQ(lsn1, lsn2))
|
|
return 0;
|
|
else
|
|
return 1;
|
|
}
|
|
|
|
/*
 * Check if we are in the list of sync standbys, and if so, determine
 * priority sequence. Return priority if set, or zero to indicate that
 * we are not a potential sync standby.
 *
 * Compare the parameter SyncRepStandbyNames against the application_name
 * for this WALSender, or allow any name if we find a wildcard "*".
 */
static int SyncRepGetStandbyPriority(void)
{
    const char *standby_name = NULL;
    int priority;
    bool found = false;

    /*
     * Since synchronous cascade replication is not allowed, we always set the
     * priority of a cascading walsender to zero.
     */
    if (AM_WAL_STANDBY_SENDER)
        return 0;

    if (!SyncStandbysDefined() || t_thrd.syncrep_cxt.SyncRepConfig == NULL || !SyncRepRequested())
        return 0;

    /*
     * member_names is a packed sequence of NUL-terminated strings, one per
     * configured member; walk it entry by entry. Position in the list (1-based)
     * is the priority.
     */
    standby_name = t_thrd.syncrep_cxt.SyncRepConfig->member_names;
    for (priority = 1; priority <= t_thrd.syncrep_cxt.SyncRepConfig->nmembers; priority++) {
        if (pg_strcasecmp(standby_name, u_sess->attr.attr_common.application_name) == 0 ||
            strcmp(standby_name, "*") == 0) {
            found = true;
            break;
        }
        standby_name += strlen(standby_name) + 1;
    }

    if (!found) {
        return 0;
    }

    /*
     * In quorum-based sync replication, all the standbys in the list
     * have the same priority, one.
     */
    return (t_thrd.syncrep_cxt.SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY) ? priority : 1;
}
|
|
|
|
/*
|
|
* Wake the specified queue from head. Set the state of any backends that
|
|
* need to be woken, remove them from the queue, and then wake them.
|
|
* Pass all = true to wake whole queue; otherwise, just wake up to
|
|
* the walsender's LSN.
|
|
*
|
|
* Must hold SyncRepLock.
|
|
*/
|
|
static int SyncRepWakeQueue(bool all, int mode)
{
    volatile WalSndCtlData *walsndctl = t_thrd.walsender_cxt.WalSndCtl;
    PGPROC *proc = NULL;
    PGPROC *thisproc = NULL;
    int numprocs = 0; /* count of backends released from the queue */

    Assert(mode >= 0 && mode < NUM_SYNC_REP_WAIT_MODE);
    Assert(SyncRepQueueIsOrderedByLSN(mode));

    /* Start from the head of the LSN-ordered wait queue for this mode. */
    proc = (PGPROC *)SHMQueueNext(&(t_thrd.walsender_cxt.WalSndCtl->SyncRepQueue[mode]),
        &(t_thrd.walsender_cxt.WalSndCtl->SyncRepQueue[mode]),
        offsetof(PGPROC, syncRepLinks));
    SHM_QUEUE* pHead = &(t_thrd.walsender_cxt.WalSndCtl->SyncRepQueue[mode]);
    /* pTail tracks the last removable queue link; == pHead means "none found yet". */
    SHM_QUEUE* pTail = pHead;

    while (proc != NULL) {
        /*
         * Assume the queue is ordered by LSN
         */
        if (!all && XLByteLT(walsndctl->lsn[mode], proc->waitLSN))
            break;

        /*
         * Move to next proc, so we can delete thisproc from the queue.
         * thisproc is valid, proc may be NULL after this.
         */
        thisproc = proc;
        thisproc->syncRepInCompleteQueue = true;
        proc = (PGPROC *)SHMQueueNext(&(t_thrd.walsender_cxt.WalSndCtl->SyncRepQueue[mode]), &(proc->syncRepLinks),
            offsetof(PGPROC, syncRepLinks));

        /* Refers to the last removable node */
        pTail = &(thisproc->syncRepLinks);
        numprocs++;
    }

    /* Delete the finished segment from the list, and only notifies leader proc */
    if (pTail != pHead) {
        /*
         * The first detached waiter becomes the "leader": only it is woken
         * here; it wakes the remaining detached procs itself (see
         * SyncRepNotifyComplete), keeping the cost here O(1).
         */
        PGPROC* leaderProc = (PGPROC *) (((char *) pHead->next) - offsetof(PGPROC, syncRepLinks));
        /*
         * Unlink the whole [pHead->next .. pTail] segment in one splice.
         * The segment keeps its internal next links so the leader can walk it.
         */
        pHead->next->prev = NULL;
        pHead->next = pTail->next;
        pTail->next->prev = pHead;
        pTail->next = NULL;

        /*
         * SyncRepWaitForLSN() reads syncRepState without holding the lock, so
         * make sure that it sees the queue link being removed before the
         * syncRepState change.
         */
        pg_write_barrier();

        /*
         * Set state to complete; see SyncRepWaitForLSN() for discussion of
         * the various states.
         */
        leaderProc->syncRepState = SYNC_REP_WAIT_COMPLETE;

        /*
         * Wake only when we have set state and removed from queue.
         */
        SetLatch(&(leaderProc->procLatch));
    }

    return numprocs;
}
|
|
|
|
/*
|
|
* Wait for notification from completion queue. It should be finished
|
|
* soon, and is irrelevant to the network. So we just wait to avoid using lock.
|
|
*/
|
|
static void SyncRepWaitCompletionQueue()
|
|
{
|
|
/* Waiting for complete */
|
|
int i = MAX_SYNC_REP_RETRY_COUNT;
|
|
while (t_thrd.proc->syncRepState == SYNC_REP_WAITING) {
|
|
if (i-- > 0) {
|
|
pg_usleep(SYNC_REP_SLEEP_DELAY);
|
|
} else {
|
|
ereport(WARNING, (errmsg("Waiting for syncrep completion queue timeout.")));
|
|
i = MAX_SYNC_REP_RETRY_COUNT;
|
|
}
|
|
}
|
|
|
|
/* Make sure that syncRepLinks is read after syncRepState */
|
|
pg_read_barrier();
|
|
|
|
/* Leader informs following procs */
|
|
if (t_thrd.proc->syncRepState == SYNC_REP_WAIT_COMPLETE && t_thrd.proc->syncRepLinks.next) {
|
|
SyncRepNotifyComplete();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Leader informs following procs
|
|
*/
|
|
static void SyncRepNotifyComplete()
{
    /*
     * SyncRepWakeQueue() detached this proc (the leader) together with a
     * chain of followers still reachable through syncRepLinks.next.  Walk
     * that private chain and wake each follower in turn.
     */
    SHM_QUEUE *nextElement = t_thrd.proc->syncRepLinks.next;
    if (nextElement != NULL) {
        /* Disconnect ourselves from the chain before handing procs out. */
        t_thrd.proc->syncRepLinks.next = NULL;
        while (nextElement != NULL) {
            /* Recover the PGPROC that embeds this queue link. */
            PGPROC* curProc = (PGPROC*)(((char*)nextElement) - offsetof(PGPROC, syncRepLinks));
            /*
             * Move to next proc, so we can delete thisproc from the queue.
             * curProc is valid, proc may be NULL after this.
             */
            nextElement = curProc->syncRepLinks.next;

            /*
             * Remove curProc from queue.
             */
            curProc->syncRepLinks.next = NULL;
            curProc->syncRepLinks.prev = NULL;

            /*
             * SyncRepWaitForLSN() reads syncRepState without holding the lock, so
             * make sure that it sees the queue link being removed before the
             * syncRepState change.
             */
            pg_write_barrier();

            curProc->syncRepState = SYNC_REP_WAIT_COMPLETE;

            /*
             * Wake only when we have set state and removed from queue.
             */
            SetLatch(&(curProc->procLatch));
        }
    }
}
|
|
|
|
/*
|
|
* The checkpointer calls this as needed to update the shared
|
|
* sync_standbys_defined flag, so that backends don't remain permanently wedged
|
|
* if synchronous_standby_names is unset. It's safe to check the current value
|
|
* without the lock, because it's only ever updated by one process. But we
|
|
* must take the lock to change it.
|
|
*/
|
|
void SyncRepUpdateSyncStandbysDefined(void)
{
    /* Current GUC-derived value; shared copy is only written under SyncRepLock. */
    bool sync_standbys_defined = SyncStandbysDefined();
    if (sync_standbys_defined != t_thrd.walsender_cxt.WalSndCtl->sync_standbys_defined) {
        (void)LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);

        /*
         * If synchronous_standby_names has been reset to empty, it's futile
         * for backends to continue to waiting. Since the user no longer
         * wants synchronous replication, we'd better wake them up.
         */
        if (!sync_standbys_defined) {
            int i;

            for (i = 0; i < NUM_SYNC_REP_WAIT_MODE; i++)
                (void)SyncRepWakeQueue(true, i);
        }

        /*
         * Only allow people to join the queue when there are synchronous
         * standbys defined. Without this interlock, there's a race
         * condition: we might wake up all the current waiters; then, some
         * backend that hasn't yet reloaded its config might go to sleep on
         * the queue (and never wake up). This prevents that.
         * NOTE: the wake-up above must precede this flag change; do not reorder.
         */
        t_thrd.walsender_cxt.WalSndCtl->sync_standbys_defined = sync_standbys_defined;

        /*
         * Sync standbys just became defined while most_available_sync is on
         * and we are not standalone: re-check whether a sync standby is alive.
         */
        if (sync_standbys_defined && t_thrd.walsender_cxt.WalSndCtl->most_available_sync &&
            !t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone)
            SyncRepCheckSyncStandbyAlive();

        LWLockRelease(SyncRepLock);
    }

    /*
     * Check if new value of parameter is same as earlier,
     * if not then change in shared memory. Since here were enabling this
     * parameter now only, so it may happen that there were no synchronous
     * standby but master has not gone in stand-alone mode because it was
     * not configured to do so.
     * NOTE(review): most_available_sync here appears to be the GUC-backed
     * global flag declared elsewhere in this file — confirm.
     */
    if (most_available_sync != t_thrd.walsender_cxt.WalSndCtl->most_available_sync) {
        LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);

        t_thrd.walsender_cxt.WalSndCtl->most_available_sync = most_available_sync;
        (void)SyncRepCheckSyncStandbyAlive();

        LWLockRelease(SyncRepLock);
    }
}
|
|
|
|
/*
|
|
* Return the list of sync standbys, or NIL if no sync standby is connected.
|
|
*
|
|
* If there are multiple standbys with the same priority,
|
|
* the first one found is selected preferentially.
|
|
* The caller must hold SyncRepLock.
|
|
*
|
|
* On return, *am_sync is set to true if this walsender is connecting to
|
|
* sync standby. Otherwise it's set to false.
|
|
*/
|
|
List *SyncRepGetSyncStandbys(bool *am_sync, List** catchup_standbys)
{
    /* Default: caller is not a sync standby until proven otherwise. */
    if (am_sync != NULL)
        *am_sync = false;

    /* Quick exit if sync replication is not requested */
    if (t_thrd.syncrep_cxt.SyncRepConfig == NULL)
        return NIL;

    /* Dispatch to the method-specific selection routine. */
    if (t_thrd.syncrep_cxt.SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY)
        return SyncRepGetSyncStandbysPriority(am_sync, catchup_standbys);

    return SyncRepGetSyncStandbysQuorum(am_sync, catchup_standbys);
}
|
|
|
|
/*
|
|
* Return the list of all the candidates for quorum sync standbys,
|
|
* or NIL if no such standby is connected.
|
|
*
|
|
* The caller must hold SyncRepLock. This function must be called only in
|
|
* a quorum-based sync replication.
|
|
*
|
|
* On return, *am_sync is set to true if this walsender is connecting to
|
|
* sync standby. Otherwise it's set to false.
|
|
*/
|
|
static List *SyncRepGetSyncStandbysQuorum(bool *am_sync, List** catchup_standbys)
{
    List *result = NIL;
    volatile WalSnd *walsnd = NULL; /* Use volatile pointer to prevent code rearrangement */

    Assert(t_thrd.syncrep_cxt.SyncRepConfig->syncrep_method == SYNC_REP_QUORUM);

    for (int idx = 0; idx < g_instance.attr.attr_storage.max_wal_senders; idx++) {
        walsnd = &t_thrd.walsender_cxt.WalSndCtl->walsnds[idx];

        /* Skip inactive or non-synchronous walsenders. */
        if (walsnd->pid == 0 || walsnd->sync_standby_priority == 0)
            continue;

        /*
         * Report standbys still catching up to the caller, if requested.
         * Note this happens before the streaming/flush filters below.
         */
        if (catchup_standbys != NULL &&
            (walsnd->state == WALSNDSTATE_CATCHUP || walsnd->peer_state == CATCHUP_STATE)) {
            *catchup_standbys = lappend_int(*catchup_standbys, idx);
        }

        /* Only streaming walsenders with a valid flush position qualify. */
        if (XLogRecPtrIsInvalid(walsnd->flush))
            continue;
        if (walsnd->state != WALSNDSTATE_STREAMING)
            continue;

        /*
         * Consider this standby as a candidate for quorum sync standbys
         * and append it to the result.
         */
        result = lappend_int(result, idx);
        if (am_sync != NULL && walsnd == t_thrd.walsender_cxt.MyWalSnd)
            *am_sync = true;
    }

    return result;
}
|
|
|
|
/*
|
|
* Return the list of sync standbys chosen based on their priorities,
|
|
* or NIL if no sync standby is connected.
|
|
*
|
|
* If there are multiple standbys with the same priority,
|
|
* the first one found is selected preferentially.
|
|
*
|
|
* The caller must hold SyncRepLock. This function must be called only in
|
|
* a priority-based sync replication.
|
|
*
|
|
* On return, *am_sync is set to true if this walsender is connecting to
|
|
* sync standby. Otherwise it's set to false.
|
|
*/
|
|
static List *SyncRepGetSyncStandbysPriority(bool *am_sync, List** catchup_standbys)
{
    List *result = NIL;              /* walsender indexes chosen as sync */
    List *pending = NIL;             /* candidates with priority > 1, resolved later */
    int lowest_priority;
    int next_highest_priority;
    int this_priority;
    int priority;
    int i;
    bool am_in_pending = false;      /* true if MyWalSnd landed in the pending list */
    volatile WalSnd *walsnd = NULL;  /* Use volatile pointer to prevent code rearrangement */

    Assert(t_thrd.syncrep_cxt.SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY);

    /* Priorities range from 1 (best) to nmembers (worst). */
    lowest_priority = t_thrd.syncrep_cxt.SyncRepConfig->nmembers;
    next_highest_priority = lowest_priority + 1;

    /*
     * Find the sync standbys which have the highest priority (i.e, 1). Also
     * store all the other potential sync standbys into the pending list, in
     * order to scan it later and find other sync standbys from it quickly.
     */
    for (i = 0; i < g_instance.attr.attr_storage.max_wal_senders; i++) {
        walsnd = &t_thrd.walsender_cxt.WalSndCtl->walsnds[i];

        /* Must be active */
        if (walsnd->pid == 0)
            continue;

        /* Must be synchronous */
        this_priority = walsnd->sync_standby_priority;
        if (this_priority == 0)
            continue;

        /* Report standbys still catching up to the caller, if requested. */
        if ((walsnd->state == WALSNDSTATE_CATCHUP || walsnd->peer_state == CATCHUP_STATE) &&
            catchup_standbys != NULL) {
            *catchup_standbys = lappend_int(*catchup_standbys, i);
        }

        /* Must have a valid flush position */
        if (XLogRecPtrIsInvalid(walsnd->flush))
            continue;

        /* Must be streaming */
        if (walsnd->state != WALSNDSTATE_STREAMING)
            continue;

        /*
         * If the priority is equal to 1, consider this standby as sync and
         * append it to the result. Otherwise append this standby to the
         * pending list to check if it's actually sync or not later.
         */
        if (this_priority == 1) {
            result = lappend_int(result, i);
            if (am_sync != NULL && walsnd == t_thrd.walsender_cxt.MyWalSnd)
                *am_sync = true;
            if (list_length(result) == t_thrd.syncrep_cxt.SyncRepConfig->num_sync) {
                list_free(pending);
                return result; /* Exit if got enough sync standbys */
            }
        } else {
            pending = lappend_int(pending, i);
            if (am_sync != NULL && walsnd == t_thrd.walsender_cxt.MyWalSnd)
                am_in_pending = true;

            /*
             * Track the highest priority among the standbys in the pending
             * list, in order to use it as the starting priority for later
             * scan of the list. This is useful to find quickly the sync
             * standbys from the pending list later because we can skip
             * unnecessary scans for the unused priorities.
             */
            if (this_priority < next_highest_priority)
                next_highest_priority = this_priority;
        }
    }

    /*
     * Consider all pending standbys as sync if the number of them plus
     * already-found sync ones is lower than the configuration requests.
     */
    if (list_length(result) + list_length(pending) <= t_thrd.syncrep_cxt.SyncRepConfig->num_sync) {
        bool needfree = (result != NIL && pending != NIL);

        /*
         * Set *am_sync to true if this walsender is in the pending list
         * because all pending standbys are considered as sync.
         */
        if (am_sync != NULL && !(*am_sync))
            *am_sync = am_in_pending;

        result = list_concat(result, pending);
        if (needfree) {
            /* list_concat reused pending's cells; free only the list header. */
            pfree(pending);
            pending = NULL;
        }
        return result;
    }

    /*
     * Find the sync standbys from the pending list, scanning one priority
     * level per outer iteration, best (lowest number) first.
     */
    priority = next_highest_priority;
    while (priority <= lowest_priority) {
        ListCell *cell = NULL;
        ListCell *prev = NULL;
        ListCell *next = NULL;

        next_highest_priority = lowest_priority + 1;

        for (cell = list_head(pending); cell != NULL; cell = next) {
            i = lfirst_int(cell);
            walsnd = &t_thrd.walsender_cxt.WalSndCtl->walsnds[i];

            /* Save next before cell may be deleted below. */
            next = lnext(cell);

            this_priority = walsnd->sync_standby_priority;
            if (this_priority == priority) {
                result = lappend_int(result, i);
                if (am_sync != NULL && walsnd == t_thrd.walsender_cxt.MyWalSnd)
                    *am_sync = true;

                /*
                 * We should always exit here after the scan of pending list
                 * starts because we know that the list has enough elements to
                 * reach SyncRepConfig->num_sync.
                 */
                if (list_length(result) == t_thrd.syncrep_cxt.SyncRepConfig->num_sync) {
                    list_free(pending);
                    return result; /* Exit if got enough sync standbys */
                }

                /*
                 * Remove the entry for this sync standby from the list to
                 * prevent us from looking at the same entry again.
                 */
                pending = list_delete_cell(pending, cell, prev);

                continue;
            }

            if (this_priority < next_highest_priority)
                next_highest_priority = this_priority;

            prev = cell;
        }

        priority = next_highest_priority;
    }

    /* never reached, but keep compiler quiet */
    Assert(false);
    return result;
}
|
|
|
|
/*
|
|
* check to see whether synchronous standby is alive.
|
|
* Loop through all sender task and check if there is any
|
|
* synchronous standby is alive. If alive then master needs
|
|
* to continue to wait for synchronous standby otherwise,
|
|
* it does not have to and it can switch to standalone mode.
|
|
* Whenever mode is changing from one to another then
|
|
* log the appropriate log message, which will be used by DBA.
|
|
*/
|
|
void SyncRepCheckSyncStandbyAlive(void)
|
|
{
|
|
bool sync_standby_alive = false;
|
|
int i = 0;
|
|
|
|
if (!t_thrd.walsender_cxt.WalSndCtl->sync_standbys_defined ||
|
|
!t_thrd.walsender_cxt.WalSndCtl->most_available_sync) {
|
|
t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone = false;
|
|
return;
|
|
}
|
|
|
|
for (i = 0; i < g_instance.attr.attr_storage.max_wal_senders; i++) {
|
|
volatile WalSnd *walsnd = &t_thrd.walsender_cxt.WalSndCtl->walsnds[i];
|
|
|
|
SpinLockAcquire(&walsnd->mutex);
|
|
|
|
/*
|
|
* Check if this synchronous standby and its pid is not zero i.e. synchronous
|
|
* standby is alive.
|
|
*/
|
|
if (walsnd->pid != 0 && walsnd->sync_standby_priority > 0 &&
|
|
(walsnd->sendRole == SNDROLE_PRIMARY_DUMMYSTANDBY || walsnd->sendRole == SNDROLE_PRIMARY_STANDBY)) {
|
|
SpinLockRelease(&walsnd->mutex);
|
|
sync_standby_alive = true;
|
|
break;
|
|
}
|
|
|
|
SpinLockRelease(&walsnd->mutex);
|
|
}
|
|
|
|
if (sync_standby_alive && t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone) {
|
|
ereport(LOG, (errmsg("standalone synchronous master now have synchronous standby")));
|
|
|
|
t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone = false;
|
|
return;
|
|
}
|
|
|
|
if (!sync_standby_alive && !t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone &&
|
|
t_thrd.walsender_cxt.WalSndCtl->most_available_sync) {
|
|
ereport(LOG, (errmsg("synchronous master is now standalone")));
|
|
|
|
t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone = true;
|
|
|
|
/*
|
|
* If there is any waiting sender, then wake-up them as
|
|
* master has switched to standalone mode
|
|
*/
|
|
for (i = 0; i < NUM_SYNC_REP_WAIT_MODE; i++)
|
|
(void)SyncRepWakeQueue(true, i);
|
|
}
|
|
}
|
|
|
|
#ifdef USE_ASSERT_CHECKING
|
|
static bool SyncRepQueueIsOrderedByLSN(int mode)
|
|
{
|
|
PGPROC *proc = NULL;
|
|
XLogRecPtr lastLSN;
|
|
|
|
Assert(mode >= 0 && mode < NUM_SYNC_REP_WAIT_MODE);
|
|
|
|
lastLSN = 0;
|
|
|
|
proc = (PGPROC *)SHMQueueNext(&(t_thrd.walsender_cxt.WalSndCtl->SyncRepQueue[mode]),
|
|
&(t_thrd.walsender_cxt.WalSndCtl->SyncRepQueue[mode]),
|
|
offsetof(PGPROC, syncRepLinks));
|
|
|
|
while (proc != NULL) {
|
|
/*
|
|
* Check the queue is ordered by LSN
|
|
*/
|
|
if (XLByteLT(proc->waitLSN, lastLSN))
|
|
return false;
|
|
|
|
lastLSN = proc->waitLSN;
|
|
|
|
proc = (PGPROC *)SHMQueueNext(&(t_thrd.walsender_cxt.WalSndCtl->SyncRepQueue[mode]), &(proc->syncRepLinks),
|
|
offsetof(PGPROC, syncRepLinks));
|
|
}
|
|
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Obtain cluster information from cluster_static_config.
|
|
*/
|
|
int init_gauss_cluster_config(void)
|
|
{
|
|
char path[MAXPGPATH] = {0};
|
|
int err_no = 0;
|
|
int nRet = 0;
|
|
int status = 0;
|
|
uint32 nodeidx = 0;
|
|
struct stat statbuf {};
|
|
|
|
char* gausshome = gs_getenv_r("GAUSSHOME");
|
|
check_backend_env(gausshome);
|
|
|
|
nRet = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/bin/%s", gausshome, STATIC_CONFIG_FILE);
|
|
securec_check_ss_c(nRet, "\0", "\0");
|
|
|
|
if (lstat(path, &statbuf) != 0) {
|
|
return 1;
|
|
}
|
|
|
|
status = read_config_file(path, &err_no);
|
|
|
|
if (0 != status) {
|
|
return 1;
|
|
}
|
|
|
|
if (g_nodeHeader.node <= 0) {
|
|
free(g_node);
|
|
return 1;
|
|
}
|
|
|
|
for (nodeidx = 0; nodeidx < g_node_num; nodeidx++) {
|
|
if (g_node[nodeidx].node == g_nodeHeader.node) {
|
|
g_currentNode = &g_node[nodeidx];
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (NULL == g_currentNode) {
|
|
free(g_node);
|
|
return 1;
|
|
}
|
|
|
|
if (get_dynamic_dn_role() != 0) {
|
|
// failed to get dynamic dn role
|
|
free(g_node);
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* ===========================================================
|
|
* Synchronous Replication functions executed by any process
|
|
* ===========================================================
|
|
*/
|
|
bool check_synchronous_standby_names(char **newval, void **extra, GucSource source)
|
|
{
|
|
errno_t rc = EOK;
|
|
|
|
if (*newval != NULL && (*newval)[0] != '\0') {
|
|
int parse_rc;
|
|
SyncRepConfigData *pconf = NULL;
|
|
syncrep_scanner_yyscan_t yyscanner;
|
|
char* data_dir = t_thrd.proc_cxt.DataDir;
|
|
uint32 idx;
|
|
char* p = NULL;
|
|
|
|
/* Reset communication variables to ensure a fresh start */
|
|
t_thrd.syncrepgram_cxt.syncrep_parse_result = NULL;
|
|
|
|
/* Parse the synchronous_standby_names string */
|
|
yyscanner = syncrep_scanner_init(*newval);
|
|
parse_rc = syncrep_yyparse(yyscanner);
|
|
syncrep_scanner_finish(yyscanner);
|
|
|
|
if (parse_rc != 0 || t_thrd.syncrepgram_cxt.syncrep_parse_result == NULL) {
|
|
GUC_check_errcode(ERRCODE_SYNTAX_ERROR);
|
|
GUC_check_errdetail("synchronous_standby_names parser failed");
|
|
return false;
|
|
}
|
|
|
|
/* GUC extra value must be malloc'd, not palloc'd */
|
|
pconf = (SyncRepConfigData *)MemoryContextAlloc(SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE),
|
|
t_thrd.syncrepgram_cxt.syncrep_parse_result->config_size);
|
|
if (pconf == NULL)
|
|
return false;
|
|
rc = memcpy_s(pconf, t_thrd.syncrepgram_cxt.syncrep_parse_result->config_size,
|
|
t_thrd.syncrepgram_cxt.syncrep_parse_result,
|
|
t_thrd.syncrepgram_cxt.syncrep_parse_result->config_size);
|
|
securec_check(rc, "", "");
|
|
|
|
if (strcmp(pconf->member_names, "*") == 0) {
|
|
goto pass;
|
|
}
|
|
|
|
if (pconf->num_sync > pconf->nmembers) {
|
|
// The sync number must less or equals to the number of standby node names.
|
|
return false;
|
|
}
|
|
|
|
/* get current cluster information from cluster_staic_config */
|
|
if (strcmp(u_sess->attr.attr_common.application_name, "gsql") == 0 && has_static_config()
|
|
&& 0 == init_gauss_cluster_config()) {
|
|
for (idx = 0; idx < g_node_num; idx++) {
|
|
if (g_currentNode->node == g_node[idx].node) {
|
|
g_local_node_idx = idx;
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
goto pass;
|
|
}
|
|
g_local_node_name = g_node[g_local_node_idx].nodeName;
|
|
|
|
p = pconf->member_names;
|
|
for (int i = 1; i <= pconf->nmembers; i++) {
|
|
if (!CheckDataNameValue(p, data_dir)) {
|
|
return false;
|
|
}
|
|
p += strlen(p) + 1;
|
|
}
|
|
|
|
pass:
|
|
*extra = (void *)pconf;
|
|
if (t_thrd.syncrepgram_cxt.syncrep_parse_result) {
|
|
pfree(t_thrd.syncrepgram_cxt.syncrep_parse_result);
|
|
t_thrd.syncrepgram_cxt.syncrep_parse_result = NULL;
|
|
}
|
|
|
|
/*
|
|
* We need not explicitly clean up syncrep_parse_result. It, and any
|
|
* other cruft generated during parsing, will be freed when the
|
|
* current memory context is deleted. (This code is generally run in
|
|
* a short-lived context used for config file processing, so that will
|
|
* not be very long.)
|
|
*/
|
|
} else
|
|
*extra = NULL;
|
|
|
|
return true;
|
|
}
|
|
|
|
void assign_synchronous_standby_names(const char *newval, void *extra)
|
|
{
|
|
/*
|
|
* At present, SyncRepConfig is kept at thread level, on the assumption that
|
|
* it should be safe to know the latest rep config ASAP for all sessions.
|
|
* If this assumption no longer holds, please move it to session level.
|
|
*/
|
|
pfree_ext(t_thrd.syncrep_cxt.SyncRepConfig);
|
|
|
|
if (extra != NULL) {
|
|
SyncRepConfigData *pconf = (SyncRepConfigData *)extra;
|
|
errno_t rc = EOK;
|
|
t_thrd.syncrep_cxt.SyncRepConfig = (SyncRepConfigData *)MemoryContextAlloc(
|
|
THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), pconf->config_size);
|
|
rc = memcpy_s(t_thrd.syncrep_cxt.SyncRepConfig, pconf->config_size, pconf, pconf->config_size);
|
|
securec_check(rc, "", "");
|
|
}
|
|
}
|
|
|
|
void assign_synchronous_commit(int newval, void *extra)
|
|
{
|
|
switch (newval) {
|
|
case SYNCHRONOUS_COMMIT_REMOTE_RECEIVE:
|
|
SyncRepWaitMode = SYNC_REP_WAIT_RECEIVE;
|
|
break;
|
|
case SYNCHRONOUS_COMMIT_REMOTE_WRITE:
|
|
SyncRepWaitMode = SYNC_REP_WAIT_WRITE;
|
|
break;
|
|
case SYNCHRONOUS_COMMIT_REMOTE_FLUSH:
|
|
SyncRepWaitMode = SYNC_REP_WAIT_FLUSH;
|
|
break;
|
|
case SYNCHRONOUS_COMMIT_REMOTE_APPLY:
|
|
SyncRepWaitMode = SYNC_REP_WAIT_APPLY;
|
|
break;
|
|
default:
|
|
SyncRepWaitMode = SYNC_REP_NO_WAIT;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
 * Adapter between the bison-generated parser's lexer interface and the
 * scanner: unwraps the scanner's core semantic value from the parser's
 * YYSTYPE before delegating.
 */
int syncrep_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, syncrep_scanner_yyscan_t yyscanner)
{
    return syncrep_scanner_yylex(&(lvalp->yy_core), llocp, yyscanner);
}
|