openGauss-server/src/common/backend/libpq/pqcomm.cpp

/* -------------------------------------------------------------------------
 *
 * pqcomm.cpp
 *	  Communication functions between the Frontend and the Backend
 *
 * These routines handle the low-level details of communication between
 * frontend and backend.  They just shove data across the communication
 * channel, and are ignorant of the semantics of the data --- or would be,
 * except for major brain damage in the design of the old COPY OUT protocol.
 * Unfortunately, COPY OUT was designed to commandeer the communication
 * channel (it just transfers data without wrapping it into messages).
 * No other messages can be sent while COPY OUT is in progress; and if the
 * copy is aborted by an ereport(ERROR), we need to close out the copy so that
 * the frontend gets back into sync.  Therefore, these routines have to be
 * aware of COPY OUT state.  (New COPY-OUT is message-based and does *not*
 * set the DoingCopyOut flag.)
 *
 * NOTE: generally, it's a bad idea to emit outgoing messages directly with
 * pq_putbytes(), especially if the message would require multiple calls
 * to send.  Instead, use the routines in pqformat.c to construct the message
 * in a buffer and then emit it in one call to pq_putmessage.  This ensures
 * that the channel will not be clogged by an incomplete message if execution
 * is aborted by ereport(ERROR) partway through the message.  The only
 * non-libpq code that should call pq_putbytes directly is old-style COPY OUT.
 *
 * At one time, libpq was shared between frontend and backend, but now
 * the backend's "backend/libpq" is quite separate from "interfaces/libpq".
 * All that remains is similarities of names to trap the unwary...
 *
 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *	src/common/backend/libpq/pqcomm.cpp
 *
 * -------------------------------------------------------------------------
 */

/* ------------------------
 * INTERFACE ROUTINES
 *
 * setup/teardown:
 *		StreamServerPort	- Open postmaster's server port
 *		StreamConnection	- Create new connection with client
 *		StreamClose			- Close a client/backend connection
 *		TouchSocketFile		- Protect socket file against /tmp cleaners
 *		pq_init			- initialize libpq at backend startup
 *		pq_comm_reset	- reset libpq during error recovery
 *		pq_close		- shutdown libpq at backend exit
 *
 * low-level I/O:
 *		pq_getbytes		- get a known number of bytes from connection
 *		pq_getstring	- get a null terminated string from connection
 *		pq_getmessage	- get a message with length word from connection
 *		pq_getbyte		- get next byte from connection
 *		pq_peekbyte		- peek at next byte from connection
 *		pq_putbytes		- send bytes to connection (not flushed until pq_flush)
 *		pq_flush		- flush pending output
 *		pq_flush_if_writable - flush pending output if writable without blocking
 *		pq_getbyte_if_available - get a byte if available without blocking
 *
 * message-level I/O (and old-style-COPY-OUT cruft):
 *		pq_putmessage	- send a normal message (suppressed in COPY OUT mode)
 *		pq_putmessage_noblock - buffer a normal message (suppressed in COPY OUT)
 *		pq_startcopyout - inform libpq that a COPY OUT transfer is beginning
 *		pq_endcopyout	- end a COPY OUT transfer
 *
 * ------------------------
 */
#include "postgres.h"
#include "knl/knl_variable.h"
#include <fcntl.h>
#include <grp.h>
#include <sys/file.h>
#include <sys/time.h>
#ifdef HAVE_NETINET_TCP_H
#include <netinet/tcp.h>
#endif
#include <arpa/inet.h>
#ifdef HAVE_UTIME_H
#include <utime.h>
#endif
#ifdef WIN32_ONLY_COMPILER /* mstcpip.h is missing on mingw */
#include <mstcpip.h>
#endif
#ifdef HAVE_POLL_H
#include <poll.h>
#endif
#include "pgxc/pgxc.h"
#include "libpq/ip.h"
#include "libpq/libpq.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/ipc.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "libpq/pqcomm.h"
#include "replication/replicainternal.h"
#include "utils/timestamp.h"
#include "postmaster/postmaster.h"
#include "libcomm/libcomm.h"
#include "libpq/pqformat.h"
#include "pgxc/nodemgr.h"
#include "storage/lz4_file.h"
#include "tcop/stmt_retry.h"
#include "distributelayer/streamProducer.h"

#define MAXLISTEN 64
#define IP_LEN 64
#define CRC_HEADER 12  // uint32 sequence number + uint32 data length + uint32 crc checksum.

extern bool StreamThreadAmI();
extern GlobalNodeDefinition* global_node_definition;

/*
 * Buffers for low-level I/O.
 *
 * The receive buffer is fixed size. Send buffer is usually 8k, but can be
 * enlarged by pq_putmessage_noblock() if the message doesn't fit otherwise.
 */
#define PQ_BUFFER_SIZE 8192
#define PQ_SEND_BUFFER_SIZE PQ_BUFFER_SIZE

#ifdef USE_RETRY_STUB
#define PQ_RECV_BUFFER_SIZE 16
#else
#define PQ_RECV_BUFFER_SIZE PQ_BUFFER_SIZE
#endif

#define NAPTIME_PER_SEND_RETRY 100 /* max sleep between two send try (100ms) */
#define NAPTIME_PER_SEND 10        /* max sleep before sending next batch of data (10ms) */

void pq_close(int code, Datum arg);

/* Internal functions */
static int internal_putbytes(const char* s, size_t len);
static int internal_flush(void);
static void pq_set_nonblocking(bool nonblocking);
static void pq_disk_generate_checking_header(
    const char* src_data, StringInfo dest_data, uint32 data_len, uint32 seq_num);
static size_t pq_disk_read_data_block(
    LZ4File* file_handle, char* src_data, char* dest_data, uint32 data_len, uint32 seq_num);

#ifdef HAVE_UNIX_SOCKETS
static int Lock_AF_UNIX(unsigned short portNumber, const char* unixSocketName, bool is_create_psql_sock);
static int Setup_AF_UNIX(bool is_create_psql_sock);
#endif /* HAVE_UNIX_SOCKETS */

static void socket_comm_reset(void);
static int  socket_flush(void);
static int  socket_flush_if_writable(void);
static bool socket_is_send_pending(void);
static int  socket_putmessage(char msgtype, const char *s, size_t len);
static int socket_putmessage_noblock(char msgtype, const char *s, size_t len);
static void socket_startcopyout(void);
static void socket_endcopyout(bool errorAbort);

static PQcommMethods PqCommSocketMethods = {
    socket_comm_reset,
    socket_flush,
    socket_flush_if_writable,
    socket_is_send_pending,
    socket_putmessage,
    socket_putmessage_noblock,
    socket_startcopyout,
    socket_endcopyout
};

THR_LOCAL PQcommMethods *PqCommMethods = &PqCommSocketMethods;

extern bool FencedUDFMasterMode;

/* --------------------------------
 *		usages for temp file operations
 * --------------------------------
 */
typedef struct TempFileContextInfo {
    LZ4File* file_handle;
    TempFileState file_state;
    size_t file_size;
    uint32 seq_count;          /* count number of PqRecvBuffers */
    StringInfoData crc_buffer; /* store stringinfo with crc header */
} TempFileContextInfo;

void temp_file_context_init(knl_t_libpq_context* libpq_cxt)
{
    libpq_cxt->PqTempFileContextInfo = (TempFileContextInfo*)palloc0(sizeof(TempFileContextInfo));
}

/*
 * @Description: enable temp file for saving query result.
 */
void pq_disk_enable_temp_file(void)
{
    if ((u_sess->attr.attr_sql.max_cn_temp_file_size > 0) && IS_PGXC_COORDINATOR) {
        t_thrd.libpq_cxt.save_query_result_to_disk = true;
    }
}

/*
 * @Description: disable temp file to saving query result.
 */
void pq_disk_disable_temp_file(void)
{
    t_thrd.libpq_cxt.save_query_result_to_disk = false;
}

/*
 * @Description: check temp file enabled or not.
 * @return - true, tempfile is enabled. false, meaning temp file is disabled.
 */
bool pq_disk_is_temp_file_enabled(void)
{
    return t_thrd.libpq_cxt.save_query_result_to_disk;
}

/*
 * @Description: check temp file created or not.
 * @return - true, tempfile is created.
 */
bool pq_disk_is_temp_file_created(void)
{
    return t_thrd.libpq_cxt.PqTempFileContextInfo->file_handle != NULL;
}

/*
 * @Description: create temp file for saving query result. which will update PqTempFileContextInfo at the same time.
 */
static inline void pq_disk_create_tempfile(void)
{
    MemoryContext oldcontext;
    oldcontext = MemoryContextSwitchTo(u_sess->top_mem_cxt);

    t_thrd.libpq_cxt.PqTempFileContextInfo->file_handle = LZ4FileCreate(true);
    t_thrd.libpq_cxt.PqTempFileContextInfo->file_state = TEMPFILE_CREATED;
    t_thrd.libpq_cxt.PqTempFileContextInfo->file_size = 0;
    t_thrd.libpq_cxt.PqTempFileContextInfo->seq_count = 0;

    initStringInfo(&(t_thrd.libpq_cxt.PqTempFileContextInfo->crc_buffer));

    (void)MemoryContextSwitchTo(oldcontext);
}

/*
 * @Description: write query result to temp file instead of saving query result to PqSendBuffer.
 * @in - data, data pointer.
 * @in - size, size of data to be written.
 * @return - written size, EOF if error happens.
 */
static size_t pq_disk_write_tempfile(const void* data, size_t size)
{
    size_t nwritten = 0;

    if (t_thrd.libpq_cxt.PqTempFileContextInfo->crc_buffer.data != NULL) {
        resetStringInfo(&(t_thrd.libpq_cxt.PqTempFileContextInfo->crc_buffer));
    } else {
        ereport(LOG, (errmodule(MOD_CN_RETRY), errmsg("invalid crc buffer for temp file.")));
        return EOF;
    }

    MemoryContext oldcontext;
    oldcontext = MemoryContextSwitchTo(u_sess->top_mem_cxt);

    /* generate a crc header and append data after it */
    pq_disk_generate_checking_header((char*)data,
        &(t_thrd.libpq_cxt.PqTempFileContextInfo->crc_buffer),
        size,
        t_thrd.libpq_cxt.PqTempFileContextInfo->seq_count++);

    nwritten = LZ4FileWrite(t_thrd.libpq_cxt.PqTempFileContextInfo->file_handle,
        t_thrd.libpq_cxt.PqTempFileContextInfo->crc_buffer.data,
        size + CRC_HEADER);

    (void)MemoryContextSwitchTo(oldcontext);

    if (nwritten < CRC_HEADER) {
        return 0;
    }

    nwritten -= CRC_HEADER;  // don't calculate file size with crc header
    Assert(nwritten == size);

    t_thrd.libpq_cxt.PqTempFileContextInfo->file_state = TEMPFILE_FLUSHED;
    t_thrd.libpq_cxt.PqTempFileContextInfo->file_size += nwritten;

    /* be ware to write file first before flush file data to frontend */
    size_t file_size = t_thrd.libpq_cxt.PqTempFileContextInfo->file_size / (GUC_UNIT_KB);

    if (file_size > (size_t)u_sess->attr.attr_sql.max_cn_temp_file_size) {
        /*
         * if here, meaning temp file size has exceeded, in order to continue
         * query execution,
         * 1. we need to send current query result in temp file to
         *    the frontend.
         * 2. but current query can't be retried any more, since we can't
         *    protect total query result stay in temp file.
         * 3. close current file and create a new one
         */
        ereport(LOG,
            (errmodule(MOD_CN_RETRY),
                errmsg(" %s temp file exceeded, max temp file size : %d KB, current result size : %lu KB",
                    PRINT_PREFIX_TYPE_ALERT,
                    u_sess->attr.attr_sql.max_cn_temp_file_size,
                    file_size)));

        StmtRetrySetFileExceededFlag();
        pq_disk_send_to_frontend();

        pq_disk_create_tempfile();
    }

    return nwritten;
}
/*
 * @Description: extract send buffer data to temp file.
 */
void pq_disk_extract_sendbuffer(void)
{
    if (t_thrd.libpq_cxt.PqSendPointer) {
        (void)pq_disk_write_tempfile(
            t_thrd.libpq_cxt.PqSendBuffer + t_thrd.libpq_cxt.PqSendStart, t_thrd.libpq_cxt.PqSendPointer);
        t_thrd.libpq_cxt.PqSendPointer = 0;
    }
}

/*
 * @Description: reset PqTempFileContextInfo.
 */
void pq_disk_reset_tempfile_contextinfo(void)
{
    t_thrd.libpq_cxt.PqTempFileContextInfo->file_handle = NULL;
    t_thrd.libpq_cxt.PqTempFileContextInfo->file_state = TEMPFILE_DEFAULT;
    t_thrd.libpq_cxt.PqTempFileContextInfo->file_size = 0;
    t_thrd.libpq_cxt.PqTempFileContextInfo->seq_count = 0;

    if (t_thrd.libpq_cxt.PqTempFileContextInfo->crc_buffer.data != NULL) {
        pfree_ext(t_thrd.libpq_cxt.PqTempFileContextInfo->crc_buffer.data);
    }
    t_thrd.libpq_cxt.PqTempFileContextInfo->crc_buffer.len = 0;
    t_thrd.libpq_cxt.PqTempFileContextInfo->crc_buffer.maxlen = 0;
    t_thrd.libpq_cxt.PqTempFileContextInfo->crc_buffer.cursor = 0;
}

/*
 * @Description: check whether temp file is once flushed before.
 * @return - true, temp file had been flushed. false, temp file never flushed.
 */
bool pq_disk_is_flushed(void)
{
    return (t_thrd.libpq_cxt.PqTempFileContextInfo->file_state == TEMPFILE_FLUSHED);
}

/*
 * @Description: send query result data in tempfile to frontend.
 * @in use_flush_protection - if true use pq_flush to send data else use internal_flush
 * @return - 0 if OK, EOF if trouble
 */
int pq_disk_send_to_frontend(void)
{
    Assert(t_thrd.libpq_cxt.PqTempFileContextInfo->file_handle);

    int res = 0;

    ereport(DEBUG2,
        (errmodule(MOD_CN_RETRY),
            errmsg("data in tempfile \"%zu\". ", t_thrd.libpq_cxt.PqTempFileContextInfo->file_size)));

    LZ4FileRewind(t_thrd.libpq_cxt.PqTempFileContextInfo->file_handle);

    size_t total_read = 0;
    size_t read_size = 0;
    uint32 read_seq = 0;

    t_thrd.libpq_cxt.PqTempFileContextInfo->file_state = TEMPFILE_ON_SENDING;

    char* read_data = NULL;
    read_data = (char*)palloc0(t_thrd.libpq_cxt.PqSendBufferSize + CRC_HEADER);

    do {
        /* exclude a crc header and extract data after it */
        read_size = pq_disk_read_data_block(t_thrd.libpq_cxt.PqTempFileContextInfo->file_handle,
            read_data,
            t_thrd.libpq_cxt.PqSendBuffer,
            t_thrd.libpq_cxt.PqSendBufferSize,
            read_seq);
        if (read_size == 0) {
            break;
        }

        read_seq++;
        t_thrd.libpq_cxt.PqSendPointer = read_size;
        total_read += read_size;

        res = internal_flush();
        if (res == EOF) {
            ereport(LOG, (errmodule(MOD_CN_RETRY), errmsg("get EOF while flushing data.")));
            t_thrd.libpq_cxt.PqTempFileContextInfo->file_state = TEMPFILE_ERROR_SEND;
            break;
        }
    } while (read_size == (size_t)t_thrd.libpq_cxt.PqSendBufferSize);

    pfree_ext(read_data);

    if (read_seq != t_thrd.libpq_cxt.PqTempFileContextInfo->seq_count) {
        ereport(FATAL,
            (errmodule(MOD_CN_RETRY),
                errcode(ERRCODE_DATA_EXCEPTION),
                errmsg("Last read message sequence %u is not equal to the max written message sequence %u",
                    read_seq,
                    t_thrd.libpq_cxt.PqTempFileContextInfo->seq_count)));
    }

    t_thrd.libpq_cxt.PqTempFileContextInfo->file_size -= total_read;

    if (t_thrd.libpq_cxt.PqTempFileContextInfo->file_state != TEMPFILE_ERROR_SEND) {
        t_thrd.libpq_cxt.PqTempFileContextInfo->file_state = TEMPFILE_SENDED;
    }

    ereport(DEBUG2,
        (errmodule(MOD_CN_RETRY),
            errmsg("remaining data in tempfile \"%zu\", total read data \"%zu\" ",
                t_thrd.libpq_cxt.PqTempFileContextInfo->file_size,
                total_read)));

    /* since we can not reuse tempfile discard it */
    pq_disk_discard_temp_file();
    return res;
}

/*
 * @Description: discard temp file if created, which will close and delete temp file at same time.
 */
void pq_disk_discard_temp_file(void)
{
    LZ4File* file_handle = t_thrd.libpq_cxt.PqTempFileContextInfo->file_handle;
    pq_disk_reset_tempfile_contextinfo();

    if (file_handle != NULL) {
        LZ4FileClose(file_handle);
    }
}

/*
 * @Description: get temp file state.
 * @return - file state
 */
TempFileState pq_disk_file_state(void)
{
    return t_thrd.libpq_cxt.PqTempFileContextInfo->file_state;
}

/* --------------------------------
 *		pq_init - initialize libpq at backend startup
 * --------------------------------
 */
void pq_init(void)
{
    t_thrd.libpq_cxt.PqSendBufferSize = g_instance.attr.attr_network.cn_send_buffer_size * (GUC_UNIT_KB);
#ifdef USE_RETRY_STUB
    t_thrd.libpq_cxt.PqSendBufferSize = 64;
#endif
    t_thrd.libpq_cxt.PqSendBuffer = (char*)MemoryContextAlloc(t_thrd.top_mem_cxt, t_thrd.libpq_cxt.PqSendBufferSize);

    t_thrd.libpq_cxt.PqRecvBuffer = (char*)MemoryContextAlloc(t_thrd.top_mem_cxt, PQ_RECV_BUFFER_SIZE);
    t_thrd.libpq_cxt.PqRecvBufferSize = PQ_RECV_BUFFER_SIZE;
    t_thrd.libpq_cxt.PqSendPointer = t_thrd.libpq_cxt.PqSendStart = t_thrd.libpq_cxt.PqRecvPointer =
        t_thrd.libpq_cxt.PqRecvLength = 0;
    t_thrd.libpq_cxt.PqCommBusy = false;
    t_thrd.libpq_cxt.DoingCopyOut = false;

    pq_disk_reset_tempfile_contextinfo();

    on_proc_exit(pq_close, 0);
}

/* --------------------------------
 *		pq_comm_reset - reset libpq during error recovery
 *
 * This is called from error recovery at the outer idle loop.  It's
 * just to get us out of trouble if we somehow manage to elog() from
 * inside a pqcomm.c routine (which ideally will never happen, but...)
 * --------------------------------
 */
static void socket_comm_reset(void)
{
    /* Do not throw away pending data, but do reset the busy flag */
    t_thrd.libpq_cxt.PqCommBusy = false;
    /* We can abort any old-style COPY OUT, too */
    pq_endcopyout(true);
}

/* --------------------------------
 *		pq_close - shutdown libpq at backend exit
 *
 * Note: in a standalone backend u_sess->proc_cxt.MyProcPort will be null,
 * don't crash during exit...
 * --------------------------------
 */
void pq_close(int code, Datum arg)
{
    if (t_thrd.postmaster_cxt.KeepSocketOpenForStream) {
        return;
    }

    if (u_sess->proc_cxt.MyProcPort != NULL) {
        if (u_sess->proc_cxt.MyProcPort->gss != NULL) {
#if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
#ifdef ENABLE_GSS
            OM_uint32 min_s;

            /* Shutdown GSSAPI layer */
            if (u_sess->proc_cxt.MyProcPort->gss->ctx != GSS_C_NO_CONTEXT) {
                gss_delete_sec_context(&min_s, &u_sess->proc_cxt.MyProcPort->gss->ctx, NULL);
            }

            if (u_sess->proc_cxt.MyProcPort->gss->cred != GSS_C_NO_CREDENTIAL) {
                gss_release_cred(&min_s, &u_sess->proc_cxt.MyProcPort->gss->cred);
            }

#endif /* ENABLE_GSS */
            /* GSS and SSPI share the port->gss struct */
            pfree_ext(u_sess->proc_cxt.MyProcPort->gss);
#endif /* ENABLE_GSS || ENABLE_SSPI */
        }

        /* Cleanly shut down SSL layer */
        secure_close(u_sess->proc_cxt.MyProcPort);

        /*
         * Formerly we did an explicit close() here, but it seems better to
         * leave the socket open until the process dies.  This allows clients
         * to perform a "synchronous close" if they care --- wait till the
         * transport layer reports connection closure, and you can be sure the
         * backend has exited.
         *
         * We do set sock to PGINVALID_SOCKET to prevent any further I/O,
         * though.
         */
        if (u_sess->proc_cxt.MyProcPort && u_sess->proc_cxt.MyProcPort->is_logic_conn) {
            gs_close_gsocket(&(u_sess->proc_cxt.MyProcPort->gs_sock));
        } else if (u_sess->proc_cxt.MyProcPort) {
            if (u_sess->proc_cxt.MyProcPort->sock != PGINVALID_SOCKET) {
                closesocket(u_sess->proc_cxt.MyProcPort->sock);
            }
            u_sess->proc_cxt.MyProcPort->sock = PGINVALID_SOCKET;
        }
    }
}

/*
 * Streams -- wrapper around Unix socket system calls
 *
 * Stream functions are used for vanilla TCP connection protocol.
 */

/* StreamDoUnlink()
 * Shutdown routine for backend connection
 * If a Unix socket is used for communication, explicitly close it.
 */
#ifdef HAVE_UNIX_SOCKETS
static void StreamDoUnlink(int code, Datum arg)
{
    if ((int)arg == PSQL_LISTEN_SOCKET) {
        Assert(t_thrd.libpq_cxt.sock_path[0]);
        unlink(t_thrd.libpq_cxt.sock_path);
    } else if ((int)arg == HA_LISTEN_SOCKET) {
        Assert(t_thrd.libpq_cxt.ha_sock_path[0]);
        unlink(t_thrd.libpq_cxt.ha_sock_path);
    }
}
#endif /* HAVE_UNIX_SOCKETS */

/*
 * StreamServerPort -- open a "listening" port to accept connections.
 *
 * Successfully opened sockets are added to the ListenSocket[] array,
 * at the first position that isn't PGINVALID_SOCKET.
 *
 * RETURNS: STATUS_OK or STATUS_ERROR
 */
int StreamServerPort(int family, char* hostName, unsigned short portNumber, const char* unixSocketName,
    pgsocket ListenSocket[], pgsocket SctpListenSocket[], int MaxListen, bool add_localaddr_flag,
    bool is_create_psql_sock, bool is_create_libcomm_sock)
{
#define RETRY_SLEEP_TIME 1000000L
    pgsocket fd = PGINVALID_SOCKET;
    pgsocket fd_sctp = PGINVALID_SOCKET;
    int err;
    int maxconn;
    int ret;
    char portNumberStr[32];
    const char* familyDesc = NULL;
    char familyDescBuf[64];
    char* service = NULL;
    struct addrinfo* addrs = NULL;
    struct addrinfo* addr = NULL;
    struct addrinfo hint;
    int listen_index = 0;
    int sctp_listen_index = 0;
    int added = 0;
    const int tryBindNum = 3;
    int i = 0;
    errno_t rc = EOK;

#if !defined(WIN32) || defined(IPV6_V6ONLY)
    int one = 1;
#endif

    /* Initialize hint structure */
    rc = memset_s(&hint, sizeof(hint), 0, sizeof(hint));
    securec_check(rc, "\0", "\0");
    hint.ai_family = family;
    hint.ai_flags = AI_PASSIVE;
    hint.ai_socktype = SOCK_STREAM;

#ifdef HAVE_UNIX_SOCKETS
    if (family == AF_UNIX) {
        /* Lock_AF_UNIX will also fill in sock_path. */
        if (Lock_AF_UNIX(portNumber, unixSocketName, is_create_psql_sock) != STATUS_OK) {
            return STATUS_ERROR;
        }
        service = (is_create_psql_sock ? t_thrd.libpq_cxt.sock_path : t_thrd.libpq_cxt.ha_sock_path);
    } else
#endif /* HAVE_UNIX_SOCKETS */
    {
        rc = snprintf_s(portNumberStr, sizeof(portNumberStr), sizeof(portNumberStr) - 1, "%hu", portNumber);
        securec_check_ss(rc, "\0", "\0");
        service = portNumberStr;
    }

    ret = pg_getaddrinfo_all(hostName, service, &hint, &addrs);
    if (ret || addrs == NULL) {
        if (hostName != NULL) {
            ereport(LOG,
                (errmsg("could not translate host name \"%s\", service \"%s\" to address: %s",
                    hostName,
                    service,
                    gai_strerror(ret))));
        } else {
            ereport(LOG, (errmsg("could not translate service \"%s\" to address: %s", service, gai_strerror(ret))));
        }
        if (addrs != NULL) {
            pg_freeaddrinfo_all(hint.ai_family, addrs);
        }
        return STATUS_ERROR;
    }

    for (addr = addrs; addr; addr = addr->ai_next) {
        /* init value of fd */
        fd = PGINVALID_SOCKET;
        fd_sctp = PGINVALID_SOCKET;

        if (!IS_AF_UNIX(family) && IS_AF_UNIX(addr->ai_family)) {
            /*
             * Only set up a unix domain socket when they really asked for it.
             * The service/port is different in that case.
             */
            continue;
        }

        /* See if there is still room to add 1 more socket. */
        for (; listen_index < MaxListen; listen_index++) {
            if (ListenSocket[listen_index] == PGINVALID_SOCKET) {
                break;
            }
        }
        if (listen_index >= MaxListen) {
            ereport(LOG, (errmsg("could not bind to all requested addresses: MAXLISTEN (%d) exceeded", MaxListen)));
            break;
        }
        if (SCTP_CN_DN_CONN) {

            for (; sctp_listen_index < MaxListen; sctp_listen_index++) {
                if (SctpListenSocket[sctp_listen_index] == PGINVALID_SOCKET) {
                    break;
                }
            }
            if (sctp_listen_index >= MaxListen) {
                ereport(LOG,
                    (errmsg("sctp could not bind to all requested addresses: MAXLISTEN (%d) exceeded", MaxListen)));
                break;
            }
        }

        /* set up family name for possible error messages */
        switch (addr->ai_family) {
            case AF_INET:
                familyDesc = _("IPv4");
                break;
#ifdef HAVE_IPV6
            case AF_INET6:
                familyDesc = _("IPv6");
                break;
#endif
#ifdef HAVE_UNIX_SOCKETS
            case AF_UNIX:
                familyDesc = _("Unix");
                break;
#endif
            default:
                rc = snprintf_s(familyDescBuf,
                    sizeof(familyDescBuf),
                    sizeof(familyDescBuf) - 1,
                    _("unrecognized address family %d"),
                    addr->ai_family);
                securec_check_ss(rc, "\0", "\0");
                familyDesc = familyDescBuf;
                break;
        }

        if ((fd = socket(addr->ai_family, SOCK_STREAM, 0)) < 0) {
            ereport(LOG,
                (errcode_for_socket_access(),
                    /* translator: %s is IPv4, IPv6, or Unix */
                    errmsg("could not create %s socket: %m", familyDesc)));
            goto errhandle;
        }
        if (!IS_AF_UNIX(addr->ai_family)) {
            if (SCTP_CN_DN_CONN && !(addr->ai_family == AF_INET6)) {
                if ((fd_sctp = socket(addr->ai_family, SOCK_STREAM, IPPROTO_SCTP)) < 0) {
                    ereport(LOG,
                        (errcode_for_socket_access(),
                            /* translator: %s is IPv4, IPv6, or Unix */
                            errmsg("could not create SCTP %s socket: %m", familyDesc)));
                    goto errhandle;
                }
            }
        }
        /*
         * save unix domain sock, thus we will know when
         * rece flow ctrl thread send the libcomm addr to server loop
         */
        if (is_create_libcomm_sock) {
            t_thrd.libpq_cxt.listen_fd_for_recv_flow_ctrl = fd;
        }

#ifdef F_SETFD
        if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) {
            ereport(LOG, (errcode_for_socket_access(), errmsg("setsockopt(FD_CLOEXEC) failed: %m")));
            goto errhandle;
        }
        if (!IS_AF_UNIX(addr->ai_family)) {
            if (SCTP_CN_DN_CONN && !(addr->ai_family == AF_INET6)) {
                if (fcntl(fd_sctp, F_SETFD, FD_CLOEXEC) == -1) {
                    ereport(LOG, (errcode_for_socket_access(), errmsg("sctp setsockopt(FD_CLOEXEC) failed: %m")));
                    goto errhandle;
                }
            }
        }
#endif /* F_SETFD */

#ifndef WIN32

        /*
         * Without the SO_REUSEADDR flag, a new postmaster can't be started
         * right away after a stop or crash, giving "address already in use"
         * error on TCP ports.
         *
         * On win32, however, this behavior only happens if the
         * SO_EXLUSIVEADDRUSE is set. With SO_REUSEADDR, win32 allows multiple
         * servers to listen on the same address, resulting in unpredictable
         * behavior. With no flags at all, win32 behaves as Unix with
         * SO_REUSEADDR.
         */
        if (!IS_AF_UNIX(addr->ai_family)) {
            if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&one, sizeof(one))) == -1) {
                ereport(LOG, (errcode_for_socket_access(), errmsg("setsockopt(SO_REUSEADDR) failed: %m")));
                goto errhandle;
            }
            if (SCTP_CN_DN_CONN && !(addr->ai_family == AF_INET6)) {
                if ((setsockopt(fd_sctp, SOL_SOCKET, SO_REUSEADDR, (char*)&one, sizeof(one))) == -1) {
                    ereport(LOG, (errcode_for_socket_access(), errmsg("sctp setsockopt(SO_REUSEADDR) failed: %m")));
                    goto errhandle;
                }
            }
        }
#endif

#ifdef IPV6_V6ONLY
        if (addr->ai_family == AF_INET6) {
            if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char*)&one, sizeof(one)) == -1) {
                ereport(LOG, (errcode_for_socket_access(), errmsg("setsockopt(IPV6_V6ONLY) failed: %m")));
                goto errhandle;
            }
        }
#endif

        /*
         * Note: This might fail on some OS's, like Linux older than
         * 2.4.21-pre3, that don't have the IPV6_V6ONLY socket option, and map
         * ipv4 addresses to ipv6.	It will show ::ffff:ipv4 for all ipv4
         * connections.if the bind failded, we sleep for 100ms and try it again.
         * We will try at most 30 times, because of slow clean of OS.
         */
        for (i = 0; i != tryBindNum; ++i) {
            err = bind(fd, addr->ai_addr, addr->ai_addrlen);
            if (err < 0) {
                /* need not retry when a addr is added before */
                if (added != 0) {
                    i = tryBindNum;
                    break;
                }
                ereport(LOG,
                    (errcode_for_socket_access(),
                        /* translator: %s is IPv4, IPv6, or Unix */
                        errmsg("could not bind %s socket at the %d time: %m", familyDesc, i),
                        (IS_AF_UNIX(addr->ai_family))
                            ? errhint("Is another postmaster already running on port %d?"
                                      " If not, remove socket file \"%s\" and retry.",
                                      (int)portNumber, service)
                            : errhint("Port %u is used, run 'netstat -anop|grep %u' or "
                                      "'lsof -i:%u'(need root) to see who is using this port.",
                                      portNumber, portNumber, portNumber)));
                pg_usleep(RETRY_SLEEP_TIME);
                continue;
            } else {
                break;
            }
        }
        if (i == tryBindNum) {
            goto errhandle;
        }

        if (!IS_AF_UNIX(addr->ai_family)) {
            if (SCTP_CN_DN_CONN && !(addr->ai_family == AF_INET6)) {
                for (i = 0; i != tryBindNum; ++i) {
                    err = bind(fd_sctp, addr->ai_addr, addr->ai_addrlen);
                    if (err < 0) {
                        /* need not retry when a addr is added before */
                        if (added != 0) {
                            i = tryBindNum;
                            break;
                        }
                        ereport(LOG,
                            (errcode_for_socket_access(),
                                // translator: %s is IPv4, IPv6, or Unix
                                errmsg("sctp could not bind %s socket at the %d time: %m", familyDesc, i),
                                (IS_AF_UNIX(addr->ai_family))
                                    ? errhint("sctp Is another postmaster already running on port %d?"
                                              " If not, remove socket file \"%s\" and retry.",
                                              (int)portNumber, service)
                                    : errhint("sctp Is another postmaster already running on port %d?"
                                              " If not, wait a few seconds and retry.", (int)portNumber)));
                        pg_usleep(RETRY_SLEEP_TIME);
                        continue;
                    } else {
                        break;
                    }
                }
                if (i == tryBindNum) {
                    goto errhandle;
                }
            }
        }
#ifdef HAVE_UNIX_SOCKETS
        if (addr->ai_family == AF_UNIX) {
            if (Setup_AF_UNIX(is_create_psql_sock) != STATUS_OK) {
                goto errhandle;
            }
        }
#endif

        /*
         * Select appropriate accept-queue length limit.  PG_SOMAXCONN is only
         * intended to provide a clamp on the request on platforms where an
         * overly large request provokes a kernel error (are there any?).
         */
        maxconn = g_instance.attr.attr_network.MaxConnections * 6;
        maxconn = Max(maxconn, PG_SOMINCONN);
        maxconn = Min(maxconn, PG_SOMAXCONN);

        err = listen(fd, maxconn);
        if (err < 0) {
            ereport(LOG,
                (errcode_for_socket_access(),
                    /* translator: %s is IPv4, IPv6, or Unix */
                    errmsg("could not listen on %s socket: %m", familyDesc)));
            goto errhandle;
        }
        ListenSocket[listen_index] = fd;
        if (!IS_AF_UNIX(addr->ai_family)) {
            if (SCTP_CN_DN_CONN && !(addr->ai_family == AF_INET6)) {
                err = listen(fd_sctp, maxconn);
                if (err < 0) {
                    ereport(LOG,
                        (errcode_for_socket_access(),
                            /* translator: %s is IPv4, IPv6, or Unix */
                            errmsg("sctp could not listen on %s socket: %m", familyDesc)));
                    closesocket(fd_sctp);
                    continue;
                }
                SctpListenSocket[sctp_listen_index] = fd_sctp;
            }
        }
        added++;
        if (add_localaddr_flag == true) {
            struct sockaddr* sinp = NULL;
            char* result = NULL;

            sinp = (struct sockaddr*)(addr->ai_addr);
            if (addr->ai_family == AF_INET6) {
                result = inet_net_ntop(AF_INET6,
                    &((struct sockaddr_in*)sinp)->sin_addr,
                    128,
                    t_thrd.postmaster_cxt.LocalAddrList[t_thrd.postmaster_cxt.LocalIpNum],
                    IP_LEN);
            } else if (addr->ai_family == AF_INET) {
                result = inet_net_ntop(AF_INET,
                    &((struct sockaddr_in*)sinp)->sin_addr,
                    32,
                    t_thrd.postmaster_cxt.LocalAddrList[t_thrd.postmaster_cxt.LocalIpNum],
                    IP_LEN);
            }
            if (result == NULL) {
                ereport(WARNING, (errmsg("inet_net_ntop failed, error: %d", EAFNOSUPPORT)));
            } else {
                t_thrd.postmaster_cxt.LocalIpNum++;
            }
        }
        if (is_create_psql_sock) {
            t_thrd.postmaster_cxt.listen_sock_type[listen_index] = PSQL_LISTEN_SOCKET;
        } else {
            t_thrd.postmaster_cxt.listen_sock_type[listen_index] = HA_LISTEN_SOCKET;
        }

        continue;

    errhandle:
        if (fd != PGINVALID_SOCKET) {
            closesocket(fd);
        }
        if (fd_sctp != PGINVALID_SOCKET) {
            closesocket(fd_sctp);
        }
    }

    pg_freeaddrinfo_all(hint.ai_family, addrs);

    if (!added) {
        return STATUS_ERROR;
    }

    return STATUS_OK;
}

#ifdef HAVE_UNIX_SOCKETS

/*
 * Lock_AF_UNIX -- configure unix socket file path
 */
static int Lock_AF_UNIX(unsigned short portNumber, const char* unixSocketName, bool is_create_psql_sock)
{
    char* sock_path = NULL;

    if (is_create_psql_sock) {
        UNIXSOCK_PATH(t_thrd.libpq_cxt.sock_path, portNumber, unixSocketName);
        sock_path = t_thrd.libpq_cxt.sock_path;
    } else {
        UNIXSOCK_PATH(t_thrd.libpq_cxt.ha_sock_path, portNumber, unixSocketName);
        sock_path = t_thrd.libpq_cxt.ha_sock_path;
    }

    if (strlen(sock_path) >= UNIXSOCK_PATH_BUFLEN) {
        ereport(LOG,
            (errmsg("Unix-domain socket path \"%s\" is too long (maximum %d bytes)",
                sock_path,
                (int)(UNIXSOCK_PATH_BUFLEN - 1))));
        return STATUS_ERROR;
    }

    /*
     * Grab an interlock file associated with the socket file.
     *
     * Note: there are two reasons for using a socket lock file, rather than
     * trying to interlock directly on the socket itself.  First, it's a lot
     * more portable, and second, it lets us remove any pre-existing socket
     * file without race conditions.
     */
    CreateSocketLockFile(sock_path, true, is_create_psql_sock);

    /*
     * Once we have the interlock, we can safely delete any pre-existing
     * socket file to avoid failure at bind() time.
     */
    unlink(sock_path);

    return STATUS_OK;
}

/*
 * Setup_AF_UNIX -- configure unix socket permissions
 */
static int Setup_AF_UNIX(bool is_create_psql_sock)
{
    /* Arrange to unlink the socket file at exit */
    on_proc_exit(StreamDoUnlink, is_create_psql_sock ? (Datum)PSQL_LISTEN_SOCKET : (Datum)HA_LISTEN_SOCKET);

    const char* sock_path = (is_create_psql_sock ? t_thrd.libpq_cxt.sock_path : t_thrd.libpq_cxt.ha_sock_path);

    /*
     * Fix socket ownership/permission if requested.  Note we must do this
     * before we listen() to avoid a window where unwanted connections could
     * get accepted.
     */
    AssertEreport(g_instance.attr.attr_network.Unix_socket_group, MOD_OPT, "");
    if (g_instance.attr.attr_network.Unix_socket_group[0] != '\0') {
#ifdef WIN32
        ereport(WARNING, (errmsg("configuration item unix_socket_group is not supported on this platform")));
#else
        char* endptr = NULL;
        unsigned long val;
        gid_t gid;

        val = strtoul(g_instance.attr.attr_network.Unix_socket_group, &endptr, 10);
        if (*endptr == '\0') { /* numeric group id */
            gid = val;
        } else { /* convert group name to id */
            // use the getgrnam_r to guarantee thread safe
            struct group grp;
            struct group* grpptr = &grp;
            struct group* tmpGrpPtr = NULL;
            char grpBuffer[200];
            int grpLineLen = sizeof(grpBuffer);
            int ret;

            if ((ret = getgrnam_r(
                     g_instance.attr.attr_network.Unix_socket_group, grpptr, grpBuffer, grpLineLen, &tmpGrpPtr)) != 0) {
                ereport(LOG, (errmsg("getgrnam_r() error, error num is %d", ret)));
                return STATUS_ERROR;
            }

            if (tmpGrpPtr == NULL) {
                ereport(LOG, (errmsg("group \"%s\" does not exist", g_instance.attr.attr_network.Unix_socket_group)));
                return STATUS_ERROR;
            }
            gid = grp.gr_gid;
        }
        if (chown(sock_path, -1, gid) == -1) {
            ereport(LOG, (errcode_for_file_access(), errmsg("could not set group of file \"%s\": %m", sock_path)));
            return STATUS_ERROR;
        }
#endif
    }

    if (chmod(sock_path, g_instance.attr.attr_network.Unix_socket_permissions) == -1) {
        ereport(LOG, (errcode_for_file_access(), errmsg("could not set permissions of file \"%s\": %m", sock_path)));
        return STATUS_ERROR;
    }
    return STATUS_OK;
}
#endif /* HAVE_UNIX_SOCKETS */

/*
 * StreamConnection -- create a new connection with client using
 *		server port.  Set port->sock to the FD of the new connection.
 *
 * ASSUME: that this doesn't need to be non-blocking because
 *		the Postmaster uses select() to tell when the server master
 *		socket is ready for accept().
 *
 * RETURNS: STATUS_OK or STATUS_ERROR
 */
int StreamConnection(pgsocket server_fd, Port* port)
{
    /* accept connection and fill in the client (remote) address */
    port->raddr.salen = sizeof(port->raddr.addr);
    if ((port->sock = accept4(server_fd, (struct sockaddr*)&port->raddr.addr, &port->raddr.salen, SOCK_CLOEXEC)) < 0) {
        ereport(LOG, (errcode_for_socket_access(), errmsg("could not accept new connection: %m")));

        /*
         * If accept() fails then postmaster.c will still see the server
         * socket as read-ready, and will immediately try again.  To avoid
         * uselessly sucking lots of CPU, delay a bit before trying again.
         * (The most likely reason for failure is being out of kernel file
         * table slots; we can do little except hope some will get freed up.)
         */
        pg_usleep(100000L); /* wait 0.1 sec */
        return STATUS_ERROR;
    }

#ifdef SCO_ACCEPT_BUG

    /*
     * UnixWare 7+ and OpenServer 5.0.4 are known to have this bug, but it
     * shouldn't hurt to catch it for all versions of those platforms.
     */
    if (port->raddr.addr.ss_family == 0) {
        port->raddr.addr.ss_family = AF_UNIX;
    }
#endif

    /* fill in the server (local) address */
    port->laddr.salen = sizeof(port->laddr.addr);
    if (getsockname(port->sock, (struct sockaddr*)&port->laddr.addr, &port->laddr.salen) < 0) {
        ereport(LOG, (errmsg("getsockname() failed: %m")));
        return STATUS_ERROR;
    }

    /* select NODELAY, KEEPALIVE and SO_RCVTIMEO options if it's a TCP connection */
    if (!IS_AF_UNIX(port->laddr.addr.ss_family)) {
        int on;
        int opval = 0;
        on = 1;
        socklen_t oplen = sizeof(opval);
        if (getsockopt(port->sock, SOL_SOCKET, SO_PROTOCOL, &opval, &oplen) < 0) {
            ereport(LOG, (errmsg("getsockopt(SO_PROTOCOL) failed: %m")));
            return STATUS_ERROR;
        }
        if (opval != IPPROTO_SCTP) {
            if (setsockopt(port->sock, IPPROTO_TCP, TCP_NODELAY, (char*)&on, sizeof(on)) < 0) {
                ereport(LOG, (errmsg("setsockopt(TCP_NODELAY) failed: %m")));
                return STATUS_ERROR;
            }
            if (setsockopt(port->sock, SOL_SOCKET, SO_KEEPALIVE, (char*)&on, sizeof(on)) < 0) {
                ereport(LOG, (errmsg("setsockopt(SO_KEEPALIVE) failed: %m")));
                return STATUS_ERROR;
            }
            struct timeval tv = {u_sess->attr.attr_common.tcpRecvTimeout, 0};
            if (setsockopt(port->sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval)) < 0) {
                ereport(LOG, (errmsg("setsockopt(SO_RCVTIMEO) failed: %m")));
                return STATUS_ERROR;
            }
        } else {
            if (setsockopt(port->sock, IPPROTO_SCTP, COMM_NO_DELAY, (char*)&on, sizeof(on)) < 0) {
                ereport(LOG, (errmsg("setsockopt(COMM_NO_DELAY) failed: %m")));
                return STATUS_ERROR;
            }
        }
#ifdef WIN32

        /*
         * This is a Win32 socket optimization.  The ideal size is 32k.
         * http://support.microsoft.com/kb/823764/EN-US/
         */
        on = PQ_SEND_BUFFER_SIZE * 4;
        if (setsockopt(port->sock, SOL_SOCKET, SO_SNDBUF, (char*)&on, sizeof(on)) < 0) {
            ereport(LOG, (errmsg("setsockopt(SO_SNDBUF) failed: %m")));
            return STATUS_ERROR;
        }
#endif

        /*
         * Also apply the current keepalive parameters.  If we fail to set a
         * parameter, don't error out, because these aren't universally
         * supported.  (Note: you might think we need to reset the GUC
         * variables to 0 in such a case, but it's not necessary because the
         * show hooks for these variables report the truth anyway.)
         */
        if (opval == IPPROTO_TCP) {
            (void)pq_setkeepalivesidle(u_sess->attr.attr_common.tcp_keepalives_idle, port);
            (void)pq_setkeepalivesinterval(u_sess->attr.attr_common.tcp_keepalives_interval, port);
            (void)pq_setkeepalivescount(u_sess->attr.attr_common.tcp_keepalives_count, port);
        }
    }

    return STATUS_OK;
}

/*
 * StreamClose -- close a client/backend connection
 *
 * NOTE: this is NOT used to terminate a session; it is just used to release
 * the file descriptor in a process that should no longer have the socket
 * open.  (For example, the postmaster calls this after passing ownership
 * of the connection to a child process.)  It is expected that someone else
 * still has the socket open.  So, we only want to close the descriptor,
 * we do NOT want to send anything to the far end.
 */
void StreamClose(pgsocket sock)
{
    closesocket(sock);
}

/*
 * TouchSocketFileInternel & TouchSocketFile -- mark socket file as recently accessed
 *
 * This routine should be called every so often to ensure that the socket
 * file has a recent mod date (ordinary operations on sockets usually won't
 * change the mod date).  That saves it from being removed by
 * overenthusiastic /tmp-directory-cleaner daemons.  (Another reason we should
 * never have put the socket file in /tmp...)
 */
void TouchSocketFileInternel(const char* sock_path)
{
    /* Do nothing if we did not create a socket... */
    if (sock_path[0] != '\0') {
        /*
         * utime() is POSIX standard, utimes() is a common alternative. If we
         * have neither, there's no way to affect the mod or access time of
         * the socket :-(
         *
         * In either path, we ignore errors; there's no point in complaining.
         */
#ifdef HAVE_UTIME
        utime(sock_path, NULL);
#else /* !HAVE_UTIME */
#ifdef HAVE_UTIMES
        utimes(sock_path, NULL);
#endif /* HAVE_UTIMES */
#endif /* HAVE_UTIME */
    }
}

void TouchSocketFile(void)
{
    TouchSocketFileInternel(t_thrd.libpq_cxt.sock_path);
    TouchSocketFileInternel(t_thrd.libpq_cxt.ha_sock_path);
}

/* --------------------------------
 * Low-level I/O routines begin here.
 *
 * These routines communicate with a frontend client across a connection
 * already established by the preceding routines.
 * --------------------------------
 */

/* --------------------------------
 *			  pq_set_nonblocking - set socket blocking/non-blocking
 *
 * Sets the socket non-blocking if nonblocking is TRUE, or sets it
 * blocking otherwise.
 * --------------------------------
 */
static void pq_set_nonblocking(bool nonblocking)
{
    if (u_sess->proc_cxt.MyProcPort->noblock == nonblocking) {
        return;
    }

#ifdef WIN32
    pgwin32_noblock = nonblocking ? 1 : 0;
#else

    /*
     * Use COMMERROR on failure, because ERROR would try to send the error to
     * the client, which might require changing the mode again, leading to
     * infinite recursion.
     */
    if (nonblocking) {
        if (!pg_set_noblock(u_sess->proc_cxt.MyProcPort->sock)) {
            ereport(COMMERROR, (errmsg("could not set socket to non-blocking mode: %m")));
        }
    } else {
        if (!pg_set_block(u_sess->proc_cxt.MyProcPort->sock)) {
            ereport(COMMERROR, (errmsg("could not set socket to blocking mode: %m")));
        }
    }
#endif
    u_sess->proc_cxt.MyProcPort->noblock = nonblocking;
}

/* --------------------------------
 *		pq_recvbuf - load some bytes into the input buffer
 *
 *		returns 0 if OK, EOF if trouble
 * --------------------------------
 */
static int pq_recvbuf(void)
{
    if (t_thrd.libpq_cxt.PqRecvPointer > 0) {
        if (t_thrd.libpq_cxt.PqRecvLength > t_thrd.libpq_cxt.PqRecvPointer) {
            /* still some unread data, left-justify it in the buffer */
            errno_t rc = memmove_s(t_thrd.libpq_cxt.PqRecvBuffer,
                t_thrd.libpq_cxt.PqRecvLength - t_thrd.libpq_cxt.PqRecvPointer,
                t_thrd.libpq_cxt.PqRecvBuffer + t_thrd.libpq_cxt.PqRecvPointer,
                t_thrd.libpq_cxt.PqRecvLength - t_thrd.libpq_cxt.PqRecvPointer);
            securec_check(rc, "\0", "\0");
            t_thrd.libpq_cxt.PqRecvLength -= t_thrd.libpq_cxt.PqRecvPointer;
            t_thrd.libpq_cxt.PqRecvPointer = 0;
        } else {
            t_thrd.libpq_cxt.PqRecvLength = t_thrd.libpq_cxt.PqRecvPointer = 0;
        }
    }

    /* Ensure that we're in blocking mode */
    pq_set_nonblocking(false);

    /* Can fill buffer from PqRecvLength and upwards */
    for (;;) {
        int r;

        WaitState oldStatus = pgstat_report_waitstatus(STATE_WAIT_COMM);
        r = secure_read(u_sess->proc_cxt.MyProcPort,
            t_thrd.libpq_cxt.PqRecvBuffer + t_thrd.libpq_cxt.PqRecvLength,
            PQ_RECV_BUFFER_SIZE - t_thrd.libpq_cxt.PqRecvLength);
        (void)pgstat_report_waitstatus(oldStatus);

        if (r < 0) {
            if (errno == EINTR) {
                continue; /* Ok if interrupted */
            }

            /*
             * Careful: an ereport() that tries to write to the client would
             * cause recursion to here, leading to stack overflow and core
             * dump!  This message must go *only* to the postmaster log.
             */
            ereport(COMMERROR,
                (errcode_for_socket_access(), errmsg("could not receive data from client: %s", gs_comm_strerror())));
            return EOF;
        }
        if (r == 0) {
            /*
             * EOF detected.  We used to write a log message here, but it's
             * better to expect the ultimate caller to do that.
             */
            return EOF;
        }
        /* r contains number of bytes read, so just incr length */
        t_thrd.libpq_cxt.PqRecvLength += r;
        return 0;
    }
}

/* --------------------------------
 *		pq_getbyte	- get a single byte from connection, or return EOF
 * --------------------------------
 */
int pq_getbyte(void)
{
    while (t_thrd.libpq_cxt.PqRecvPointer >= t_thrd.libpq_cxt.PqRecvLength) {
        if (pq_recvbuf()) { /* If nothing in buffer, then recv some */
            return EOF;   /* Failed to recv data */
        }
    }
    return (unsigned char)t_thrd.libpq_cxt.PqRecvBuffer[t_thrd.libpq_cxt.PqRecvPointer++];
}

/* --------------------------------
 *		pq_peekbyte		- peek at next byte from connection
 *
 *	 Same as pq_getbyte() except we don't advance the pointer.
 * --------------------------------
 */
int pq_peekbyte(void)
{
    while (t_thrd.libpq_cxt.PqRecvPointer >= t_thrd.libpq_cxt.PqRecvLength) {
        if (pq_recvbuf()) {/* If nothing in buffer, then recv some */
            return EOF;   /* Failed to recv data */
        }
    }
    return (unsigned char)t_thrd.libpq_cxt.PqRecvBuffer[t_thrd.libpq_cxt.PqRecvPointer];
}

/* --------------------------------
 *		pq_getbyte_if_available - get a single byte from connection,
 *			if available
 *
 * The received byte is stored in *c. Returns 1 if a byte was read,
 * 0 if no data was available, or EOF if trouble.
 * --------------------------------
 */
int pq_getbyte_if_available(unsigned char* c)
{
    int r;

    if (t_thrd.libpq_cxt.PqRecvPointer < t_thrd.libpq_cxt.PqRecvLength) {
        *c = t_thrd.libpq_cxt.PqRecvBuffer[t_thrd.libpq_cxt.PqRecvPointer++];
        return 1;
    }

    /* Put the socket into non-blocking mode */
    pq_set_nonblocking(true);

    r = secure_read(u_sess->proc_cxt.MyProcPort, c, 1);
    if (r < 0) {
        /*
         * Ok if no data available without blocking or interrupted (though
         * EINTR really shouldn't happen with a non-blocking socket). Report
         * other errors.
         */
        if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
            r = 0;
        } else {
            /*
             * Careful: an ereport() that tries to write to the client would
             * cause recursion to here, leading to stack overflow and core
             * dump!  This message must go *only* to the postmaster log.
             */
            ereport(COMMERROR,
                (errcode_for_socket_access(), errmsg("could not receive data from client: %s", gs_comm_strerror())));
            r = EOF;
        }
    } else if (r == 0) {
        /* EOF detected */
        r = EOF;
    }

    return r;
}

/* --------------------------------
 *		pq_getbytes		- get a known number of bytes from connection
 *
 *		returns 0 if OK, EOF if trouble
 * --------------------------------
 */
int pq_getbytes(char* s, size_t len)
{
    size_t amount;

    while (len > 0) {
        while (t_thrd.libpq_cxt.PqRecvPointer >= t_thrd.libpq_cxt.PqRecvLength) {
            if (pq_recvbuf()) { /* If nothing in buffer, then recv some */
                return EOF;   /* Failed to recv data */
            }
        }
        amount = t_thrd.libpq_cxt.PqRecvLength - t_thrd.libpq_cxt.PqRecvPointer;
        if (amount > len) {
            amount = len;
        }
        errno_t rc = memcpy_s(s, amount, t_thrd.libpq_cxt.PqRecvBuffer + t_thrd.libpq_cxt.PqRecvPointer, amount);
        securec_check(rc, "\0", "\0");

        t_thrd.libpq_cxt.PqRecvPointer += amount;
        s += amount;
        len -= amount;
    }

    return 0;
}

/* --------------------------------
 *		pq_discardbytes		- throw away a known number of bytes
 *
 *		same as pq_getbytes except we do not copy the data to anyplace.
 *		this is used for resynchronizing after read errors.
 *
 *		returns 0 if OK, EOF if trouble
 * --------------------------------
 */
static int pq_discardbytes(size_t len)
{
    size_t amount;

    while (len > 0) {
        while (t_thrd.libpq_cxt.PqRecvPointer >= t_thrd.libpq_cxt.PqRecvLength) {
            if (pq_recvbuf()) { /* If nothing in buffer, then recv some */
                return EOF;   /* Failed to recv data */
            }
        }
        amount = t_thrd.libpq_cxt.PqRecvLength - t_thrd.libpq_cxt.PqRecvPointer;
        if (amount > len) {
            amount = len;
        }
        t_thrd.libpq_cxt.PqRecvPointer += amount;
        len -= amount;
    }
    return 0;
}

/* --------------------------------
 *		pq_getstring	- get a null terminated string from connection
 *
 *		The return value is placed in an expansible StringInfo, which has
 *		already been initialized by the caller.
 *
 *		This is used only for dealing with old-protocol clients.  The idea
 *		is to produce a StringInfo that looks the same as we would get from
 *		pq_getmessage() with a newer client; we will then process it with
 *		pq_getmsgstring.  Therefore, no character set conversion is done here,
 *		even though this is presumably useful only for text.
 *
 *		returns 0 if OK, EOF if trouble
 * --------------------------------
 */
int pq_getstring(StringInfo s)
{
    int i;

    resetStringInfo(s);

    /* Read until we get the terminating '\0' */
    for (;;) {
        while (t_thrd.libpq_cxt.PqRecvPointer >= t_thrd.libpq_cxt.PqRecvLength) {
            if (pq_recvbuf()) {/* If nothing in buffer, then recv some */
                return EOF;   /* Failed to recv data */
            }
        }

        for (i = t_thrd.libpq_cxt.PqRecvPointer; i < t_thrd.libpq_cxt.PqRecvLength; i++) {
            if (t_thrd.libpq_cxt.PqRecvBuffer[i] == '\0') {
                /* include the '\0' in the copy */
                appendBinaryStringInfo(s,
                    t_thrd.libpq_cxt.PqRecvBuffer + t_thrd.libpq_cxt.PqRecvPointer,
                    i - t_thrd.libpq_cxt.PqRecvPointer + 1);
                t_thrd.libpq_cxt.PqRecvPointer = i + 1; /* advance past \0 */
                return 0;
            }
        }

        /* If we're here we haven't got the \0 in the buffer yet. */
        appendBinaryStringInfo(s,
            t_thrd.libpq_cxt.PqRecvBuffer + t_thrd.libpq_cxt.PqRecvPointer,
            t_thrd.libpq_cxt.PqRecvLength - t_thrd.libpq_cxt.PqRecvPointer);
        t_thrd.libpq_cxt.PqRecvPointer = t_thrd.libpq_cxt.PqRecvLength;
    }
}

/* --------------------------------
 *		pq_getmessage	- get a message with length word from connection
 *
 *		The return value is placed in an expansible StringInfo, which has
 *		already been initialized by the caller.
 *		Only the message body is placed in the StringInfo; the length word
 *		is removed.  Also, s->cursor is initialized to zero for convenience
 *		in scanning the message contents.
 *
 *		If maxlen is greater than zero, it is an upper limit on the length
 *		of the message we are willing to accept.  We abort the connection
 *		(by returning EOF) if client tries to send more than that.
 *
 *		returns 0 if OK, EOF if trouble
 * --------------------------------
 */
int pq_getmessage(StringInfo s, int maxlen)
{
    int32 len;

    resetStringInfo(s);

    /* Read message length word */
    if (pq_getbytes((char*)&len, 4) == EOF) {
        ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("unexpected EOF within message length word")));
        return EOF;
    }

    len = ntohl(len);

    if (len < 4 || (maxlen > 0 && len > maxlen)) {
        ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid message length")));
        return EOF;
    }

    len -= 4; /* discount length itself */

    if (len > 0) {
        /*
         * Allocate space for message.	If we run out of room (ridiculously
         * large message), we will elog(ERROR), but we want to discard the
         * message body so as not to lose communication sync.
         */
        PG_TRY();
        {
            enlargeStringInfo(s, len);
        }
        PG_CATCH();
        {
            if (pq_discardbytes(len) == EOF) {
                ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("incomplete message from client")));
            }
            PG_RE_THROW();
        }
        PG_END_TRY();

        /* And grab the message */
        if (pq_getbytes(s->data, len) == EOF) {
            ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("incomplete message from client")));
            return EOF;
        }
        s->len = len;
        /* Place a trailing null per StringInfo convention */
        s->data[len] = '\0';
    }

    return 0;
}

/* --------------------------------
 *		pq_putbytes		- send bytes to connection (not flushed until pq_flush)
 *
 *		returns 0 if OK, EOF if trouble
 * --------------------------------
 */
int pq_putbytes(const char* s, size_t len)
{
    int res;

    /* Should only be called by old-style COPY OUT */
    Assert(t_thrd.libpq_cxt.DoingCopyOut);
    /* No-op if reentrant call */
    if (t_thrd.libpq_cxt.PqCommBusy) {
        return 0;
    }
    t_thrd.libpq_cxt.PqCommBusy = true;
    res = internal_putbytes(s, len);
    t_thrd.libpq_cxt.PqCommBusy = false;
    return res;
}

static int internal_putbytes(const char* s, size_t len)
{
    size_t amount;

    while (len > 0) {
        /* If buffer is full, then flush it out */
        if (t_thrd.libpq_cxt.PqSendPointer >= t_thrd.libpq_cxt.PqSendBufferSize) {
            if (pq_disk_is_temp_file_enabled()) {
                /* create temp file to store the result, it is caller's responsibility
                 * to close the file done */
                if (!t_thrd.libpq_cxt.PqTempFileContextInfo->file_handle) {
                    pq_disk_create_tempfile();
                }
                if ((int)pq_disk_write_tempfile(
                        t_thrd.libpq_cxt.PqSendBuffer, ((size_t)t_thrd.libpq_cxt.PqSendPointer)) == EOF) {
                    return EOF;
                }
                t_thrd.libpq_cxt.PqSendPointer = 0;
            } else {
                StmtRetrySetFileExceededFlag(); /* once flush data to frontend, can not retry this query anymore */
                pq_set_nonblocking(false);
                if (internal_flush()) {
                    return EOF;
                }
            }
        }
        amount = t_thrd.libpq_cxt.PqSendBufferSize - t_thrd.libpq_cxt.PqSendPointer;
        if (amount > len) {
            amount = len;
        }
        errno_t rc = memcpy_s(t_thrd.libpq_cxt.PqSendBuffer + t_thrd.libpq_cxt.PqSendPointer, amount, s, amount);
        securec_check(rc, "\0", "\0");
        t_thrd.libpq_cxt.PqSendPointer += amount;
        s += amount;
        len -= amount;
    }

    return 0;
}

/* --------------------------------
 *		pq_flush		- flush pending output
 *
 *		returns 0 if OK, EOF if trouble
 * --------------------------------
 */
static int socket_flush(void)
{
    int res = 0;

    /* No-op if reentrant call */
    if (t_thrd.libpq_cxt.PqCommBusy) {
        return res;
    }
    t_thrd.libpq_cxt.PqCommBusy = true;
    pq_set_nonblocking(false);

    if (t_thrd.libpq_cxt.save_query_result_to_disk &&
        (t_thrd.libpq_cxt.PqTempFileContextInfo->file_state == TEMPFILE_FLUSHED)) {
        if (!u_sess->wlm_cxt->spill_limit_error) {
            MemoryContext oldMemory;
            oldMemory = MemoryContextSwitchTo(u_sess->top_mem_cxt);

            /*
             * read query result from temp file, then flush to client.
             * extract remaining data in send buffer to disk,
             * in order to send total query result together through temp file
             */
            pq_disk_extract_sendbuffer();
            res = pq_disk_send_to_frontend();

            (void)MemoryContextSwitchTo(oldMemory);
        } else {
            pq_disk_discard_temp_file();
        }
    } else {
        res = internal_flush();
    }
    t_thrd.libpq_cxt.PqCommBusy = false;
    return res;
}

/* --------------------------------
 *		internal_flush - flush pending output
 *
 * Returns 0 if OK (meaning everything was sent, or operation would block
 * and the socket is in non-blocking mode), or EOF if trouble.
 * --------------------------------
 */
static int internal_flush(void)
{
    static THR_LOCAL int last_reported_send_errno = 0;

    char* bufptr = t_thrd.libpq_cxt.PqSendBuffer + t_thrd.libpq_cxt.PqSendStart;
    char* bufend = t_thrd.libpq_cxt.PqSendBuffer + t_thrd.libpq_cxt.PqSendPointer;
    WaitState oldStatus = pgstat_report_waitstatus(STATE_WAIT_UNDEFINED, true);

    if (StreamThreadAmI() == false) {
        oldStatus = pgstat_report_waitstatus(STATE_WAIT_FLUSH_DATA);
    } else {
        /* Add node name to mark where to flush data for SCTP */
        oldStatus = pgstat_report_waitstatus_comm(STATE_WAIT_FLUSH_DATA,
            u_sess->proc_cxt.MyProcPort->libcomm_addrinfo->nodeIdx,
            -1,
            u_sess->stream_cxt.producer_obj->getParentPlanNodeId(),
            global_node_definition ? global_node_definition->num_nodes : -1);
    }

    while (bufptr < bufend) {
        int r;

        r = secure_write(u_sess->proc_cxt.MyProcPort, bufptr, bufend - bufptr);
        if (unlikely(r == 0 && (StreamThreadAmI() == true || u_sess->proc_cxt.MyProcPort->is_logic_conn))) {
            /* Stop query when cancel happend */
            if (t_thrd.int_cxt.QueryCancelPending) {
                if (t_thrd.storage_cxt.cancel_from_timeout) {
                    ereport(LOG,
                        (errcode(ERRCODE_QUERY_CANCELED),
                            errmsg("canceling statement due to statement timeout"),
                            ignore_interrupt(true)));
                } else {
                    ereport(LOG,
                        (errcode(ERRCODE_QUERY_CANCELED),
                            errmsg("canceling statement due to %s request", IS_PGXC_DATANODE ? "coordinator" : "user"),
                            ignore_interrupt(true)));
                }

                (void)pgstat_report_waitstatus(oldStatus);
                return EOF;
            } else {
                continue;
            }
        }

        if (unlikely(r <= 0)) {
            if (errno == EINTR) {
                continue; /* Ok if we were interrupted */
            }

            /*
             * Ok if no data writable without blocking, and the socket is in
             * non-blocking mode.
             */
            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                (void)pgstat_report_waitstatus(oldStatus);
                return 0;
            }

            /*
             * Careful: an ereport() that tries to write to the client would
             * cause recursion to here, leading to stack overflow and core
             * dump!  This message must go *only* to the postmaster log.
             *
             * If a client disconnects while we're in the midst of output, we
             * might write quite a bit of data before we get to a safe query
             * abort point.  So, suppress duplicate log messages.
             */
            // if it is stream thread, suppress the error message.
            if (errno != last_reported_send_errno && StreamThreadAmI() == false) {
                last_reported_send_errno = errno;
                ereport(COMMERROR,
                    (errcode_for_socket_access(),
                        errmsg("could not send data to client [ Remote IP: %s PORT: %s]. Detail: %m",
                            u_sess->proc_cxt.MyProcPort->remote_host,
                            (u_sess->proc_cxt.MyProcPort->remote_port != NULL &&
                                u_sess->proc_cxt.MyProcPort->remote_port[0] != '\0')
                                ? u_sess->proc_cxt.MyProcPort->remote_port
                                : "")));
            }

            /*
             * We drop the buffered data anyway so that processing can
             * continue, even though we'll probably quit soon. We also set a
             * flag that'll cause the next CHECK_FOR_INTERRUPTS to terminate
             * the connection.
             */
            t_thrd.libpq_cxt.PqSendStart = t_thrd.libpq_cxt.PqSendPointer = 0;
            if ((StreamThreadAmI() == false) && (!t_thrd.proc_cxt.proc_exit_inprogress)) {
                t_thrd.int_cxt.ClientConnectionLost = 1;
                InterruptPending = 1;
            } else if (StreamThreadAmI()) {
                t_thrd.int_cxt.StreamConnectionLost = 1;
            }
            (void)pgstat_report_waitstatus(oldStatus);
            return EOF;
        }

        last_reported_send_errno = 0; /* reset after any successful send */
        bufptr += r;
        t_thrd.libpq_cxt.PqSendStart += r;
    }

    t_thrd.libpq_cxt.PqSendStart = t_thrd.libpq_cxt.PqSendPointer = 0;
    (void)pgstat_report_waitstatus(oldStatus);
    return 0;
}

/* --------------------------------
 *		pq_flush_if_writable - flush pending output if writable without blocking
 *
 * Returns 0 if OK, or EOF if trouble.
 * --------------------------------
 */
static int socket_flush_if_writable(void)
{
    int res;

    /* Quick exit if nothing to do */
    if (t_thrd.libpq_cxt.PqSendPointer == t_thrd.libpq_cxt.PqSendStart) {
        return 0;
    }

    /* No-op if reentrant call */
    if (t_thrd.libpq_cxt.PqCommBusy) {
        return 0;
    }

    /* Temporarily put the socket into non-blocking mode */
    pq_set_nonblocking(true);

    t_thrd.libpq_cxt.PqCommBusy = true;
    res = internal_flush();
    t_thrd.libpq_cxt.PqCommBusy = false;
    return res;
}

/* --------------------------------
 *		pq_flush_timedwait - Check if some data is pending to be flushed.
 *		If yes then call the existing non-block flush function to flush.
 *		If all datas are flushed (means PqSendStart is 0), then return
 *		Otherwise check even if at least few bytes of datas are flushed
 *		(by checking the before and after PqSendStart), if yes then
 *		update the last flush time otherwise check if any data was able
 *		to flush during maximum configured timeout.
 * --------------------------------
 */
void pq_flush_timedwait(int timeout)
{
    int sleeptime = 0;
    int send_start_before_flush = 0;
    TimestampTz start_time = 0;
    start_time = GetCurrentTimestamp();

    for (;;) {
        /* Check if still some data is pending to be sent */
        if (!pq_is_send_pending()) {
            break;
        }

        send_start_before_flush = t_thrd.libpq_cxt.PqSendStart;
        if (pq_flush_if_writable()) {
            ereport(COMMERROR, (errmsg("could not send data due to connection reset, terminating process")));
            proc_exit(0);
        }

        if (t_thrd.libpq_cxt.PqSendStart == 0) {
            /*
             * Means either nothing was flushed or
             * all datas are flushed. So loop back and see if
             * if any data to be send pending, if it zero because
             * everything was flushed then no data will be pending
             * to send otherwise it will be pending, so try to send
             * again. So here there is no need to reset the flush time
             */
            if (!pq_is_send_pending()) {
                break;
            }
        } else if (send_start_before_flush != t_thrd.libpq_cxt.PqSendStart) {
            /* Some more data have been flushed */
            sleeptime = 0;
            pg_usleep(NAPTIME_PER_SEND * 1000);
            continue;
        }

        if (timeout > 0 && sleeptime >= timeout) {
            long secs;
            int usecs;
            TimestampTz stop_time = GetCurrentTimestamp();

            TimestampDifference(start_time, stop_time, &secs, &usecs);
            sleeptime = secs * 1000 + usecs / 1000 + 1;

            /*
             * By checking the delayed time again, it ensures we won't delay
             * less than the specified time if pg_usleep is interrupted by other
             * signals such as SIGHUP.
             */
            if (stop_time < start_time || sleeptime >= timeout) {
                ereport(COMMERROR, (errmsg("could not send data during maximum timeout, terminating process")));
                proc_exit(0);
            }
        }

        pg_usleep(NAPTIME_PER_SEND_RETRY * 1000);
        sleeptime += NAPTIME_PER_SEND_RETRY;
    }
}

/* --------------------------------
 *		pq_is_send_pending	- is there any pending data in the output buffer?
 * --------------------------------
 */
static bool socket_is_send_pending(void)
{
    return (t_thrd.libpq_cxt.PqSendStart < t_thrd.libpq_cxt.PqSendPointer);
}

/* --------------------------------
 * Message-level I/O routines begin here.
 *
 * These routines understand about the old-style COPY OUT protocol.
 * --------------------------------
 */

/* --------------------------------
 *		pq_putmessage	- send a normal message (suppressed in COPY OUT mode)
 *
 *		If msgtype is not '\0', it is a message type code to place before
 *		the message body.  If msgtype is '\0', then the message has no type
 *		code (this is only valid in pre-3.0 protocols).
 *
 *		len is the length of the message body data at *s.  In protocol 3.0
 *		and later, a message length word (equal to len+4 because it counts
 *		itself too) is inserted by this routine.
 *
 *		All normal messages are suppressed while old-style COPY OUT is in
 *		progress.  (In practice only a few notice messages might get emitted
 *		then; dropping them is annoying, but at least they will still appear
 *		in the postmaster log.)
 *
 *		We also suppress messages generated while pqcomm.c is busy.  This
 *		avoids any possibility of messages being inserted within other
 *		messages.  The only known trouble case arises if SIGQUIT occurs
 *		during a pqcomm.c routine --- quickdie() will try to send a warning
 *		message, and the most reasonable approach seems to be to drop it.
 *
 *		returns 0 if OK, EOF if trouble
 * --------------------------------
 */
static int socket_putmessage(char msgtype, const char* s, size_t len)
{
    if (t_thrd.libpq_cxt.DoingCopyOut || t_thrd.libpq_cxt.PqCommBusy) {
        return 0;
    }
    t_thrd.libpq_cxt.PqCommBusy = true;
    if (msgtype) {
        if (internal_putbytes(&msgtype, 1)) {
            goto fail;
        }
    }
    if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3) {
        uint32 n32;

        n32 = htonl((uint32)(len + 4));
        if (internal_putbytes((char*)&n32, 4)) {
            goto fail;
        }
    }
    if (internal_putbytes(s, len)) {
        goto fail;
    }
    t_thrd.libpq_cxt.PqCommBusy = false;
    return 0;

fail:
    t_thrd.libpq_cxt.PqCommBusy = false;
    return EOF;
}

/* --------------------------------
 *		pq_putmessage_noblock	- like pq_putmessage, but never blocks
 *
 *		If the output buffer is too small to hold the message, the buffer
 *		is enlarged.
 */
static int socket_putmessage_noblock(char msgtype, const char* s, size_t len)
{
    int res;
    int required;
    const int datalen = 1 + 4 + len;
    /*
     * Ensure we have enough space in the output buffer for the message header
     * as well as the message itself.
     */
    Assert((unsigned int)t_thrd.libpq_cxt.PqSendPointer <= MaxBuildAllocSize);
    if (MaxBuildAllocSize - (unsigned int)t_thrd.libpq_cxt.PqSendPointer >= (unsigned int)datalen) {
        required = t_thrd.libpq_cxt.PqSendPointer + datalen;
        if (required > t_thrd.libpq_cxt.PqSendBufferSize) {
            t_thrd.libpq_cxt.PqSendBuffer = (char*)repalloc(t_thrd.libpq_cxt.PqSendBuffer, required);
            t_thrd.libpq_cxt.PqSendBufferSize = required;
        }
    }
    res = pq_putmessage(msgtype, s, len);
    return res;
}

/* --------------------------------
 *		pq_startcopyout - inform libpq that an old-style COPY OUT transfer
 *			is beginning
 * --------------------------------
 */
static void socket_startcopyout(void)
{
    t_thrd.libpq_cxt.DoingCopyOut = true;
}

/* --------------------------------
 *		pq_endcopyout	- end an old-style COPY OUT transfer
 *
 *		If errorAbort is indicated, we are aborting a COPY OUT due to an error,
 *		and must send a terminator line.  Since a partial data line might have
 *		been emitted, send a couple of newlines first (the first one could
 *		get absorbed by a backslash...)  Note that old-style COPY OUT does
 *		not allow binary transfers, so a textual terminator is always correct.
 * --------------------------------
 */
static void socket_endcopyout(bool errorAbort)
{
    if (!t_thrd.libpq_cxt.DoingCopyOut) {
        return;
    }
    if (errorAbort) {
        pq_putbytes("\n\n\\.\n", 5);
    }
    /* in non-error case, copy.c will have emitted the terminator line */
    t_thrd.libpq_cxt.DoingCopyOut = false;
}

/* --------------------------------
 *		pq_select	- Wait until we can read data, or timeout.
 *		Returns true if data has become available for reading, false if timed out
 *		or interrupted by signal.
 *		This is based on libpq_select of libpq_walreceiver.cpp.
 * --------------------------------
 */
bool pq_select(int timeout_ms)
{
    int ret;

    /* We use poll(2) if available, otherwise select(2) */
    {
#ifdef HAVE_POLL
        struct pollfd input_fd;

        input_fd.fd = u_sess->proc_cxt.MyProcPort->sock;
        input_fd.events = POLLIN | POLLERR;
        input_fd.revents = 0;

        ret = poll(&input_fd, 1, timeout_ms);
#else  /* !HAVE_POLL */

        fd_set input_mask;
        struct timeval timeout;
        struct timeval* ptr_timeout = NULL;

        FD_ZERO(&input_mask);
        FD_SET(u_sess->proc_cxt.MyProcPort->sock, &input_mask);

        if (timeout_ms < 0) {
            ptr_timeout = NULL;
        } else {
            timeout.tv_sec = timeout_ms / 1000;
            timeout.tv_usec = (timeout_ms % 1000) * 1000;
            ptr_timeout = &timeout;
        }

        ret = select(u_sess->proc_cxt.MyProcPort->sock + 1, &input_mask, NULL, NULL, ptr_timeout);
#endif /* HAVE_POLL */
    }

    if (ret == 0 || (ret < 0 && errno == EINTR)) {
        return false;
    }
    if (ret < 0) {
        ereport(ERROR, (errcode_for_socket_access(), errmsg("select() failed: %m")));
    }
    return true;
}

/*
 * Support for TCP Keepalive parameters
 */

/*
 * On Windows, we need to set both idle and interval at the same time.
 * We also cannot reset them to the default (setting to zero will
 * actually set them to zero, not default), therefor we fallback to
 * the out-of-the-box default instead.
 */
#if defined(WIN32) && defined(SIO_KEEPALIVE_VALS)
static int pq_setkeepaliveswin32(Port* port, int idle, int interval)
{
    struct tcp_keepalive ka;
    DWORD retsize;

    if (idle <= 0)
        idle = 2 * 60 * 60; /* default = 2 hours */
    if (interval <= 0)
        interval = 1; /* default = 1 second */

    ka.onoff = 1;
    ka.keepalivetime = idle * 1000;
    ka.keepaliveinterval = interval * 1000;

    if (WSAIoctl(port->sock, SIO_KEEPALIVE_VALS, (LPVOID)&ka, sizeof(ka), NULL, 0, &retsize, NULL, NULL) != 0) {
        ereport(LOG, (errmsg("WSAIoctl(SIO_KEEPALIVE_VALS) failed: %ui", WSAGetLastError())));
        return STATUS_ERROR;
    }
    if (port->keepalives_idle != idle)
        port->keepalives_idle = idle;
    if (port->keepalives_interval != interval)
        port->keepalives_interval = interval;
    return STATUS_OK;
}
#endif

int pq_getkeepalivesidle(Port* port)
{
#if defined(TCP_KEEPIDLE) || defined(TCP_KEEPALIVE) || defined(WIN32)
    if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family)) {
        return 0;
    }

    if (port->keepalives_idle != 0) {
        return port->keepalives_idle;
    }

    if ((port->default_keepalives_idle == 0) && (port->sock != NO_SOCKET)) {
#ifndef WIN32
        ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_idle);

#ifdef TCP_KEEPIDLE
        if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPIDLE, (char*)&port->default_keepalives_idle, &size) < 0) {
            ereport(LOG, (errmsg("getsockopt(TCP_KEEPIDLE) failed: %m")));
            port->default_keepalives_idle = -1; /* don't know */
        }
#else
        if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPALIVE, (char*)&port->default_keepalives_idle, &size) < 0) {
            ereport(LOG, (errmsg("getsockopt(TCP_KEEPALIVE) failed: %m")));
            port->default_keepalives_idle = -1; /* don't know */
        }
#endif /* TCP_KEEPIDLE */
#else  /* WIN32 */
        /* We can't get the defaults on Windows, so return "don't know" */
        port->default_keepalives_idle = -1;
#endif /* WIN32 */
    }

    return port->default_keepalives_idle;
#else
    return 0;
#endif
}

int pq_setkeepalivesidle(int idle, Port* port)
{
    if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family)) {
        return STATUS_OK;
    }

#if defined(TCP_KEEPIDLE) || defined(TCP_KEEPALIVE) || defined(SIO_KEEPALIVE_VALS)
    if (idle == port->keepalives_idle) {
        return STATUS_OK;
    }

#ifndef WIN32
    if (port->default_keepalives_idle <= 0) {
        if (pq_getkeepalivesidle(port) < 0) {
            if (idle == 0) {
                return STATUS_OK; /* default is set but unknown */
            } else {
                return STATUS_ERROR;
            }
        }
    }

    if (idle == 0) {
        idle = port->default_keepalives_idle;
    }

    if (port->sock != NO_SOCKET) {
#ifdef TCP_KEEPIDLE
        if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPIDLE, (char*)&idle, sizeof(idle)) < 0) {
            ereport(LOG, (errmsg("setsockopt(TCP_KEEPIDLE) failed: %m")));
            return STATUS_ERROR;
        }
#else
        if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPALIVE, (char*)&idle, sizeof(idle)) < 0) {
            ereport(LOG, (errmsg("setsockopt(TCP_KEEPALIVE) failed: %m")));
            return STATUS_ERROR;
        }
#endif
        port->keepalives_idle = idle;
    }

#else /* WIN32 */
    return pq_setkeepaliveswin32(port, idle, port->keepalives_interval);
#endif
#else /* TCP_KEEPIDLE || SIO_KEEPALIVE_VALS */
    if (idle != 0) {
        ereport(LOG, (errmsg("setting the keepalive idle time is not supported")));
        return STATUS_ERROR;
    }
#endif
    return STATUS_OK;
}

int pq_getkeepalivesinterval(Port* port)
{
#if defined(TCP_KEEPINTVL) || defined(SIO_KEEPALIVE_VALS)
    if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family)) {
        return 0;
    }

    if (port->keepalives_interval != 0) {
        return port->keepalives_interval;
    }

    if ((port->default_keepalives_interval == 0) && (port->sock != NO_SOCKET)) {
#ifndef WIN32
        ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_interval);

        if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL, (char*)&port->default_keepalives_interval, &size) < 0) {
            ereport(LOG, (errmsg("getsockopt(TCP_KEEPINTVL) failed: %m")));
            port->default_keepalives_interval = -1; /* don't know */
        }
#else
        /* We can't get the defaults on Windows, so return "don't know" */
        port->default_keepalives_interval = -1;
#endif /* WIN32 */
    }

    return port->default_keepalives_interval;
#else
    return 0;
#endif
}

int pq_setkeepalivesinterval(int interval, Port* port)
{
    if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family)) {
        return STATUS_OK;
    }

#if defined(TCP_KEEPINTVL) || defined(SIO_KEEPALIVE_VALS)
    if (interval == port->keepalives_interval) {
        return STATUS_OK;
    }

#ifndef WIN32
    if (port->default_keepalives_interval <= 0) {
        if (pq_getkeepalivesinterval(port) < 0) {
            if (interval == 0) {
                return STATUS_OK; /* default is set but unknown */
            } else {
                return STATUS_ERROR;
            }
        }
    }

    if (interval == 0) {
        interval = port->default_keepalives_interval;
    }

    if (port->sock != NO_SOCKET) {
        if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPINTVL, (char*)&interval, sizeof(interval)) < 0) {
            ereport(LOG, (errmsg("setsockopt(TCP_KEEPINTVL) failed: %m")));
            return STATUS_ERROR;
        }

        port->keepalives_interval = interval;
    }

#else /* WIN32 */
    return pq_setkeepaliveswin32(port, port->keepalives_idle, interval);
#endif
#else
    if (interval != 0) {
        ereport(LOG, (errmsg("setsockopt(TCP_KEEPINTVL) not supported")));
        return STATUS_ERROR;
    }
#endif

    return STATUS_OK;
}

int pq_getkeepalivescount(Port* port)
{
#ifdef TCP_KEEPCNT
    if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family)) {
        return 0;
    }

    if (port->keepalives_count != 0) {
        return port->keepalives_count;
    }

    if ((port->default_keepalives_count == 0) && (port->sock != NO_SOCKET)) {
        ACCEPT_TYPE_ARG3 size = sizeof(port->default_keepalives_count);

        if (getsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT, (char*)&port->default_keepalives_count, &size) < 0) {
            ereport(LOG, (errmsg("getsockopt(TCP_KEEPCNT) failed: %m")));
            port->default_keepalives_count = -1; /* don't know */
        }
    }

    return port->default_keepalives_count;
#else
    return 0;
#endif
}

int pq_setkeepalivescount(int count, Port* port)
{
    if (port == NULL || IS_AF_UNIX(port->laddr.addr.ss_family)) {
        return STATUS_OK;
    }

#ifdef TCP_KEEPCNT
    if (count == port->keepalives_count) {
        return STATUS_OK;
    }

    if (port->default_keepalives_count <= 0) {
        if (pq_getkeepalivescount(port) < 0) {
            if (count == 0) {
                return STATUS_OK; /* default is set but unknown */
            } else {
                return STATUS_ERROR;
            }
        }
    }

    if (count == 0) {
        count = port->default_keepalives_count;
    }

    if (port->sock != NO_SOCKET) {
        if (setsockopt(port->sock, IPPROTO_TCP, TCP_KEEPCNT, (char*)&count, sizeof(count)) < 0) {
            ereport(LOG, (errmsg("setsockopt(TCP_KEEPCNT) failed: %m")));
            return STATUS_ERROR;
        }

        port->keepalives_count = count;
    }

#else
    if (count != 0) {
        ereport(LOG, (errmsg("setsockopt(TCP_KEEPCNT) not supported")));
        return STATUS_ERROR;
    }
#endif

    return STATUS_OK;
}

/*
 * @Description: reset send buffer cursors
 */
void pq_abandon_sendbuffer(void)
{
    t_thrd.libpq_cxt.PqSendPointer = 0;
    t_thrd.libpq_cxt.PqSendStart = 0;
}

/*
 * @Description: reset recv buffer cursors
 */
void pq_abandon_recvbuffer(void)
{
    t_thrd.libpq_cxt.PqRecvPointer = 0;
    t_thrd.libpq_cxt.PqRecvLength = 0;
}

/*
 * @Description: resize PqRecvBuffer
 */
void pq_resize_recvbuffer(int size)
{
#ifdef USE_RETRY_STUB
    elog(LOG,
        "%s %s  resize pqrecvbuffer from %d to %d",
        STUB_PRINT_PREFIX,
        STUB_PRINT_PREFIX_TYPE_S,
        t_thrd.libpq_cxt.PqRecvBufferSize,
        size);
#endif

    char* enlarged_buffer = (char*)MemoryContextAlloc(t_thrd.top_mem_cxt, size);
    if (t_thrd.libpq_cxt.PqRecvBuffer != NULL) {
        /* since MemoryContextAlloc may fail, so alloc new memory first, then free old memory */
        pfree(t_thrd.libpq_cxt.PqRecvBuffer);
        t_thrd.libpq_cxt.PqRecvBuffer = NULL;
    }
    t_thrd.libpq_cxt.PqRecvBuffer = enlarged_buffer;
    t_thrd.libpq_cxt.PqRecvBufferSize = size;
    t_thrd.libpq_cxt.PqRecvPointer = 0;
    t_thrd.libpq_cxt.PqRecvLength = 0;
}

/*
 * @Description: revert PqRecvBuffer to given data
 */
void pq_revert_recvbuffer(const char* data, int len)
{
    if (unlikely(data == NULL || len < 0 || t_thrd.libpq_cxt.PqRecvBufferSize < len)) {
        ereport(ERROR,
            (errmodule(MOD_CN_RETRY),
                errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("alert, failed in revert command buffer, invalid params data len %d pq buffer size %d",
                    len,
                    t_thrd.libpq_cxt.PqRecvBufferSize)));
    }

    errno_t rc = memcpy_s(t_thrd.libpq_cxt.PqRecvBuffer, t_thrd.libpq_cxt.PqRecvBufferSize, data, len);
    securec_check(rc, "", "");

    t_thrd.libpq_cxt.PqRecvPointer = 0;
    t_thrd.libpq_cxt.PqRecvLength = len;
}

/*
 * @Description: generate crc checking header
 * @in - src data and length and sequence number
 * @return - int seqnum + int datalength + pg_crc32 crc as char *.
 */
static void pq_disk_generate_checking_header(
    const char* src_data, StringInfo dest_data, uint32 data_len, uint32 seq_num)
{
    Assert(src_data != NULL);

    pq_sendint(dest_data, seq_num, 4);
    pq_sendint(dest_data, data_len, 4);

    /* Add CRC check. */
    pg_crc32 val_crc;
    INIT_CRC32(val_crc);

#ifdef USE_ASSERT_CHECKING
    COMP_CRC32(val_crc, src_data, data_len);
#endif

    FIN_CRC32(val_crc);
    pq_sendint(dest_data, val_crc, 4);
    appendBinaryStringInfo(dest_data, src_data, data_len);

    return;
}

/*
 * @Description: read data file and do crc checking
 * @in - src data and length
 * @return - pqSendBuf read size.
 */
static size_t pq_disk_read_data_block(
    LZ4File* file_handle, char* src_data, char* dest_data, uint32 data_len, uint32 seq_num)
{
    Assert(file_handle != NULL && src_data != NULL);

    errno_t rc = EOK;
    uint32 actual_crc_val;
    uint32 actual_seq_num = 0;
    uint32 actual_msg_len = 0;

    size_t read_len = LZ4FileRead(file_handle, src_data, data_len + CRC_HEADER);

    if (read_len < CRC_HEADER) {
        return 0;
    }

    read_len -= CRC_HEADER;

    rc = memcpy_s(&actual_seq_num, 4, src_data, 4);
    securec_check(rc, "\0", "\0");
    actual_seq_num = ntohl(actual_seq_num);
    src_data += 4;

    Assert(actual_seq_num == seq_num);

    if (actual_seq_num != seq_num) {
        src_data -= 4;
        pfree_ext(src_data);
        ereport(FATAL,
            (errmodule(MOD_CN_RETRY),
                errcode(ERRCODE_DATA_EXCEPTION),
                errmsg("expected message sequnce is %u, actual message sequence is %u", seq_num, actual_seq_num)));
    }

    rc = memcpy_s(&actual_msg_len, 4, src_data, 4);
    securec_check(rc, "\0", "\0");
    actual_msg_len = ntohl(actual_msg_len);
    src_data += 4;

    Assert(actual_msg_len == read_len);

    if (actual_msg_len != read_len) {
        src_data -= 8;
        pfree_ext(src_data);
        ereport(FATAL,
            (errmodule(MOD_CN_RETRY),
                errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
                errmsg("expected message length is %u, actual message length is %u", actual_msg_len, data_len)));
    }

    /* CRC check. */
    rc = memcpy_s(&actual_crc_val, 4, src_data, 4);
    securec_check(rc, "\0", "\0");
    actual_crc_val = ntohl(actual_crc_val);
    src_data += 4;

#ifdef USE_ASSERT_CHECKING
    pg_crc32 valcrc;
    INIT_CRC32(valcrc);
    COMP_CRC32(valcrc, src_data, actual_msg_len);
    FIN_CRC32(valcrc);

    if (!EQ_CRC32(valcrc, actual_crc_val)) {
        src_data -= 12;
        pfree_ext(src_data);
        ereport(FATAL,
            (errmodule(MOD_CN_RETRY),
                errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("expected crc is %u, actual crc is %u", actual_crc_val, valcrc)));
    }
#endif

    errno_t err_rc = memcpy_s(dest_data, read_len, src_data, read_len);
    securec_check(err_rc, "\0", "\0");
    src_data -= CRC_HEADER;

    return read_len;
}