/* -------------------------------------------------------------------------
 * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *

 *
 * bcm.cpp
 *	  bcm map for tracking modify of heap blocks
 *
 *
 * IDENTIFICATION
 * 		src/gausskernel/storage/replication/bcm.cpp
 *
 * INTERFACE ROUTINES
 *		createBCMFile		 - create BCM file with a init file header
 *		BCM_pin	     		 - pin a map page for setting a bit
 *		BCMSetStatusBit	     - set Status in a previously pinned page
 *		BCMTestStatusBit     - test if a bit is set
 *		BCMCountStatusBits   - fast count number of bits set in BCM map
 *		BCMTruncateFile 	 - truncate the BCM map
 *		BCMClearRel		 	 - clear all the BCM bits of a rel
 *		getBcmFileList		 -
 *
 * NOTES
 *
 * The bcm map is a bitmap with two bits (one for sync and another for backup)
 * per heap block. A set bit means that the block has been modified and has not
 * yet been synced to the standby; if a bit is not set, the block has already
 * been synced to the standby and does not need to be synced again.
 *
 * Clearing a bcm map bit is not separately WAL-logged.
 *
 * When a bit is set, the LSN of the bcm map page is updated to make
 * sure that the bcm map update doesn't get written to disk before the
 * WAL record of the changes that made it possible to set the bit is flushed.
 * But when a bit is cleared, we don't have to do that because it's always
 * safe to clear a bit in the map from correctness point of view.
 *
 * -------------------------------------------------------------------------
 */
#include "postgres.h"
#include "knl/knl_variable.h"

#include "access/heapam.h"
#include "access/transam.h"
#include "access/xlogutils.h"
#include "access/visibilitymap.h"
#include "catalog/catalog.h"
#include "catalog/pg_database.h"
#include "catalog/pg_tablespace.h"
#include "miscadmin.h"
#include "storage/buf/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/smgr/smgr.h"
#include "storage/smgr/fd.h"
#include "storage/cu.h"
#include "utils/inval.h"
#include "postmaster/alarmchecker.h"

#include "replication/bcm.h"
#include "replication/basebackup.h"
#include "replication/catchup.h"
#include "replication/dataprotocol.h"
#include "replication/dataqueue.h"
#include "replication/datasender.h"
#include "replication/datasender_private.h"
#include "replication/walsender.h"

#include "utils/aiomem.h"
#include "utils/memutils.h"
#include "storage/custorage.h"
#include "storage/ipc.h"
#include "commands/tablespace.h"

/*
 * Table for fast counting of set bits, by now only for sync
 * FUTURE CASE:: for backup(the second bit)
 *
 * Define bcm postfix
 */
#define BCM "_bcm"

/* prototypes for internal routines */
static Buffer BCM_readbuf(Relation rel, BlockNumber blkno, bool extend, int col = 0);
static void BCM_extend(Relation rel, BlockNumber nvmblocks, int col = 0);
static void searchBCMFiles(const char *tableSpacePath, const char *relativepath, bool undertablespace, bool clear,
                           int iterations);
static void GetIncrementalBcmFilePathForDefault(const RelFileNodeKey &data, char *path, int length);
static void GetIncrementalBcmFilePathForCustome(const RelFileNodeKey &data, char *path, int length);
static void HandleBCMfile(char *bcmpath, bool clear);
static void BCMClearFile(const RelFileNode &relfilenode, int col = 0);
static void BCMSendData(const RelFileNode &relfilenode, const char *bcmpath, int col = 0);
static void bcm_read_multi_cu(CUFile *cFile, Relation rel, int col, BlockNumber heapBlock, int &contibits,
                              BlockNumber maxHeapBlock);
static void BCMSetMetaBit(Relation rel, BlockNumber block, BCMBitStatus status, int col = 0);
static void BCMClearMetaBit(Relation rel, int col = 0);
static void BCMResetMetaBit(Relation rel, BlockNumber metablk, int col = 0);
static void BCMWalkMetaBuffer(Relation rel, CUFile *cFile, Buffer metabuffer, BlockNumber &heapBlock, int &contibits,
                              BlockNumber maxHeapBlock, int col = 0);
static void BCMSendOneBuffer(Relation rel, CUFile *cFile, Buffer bcmbuffer, BlockNumber &heapBlock, int &contibits,
                             BlockNumber maxHeapBlock, int col = 0);
static BlockNumber BCMGetDataFileMaxSize(Relation rel, int col);
static bool CheckFilePostfix(const char *str1, const char *str2);

// check tablespace size limitation when extending BCM file.
static inline void VerifyTblspcWhenBcmExtend(Relation rel, int col, int nblocks)
{
    Assert(nblocks > 0);
    STORAGE_SPACE_OPERATION(rel, (uint64)BLCKSZ * nblocks);

    // Might have to re-open if a cache flush happened
    if (col > 0) {
        CStoreRelationOpenSmgr(rel, col);
    } else {
        RelationOpenSmgr(rel);
    }
}

/* Create a bcm file with an inited bcm file header */
void createBCMFile(Relation rel, int col)
{
    Page bcmHeader;

    ADIO_RUN()
    {
        bcmHeader = (Page)adio_align_alloc(BLCKSZ);
    }
    ADIO_ELSE()
    {
        bcmHeader = (Page)palloc(BLCKSZ);
    }
    ADIO_END();

    PageInit(bcmHeader, BLCKSZ, 0);
    BCMHeader *hd = NULL;
    ForkNumber forknum = BCM_FORKNUM;

    hd = (BCMHeader *)PageGetContents(bcmHeader);

    /* FUTURE CASE:: for COLUMN_STORE, only support ROW_STORE by now. */
    hd->type = col > 0 ? COLUMN_STORE : ROW_STORE;
    hd->node.dbNode = rel->rd_node.dbNode;
    hd->node.relNode = rel->rd_node.relNode;
    hd->node.spcNode = rel->rd_node.spcNode;
    hd->node.bucketNode = rel->rd_node.bucketNode;
    hd->blockSize = col > 0 ? CUAlignUtils::GetCuAlignSizeColumnId(col) : BLCKSZ; /* defaut size for ROW_STORE */

    if (col > 0)
        forknum = ColumnId2ColForkNum(col);

    smgrcreate(rel->rd_smgr, forknum, false);

    VerifyTblspcWhenBcmExtend(rel, col, 1);

    PageSetChecksumInplace(bcmHeader, 0);

    /* Now extend the file */
    smgrextend(rel->rd_smgr, forknum, 0, (char *)bcmHeader, false);

    ADIO_RUN()
    {
        adio_align_free(bcmHeader);
    }
    ADIO_ELSE()
    {
        pfree(bcmHeader);
        bcmHeader = NULL;
    }
    ADIO_END();
}

void BCMLogCU(Relation rel, uint64 offset, int col, BCMBitStatus status, int count)
{
    bool needwal = false;

    needwal = (RelationNeedsWAL(rel) && !t_thrd.xlog_cxt.InRecovery);

    if (needwal) {
        Buffer bcmbuffer = InvalidBuffer;
        Page page;

        BCM_CStore_pin(rel, col, offset, &bcmbuffer);
        LockBuffer(bcmbuffer, BUFFER_LOCK_EXCLUSIVE);
        page = BufferGetPage(bcmbuffer);

        START_CRIT_SECTION();
        {
            uint64 cuBlock = 0;
            XLogRecPtr recptr = InvalidXLogRecPtr;
            uint64 align_size = (uint64)(uint32)CUAlignUtils::GetCuAlignSizeColumnId(col);
            cuBlock = cstore_offset_to_cstoreblock(offset, align_size);
            recptr = log_cu_bcm(&(rel->rd_node), col, cuBlock, status, count);
            PageSetLSN(page, recptr);
        }
        END_CRIT_SECTION();

        LockBuffer(bcmbuffer, BUFFER_LOCK_UNLOCK);
        ReleaseBuffer(bcmbuffer);
    }
}

/*
 * Set the meta page sync bit where the bcm block refers to.
 * Here, we use two sync bits(sync bit 0 and sync bit 1) to represent
 * the sync status of a bcm page. When we set sync bit 1 to unsynced, it
 * means the bcm page may have unsynced heap blocks. sync. When we set
 * sync bit 0 to unsynced during catchup, it means that the bcm page status
 * bit in meta page should not be reset after catchup which can be perceived
 * by the next catchup.
 * Note: call the function should first hold BUFFER_LOCK_EXCLUSIVE lock
 */
static void BCMSetMetaBit(Relation rel, BlockNumber block, BCMBitStatus status, int col)
{
    BlockNumber metablock = BCMBLK_TO_METABLOCK(block);
    int metaByte = BCMBLK_TO_METABYTE(block);
    int metaBit = BCMBLK_TO_METABIT(block);
    uint32 bshift = (uint32)metaBit * META_BITS_PER_BLOCK;
    Buffer metabuffer = InvalidBuffer;
    BCMBitStatus pageStatus0 = 0;
    BCMBitStatus pageStatus1 = 0;
    Page page;
    unsigned char *map = NULL;

    Assert(status == SYNCED || status == NOTSYNCED);

    metabuffer = BCM_readbuf(rel, metablock, false, col);
    Assert(BufferIsValid(metabuffer));
    LockBuffer(metabuffer, BUFFER_LOCK_EXCLUSIVE);
    page = BufferGetPage(metabuffer);
    map = (unsigned char *)PageGetContents(page);

    /* get sync bit 0 & 1 status */
    pageStatus0 = ((map[metaByte] >> bshift) & META_SYNC0_BITMASK) >> 3;
    Assert(pageStatus0 == SYNCED || pageStatus0 == NOTSYNCED);
    pageStatus1 = ((map[metaByte] >> bshift) & META_SYNC1_BITMASK) >> 1;
    Assert(pageStatus1 == SYNCED || pageStatus1 == NOTSYNCED);

    /* set sync bit status */
    if (status != pageStatus0 || status != pageStatus1) {
        START_CRIT_SECTION();
        if (status != pageStatus0)
            SET_SYNC0_BYTE_STATUS(map[metaByte], status, bshift);
        if (status != pageStatus1)
            SET_SYNC1_BYTE_STATUS(map[metaByte], status, bshift);
        MarkBufferDirty(metabuffer);
        END_CRIT_SECTION();
    }

    UnlockReleaseBuffer(metabuffer);
}

/*
 * Clear all bcm page sync status bit 0 in meta pages before catchup.
 */
static void BCMClearMetaBit(Relation rel, int col)
{
    BlockNumber metablock = 1;
    Buffer metabuffer = InvalidBuffer;
    Page page;
    unsigned char *map = NULL;
    uint32 bshift = 0;
    BCMBitStatus pageStatus0 = 0;
    int i = 0;
    int j = 0;
    bool dirty = false;

    metabuffer = BCM_readbuf(rel, metablock, false, col);
    if (!BufferIsValid(metabuffer))
        return; /* nothing to */

    do {
        LockBuffer(metabuffer, BUFFER_LOCK_EXCLUSIVE);
        page = BufferGetPage(metabuffer);
        map = (unsigned char *)PageGetContents(page);

        ereport(DEBUG1, (errmsg("relation %u/%u/%u col %d try to clear meta block %u", rel->rd_node.spcNode,
                                rel->rd_node.dbNode, rel->rd_node.relNode, col, metablock)));

        /* clear sync bit 0 status */
        START_CRIT_SECTION();
        for (i = 0; i < (int)BCMMAPSIZE; i++) {
            for (j = 0; j < META_BLOCKS_PER_BYTE; j++) {
                bshift = (uint32)j * META_BITS_PER_BLOCK;
                pageStatus0 = ((map[i] >> bshift) & META_SYNC0_BITMASK) >> 3;
                Assert(pageStatus0 == SYNCED || pageStatus0 == NOTSYNCED);
                if (pageStatus0 == NOTSYNCED) {
                    SET_SYNC0_BYTE_STATUS(map[i], SYNCED, bshift);
                    dirty = true;
                }
            }
        }
        if (dirty)
            MarkBufferDirty(metabuffer);
        END_CRIT_SECTION();

        UnlockReleaseBuffer(metabuffer);

        /* caculate the next meta page, than clear again. */
        metablock += META_BLOCKS_PER_PAGE + 1;
        metabuffer = BCM_readbuf(rel, metablock, false, col);
    } while (BufferIsValid(metabuffer));

    return;
}

/*
 * Reset the bcm page sync status bit 1 in meta pages after catchup.
 * Notes: we skip those bcm pages which are recently marked as unsynced
 * by checking the sync status bit 0.
 */
static void BCMResetMetaBit(Relation rel, BlockNumber metablk, int col)
{
    BlockNumber metablock = 1;
    Buffer metabuffer = InvalidBuffer;
    Page page;
    unsigned char *map = NULL;
    uint32 bshift = 0;
    BCMBitStatus pageStatus0 = 0;
    BCMBitStatus pageStatus1 = 0;
    int i = 0;
    int j = 0;
    bool dirty = false;

    for (metablock = 1; metablock < metablk; metablock += (META_BLOCKS_PER_PAGE + 1)) {
        metabuffer = BCM_readbuf(rel, metablock, false, col);
        if (!BufferIsValid(metabuffer))
            ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED),
                            errmsg("%u/%u/%u invalid bcm meta buffer %u", rel->rd_node.spcNode, rel->rd_node.dbNode,
                                   rel->rd_node.relNode, metablock)));

        LockBuffer(metabuffer, BUFFER_LOCK_EXCLUSIVE);
        page = BufferGetPage(metabuffer);
        map = (unsigned char *)PageGetContents(page);

        ereport(DEBUG1, (errmsg("relation %u/%u/%u col %d try to reset meta block %u", rel->rd_node.spcNode,
                                rel->rd_node.dbNode, rel->rd_node.relNode, col, metablock)));
        /*
         * Clear the latest set sync bit 1 status, the the page status 0 has been set
         * to NOTSYNCED sync last meta clear, we should skip this BCM block.
         */
        START_CRIT_SECTION();
        for (i = 0; i < (int)BCMMAPSIZE; i++) {
            for (j = 0; j < META_BLOCKS_PER_BYTE; j++) {
                bshift = (uint32)j * META_BITS_PER_BLOCK;
                pageStatus0 = ((map[i] >> bshift) & META_SYNC0_BITMASK) >> 3;
                Assert(SYNCED == pageStatus0 || NOTSYNCED == pageStatus0);
                pageStatus1 = ((map[i] >> bshift) & META_SYNC1_BITMASK) >> 1;
                Assert(SYNCED == pageStatus1 || NOTSYNCED == pageStatus1);
                if (SYNCED == pageStatus0 && NOTSYNCED == pageStatus1) {
                    SET_SYNC1_BYTE_STATUS(map[i], SYNCED, bshift);
                    dirty = true;
                }
            }
        }
        if (dirty)
            MarkBufferDirty(metabuffer);
        END_CRIT_SECTION();

        UnlockReleaseBuffer(metabuffer);
    }

    return;
}

/*
 * Set the corresponding bit of the heap block as status, before call
 * this function we should call BCM_pin to get the right bcmbuffer.
 */
void BCMSetStatusBit(Relation rel, uint64 heapBlk, Buffer buf, BCMBitStatus status, int col)
{
    BlockNumber mapBlock = HEAPBLK_TO_BCMBLOCK(heapBlk);
    int mapByte = HEAPBLK_TO_BCMBYTE(heapBlk);
    int mapBit = HEAPBLK_TO_BCMBIT(heapBlk);
    uint32 bshift = (uint32)mapBit * BCM_BITS_PER_BLOCK;
    BCMBitStatus bcmStatus = 0;
    bool needwal = false;
    Page page;
    unsigned char *map = NULL;

#ifdef TRACE_BCMMAP
    elog(LOG, "BCMSetStatusBit: rel: %s col: %d blk: %lu  status: %d ", RelationGetRelationName(rel), col, heapBlk,
         status);
#endif

    if (!BufferIsValid(buf) || BufferGetBlockNumber(buf) != mapBlock)
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED), errmsg("wrong buffer passed to BCM_clear, BlockNumber from buf is %u,"
                                                         "mapBlock is %u",
                                                         BufferGetBlockNumber(buf), mapBlock)));

    Assert(status == SYNCED || status == NOTSYNCED);

    if (status == NOTSYNCED)
        BCMSetMetaBit(rel, mapBlock, NOTSYNCED, col);

    page = BufferGetPage(buf);
    map = (unsigned char *)PageGetContents(page);

    bcmStatus = (map[mapByte] >> bshift) & BCM_SYNC_BITMASK;
    bcmStatus = bcmStatus >> 1;
    Assert(bcmStatus == SYNCED || bcmStatus == NOTSYNCED);

    /* Bcm status must be 0 before it will be set to 1 */
    if (!RecoveryInProgress() && status == NOTSYNCED && bcmStatus == NOTSYNCED)
        ereport(WARNING, (errmsg("BCM page maybe damage, rnode[%u,%u,%u] col:%d block:%lu ", rel->rd_node.spcNode,
                                 rel->rd_node.dbNode, rel->rd_node.relNode, col, heapBlk)));

    needwal = (RelationNeedsWAL(rel) && !t_thrd.xlog_cxt.InRecovery);

    if (status != bcmStatus) {
        START_CRIT_SECTION();

        /* set status */
        SET_SYNC_BYTE_STATUS(map[mapByte], status, bshift);
        MarkBufferDirty(buf);

        /*
         * we record one cu bcm xlog in BCMLogCU for column store.
         */
        bool isRowStore = (col == 0);

        if (needwal && isRowStore) {
            XLogRecPtr recptr = InvalidXLogRecPtr;

            recptr = log_heap_bcm(&(rel->rd_node), 0, heapBlk, status);

            PageSetLSN(page, recptr);
        }
        END_CRIT_SECTION();
    }
}

/* Clear all the bcm bits of a relation */
void BCMClearRel(Relation rel, int col)
{
    BlockNumber totalblocks = 0;
    BlockNumber mapBlock;
    ForkNumber forknum = BCM_FORKNUM;

#ifdef TRACE_BCMMAP
    elog(LOG, "BCMClearRel %s", RelationGetRelationName(rel));
#endif

    if (col > 0) {
        forknum = ColumnId2ColForkNum(col);
        CStoreRelationOpenSmgr(rel, col);
    } else {
        RelationOpenSmgr(rel);
    }

    /*
     * If no bcm map has been created yet for this relation, there's
     * nothing to clear.
     */
    if (!smgrexists(rel->rd_smgr, forknum))
        return;

    totalblocks = smgrnblocks(rel->rd_smgr, forknum);
    /*
     * If  bcm map only has a file header, there's nothing to clear.
     */
    if (totalblocks == 0 || totalblocks == 1)
        return;

    /* We begin clear from page 1 not page 0 */
    for (mapBlock = 1; mapBlock < totalblocks; mapBlock++) {
        Buffer mapBuffer;
        unsigned char *map = NULL;
        errno_t rc = 0;

        mapBuffer = BCM_readbuf(rel, mapBlock, false, col);
        if (!BufferIsValid(mapBuffer))
            ereport(ERROR,
                    (errcode(ERRCODE_DATA_CORRUPTED), errmsg("%u/%u/%u invalid bcm buffer %u", rel->rd_node.spcNode,
                                                             rel->rd_node.dbNode, rel->rd_node.relNode, mapBlock)));

        LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE);
        map = (unsigned char *)PageGetContents(BufferGetPage(mapBuffer));

        /* NB: We clear the whole page, including the dcm bits, is that ok? */
        rc = memset_s(map, BCMMAPSIZE, 0, BCMMAPSIZE);
        securec_check(rc, "", "");
        MarkBufferDirty(mapBuffer);

        UnlockReleaseBuffer(mapBuffer);
    }
}

/*
 * BCM_truncate - truncate the bcm map
 *
 * The caller must hold AccessExclusiveLock on the relation, to ensure that
 * other backends receive the smgr invalidation event that this function sends
 * before they access the bcm again.
 *
 * Note: bcm will be truncated to zero. Only data replication can generate bcm
 * file, and heap can not be truncated by lazy vacuum(the function of
 * lazy_truncate_heap has been disabled at data replication mode), so we need
 * not to realize the code about truncate the bcm file to nblock.
 */
void BCM_truncate(Relation rel)
{
#ifdef TRACE_BCMMAP
    ereport(DEBUG1, (errmodule(MOD_REP), errmsg("bcm_truncate %s", RelationGetRelationName(rel))));
#endif

    RelationOpenSmgr(rel);

    /*
     * If no bcm map has been created yet for this relation, there's
     * nothing to truncate.
     */
    if (!smgrexists(rel->rd_smgr, BCM_FORKNUM))
        return;

    /* Truncate the bcm pages, and send smgr inval message */
    smgrtruncate(rel->rd_smgr, BCM_FORKNUM, 0);

    /*
     * We might as well update the local smgr_bcm_nblocks setting. smgrtruncate
     * sent an smgr cache inval message, which will cause other backends to
     * invalidate their copy of smgr_bcm_nblocks, and this one too at the next
     * command boundary.  But this ensures it isn't outright wrong until then.
     */
    for (int i = 0; i < rel->rd_smgr->smgr_bcmarry_size; i++)
        rel->rd_smgr->smgr_bcm_nblocks[i] = 0;
}

/*
 * Read a bcm map page.
 *
 * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
 * true, the bcm map file is extended.
 */
static Buffer BCM_readbuf(Relation rel, BlockNumber blkno, bool extend, int col)
{
    Buffer buf;
    ForkNumber forknum = BCM_FORKNUM;

    if (col > 0) {
        forknum = ColumnId2ColForkNum(col);

        /*
         * We might not have opened the relation at the smgr level yet, or we
         * might have been forced to close it by a sinval message.	The code below
         * won't necessarily notice relation extension immediately when extend =
         * false, so we rely on sinval messages to ensure that our ideas about the
         * size of the map aren't too far out of date.
         */
        CStoreRelationOpenSmgr(rel, col);
    } else {
        RelationOpenSmgr(rel);
    }

    /*
     * If we haven't cached the size of the bcm map fork yet, check it
     * first.
     */
    if (rel->rd_smgr->smgr_bcm_nblocks[col] == InvalidBlockNumber) {
        if (smgrexists(rel->rd_smgr, forknum))
            rel->rd_smgr->smgr_bcm_nblocks[col] = smgrnblocks(rel->rd_smgr, forknum);
        else
            rel->rd_smgr->smgr_bcm_nblocks[col] = 0;
    }

    /* Handle requests beyond EOF */
    if (blkno >= rel->rd_smgr->smgr_bcm_nblocks[col]) {
        if (extend)
            BCM_extend(rel, blkno + 1, col);
        else
            return InvalidBuffer;
    }

    /*
     * Use ZERO_ON_ERROR mode, and initialize the page if necessary. It's
     * always safe to clear bits, so it's better to clear corrupt pages than
     * error out.
     */
    buf = ReadBufferExtended(rel, forknum, blkno, RBM_ZERO_ON_ERROR, NULL);
    if (PageIsNew(BufferGetPage(buf)))
        PageInit(BufferGetPage(buf), BLCKSZ, 0);
    return buf;
}

/*
 * Ensure that the bcm map fork is at least bcm_nblocks long, extending
 * it if necessary with zeroed pages.
 */
static void BCM_extend(Relation rel, BlockNumber bcm_nblocks, int col)
{
    BlockNumber bcm_nblocks_now;
    Page pg;
    ForkNumber forknum = BCM_FORKNUM;

    ADIO_RUN()
    {
        pg = (Page)adio_align_alloc(BLCKSZ);
    }
    ADIO_ELSE()
    {
        pg = (Page)palloc(BLCKSZ);
    }
    ADIO_END();

    PageInit(pg, BLCKSZ, 0);

    /*
     * We use the relation extension lock to lock out other backends trying to
     * extend the bcm map at the same time. It also locks out extension
     * of the main fork, unnecessarily, but extending the bcm map
     * happens seldom enough that it doesn't seem worthwhile to have a
     * separate lock tag type for it.
     *
     * Note that another backend might have extended or created the relation
     * by the time we get the lock.
     */
    LockRelationForExtension(rel, ExclusiveLock);

    /*
     * Create the file first if it doesn't exist.  If smgr_bcm_nblocks is
     * positive then it must exist, no need for an smgrexists call.
     */
    if (col > 0) {
        forknum = ColumnId2ColForkNum(col);
        CStoreRelationOpenSmgr(rel, col);
    } else {
        /* Might have to re-open if a cache flush happened */
        RelationOpenSmgr(rel);
    }

    if ((rel->rd_smgr->smgr_bcm_nblocks[col] == 0 || rel->rd_smgr->smgr_bcm_nblocks[col] == InvalidBlockNumber) &&
        !smgrexists(rel->rd_smgr, forknum)) {
        createBCMFile(rel, col);
    }

    bcm_nblocks_now = smgrnblocks(rel->rd_smgr, forknum);

    if (bcm_nblocks_now < bcm_nblocks) {
        VerifyTblspcWhenBcmExtend(rel, col, bcm_nblocks - bcm_nblocks_now);
    }

    /* Now extend the file */
    while (bcm_nblocks_now < bcm_nblocks) {
        PageSetChecksumInplace(pg, bcm_nblocks_now);

        smgrextend(rel->rd_smgr, forknum, bcm_nblocks_now, (char *)pg, false);
        bcm_nblocks_now++;
    }

    /*
     * Send a shared-inval message to force other backends to close any smgr
     * references they may have for this rel, which we are about to change.
     * This is a useful optimization because it means that backends don't have
     * to keep checking for creation or extension of the file, which happens
     * infrequently.
     */
    CacheInvalidateSmgr(rel->rd_smgr->smgr_rnode);

    /* Update local cache with the up-to-date size */
    rel->rd_smgr->smgr_bcm_nblocks[col] = bcm_nblocks_now;

    UnlockRelationForExtension(rel, ExclusiveLock);

    ADIO_RUN();
    {
        adio_align_free(pg);
    }
    ADIO_ELSE()
    {
        pfree(pg);
        pg = NULL;
    }
    ADIO_END();
}

/* Read bcm page */
void BCM_CStore_pin(Relation rel, int col, uint64 offset, Buffer *buf)
{
    Assert(col > 0);
    uint64 align_size = (uint64)(uint32)CUAlignUtils::GetCuAlignSizeColumnId(col);
    BlockNumber mapBlock = cstore_offset_to_bcmblock(offset, align_size);
    *buf = BCM_readbuf(rel, mapBlock, true, col);
}

/* Read bcm page */
void BCM_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
{
    BlockNumber mapBlock = HEAPBLK_TO_BCMBLOCK(heapBlk);

    *buf = BCM_readbuf(rel, mapBlock, true);
}

/*
 * BCMSendData
 *
 * Traverse every BCM page of current relation to see if a corresponding
 * heap page or CU unit needs to send to standby. If needed, load the
 * heap page or CU unit data and push it to the send queue. We should
 * hold the relation lock to avoid been dropped during catchup.
 * In order to speed up the check efficiency, we just need to walk the
 * bcm meta buffer instead. More comments see in bcm meta buffer.
 */
static void BCMSendData(const RelFileNode &relfilenode, const char *bcmpath, int col)
{
    RelFileNode InvalidRelFileNode = { 0, 0, 0, -1 };
    Relation rel;
    Buffer metabuffer = InvalidBuffer;
    ForkNumber forknum = BCM_FORKNUM;
    BlockNumber heapBlock = InvalidBlockNumber;
    BlockNumber metanum = 1;
    BlockNumber maxHeapBlock = InvalidBlockNumber;
    struct stat stat_buf;

    volatile DataSndCtlData *datasndctl = t_thrd.datasender_cxt.DataSndCtl;
    bool isColStore = col > 0 ? true : false;
    int contibits = 0;

    /* if disabled stream replication or relfilenode is invalid, skip current relation. */
    if (!u_sess->attr.attr_storage.enable_stream_replication ||
        (0 == memcmp(&relfilenode, &InvalidRelFileNode, sizeof(RelFileNode))))
        return;

    /*
     * Here we lock the database to solve the checkpoint failure " ERROR:checkpoint request failed
     * CONTEXT:  Error message received from nodes:xxx" because of the concurrent execution of drop
     * database and catchup.Steps to reproduce:
     * 1.create database test,and create table t1 in test;
     * 2.copy data to t1(without standby)
     * 3.drop database and sleep before rm data
     * 4.start standby, catchup thread will start and send data in primary
     * 5.conitue step 3
     * 6.drop database will success
     * 7.create database or checkpoint will get the error.
     */
    LockSharedObject(DatabaseRelationId, relfilenode.dbNode, 0, RowExclusiveLock);

    rel = CreateFakeRelcacheEntry(relfilenode);

    /*
     * First lock relfilenode(Notes: relfilenode.relNode maybe differnt from oid),
     * at this time, LockRelation is equal to LockRelFileNode,
     * then read the bcm file, if it is not exit,
     * the table maybe delete, so we will return.
     *
     * ExclusiveLock will block insert, because catchup maybe read a zero block
     * after insert, it is tested on xfs file system.
     */
    LockRelFileNode(relfilenode, ExclusiveLock);

    if (isColStore) {
        forknum = ColumnId2ColForkNum(col);
        CStoreRelationOpenSmgr(rel, col);
    } else {
        RelationOpenSmgr(rel);
    }

    /*
     * BCM file is just removed, skip following check.
     * smgrexists maybe not correct(After the table is dropped), so we should use
     * stat to check it.
     */
    if (!smgrexists(rel->rd_smgr, forknum) || stat(bcmpath, &stat_buf) != 0) {
        UnlockRelFileNode(relfilenode, ExclusiveLock);
        FreeFakeRelcacheEntry(rel);
        UnlockSharedObject(DatabaseRelationId, relfilenode.dbNode, 0, RowExclusiveLock);
        return;
    }

    BCMClearMetaBit(rel, col);

    metabuffer = BCM_readbuf(rel, metanum, false, col);
    if (!BufferIsValid(metabuffer)) {
        /* Nothing to do, the file was already smaller */
        UnlockRelFileNode(relfilenode, ExclusiveLock);
        FreeFakeRelcacheEntry(rel);
        UnlockSharedObject(DatabaseRelationId, relfilenode.dbNode, 0, RowExclusiveLock);
        return;
    }

    /* get max size of data file */
    maxHeapBlock = BCMGetDataFileMaxSize(rel, col);
    if (maxHeapBlock == InvalidBlockNumber) {
        /* Nothing to do, the file size was zero */
        UnlockRelFileNode(relfilenode, ExclusiveLock);
        FreeFakeRelcacheEntry(rel);
        UnlockSharedObject(DatabaseRelationId, relfilenode.dbNode, 0, RowExclusiveLock);
        return;
    }

    CUFile *cFile = isColStore ? New(CurrentMemoryContext) CUFile(relfilenode, col) : NULL;
    do {
        ereport(DEBUG3, (errmsg("valid bcm meta buffer :%u", metanum)));

        BCMWalkMetaBuffer(rel, cFile, metabuffer, heapBlock, contibits, maxHeapBlock, col);
        ReleaseBuffer(metabuffer);

        /* caculate the next meta page, than check again. */
        metanum += META_BLOCKS_PER_PAGE + 1;
        metabuffer = BCM_readbuf(rel, metanum, false, col);
    } while (BufferIsValid(metabuffer));

    /*
     * For column store, after we loaded all the bcm buffers, especially when
     * the last bcm status was NOTSYNCED, we should finish the surplus work --
     * push the last contibits data to queue.
     */
    if (contibits > 0)
        bcm_read_multi_cu(cFile, rel, col, heapBlock, contibits, maxHeapBlock);

    if (cFile)
        DELETE_EX(cFile);

    /*
     * we should wait until all the pushed data has been send to the standby,
     * then clear the BCMArray.
     */
    while (DQByteLT(datasndctl->queue_offset, t_thrd.proc->waitDataSyncPoint)) {
        CatchupShutdownIfNoDataSender();
        pg_usleep(1000L); /* 1ms */
    }

    ClearBCMArray();
    BCMResetMetaBit(rel, metanum, col);

    UnlockRelFileNode(relfilenode, ExclusiveLock);
    FreeFakeRelcacheEntry(rel);

    UnlockSharedObject(DatabaseRelationId, relfilenode.dbNode, 0, RowExclusiveLock);
}

/*
 * BCMWalkMetaBuffer
 *
 * Walk through every bit in current meta page to find out if any corresponding
 * BCM page needs to search.
 */
static void BCMWalkMetaBuffer(Relation rel, CUFile *cFile, Buffer metabuffer, BlockNumber &heapBlock, int &contibits,
                              BlockNumber maxHeapBlock, int col)
{
    Buffer bcmbuffer = InvalidBuffer;
    BlockNumber metaBlock;
    BlockNumber bcmBlock;
    int i;
    int j;
    uint32 bshift;
    BCMBitStatus status;
    Page metapage;
    unsigned char *map = NULL;

    Assert(BufferIsValid(metabuffer));
    metaBlock = BufferGetBlockNumber(metabuffer);
    metapage = BufferGetPage(metabuffer);
    map = (unsigned char *)PageGetContents(metapage);

    for (i = 0; i < (int)BCMMAPSIZE; i++) {
        for (j = 0; j < META_BLOCKS_PER_BYTE; j++) {
            bshift = (uint32)j * META_BITS_PER_BLOCK;
            status = ((map[i] >> bshift) & META_SYNC1_BITMASK) >> 1;

            /* the bcm block needs to sync */
            if (status == NOTSYNCED) {
                CatchupShutdownIfNoDataSender();
                /* get bcm page block */
                bcmBlock = GET_BCM_BLOCK(metaBlock, i, j);
                ereport(DEBUG2, (errmsg("relation %u/%u/%u col %d try to sync bcm block %u", rel->rd_node.spcNode,
                                        rel->rd_node.dbNode, rel->rd_node.relNode, col, bcmBlock)));
                /*
                 * We assume that if the bcm buffer is invalid, it means that some
                 * thread has just extended that block, and we can see it in meta page
                 * but not in the opened smgr of current relation. It's safe to skip this
                 * block 'cause we can sync it by data replication.
                 */
                bcmbuffer = BCM_readbuf(rel, bcmBlock, false, col);
                if (BufferIsValid(bcmbuffer)) {
                    BCMSendOneBuffer(rel, cFile, bcmbuffer, heapBlock, contibits, maxHeapBlock, col);
                    ReleaseBuffer(bcmbuffer);
                }
            }
        }
    }
}

/*
 * BCMSendOneBuffer
 *
 * Examine every slot of the bcm page held in bcmbuffer and push the data that
 * is still flagged NOTSYNCED towards the standby.
 *
 * Row store (col <= 0): each NOTSYNCED slot is resolved to a heap block which
 * is read, share-locked and pushed to the data queue. The walk stops as soon
 * as a heap block beyond maxHeapBlock is met.
 *
 * Column store (col > 0): consecutive NOTSYNCED slots are only counted in
 * contibits (heapBlock remembers where the run started). The run is handed to
 * bcm_read_multi_cu() when a SYNCED slot ends it or when it reaches the
 * per-read limit. heapBlock and contibits are references, so an unfinished
 * run is carried back to the caller.
 */
static void BCMSendOneBuffer(Relation rel, CUFile *cFile, Buffer bcmbuffer, BlockNumber &heapBlock, int &contibits,
                             BlockNumber maxHeapBlock, int col)
{
    const bool isColStore = (col > 0);
    const BlockNumber bcmBlockNo = BufferGetBlockNumber(bcmbuffer);

    Assert(isColStore || (cFile == NULL));

    /*
     * The bcm buffer is deliberately read without a content lock. If Catchup
     * held the share lock while the data queue was full, it would sleep
     * waiting for DataSender to drain the queue; DataSender in turn needs the
     * exclusive lock on this very buffer to flip a BCM bit from 1 to 0. Each
     * side would wait for the other: deadlock.
     */
    Page bcmpage = BufferGetPage(bcmbuffer);
    unsigned char *bits = (unsigned char *)PageGetContents(bcmpage);

    for (int byteIdx = 0; byteIdx < (int)BCMMAPSIZE; byteIdx++) {
        for (int slot = 0; slot < BCM_BLOCKS_PER_BYTE; slot++) {
            const uint32 bshift = (uint32)slot * BCM_BITS_PER_BLOCK;
            const BCMBitStatus status = ((bits[byteIdx] >> bshift) & BCM_SYNC_BITMASK) >> 1;

            if (isColStore) {
                if (status == NOTSYNCED) {
                    CatchupShutdownIfNoDataSender();
                    /* The first slot of a run fixes the starting block. */
                    if (contibits == 0) {
                        heapBlock = GET_HEAP_BLOCK(bcmBlockNo, byteIdx, slot);
                    }
                    contibits++;
                }

                /*
                 * Load as much CU data as possible in one go: flush the run
                 * when it is broken by a SYNCED slot or has grown to the
                 * number of CU units that fit in 512KB.
                 */
                int max_contibits = (512 * 1024) / CUAlignUtils::GetCuAlignSizeColumnId(col);
                if (contibits > 0 && (status == SYNCED || contibits >= max_contibits)) {
                    bcm_read_multi_cu(cFile, rel, col, heapBlock, contibits, maxHeapBlock);
                }
                continue;
            }

            /* Row store from here on: only NOTSYNCED slots need any work. */
            if (status != NOTSYNCED) {
                continue;
            }

            CatchupShutdownIfNoDataSender();
            heapBlock = GET_HEAP_BLOCK(bcmBlockNo, byteIdx, slot);

            if (u_sess->attr.attr_storage.HaModuleDebug) {
                ereport(LOG, (errmsg("HA-BCMSendOneBuffer: relation %u/%u/%u col %d try to sync bcm "
                                     "blockno %u heap blockno %u maxHeapBlock %u",
                                     rel->rd_node.spcNode, rel->rd_node.dbNode, rel->rd_node.relNode, col,
                                     bcmBlockNo, heapBlock, maxHeapBlock)));
            }

            /*
             * After an OS crash the data file may have been fsynced only up to
             * block 100 (for example) while the BCM file already mentions
             * block 101. Such a heap block cannot be read, so stop here.
             */
            if (heapBlock > maxHeapBlock) {
                return;
            }

            Buffer heapbuffer = ReadBuffer(rel, heapBlock);
            LockBuffer(heapbuffer, BUFFER_LOCK_SHARE);
            PushHeapPageToDataQueue(heapbuffer);
            UnlockReleaseBuffer(heapbuffer);
        }
    }
}

/*
 * BCMGetDataFileMaxSize
 *
 * Return the highest valid block number of the data file that a bcm file
 * describes.
 *
 * For a column (col > 0) the block count is the column data file size divided
 * by the CU alignment size of that column; otherwise it is the block count of
 * the relation's main fork. A missing main fork is reported with a WARNING.
 *
 * Blocks are numbered 0 .. nblocks-1, so the result is nblocks-1. When there
 * are no blocks at all (empty or missing file) InvalidBlockNumber is returned.
 */
static BlockNumber BCMGetDataFileMaxSize(Relation rel, int col)
{
    BlockNumber nblocks = 0;

    if (col > 0) {
        uint64 filesize = GetColDataFileSize(rel, col);
        nblocks = (BlockNumber)(filesize / CUAlignUtils::GetCuAlignSizeColumnId(col));
    } else if (smgrexists(rel->rd_smgr, MAIN_FORKNUM)) {
        nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
    } else {
        /* Build the main fork's path only to name it in the warning. */
        RelFileNodeBackend smgr_rnode;
        smgr_rnode.node = rel->rd_node;
        smgr_rnode.backend = InvalidBackendId;

        char *missingPath = relpath(smgr_rnode, MAIN_FORKNUM);
        ereport(WARNING, (errcode_for_file_access(), errmsg("relation file is not exist when get max block num "
                                                            "for bcm file relfilenode: \"%s\": %m",
                                                            missingPath)));
        pfree(missingPath);
        missingPath = NULL;
    }

    if (nblocks == 0) {
        return InvalidBlockNumber;
    }
    return nblocks - 1;
}

/*
 * CheckFilePostfix
 *
 * Return true when str1 ends with str2.
 *
 * Returns false when either argument is NULL, when either string is empty,
 * or when str1 is shorter than str2. Lengths are kept as size_t so that very
 * long strings are not truncated by a narrowing conversion.
 */
static bool CheckFilePostfix(const char *str1, const char *str2)
{
    if (str1 == NULL || str2 == NULL) {
        return false;
    }

    const size_t len1 = strlen(str1);
    const size_t len2 = strlen(str2);

    /* len1 >= len2 > 0 also guarantees that str1 is not empty. */
    if (len2 == 0 || len1 < len2) {
        return false;
    }

    /* Compare the last len2 bytes of str1 against the whole of str2. */
    return memcmp(str1 + (len1 - len2), str2, len2) == 0;
}

/*
 * BCMClearFile: set the BCM file's pages to init pages
 * except the first page(BCM File Header).
 * FUTURE CASE:: Maybe we should Consider concurrency scenarios,
 * one is clearing file another is setting.
 */
static void BCMClearFile(const RelFileNode &relfilenode, int col)
{
    RelFileNode InvalidRelFileNode = { 0, 0, 0, -1 };
    Relation rel;

    if (0 == memcmp(&relfilenode, &InvalidRelFileNode, sizeof(RelFileNode)))
        return;

    rel = CreateFakeRelcacheEntry(relfilenode);
    BCMClearRel(rel, col);
    FreeFakeRelcacheEntry(rel);
}

/* Recursion search BCM files with the tableSpacePath */
static void searchBCMFiles(const char *tableSpacePath, const char *relativepath, bool undertablespace, bool clear,
                           int iterations)
{
    DIR *dir = NULL;
    struct dirent *de;
    char path[MAXPGPATH] = {'\0'};
    char rpath[MAXPGPATH] = {'\0'};
    int nRet = 0;

    /* the layer number of searchBCMFiles iterations */
    iterations++;

    dir = AllocateDir(tableSpacePath);
    while ((de = ReadDir(dir, tableSpacePath)) != NULL) {
        if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
            continue;

        if (strncmp(de->d_name, PG_TEMP_FILE_PREFIX, strlen(PG_TEMP_FILE_PREFIX)) == 0)
            continue;

        if (strncmp(de->d_name, "pg_log", strlen("pg_log")) == 0 ||
            strncmp(de->d_name, "pg_location", strlen("pg_location")) == 0)
            continue;

        if (strncmp(de->d_name, "pg_xlog", strlen("pg_xlog")) == 0)
            continue;

        if (strncmp(de->d_name, "full_upgrade_bak", strlen("full_upgrade_bak")) == 0)
            continue;

        nRet = snprintf_s(path, sizeof(path), MAXPGPATH - 1, "%s/%s", tableSpacePath, de->d_name);
        securec_check_ss(nRet, "", "");

        if (undertablespace) {
            if (NULL == strstr(path, TABLESPACE_VERSION_DIRECTORY) ||
                NULL == strstr(path, g_instance.attr.attr_common.PGXCNodeName))
                continue;
        } else {
            if (strcmp(de->d_name, "pg_tblspc") == 0)
                continue;
        }

        if (relativepath) {
            nRet = snprintf_s(rpath, sizeof(rpath), MAXPGPATH - 1, "%s/%s", relativepath, de->d_name);
            securec_check_ss(nRet, "", "");
        } else {
            nRet = snprintf_s(rpath, sizeof(rpath), MAXPGPATH - 1, "%s", de->d_name);
            securec_check_ss(nRet, "", "");
        }

        /*
         * serchBCMFiles will be recursive call 3 interations to get file path. In the third layer,
         * the file path is table file, not Dir, so we need not to decide whether it is a folder,
         * because the performance of stat interface is too bad. The file path such as
         * ./base/13764 or /home/xxx/tablespace/PG_9.2_201611171_datanode1/13764
         */
        if (iterations < 3 && isDirExist(path)) {
            ereport(DEBUG3, (errmsg("search path %s, relative path: %s, iterations: %d.", path, rpath, iterations)));
            searchBCMFiles(path, rpath, undertablespace, clear, iterations);
        } else {
            /*
             * When we handle the bcm files, we will find if we end with "_bcm".
             */
            if (CheckFilePostfix(rpath, BCM)) {
                HandleBCMfile(rpath, clear);
            }
        }
    }
    FreeDir(dir);
}

static void HandleBCMfile(char *bcmpath, bool clear)
{
    RelFileNodeForkNum bcmfilenode;

    bcmfilenode = relpath_to_filenode(bcmpath);
    if (bcmfilenode.forknumber == InvalidForkNumber) {
        ereport(WARNING,
                (errmsg("relfilenode [spcNode%u] [dbNode%u] [relNode%u]"
                        "[backendId%d] [segno%u] [forkNumber-%d] forkNumber is invalid",
                        bcmfilenode.rnode.node.spcNode, bcmfilenode.rnode.node.dbNode, bcmfilenode.rnode.node.relNode,
                        bcmfilenode.rnode.backend, bcmfilenode.segno, bcmfilenode.forknumber)));
        return;
    }

    ereport(DEBUG3,
            (errmsg("relfilenode [spcNode%u] [dbNode%u] [relNode%u]"
                    "[backendId%d] [segno%u] [forkNumber-%d]",
                    bcmfilenode.rnode.node.spcNode, bcmfilenode.rnode.node.dbNode, bcmfilenode.rnode.node.relNode,
                    bcmfilenode.rnode.backend, bcmfilenode.segno, bcmfilenode.forknumber)));

    if (clear) {
        /* Clear this bcm file */
        ereport(DEBUG2, (errmsg("clear bcm file %s ", bcmpath)));
        BCMClearFile(bcmfilenode.rnode.node, GetColumnNum(bcmfilenode.forknumber));
    } else {
        /*
         * According to bcm file bcmPath, we put the data(not synchronized)
         * to the queue.
         */
        ereport(DEBUG2, (errmsg("according to bcm file %s, send data(not synchronized)", bcmpath)));

        CatchupShutdownIfNoDataSender();
        BCMSendData(bcmfilenode.rnode.node, bcmpath, GetColumnNum(bcmfilenode.forknumber));
    }
}

/*
 * GetBcmFileList
 *
 * Find every bcm file of this instance and either clear it (clear == true)
 * or send the data it marks as not synchronized (clear == false).
 *
 * The tablespaces linked under pg_tblspc are collected first; an entry for
 * the default location is then put at the head of the list, so the default
 * location is searched before the user tablespaces. All allocations made
 * during the search live in a private memory context that is deleted on
 * return.
 */
void GetBcmFileList(bool clear)
{
    DIR *dir = NULL;
    List *tablespaces = NIL;
    ListCell *lc = NULL;
    struct dirent *de;
    tablespaceinfo *ti = NULL;

    MemoryContext bcm_context;
    MemoryContext old_context;

    int nRet = 0;

    bcm_context = AllocSetContextCreate(CurrentMemoryContext, "Search BCM files context", ALLOCSET_DEFAULT_MINSIZE,
                                        ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE);
    old_context = MemoryContextSwitchTo(bcm_context);
    ereport(LOG, (errmsg("catchup process start to search all of bcm files.")));

    /* Make sure we can open the directory with tablespaces in it. */
    dir = AllocateDir("pg_tblspc");
    if (!dir) {
        ereport(ERROR, (errcode_for_file_access(), errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
        return;
    }

    /* Collect information about all tablespaces. */
    while ((de = ReadDir(dir, "pg_tblspc")) != NULL) {
        char fullpath[MAXPGPATH];
        char linkpath[MAXPGPATH];
        int rllen;

        /* Skip special stuff */
        if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
            continue;

        nRet = snprintf_s(fullpath, sizeof(fullpath), sizeof(fullpath) - 1, "pg_tblspc/%s", de->d_name);
        securec_check_ss(nRet, "", "");

#if defined(HAVE_READLINK) || defined(WIN32)
        rllen = readlink(fullpath, linkpath, sizeof(linkpath));
        if (rllen < 0) {
            ereport(WARNING, (errmsg("could not read symbolic link \"%s\": %m", fullpath)));
            continue;
        } else if (rllen >= (int)sizeof(linkpath)) {
            ereport(WARNING, (errmsg("symbolic link \"%s\" target is too long", fullpath)));
            continue;
        }
        /* readlink() does not NUL-terminate its result. */
        linkpath[rllen] = '\0';

        /* Every string is copied, so nothing below refers to the dirent. */
        ti = (tablespaceinfo *)palloc(sizeof(tablespaceinfo));
        ti->oid = pstrdup(de->d_name);
        ti->path = pstrdup(linkpath);
        ti->relativePath = pstrdup(fullpath);
        ti->size = -1;
        tablespaces = lappend(tablespaces, ti);
#else

        /*
         * If the platform does not have symbolic links, it should not be
         * possible to have tablespaces - clearly somebody else created
         * them. Warn about it and ignore.
         */
        ereport(WARNING,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("tablespaces are not supported on this platform")));
#endif
    }

    /*
     * The directory listing is complete and fully copied into the list, so
     * release the handle now instead of keeping it open for the whole
     * (potentially long-running) search below.
     */
    FreeDir(dir);
    dir = NULL;

    /*
     * Put a zeroed node for the default location at the head of the list;
     * its NULL path is what identifies it in the loop below.
     */
    ti = (tablespaceinfo *)palloc0(sizeof(tablespaceinfo));
    tablespaces = lcons(ti, tablespaces);

    foreach (lc, tablespaces) {
        tablespaceinfo *tsi = (tablespaceinfo *)lfirst(lc);
        if (tsi->path != NULL) {
            /* Tablespace create by user */
            ereport(DEBUG1, (errmsg("bcm path: %s; relative path: %s.", tsi->path, tsi->relativePath)));
            searchBCMFiles(tsi->path, tsi->relativePath, true, clear, 0);
        } else {
            /* Default tablespace */
            ereport(DEBUG1, (errmsg("bcm path: %s; relative path: %s.", ".", ".")));
            searchBCMFiles(".", NULL, false, clear, 0);
        }
    }

    ereport(LOG, (errmsg("catchup process done to search all bcm files.")));

    MemoryContextSwitchTo(old_context);
    MemoryContextDelete(bcm_context);
}

/*
 * GetIncrementalBcmFileList
 *
 * Process the incremental file list held in g_incrementalBcmInfo: the buffer
 * is read as a packed array of RelFileNodeKey entries. For each entry the
 * matching bcm file path is built and, if a path was produced, the file is
 * passed to HandleBCMfile() to send its not-synchronized data. The list
 * buffer is released afterwards and the accumulated parse/handle times are
 * logged.
 */
void GetIncrementalBcmFileList()
{
    char path[MAXPGPATH] = {'\0'};
    errno_t errorno = EOK;
    int getIncrementalCatchupParseBcmTime = 0;
    int getIncrementalCatchupHandleBcmTime = 0;

    ereport(LOG, (errmsg("catchup process start to search incremental bcm files.")));

    int num = g_incrementalBcmInfo.msgLength / sizeof(RelFileNodeKey);
    char *temp = g_incrementalBcmInfo.receivedFileList;

    ereport(LOG, (errmsg("num of file list we got from dummy:%d", num)));

    while (num != 0) {
        TimestampTz parseBcmStartTime = GetCurrentTimestamp();
        RelFileNodeKey data;

        /* Copy the entry out of the byte buffer before using it. */
        errorno = memcpy_s((void *)&data, sizeof(RelFileNodeKey), temp, sizeof(RelFileNodeKey));
        securec_check(errorno, "", "");
        temp += sizeof(RelFileNodeKey);

        /*
         * Start every entry with an empty path. The path builders can return
         * without writing anything (e.g. when the tablespace link cannot be
         * read); without this reset the previous entry's path would still be
         * in the buffer and that file would be handled a second time.
         */
        path[0] = '\0';

        if ((int)data.relfilenode.spcNode == DEFAULTTABLESPACE_OID) {
            GetIncrementalBcmFilePathForDefault(data, path, sizeof(path));
        } else {
            GetIncrementalBcmFilePathForCustome(data, path, sizeof(path));
        }

        getIncrementalCatchupParseBcmTime += ComputeTimeStamp(parseBcmStartTime);

        /* An empty path means no bcm file could be named for this entry. */
        if (*path != '\0') {
            TimestampTz handleBcmStartTime = GetCurrentTimestamp();
            HandleBCMfile(path, false);
            getIncrementalCatchupHandleBcmTime += ComputeTimeStamp(handleBcmStartTime);
        }
        num--;
    }
    ReplaceOrFreeBcmFileListBuffer(NULL, 0);
    ereport(
        LOG,
        (errmsg("incremental catchup parsing bcm costs %d milliseconds, handling bcm costs %d milliseconds, and total "
                "costs %d milliseconds",
                getIncrementalCatchupParseBcmTime, getIncrementalCatchupHandleBcmTime,
                getIncrementalCatchupParseBcmTime + getIncrementalCatchupHandleBcmTime)));
    ereport(LOG, (errmsg("catchup process done to search incremental bcm files.")));
}

/*
 * GetIncrementalBcmFilePathForDefault
 *
 * Build the bcm file path of an entry that lives in the default tablespace:
 *     base/<dbNode>/<relNode>_bcm            when columnid is 0
 *     base/<dbNode>/<relNode>_C<columnid>_bcm otherwise
 *
 * path/length describe the output buffer. Nothing is written when the entry
 * does not belong to the default tablespace.
 */
static void GetIncrementalBcmFilePathForDefault(const RelFileNodeKey &data, char *path, int length)
{
    if ((int)data.relfilenode.spcNode != DEFAULTTABLESPACE_OID) {
        return;
    }

    int rc = 0;
    if (data.columnid == 0) {
        rc = snprintf_s(path, length, length - 1, "base/%u/%u_bcm", data.relfilenode.dbNode,
                        data.relfilenode.relNode);
    } else {
        rc = snprintf_s(path, length, length - 1, "base/%u/%u_C%d_bcm", data.relfilenode.dbNode,
                        data.relfilenode.relNode, data.columnid);
    }
    securec_check_ss(rc, "", "");

    if (u_sess->attr.attr_storage.HaModuleDebug) {
        ereport(LOG, (errmsg("default tablespace path :%s\n", path)));
    }
}

/*
 * GetIncrementalBcmFilePathForCustome
 *
 * Build the bcm file path of an entry that lives in a user tablespace:
 *     pg_tblspc/<spcNode>/<version>_<nodename>/<dbNode>/<relNode>_bcm
 *     pg_tblspc/<spcNode>/<version>_<nodename>/<dbNode>/<relNode>_C<columnid>_bcm
 *
 * path/length describe the output buffer. For an entry outside the default
 * tablespace, path is emptied first, so on any failure return the caller
 * sees an empty string rather than whatever the buffer held before. Nothing
 * is touched when the entry belongs to the default tablespace.
 *
 * The symbolic link pg_tblspc/<spcNode> is only checked for readability; the
 * generated path goes through the link, not through its target.
 */
static void GetIncrementalBcmFilePathForCustome(const RelFileNodeKey &data, char *path, int length)
{
    int nRet = 0;
    DIR *dir = NULL;
    char fullPath[MAXPGPATH];
    char linkPath[MAXPGPATH];
    int readLinkPathLength;

    if ((int)data.relfilenode.spcNode != DEFAULTTABLESPACE_OID) {
        /*
         * Callers decide whether a path was produced by testing for an empty
         * string, and they may reuse the same buffer across entries. Clear it
         * up front so the early returns below cannot leave a stale path.
         */
        if (length > 0) {
            path[0] = '\0';
        }

        /* Check pg_tblspc dir */
        dir = AllocateDir("pg_tblspc");
        if (!dir) {
            ereport(ERROR, (errcode_for_file_access(), errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
            return;
        }
        FreeDir(dir);

        /* pg_tblspc/spcnode */
        nRet = snprintf_s(fullPath, sizeof(fullPath), sizeof(fullPath) - 1, "pg_tblspc/%u", data.relfilenode.spcNode);
        securec_check_ss(nRet, "", "");

#if defined(HAVE_READLINK) || defined(WIN32)
        /* Check link path */
        readLinkPathLength = readlink(fullPath, linkPath, sizeof(linkPath));
        if (readLinkPathLength < 0) {
            ereport(WARNING, (errmsg("could not read symbolic link \"%s\": %m", fullPath)));
            return;
        } else if (readLinkPathLength >= (int)sizeof(linkPath)) {
            ereport(WARNING, (errmsg("symbolic link \"%s\" target is too long", fullPath)));
            return;
        }
        /* readlink() does not NUL-terminate its result. */
        linkPath[readLinkPathLength] = '\0';
#else
        /*
         * If the platform does not have symbolic links, it should not be
         * possible to have tablespaces - clearly somebody else have created
         * them. Warn about it and ignore.
         */
        ereport(WARNING,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("tablespaces are not supported on this platform")));
#endif

        if (data.columnid != 0) {
            /* pg_tblspc/spcnode/version_nodename/dbnode/relnode_C1_BCM */
            nRet = snprintf_s(path, length, length - 1, "%s/%s_%s/%u/%u_C%d_bcm", fullPath,
                              TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName,
                              data.relfilenode.dbNode, data.relfilenode.relNode, data.columnid);
            securec_check_ss(nRet, "", "");
        } else {
            /* pg_tblspc/spcnode/version_nodename/dbnode/relnode_BCM */
            nRet = snprintf_s(path, length, length - 1, "%s/%s_%s/%u/%u_bcm", fullPath, TABLESPACE_VERSION_DIRECTORY,
                              g_instance.attr.attr_common.PGXCNodeName, data.relfilenode.dbNode,
                              data.relfilenode.relNode);
            securec_check_ss(nRet, "", "");
        }
        if (u_sess->attr.attr_storage.HaModuleDebug) {
            ereport(LOG, (errmsg("custome tablespace BCM path:%s\n", path)));
        }
    }
}

/*
 * bcm_read_multi_cu
 *
 * Read a run of `contibits` CU units starting at unit `heapBlock` of column
 * `col` and push the data to the sender queue.
 *
 * The run is first clipped so that it does not extend past maxHeapBlock; a
 * run that starts beyond maxHeapBlock is dropped altogether. Because the CU
 * file reader may hand back less than was asked for, reading is repeated
 * from the advanced offset until the whole run has been consumed or the
 * reader returns no buffer.
 *
 * contibits is a reference and is consumed by this call: every return path
 * here sets it to 0, and the loop exit is asserted to leave it at 0.
 */
static void bcm_read_multi_cu(CUFile *cFile, Relation rel, int col, BlockNumber heapBlock, int &contibits,
                              BlockNumber maxHeapBlock)
{
    uint64 cuAlign = (uint64)(uint32)CUAlignUtils::GetCuAlignSizeColumnId(col);
    uint64 readOffset = cuAlign * (uint64)heapBlock;
    char *chunk = NULL;
    int chunkSize = 0;
    const bool haDebug = u_sess->attr.attr_storage.HaModuleDebug;

    if (haDebug) {
        ereport(LOG, (errmsg("HA-bcm_read_multi_cu: relation %u/%u/%u col %d try to sync "
                             "cu blockno %u, contibits %d, maxHeapBlock %u",
                             rel->rd_node.spcNode, rel->rd_node.dbNode, rel->rd_node.relNode, col, heapBlock, contibits,
                             maxHeapBlock)));
    }

    /* The run starts past the last unit of the data file: nothing to read. */
    if (heapBlock > maxHeapBlock) {
        contibits = 0;
        return;
    }

    /* Send the NOTSYNCED data from heapBlock up to maxHeapBlock at most. */
    contibits = (int)Min((uint32)contibits, maxHeapBlock - heapBlock + 1);

    while (contibits > 0) {
        CatchupShutdownIfNoDataSender();

        chunk = cFile->Read(readOffset, cuAlign * contibits, &chunkSize, (int)cuAlign);
        if (chunk == NULL) {
            Assert(chunkSize == 0);
            contibits = 0;
            return;
        }

        if (haDebug) {
            check_cu_block(chunk, chunkSize, (int)cuAlign);
        }

        PushCUToDataQueue(rel, col, chunk, readOffset, chunkSize, false);
        ereport(DEBUG3, (errmsg("cuBlock %u col %d read and send data's realsize is %d.", heapBlock, col, chunkSize)));

        /* Advance by what was actually delivered. */
        readOffset += chunkSize;
        contibits -= chunkSize / cuAlign;
    }
    Assert(contibits == 0);
}

/*
 * check_cu_block
 *
 * Debug aid: split the buffer `mem` of `size` bytes into units of
 * `alignSize` bytes and emit a WARNING for every unit that consists of zero
 * bytes only. A trailing remainder shorter than alignSize is not examined.
 *
 * Each unit is scanned in place, so no zero-filled reference buffer has to
 * be placed on the stack. A NULL buffer or a non-positive alignSize is
 * ignored (the latter still trips the assertion in assert-enabled builds).
 */
void check_cu_block(char *mem, int size, int alignSize)
{
    Assert(alignSize > 0);

    /* Avoid dividing by zero or dereferencing NULL when asserts are off. */
    if (mem == NULL || alignSize <= 0) {
        return;
    }

    const int cuUnit = size / alignSize;

    for (int i = 0; i < cuUnit; i++) {
        const char *unit = mem + (size_t)i * (size_t)alignSize;
        bool allZero = true;

        /* Stop at the first non-zero byte; most units end the scan early. */
        for (int k = 0; k < alignSize; k++) {
            if (unit[k] != 0) {
                allZero = false;
                break;
            }
        }

        if (allZero) {
            ereport(WARNING, (errmsg("HA-check_cu_block: check cu blockno %d failed, it is zeropage", i)));
        }
    }
}

/*
 * Convert a byte offset into the index of the align_size-sized unit that
 * contains it (integer division, rounding down).
 */
uint64 cstore_offset_to_cstoreblock(uint64 offset, uint64 align_size)
{
    const uint64 unitIndex = offset / align_size;
    return unitIndex;
}

/*
 * Map a byte offset to a bcm block number: the offset is converted to its
 * align_size-sized unit index, and the result is that index divided by
 * BCM_BLOCKS_PER_PAGE, plus UNITBLK_TO_BCMGROUP(index), plus 2.
 */
uint64 cstore_offset_to_bcmblock(uint64 offset, uint64 align_size)
{
    const uint64 unitIndex = cstore_offset_to_cstoreblock(offset, align_size);
    const uint64 pageWithinMap = unitIndex / BCM_BLOCKS_PER_PAGE;

    return pageWithinMap + UNITBLK_TO_BCMGROUP(unitIndex) + 2;
}