openGauss-server/src/gausskernel/storage/access/gin/ginget.cpp

/* -------------------------------------------------------------------------
 *
 * ginget.cpp
 *	  fetch tuples from a GIN scan.
 *
 *
 * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *			src/gausskernel/storage/access/gin/ginget.cpp
 * -------------------------------------------------------------------------
 */
#include "postgres.h"
#include "knl/knl_variable.h"

#include "access/gin_private.h"
#include "access/relscan.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "utils/datum.h"
#include "utils/memutils.h"
#include "vecexecutor/vecnodes.h"
#include "vecexecutor/vecnodecstorescan.h"

typedef struct pendingPosition {
    Buffer pendingBuffer;
    OffsetNumber firstOffset;
    OffsetNumber lastOffset;
    ItemPointerData item;
    bool* hasMatchKey;
} pendingPosition;

static int CompareItemPointers(const void* a, const void* b)
{
    int res = ginCompareItemPointers((ItemPointer)a, (ItemPointer)b);
    return res;
}

/*
 * Goes to the next page if current offset is outside of bounds
 */
static bool moveRightIfItNeeded(GinBtreeData* btree, GinBtreeStack* stack)
{
    Page page = BufferGetPage(stack->buffer);
    if (stack->off > PageGetMaxOffsetNumber(page)) {
        /*
         * We scanned the whole page, so we should take right page
         */
        if (GinPageRightMost(page))
            return false; /* no more pages */

        stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
        stack->blkno = BufferGetBlockNumber(stack->buffer);
        stack->off = FirstOffsetNumber;
    }

    return true;
}

/*
 * Scan all pages of a posting tree and save all its heap ItemPointers
 * in scanEntry->matchBitmap
 */
static void scanPostingTree(Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree, bool isColStore)
{
    GinBtreeData btree;
    GinBtreeStack* stack = NULL;
    Buffer buffer;
    Page page;

    /* Descend to the leftmost leaf page */
    stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
    buffer = stack->buffer;
    IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */

    /*
     * Loop iterates through all leaf pages of posting tree
     */
    for (;;) {
        page = BufferGetPage(buffer);
        if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0) {
            int nitems = 0;
            if (!isColStore) {
                nitems = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
            } else {
                ItemPointerData minItem;
                ItemPointerSetMin(&minItem);
                ItemPointer items = GinDataLeafPageGetItems(page, &nitems, minItem);
                if (scanEntry->matchList == NULL) {
                    scanEntry->matchList = (ItemPointer)palloc(nitems * sizeof(ItemPointerData));
                    scanEntry->matchNum = 0;
                } else {
                    scanEntry->matchList = (ItemPointer)repalloc(
                        scanEntry->matchList, (scanEntry->matchNum + nitems) * sizeof(ItemPointerData));
                }
                for (int i = 0; i < nitems; i++) {
                    scanEntry->matchList[scanEntry->matchNum + i] = items[i];
                }
                scanEntry->matchNum += nitems;
                pfree(items);
                items = NULL;
            }
            scanEntry->predictNumberResult += nitems;
        }

        if (GinPageRightMost(page))
            break; /* no more pages */

        buffer = ginStepRight(buffer, index, GIN_SHARE);
    }

    freeGinBtreeStack(stack);
    UnlockReleaseBuffer(buffer);
}

/*
 * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
 * match the search entry.  This supports three different match modes:
 *
 * 1. Partial-match support: scan from current point until the
 *	  comparePartialFn says we're done.
 * 2. SEARCH_MODE_ALL: scan from current point (which should be first
 *	  key for the current attnum) until we hit null items or end of attnum
 * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
 *	  key for the current attnum) until we hit end of attnum
 *
 * Returns true if done, false if it's necessary to restart scan from scratch
 */
static bool collectMatchBitmap(GinBtreeData* btree, GinBtreeStack* stack, GinScanEntry scanEntry, bool isColStore)
{
    OffsetNumber attnum;
    Form_pg_attribute attr;

    /* Initialize empty bitmap result */
    if (!isColStore) {
        scanEntry->matchBitmap = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L);
    }

    /* Null query cannot partial-match anything */
    if (scanEntry->isPartialMatch && scanEntry->queryCategory != GIN_CAT_NORM_KEY)
        return true;

    /* Locate tupdesc entry for key column (for attbyval/attlen data) */
    attnum = scanEntry->attnum;
    attr = btree->ginstate->origTupdesc->attrs[attnum - 1];

    for (;;) {
        Page page;
        IndexTuple itup;
        Datum idatum;
        GinNullCategory icategory;

        /*
         * stack->off points to the interested entry, buffer is already locked
         */
        if (moveRightIfItNeeded(btree, stack) == false)
            return true;

        page = BufferGetPage(stack->buffer);
        itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, stack->off));
        /*
         * If tuple stores another attribute then stop scan
         */
        if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
            return true;

        /* Safe to fetch attribute value */
        idatum = gintuple_get_key(btree->ginstate, itup, &icategory);

        /*
         * Check for appropriate scan stop conditions
         */
        if (scanEntry->isPartialMatch) {
            int32 cmp;

            /*
             * In partial match, stop scan at any null (including
             * placeholders); partial matches never match nulls
             */
            if (icategory != GIN_CAT_NORM_KEY)
                return true;

            /* ----------
             * Check of partial match.
             * case cmp == 0 => match
             * case cmp > 0 => not match and finish scan
             * case cmp < 0 => not match and continue scan
             * ----------
             */
            cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
                btree->ginstate->supportCollation[attnum - 1],
                scanEntry->queryKey,
                idatum,
                UInt16GetDatum(scanEntry->strategy),
                PointerGetDatum(scanEntry->extra_data)));
            if (cmp > 0)
                return true;
            else if (cmp < 0) {
                stack->off++;
                continue;
            }
        } else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL) {
            /*
             * In ALL mode, we are not interested in null items, so we can
             * stop if we get to a null-item placeholder (which will be the
             * last entry for a given attnum).  We do want to include NULL_KEY
             * and EMPTY_ITEM entries, though.
             */
            if (icategory == GIN_CAT_NULL_ITEM)
                return true;
        }

        /*
         * OK, we want to return the TIDs listed in this entry.
         */
        if (GinIsPostingTree(itup)) {
            BlockNumber rootPostingTree = GinGetPostingTree(itup);

            /*
             * We should unlock current page (but not unpin) during tree scan
             * to prevent deadlock with vacuum processes.
             *
             * We save current entry value (idatum) to be able to re-find our
             * tuple after re-locking
             */
            if (icategory == GIN_CAT_NORM_KEY)
                idatum = datumCopy(idatum, attr->attbyval, attr->attlen);

            LockBuffer(stack->buffer, GIN_UNLOCK);

            /* Collect all the TIDs in this entry's posting tree */
            scanPostingTree(btree->index, scanEntry, rootPostingTree, isColStore);

            /*
             * We lock again the entry page and while it was unlocked insert
             * might have occurred, so we need to re-find our position.
             */
            LockBuffer(stack->buffer, GIN_SHARE);
            page = BufferGetPage(stack->buffer);
            if (!GinPageIsLeaf(page)) {
                /*
                 * Root page becomes non-leaf while we unlock it. We will
                 * start again, this situation doesn't occur often - root can
                 * became a non-leaf only once per life of index.
                 */
                return false;
            }

            /* Search forward to re-find idatum */
            for (;;) {
                Datum newDatum;
                GinNullCategory newCategory;

                if (moveRightIfItNeeded(btree, stack) == false)
                    /* must not happen !!! */
                    ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("lost saved point in index")));

                page = BufferGetPage(stack->buffer);
                itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, stack->off));
                if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
                    /* must not happen !!! */
                    ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("lost saved point in index")));

                newDatum = gintuple_get_key(btree->ginstate, itup, &newCategory);
                if (ginCompareEntries(btree->ginstate, attnum, newDatum, newCategory, idatum, icategory) == 0)
                    break; /* Found! */

                stack->off++;
            }

            if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval) {
                Pointer temp = DatumGetPointer(idatum);
                pfree(temp);
                temp = NULL;
            }
        } else {
            ItemPointer ipd;
            int nipd;

            ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);

            if (!isColStore) {
                tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
            } else {
                if (scanEntry->matchList == NULL) {
                    scanEntry->matchList = (ItemPointer)palloc(nipd * sizeof(ItemPointerData));
                } else {
                    scanEntry->matchList = (ItemPointer)repalloc(
                        scanEntry->matchList, (scanEntry->matchNum + nipd) * sizeof(ItemPointerData));
                }
                for (int i = 0; i < nipd; i++) {
                    scanEntry->matchList[scanEntry->matchNum + i] = ipd[i];
                }
                scanEntry->matchNum += nipd;
            }
            scanEntry->predictNumberResult += GinGetNPosting(itup);
            pfree(ipd);
            ipd = NULL;
        }

        /*
         * Done with this entry, go to the next
         */
        stack->off++;
    }
}

/*
 * Start* functions setup beginning state of searches: finds correct buffer and pins it.
 */
static void startScanEntry(GinState* ginstate, GinScanEntry entry, bool isColStore)
{
    GinBtreeData btreeEntry;
    GinBtreeStack* stackEntry = NULL;
    Page page;
    bool needUnlock = false;

restartScanEntry:
    entry->buffer = InvalidBuffer;
    ItemPointerSetMin(&entry->curItem);
    entry->offset = InvalidOffsetNumber;
    if (entry->list)
        pfree(entry->list);
    entry->list = NULL;
    entry->nlist = 0;
    entry->matchBitmap = NULL;
    entry->matchResult = NULL;
    entry->reduceResult = false;
    entry->predictNumberResult = 0;
    if (entry->matchList)
        pfree(entry->matchList);
    entry->matchList = NULL;
    entry->matchNum = 0;

    /*
     * we should find entry, and begin scan of posting tree or just store
     * posting list in memory
     */
    ginPrepareEntryScan(&btreeEntry, entry->attnum, entry->queryKey, entry->queryCategory, ginstate);
    stackEntry = ginFindLeafPage(&btreeEntry, true);
    page = BufferGetPage(stackEntry->buffer);
    needUnlock = true;

    entry->isFinished = true;

    if (entry->isPartialMatch || entry->queryCategory == GIN_CAT_EMPTY_QUERY) {
        /*
         * btreeEntry.findItem locates the first item >= given search key.
         * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
         * because of the way the GIN_CAT_EMPTY_QUERY category code is
         * assigned.)  We scan forward from there and collect all TIDs needed
         * for the entry type.
         */
        btreeEntry.findItem(&btreeEntry, stackEntry);

        if (collectMatchBitmap(&btreeEntry, stackEntry, entry, isColStore) == false) {
            /*
             * GIN tree was seriously restructured, so we will cleanup all found data and rescan.
             * See comments near 'return false' in collectMatchBitmap()
             */
            if (!isColStore && entry->matchBitmap) {
                if (entry->matchIterator)
                    tbm_end_iterate(entry->matchIterator);
                entry->matchIterator = NULL;
                tbm_free(entry->matchBitmap);
                entry->matchBitmap = NULL;
            }

            if (isColStore && entry->matchList) {
                pfree(entry->matchList);
                entry->matchList = NULL;
                entry->matchNum = 0;
            }

            LockBuffer(stackEntry->buffer, GIN_UNLOCK);
            freeGinBtreeStack(stackEntry);
            goto restartScanEntry;
        }

        if (!isColStore && entry->matchBitmap && !tbm_is_empty(entry->matchBitmap)) {
            entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
            entry->isFinished = false;
        }

        if (isColStore && entry->matchList && entry->matchNum > 0) {
            qsort(entry->matchList, entry->matchNum, sizeof(ItemPointerData), CompareItemPointers);
            entry->isFinished = false;
        }
    } else if (btreeEntry.findItem(&btreeEntry, stackEntry)) {
        IndexTuple itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, stackEntry->off));
        if (GinIsPostingTree(itup)) {
            BlockNumber rootPostingTree = GinGetPostingTree(itup);
            GinBtreeStack* stack = NULL;
            ItemPointerData minItem;

            /*
             * We should unlock entry page before touching posting tree to
             * prevent deadlocks with vacuum processes. Because entry is never
             * deleted from page and posting tree is never reduced to the
             * posting list, we can unlock page after getting BlockNumber of
             * root of posting tree.
             */
            LockBuffer(stackEntry->buffer, GIN_UNLOCK);
            needUnlock = false;

            stack = ginScanBeginPostingTree(&entry->btree, ginstate->index, rootPostingTree);
            entry->buffer = stack->buffer;

            /*
             * We keep buffer pinned because we need to prevent deletion of
             * page during scan. See GIN's vacuum implementation. RefCount is
             * increased to keep buffer pinned after freeGinBtreeStack() call.
             */
            IncrBufferRefCount(entry->buffer);

            Page tmpPage = BufferGetPage(entry->buffer);

            /*
             * Load the first page into memory.
             */
            ItemPointerSetMin(&minItem);
            entry->list = GinDataLeafPageGetItems(tmpPage, &entry->nlist, minItem);

            entry->predictNumberResult = stack->predictNumber * entry->nlist;

            LockBuffer(entry->buffer, GIN_UNLOCK);
            freeGinBtreeStack(stack);
            entry->isFinished = false;
        } else if (GinGetNPosting(itup) > 0) {
            entry->list = ginReadTuple(ginstate, entry->attnum, itup, &entry->nlist);
            entry->predictNumberResult = entry->nlist;

            entry->isFinished = false;
        }
    }

    if (needUnlock)
        LockBuffer(stackEntry->buffer, GIN_UNLOCK);
    freeGinBtreeStack(stackEntry);
}

/*
 * Comparison function for scan entry indexes. Sorts by predictNumberResult,
 * least frequent items first.
 */
static int entryIndexByFrequencyCmp(const void* a1, const void* a2, void* arg)
{
    const GinScanKey key = (const GinScanKey)arg;
    int i1 = *(const int*)a1;
    int i2 = *(const int*)a2;
    uint32 n1 = key->scanEntry[i1]->predictNumberResult;
    uint32 n2 = key->scanEntry[i2]->predictNumberResult;

    if (n1 < n2)
        return -1;
    else if (n1 == n2)
        return 0;
    else
        return 1;
}

static void startScanKey(GinState* ginstate, GinScanOpaque so, GinScanKey key)
{
    MemoryContext oldCtx = CurrentMemoryContext;
    int i;
    int j;
    int* entryIndexes = NULL;

    ItemPointerSetMin(&key->curItem);
    key->curItemMatches = false;
    key->recheckCurItem = false;
    key->isFinished = false;

    /*
     * Divide the entries into two distinct sets: required and additional.
     * Additional entries are not enough for a match alone, without any items
     * from the required set, but are needed by the consistent function to
     * decide if an item matches. When scanning, we can skip over items from
     * additional entries that have no corresponding matches in any of the
     * required entries. That speeds up queries like "frequent & rare"
     * considerably, if the frequent term can be put in the additional set.
     *
     * There can be many legal ways to divide them entries into these two
     * sets. A conservative division is to just put everything in the required
     * set, but the more you can put in the additional set, the more you can
     * skip during the scan. To maximize skipping, we try to put as many
     * frequent items as possible into additional, and less frequent ones into
     * required. To do that, sort the entries by frequency
     * (predictNumberResult), and put entries into the required set in that
     * order, until the consistent function says that none of the remaining
     * entries can form a match, without any items from the required set. The
     * rest go to the additional set.
     */
    if (key->nentries > 1) {
        MemoryContextSwitchTo(so->tempCtx);

        entryIndexes = (int*)palloc(sizeof(int) * key->nentries);
        for (i = 0; i < (int)key->nentries; i++)
            entryIndexes[i] = i;
        qsort_arg(entryIndexes, key->nentries, sizeof(int), entryIndexByFrequencyCmp, key);

        for (i = 0; i < (int)key->nentries - 1; i++) {
            /* Pass all entries <= i as FALSE, and the rest as MAYBE */
            for (j = 0; j <= i; j++)
                key->entryRes[entryIndexes[j]] = GIN_FALSE;
            for (j = i + 1; j < (int)key->nentries; j++)
                key->entryRes[entryIndexes[j]] = GIN_MAYBE;

            if (key->triConsistentFn(key) == GIN_FALSE)
                break;
        }
        /* i is now the last required entry. */
        MemoryContextSwitchTo(so->keyCtx);

        key->nrequired = i + 1;
        key->nadditional = key->nentries - key->nrequired;
        key->requiredEntries = (GinScanEntry*)palloc(key->nrequired * sizeof(GinScanEntry));
        key->additionalEntries = (GinScanEntry*)palloc(key->nadditional * sizeof(GinScanEntry));

        j = 0;
        for (i = 0; i < key->nrequired; i++)
            key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
        for (i = 0; i < key->nadditional; i++)
            key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];

        /* clean up after consistentFn calls (also frees entryIndexes) */
        MemoryContextReset(so->tempCtx);
    } else {
        MemoryContextSwitchTo(so->keyCtx);

        key->nrequired = 1;
        key->nadditional = 0;
        key->requiredEntries = (GinScanEntry*)palloc(1 * sizeof(GinScanEntry));
        key->requiredEntries[0] = key->scanEntry[0];
    }
    MemoryContextSwitchTo(oldCtx);
}

static void startScan(IndexScanDesc scan, bool isColStore)
{
    GinScanOpaque so = (GinScanOpaque)scan->opaque;
    GinState* ginstate = &so->ginstate;
    uint32 i;

    for (i = 0; i < so->totalentries; i++)
        startScanEntry(ginstate, so->entries[i], isColStore);

    if (u_sess->attr.attr_common.GinFuzzySearchLimit > 0) {
        /*
         * If all of keys more than threshold we will try to reduce result, we
         * hope (and only hope, for intersection operation of array our
         * supposition isn't true), that total result will not more than
         * minimal predictNumberResult.
         */
        bool reduce = true;

        for (i = 0; i < so->totalentries; i++) {
            if (so->entries[i]->predictNumberResult <=
                so->totalentries * u_sess->attr.attr_common.GinFuzzySearchLimit) {
                reduce = false;
                break;
            }
        }
        if (reduce) {
            for (i = 0; i < so->totalentries; i++) {
                so->entries[i]->predictNumberResult /= so->totalentries;
                so->entries[i]->reduceResult = true;
            }
        }
    }

    /*
     * Now that we have the estimates for the entry frequencies, finish
     * initializing the scan keys.
     */
    for (i = 0; i < so->nkeys; i++)
        startScanKey(ginstate, so, so->keys + i);
}

/*
 * Load the next batch of item pointers from a posting tree.
 *
 * Note that we copy the page into GinScanEntry->list array and unlock it, but
 * keep it pinned to prevent interference with vacuum.
 */
static void entryLoadMoreItems(GinState* ginstate, GinScanEntry entry, ItemPointerData advancePast)
{
    Page page;
    int i;
    bool stepright = false;

    if (!BufferIsValid(entry->buffer)) {
        entry->isFinished = true;
        return;
    }

    /*
     * We have two strategies for finding the correct page: step right from
     * the current page, or descend the tree again from the root. If
     * advancePast equals the current item, the next matching item should be
     * on the next page, so we step right. Otherwise, descend from root.
     */
    if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0) {
        stepright = true;
        LockBuffer(entry->buffer, GIN_SHARE);
    } else {
        GinBtreeStack* stack = NULL;

        ReleaseBuffer(entry->buffer);

        /*
         * Set the search key, and find the correct leaf page.
         */
        if (ItemPointerIsLossyPage(&advancePast)) {
            ItemPointerSet(&entry->btree.itemptr, GinItemPointerGetBlockNumber(&advancePast) + 1, FirstOffsetNumber);
        } else {
            entry->btree.itemptr = advancePast;
            entry->btree.itemptr.ip_posid++;
        }
        entry->btree.fullScan = false;
        stack = ginFindLeafPage(&entry->btree, true);

        /* we don't need the stack, just the buffer. */
        entry->buffer = stack->buffer;
        IncrBufferRefCount(entry->buffer);
        freeGinBtreeStack(stack);
        stepright = false;
    }

    ereport(DEBUG2,
        (errmsg("entryLoadMoreItems, %u/%hu, skip: %d",
            GinItemPointerGetBlockNumber(&advancePast),
            GinItemPointerGetOffsetNumber(&advancePast),
            !stepright)));

    page = BufferGetPage(entry->buffer);
    for (;;) {
        entry->offset = InvalidOffsetNumber;
        if (entry->list) {
            pfree(entry->list);
            entry->list = NULL;
            entry->nlist = 0;
        }

        if (stepright) {
            /*
             * We've processed all the entries on this page. If it was the
             * last page in the tree, we're done.
             */
            if (GinPageRightMost(page)) {
                UnlockReleaseBuffer(entry->buffer);
                entry->buffer = InvalidBuffer;
                entry->isFinished = true;
                return;
            }

            /*
             * Step to next page, following the right link. then find the
             * first ItemPointer greater than advancePast.
             */
            entry->buffer = ginStepRight(entry->buffer, ginstate->index, GIN_SHARE);
            page = BufferGetPage(entry->buffer);
        }
        stepright = true;

        if (GinPageGetOpaque(page)->flags & GIN_DELETED)
            continue; /* page was deleted by concurrent vacuum */

        /*
         * The first item > advancePast might not be on this page, but
         * somewhere to the right, if the page was split, or a non-match from
         * another key in the query allowed us to skip some items from this
         * entry. Keep following the right-links until we re-find the correct
         * page.
         */
        if (!GinPageRightMost(page) && ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0) {
            /*
             * the item we're looking is > the right bound of the page, so it
             * can't be on this page.
             */
            continue;
        }

        entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);

        for (i = 0; i < entry->nlist; i++) {
            if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0) {
                entry->offset = i;

                if (GinPageRightMost(page)) {
                    /* after processing the copied items, we're done. */
                    UnlockReleaseBuffer(entry->buffer);
                    entry->buffer = InvalidBuffer;
                } else
                    LockBuffer(entry->buffer, GIN_UNLOCK);
                return;
            }
        }
    }
}

#define gin_rand() (((double)gs_random()) / ((double)MAX_RANDOM_VALUE))
#define dropItem(e) \
    (gin_rand() > ((double)u_sess->attr.attr_common.GinFuzzySearchLimit) / ((double)((e)->predictNumberResult)))

/*
 * Sets entry->curItem to next heap item pointer > advancePast, for one entry
 * of one scan key, or sets entry->isFinished to TRUE if there are no more.
 *
 * Item pointers are returned in ascending order.
 *
 * Note: this can return a "lossy page" item pointer, indicating that the
 * entry potentially matches all items on that heap page.  However, it is
 * not allowed to return both a lossy page pointer and exact (regular)
 * item pointers for the same page.  (Doing so would break the key-combination
 * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
 * current implementation this is guaranteed by the behavior of tidbitmaps.
 */
static void entryGetItem(GinState* ginstate, GinScanEntry entry, ItemPointerData advancePast)
{
    Assert(!entry->isFinished);

    Assert(!ItemPointerIsValid(&entry->curItem) || ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);

    if (entry->matchBitmap) {
        /* A bitmap result */
        BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
        OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
        bool gotitem = false;

        do {
            /*
             * If we've exhausted all items on this block, move to next block
             * in the bitmap.
             */
            while (entry->matchResult == NULL ||
                   (entry->matchResult->ntuples >= 0 && entry->offset >= entry->matchResult->ntuples) ||
                   entry->matchResult->blockno < advancePastBlk ||
                   (ItemPointerIsLossyPage(&advancePast) && entry->matchResult->blockno == advancePastBlk)) {
                entry->matchResult = tbm_iterate(entry->matchIterator);

                if (entry->matchResult == NULL) {
                    ItemPointerSetInvalid(&entry->curItem);
                    tbm_end_iterate(entry->matchIterator);
                    entry->matchIterator = NULL;
                    entry->isFinished = true;
                    break;
                }

                /*
                 * Reset counter to the beginning of entry->matchResult. Note:
                 * entry->offset is still greater than matchResult->ntuples if
                 * matchResult is lossy.  So, on next call we will get next
                 * result from TIDBitmap.
                 */
                entry->offset = 0;
            }
            if (entry->isFinished)
                break;

            /*
             * We're now on the first page after advancePast which has any
             * items on it. If it's a lossy result, return that.
             */
            if (entry->matchResult->ntuples < 0) {
                ItemPointerSetLossyPage(&entry->curItem, entry->matchResult->blockno);

                /*
                 * We might as well fall out of the loop; we could not
                 * estimate number of results on this page to support correct
                 * reducing of result even if it's enabled.
                 */
                gotitem = true;
                break;
            }

            /*
             * Not a lossy page. Skip over any offsets <= advancePast, and
             * return that.
             */
            if (entry->matchResult->blockno == advancePastBlk) {
                /*
                 * First, do a quick check against the last offset on the
                 * page. If that's > advancePast, so are all the other
                 * offsets.
                 */
                if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff) {
                    entry->offset = entry->matchResult->ntuples;
                    continue;
                }

                /* Otherwise scan to find the first item > advancePast */
                while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
                    entry->offset++;
            }

            ItemPointerSet(&entry->curItem, entry->matchResult->blockno, entry->matchResult->offsets[entry->offset]);
            entry->offset++;
            gotitem = true;
        } while (!gotitem || (entry->reduceResult && dropItem(entry)));
    } else if (entry->matchList) {
        do {
            if (entry->offset >= entry->matchNum) {
                ItemPointerSetInvalid(&entry->curItem);
                entry->isFinished = true;
                break;
            }

            entry->curItem = entry->matchList[entry->offset++];
        } while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
    } else if (!BufferIsValid(entry->buffer)) {
        /*
         * A posting list from an entry tuple, or the last page of a posting
         * tree.
         */
        do {
            if (entry->offset >= entry->nlist) {
                ItemPointerSetInvalid(&entry->curItem);
                entry->isFinished = true;
                break;
            }

            entry->curItem = entry->list[entry->offset++];
        } while
            (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0 || (entry->reduceResult && dropItem(entry)));
    } else {
        /* A posting tree */
        do {
            /* If we've processed the current batch, load more items */
            while (entry->offset >= entry->nlist) {
                entryLoadMoreItems(ginstate, entry, advancePast);

                if (entry->isFinished) {
                    ItemPointerSetInvalid(&entry->curItem);
                    return;
                }
            }

            entry->curItem = entry->list[entry->offset++];

            if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
                continue;

            advancePast = entry->curItem;
        } while (entry->reduceResult && dropItem(entry));
    }
}

/*
 * Identify the "current" item among the input entry streams for this scan key
 * that is greater than advancePast, and test whether it passes the scan key
 * qual condition.
 *
 * The current item is the smallest curItem among the inputs.  key->curItem
 * is set to that value.  key->curItemMatches is set to indicate whether that
 * TID passes the consistentFn test.  If so, key->recheckCurItem is set true
 * iff recheck is needed for this item pointer (including the case where the
 * item pointer is a lossy page pointer).
 *
 * If all entry streams are exhausted, sets key->isFinished to TRUE.
 *
 * Item pointers must be returned in ascending order.
 *
 * Note: this can return a "lossy page" item pointer, indicating that the
 * key potentially matches all items on that heap page.  However, it is
 * not allowed to return both a lossy page pointer and exact (regular)
 * item pointers for the same page.  (Doing so would break the key-combination
 * logic in scanGetItem.)
 */
static void keyGetItem(GinState* ginstate, MemoryContext tempCtx, GinScanKey key, ItemPointerData advancePast)
{
    ItemPointerData minItem;
    ItemPointerData curPageLossy;
    uint32 i;
    bool haveLossyEntry = false;
    GinScanEntry entry;
    GinTernaryValue res;
    MemoryContext oldCtx;
    bool allFinished = true;

    Assert(!key->isFinished);

    /*
     * We might have already tested this item; if so, no need to repeat work.
     * (Note: the ">" case can happen, if advancePast is exact but we
     * previously had to set curItem to a lossy-page pointer.)
     */
    if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
        return;

    /*
     * Find the minimum item > advancePast among the active entry streams.
     *
     * Note: a lossy-page entry is encoded by a ItemPointer with max value for
     * offset (0xffff), so that it will sort after any exact entries for the
     * same page.  So we'll prefer to return exact pointers not lossy
     * pointers, which is good.
     */
    ItemPointerSetMax(&minItem);
    for (i = 0; i < (uint32)key->nrequired; i++) {
        entry = key->requiredEntries[i];

        if (entry->isFinished)
            continue;

        /*
         * Advance this stream if necessary.
         *
         * In particular, since entry->curItem was initialized with
         * ItemPointerSetMin, this ensures we fetch the first item for each
         * entry on the first call.
         */
        if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0) {
            entryGetItem(ginstate, entry, advancePast);
            if (entry->isFinished)
                continue;
        }

        allFinished = false;
        if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
            minItem = entry->curItem;
    }

    if (allFinished) {
        /* all entries are finished */
        key->isFinished = true;
        return;
    }

    /*
     * Ok, we now know that there are no matches < minItem.
     *
     * If minItem is lossy, it means that there were no exact items on the
     * page among requiredEntries, because lossy pointers sort after exact
     * items. However, there might be exact items for the same page among
     * additionalEntries, so we mustn't advance past them.
     */
    if (ItemPointerIsLossyPage(&minItem)) {
        if (GinItemPointerGetBlockNumber(&advancePast) < GinItemPointerGetBlockNumber(&minItem)) {
            advancePast.ip_blkid = minItem.ip_blkid;
            advancePast.ip_posid = 0;
        }
    } else {
        Assert(minItem.ip_posid > 0);
        advancePast = minItem;
        advancePast.ip_posid--;
    }

    /*
     * We might not have loaded all the entry streams for this TID yet. We
     * could call the consistent function, passing MAYBE for those entries, to
     * see if it can decide if this TID matches based on the information we
     * have. But if the consistent-function is expensive, and cannot in fact
     * decide with partial information, that could be a big loss. So, load all
     * the additional entries, before calling the consistent function.
     */
    for (i = 0; i < (uint32)key->nadditional; i++) {
        entry = key->additionalEntries[i];

        if (entry->isFinished)
            continue;

        if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0) {
            entryGetItem(ginstate, entry, advancePast);
            if (entry->isFinished)
                continue;
        }

        /*
         * Normally, none of the items in additionalEntries can have a curItem
         * larger than minItem. But if minItem is a lossy page, then there
         * might be exact items on the same page among additionalEntries.
         */
        if (ginCompareItemPointers(&entry->curItem, &minItem) < 0) {
            Assert(ItemPointerIsLossyPage(&minItem));
            minItem = entry->curItem;
        }
    }

    /*
     * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
     * and perform consistentFn test.
     *
     * Lossy-page entries pose a problem, since we don't know the correct
     * entryRes state to pass to the consistentFn, and we also don't know what
     * its combining logic will be (could be AND, OR, or even NOT). If the
     * logic is OR then the consistentFn might succeed for all items in the
     * lossy page even when none of the other entries match.
     *
     * Our strategy is to call the tri-state consistent function, with the
     * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
     * returns FALSE, none of the lossy items alone are enough for a match, so
     * we don't need to return a lossy-page pointer. Otherwise, return a
     * lossy-page pointer to indicate that the whole heap page must be
     * checked.  (On subsequent calls, we'll do nothing until minItem is past
     * the page altogether, thus ensuring that we never return both regular
     * and lossy pointers for the same page.)
     *
     * An exception is that it doesn't matter what we pass for lossy pointers
     * in "hidden" entries, because the consistentFn's result can't depend on
     * them. We could pass them as MAYBE as well, but if we're using the
     * "shim" implementation of a tri-state consistent function (see
     * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
     * them as TRUE.
     *
     * Note that only lossy-page entries pointing to the current item's page
     * should trigger this processing; we might have future lossy pages in the
     * entry array, but they aren't relevant yet.
     */
    key->curItem = minItem;
    ItemPointerSetLossyPage(&curPageLossy, GinItemPointerGetBlockNumber(&key->curItem));
    for (i = 0; i < key->nentries; i++) {
        entry = key->scanEntry[i];
        if (!entry->isFinished && ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0) {
            if (i < key->nuserentries)
                key->entryRes[i] = GIN_MAYBE;
            else
                key->entryRes[i] = GIN_TRUE;
            haveLossyEntry = true;
        } else
            key->entryRes[i] = GIN_FALSE;
    }

    /* prepare for calling consistentFn in temp context */
    oldCtx = MemoryContextSwitchTo(tempCtx);

    if (haveLossyEntry) {
        /* Have lossy-page entries, so see if whole page matches */
        res = key->triConsistentFn(key);
        if (res == GIN_TRUE || res == GIN_MAYBE) {
            /* Yes, so clean up ... */
            MemoryContextSwitchTo(oldCtx);
            MemoryContextReset(tempCtx);

            /* and return lossy pointer for whole page */
            key->curItem = curPageLossy;
            key->curItemMatches = true;
            key->recheckCurItem = true;
            return;
        }
    }

    /*
     * At this point we know that we don't need to return a lossy whole-page
     * pointer, but we might have matches for individual exact item pointers,
     * possibly in combination with a lossy pointer. Pass lossy pointers as
     * MAYBE to the ternary consistent function, to let it decide if this
     * tuple satisfies the overall key, even though we don't know if the lossy
     * entries match.
     *
     * Prepare entryRes array to be passed to consistentFn.
     */
    for (i = 0; i < key->nentries; i++) {
        entry = key->scanEntry[i];
        if (entry->isFinished)
            key->entryRes[i] = GIN_FALSE;
        else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
            key->entryRes[i] = GIN_MAYBE;
        else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
            key->entryRes[i] = GIN_TRUE;
        else
            key->entryRes[i] = GIN_FALSE;
    }

    res = key->triConsistentFn(key);

    switch (res) {
        case GIN_TRUE:
            key->curItemMatches = true;
            /* triConsistentFn set recheckCurItem */
            break;

        case GIN_FALSE:
            key->curItemMatches = false;
            break;

        case GIN_MAYBE:
            key->curItemMatches = true;
            key->recheckCurItem = true;
            break;

        default:

            /*
             * the 'default' case shouldn't happen, but if the consistent
             * function returns something bogus, this is the safe result
             */
            key->curItemMatches = true;
            key->recheckCurItem = true;
            break;
    }

    /*
     * We have a tuple, and we know if it matches or not. If it's a non-match,
     * we could continue to find the next matching tuple, but let's break out
     * and give scanGetItem a chance to advance the other keys. They might be
     * able to skip past to a much higher TID, allowing us to save work.
     *
     * clean up after consistentFn calls
     */
    MemoryContextSwitchTo(oldCtx);
    MemoryContextReset(tempCtx);
}

/*
 * Get next heap item pointer (after advancePast) from scan.
 * Returns true if anything found.
 * On success, *item and *recheck are set.
 *
 * Note: this is very nearly the same logic as in keyGetItem(), except
 * that we know the keys are to be combined with AND logic, whereas in
 * keyGetItem() the combination logic is known only to the consistentFn.
 */
static bool scanGetItem(IndexScanDesc scan, ItemPointerData advancePast, ItemPointerData* item, bool* recheck)
{
    GinScanOpaque so = (GinScanOpaque)scan->opaque;
    uint32 i;
    bool match = false;

    /* ----------
     * Advance the scan keys in lock-step, until we find an item that matches
     * all the keys. If any key reports isFinished, meaning its subset of the
     * entries is exhausted, we can stop.  Otherwise, set *item to the next
     * matching item.
     *
     * This logic works only if a keyGetItem stream can never contain both
     * exact and lossy pointers for the same page.  Else we could have a
     * case like
     *
     *		stream 1		stream 2
     *		...             ...
     *		42/6			42/7
     *		50/1			42/0xffff
     *		...             ...
     *
     * We would conclude that 42/6 is not a match and advance stream 1,
     * thus never detecting the match to the lossy pointer in stream 2.
     * (keyGetItem has a similar problem versus entryGetItem.)
     * ----------
     */
    do {
        ItemPointerSetMin(item);
        match = true;
        for (i = 0; i < so->nkeys && match; i++) {
            GinScanKey key = so->keys + i;

            /* Fetch the next item for this key that is > advancePast. */
            keyGetItem(&so->ginstate, so->tempCtx, key, advancePast);

            if (key->isFinished)
                return false;

            /*
             * If it's not a match, we can immediately conclude that nothing
             * <= this item matches, without checking the rest of the keys.
             */
            if (!key->curItemMatches) {
                advancePast = key->curItem;
                match = false;
                break;
            }

            /*
             * It's a match. We can conclude that nothing < matches, so the
             * other key streams can skip to this item.
             *
             * Beware of lossy pointers, though; from a lossy pointer, we can
             * only conclude that nothing smaller than this *block* matches.
             */
            if (ItemPointerIsLossyPage(&key->curItem)) {
                if (GinItemPointerGetBlockNumber(&advancePast) < GinItemPointerGetBlockNumber(&key->curItem)) {
                    advancePast.ip_blkid = key->curItem.ip_blkid;
                    advancePast.ip_posid = 0;
                }
            } else {
                Assert(key->curItem.ip_posid > 0);
                advancePast = key->curItem;
                advancePast.ip_posid--;
            }

            /*
             * If this is the first key, remember this location as a potential
             * match, and proceed to check the rest of the keys.
             *
             * Otherwise, check if this is the same item that we checked the
             * previous keys for (or a lossy pointer for the same page). If
             * not, loop back to check the previous keys for this item (we
             * will check this key again too, but keyGetItem returns quickly
             * for that)
             */
            if (i == 0) {
                *item = key->curItem;
            } else {
                if (ItemPointerIsLossyPage(&key->curItem) || ItemPointerIsLossyPage(item)) {
                    Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
                    match = (GinItemPointerGetBlockNumber(&key->curItem) == GinItemPointerGetBlockNumber(item));
                } else {
                    Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
                    match = (ginCompareItemPointers(&key->curItem, item) == 0);
                }
            }
        }
    } while (!match);

    Assert(!ItemPointerIsMin(item));

    /*
     * Now *item contains the first ItemPointer after previous result that
     * satisfied all the keys for that exact TID, or a lossy reference to the
     * same page.
     *
     * We must return recheck = true if any of the keys are marked recheck.
     */
    *recheck = false;
    for (i = 0; i < so->nkeys; i++) {
        GinScanKey key = so->keys + i;

        if (key->recheckCurItem) {
            *recheck = true;
            break;
        }
    }

    return true;
}

/*
 * Functions for scanning the pending list
 */
/*
 * Get ItemPointer of next heap row to be checked from pending list.
 * Returns false if there are no more. On pages with several heap rows
 * it returns each row separately, on page with part of heap row returns
 * per page data.  pos->firstOffset and pos->lastOffset are set to identify
 * the range of pending-list tuples belonging to this heap row.
 *
 * The pendingBuffer is presumed pinned and share-locked on entry, and is
 * pinned and share-locked on success exit.  On failure exit it's released.
 */
static bool scanGetCandidate(IndexScanDesc scan, pendingPosition* pos)
{
    OffsetNumber maxoff;
    Page page;
    IndexTuple itup;

    ItemPointerSetInvalid(&pos->item);
    for (;;) {
        page = BufferGetPage(pos->pendingBuffer);

        maxoff = PageGetMaxOffsetNumber(page);
        if (pos->firstOffset > maxoff) {
            BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
            if (blkno == InvalidBlockNumber) {
                UnlockReleaseBuffer(pos->pendingBuffer);
                pos->pendingBuffer = InvalidBuffer;

                return false;
            } else {
                /*
                 * Here we must prevent deletion of next page by insertcleanup
                 * process, which may be trying to obtain exclusive lock on
                 * current page.  So, we lock next page before releasing the
                 * current one
                 */
                Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);

                LockBuffer(tmpbuf, GIN_SHARE);
                UnlockReleaseBuffer(pos->pendingBuffer);

                pos->pendingBuffer = tmpbuf;
                pos->firstOffset = FirstOffsetNumber;
            }
        } else {
            itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, pos->firstOffset));
            pos->item = itup->t_tid;
            if (GinPageHasFullRow(page)) {
                /*
                 * find itempointer to the next row
                 */
                for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++) {
                    itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, pos->lastOffset));
                    if (!ItemPointerEquals(&pos->item, &itup->t_tid))
                        break;
                }
            } else {
                /*
                 * All itempointers are the same on this page
                 */
                pos->lastOffset = maxoff + 1;
            }

            /*
             * Now pos->firstOffset points to the first tuple of current heap
             * row, pos->lastOffset points to the first tuple of next heap row
             * (or to the end of page)
             */
            break;
        }
    }

    return true;
}

/*
 * Scan pending-list page from current tuple (off) up till the first of:
 * - match is found (then returns true)
 * - no later match is possible
 * - tuple's attribute number is not equal to entry's attrnum
 * - reach end of page
 *
 * datum[]/category[]/datumExtracted[] arrays are used to cache the results
 * of gintuple_get_key() on the current page.
 */
static bool matchPartialInPendingList(GinState* ginstate, Page page, OffsetNumber off, OffsetNumber maxoff,
    GinScanEntry entry, Datum* datum, GinNullCategory* category, bool* datumExtracted)
{
    IndexTuple itup;
    int32 cmp;

    /* Partial match to a null is not possible */
    if (entry->queryCategory != GIN_CAT_NORM_KEY)
        return false;

    while (off < maxoff) {
        itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, off));
        if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
            return false;

        if (datumExtracted[off - 1] == false) {
            datum[off - 1] = gintuple_get_key(ginstate, itup, &category[off - 1]);
            datumExtracted[off - 1] = true;
        }

        /* Once we hit nulls, no further match is possible */
        if (category[off - 1] != GIN_CAT_NORM_KEY)
            return false;

        /* ----------
         * Check partial match.
         * case cmp == 0 => match
         * case cmp > 0 => not match and end scan (no later match possible)
         * case cmp < 0 => not match and continue scan
         * ----------
         */
        cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
            ginstate->supportCollation[entry->attnum - 1],
            entry->queryKey,
            datum[off - 1],
            UInt16GetDatum(entry->strategy),
            PointerGetDatum(entry->extra_data)));
        if (cmp == 0)
            return true;
        else if (cmp > 0)
            return false;

        off++;
    }

    return false;
}

/*
 * Set up the entryRes array for each key by looking at
 * every entry for current heap row in pending list.
 *
 * Returns true if each scan key has at least one entryRes match.
 * This corresponds to the situations where the normal index search will
 * try to apply the key's consistentFn.  (A tuple not meeting that requirement
 * cannot be returned by the normal search since no entry stream will
 * source its TID.)
 *
 * The pendingBuffer is presumed pinned and share-locked on entry.
 */
static bool collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition* pos)
{
    GinScanOpaque so = (GinScanOpaque)scan->opaque;
    OffsetNumber attrnum;
    Page page;
    IndexTuple itup;
    uint32 i, j;
    errno_t ret = EOK;

    /*
     * Reset all entryRes and hasMatchKey flags
     */
    for (i = 0; i < so->nkeys; i++) {
        GinScanKey key = so->keys + i;

        ret = memset_s(
            key->entryRes, sizeof(GinTernaryValue) * key->nentries, GIN_FALSE, sizeof(GinTernaryValue) * key->nentries);
        securec_check(ret, "", "");
    }
    ret = memset_s(pos->hasMatchKey, so->nkeys, FALSE, so->nkeys);
    securec_check(ret, "", "");

    /*
     * Outer loop iterates over multiple pending-list pages when a single heap
     * row has entries spanning those pages.
     */
    for (;;) {
        Datum datum[BLCKSZ / sizeof(IndexTupleData)];
        GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
        bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];

        Assert(pos->lastOffset > pos->firstOffset);
        ret = memset_s(datumExtracted + pos->firstOffset - 1,
            sizeof(bool) * (pos->lastOffset - pos->firstOffset),
            0,
            sizeof(bool) * (pos->lastOffset - pos->firstOffset));
        securec_check(ret, "", "");

        page = BufferGetPage(pos->pendingBuffer);

        for (i = 0; i < so->nkeys; i++) {
            GinScanKey key = so->keys + i;
            for (j = 0; j < key->nentries; j++) {
                GinScanEntry entry = key->scanEntry[j];
                OffsetNumber StopLow = pos->firstOffset;
                OffsetNumber StopHigh = pos->lastOffset;
                OffsetNumber StopMiddle;

                /* If already matched on earlier page, do no extra work */
                if (key->entryRes[j]) {
                    continue;
                }

                /*
                 * Interesting tuples are from pos->firstOffset to
                 * pos->lastOffset and they are ordered by (attnum, Datum) as
                 * it's done in entry tree.  So we can use binary search to
                 * avoid linear scanning.
                 */
                while (StopLow < StopHigh) {
                    int res;

                    StopMiddle = StopLow + ((uint32)(StopHigh - StopLow) >> 1);

                    itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, StopMiddle));

                    attrnum = gintuple_get_attrnum(&so->ginstate, itup);
                    if (key->attnum < attrnum) {
                        StopHigh = StopMiddle;
                        continue;
                    }
                    if (key->attnum > attrnum) {
                        StopLow = StopMiddle + 1;
                        continue;
                    }

                    if (datumExtracted[StopMiddle - 1] == false) {
                        datum[StopMiddle - 1] = gintuple_get_key(&so->ginstate, itup, &category[StopMiddle - 1]);
                        datumExtracted[StopMiddle - 1] = true;
                    }

                    if (entry->queryCategory == GIN_CAT_EMPTY_QUERY) {
                        /* special behavior depending on searchMode */
                        if (entry->searchMode == GIN_SEARCH_MODE_ALL) {
                            /* match anything except NULL_ITEM */
                            if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM) {
                                res = -1;
                            } else {
                                res = 0;
                            }
                        } else {
                            /* match everything */
                            res = 0;
                        }
                    } else {
                        res = ginCompareEntries(&so->ginstate,
                            entry->attnum,
                            entry->queryKey,
                            entry->queryCategory,
                            datum[StopMiddle - 1],
                            category[StopMiddle - 1]);
                    }

                    if (res == 0) {
                        /*
                         * Found exact match (there can be only one, except in
                         * EMPTY_QUERY mode).
                         *
                         * If doing partial match, scan forward from here to
                         * end of page to check for matches.
                         *
                         * See comment above about tuple's ordering.
                         */
                        if (entry->isPartialMatch) {
                            if (pos->lastOffset > BLCKSZ / sizeof(IndexTupleData)) {
                                key->entryRes[j] = false;
                            } else {
                                key->entryRes[j] = matchPartialInPendingList(&so->ginstate,
                                    page,
                                    StopMiddle,
                                    pos->lastOffset,
                                    entry,
                                    datum,
                                    category,
                                    datumExtracted);
                            }
                        } else {
                            key->entryRes[j] = true;
                        }

                        /* done with binary search */
                        break;
                    } else if (res < 0) {
                        StopHigh = StopMiddle;
                    } else {
                        StopLow = StopMiddle + 1;
                    }
                }

                if (StopLow >= StopHigh && entry->isPartialMatch) {
                    /*
                     * No exact match on this page.  If doing partial match,
                     * scan from the first tuple greater than target value to
                     * end of page.  Note that since we don't remember whether
                     * the comparePartialFn told us to stop early on a
                     * previous page, we will uselessly apply comparePartialFn
                     * to the first tuple on each subsequent page.
                     */
                    if (pos->lastOffset > BLCKSZ / sizeof(IndexTupleData)) {
                        key->entryRes[j] = false;
                    } else {
                        key->entryRes[j] = matchPartialInPendingList(
                            &so->ginstate, page, StopHigh, pos->lastOffset, entry, datum, category, datumExtracted);
                    }
                }

                pos->hasMatchKey[i] |= key->entryRes[j];
            }
        }

        /* Advance firstOffset over the scanned tuples */
        pos->firstOffset = pos->lastOffset;

        if (GinPageHasFullRow(page)) {
            /*
             * We have examined all pending entries for the current heap row.
             * Break out of loop over pages.
             */
            break;
        } else {
            /*
             * Advance to next page of pending entries for the current heap
             * row.  Complain if there isn't one.
             */
            ItemPointerData item = pos->item;

            if (scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item)) {
                ereport(ERROR,
                    (errcode(ERRCODE_INDEX_CORRUPTED),
                        errmsg("could not find additional pending pages for same heap tuple")));
            }
        }
    }

    /*
     * Now return "true" if all scan keys have at least one matching datum
     */
    for (i = 0; i < so->nkeys; i++) {
        if (pos->hasMatchKey[i] == false) {
            return false;
        }
    }

    return true;
}

/*
 * Collect all matched rows from pending list into bitmap
 */
static void scanPendingInsert(IndexScanDesc scan, TIDBitmap* tbm, int64* ntids)
{
    GinScanOpaque so = (GinScanOpaque)scan->opaque;
    MemoryContext oldCtx;
    bool recheck = false;
    bool match = false;
    uint32 i;
    pendingPosition pos;
    Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
    BlockNumber blkno;

    *ntids = 0;

    LockBuffer(metabuffer, GIN_SHARE);
    blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
    /*
     * fetch head of list before unlocking metapage. head page must be pinned
     * to prevent deletion by vacuum process
     */
    if (blkno == InvalidBlockNumber) {
        /* No pending list, so proceed with normal scan */
        UnlockReleaseBuffer(metabuffer);
        return;
    }

    pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
    LockBuffer(pos.pendingBuffer, GIN_SHARE);
    pos.firstOffset = FirstOffsetNumber;
    UnlockReleaseBuffer(metabuffer);
    pos.hasMatchKey = (bool*)palloc(sizeof(bool) * so->nkeys);

    /*
     * loop for each heap row. scanGetCandidate returns full row or row's
     * tuples from first page.
     */
    while (scanGetCandidate(scan, &pos)) {
        /*
         * Check entries in tuple and set up entryRes array.
         *
         * If pending tuples belonging to the current heap row are spread
         * across several pages, collectMatchesForHeapRow will read all of
         * those pages.
         */
        if (!collectMatchesForHeapRow(scan, &pos))
            continue;

        /*
         * Matching of entries of one row is finished, so check row using
         * consistent functions.
         */
        oldCtx = MemoryContextSwitchTo(so->tempCtx);
        recheck = false;
        match = true;

        for (i = 0; i < so->nkeys; i++) {
            GinScanKey key = so->keys + i;

            if (!key->boolConsistentFn(key)) {
                match = false;
                break;
            }
            recheck = recheck || key->recheckCurItem;
        }

        MemoryContextSwitchTo(oldCtx);
        MemoryContextReset(so->tempCtx);

        if (match) {
            tbm_add_tuples(tbm, &pos.item, 1, recheck);
            (*ntids)++;
        }
    }

    pfree(pos.hasMatchKey);
    pos.hasMatchKey = NULL;
}

#define GinIsVoidRes(s) (((GinScanOpaque)scan->opaque)->isVoidRes)

Datum gingetbitmap(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0);
    TIDBitmap* tbm = (TIDBitmap*)PG_GETARG_POINTER(1);

    if (scan == NULL || tbm == NULL)
        ereport(
            ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("Invalid arguments for function gingetbitmap")));

    GinScanOpaque so = (GinScanOpaque)scan->opaque;
    int64 ntids;
    ItemPointerData iptr;
    bool recheck = false;

    /*
     * Set up the scan keys, and check for unsatisfiable query.
     */
    ginFreeScanKeys(so); /* there should be no keys yet, but just to be sure */
    ginNewScanKey(scan);

    if (GinIsVoidRes(scan))
        PG_RETURN_INT64(0);

    ntids = 0;

    /*
     * First, scan the pending list and collect any matching entries into the
     * bitmap.  After we scan a pending item, some other backend could post it
     * into the main index, and so we might visit it a second time during the
     * main scan.  This is okay because we'll just re-set the same bit in the
     * bitmap.  (The possibility of duplicate visits is a major reason why GIN
     * can't support the amgettuple API, however.) Note that it would not do
     * to scan the main index before the pending list, since concurrent
     * cleanup could then make us miss entries entirely.
     */
    scanPendingInsert(scan, tbm, &ntids);

    /*
     * Now scan the main index.
     */
    startScan(scan, false);

    ItemPointerSetMin(&iptr);

    for (;;) {
        CHECK_FOR_INTERRUPTS();

        if (!scanGetItem(scan, iptr, &iptr, &recheck))
            break;

        if (ItemPointerIsLossyPage(&iptr))
            tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
        else
            tbm_add_tuples(tbm, &iptr, 1, recheck);
        ntids++;
    }

    PG_RETURN_INT64(ntids);
}

/*
 * Collect all matched rows from pending list into sort
 */
static void scanCGinPendingInsert(IndexScanDesc scan, IndexSortState* sort, VectorBatch* tids, int64* ntids)
{
    GinScanOpaque so = (GinScanOpaque)scan->opaque;
    MemoryContext oldCtx;
    bool recheck = false;
    bool match = false;
    uint32 i;
    pendingPosition pos;
    Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
    BlockNumber blkno;
    ScalarVector* vecs = tids->m_arr;
    int offset = 0;

    *ntids = 0;

    LockBuffer(metabuffer, GIN_SHARE);
    blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
    /*
     * fetch head of list before unlocking metapage. head page must be pinned
     * to prevent deletion by vacuum process
     */
    if (blkno == InvalidBlockNumber) {
        /* No pending list, so proceed with normal scan */
        UnlockReleaseBuffer(metabuffer);
        return;
    }

    pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
    LockBuffer(pos.pendingBuffer, GIN_SHARE);
    pos.firstOffset = FirstOffsetNumber;
    UnlockReleaseBuffer(metabuffer);
    pos.hasMatchKey = (bool*)palloc(sizeof(bool) * so->nkeys);

    /*
     * loop for each heap row. scanGetCandidate returns full row or row's
     * tuples from first page.
     */
    while (scanGetCandidate(scan, &pos)) {
        /*
         * Check entries in tuple and set up entryRes array.
         *
         * If pending tuples belonging to the current heap row are spread
         * across several pages, collectMatchesForHeapRow will read all of
         * those pages.
         */
        if (!collectMatchesForHeapRow(scan, &pos))
            continue;

        /*
         * Matching of entries of one row is finished, so check row using
         * consistent functions.
         */
        oldCtx = MemoryContextSwitchTo(so->tempCtx);
        recheck = false;
        match = true;

        for (i = 0; i < so->nkeys; i++) {
            GinScanKey key = so->keys + i;

            if (!key->boolConsistentFn(key)) {
                match = false;
                break;
            }
            recheck = recheck || key->recheckCurItem;
        }

        MemoryContextSwitchTo(oldCtx);
        MemoryContextReset(so->tempCtx);

        if (match) {
            ItemPointer itemP = (ItemPointer) & (vecs[0].m_vals[offset]);
            itemP->ip_blkid = pos.item.ip_blkid;
            itemP->ip_posid = pos.item.ip_posid;

            offset++;
            if (offset == BatchMaxSize) {
                vecs[0].m_rows = offset;
                tids->m_rows = offset;
                PutBatchToSorter(sort, tids);
                tids->Reset(true);
                offset = 0;
            }

            (*ntids)++;
        }
    }

    if (offset > 0) {
        vecs[0].m_rows = offset;
        tids->m_rows = offset;
        PutBatchToSorter(sort, tids);
    }

    pfree(pos.hasMatchKey);
    pos.hasMatchKey = NULL;
}

Datum cgingetbitmap(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0);
    IndexSortState* sort = (IndexSortState*)PG_GETARG_POINTER(1);
    VectorBatch* tids = (VectorBatch*)PG_GETARG_POINTER(2);

    if (scan == NULL || sort == NULL || tids == NULL)
        ereport(
            ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("Invalid arguments for function cgingetbitmap")));

    ScalarVector* vecs = tids->m_arr;
    int offset = 0;
    GinScanOpaque so = (GinScanOpaque)scan->opaque;
    ItemPointerData iptr;
    bool recheck = false;

    int64 ntids = 0;

    /*
     * Set up the scan keys, and check for unsatisfiable query.
     */
    ginFreeScanKeys(so); /* there should be no keys yet, but just to be sure */
    ginNewScanKey(scan);

    if (GinIsVoidRes(scan))
        PG_RETURN_INT64(0);

    /*
     * First scan the pending list.
     */
    scanCGinPendingInsert(scan, sort, tids, &ntids);

    /*
     * Now scan the main index.
     */
    startScan(scan, true);

    ItemPointerSetMin(&iptr);

    for (;;) {
        CHECK_FOR_INTERRUPTS();

        if (!scanGetItem(scan, iptr, &iptr, &recheck)) {
            sort->m_tidEnd = true;
            break;
        }

        ItemPointer itemP = (ItemPointer) & (vecs[0].m_vals[offset]);
        itemP->ip_blkid = iptr.ip_blkid;
        itemP->ip_posid = iptr.ip_posid;

        offset++;
        if (offset == BatchMaxSize) {
            vecs[0].m_rows = offset;
            tids->m_rows = offset;
            PutBatchToSorter(sort, tids);
            tids->Reset(true);
            offset = 0;
        }
        ntids++;
    }

    if (offset > 0) {
        vecs[0].m_rows = offset;
        tids->m_rows = offset;
        PutBatchToSorter(sort, tids);
    }

    PG_RETURN_INT64(ntids);
}