diff --git a/src/common/backend/nodes/tidbitmap.cpp b/src/common/backend/nodes/tidbitmap.cpp index 00dc5d881..398638781 100644 --- a/src/common/backend/nodes/tidbitmap.cpp +++ b/src/common/backend/nodes/tidbitmap.cpp @@ -45,6 +45,7 @@ #include "nodes/bitmapset.h" #include "nodes/tidbitmap.h" #include "utils/hsearch.h" +#include "utils/hashutils.h" #include "access/ustore/knl_upage.h" /* @@ -64,12 +65,12 @@ * for that page in the page table. * * We actually store both exact pages and lossy chunks in the same hash - * table, using identical data structures. (This is because dynahash.c's - * memory management doesn't allow space to be transferred easily from one - * hashtable to another.) Therefore it's best if PAGES_PER_CHUNK is the - * same as MAX_TUPLES_PER_PAGE, or at least not too different. But we - * also want PAGES_PER_CHUNK to be a power of 2 to avoid expensive integer - * remainder operations. So, define it like this: + * table, using identical data structures. (This is because the memory + * management for hashtables doesn't easily/efficiently allow space to be + * transferred easily from onehashtable to another.) Therefore it's best + * if PAGES_PER_CHUNK is the same as MAX_TUPLES_PER_PAGE, or at least not + * too different. But wealso want PAGES_PER_CHUNK to be a power of 2 to + * avoid expensive integer remainder operations. So, define it like this: */ #define PAGES_PER_HEAP_CHUNK (BLCKSZ / 32) #define PAGES_PER_UHEAP_CHUNK (BLCKSZ / 16) @@ -107,7 +108,7 @@ /* * Used as key of hash table for PagetableEntry. */ -typedef struct PagetableEntryNode_s { +typedef struct PagetableEntryNode { BlockNumber blockNo; /* page number (hashtable key) */ Oid partitionOid; /* used for GLOBAL partition index to indicate partition table */ int2 bucketid; /* used for cross-bucket index on hashbucket table */ @@ -129,21 +130,22 @@ typedef struct PagetableEntryNode_s { */ typedef struct PagetableEntry { PagetableEntryNode entryNode; + char status; /* hash entry status */ bool ischunk; /* T = lossy storage, F = exact */ - bool recheck; /* should the tuples be rechecked? */ + bool recheck; /* should the tuples be rechecked? */ bitmapword words[Max(Max(WORDS_PER_HEAP_PAGE, WORDS_PER_HEAP_CHUNK), Max(WORDS_PER_UHEAP_PAGE, WORDS_PER_UHEAP_CHUNK))]; } PagetableEntry; /* - * dynahash.c is optimized for relatively large, long-lived hash tables. - * This is not ideal for TIDBitMap, particularly when we are using a bitmap - * scan on the inside of a nestloop join: a bitmap may well live only long - * enough to accumulate one entry in such cases. We therefore avoid creating - * an actual hashtable until we need two pagetable entries. When just one - * pagetable entry is needed, we store it in a fixed field of TIDBitMap. - * (NOTE: we don't get rid of the hashtable if the bitmap later shrinks down - * to zero or one page again. So, status can be TBM_HASH even when nentries - * is zero or one.) + * We want to avoid the overhead of creating the hashtable, which is + * comparatively large, when not necessary.particularly when we are using a + * bitmap scan on the inside of a nestloop join: a bitmap may well live only + * long enough to accumulate one entry in such cases. We therefore avoid + * creating an actual hashtable until we need two pagetable entries. When + * just one pagetable entry is needed, we store it in a fixed field of + * TIDBitMap. (NOTE: we don't get rid of the hashtable if the bitmap later + * shrinks down to zero or one page again. 
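The words[] array in PagetableEntry above is the per-page bitmap that the rest of this file manipulates. As a minimal, standalone sketch of the encoding (not code from this patch), this is how a 1-based tuple offset maps onto a bit; WORDNUM/BITNUM mirror the macros used later in the file, and the 64-bit bitmapword width is an assumption, the real type comes from bitmapset.h:

#include <cstdint>

typedef uint64_t bitmapword;                 /* assumed width; see bitmapset.h for the real definition */
#define BITS_PER_BITMAPWORD 64
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x)  ((x) % BITS_PER_BITMAPWORD)

/* Set the bit for the 1-based tuple offset 'off' in an exact page's word array. */
static inline void set_tuple_bit(bitmapword *words, int off)
{
    int bitno = off - 1;                     /* offsets are 1-based, bits are 0-based */
    words[WORDNUM(bitno)] |= ((bitmapword)1 << BITNUM(bitno));
}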
So, status can be TBM_HASH even + * when nentries is zero or one.) */ typedef enum { TBM_EMPTY, /* no hashtable, nentries == 0 */ @@ -151,6 +153,16 @@ typedef enum { TBM_HASH /* pagetable is valid, entry1 is not */ } TBMStatus; +/* + * Marks a tbm hash table type, used in template. + */ +typedef enum { + TBM_DYNAMIC_HASH, /* use dynamic hash table */ + TBM_SIMPLE_HASH, /* use simple hash table */ +} TBMHashType; + +#define TBM_TEMPLATE template + /* * Here is the representation for a whole TIDBitMap: */ @@ -158,18 +170,21 @@ struct TIDBitmap { NodeTag type; /* to make it a valid Node */ MemoryContext mcxt; /* memory context containing me */ TBMStatus status; /* see codes above */ + TBMHandler handler; /* tid bitmap handlers */ HTAB* pagetable; /* hash table of PagetableEntry's */ + struct pagetable_hash* simple_pagetable; /* hash table of simplehash implementation */ int nentries; /* number of entries in pagetable */ int maxentries; /* limit on same to meet maxbytes */ int npages; /* number of exact entries in pagetable */ int nchunks; /* number of lossy entries in pagetable */ bool iterating; /* tbm_begin_iterate called? */ - bool isGlobalPart; /* represent global partition index tbm */ - bool crossbucket; /* represent crossbucket index tbm */ + uint32 lossify_start; /* offset to start lossifying hashtable at */ PagetableEntry entry1; /* used when status == TBM_ONE_PAGE */ /* these are valid when iterating is true: */ PagetableEntry** spages; /* sorted exact-page list, or NULL */ PagetableEntry** schunks; /* sorted lossy-chunk list, or NULL */ + bool is_global_part; /* is global index */ + bool is_crossbucket; /* is crossbucket index */ bool is_ustore; int max_tuples_page; int pages_per_chunk; @@ -190,26 +205,76 @@ struct TBMIterator { TBMIterateResult output; /* MUST BE LAST (because variable-size) */ }; -/* Local function prototypes */ -static void tbm_union_page(TIDBitmap* a, const PagetableEntry* bpage); -static bool tbm_intersect_page(TIDBitmap* a, PagetableEntry* apage, const TIDBitmap* b); -static const PagetableEntry* tbm_find_pageentry(const TIDBitmap* tbm, PagetableEntryNode pageNode); -static PagetableEntry* tbm_get_pageentry(TIDBitmap* tbm, PagetableEntryNode pageNode); -static bool tbm_page_is_lossy(const TIDBitmap* tbm, PagetableEntryNode pageNode); -static void tbm_mark_page_lossy(TIDBitmap* tbm, PagetableEntryNode pageNode); -static void tbm_lossify(TIDBitmap* tbm); -static int tbm_comparator(const void* left, const void* right); +/* + * Local function prototypes + */ +TBM_TEMPLATE static void tbm_create_pagetable(TIDBitmap* tbm); +TBM_TEMPLATE static void tbm_init_handlers(TIDBitmap* tbm); +TBM_TEMPLATE static void tbm_add_tuples(TIDBitmap* tbm, const ItemPointer tids, int ntids, bool recheck, Oid partitionOid = InvalidOid, int2 bucketid = InvalidBktId); +TBM_TEMPLATE static void tbm_add_page(TIDBitmap* tbm, BlockNumber pageno, Oid partitionOid = InvalidOid, int2 bucketid = InvalidBktId); + +/* tid bitmap operation prototypes */ +TBM_TEMPLATE static void tbm_union(TIDBitmap* a, const TIDBitmap* bpage); +TBM_TEMPLATE static void tbm_intersect(TIDBitmap* a, const TIDBitmap* b); +TBM_TEMPLATE static void tbm_union_page(TIDBitmap* a, const PagetableEntry* bpage); +TBM_TEMPLATE static bool tbm_intersect_page(TIDBitmap* a, PagetableEntry* apage, const TIDBitmap* b); + +/* tid bitmap iterator prototypes */ +TBM_TEMPLATE static TBMIterator* tbm_begin_iterate(TIDBitmap* tbm); + +/* tid bitmap page entry prototypes */ +TBM_TEMPLATE static const PagetableEntry* 
tbm_find_pageentry(const TIDBitmap* tbm, PagetableEntryNode pageNode); +TBM_TEMPLATE static PagetableEntry* tbm_get_pageentry(TIDBitmap* tbm, PagetableEntryNode pageNode); + +/* tid bitmap lossy prototypes */ +TBM_TEMPLATE static bool tbm_page_is_lossy(const TIDBitmap* tbm, PagetableEntryNode pageNode); +TBM_TEMPLATE static void tbm_mark_page_lossy(TIDBitmap* tbm, PagetableEntryNode pageNode); +TBM_TEMPLATE static void tbm_lossify(TIDBitmap* tbm); +TBM_TEMPLATE static inline void tbm_lossify_generic_iterate(TIDBitmap* tbm); +TBM_TEMPLATE static inline void tbm_lossify_simple_iterate(TIDBitmap* tbm); + +/* tid bitmap utility prototypes */ +TBM_TEMPLATE static int tbm_comparator(const void* left, const void* right); /* - * TbmCreate - create an initially-empty bitmap + * tbm_hash_complex_key : private hash function for pagetableEntryNode + */ +static inline uint32 tbm_hash_complex_key(const void* key, Size keysize) +{ + PagetableEntryNode* node = (PagetableEntryNode*)key; + uint32 ret = murmurhash32(node->blockNo); + + ret = hash_combine(ret, murmurhash32(node->partitionOid)); + ret = hash_combine(ret, murmurhash32(node->bucketid)); + return ret; +} + +/* define hashtable mapping block numbers to PagetableEntry's */ +#define SH_PREFIX pagetable +#define SH_ELEMENT_TYPE PagetableEntry +#define SH_KEY_TYPE BlockNumber +#define SH_KEY entryNode.blockNo +#define SH_HASH_KEY(tb, key) murmurhash32(key) +#define SH_EQUAL(tb, a, b) (a == b) +#define SH_SCOPE static inline +#define SH_DEFINE +#define SH_DECLARE +#include "lib/simplehash.h" + +/* + * tbm_create - create an initially-empty bitmap * * The bitmap will live in the memory context that is CurrentMemoryContext * at the time of this call. It will be limited to (approximately) maxbytes * total memory consumption. + * + * when GPI or CPI is involved. Both of them requires extra key(s) to create + * the hashtable (partitionOid and bucketid to be exact). */ -TIDBitmap* TbmCreate(long maxbytes, bool is_ustore) +TIDBitmap* tbm_create(long maxbytes, bool is_global_part, bool is_crossbucket, bool is_ustore) { TIDBitmap* tbm = NULL; + bool complex_key = (is_global_part || is_crossbucket); long nbuckets; /* Create the TIDBitmap struct and zero all its fields */ @@ -217,19 +282,33 @@ TIDBitmap* TbmCreate(long maxbytes, bool is_ustore) tbm->mcxt = CurrentMemoryContext; tbm->status = TBM_EMPTY; - tbm->isGlobalPart = false; + + /* + * Fill TBM handlers base on the complexity of the keys. + * If the context requires complementary keys like partitionOid or + * bucketid, we use generic dynamichash table to accomodate bitmap. + * Otherwise, we use a more cache-friendly hash table to do the + * trick. + */ + if (!complex_key) { + tbm_init_handlers(tbm); + } else { + tbm_init_handlers(tbm); + } + /* * Estimate number of hashtable entries we can have within maxbytes. This - * estimates the hash overhead at MAXALIGN(sizeof(HASHELEMENT)) plus a - * pointer per hash entry, which is crude but good enough for our purpose. - * Also count an extra Pointer per entry for the arrays created during - * iteration readout. + * estimates the hash cost as at sizeof(PagetableEntry), which is good enough + * for our purpose. Alse count an extra pointer per hash entry for the arrays + * created during iteration readout. 
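To show how the pieces above fit together, here is a hedged usage sketch of the handler-based API, loosely modeled on the executor and index-AM call sites updated later in this patch; indexRel and tid are placeholders rather than names from this change:

long work_mem_bytes = u_sess->attr.attr_memory.work_mem * 1024L;
TIDBitmap *tbm = tbm_create(work_mem_bytes,
                            RelationIsGlobalIndex(indexRel),      /* GPI adds partitionOid to the key */
                            RelationIsCrossBucketIndex(indexRel), /* CBI adds bucketid to the key */
                            false);                               /* not a ustore relation */

/* Every per-bitmap operation now goes through the handler filled in by tbm_create(). */
TBMHandler h = tbm_get_handler(tbm);
h._add_tuples(tbm, &tid, 1, false /* no recheck */, InvalidOid, InvalidBktId);

TBMIterator *it = h._begin_iterate(tbm);
/* ... tbm_iterate(it), tbm_end_iterate(it) as before ... */
tbm_free(tbm);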
*/ - nbuckets = maxbytes / - (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(sizeof(PagetableEntry)) + sizeof(Pointer) + sizeof(Pointer)); - nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */ - nbuckets = Max(nbuckets, 16); /* sanity limit */ + nbuckets = tbm_calculate_entries(maxbytes, complex_key); tbm->maxentries = (int)nbuckets; + tbm->lossify_start = 0; + + /* Set TBM index & storage attributes */ + tbm->is_global_part = is_global_part; + tbm->is_crossbucket = is_crossbucket; tbm->is_ustore = is_ustore; if (is_ustore) { @@ -246,38 +325,76 @@ TIDBitmap* TbmCreate(long maxbytes, bool is_ustore) } /* - * Actually create the hashtable. Since this is a moderately expensive - * proposition, we don't do it until we have to. + * Tid bitmap handler initializer. + * + * initialize templated utility tbm handlers, so that the caller can invoke. */ -static void tbm_create_pagetable(TIDBitmap* tbm) +TBM_TEMPLATE static void tbm_init_handlers(TIDBitmap* tbm) { - HASHCTL hash_ctl; - int rc = 0; + tbm->handler._add_tuples = tbm_add_tuples; + tbm->handler._add_page= tbm_add_page; + + tbm->handler._union = tbm_union; + tbm->handler._intersect = tbm_intersect; + + tbm->handler._begin_iterate = tbm_begin_iterate; +} + +/* + * Get bitmap handler. + * + * get templated utility tbm handlers, so that the caller can invoke. + */ +TBMHandler tbm_get_handler(TIDBitmap* tbm) +{ + return tbm->handler; +} +/* + * Actually create the hashtable. + * + * Since this is a moderately expensive proposition, we don't do it until we have to. + */ +TBM_TEMPLATE static void tbm_create_pagetable(TIDBitmap* tbm) +{ + errno_t rc = EOK; Assert(tbm->status != TBM_HASH); Assert(tbm->pagetable == NULL); - /* Create the hashtable proper */ - rc = memset_s(&hash_ctl, sizeof(hash_ctl), 0, sizeof(hash_ctl)); - securec_check(rc, "", ""); - hash_ctl.keysize = sizeof(PagetableEntryNode); - hash_ctl.entrysize = sizeof(PagetableEntry); - hash_ctl.hash = tag_hash; - hash_ctl.hcxt = tbm->mcxt; - tbm->pagetable = hash_create("TIDBitmap", - 128, /* start small and extend */ - &hash_ctl, - HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); + if (type == TBM_SIMPLE_HASH) { + tbm->simple_pagetable = (struct pagetable_hash*)pagetable_create(tbm->mcxt, 128, tbm); + } else { + /* Create the hashtable proper */ + HASHCTL hash_ctl; + rc = memset_s(&hash_ctl, sizeof(hash_ctl), 0, sizeof(hash_ctl)); + securec_check(rc, "", ""); + hash_ctl.keysize = sizeof(PagetableEntryNode); + hash_ctl.entrysize = sizeof(PagetableEntry); + hash_ctl.hash = tbm_hash_complex_key; + hash_ctl.hcxt = tbm->mcxt; + tbm->pagetable = hash_create("TIDBitmap", 128, /* start small and extend */ + &hash_ctl, HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); + } /* If entry1 is valid, push it into the hashtable */ if (tbm->status == TBM_ONE_PAGE) { PagetableEntry* page = NULL; bool found = false; + char oldstatus; - page = (PagetableEntry*)hash_search(tbm->pagetable, (void*)&tbm->entry1.entryNode, HASH_ENTER, &found); - Assert(!found); - errno_t rc = memcpy_s(page, sizeof(PagetableEntry), &tbm->entry1, sizeof(PagetableEntry)); - securec_check(rc, "\0", "\0"); + if (type == TBM_SIMPLE_HASH) { + page = pagetable_insert(tbm->simple_pagetable, tbm->entry1.entryNode.blockNo, &found); + Assert(!found); + oldstatus = page->status; + rc = memcpy_s(page, sizeof(PagetableEntry), &tbm->entry1, sizeof(PagetableEntry)); + securec_check(rc, "\0", "\0"); + page->status = oldstatus; + } else { + page = (PagetableEntry *)hash_search(tbm->pagetable, (void *)&tbm->entry1.entryNode, HASH_ENTER, &found); + 
Assert(!found); + rc = memcpy_s(page, sizeof(PagetableEntry), &tbm->entry1, sizeof(PagetableEntry)); + securec_check(rc, "\0", "\0"); + } } tbm->status = TBM_HASH; @@ -291,6 +408,9 @@ void tbm_free(TIDBitmap* tbm) if (tbm->pagetable != NULL) { hash_destroy(tbm->pagetable); } + if (tbm->simple_pagetable != NULL) { + pagetable_destroy(tbm->simple_pagetable); + } if (tbm->spages != NULL) { pfree_ext(tbm->spages); } @@ -301,19 +421,31 @@ void tbm_free(TIDBitmap* tbm) } /* + * tbm_calculate_entries + * * Estimate number of hashtable entries we can have within maxbytes. + * complex_keys is set when evaluating bitmaps with partitioned + * relations (e.g GPI, CBI etc.) */ -long tbm_calculate_entries(double maxbytes) -{ +long tbm_calculate_entries(double maxbytes, bool complex_keys) +{ + long nbuckets; + /* - * This estimates the hash cost as sizeof(PagetableEntry), which is good enough - * for our purpose. Also count an extra Pointer per entry for the arrays created - * during iteration readout. + * Estimate number of hashtable entries we can have within maxbytes. This + * estimates the hash cost as sizeof(PagetableEntry), which is good enough + * for our purpose. Also count an extra Pointer per entry for the arrays + * created during iteration readout. */ - long nbuckets = maxbytes / (sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer)); + if (complex_keys) { + nbuckets = maxbytes / + (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(sizeof(PagetableEntry)) + sizeof(Pointer) + sizeof(Pointer)); + } else { + nbuckets = maxbytes / (sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer)); + } + nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */ - const int max_buckets_lower_limit = 16; - nbuckets = Max(nbuckets, max_buckets_lower_limit); /* sanity limit */ + nbuckets = Max(nbuckets, 16); /* sanity limit */ return nbuckets; } @@ -324,7 +456,7 @@ long tbm_calculate_entries(double maxbytes) * If recheck is true, then the recheck flag will be set in the * TBMIterateResult when any of these tuples are reported out. */ -void tbm_add_tuples(TIDBitmap* tbm, const ItemPointer tids, int ntids, bool recheck, Oid partitionOid, int2 bucketid) +TBM_TEMPLATE void tbm_add_tuples(TIDBitmap* tbm, const ItemPointer tids, int ntids, bool recheck, Oid partitionOid, int2 bucketid) { int i; @@ -344,11 +476,11 @@ void tbm_add_tuples(TIDBitmap* tbm, const ItemPointer tids, int ntids, bool rech errmsg("tuple offset out of range: %u", off))); } - if (tbm_page_is_lossy(tbm, pageNode)) { + if (tbm_page_is_lossy(tbm, pageNode)) { continue; /* whole page is already marked */ } - page = tbm_get_pageentry(tbm, pageNode); + page = tbm_get_pageentry(tbm, pageNode); if (page->ischunk) { /* The page is a lossy chunk header, set bit for itself */ @@ -362,7 +494,7 @@ void tbm_add_tuples(TIDBitmap* tbm, const ItemPointer tids, int ntids, bool rech page->recheck |= recheck; if (tbm->nentries > tbm->maxentries) { - tbm_lossify(tbm); + tbm_lossify(tbm); } } } @@ -373,14 +505,14 @@ void tbm_add_tuples(TIDBitmap* tbm, const ItemPointer tids, int ntids, bool rech * This causes the whole page to be reported (with the recheck flag) * when the TIDBitmap is scanned. 
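To make the exact/lossy distinction concrete, here is a hedged consumer-side sketch following the usual TIDBitmap iterator contract, in which a negative ntuples marks a lossy page; recheck_whole_page and fetch_tuple are placeholders, not functions from this patch:

TBMIterateResult *res;
while ((res = tbm_iterate(iterator)) != NULL) {
    if (res->ntuples < 0) {
        /* Lossy page: only the block number survived, so every visible
         * tuple on res->blockno has to be fetched and rechecked. */
        recheck_whole_page(res->blockno);
    } else {
        /* Exact page: res->offsets[] lists the matching line pointers. */
        for (int i = 0; i < res->ntuples; i++)
            fetch_tuple(res->blockno, res->offsets[i], res->recheck);
    }
}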
*/ -void tbm_add_page(TIDBitmap* tbm, BlockNumber pageno, Oid partitionOid, int2 bucketid) +TBM_TEMPLATE void tbm_add_page(TIDBitmap* tbm, BlockNumber pageno, Oid partitionOid, int2 bucketid) { PagetableEntryNode pnode = {pageno, partitionOid, bucketid}; /* Enter the page in the bitmap, or mark it lossy if already present */ - tbm_mark_page_lossy(tbm, pnode); + tbm_mark_page_lossy(tbm, pnode); /* If we went over the memory limit, lossify some more pages */ if (tbm->nentries > tbm->maxentries) { - tbm_lossify(tbm); + tbm_lossify(tbm); } } @@ -389,8 +521,9 @@ void tbm_add_page(TIDBitmap* tbm, BlockNumber pageno, Oid partitionOid, int2 buc * * a is modified in-place, b is not changed */ -void tbm_union(TIDBitmap* a, const TIDBitmap* b) +TBM_TEMPLATE void tbm_union(TIDBitmap* a, const TIDBitmap* b) { + PagetableEntry* bpage = NULL; Assert(!a->iterating); /* Nothing to do if b is empty */ if (b->nentries == 0) { @@ -398,21 +531,29 @@ void tbm_union(TIDBitmap* a, const TIDBitmap* b) } /* Scan through chunks and pages in b, merge into a */ if (b->status == TBM_ONE_PAGE) { - tbm_union_page(a, &b->entry1); + tbm_union_page(a, &b->entry1); + return; + } + + Assert(b->status == TBM_HASH); + + if (type == TBM_SIMPLE_HASH) { + pagetable_iterator i; + pagetable_start_iterate(b->simple_pagetable, &i); + while ((bpage = pagetable_iterate(b->simple_pagetable, &i)) != NULL) { + tbm_union_page(a, bpage); + } } else { HASH_SEQ_STATUS status; - PagetableEntry* bpage = NULL; - - Assert(b->status == TBM_HASH); hash_seq_init(&status, b->pagetable); while ((bpage = (PagetableEntry*)hash_seq_search(&status)) != NULL) { - tbm_union_page(a, bpage); + tbm_union_page(a, bpage); } - } + } } /* Process one page of b during a union op */ -static void tbm_union_page(TIDBitmap* a, const PagetableEntry* bpage) +TBM_TEMPLATE static void tbm_union_page(TIDBitmap* a, const PagetableEntry* bpage) { PagetableEntry* apage = NULL; int wordnum; @@ -429,18 +570,18 @@ static void tbm_union_page(TIDBitmap* a, const PagetableEntry* bpage) while (w != 0) { if (w & 1) { PagetableEntryNode unionNode = {pg, bpage->entryNode.partitionOid, bpage->entryNode.bucketid}; - tbm_mark_page_lossy(a, unionNode); + tbm_mark_page_lossy(a, unionNode); } pg++; w >>= 1; } } } - } else if (tbm_page_is_lossy(a, bpage->entryNode)) { + } else if (tbm_page_is_lossy(a, bpage->entryNode)) { /* page is already lossy in a, nothing to do */ return; } else { - apage = tbm_get_pageentry(a, bpage->entryNode); + apage = tbm_get_pageentry(a, bpage->entryNode); if (apage->ischunk) { /* The page is a lossy chunk header, set bit for itself */ apage->words[0] |= ((bitmapword)1 << 0); @@ -454,7 +595,7 @@ static void tbm_union_page(TIDBitmap* a, const PagetableEntry* bpage) } if (a->nentries > a->maxentries) { - tbm_lossify(a); + tbm_lossify(a); } } @@ -463,16 +604,19 @@ static void tbm_union_page(TIDBitmap* a, const PagetableEntry* bpage) * * a is modified in-place, b is not changed */ -void tbm_intersect(TIDBitmap* a, const TIDBitmap* b) -{ +TBM_TEMPLATE void tbm_intersect(TIDBitmap* a, const TIDBitmap* b) +{ + PagetableEntry* apage = NULL; + Assert(!a->iterating); /* Nothing to do if a is empty */ if (a->nentries == 0) { return; } + /* Scan through chunks and pages in a, try to match to b */ if (a->status == TBM_ONE_PAGE) { - if (tbm_intersect_page(a, &a->entry1, b)) { + if (tbm_intersect_page(a, &a->entry1, b)) { /* Page is now empty, remove it from a */ Assert(!a->entry1.ischunk); a->npages--; @@ -480,14 +624,17 @@ void tbm_intersect(TIDBitmap* a, const TIDBitmap* b) 
Assert(a->nentries == 0); a->status = TBM_EMPTY; } - } else { - HASH_SEQ_STATUS status; - PagetableEntry* apage = NULL; + return; + } - Assert(a->status == TBM_HASH); - hash_seq_init(&status, a->pagetable); - while ((apage = (PagetableEntry*)hash_seq_search(&status)) != NULL) { - if (tbm_intersect_page(a, apage, b)) { + Assert(a->status == TBM_HASH); + + if (type == TBM_SIMPLE_HASH) { + pagetable_iterator i; + + pagetable_start_iterate(a->simple_pagetable, &i); + while ((apage = pagetable_iterate(a->simple_pagetable, &i)) != NULL) { + if (tbm_intersect_page(a, apage, b)) { /* Page or chunk is now empty, remove it from a */ if (apage->ischunk) { a->nchunks--; @@ -495,10 +642,29 @@ void tbm_intersect(TIDBitmap* a, const TIDBitmap* b) a->npages--; } a->nentries--; - if (hash_search(a->pagetable, (void*)&apage->entryNode, HASH_REMOVE, NULL) == NULL) { + if (!pagetable_delete(a->simple_pagetable,apage->entryNode.blockNo)) { ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), errmodule(MOD_EXECUTOR), errmsg("hash table corrupted"))); - } + (errcode(ERRCODE_DATA_CORRUPTED), errmodule(MOD_EXECUTOR), errmsg("hash table corrupted"))); + } + } + } + } else { + HASH_SEQ_STATUS status; + + hash_seq_init(&status, a->pagetable); + while ((apage = (PagetableEntry *)hash_seq_search(&status)) != NULL) { + if (tbm_intersect_page(a, apage, b)) { + /* Page or chunk is now empty, remove it from a */ + if (apage->ischunk) { + a->nchunks--; + } else { + a->npages--; + } + a->nentries--; + if (hash_search(a->pagetable, (void *)&apage->entryNode, HASH_REMOVE, NULL) == NULL) { + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), errmodule(MOD_EXECUTOR), errmsg("hash table corrupted"))); + } } } } @@ -509,7 +675,7 @@ void tbm_intersect(TIDBitmap* a, const TIDBitmap* b) * * Returns TRUE if apage is now empty and should be deleted from a */ -static bool tbm_intersect_page(TIDBitmap* a, PagetableEntry* apage, const TIDBitmap* b) +TBM_TEMPLATE static bool tbm_intersect_page(TIDBitmap* a, PagetableEntry* apage, const TIDBitmap* b) { const PagetableEntry* bpage = NULL; int wordnum; @@ -533,7 +699,7 @@ static bool tbm_intersect_page(TIDBitmap* a, PagetableEntry* apage, const TIDBit while (w != 0) { if (w & 1) { PagetableEntryNode pNode = {pg, apage->entryNode.partitionOid, apage->entryNode.bucketid}; - if (!tbm_page_is_lossy(b, pNode) && tbm_find_pageentry(b, pNode) == NULL) { + if (!tbm_page_is_lossy(b, pNode) && tbm_find_pageentry(b, pNode) == NULL) { /* Page is not in b at all, lose lossy bit */ neww &= ~((bitmapword)1 << (unsigned int)bitnum); } @@ -549,7 +715,7 @@ static bool tbm_intersect_page(TIDBitmap* a, PagetableEntry* apage, const TIDBit } } return candelete; - } else if (tbm_page_is_lossy(b, apage->entryNode)) { + } else if (tbm_page_is_lossy(b, apage->entryNode)) { /* * Some of the tuples in 'a' might not satisfy the quals for 'b', but * because the page 'b' is lossy, we don't know which ones. Therefore @@ -561,7 +727,7 @@ static bool tbm_intersect_page(TIDBitmap* a, PagetableEntry* apage, const TIDBit } else { bool candelete = true; - bpage = tbm_find_pageentry(b, apage->entryNode); + bpage = tbm_find_pageentry(b, apage->entryNode); if (bpage != NULL) { /* Both pages are exact, merge at the bit level */ Assert(!bpage->ischunk); @@ -599,7 +765,7 @@ bool tbm_is_empty(const TIDBitmap* tbm) * of the bitmap. However, you can call this multiple times to scan the * contents repeatedly, including parallel scans. 
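For reference, the exact-vs-exact branch of tbm_intersect_page above reduces to a word-wise AND; a simplified, self-contained sketch (nwords stands in for the heap/ustore word counts used in this file):

/* AND two exact pages; report whether the intersection is now empty. */
static bool intersect_exact_pages(bitmapword *awords, const bitmapword *bwords, int nwords)
{
    bool candelete = true;

    for (int w = 0; w < nwords; w++) {
        awords[w] &= bwords[w];
        if (awords[w] != 0)
            candelete = false;     /* at least one tuple survived on this page */
    }
    return candelete;              /* caller removes the entry from the table when true */
}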
*/ -TBMIterator* tbm_begin_iterate(TIDBitmap* tbm) +TBM_TEMPLATE TBMIterator* tbm_begin_iterate(TIDBitmap* tbm) { TBMIterator* iterator = NULL; @@ -624,7 +790,6 @@ TBMIterator* tbm_begin_iterate(TIDBitmap* tbm) * than one iterator. */ if (tbm->status == TBM_HASH && !tbm->iterating) { - HASH_SEQ_STATUS status; PagetableEntry* page = NULL; int npages; int nchunks; @@ -635,23 +800,38 @@ TBMIterator* tbm_begin_iterate(TIDBitmap* tbm) if ((tbm->schunks == NULL) && tbm->nchunks > 0) { tbm->schunks = (PagetableEntry**)MemoryContextAlloc(tbm->mcxt, tbm->nchunks * sizeof(PagetableEntry*)); } - - hash_seq_init(&status, tbm->pagetable); + npages = nchunks = 0; - while ((page = (PagetableEntry*)hash_seq_search(&status)) != NULL) { - if (page->ischunk) { - tbm->schunks[nchunks++] = page; - } else { - tbm->spages[npages++] = page; + if (type == TBM_SIMPLE_HASH) { + pagetable_iterator i; + pagetable_start_iterate(tbm->simple_pagetable, &i); + while ((page = pagetable_iterate(tbm->simple_pagetable, &i)) != NULL) { + if (page->ischunk) { + tbm->schunks[nchunks++] = page; + } else { + tbm->spages[npages++] = page; + } + } + } else { + /* make TBM_DYNAMIC_HASH a default*/ + HASH_SEQ_STATUS status; + hash_seq_init(&status, tbm->pagetable); + while ((page = (PagetableEntry *)hash_seq_search(&status)) != NULL) { + if (page->ischunk) { + tbm->schunks[nchunks++] = page; + } else { + tbm->spages[npages++] = page; + } } } + Assert(npages == tbm->npages); Assert(nchunks == tbm->nchunks); if (npages > 1) { - qsort(tbm->spages, npages, sizeof(PagetableEntry*), tbm_comparator); + qsort(tbm->spages, npages, sizeof(PagetableEntry*), tbm_comparator); } if (nchunks > 1) { - qsort(tbm->schunks, nchunks, sizeof(PagetableEntry*), tbm_comparator); + qsort(tbm->schunks, nchunks, sizeof(PagetableEntry*), tbm_comparator); } } @@ -711,11 +891,11 @@ TBMIterateResult* tbm_iterate(TBMIterator* iterator) */ if (iterator->schunkptr < tbm->nchunks) { PagetableEntry* chunk = tbm->schunks[iterator->schunkptr]; - PagetableEntryNode pnode; - pnode.blockNo = chunk->entryNode.blockNo + iterator->schunkbit; - pnode.partitionOid = chunk->entryNode.partitionOid; - pnode.bucketid = chunk->entryNode.bucketid; - pnode.padding = chunk->entryNode.padding; + PagetableEntryNode pnode = { + chunk->entryNode.blockNo + iterator->schunkbit, + chunk->entryNode.partitionOid, + chunk->entryNode.bucketid + }; if (iterator->spageptr >= tbm->npages || IS_CHUNK_BEFORE_PAGE(pnode, tbm->spages[iterator->spageptr]->entryNode)) { /* Return a lossy page indicator from the chunk */ @@ -788,7 +968,7 @@ void tbm_end_iterate(TBMIterator* iterator) * * Returns NULL if there is no non-lossy entry for the pageno. */ -static const PagetableEntry* tbm_find_pageentry(const TIDBitmap* tbm, PagetableEntryNode pageNode) +TBM_TEMPLATE static const PagetableEntry* tbm_find_pageentry(const TIDBitmap* tbm, PagetableEntryNode pageNode) { const PagetableEntry* page = NULL; @@ -805,7 +985,12 @@ static const PagetableEntry* tbm_find_pageentry(const TIDBitmap* tbm, PagetableE return page; } - page = (PagetableEntry*)hash_search(tbm->pagetable, (void*)&pageNode, HASH_FIND, NULL); + if (type == TBM_SIMPLE_HASH) { + page = pagetable_lookup(tbm->simple_pagetable, pageNode.blockNo); + } else { + page = (PagetableEntry*)hash_search(tbm->pagetable, (void*)&pageNode, HASH_FIND, NULL); + } + if (page == NULL) { return NULL; } @@ -823,7 +1008,7 @@ static const PagetableEntry* tbm_find_pageentry(const TIDBitmap* tbm, PagetableE * This may cause the table to exceed the desired memory size. 
It is * up to the caller to call tbm_lossify() at the next safe point if so. */ -static PagetableEntry* tbm_get_pageentry(TIDBitmap* tbm, PagetableEntryNode pageNode) +TBM_TEMPLATE static PagetableEntry* tbm_get_pageentry(TIDBitmap* tbm, PagetableEntryNode pageNode) { PagetableEntry* page = NULL; bool found = false; @@ -841,17 +1026,30 @@ static PagetableEntry* tbm_get_pageentry(TIDBitmap* tbm, PagetableEntryNode page return page; } /* Time to switch from one page to a hashtable */ - tbm_create_pagetable(tbm); + tbm_create_pagetable(tbm); } /* Look up or create an entry */ - page = (PagetableEntry*)hash_search(tbm->pagetable, (void*)&pageNode, HASH_ENTER, &found); + if (type == TBM_SIMPLE_HASH) { + page = pagetable_insert(tbm->simple_pagetable, pageNode.blockNo, &found); + } else { + /* make TBM_DYNAMIC_HASH a default */ + page = (PagetableEntry*)hash_search(tbm->pagetable, (void*)&pageNode, HASH_ENTER, &found); + } } /* Initialize it if not present before */ if (!found) { - rc = memset_s(page, sizeof(PagetableEntry), 0, sizeof(PagetableEntry)); - securec_check(rc, "", ""); + char oldstatus; + if (type == TBM_SIMPLE_HASH) { + oldstatus = page->status; + rc = memset_s(page, sizeof(PagetableEntry), 0, sizeof(PagetableEntry)); + securec_check(rc, "", ""); + page->status = oldstatus; + } else { + rc = memset_s(page, sizeof(PagetableEntry), 0, sizeof(PagetableEntry)); + securec_check(rc, "", ""); + } page->entryNode.blockNo = pageNode.blockNo; page->entryNode.partitionOid = pageNode.partitionOid; page->entryNode.bucketid = pageNode.bucketid; @@ -866,7 +1064,7 @@ static PagetableEntry* tbm_get_pageentry(TIDBitmap* tbm, PagetableEntryNode page /* * tbm_page_is_lossy - is the page marked as lossily stored? */ -static bool tbm_page_is_lossy(const TIDBitmap* tbm, PagetableEntryNode pageNode) +TBM_TEMPLATE static bool tbm_page_is_lossy(const TIDBitmap* tbm, PagetableEntryNode pageNode) { PagetableEntry* page = NULL; BlockNumber chunkPageNo; @@ -881,7 +1079,13 @@ static bool tbm_page_is_lossy(const TIDBitmap* tbm, PagetableEntryNode pageNode) bitno = pageNode.blockNo % tbm->pages_per_chunk; chunkPageNo = pageNode.blockNo - bitno; PagetableEntryNode chunkNode = {chunkPageNo, pageNode.partitionOid, pageNode.bucketid}; - page = (PagetableEntry*)hash_search(tbm->pagetable, (void*)&chunkNode, HASH_FIND, NULL); + + if (type == TBM_SIMPLE_HASH) { + page = pagetable_lookup(tbm->simple_pagetable, chunkNode.blockNo); + } else { + page = (PagetableEntry*)hash_search(tbm->pagetable, (void*)&chunkNode, HASH_FIND, NULL); + } + if (page != NULL && page->ischunk) { int wordnum = WORDNUM(bitno); int bitnum = BITNUM(bitno); @@ -899,19 +1103,20 @@ static bool tbm_page_is_lossy(const TIDBitmap* tbm, PagetableEntryNode pageNode) * This may cause the table to exceed the desired memory size. It is * up to the caller to call tbm_lossify() at the next safe point if so. 
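The save/restore of page->status above is worth a note: simplehash keeps each slot's occupancy state inside the element itself (the char status member added to PagetableEntry), so zeroing the whole entry right after pagetable_insert() would silently mark the slot as empty again. A condensed sketch of the pattern, using plain memset for brevity:

bool found;
PagetableEntry *page = pagetable_insert(tbm->simple_pagetable, blockno, &found);
if (!found) {
    char oldstatus = page->status;              /* slot state owned by simplehash */
    memset(page, 0, sizeof(PagetableEntry));    /* reset the payload only */
    page->status = oldstatus;                   /* keep the slot marked in use */
    page->entryNode.blockNo = blockno;
}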
*/ -static void tbm_mark_page_lossy(TIDBitmap* tbm, PagetableEntryNode pageNode) +TBM_TEMPLATE static void tbm_mark_page_lossy(TIDBitmap* tbm, PagetableEntryNode pageNode) { PagetableEntry* page = NULL; bool found = false; + bool deleted = false; BlockNumber chunkPageNo; int bitno; int wordnum; int bitnum; - int rc = 0; + int rc = 0; /* We force the bitmap into hashtable mode whenever it's lossy */ if (tbm->status != TBM_HASH) { - tbm_create_pagetable(tbm); + tbm_create_pagetable(tbm); } bitno = pageNode.blockNo % tbm->pages_per_chunk; @@ -922,7 +1127,13 @@ static void tbm_mark_page_lossy(TIDBitmap* tbm, PagetableEntryNode pageNode) * chunk header, however, we skip this and handle the case below. */ if (bitno != 0) { - if (hash_search(tbm->pagetable, (void*)&pageNode, HASH_REMOVE, NULL) != NULL) { + if (type == TBM_SIMPLE_HASH) { + deleted = pagetable_delete(tbm->simple_pagetable, pageNode.blockNo); + } else { + deleted = (hash_search(tbm->pagetable, (void*)&pageNode, HASH_REMOVE, NULL) != NULL); + } + + if(deleted) { /* It was present, so adjust counts */ tbm->nentries--; tbm->npages--; /* assume it must have been non-lossy */ @@ -930,12 +1141,25 @@ static void tbm_mark_page_lossy(TIDBitmap* tbm, PagetableEntryNode pageNode) } /* Look up or create entry for chunk-header page */ - page = (PagetableEntry*)hash_search(tbm->pagetable, (void*)&chunkNode, HASH_ENTER, &found); + if (type == TBM_SIMPLE_HASH) { + page = pagetable_insert(tbm->simple_pagetable, chunkNode.blockNo, &found); + } else { + /* make TBM_DYNAMIC_HASH a default */ + page = (PagetableEntry*)hash_search(tbm->pagetable, (void*)&chunkNode, HASH_ENTER, &found); + } /* Initialize it if not present before */ if (!found) { - rc = memset_s(page, sizeof(PagetableEntry), 0, sizeof(PagetableEntry)); - securec_check(rc, "", ""); + char oldstatus; + if (type == TBM_SIMPLE_HASH) { + oldstatus = page->status; + rc = memset_s(page, sizeof(PagetableEntry), 0, sizeof(PagetableEntry)); + securec_check(rc, "", ""); + page->status = oldstatus; + } else { + rc = memset_s(page, sizeof(PagetableEntry), 0, sizeof(PagetableEntry)); + securec_check(rc, "", ""); + } page->entryNode = chunkNode; page->ischunk = true; /* must count it too */ @@ -943,8 +1167,16 @@ static void tbm_mark_page_lossy(TIDBitmap* tbm, PagetableEntryNode pageNode) tbm->nchunks++; } else if (!page->ischunk) { /* chunk header page was formerly non-lossy, make it lossy */ - rc = memset_s(page, sizeof(PagetableEntry), 0, sizeof(PagetableEntry)); - securec_check(rc, "", ""); + char oldstatus; + if (type == TBM_SIMPLE_HASH) { + oldstatus = page->status; + rc = memset_s(page, sizeof(PagetableEntry), 0, sizeof(PagetableEntry)); + securec_check(rc, "", ""); + page->status = oldstatus; + } else { + rc = memset_s(page, sizeof(PagetableEntry), 0, sizeof(PagetableEntry)); + securec_check(rc, "", ""); + } page->entryNode = chunkNode; page->ischunk = true; /* we assume it had some tuple bit(s) set, so mark it lossy */ @@ -963,11 +1195,8 @@ static void tbm_mark_page_lossy(TIDBitmap* tbm, PagetableEntryNode pageNode) /* * tbm_lossify - lose some information to get back under the memory limit */ -static void tbm_lossify(TIDBitmap* tbm) +TBM_TEMPLATE static void tbm_lossify(TIDBitmap* tbm) { - HASH_SEQ_STATUS status; - PagetableEntry* page = NULL; - /* * XXX Really stupid implementation: this just lossifies pages in * essentially random order. 
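A worked example of the chunk addressing used in tbm_mark_page_lossy above, assuming the default 8K BLCKSZ so that pages_per_chunk is BLCKSZ / 32 = 256 for heap relations:

BlockNumber blockno = 1000;
int         bitno   = blockno % 256;     /* 1000 % 256 = 232                         */
BlockNumber chunkNo = blockno - bitno;   /* 768: the header entry covering 768..1023 */

/* Within the chunk header's entry, page 1000 is bit 232 of words[], i.e.
 * words[WORDNUM(232)] bit BITNUM(232). bitno == 0 would mean the page is
 * the chunk header itself, the special case handled separately above. */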
We should be paying some attention to the @@ -980,33 +1209,11 @@ static void tbm_lossify(TIDBitmap* tbm) Assert(!tbm->iterating); Assert(tbm->status == TBM_HASH); - hash_seq_init(&status, tbm->pagetable); - while ((page = (PagetableEntry*)hash_seq_search(&status)) != NULL) { - if (page->ischunk) { - continue; /* already a chunk header */ - } - /* - * If the page would become a chunk header, we won't save anything by - * converting it to lossy, so skip it. - */ - if ((page->entryNode.blockNo % tbm->pages_per_chunk) == 0) { - continue; - } - - /* This does the dirty work ... */ - tbm_mark_page_lossy(tbm, page->entryNode); - - if (tbm->nentries <= tbm->maxentries / 2) { - /* we have done enough */ - hash_seq_term(&status); - break; - } - - /* - * Note: tbm_mark_page_lossy may have inserted a lossy chunk into the - * hashtable. We can continue the same seq_search scan since we do - * not care whether we visit lossy chunks or not. - */ + if (type == TBM_SIMPLE_HASH) { + tbm_lossify_simple_iterate(tbm); + } else { + /* make TBM_DYNAMIC_HASH a default */ + tbm_lossify_generic_iterate(tbm); } /* @@ -1024,46 +1231,133 @@ static void tbm_lossify(TIDBitmap* tbm) } } +TBM_TEMPLATE static inline void tbm_lossify_generic_iterate(TIDBitmap* tbm) +{ + HASH_SEQ_STATUS status; + PagetableEntry* page = NULL; + + hash_seq_init(&status, tbm->pagetable); + while ((page = (PagetableEntry*)hash_seq_search(&status)) != NULL) { + if (page->ischunk) { + continue; /* already a chunk header */ + } + /* + * If the page would become a chunk header, we won't save anything by + * converting it to lossy, so skip it. + */ + if ((page->entryNode.blockNo % tbm->pages_per_chunk) == 0) { + continue; + } + + /* This does the dirty work ... */ + tbm_mark_page_lossy(tbm, page->entryNode); + + if (tbm->nentries <= tbm->maxentries / 2) { + /* we have done enough */ + hash_seq_term(&status); + break; + } + + /* + * Note: tbm_mark_page_lossy may have inserted a lossy chunk into the + * hashtable. We can continue the same seq_search scan since we do + * not care whether we visit lossy chunks or not. + */ + } +} + +TBM_TEMPLATE static inline void tbm_lossify_simple_iterate(TIDBitmap* tbm) +{ + pagetable_iterator i; + PagetableEntry* page = NULL; + + pagetable_start_iterate_at(tbm->simple_pagetable, &i, tbm->lossify_start); + while ((page = pagetable_iterate(tbm->simple_pagetable, &i)) != NULL) { + if (page->ischunk) { + continue; /* already a chunk header */ + } + /* + * If the page would become a chunk header, we won't save anything by + * converting it to lossy, so skip it. + */ + if ((page->entryNode.blockNo % tbm->pages_per_chunk) == 0) { + continue; + } + + /* This does the dirty work ... */ + tbm_mark_page_lossy(tbm, page->entryNode); + + if (tbm->nentries <= tbm->maxentries / 2) { + /* + * we have made enough room. Remember where to start lossifying + * next round, so we evenly iterate over the hashtable. + */ + tbm->lossify_start = i.cur; + break; + } + + /* + * Note: tbm_mark_page_lossy may have inserted a lossy chunk into the + * hashtable and may have deleted the non-lossy chunk. We can + * continue the same hash table scan, since failure to visit one + * element or visiting the newly inserted element,isn't fatal. + */ + } +} + + /* * qsort comparator to handle PagetableEntry pointers. 
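The lossify_start bookkeeping in tbm_lossify_simple_iterate above deserves a short note: always restarting the scan at slot 0 would keep lossifying the same leading region of the table on every round, whereas remembering the iterator position spreads the lossification evenly across the whole table. A condensed sketch of the resume logic (the per-page lossify step is elided):

pagetable_iterator it;
PagetableEntry    *page;

pagetable_start_iterate_at(tbm->simple_pagetable, &it, tbm->lossify_start);
while ((page = pagetable_iterate(tbm->simple_pagetable, &it)) != NULL) {
    /* ... skip chunk headers and would-be headers, lossify the rest ... */
    if (tbm->nentries <= tbm->maxentries / 2) {
        tbm->lossify_start = it.cur;   /* next round resumes here, not at slot 0 */
        break;
    }
}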
*/ -static int tbm_comparator(const void* left, const void* right) +TBM_TEMPLATE static int tbm_comparator(const void* left, const void* right) { PagetableEntryNode l = (*((PagetableEntry* const*)left))->entryNode; PagetableEntryNode r = (*((PagetableEntry* const*)right))->entryNode; - if (l.partitionOid < r.partitionOid) { - return -1; - } else if (l.partitionOid > r.partitionOid) { - return 1; - } else if (l.bucketid < r.bucketid) { - return -1; - } else if (l.bucketid > r.bucketid) { - return 1; - } else if (l.blockNo < r.blockNo) { - return -1; - } else if (l.blockNo > r.blockNo) { - return 1; + if (type == TBM_SIMPLE_HASH) { + if (l.blockNo < r.blockNo) { + return -1; + } else if (l.blockNo > r.blockNo) { + return 1; + } + } else { + if (l.partitionOid < r.partitionOid) { + return -1; + } else if (l.partitionOid > r.partitionOid) { + return 1; + } else if (l.bucketid < r.bucketid) { + return -1; + } else if (l.bucketid > r.bucketid) { + return 1; + } else if (l.blockNo < r.blockNo) { + return -1; + } else if (l.blockNo > r.blockNo) { + return 1; + } } return 0; } +/* + * check if the tid bitmap for global index. + */ bool tbm_is_global(const TIDBitmap* tbm) { - return tbm->isGlobalPart; + return tbm->is_global_part; } -void tbm_set_global(TIDBitmap* tbm, bool isGlobal) +/* + * set tid bitmap is for global index. + */ +void tbm_set_global(TIDBitmap* tbm, bool val) { - tbm->isGlobalPart = isGlobal; + tbm->is_global_part = val; } +/* + * check if the tid bitmap for crossbucket index. + */ bool tbm_is_crossbucket(const TIDBitmap* tbm) { - return tbm->crossbucket; -} - -void tbm_set_crossbucket(TIDBitmap* tbm, bool crossbucket) -{ - tbm->crossbucket = crossbucket; + return tbm->is_crossbucket; } diff --git a/src/common/backend/utils/mmgr/mcxt.cpp b/src/common/backend/utils/mmgr/mcxt.cpp index f4b722e12..b2e8a4985 100644 --- a/src/common/backend/utils/mmgr/mcxt.cpp +++ b/src/common/backend/utils/mmgr/mcxt.cpp @@ -1137,6 +1137,67 @@ void* MemoryContextAllocZeroAlignedDebug(MemoryContext context, Size size, const return ret; } + +/* + * MemoryContextAllocExtended + * Allocate space within the specified context using the given flags. + * + * This method supports all three memory allocation flags which makes it + * suitable for almost all circumstances. + */ +void* MemoryContextAllocExtendedDebug(MemoryContext context, Size size, int flags, const char* file, int line) +{ + void* ret = NULL; + bool allocsz_is_valid = false; + + Assert(MemoryContextIsValid(context)); +#ifdef MEMORY_CONTEXT_CHECKING + PreventActionOnSealedContext(context); +#endif + + /* Make sure memory allocation size is valid. 
*/ + if ((flags & MCXT_ALLOC_HUGE) != 0) { + allocsz_is_valid = AllocHugeSizeIsValid(size); + } else { + allocsz_is_valid = AllocSizeIsValid(size); + } + + if (!allocsz_is_valid) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid memory alloc request size %lu in %s:%d", (unsigned long)size, file, line))); + } + + context->isReset = false; + + /* Invoke memory allocator */ + ret = (*context->methods->alloc)(context, 0, size, file, line); + if ((flags & MCXT_ALLOC_NO_OOM) != 0) { + /* Do nothing */ + } else if (unlikely(ret == NULL)) { + ereport(ERROR, (errcode(ERRCODE_OUT_OF_LOGICAL_MEMORY), errmsg("memory is temporarily unavailable"), + errdetail("Failed on request of size %lu bytes under queryid %lu in %s:%d.", + (unsigned long)size, u_sess->debug_query_id, file, line))); + } + + /* Set aligned if MCXT_ALLOC_ZERO */ + if ((flags & MCXT_ALLOC_ZERO) != 0) { + MemSetAligned(ret, 0, size); + } + +#ifdef MEMORY_CONTEXT_CHECKING + /* check if the memory context is out of control */ + MemoryContextCheckMaxSize(context, size, file, line); +#endif + + /* check if the session used memory is beyond the limitation */ + if (unlikely(STATEMENT_MAX_MEM)) { + MemoryContextCheckSessionMemory(context, size, file, line); + } + InsertMemoryAllocInfo(ret, context, file, line, size); + + return ret; +} + /* * palloc_extended * palloc with flags, it will return NULL while OOM happend. diff --git a/src/gausskernel/optimizer/path/costsize.cpp b/src/gausskernel/optimizer/path/costsize.cpp index 6dfc60d29..4b90d7fca 100755 --- a/src/gausskernel/optimizer/path/costsize.cpp +++ b/src/gausskernel/optimizer/path/costsize.cpp @@ -1424,7 +1424,7 @@ bool has_lossy_pages(RelOptInfo *baserel, const double &pages_fetched, double &l */ double heap_pages = Min(pages_fetched, baserel->pages); const long work_mem_size = u_sess->attr.attr_memory.work_mem * 1024L; - long maxentries = tbm_calculate_entries(work_mem_size); + long maxentries = tbm_calculate_entries(work_mem_size, false); if (maxentries >= heap_pages) { return false; } diff --git a/src/gausskernel/runtime/executor/nodeBitmapAnd.cpp b/src/gausskernel/runtime/executor/nodeBitmapAnd.cpp index 6a72dbe42..d51aa96fe 100644 --- a/src/gausskernel/runtime/executor/nodeBitmapAnd.cpp +++ b/src/gausskernel/runtime/executor/nodeBitmapAnd.cpp @@ -112,8 +112,10 @@ Node* MultiExecBitmapAnd(BitmapAndState* node) */ for (i = 0; i < nplans; i++) { PlanState* subnode = bitmapplans[i]; - subnode->hbktScanSlot.currSlot = node->ps.hbktScanSlot.currSlot; TIDBitmap* subresult = NULL; + TBMHandler tbm_handler; + + subnode->hbktScanSlot.currSlot = node->ps.hbktScanSlot.currSlot; subresult = (TIDBitmap*)MultiExecProcNode(subnode); if (subresult == NULL || !IsA(subresult, TIDBitmap)) @@ -125,6 +127,8 @@ Node* MultiExecBitmapAnd(BitmapAndState* node) if (result == NULL) { result = subresult; /* first subplan */ } else { + /* get tbm handlers */ + tbm_handler = tbm_get_handler(result); /* * If the global tbm intersect with non-global tbm, * set the final result to non-global tbm. 
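Returning to the MemoryContextAllocExtendedDebug routine added above: it is presumably the entry point the simplehash allocator relies on, since that allocator wants zeroed (and potentially huge) chunks. A hedged usage sketch; the non-Debug wrapper name and the exact flag spellings are assumed from the flags tested in the function body:

Size request = 8192;            /* illustrative size */
void *buf = MemoryContextAllocExtended(CurrentMemoryContext, request,
                                       MCXT_ALLOC_ZERO | MCXT_ALLOC_NO_OOM);
if (buf == NULL) {
    /* soft OOM: caller can degrade gracefully instead of raising ERROR */
}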
@@ -137,7 +141,7 @@ Node* MultiExecBitmapAnd(BitmapAndState* node) tbm_set_global(result, false); } - tbm_intersect(result, subresult); + tbm_handler._intersect(result, subresult); tbm_free(subresult); } diff --git a/src/gausskernel/runtime/executor/nodeBitmapHeapscan.cpp b/src/gausskernel/runtime/executor/nodeBitmapHeapscan.cpp index cf028dbd2..416e29645 100644 --- a/src/gausskernel/runtime/executor/nodeBitmapHeapscan.cpp +++ b/src/gausskernel/runtime/executor/nodeBitmapHeapscan.cpp @@ -258,6 +258,7 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node) ExprContext* econtext = NULL; TableScanDesc scan = NULL; TIDBitmap* tbm = NULL; + TBMHandler tbm_handler; TBMIterator* tbmiterator = NULL; TBMIterateResult* tbmres = NULL; HBktTblScanDesc hpscan = NULL; @@ -301,6 +302,7 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node) */ if (tbm == NULL) { tbm = (TIDBitmap*)MultiExecProcNode(outerPlanState(node)); + tbm_handler = tbm_get_handler(tbm); if (tbm == NULL || !IsA(tbm, TIDBitmap)) { ereport(ERROR, @@ -310,12 +312,12 @@ static TupleTableSlot* BitmapHeapTblNext(BitmapHeapScanState* node) } node->tbm = tbm; - node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm); + node->tbmiterator = tbmiterator = tbm_handler._begin_iterate(tbm); node->tbmres = tbmres = NULL; #ifdef USE_PREFETCH if (u_sess->storage_cxt.target_prefetch_pages > 0) { - node->prefetch_iterator = prefetch_iterator = tbm_begin_iterate(tbm); + node->prefetch_iterator = prefetch_iterator = tbm_handler._begin_iterate(tbm); node->prefetch_pages = 0; node->prefetch_target = -1; } diff --git a/src/gausskernel/runtime/executor/nodeBitmapIndexscan.cpp b/src/gausskernel/runtime/executor/nodeBitmapIndexscan.cpp index 4136698fc..19073fe94 100644 --- a/src/gausskernel/runtime/executor/nodeBitmapIndexscan.cpp +++ b/src/gausskernel/runtime/executor/nodeBitmapIndexscan.cpp @@ -34,21 +34,6 @@ #include "nodes/makefuncs.h" static void ExecInitNextPartitionForBitmapIndexScan(BitmapIndexScanState* node); -/* If bitmapscan uses global partition index, set tbm to global */ -static inline void GPIUpdateTbmType(BitmapIndexScanState* node, TIDBitmap* tbm) -{ - if (RelationIsGlobalIndex(node->biss_RelationDesc)) { - tbm_set_global(tbm, true); - } -} - -/* if bitmapscan uses crossbucket index, set tbm->crossbucket to true */ -static inline void CBIUpdateTbmType(BitmapIndexScanState* node, TIDBitmap* tbm) -{ - if (RelationIsCrossBucketIndex(node->biss_RelationDesc)) { - tbm_set_crossbucket(tbm, true); - } -} /* ---------------------------------------------------------------- * MultiExecBitmapIndexScan(node) @@ -98,13 +83,9 @@ Node* MultiExecBitmapIndexScan(BitmapIndexScanState* node) node->biss_result = NULL; /* reset for next time */ } else { /* XXX should we use less than u_sess->attr.attr_memory.work_mem for this? */ - tbm = TbmCreate(u_sess->attr.attr_memory.work_mem * 1024L, isUstore); - - /* If bitmapscan uses global partition index, set tbm to global. */ - GPIUpdateTbmType(node, tbm); - - /* If bitmapscan uses crossbucket index, set tbm->crossbucket to true. */ - CBIUpdateTbmType(node, tbm); + long maxbytes = u_sess->attr.attr_memory.work_mem * 1024L; + tbm = tbm_create(maxbytes, RelationIsGlobalIndex(node->biss_RelationDesc), + RelationIsCrossBucketIndex(node->biss_RelationDesc), isUstore); } /* Cross-bucket index scan should not switch the index bucket. 
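One detail of the nodeBitmapHeapscan.cpp hunk above: _begin_iterate is invoked twice on the same bitmap. Because iteration state lives in the TBMIterator rather than in the TIDBitmap, several iterators can walk one bitmap independently, which is what lets the prefetch iterator run ahead of the main one. A simplified sketch:

TBMHandler   h         = tbm_get_handler(tbm);
TBMIterator *main_iter = h._begin_iterate(tbm);   /* drives the actual heap fetches      */
TBMIterator *pref_iter = h._begin_iterate(tbm);   /* kept a few pages ahead for prefetch */

TBMIterateResult *r;
while ((r = tbm_iterate(main_iter)) != NULL) {
    /* advance pref_iter up to prefetch_target pages ahead and issue
     * PrefetchBuffer() for those blocks, then fetch r->blockno itself */
}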
*/ diff --git a/src/gausskernel/runtime/executor/nodeBitmapOr.cpp b/src/gausskernel/runtime/executor/nodeBitmapOr.cpp index 4145d629e..4dea450e4 100644 --- a/src/gausskernel/runtime/executor/nodeBitmapOr.cpp +++ b/src/gausskernel/runtime/executor/nodeBitmapOr.cpp @@ -126,15 +126,11 @@ Node* MultiExecBitmapOr(BitmapOrState* node) /* first subplan */ if (result == NULL) { /* XXX should we use less than u_sess->attr.attr_memory.work_mem for this? */ - result = TbmCreate(u_sess->attr.attr_memory.work_mem * 1024L, isUstore); - /* If bitmapscan uses global partition index, set tbm to global. */ - if (RelationIsGlobalIndex(((BitmapIndexScanState*)subnode)->biss_RelationDesc)) { - tbm_set_global(result, true); - } - /* If bitmapscan uses crossbucket index, set tbm to crossbucket. */ - if (RelationIsCrossBucketIndex(((BitmapIndexScanState*)subnode)->biss_RelationDesc)) { - tbm_set_crossbucket(result, true); - } + long maxbytes = u_sess->attr.attr_memory.work_mem * 1024L; + result = tbm_create(maxbytes, + RelationIsGlobalIndex(((BitmapIndexScanState *)subnode)->biss_RelationDesc), + RelationIsCrossBucketIndex(((BitmapIndexScanState *)subnode)->biss_RelationDesc), + isUstore); } ((BitmapIndexScanState*)subnode)->biss_result = result; @@ -157,13 +153,14 @@ Node* MultiExecBitmapOr(BitmapOrState* node) if (result == NULL) { result = subresult; /* first subplan */ } else { + TBMHandler tbm_handler = tbm_get_handler(result); if (tbm_is_global(result) != tbm_is_global(subresult)) { ereport(ERROR, (errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), errmsg( "do not support bitmap index scan for global index and local index simultaneously."))); } - tbm_union(result, subresult); + tbm_handler._union(result, subresult); tbm_free(subresult); } } diff --git a/src/gausskernel/storage/access/gin/gindatapage.cpp b/src/gausskernel/storage/access/gin/gindatapage.cpp index 926a561ed..842999f73 100644 --- a/src/gausskernel/storage/access/gin/gindatapage.cpp +++ b/src/gausskernel/storage/access/gin/gindatapage.cpp @@ -178,10 +178,11 @@ int GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm) nitems = ginPostingListDecodeAllSegmentsToTbm(segment, len, tbm); } else { + TBMHandler tbm_handler = tbm_get_handler(tbm); uncompressed = dataLeafPageGetUncompressed(page, &nitems); if (nitems > 0) - tbm_add_tuples(tbm, uncompressed, nitems, false); + tbm_handler._add_tuples(tbm, uncompressed, nitems, false, InvalidOid, InvalidBktId); } return nitems; diff --git a/src/gausskernel/storage/access/gin/ginget.cpp b/src/gausskernel/storage/access/gin/ginget.cpp index d78776941..20d27ae91 100644 --- a/src/gausskernel/storage/access/gin/ginget.cpp +++ b/src/gausskernel/storage/access/gin/ginget.cpp @@ -132,10 +132,12 @@ static bool collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack, GinSca { OffsetNumber attnum; Form_pg_attribute attr; + TBMHandler tbm_handler; /* Initialize empty bitmap result */ if (!isColStore) { - scanEntry->matchBitmap = TbmCreate(u_sess->attr.attr_memory.work_mem * 1024L); + scanEntry->matchBitmap = tbm_create(u_sess->attr.attr_memory.work_mem * 1024L); + tbm_handler = tbm_get_handler(scanEntry->matchBitmap); } /* Null query cannot partial-match anything */ @@ -280,7 +282,7 @@ static bool collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack, GinSca ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd); if (!isColStore) { - tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false); + tbm_handler._add_tuples(scanEntry->matchBitmap, ipd, nipd, false, InvalidOid, InvalidBktId); } else { if 
(scanEntry->matchList == NULL) { scanEntry->matchList = (ItemPointer)palloc(nipd * sizeof(ItemPointerData)); @@ -374,7 +376,8 @@ restartScanEntry: } if (!isColStore && entry->matchBitmap && !tbm_is_empty(entry->matchBitmap)) { - entry->matchIterator = tbm_begin_iterate(entry->matchBitmap); + TBMHandler tbm_handler = tbm_get_handler(entry->matchBitmap); + entry->matchIterator = tbm_handler._begin_iterate(entry->matchBitmap); entry->isFinished = false; } @@ -1569,6 +1572,7 @@ static void scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) pendingPosition pos; Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO); BlockNumber blkno; + TBMHandler tbm_handler = tbm_get_handler(tbm); Oid partHeapOid = IndexScanGetPartHeapOid(scan); *ntids = 0; @@ -1628,7 +1632,7 @@ static void scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) MemoryContextReset(so->tempCtx); if (match) { - tbm_add_tuples(tbm, &pos.item, 1, recheck, partHeapOid); + tbm_handler._add_tuples(tbm, &pos.item, 1, recheck, partHeapOid, InvalidBktId); (*ntids)++; } } @@ -1652,6 +1656,7 @@ Datum gingetbitmap(PG_FUNCTION_ARGS) int64 ntids; ItemPointerData iptr; bool recheck = false; + TBMHandler tbm_handler = tbm_get_handler(tbm); Oid partHeapOid = IndexScanGetPartHeapOid(scan); /* @@ -1691,9 +1696,9 @@ Datum gingetbitmap(PG_FUNCTION_ARGS) break; if (ItemPointerIsLossyPage(&iptr)) - tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr), partHeapOid); + tbm_handler._add_page(tbm, ItemPointerGetBlockNumber(&iptr), partHeapOid, InvalidBktId); else - tbm_add_tuples(tbm, &iptr, 1, recheck, partHeapOid); + tbm_handler._add_tuples(tbm, &iptr, 1, recheck, partHeapOid, InvalidBktId); ntids++; } diff --git a/src/gausskernel/storage/access/gin/ginpostinglist.cpp b/src/gausskernel/storage/access/gin/ginpostinglist.cpp index c83c67930..85a23be62 100644 --- a/src/gausskernel/storage/access/gin/ginpostinglist.cpp +++ b/src/gausskernel/storage/access/gin/ginpostinglist.cpp @@ -361,9 +361,10 @@ int ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int len, TIDBitmap { int ndecoded; ItemPointer items; + TBMHandler tbm_handler = tbm_get_handler(tbm); items = ginPostingListDecodeAllSegments(ptr, len, &ndecoded); - tbm_add_tuples(tbm, items, ndecoded, false); + tbm_handler._add_tuples(tbm, items, ndecoded, false, InvalidOid, InvalidBktId); pfree(items); return ndecoded; } diff --git a/src/gausskernel/storage/access/gist/gistget.cpp b/src/gausskernel/storage/access/gist/gistget.cpp index 5b053894e..e837e45fb 100644 --- a/src/gausskernel/storage/access/gist/gistget.cpp +++ b/src/gausskernel/storage/access/gist/gistget.cpp @@ -288,11 +288,12 @@ static void gistScanPage(IndexScanDesc scan, const GISTSearchItem *pageItem, con continue; if (tbm && GistPageIsLeaf(page)) { + TBMHandler tbm_handler = tbm_get_handler(tbm); /* * getbitmap scan, so just push heap tuple TIDs into the bitmap * without worrying about ordering */ - tbm_add_tuples(tbm, &it->t_tid, 1, recheck, partHeapOid); + tbm_handler._add_tuples(tbm, &it->t_tid, 1, recheck, partHeapOid, InvalidBktId); (*ntids)++; } else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page)) { /* diff --git a/src/gausskernel/storage/access/hash/hash.cpp b/src/gausskernel/storage/access/hash/hash.cpp index c124c9499..e7cfe74d7 100644 --- a/src/gausskernel/storage/access/hash/hash.cpp +++ b/src/gausskernel/storage/access/hash/hash.cpp @@ -357,8 +357,9 @@ Datum hashgetbitmap(PG_FUNCTION_ARGS) /* Save tuple ID, and continue scanning */ if (add_tuple) { + TBMHandler 
tbm_handler = tbm_get_handler(tbm); /* Note we mark the tuple ID as requiring recheck */ - tbm_add_tuples(tbm, &(so->hashso_heappos), 1, true, partHeapOid); + tbm_handler._add_tuples(tbm, &(so->hashso_heappos), 1, true, partHeapOid, InvalidBktId); ntids++; } diff --git a/src/gausskernel/storage/access/nbtree/nbtree.cpp b/src/gausskernel/storage/access/nbtree/nbtree.cpp index 5f409dd74..083d68b41 100644 --- a/src/gausskernel/storage/access/nbtree/nbtree.cpp +++ b/src/gausskernel/storage/access/nbtree/nbtree.cpp @@ -320,6 +320,9 @@ int64 btgetbitmap_internal(IndexScanDesc scan, TIDBitmap *tbm) BTScanOpaque so = (BTScanOpaque)scan->opaque; int64 ntids = 0; ItemPointer heapTid; + Oid currPartOid; + int2 bucketid; + TBMHandler tbm_handler = tbm_get_handler(tbm); /* * If we have any array keys, initialize them. @@ -339,9 +342,9 @@ int64 btgetbitmap_internal(IndexScanDesc scan, TIDBitmap *tbm) if (_bt_first(scan, ForwardScanDirection)) { /* Save tuple ID, and continue scanning */ heapTid = &scan->xs_ctup.t_self; - Oid currPartOid = so->currPos.items[so->currPos.itemIndex].partitionOid; - int2 bucketid = so->currPos.items[so->currPos.itemIndex].bucketid; - tbm_add_tuples(tbm, heapTid, 1, false, currPartOid, bucketid); + currPartOid = so->currPos.items[so->currPos.itemIndex].partitionOid; + bucketid = so->currPos.items[so->currPos.itemIndex].bucketid; + tbm_handler._add_tuples(tbm, heapTid, 1, false, currPartOid, bucketid); ntids++; for (;;) { @@ -360,7 +363,7 @@ int64 btgetbitmap_internal(IndexScanDesc scan, TIDBitmap *tbm) heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid; currPartOid = so->currPos.items[so->currPos.itemIndex].partitionOid; bucketid = so->currPos.items[so->currPos.itemIndex].bucketid; - tbm_add_tuples(tbm, heapTid, 1, false, currPartOid, bucketid); + tbm_handler._add_tuples(tbm, heapTid, 1, false, currPartOid, bucketid); ntids++; } } diff --git a/src/gausskernel/storage/access/spgist/spgscan.cpp b/src/gausskernel/storage/access/spgist/spgscan.cpp index 962201d3c..d58b2f2d0 100644 --- a/src/gausskernel/storage/access/spgist/spgscan.cpp +++ b/src/gausskernel/storage/access/spgist/spgscan.cpp @@ -489,8 +489,9 @@ static void spgWalk(Relation index, SpGistScanOpaque so, bool scanWholeIndex, st /* storeRes subroutine for getbitmap case */ static void storeBitmap(SpGistScanOpaque so, ItemPointer heapPtr, Datum leafValue, bool isnull, bool recheck) -{ - tbm_add_tuples(so->tbm, heapPtr, 1, recheck, so->partHeapOid); +{ + TBMHandler tbm_handler = tbm_get_handler(so->tbm); + tbm_handler._add_tuples(so->tbm, heapPtr, 1, recheck, so->partHeapOid, InvalidBktId); so->ntids++; } diff --git a/src/gausskernel/storage/access/ubtree/ubtree.cpp b/src/gausskernel/storage/access/ubtree/ubtree.cpp index af1e8f85d..196f01e4e 100644 --- a/src/gausskernel/storage/access/ubtree/ubtree.cpp +++ b/src/gausskernel/storage/access/ubtree/ubtree.cpp @@ -291,6 +291,8 @@ Datum ubtgetbitmap(PG_FUNCTION_ARGS) BTScanOpaque so = (BTScanOpaque)scan->opaque; int64 ntids = 0; ItemPointer heapTid; + Oid currPartOid; + TBMHandler tbm_handler = tbm_get_handler(tbm); WHITEBOX_TEST_STUB("ubtgetbitmap", WhiteboxDefaultErrorEmit); @@ -312,8 +314,8 @@ Datum ubtgetbitmap(PG_FUNCTION_ARGS) if (UBTreeFirst(scan, ForwardScanDirection)) { /* Save tuple ID, and continue scanning */ heapTid = &scan->xs_ctup.t_self; - Oid currPartOid = so->currPos.items[so->currPos.itemIndex].partitionOid; - tbm_add_tuples(tbm, heapTid, 1, scan->xs_recheck_itup, currPartOid); + currPartOid = so->currPos.items[so->currPos.itemIndex].partitionOid; 
+ tbm_handler._add_tuples(tbm, heapTid, 1, scan->xs_recheck_itup, currPartOid, InvalidBktId); ntids++; for (;;) { @@ -331,7 +333,7 @@ Datum ubtgetbitmap(PG_FUNCTION_ARGS) /* Save tuple ID, and continue scanning */ heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid; currPartOid = so->currPos.items[so->currPos.itemIndex].partitionOid; - tbm_add_tuples(tbm, heapTid, 1, scan->xs_recheck_itup, currPartOid); + tbm_handler._add_tuples(tbm, heapTid, 1, scan->xs_recheck_itup, currPartOid, InvalidBktId); ntids++; } } diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h new file mode 100644 index 000000000..946d7e44d --- /dev/null +++ b/src/include/lib/simplehash.h @@ -0,0 +1,1141 @@ +/* + * simplehash.h + * + * When included this file generates a "templated" (by way of macros) + * open-addressing hash table implementation specialized to user-defined + * types. + * + * It's probably not worthwhile to generate such a specialized implementation + * for hash tables that aren't performance or space sensitive. + * + * Compared to dynahash, simplehash has the following benefits: + * + * - Due to the "templated" code generation has known structure sizes and no + * indirect function calls (which show up substantially in dynahash + * profiles). These features considerably increase speed for small + * entries. + * - Open addressing has better CPU cache behavior than dynahash's chained + * hashtables. + * - The generated interface is type-safe and easier to use than dynahash, + * though at the cost of more complex setup. + * - Allocates memory in a MemoryContext or another allocator with a + * malloc/free style interface (which isn't easily usable in a shared + * memory context) + * - Does not require the overhead of a separate memory context. + * + * Usage notes: + * + * To generate a hash-table and associated functions for a use case several + * macros have to be #define'ed before this file is included. Including + * the file #undef's all those, so a new hash table can be generated + * afterwards. + * The relevant parameters are: + * - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo' + * will result in hash table type 'foo_hash' and functions like + * 'foo_insert'/'foo_lookup' and so forth. + * - SH_ELEMENT_TYPE - type of the contained elements + * - SH_KEY_TYPE - type of the hashtable's key + * - SH_DECLARE - if defined function prototypes and type declarations are + * generated + * - SH_DEFINE - if defined function definitions are generated + * - SH_SCOPE - in which scope (e.g. extern, static inline) do function + * declarations reside + * - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead, + * use this to allocate bytes. The allocator must zero the returned space. + * - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions + * are defined, so you can supply your own + * The following parameters are only relevant when SH_DEFINE is defined: + * - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key + * - SH_EQUAL(table, a, b) - compare two table keys + * - SH_HASH_KEY(table, key) - generate hash for the key + * - SH_STORE_HASH - if defined the hash is stored in the elements + * - SH_GET_HASH(tb, a) - return the field to store the hash in + * + * The element type is required to contain a "status" member that can store + * the range of values defined in the SH_STATUS enum. 
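As a concrete illustration of the macro protocol described above, a minimal file-local instantiation might look like the sketch below. The names int4hash and Int4HashEntry, the int64 counter payload, and the choice of murmurhash32() as the hash function are assumptions made for this example; they are not part of the patch.

    #include "utils/hashutils.h"            /* murmurhash32() */

    typedef struct Int4HashEntry
    {
        int32   key;                        /* SH_KEY */
        int64   count;                      /* caller payload */
        char    status;                     /* required: holds SH_STATUS values */
    } Int4HashEntry;

    #define SH_PREFIX int4hash
    #define SH_ELEMENT_TYPE Int4HashEntry
    #define SH_KEY_TYPE int32
    #define SH_KEY key
    #define SH_HASH_KEY(tb, k) murmurhash32((uint32) (k))
    #define SH_EQUAL(tb, a, b) ((a) == (b))
    #define SH_SCOPE static inline
    #define SH_DECLARE
    #define SH_DEFINE
    #include "lib/simplehash.h"

Including the header with SH_DECLARE and SH_DEFINE set generates int4hash_create(), int4hash_insert(), int4hash_lookup(), int4hash_delete() and the other functions listed below, all specialized to Int4HashEntry.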
+ * + * While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because + * the hash table implementation needs to compare hashes to move elements + * (particularly when growing the hash), it's preferable, if possible, to + * store the element's hash in the element's data type. If the hash is so + * stored, the hash table will also compare hashes before calling SH_EQUAL + * when comparing two keys. + * + * For convenience the hash table create functions accept a void pointer + * that will be stored in the hash table type's member private_data. This + * allows callbacks to reference caller provided data. + * + * For examples of usage look at tidbitmap.c (file local definition) and + * execnodes.h/execGrouping.c (exposed declaration, file local + * implementation). + * + * Hash table design: + * + * The hash table design chosen is a variant of linear open-addressing. The + * reason for doing so is that linear addressing is CPU cache & pipeline + * friendly. The biggest disadvantages of simple linear addressing schemes + * are highly variable lookup times due to clustering, and deletions + * leaving a lot of tombstones around. To address these issues a variant + * of "robin hood" hashing is employed. Robin hood hashing optimizes + * chaining lengths by moving elements close to their optimal bucket + * ("rich" elements), out of the way if a to-be-inserted element is further + * away from its optimal position (i.e. it's "poor"). While that can make + * insertions slower, the average lookup performance is a lot better, and + * higher fill factors can be used in a still performant manner. To avoid + * tombstones - which normally solve the issue that a deleted node's + * presence is relevant to determine whether a lookup needs to continue + * looking or is done - buckets following a deleted element are shifted + * backwards, unless they're empty or already at their optimal position.
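The displacement rule described above can be pictured with a small standalone sketch; this only illustrates the decision made on a collision and is not the generated code itself:

    /*
     * The incoming key steals the resident's bucket when the incoming key is
     * already farther from its optimal bucket than the resident is from its
     * own; otherwise the probe continues at the next bucket.
     */
    static inline bool
    incoming_displaces_resident(uint32 insert_dist,      /* distance probed so far by the new key */
                                uint32 resident_optimal, /* resident entry's optimal bucket */
                                uint32 resident_bucket,  /* bucket the resident actually occupies */
                                uint32 table_size)       /* power-of-2 number of buckets */
    {
        uint32 resident_dist = (resident_optimal <= resident_bucket)
                                   ? resident_bucket - resident_optimal
                                   : table_size + resident_bucket - resident_optimal;

        return insert_dist > resident_dist;
    }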
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/lib/simplehash.h + */ + +#include "port/pg_bitutils.h" + +/* helpers */ +#define SH_MAKE_PREFIX(a) CppConcat(a, _) +#define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX), name) +#define SH_MAKE_NAME_(a, b) CppConcat(a, b) + +/* name macros for: */ + +/* type declarations */ +#define SH_TYPE SH_MAKE_NAME(hash) +#define SH_STATUS SH_MAKE_NAME(status) +#define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY) +#define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE) +#define SH_ITERATOR SH_MAKE_NAME(iterator) + +/* function declarations */ +#define SH_CREATE SH_MAKE_NAME(create) +#define SH_DESTROY SH_MAKE_NAME(destroy) +#define SH_RESET SH_MAKE_NAME(reset) +#define SH_INSERT SH_MAKE_NAME(insert) +#define SH_INSERT_HASH SH_MAKE_NAME(insert_hash) +#define SH_DELETE_ITEM SH_MAKE_NAME(delete_item) +#define SH_DELETE SH_MAKE_NAME(delete) +#define SH_LOOKUP SH_MAKE_NAME(lookup) +#define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash) +#define SH_GROW SH_MAKE_NAME(grow) +#define SH_START_ITERATE SH_MAKE_NAME(start_iterate) +#define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at) +#define SH_ITERATE SH_MAKE_NAME(iterate) +#define SH_ALLOCATE SH_MAKE_NAME(allocate) +#define SH_FREE SH_MAKE_NAME(free) +#define SH_STAT SH_MAKE_NAME(stat) + +/* internal helper functions (no externally visible prototypes) */ +#define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters) +#define SH_NEXT SH_MAKE_NAME(next) +#define SH_PREV SH_MAKE_NAME(prev) +#define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance) +#define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket) +#define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash) +#define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal) +#define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal) + +/* generate forward declarations necessary to use the hash table */ +#ifdef SH_DECLARE + +/* type definitions */ +typedef struct SH_TYPE { + /* + * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash + * tables. Note that the maximum number of elements is lower + * (SH_MAX_FILLFACTOR) + */ + uint64 size; + + /* how many elements have valid contents */ + uint32 members; + + /* mask for bucket and size calculations, based on size */ + uint32 sizemask; + + /* boundary after which to grow hashtable */ + uint32 grow_threshold; + + /* hash buckets */ + SH_ELEMENT_TYPE *data; + +#ifndef SH_RAW_ALLOCATOR + /* memory context to use for allocations */ + MemoryContext ctx; +#endif + + /* user defined data, useful for callbacks */ + void *private_data; +} SH_TYPE; + +typedef enum SH_STATUS { SH_STATUS_EMPTY = 0x00, SH_STATUS_IN_USE = 0x01 } SH_STATUS; + +typedef struct SH_ITERATOR { + uint32 cur; /* current element */ + uint32 end; + bool done; /* iterator exhausted? 
*/ +} SH_ITERATOR; + +/* externally visible function prototypes */ +#ifdef SH_RAW_ALLOCATOR +/* _hash _create(uint32 nelements, void *private_data) */ +SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data); +#else +/* + * _hash _create(MemoryContext ctx, uint32 nelements, + * void *private_data) + */ +SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data); +#endif + +/* void _destroy(_hash *tb) */ +SH_SCOPE void SH_DESTROY(SH_TYPE *tb); + +/* void _reset(_hash *tb) */ +SH_SCOPE void SH_RESET(SH_TYPE *tb); + +/* void _grow(_hash *tb, uint64 newsize) */ +SH_SCOPE void SH_GROW(SH_TYPE *tb, uint64 newsize); + +/* *_insert(_hash *tb, key, bool *found) */ +SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found); + +/* + * *_insert_hash(_hash *tb, key, uint32 hash, + * bool *found) + */ +SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash, bool *found); + +/* *_lookup(_hash *tb, key) */ +SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE *tb, SH_KEY_TYPE key); + +/* *_lookup_hash(_hash *tb, key, uint32 hash) */ +SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash); + +/* void _delete_item(_hash *tb, *entry) */ +SH_SCOPE void SH_DELETE_ITEM(SH_TYPE *tb, SH_ELEMENT_TYPE *entry); + +/* bool _delete(_hash *tb, key) */ +SH_SCOPE bool SH_DELETE(SH_TYPE *tb, SH_KEY_TYPE key); + +/* void _start_iterate(_hash *tb, _iterator *iter) */ +SH_SCOPE void SH_START_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter); + +/* + * void _start_iterate_at(_hash *tb, _iterator *iter, + * uint32 at) + */ +SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE *tb, SH_ITERATOR *iter, uint32 at); + +/* *_iterate(_hash *tb, _iterator *iter) */ +SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter); + +/* void _stat(_hash *tb */ +SH_SCOPE void SH_STAT(SH_TYPE *tb); + +#endif /* SH_DECLARE */ + +/* generate implementation of the hash table */ +#ifdef SH_DEFINE + +#ifndef SH_RAW_ALLOCATOR +#include "utils/memutils.h" +#endif + +/* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */ +#define SH_MAX_SIZE (((uint64)PG_UINT32_MAX) + 1) + +/* normal fillfactor, unless already close to maximum */ +#ifndef SH_FILLFACTOR +#define SH_FILLFACTOR (0.9) +#endif +/* increase fillfactor if we otherwise would error out */ +#define SH_MAX_FILLFACTOR (0.98) +/* grow if actual and optimal location bigger than */ +#ifndef SH_GROW_MAX_DIB +#define SH_GROW_MAX_DIB 25 +#endif +/* grow if more than elements to move when inserting */ +#ifndef SH_GROW_MAX_MOVE +#define SH_GROW_MAX_MOVE 150 +#endif +#ifndef SH_GROW_MIN_FILLFACTOR +/* but do not grow due to SH_GROW_MAX_* if below */ +#define SH_GROW_MIN_FILLFACTOR 0.1 +#endif + +#ifdef SH_STORE_HASH +#define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey)) +#else +#define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey)) +#endif + +/* + * Wrap the following definitions in include guards, to avoid multiple + * definition errors if this header is included more than once. The rest of + * the file deliberately has no include guards, because it can be included + * with different parameters to define functions and types with non-colliding + * names. + */ +#ifndef SIMPLEHASH_H +#define SIMPLEHASH_H + +#ifdef FRONTEND +#define sh_error(...) \ + do { \ + pg_log_fatal(__VA_ARGS__); \ + exit(1); \ + } while (0) +#define sh_log(...) pg_log_info(__VA_ARGS__) +#else +#define sh_error(...) 
elog(ERROR, __VA_ARGS__) +#define sh_log(...) elog(LOG, __VA_ARGS__) +#endif + +#endif + +/* calculate ceil(log base 2) of num */ +static inline uint64 sh_log2(uint64 num) +{ + int i; + uint64 limit; + + for (i = 0, limit = 1; limit < num; i++, limit <<= 1) + ; + return i; +} + +/* calculate first power of 2 >= num */ +static inline uint64 sh_pow2(uint64 num) +{ + return ((uint64)1) << sh_log2(num); +} + +/* + * Compute sizing parameters for hashtable. Called when creating and growing + * the hashtable. + */ +static inline void SH_COMPUTE_PARAMETERS(SH_TYPE *tb, uint64 newsize) +{ + uint64 size; + + /* supporting zero sized hashes would complicate matters */ + size = Max(newsize, 2); + + /* round up size to the next power of 2, that's how bucketing works */ + size = sh_pow2(size); + Assert(size <= SH_MAX_SIZE); + + /* + * Verify that allocation of ->data is possible on this platform, without + * overflowing Size. + */ + if (unlikely((((uint64)sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2)) + sh_error("hash table too large"); + + /* now set size */ + tb->size = size; + tb->sizemask = (uint32)(size - 1); + + /* + * Compute the next threshold at which we need to grow the hash table + * again. + */ + if (tb->size == SH_MAX_SIZE) + tb->grow_threshold = ((double)tb->size) * SH_MAX_FILLFACTOR; + else + tb->grow_threshold = ((double)tb->size) * SH_FILLFACTOR; +} + +/* return the optimal bucket for the hash */ +static inline uint32 SH_INITIAL_BUCKET(SH_TYPE *tb, uint32 hash) +{ + return hash & tb->sizemask; +} + +/* return next bucket after the current, handling wraparound */ +static inline uint32 SH_NEXT(SH_TYPE *tb, uint32 curelem, uint32 startelem) +{ + curelem = (curelem + 1) & tb->sizemask; + + Assert(curelem != startelem); + + return curelem; +} + +/* return bucket before the current, handling wraparound */ +static inline uint32 SH_PREV(SH_TYPE *tb, uint32 curelem, uint32 startelem) +{ + curelem = (curelem - 1) & tb->sizemask; + + Assert(curelem != startelem); + + return curelem; +} + +/* return distance between bucket and its optimal position */ +static inline uint32 SH_DISTANCE_FROM_OPTIMAL(SH_TYPE *tb, uint32 optimal, uint32 bucket) +{ + if (optimal <= bucket) + return bucket - optimal; + else + return (tb->size + bucket) - optimal; +} + +static inline uint32 SH_ENTRY_HASH(SH_TYPE *tb, SH_ELEMENT_TYPE *entry) +{ +#ifdef SH_STORE_HASH + return SH_GET_HASH(tb, entry); +#else + return SH_HASH_KEY(tb, entry->SH_KEY); +#endif +} + +/* default memory allocator function */ +static inline void *SH_ALLOCATE(SH_TYPE *type, Size size); +static inline void SH_FREE(SH_TYPE *type, void *pointer); + +#ifndef SH_USE_NONDEFAULT_ALLOCATOR + +/* default memory allocator function */ +static inline void *SH_ALLOCATE(SH_TYPE *type, Size size) +{ +#ifdef SH_RAW_ALLOCATOR + return SH_RAW_ALLOCATOR(size); +#else + return MemoryContextAllocExtended(type->ctx, size, MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO); +#endif +} + +/* default memory free function */ +static inline void SH_FREE(SH_TYPE *type, void *pointer) +{ + pfree_ext(pointer); +} + +#endif + +/* + * Create a hash table with enough space for `nelements` distinct members. + * Memory for the hash table is allocated from the passed-in context. If + * desired, the array of elements can be allocated using a passed-in allocator; + * this could be useful in order to place the array of elements in a shared + * memory, or in a context that will outlive the rest of the hash table. 
+ * Memory other than for the array of elements will still be allocated from + * the passed-in context. + */ +#ifdef SH_RAW_ALLOCATOR +SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data) +#else +SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data) +#endif +{ + SH_TYPE *tb; + uint64 size; + +#ifdef SH_RAW_ALLOCATOR + tb = (SH_TYPE *)SH_RAW_ALLOCATOR(sizeof(SH_TYPE)); +#else + tb = (SH_TYPE *)MemoryContextAllocZero(ctx, sizeof(SH_TYPE)); + tb->ctx = ctx; +#endif + tb->private_data = private_data; + + /* increase nelements by fillfactor, want to store nelements elements */ + size = Min((double)SH_MAX_SIZE, ((double)nelements) / SH_FILLFACTOR); + + SH_COMPUTE_PARAMETERS(tb, size); + + tb->data = (SH_ELEMENT_TYPE *)SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size); + + return tb; +} + +/* destroy a previously created hash table */ +SH_SCOPE void SH_DESTROY(SH_TYPE *tb) +{ + SH_FREE(tb, tb->data); + pfree(tb); +} + +/* reset the contents of a previously created hash table */ +SH_SCOPE void SH_RESET(SH_TYPE *tb) +{ + errno_t rc = EOK; + rc = memset_s(tb->data, sizeof(SH_ELEMENT_TYPE) * tb->size, 0, sizeof(SH_ELEMENT_TYPE) * tb->size); + securec_check(rc, "\0", "\0"); + tb->members = 0; +} + +/* + * Grow a hash table to at least `newsize` buckets. + * + * Usually this will automatically be called by insertions/deletions, when + * necessary. But resizing to the exact input size can be advantageous + * performance-wise, when known at some point. + */ +SH_SCOPE void SH_GROW(SH_TYPE *tb, uint64 newsize) +{ + uint64 oldsize = tb->size; + SH_ELEMENT_TYPE *olddata = tb->data; + SH_ELEMENT_TYPE *newdata; + uint32 i; + uint32 startelem = 0; + uint32 copyelem; + + Assert(oldsize == sh_pow2(oldsize)); + Assert(oldsize != SH_MAX_SIZE); + Assert(oldsize < newsize); + + /* compute parameters for new table */ + SH_COMPUTE_PARAMETERS(tb, newsize); + + tb->data = (SH_ELEMENT_TYPE *)SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size); + + newdata = tb->data; + + /* + * Copy entries from the old data to newdata. We theoretically could use + * SH_INSERT here, to avoid code duplication, but that's more general than + * we need. We neither want tb->members increased, nor do we need to do + * deal with deleted elements, nor do we need to compare keys. So a + * special-cased implementation is lot faster. As resizing can be time + * consuming and frequent, that's worthwhile to optimize. + * + * To be able to simply move entries over, we have to start not at the + * first bucket (i.e olddata[0]), but find the first bucket that's either + * empty, or is occupied by an entry at its optimal position. Such a + * bucket has to exist in any table with a load factor under 1, as not all + * buckets are occupied, i.e. there always has to be an empty bucket. By + * starting at such a bucket we can move the entries to the larger table, + * without having to deal with conflicts. 
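A quick worked example of the sizing arithmetic performed by SH_CREATE() and SH_COMPUTE_PARAMETERS(), using the default fill factors; the numbers are illustrative only:

    /* room for 1000 elements requested by the caller */
    uint64 size = sh_pow2((uint64) (1000 / SH_FILLFACTOR));    /* 1000 / 0.9 = 1111, rounded up to 2048 buckets */
    uint32 grow_threshold = (uint32) (size * SH_FILLFACTOR);   /* 2048 * 0.9 = 1843 members before the table doubles */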
+ */ + + /* search for the first element in the hash that's not wrapped around */ + for (i = 0; i < oldsize; i++) { + SH_ELEMENT_TYPE *oldentry = &olddata[i]; + uint32 hash; + uint32 optimal; + + if (oldentry->status != SH_STATUS_IN_USE) { + startelem = i; + break; + } + + hash = SH_ENTRY_HASH(tb, oldentry); + optimal = SH_INITIAL_BUCKET(tb, hash); + + if (optimal == i) { + startelem = i; + break; + } + } + + /* and copy all elements in the old table */ + copyelem = startelem; + for (i = 0; i < oldsize; i++) { + SH_ELEMENT_TYPE *oldentry = &olddata[copyelem]; + errno_t rc = EOK; + + if (oldentry->status == SH_STATUS_IN_USE) { + uint32 hash; + uint32 startelem; + uint32 curelem; + SH_ELEMENT_TYPE *newentry; + + hash = SH_ENTRY_HASH(tb, oldentry); + startelem = SH_INITIAL_BUCKET(tb, hash); + curelem = startelem; + + /* find empty element to put data into */ + while (true) { + newentry = &newdata[curelem]; + + if (newentry->status == SH_STATUS_EMPTY) { + break; + } + + curelem = SH_NEXT(tb, curelem, startelem); + } + + /* copy entry to new slot */ + rc = memcpy_s(newentry, sizeof(SH_ELEMENT_TYPE), oldentry, sizeof(SH_ELEMENT_TYPE)); + securec_check(rc, "\0", "\0"); + } + + /* can't use SH_NEXT here, would use new size */ + copyelem++; + if (copyelem >= oldsize) { + copyelem = 0; + } + } + + SH_FREE(tb, olddata); +} + +/* + * This is a separate static inline function, so it can be reliably be inlined + * into its wrapper functions even if SH_SCOPE is extern. + */ +static inline SH_ELEMENT_TYPE *SH_INSERT_HASH_INTERNAL(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash, bool *found) +{ + uint32 startelem; + uint32 curelem; + SH_ELEMENT_TYPE *data; + uint32 insertdist; + +restart: + insertdist = 0; + + /* + * We do the grow check even if the key is actually present, to avoid + * doing the check inside the loop. This also lets us avoid having to + * re-find our position in the hashtable after resizing. + * + * Note that this also reached when resizing the table due to + * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE. + */ + if (unlikely(tb->members >= tb->grow_threshold)) { + if (unlikely(tb->size == SH_MAX_SIZE)) + sh_error("hash table size exceeded"); + + /* + * When optimizing, it can be very useful to print these out. + */ + /* SH_STAT(tb); */ + SH_GROW(tb, tb->size * 2); + /* SH_STAT(tb); */ + } + + /* perform insert, start bucket search at optimal location */ + data = tb->data; + startelem = SH_INITIAL_BUCKET(tb, hash); + curelem = startelem; + while (true) { + uint32 curdist; + uint32 curhash; + uint32 curoptimal; + SH_ELEMENT_TYPE *entry = &data[curelem]; + + /* any empty bucket can directly be used */ + if (entry->status == SH_STATUS_EMPTY) { + tb->members++; + entry->SH_KEY = key; +#ifdef SH_STORE_HASH + SH_GET_HASH(tb, entry) = hash; +#endif + entry->status = SH_STATUS_IN_USE; + *found = false; + return entry; + } + + /* + * If the bucket is not empty, we either found a match (in which case + * we're done), or we have to decide whether to skip over or move the + * colliding entry. When the colliding element's distance to its + * optimal position is smaller than the to-be-inserted entry's, we + * shift the colliding entry (and its followers) forward by one. 
+ */ + + if (SH_COMPARE_KEYS(tb, hash, key, entry)) { + Assert(entry->status == SH_STATUS_IN_USE); + *found = true; + return entry; + } + + curhash = SH_ENTRY_HASH(tb, entry); + curoptimal = SH_INITIAL_BUCKET(tb, curhash); + curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem); + + if (insertdist > curdist) { + SH_ELEMENT_TYPE *lastentry = entry; + uint32 emptyelem = curelem; + uint32 moveelem; + int32 emptydist = 0; + errno_t rc = EOK; + + /* find next empty bucket */ + while (true) { + SH_ELEMENT_TYPE *emptyentry; + + emptyelem = SH_NEXT(tb, emptyelem, startelem); + emptyentry = &data[emptyelem]; + + if (emptyentry->status == SH_STATUS_EMPTY) { + lastentry = emptyentry; + break; + } + + /* + * To avoid negative consequences from overly imbalanced + * hashtables, grow the hashtable if collisions would require + * us to move a lot of entries. The most likely cause of such + * imbalance is filling a (currently) small table, from a + * currently big one, in hash-table order. Don't grow if the + * hashtable would be too empty, to prevent quick space + * explosion for some weird edge cases. + */ + if (unlikely(++emptydist > SH_GROW_MAX_MOVE) && + ((double)tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR) { + tb->grow_threshold = 0; + goto restart; + } + } + + /* shift forward, starting at last occupied element */ + + /* + * TODO: This could be optimized to be one memcpy in many cases, + * excepting wrapping around at the end of ->data. Hasn't shown up + * in profiles so far though. + */ + moveelem = emptyelem; + while (moveelem != curelem) { + SH_ELEMENT_TYPE *moveentry; + + moveelem = SH_PREV(tb, moveelem, startelem); + moveentry = &data[moveelem]; + + + rc = memcpy_s(lastentry, sizeof(SH_ELEMENT_TYPE), moveentry, sizeof(SH_ELEMENT_TYPE)); + securec_check(rc, "\0", "\0"); + lastentry = moveentry; + } + + /* and fill the now empty spot */ + tb->members++; + + entry->SH_KEY = key; +#ifdef SH_STORE_HASH + SH_GET_HASH(tb, entry) = hash; +#endif + entry->status = SH_STATUS_IN_USE; + *found = false; + return entry; + } + + curelem = SH_NEXT(tb, curelem, startelem); + insertdist++; + + /* + * To avoid negative consequences from overly imbalanced hashtables, + * grow the hashtable if collisions lead to large runs. The most + * likely cause of such imbalance is filling a (currently) small + * table, from a currently big one, in hash-table order. Don't grow + * if the hashtable would be too empty, to prevent quick space + * explosion for some weird edge cases. + */ + if (unlikely(insertdist > SH_GROW_MAX_DIB) && ((double)tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR) { + tb->grow_threshold = 0; + goto restart; + } + } +} + +/* + * Insert the key key into the hash-table, set *found to true if the key + * already exists, false otherwise. Returns the hash-table entry in either + * case. + */ +SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found) +{ + uint32 hash = SH_HASH_KEY(tb, key); + + return SH_INSERT_HASH_INTERNAL(tb, key, hash, found); +} + +/* + * Insert the key key into the hash-table using an already-calculated + * hash. Set *found to true if the key already exists, false + * otherwise. Returns the hash-table entry in either case. + */ +SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash, bool *found) +{ + return SH_INSERT_HASH_INTERNAL(tb, key, hash, found); +} + +/* + * This is a separate static inline function, so it can be reliably be inlined + * into its wrapper functions even if SH_SCOPE is extern. 
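A hedged usage sketch of the generated insert API, reusing the hypothetical int4hash instantiation from the header comment above; the *found flag distinguishes initializing a brand-new entry from updating an existing one:

    int4hash_hash *tb = int4hash_create(CurrentMemoryContext, 128, NULL);
    bool found;
    Int4HashEntry *entry;

    entry = int4hash_insert(tb, 42, &found);
    if (!found)
        entry->count = 0;   /* new entry: only key, hash and status were initialized */
    entry->count++;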
+ */ +static inline SH_ELEMENT_TYPE *SH_LOOKUP_HASH_INTERNAL(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash) +{ + const uint32 startelem = SH_INITIAL_BUCKET(tb, hash); + uint32 curelem = startelem; + + while (true) { + SH_ELEMENT_TYPE *entry = &tb->data[curelem]; + + if (entry->status == SH_STATUS_EMPTY) { + return NULL; + } + + Assert(entry->status == SH_STATUS_IN_USE); + + if (SH_COMPARE_KEYS(tb, hash, key, entry)) + return entry; + + /* + * TODO: we could stop search based on distance. If the current + * buckets's distance-from-optimal is smaller than what we've skipped + * already, the entry doesn't exist. Probably only do so if + * SH_STORE_HASH is defined, to avoid re-computing hashes? + */ + + curelem = SH_NEXT(tb, curelem, startelem); + } +} + +/* + * Lookup up entry in hash table. Returns NULL if key not present. + */ +SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE *tb, SH_KEY_TYPE key) +{ + uint32 hash = SH_HASH_KEY(tb, key); + + return SH_LOOKUP_HASH_INTERNAL(tb, key, hash); +} + +/* + * Lookup up entry in hash table using an already-calculated hash. + * + * Returns NULL if key not present. + */ +SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE *tb, SH_KEY_TYPE key, uint32 hash) +{ + return SH_LOOKUP_HASH_INTERNAL(tb, key, hash); +} + +/* + * Delete entry from hash table by key. Returns whether to-be-deleted key was + * present. + */ +SH_SCOPE bool SH_DELETE(SH_TYPE *tb, SH_KEY_TYPE key) +{ + uint32 hash = SH_HASH_KEY(tb, key); + uint32 startelem = SH_INITIAL_BUCKET(tb, hash); + uint32 curelem = startelem; + + while (true) { + SH_ELEMENT_TYPE *entry = &tb->data[curelem]; + + if (entry->status == SH_STATUS_EMPTY) + return false; + + if (entry->status == SH_STATUS_IN_USE && SH_COMPARE_KEYS(tb, hash, key, entry)) { + SH_ELEMENT_TYPE *lastentry = entry; + errno_t rc = EOK; + + tb->members--; + + /* + * Backward shift following elements till either an empty element + * or an element at its optimal position is encountered. + * + * While that sounds expensive, the average chain length is short, + * and deletions would otherwise require tombstones. + */ + while (true) { + SH_ELEMENT_TYPE *curentry; + uint32 curhash; + uint32 curoptimal; + + curelem = SH_NEXT(tb, curelem, startelem); + curentry = &tb->data[curelem]; + + if (curentry->status != SH_STATUS_IN_USE) { + lastentry->status = SH_STATUS_EMPTY; + break; + } + + curhash = SH_ENTRY_HASH(tb, curentry); + curoptimal = SH_INITIAL_BUCKET(tb, curhash); + + /* current is at optimal position, done */ + if (curoptimal == curelem) { + lastentry->status = SH_STATUS_EMPTY; + break; + } + + /* shift */ + rc = memcpy_s(lastentry, sizeof(SH_ELEMENT_TYPE), curentry, sizeof(SH_ELEMENT_TYPE)); + securec_check(rc, "\0", "\0"); + + lastentry = curentry; + } + + return true; + } + + /* TODO: return false; if distance too big */ + + curelem = SH_NEXT(tb, curelem, startelem); + } +} + +/* + * Delete entry from hash table by entry pointer + */ +SH_SCOPE void SH_DELETE_ITEM(SH_TYPE *tb, SH_ELEMENT_TYPE *entry) +{ + SH_ELEMENT_TYPE *lastentry = entry; + uint32 hash = SH_ENTRY_HASH(tb, entry); + uint32 startelem = SH_INITIAL_BUCKET(tb, hash); + uint32 curelem; + errno_t rc = EOK; + + /* Calculate the index of 'entry' */ + curelem = entry - &tb->data[0]; + + tb->members--; + + /* + * Backward shift following elements till either an empty element or an + * element at its optimal position is encountered. + * + * While that sounds expensive, the average chain length is short, and + * deletions would otherwise require tombstones. 
+ */ + while (true) { + SH_ELEMENT_TYPE *curentry; + uint32 curhash; + uint32 curoptimal; + + curelem = SH_NEXT(tb, curelem, startelem); + curentry = &tb->data[curelem]; + + if (curentry->status != SH_STATUS_IN_USE) { + lastentry->status = SH_STATUS_EMPTY; + break; + } + + curhash = SH_ENTRY_HASH(tb, curentry); + curoptimal = SH_INITIAL_BUCKET(tb, curhash); + + /* current is at optimal position, done */ + if (curoptimal == curelem) { + lastentry->status = SH_STATUS_EMPTY; + break; + } + + /* shift */ + rc = memcpy_s(lastentry, sizeof(SH_ELEMENT_TYPE), curentry, sizeof(SH_ELEMENT_TYPE)); + securec_check(rc, "\0", "\0"); + + lastentry = curentry; + } +} + +/* + * Initialize iterator. + */ +SH_SCOPE void SH_START_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter) +{ + int i; + uint64 startelem = PG_UINT64_MAX; + + /* + * Search for the first empty element. As deletions during iterations are + * supported, we want to start/end at an element that cannot be affected + * by elements being shifted. + */ + for (i = 0; i < tb->size; i++) { + SH_ELEMENT_TYPE *entry = &tb->data[i]; + + if (entry->status != SH_STATUS_IN_USE) { + startelem = i; + break; + } + } + + Assert(startelem < SH_MAX_SIZE); + + /* + * Iterate backwards, that allows the current element to be deleted, even + * if there are backward shifts + */ + iter->cur = startelem; + iter->end = iter->cur; + iter->done = false; +} + +/* + * Initialize iterator to a specific bucket. That's really only useful for + * cases where callers are partially iterating over the hashspace, and that + * iteration deletes and inserts elements based on visited entries. Doing that + * repeatedly could lead to an unbalanced keyspace when always starting at the + * same position. + */ +SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE *tb, SH_ITERATOR *iter, uint32 at) +{ + /* + * Iterate backwards, that allows the current element to be deleted, even + * if there are backward shifts. + */ + iter->cur = at & tb->sizemask; /* ensure at is within a valid range */ + iter->end = iter->cur; + iter->done = false; +} + +/* + * Iterate over all entries in the hash-table. Return the next occupied entry, + * or NULL if done. + * + * During iteration the current entry in the hash table may be deleted, + * without leading to elements being skipped or returned twice. Additionally + * the rest of the table may be modified (i.e. there can be insertions or + * deletions), but if so, there's neither a guarantee that all nodes are + * visited at least once, nor a guarantee that a node is visited at most once. + */ +SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE *tb, SH_ITERATOR *iter) +{ + while (!iter->done) { + SH_ELEMENT_TYPE *elem; + + elem = &tb->data[iter->cur]; + + /* next element in backward direction */ + iter->cur = (iter->cur - 1) & tb->sizemask; + + if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask)) + iter->done = true; + if (elem->status == SH_STATUS_IN_USE) { + return elem; + } + } + + return NULL; +} + +/* + * Report some statistics about the state of the hashtable. For + * debugging/profiling purposes only. 
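Continuing the hypothetical int4hash example, the iteration protocol defined above is typically used as in the sketch below; deleting the entry currently returned by the iterator is explicitly allowed:

    int4hash_iterator iter;
    Int4HashEntry *entry;

    int4hash_start_iterate(tb, &iter);
    while ((entry = int4hash_iterate(tb, &iter)) != NULL)
    {
        if (entry->count == 0)
            int4hash_delete_item(tb, entry);    /* safe for the current entry only */
    }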
+ */ +SH_SCOPE void SH_STAT(SH_TYPE *tb) +{ + uint32 max_chain_length = 0; + uint32 total_chain_length = 0; + double avg_chain_length; + double fillfactor; + uint32 i; + + uint32 *collisions = (uint32 *)palloc0(tb->size * sizeof(uint32)); + uint32 total_collisions = 0; + uint32 max_collisions = 0; + double avg_collisions; + + for (i = 0; i < tb->size; i++) { + uint32 hash; + uint32 optimal; + uint32 dist; + SH_ELEMENT_TYPE *elem; + + elem = &tb->data[i]; + + if (elem->status != SH_STATUS_IN_USE) + continue; + + hash = SH_ENTRY_HASH(tb, elem); + optimal = SH_INITIAL_BUCKET(tb, hash); + dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i); + + if (dist > max_chain_length) + max_chain_length = dist; + total_chain_length += dist; + + collisions[optimal]++; + } + + for (i = 0; i < tb->size; i++) { + uint32 curcoll = collisions[i]; + + if (curcoll == 0) + continue; + + /* single contained element is not a collision */ + curcoll--; + total_collisions += curcoll; + if (curcoll > max_collisions) + max_collisions = curcoll; + } + + if (tb->members > 0) { + fillfactor = tb->members / ((double)tb->size); + avg_chain_length = ((double)total_chain_length) / tb->members; + avg_collisions = ((double)total_collisions) / tb->members; + } else { + fillfactor = 0; + avg_chain_length = 0; + avg_collisions = 0; + } + + sh_log("size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, " + "total_collisions: %u, max_collisions: %u, avg_collisions: %f", + tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length, total_collisions, + max_collisions, avg_collisions); +} + +#endif /* SH_DEFINE */ + +/* undefine external parameters, so next hash table can be defined */ +#undef SH_PREFIX +#undef SH_KEY_TYPE +#undef SH_KEY +#undef SH_ELEMENT_TYPE +#undef SH_HASH_KEY +#undef SH_SCOPE +#undef SH_DECLARE +#undef SH_DEFINE +#undef SH_GET_HASH +#undef SH_STORE_HASH +#undef SH_USE_NONDEFAULT_ALLOCATOR +#undef SH_EQUAL + +/* undefine locally declared macros */ +#undef SH_MAKE_PREFIX +#undef SH_MAKE_NAME +#undef SH_MAKE_NAME_ +#undef SH_FILLFACTOR +#undef SH_MAX_FILLFACTOR +#undef SH_GROW_MAX_DIB +#undef SH_GROW_MAX_MOVE +#undef SH_GROW_MIN_FILLFACTOR +#undef SH_MAX_SIZE + +/* types */ +#undef SH_TYPE +#undef SH_STATUS +#undef SH_STATUS_EMPTY +#undef SH_STATUS_IN_USE +#undef SH_ITERATOR + +/* external function names */ +#undef SH_CREATE +#undef SH_DESTROY +#undef SH_RESET +#undef SH_INSERT +#undef SH_INSERT_HASH +#undef SH_DELETE_ITEM +#undef SH_DELETE +#undef SH_LOOKUP +#undef SH_LOOKUP_HASH +#undef SH_GROW +#undef SH_START_ITERATE +#undef SH_START_ITERATE_AT +#undef SH_ITERATE +#undef SH_ALLOCATE +#undef SH_FREE +#undef SH_STAT + +/* internal function names */ +#undef SH_COMPUTE_PARAMETERS +#undef SH_COMPARE_KEYS +#undef SH_INITIAL_BUCKET +#undef SH_NEXT +#undef SH_PREV +#undef SH_DISTANCE_FROM_OPTIMAL +#undef SH_ENTRY_HASH +#undef SH_INSERT_HASH_INTERNAL +#undef SH_LOOKUP_HASH_INTERNAL \ No newline at end of file diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h index 31cc8ad8a..9fd19dbc1 100644 --- a/src/include/nodes/tidbitmap.h +++ b/src/include/nodes/tidbitmap.h @@ -28,8 +28,8 @@ * Actual bitmap representation is private to tidbitmap.c. Callers can * do IsA(x, TIDBitmap) on it, but nothing else. 
*/ -typedef struct TIDBitmap TIDBitmap; +typedef struct TIDBitmap TIDBitmap; /* Likewise, TBMIterator is private */ typedef struct TBMIterator TBMIterator; @@ -44,27 +44,47 @@ typedef struct { OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; } TBMIterateResult; +/* + * We want the caller to choose the hash implementation that suits it + * best: the dynamic hash table or the more cache-friendly simple hash + * table. Therefore a set of handlers is provided to avoid unnecessary + * branches inside this performance-critical area. + * + * All handlers defined here are templated on the hash table the caller + * uses, and the caller can invoke them with little to no overhead. + * + * Most external uses of tbm-related functions are exposed through this + * handler interface. Functions that are not templated, such as + * tbm_iterate, are not included in the handler set. + */ +typedef struct TBMHandler { + /* page generic handlers */ + void (*_add_tuples)(TIDBitmap*, const ItemPointer, int, bool, Oid, int2); + void (*_add_page)(TIDBitmap*, BlockNumber, Oid, int2); + + /* page operator handlers */ + void (*_union)(TIDBitmap*, const TIDBitmap*); + void (*_intersect)(TIDBitmap*, const TIDBitmap*); + + /* iterator handlers */ + TBMIterator* (*_begin_iterate)(TIDBitmap*); +} TBMHandler; + /* function prototypes in nodes/tidbitmap.c */ -extern TIDBitmap* TbmCreate(long maxbytes, bool is_ustore = false); +extern TIDBitmap* tbm_create(long maxbytes, bool is_global_part = true, bool is_crossbucket = true, bool is_ustore = false); extern void tbm_free(TIDBitmap* tbm); -extern long tbm_calculate_entries(double maxbytes); +extern long tbm_calculate_entries(double maxbytes, bool complex_key); -extern void tbm_add_tuples( - TIDBitmap* tbm, const ItemPointer tids, int ntids, bool recheck, Oid partitionOid = InvalidOid, - int2 bucketid = InvalidBktId); -extern void tbm_add_page(TIDBitmap* tbm, BlockNumber pageno, Oid partitionOid = InvalidOid, - int2 bucketid = InvalidBktId); - -extern void tbm_union(TIDBitmap* a, const TIDBitmap* b); -extern void tbm_intersect(TIDBitmap* a, const TIDBitmap* b); - -extern bool tbm_is_empty(const TIDBitmap* tbm); - -extern TBMIterator* tbm_begin_iterate(TIDBitmap* tbm); +/* iterator prototypes in nodes/tidbitmap.c */ extern TBMIterateResult* tbm_iterate(TBMIterator* iterator); extern void tbm_end_iterate(TBMIterator* iterator); + +/* function prototypes for TIDBitmap member checks */ +extern void tbm_set_global(TIDBitmap* tbm, bool val); extern bool tbm_is_global(const TIDBitmap* tbm); -extern void tbm_set_global(TIDBitmap* tbm, bool isGlobal); +extern bool tbm_is_empty(const TIDBitmap* tbm); extern bool tbm_is_crossbucket(const TIDBitmap* tbm); -extern void tbm_set_crossbucket(TIDBitmap* tbm, bool crossbucket); +extern TBMHandler tbm_get_handler(TIDBitmap* tbm); + #endif /* TIDBITMAP_H */ diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 940da8257..fe55c924b 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -129,6 +129,21 @@ static inline int pg_rightmost_one_pos64(uint64 word) #endif /* HAVE__BUILTIN_CTZ */ } +static inline uint64 pg_nextpower2_64(uint64 num) +{ + Assert(num > 0 && num <= PG_UINT64_MAX / 2 + 1); + + /* + * A power-of-2 number has only 1 bit set. Subtracting 1 from such a number + * will turn on all previous bits, resulting in no common bits being set + * between num and num-1.
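A hedged sketch of how callers are expected to use the handler interface declared above, mirroring the index AM changes earlier in this patch. The literal block/offset numbers, the byte budget, and the decision to leave the boolean tbm_create() flags at their defaults are assumptions for this example:

    long maxbytes = 4 * 1024 * 1024;            /* whatever budget the planner chose */
    TIDBitmap *tbm = tbm_create(maxbytes);      /* boolean flags left at their defaults */
    TBMHandler h = tbm_get_handler(tbm);        /* resolved once, outside the hot loop */
    ItemPointerData tid;

    ItemPointerSet(&tid, 1, 1);
    h._add_tuples(tbm, &tid, 1, false, InvalidOid, InvalidBktId);

    TBMIterator *iterator = h._begin_iterate(tbm);
    TBMIterateResult *res;
    while ((res = tbm_iterate(iterator)) != NULL)
    {
        /* visit the heap page described by *res */
    }
    tbm_end_iterate(iterator);
    tbm_free(tbm);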
+ */ + if ((num & (num - 1)) == 0) + return num; /* already power 2 */ + + return ((uint64) 1) << (pg_leftmost_one_pos64(num) + 1); +} + /* Count the number of one-bits in a uint32 or uint64 */ extern int (*pg_popcount32)(uint32 word); extern int (*pg_popcount64)(uint64 word); diff --git a/src/include/utils/hashutils.h b/src/include/utils/hashutils.h index a27eb6b9b..bfc08716d 100644 --- a/src/include/utils/hashutils.h +++ b/src/include/utils/hashutils.h @@ -42,4 +42,10 @@ static inline uint32 murmurhash32(uint32 data) return h; } +static inline uint32 hash_combine(uint32 a, uint32 b) +{ + a ^= b + 0x9e3779b9 + (a << 6) + (a >> 2); + return a; +} + #endif /* HASHUTILS_H */ diff --git a/src/include/utils/palloc.h b/src/include/utils/palloc.h index a09420797..fa9bd4d48 100644 --- a/src/include/utils/palloc.h +++ b/src/include/utils/palloc.h @@ -72,6 +72,8 @@ extern THR_LOCAL PGDLLIMPORT MemoryContext TopMemoryContext; #define MemoryContextAllocZero(context, size) MemoryContextAllocZeroDebug(context, size, __FILE__, __LINE__) #define MemoryContextAllocZeroAligned(context, size) \ MemoryContextAllocZeroAlignedDebug(context, size, __FILE__, __LINE__) +#define MemoryContextAllocExtended(context, size, flags) \ + MemoryContextAllocExtendedDebug(context, size, flags, __FILE__, __LINE__) #define MemoryContextStrdup(context, size) MemoryContextStrdupDebug(context, size, __FILE__, __LINE__) #define repalloc(pointer, size) repallocDebug(pointer, size, __FILE__, __LINE__) #define repalloc_noexcept(pointer, size) repalloc_noexcept_Debug(pointer, size, __FILE__, __LINE__) @@ -90,6 +92,7 @@ extern void* MemoryContextAllocHugeDebug(MemoryContext context, Size size, const extern void* repallocHugeDebug(void* pointer, Size size, const char* file, int line); extern void* MemoryContextAllocZeroDebug(MemoryContext context, Size size, const char* file, int line); extern void* MemoryContextAllocZeroAlignedDebug(MemoryContext context, Size size, const char* file, int line); +extern void* MemoryContextAllocExtendedDebug(MemoryContext context, Size size, int flags, const char* file, int line); extern char* MemoryContextStrdupDebug(MemoryContext context, const char* string, const char* file, int line); extern void* MemoryContextMemalignAllocDebug(MemoryContext context, Size align, Size size, const char* file, int line); extern void MemoryContextMemalignFree(MemoryContext context, void* pointer); diff --git a/src/test/regress/output/bitmapops.source b/src/test/regress/output/bitmapops.source index 28bdabe7c..5739a8782 100644 --- a/src/test/regress/output/bitmapops.source +++ b/src/test/regress/output/bitmapops.source @@ -25,6 +25,7 @@ EXPLAIN (analyze on, costs off, timing off) SELECT count(*) FROM bmscantest WHER Aggregate (actual rows=1 loops=1) -> Bitmap Heap Scan on bmscantest (actual rows=20 loops=1) Recheck Cond: ((b = 1) AND (a = 1)) + Rows Removed by Index Recheck: 1257 --? 
Heap Blocks: exact=.* -> BitmapAnd (actual rows=0 loops=1) -> Bitmap Index Scan on i_bmtest_b (actual rows=1051 loops=1) @@ -32,7 +33,7 @@ EXPLAIN (analyze on, costs off, timing off) SELECT count(*) FROM bmscantest WHER -> Bitmap Index Scan on i_bmtest_a (actual rows=1170 loops=1) Index Cond: (a = 1) --?.* -(10 rows) +(11 rows) SELECT count(*) FROM bmscantest WHERE a = 1 AND b = 1; count @@ -47,7 +48,7 @@ EXPLAIN (analyze on, costs off, timing off) SELECT count(*) FROM bmscantest WHER Aggregate (actual rows=1 loops=1) -> Bitmap Heap Scan on bmscantest (actual rows=2201 loops=1) Recheck Cond: ((a = 1) OR (b = 1)) - Rows Removed by Index Recheck: 14419 + Rows Removed by Index Recheck: 17287 --? Heap Blocks: exact=.* lossy=.* -> BitmapOr (actual rows=0 loops=1) -> Bitmap Index Scan on i_bmtest_a (actual rows=1170 loops=1) diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 64ba19842..468b62679 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1479,6 +1479,9 @@ SERIALIZABLEXIDTAG SERVICE_STATUS SERVICE_STATUS_HANDLE SERVICE_TABLE_ENTRY +SH_TYPE +SH_ITERATOR +SH_STATUS SHA1_CTX SHA224_CTX SHA256_CTX
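Finally, a hedged illustration of what the new murmurhash32()/hash_combine() helpers are typically used for together: folding a multi-field key, such as the block number, partition OID and bucket id of a PagetableEntryNode, into one 32-bit hash. This is one plausible scheme, not necessarily the exact one used by tidbitmap.cpp's simplehash instantiation:

    #include "utils/hashutils.h"

    static inline uint32
    pagetable_node_hash(BlockNumber blockNo, Oid partitionOid, int2 bucketid)
    {
        uint32 h = murmurhash32((uint32) blockNo);

        h = hash_combine(h, murmurhash32((uint32) partitionOid));
        h = hash_combine(h, murmurhash32((uint32) (uint16) bucketid));
        return h;
    }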