From c69efe24e4a9301e1a6e0d437ecf6aea129e67c5 Mon Sep 17 00:00:00 2001 From: wanghao19920907 Date: Mon, 6 Feb 2023 00:57:37 -0800 Subject: [PATCH 1/3] =?UTF-8?q?=E5=BF=85=E8=A6=81=E6=97=B6=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0hashjoin=E7=9A=84bucket=E6=95=B0=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../optimizer/commands/explain.cpp | 19 +-- src/gausskernel/runtime/executor/nodeHash.cpp | 126 +++++++++++++++++- src/include/executor/hashjoin.h | 5 + .../cstore_replication_table_delete.out | 4 +- .../regress/expected/hw_explain_pretty2.out | 2 +- .../regress/expected/hw_explain_pretty3.out | 26 ++-- .../regress/expected/upsert_where_sublink.out | 4 +- 7 files changed, 154 insertions(+), 32 deletions(-) diff --git a/src/gausskernel/optimizer/commands/explain.cpp b/src/gausskernel/optimizer/commands/explain.cpp index 51b218c02..19dfdfed4 100755 --- a/src/gausskernel/optimizer/commands/explain.cpp +++ b/src/gausskernel/optimizer/commands/explain.cpp @@ -216,7 +216,7 @@ static void show_datanode_time(ExplainState* es, PlanState* planstate); static void ShowStreamRunNodeInfo(Stream* stream, ExplainState* es); static void ShowRunNodeInfo(const ExecNodes* en, ExplainState* es, const char* qlabel); template -static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbatch_original, int nbuckets, long spacePeakKb); +static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbuckets_original, int nbatch_original, int nbuckets, long spacePeakKb); static void ShowRoughCheckInfo(ExplainState* es, Instrumentation* instrument, int nodeIdx, int smpIdx); static void show_hashAgg_info(AggState* hashaggstate, ExplainState* es); static void ExplainPrettyList(List* data, ExplainState* es); @@ -4284,10 +4284,11 @@ static void show_sort_info(SortState* sortstate, ExplainState* es) } template -static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbatch_original, int nbuckets, long spacePeakKb) +static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbuckets_original, int nbatch_original, int nbuckets, long spacePeakKb) { if (es->format != EXPLAIN_FORMAT_TEXT) { ExplainPropertyLong("Hash Buckets", nbuckets, es); + ExplainPropertyLong("Original Hash Buckets", nbuckets_original, es); ExplainPropertyLong("Hash Batches", nbatch, es); ExplainPropertyLong("Original Hash Batches", nbatch_original, es); ExplainPropertyLong("Peak Memory Usage", spacePeakKb, es); @@ -4295,8 +4296,8 @@ static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbatch_ori es->planinfo->m_staticInfo) { if (nbatch_original != nbatch) { appendStringInfo(es->planinfo->m_staticInfo->info_str, - " Buckets: %d Batches: %d (originally %d) Memory Usage: %ldkB\n", - nbuckets, + " Buckets: %d (originally %d) Batches: %d (originally %d) Memory Usage: %ldkB\n", + nbuckets, nbuckets_original, nbatch, nbatch_original, spacePeakKb); @@ -4310,8 +4311,8 @@ static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbatch_ori } else { if (nbatch_original != nbatch) { appendStringInfo(es->str, - " Buckets: %d Batches: %d (originally %d) Memory Usage: %ldkB\n", - nbuckets, + " Buckets: %d (originally %d) Batches: %d (originally %d) Memory Usage: %ldkB\n", + nbuckets, nbuckets_original, nbatch, nbatch_original, spacePeakKb); @@ -4767,9 +4768,9 @@ static void show_hash_info(HashState* hashstate, ExplainState* es) es->planinfo->m_staticInfo->set_plan_name(); appendStringInfo(es->planinfo->m_staticInfo->info_str, "%s ", node_name); - 
show_datanode_hash_info(es, nbatch, nbatch_original, nbuckets, spacePeakKb); + show_datanode_hash_info(es, nbatch, hashtable->nbuckets_original, nbatch_original, nbuckets, spacePeakKb); } - show_datanode_hash_info(es, nbatch, nbatch_original, nbuckets, spacePeakKb); + show_datanode_hash_info(es, nbatch, hashtable->nbuckets_original, nbatch_original, nbuckets, spacePeakKb); ExplainCloseGroup("Plan", NULL, true, es); } ExplainCloseGroup("Hash Detail", "Hash Detail", false, es); @@ -4888,7 +4889,7 @@ static void show_hash_info(HashState* hashstate, ExplainState* es) if (es->wlm_statistics_plan_max_digit == NULL) { if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoSpaces(es->str, es->indent * 2); - show_datanode_hash_info(es, nbatch, nbatch_original, nbuckets, spacePeakKb); + show_datanode_hash_info(es, nbatch, hashtable->nbuckets_original, nbatch_original, nbuckets, spacePeakKb); } } } diff --git a/src/gausskernel/runtime/executor/nodeHash.cpp b/src/gausskernel/runtime/executor/nodeHash.cpp index a802e5ba6..b9791f700 100644 --- a/src/gausskernel/runtime/executor/nodeHash.cpp +++ b/src/gausskernel/runtime/executor/nodeHash.cpp @@ -50,6 +50,7 @@ #include "workload/workload.h" static void ExecHashIncreaseNumBatches(HashJoinTable hashtable); +static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable); static void ExecHashBuildSkewHash(HashJoinTable hashtable, Hash* node, int mcvsToUse); static void ExecHashSkewTableInsert(HashJoinTable hashtable, TupleTableSlot* slot, uint32 hashvalue, int bucketNumber); static void ExecHashRemoveNextSkewBucket(HashJoinTable hashtable); @@ -128,6 +129,7 @@ Node* MultiExecHash(HashState* node) if (bucketNumber != INVALID_SKEW_BUCKET_NO) { /* It's a skew tuple, so put it into that hash table */ ExecHashSkewTableInsert(hashtable, slot, hashvalue, bucketNumber); + hashtable->skewTuples += 1; } else { /* Not subject to skew optimization, so insert normally */ ExecHashTableInsert(hashtable, @@ -142,13 +144,29 @@ Node* MultiExecHash(HashState* node) } (void)pgstat_report_waitstatus(oldStatus); + /* analyze hash table information for unique sql hash state */ + UpdateUniqueSQLHashStats(hashtable, &start_time); + + /* resize the hash table if needed (NTUP_PER_BUCKET exceeded) */ + if (hashtable->nbuckets != hashtable->nbuckets_optimal) { + /* We never decrease the number of buckets. 
*/ + Assert(hashtable->nbuckets_optimal > hashtable->nbuckets); + +#ifdef HJDEBUG + printf("Increasing nbuckets %d => %d\n", hashtable->nbuckets, hashtable->nbuckets_optimal); +#endif + + ExecHashIncreaseNumBuckets(hashtable); + } + /* analysis hash table information created in memory */ if (anls_opt_is_on(ANLS_HASH_CONFLICT)) ExecHashTableStats(hashtable, node->ps.plan->plan_node_id); - /* analyze hash table information for unique sql hash state */ - UpdateUniqueSQLHashStats(hashtable, &start_time); - + /* Account for the buckets in spaceUsed (reported in EXPLAIN ANALYZE) */ + hashtable->spaceUsed += hashtable->nbuckets * sizeof(HashJoinTuple); + if (hashtable->spaceUsed > hashtable->spacePeak) + hashtable->spacePeak = hashtable->spaceUsed; /* must provide our own instrumentation support */ if (node->ps.instrument) { @@ -320,7 +338,10 @@ HashJoinTable ExecHashTableCreate(Hash* node, List* hashOperators, bool keepNull */ hashtable = (HashJoinTable)palloc(sizeof(HashJoinTableData)); hashtable->nbuckets = nbuckets; + hashtable->nbuckets_original = nbuckets; + hashtable->nbuckets_optimal = nbuckets; hashtable->log2_nbuckets = log2_nbuckets; + hashtable->log2_nbuckets_optimal = log2_nbuckets; hashtable->buckets = NULL; hashtable->keepNulls = keepNulls; hashtable->skewEnabled = false; @@ -334,6 +355,7 @@ HashJoinTable ExecHashTableCreate(Hash* node, List* hashOperators, bool keepNull hashtable->nbatch_outstart = nbatch; hashtable->growEnabled = true; hashtable->totalTuples = 0; + hashtable->skewTuples = 0; hashtable->innerBatchFile = NULL; hashtable->outerBatchFile = NULL; hashtable->spaceUsed = 0; @@ -999,6 +1021,18 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable) */ ninmemory = nfreed = 0; + /* If know we need to resize nbuckets, we can do it while rebatching. */ + if (hashtable->nbuckets_optimal != hashtable->nbuckets) { + /* we never decrease the number of buckets */ + Assert(hashtable->nbuckets_optimal > hashtable->nbuckets); + + hashtable->nbuckets = hashtable->nbuckets_optimal; + hashtable->log2_nbuckets = hashtable->log2_nbuckets_optimal; + + hashtable->buckets = (struct HashJoinTupleData**) repalloc( + hashtable->buckets, sizeof(HashJoinTuple) * hashtable->nbuckets); + } + /* * We will scan through the chunks directly, so that we can reset the * buckets now and not have to keep track which tuples in the buckets have @@ -1080,6 +1114,73 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable) } } +/* + * ExecHashIncreaseNumBuckets + * increase the original number of buckets in order to reduce + * number of tuples per bucket + */ +static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable) +{ + HashMemoryChunk chunk; + + /* do nothing if not an increase (it's called increase for a reason) */ + if (hashtable->nbuckets >= hashtable->nbuckets_optimal) + return; + + /* + * We already know the optimal number of buckets, so let's just + * compute the log2_nbuckets for it. 
+ */ + hashtable->nbuckets = hashtable->nbuckets_optimal; + hashtable->log2_nbuckets = my_log2(hashtable->nbuckets_optimal); + + Assert(hashtable->nbuckets > 1); + Assert(hashtable->nbuckets <= (INT_MAX / 2)); + Assert(hashtable->nbuckets == (1 << hashtable->log2_nbuckets)); + +#ifdef HJDEBUG + printf("Increasing nbuckets to %d\n", hashtable->nbuckets); +#endif + + /* + * Just reallocate the proper number of buckets - we don't need to + * walk through them - we can walk the dense-allocated chunks + * (just like in ExecHashIncreaseNumBatches, but without all the + * copying into new chunks) + */ + hashtable->buckets = (HashJoinTuple *)repalloc(hashtable->buckets, hashtable->nbuckets * sizeof(HashJoinTuple)); + + memset_s(hashtable->buckets, + sizeof(void *) * hashtable->nbuckets, + 0, + sizeof(void *) * hashtable->nbuckets); + + /* scan through all tuples in all chunks to rebuild the hash table */ + for (chunk = hashtable->chunks; chunk != NULL; chunk = chunk->next) { + /* process all tuples stored in this chunk */ + size_t idx = 0; + while (idx < chunk->used) { + HashJoinTuple hashTuple = (HashJoinTuple)(chunk->data + idx); + int bucketno; + int batchno; + + ExecHashGetBucketAndBatch(hashtable, hashTuple->hashvalue, &bucketno, &batchno); + + /* add the tuple to the proper bucket */ + hashTuple->next = hashtable->buckets[bucketno]; + hashtable->buckets[bucketno] = hashTuple; + + /* advance index past the tuple */ + idx += MAXALIGN(HJTUPLE_OVERHEAD + HJTUPLE_MINTUPLE(hashTuple)->t_len); + } + } + +#ifdef HJDEBUG + printf("Nbuckets increased to %d, average items per bucket %.1f\n", hashtable->nbuckets, + batchTuples / hashtable->nbuckets); +#endif +} + /* * ExecHashTableInsert * insert a tuple into the hash table depending on the hash value @@ -1110,6 +1211,7 @@ void ExecHashTableInsert( */ HashJoinTuple hashTuple; int hashTupleSize; + double ntuples = (hashtable->totalTuples - hashtable->skewTuples); /* Create the HashJoinTuple */ hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len; @@ -1136,13 +1238,24 @@ void ExecHashTableInsert( hashtable->width[1] += tuple->t_len; } + /* + * Increase the (optimal) number of buckets if we just exceeded the + * NTUP_PER_BUCKET threshold, but only when there's still a single batch. + */ + if ((hashtable->nbatch == 1) && (hashtable->nbuckets_optimal <= INT_MAX / 2) && /* overflow protection */ + (ntuples >= (hashtable->nbuckets_optimal * NTUP_PER_BUCKET))) { + hashtable->nbuckets_optimal *= 2; + hashtable->log2_nbuckets_optimal += 1; + } + /* Account for space used, and back off if we've used too much */ hashtable->spaceUsed += hashTupleSize; if (hashtable->spaceUsed > hashtable->spacePeak) { hashtable->spacePeak = hashtable->spaceUsed; } bool sysBusy = gs_sysmemory_busy(hashtable->spaceUsed * dop, false); - if (hashtable->spaceUsed > hashtable->spaceAllowed || sysBusy) { + if (hashtable->spaceUsed + int64(hashtable->nbuckets_optimal * sizeof(HashJoinTuple)) > hashtable->spaceAllowed + || sysBusy) { AllocSetContext* set = (AllocSetContext*)(hashtable->hashCxt); if (sysBusy) { hashtable->causedBySysRes = true; @@ -1309,7 +1422,10 @@ bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext* econtext, List* * functions are good about randomizing all their output bits, else we are * likely to have very skewed bucket or batch occupancy.) * - * nbuckets doesn't change over the course of the join. + * nbuckets and log2_nbuckets may change while nbatch == 1 because of dynamic + * bucket count growth. 
Once we start batching, the value is fixed and does + * not change over the course of the join (making it possible to compute batch + * number the way we do here). * * nbatch is always a power of 2; we increase it only by doubling it. This * effectively adds one more bit to the top of the batchno. diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index d154f9b2a..347694ce8 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -126,6 +126,10 @@ typedef struct HashJoinTableData { struct HashJoinTupleData** buckets; /* buckets array is per-batch storage, as are all the tuples */ + int nbuckets_original; /* # buckets when starting the first hash */ + int nbuckets_optimal; /* optimal # buckets (per batch) */ + int log2_nbuckets_optimal; /* same as log2_nbuckets optimal */ + bool keepNulls; /* true to store unmatchable NULL tuples */ bool skewEnabled; /* are we using skew optimization? */ @@ -143,6 +147,7 @@ typedef struct HashJoinTableData { bool growEnabled; /* flag to shut off nbatch increases */ double totalTuples; /* # tuples obtained from inner plan */ + double skewTuples; /* # tuples inserted into skew tuples */ /* * These arrays are allocated for the life of the hash join, but only if diff --git a/src/test/regress/expected/cstore_replication_table_delete.out b/src/test/regress/expected/cstore_replication_table_delete.out index 0de6731fd..cb9cf7700 100644 --- a/src/test/regress/expected/cstore_replication_table_delete.out +++ b/src/test/regress/expected/cstore_replication_table_delete.out @@ -85,7 +85,7 @@ explain (ANALYSE on,costs off, timing off) delete from col_rep_tb2 using hash_tb Hash Cond: (hash_tb1.b = col_rep_tb2.a) -> Seq Scan on hash_tb1 (actual rows=5 loops=1) -> Hash (actual rows=9 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB -> Row Adapter (actual rows=9 loops=1) -> CStore Scan on col_rep_tb2 (actual rows=9 loops=1) Filter: (c > 2) @@ -596,7 +596,7 @@ explain (ANALYSE on,costs off, timing off) delete from row_rep_tb using hash_tb3 Hash Cond: (hash_tb3.b = row_rep_tb.a) -> Seq Scan on hash_tb3 (actual rows=5 loops=1) -> Hash (actual rows=9 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB -> Seq Scan on row_rep_tb (actual rows=9 loops=1) Filter: (c > 2) Rows Removed by Filter: 3 diff --git a/src/test/regress/expected/hw_explain_pretty2.out b/src/test/regress/expected/hw_explain_pretty2.out index 189223bdf..062af3c50 100644 --- a/src/test/regress/expected/hw_explain_pretty2.out +++ b/src/test/regress/expected/hw_explain_pretty2.out @@ -100,7 +100,7 @@ explain (analyze on, costs off, timing off) select * from row_append_table_01 jo Rows Removed by Join Filter: 9 -> Seq Scan on row_append_table_01 (actual rows=12 loops=1) -> Hash (actual rows=5 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB -> Seq Scan on row_append_table_02 (actual rows=5 loops=1) --? Total runtime: .* ms (9 rows) diff --git a/src/test/regress/expected/hw_explain_pretty3.out b/src/test/regress/expected/hw_explain_pretty3.out index b0e50d9ca..c2bc4527e 100644 --- a/src/test/regress/expected/hw_explain_pretty3.out +++ b/src/test/regress/expected/hw_explain_pretty3.out @@ -141,7 +141,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? 
-> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -162,7 +162,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -182,7 +182,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -204,7 +204,7 @@ explain analyze select aa_t1.num from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=4) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=4) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -225,7 +225,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -256,7 +256,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=31.49..31.49 rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -278,7 +278,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -326,7 +326,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? 
-> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: 1.476 ms (7 rows) @@ -435,7 +435,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -456,7 +456,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -484,7 +484,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -629,7 +629,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -654,7 +654,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) diff --git a/src/test/regress/expected/upsert_where_sublink.out b/src/test/regress/expected/upsert_where_sublink.out index 6d7290f55..f9dcbb403 100644 --- a/src/test/regress/expected/upsert_where_sublink.out +++ b/src/test/regress/expected/upsert_where_sublink.out @@ -1911,7 +1911,7 @@ execute p1(5, 6, 7, 'conflict5', 5, 5); Filter: (c1 >= 5) Rows Removed by Filter: 4 -> Hash (actual rows=5 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? 
Buckets: 32768 Batches: 1 Memory Usage: .*kB -> HashAggregate (actual rows=5 loops=1) Group By Key: schema_upsert_where_sublink.tab_source.c2 -> Seq Scan on tab_source (actual rows=5 loops=1) @@ -1938,7 +1938,7 @@ execute p1(5, 6, 7, 'ERROR', 4, 5); Filter: (c1 >= 5) Rows Removed by Filter: 4 -> Hash (actual rows=4 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB -> HashAggregate (actual rows=4 loops=1) Group By Key: schema_upsert_where_sublink.tab_source.c2 -> Seq Scan on tab_source (actual rows=4 loops=1) From 214297acef1efacda9f945636554697641065a9d Mon Sep 17 00:00:00 2001 From: wanghao19920907 Date: Mon, 6 Feb 2023 19:02:08 -0800 Subject: [PATCH 2/3] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=AE=89=E5=85=A8?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E8=BF=94=E5=9B=9E=E5=80=BC=E6=A3=80=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gausskernel/runtime/executor/nodeHash.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gausskernel/runtime/executor/nodeHash.cpp b/src/gausskernel/runtime/executor/nodeHash.cpp index b9791f700..fdf6f8e79 100644 --- a/src/gausskernel/runtime/executor/nodeHash.cpp +++ b/src/gausskernel/runtime/executor/nodeHash.cpp @@ -1154,7 +1154,8 @@ static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable) sizeof(void *) * hashtable->nbuckets, 0, sizeof(void *) * hashtable->nbuckets); - + securec_check(rc, "\0", "\0"); + /* scan through all tuples in all chunks to rebuild the hash table */ for (chunk = hashtable->chunks; chunk != NULL; chunk = chunk->next) { /* process all tuples stored in this chunk */ From 7c60335e24a3d5994ae26c1f1c216e8ee2bf6ddd Mon Sep 17 00:00:00 2001 From: wanghao19920907 Date: Mon, 6 Feb 2023 19:14:29 -0800 Subject: [PATCH 3/3] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=AE=89=E5=85=A8?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E8=BF=94=E5=9B=9E=E5=80=BC=E6=A3=80=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gausskernel/runtime/executor/nodeHash.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gausskernel/runtime/executor/nodeHash.cpp b/src/gausskernel/runtime/executor/nodeHash.cpp index fdf6f8e79..7afda4a48 100644 --- a/src/gausskernel/runtime/executor/nodeHash.cpp +++ b/src/gausskernel/runtime/executor/nodeHash.cpp @@ -1122,6 +1122,7 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable) static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable) { HashMemoryChunk chunk; + errno_t rc; /* do nothing if not an increase (it's called increase for a reason) */ if (hashtable->nbuckets >= hashtable->nbuckets_optimal) @@ -1150,12 +1151,12 @@ static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable) */ hashtable->buckets = (HashJoinTuple *)repalloc(hashtable->buckets, hashtable->nbuckets * sizeof(HashJoinTuple)); - memset_s(hashtable->buckets, + rc = memset_s(hashtable->buckets, sizeof(void *) * hashtable->nbuckets, 0, sizeof(void *) * hashtable->nbuckets); securec_check(rc, "\0", "\0"); - + /* scan through all tuples in all chunks to rebuild the hash table */ for (chunk = hashtable->chunks; chunk != NULL; chunk = chunk->next) { /* process all tuples stored in this chunk */
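
With the explain.cpp changes in patch 1, the text-format hash detail line now reports the original bucket count next to the final one in the branch taken when the batch count changed during the build. A hypothetical rendering of the new format string (all numbers invented purely for illustration; exact spacing follows the format string in show_datanode_hash_info) would look like:

    Buckets: 65536 (originally 32768) Batches: 2 (originally 1) Memory Usage: 3073kB

In the non-text EXPLAIN formats the same information is exposed through the new "Original Hash Buckets" property, alongside the existing "Hash Buckets", "Hash Batches" and "Original Hash Batches" properties.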
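
The comment updated near ExecHashGetBucketAndBatch explains why the bucket count may only grow while there is still a single batch: the batch number is taken from the bits just above the bucket bits of the hash value, so changing nbuckets after batching has started would send already-partitioned tuples to the wrong batch. A minimal standalone sketch of that bit split, assuming the usual power-of-two bucket and batch counts (names and constants here are illustrative; the real routine lives in nodeHash.cpp):

    /*
     * Sketch of splitting a hash value into bucket number and batch number,
     * in the style of ExecHashGetBucketAndBatch(). Illustrative only.
     */
    #include <cstdint>
    #include <cstdio>

    static int nbuckets = 1 << 15;   /* 32768, as in the regression output */
    static int log2_nbuckets = 15;
    static int nbatch = 4;

    static void GetBucketAndBatch(uint32_t hashvalue, int *bucketno, int *batchno)
    {
        if (nbatch > 1) {
            /* low log2_nbuckets bits pick the bucket, the next bits pick the batch */
            *bucketno = (int)(hashvalue & (uint32_t)(nbuckets - 1));
            *batchno = (int)((hashvalue >> log2_nbuckets) & (uint32_t)(nbatch - 1));
        } else {
            *bucketno = (int)(hashvalue & (uint32_t)(nbuckets - 1));
            *batchno = 0;
        }
    }

    int main()
    {
        int bucketno, batchno;
        GetBucketAndBatch(0xDEADBEEFu, &bucketno, &batchno);
        /* Doubling nbuckets here would also shift which bits feed the batch
         * number, which is why the patch only grows nbuckets_optimal while
         * nbatch == 1 and freezes nbuckets once batching begins. */
        printf("bucket=%d batch=%d\n", bucketno, batchno);
        return 0;
    }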
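
The core of patch 1 is twofold: ExecHashTableInsert doubles nbuckets_optimal whenever the in-memory tuple count would push the average chain length past NTUP_PER_BUCKET (only while nbatch == 1, with an INT_MAX/2 overflow guard), and MultiExecHash or ExecHashIncreaseNumBatches later resizes the real bucket array and re-links every tuple by walking the dense-allocated chunks. The following self-contained sketch mimics that flow with toy types; the Tuple struct, the hash function and the NTUP_PER_BUCKET_EXAMPLE value are assumptions for illustration only, not the executor's own definitions.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static const int NTUP_PER_BUCKET_EXAMPLE = 10;  /* assumed; see nodeHash.cpp for the real value */

    struct Tuple {
        uint32_t hashvalue;
        Tuple   *next;            /* bucket chain link */
    };

    int main()
    {
        int nbuckets = 4, log2_nbuckets = 2;
        int nbuckets_optimal = nbuckets, log2_nbuckets_optimal = log2_nbuckets;
        std::vector<Tuple>   tuples(100);                 /* stand-in for the dense chunks */
        std::vector<Tuple *> buckets(nbuckets, nullptr);

        /* insert phase: grow only the *target*, as ExecHashTableInsert now does */
        for (size_t i = 0; i < tuples.size(); i++) {
            tuples[i].hashvalue = (uint32_t)(i * 2654435761u);   /* toy hash */
            int bucketno = (int)(tuples[i].hashvalue & (uint32_t)(nbuckets - 1));
            tuples[i].next = buckets[bucketno];
            buckets[bucketno] = &tuples[i];
            if ((int)(i + 1) >= nbuckets_optimal * NTUP_PER_BUCKET_EXAMPLE) {
                nbuckets_optimal *= 2;
                log2_nbuckets_optimal += 1;
            }
        }

        /* resize phase: what ExecHashIncreaseNumBuckets does after the build */
        if (nbuckets_optimal > nbuckets) {
            nbuckets = nbuckets_optimal;
            log2_nbuckets = log2_nbuckets_optimal;
            buckets.assign(nbuckets, nullptr);                /* repalloc + memset_s in the patch */
            for (size_t i = 0; i < tuples.size(); i++) {      /* chunk walk in the patch */
                int bucketno = (int)(tuples[i].hashvalue & (uint32_t)(nbuckets - 1));
                tuples[i].next = buckets[bucketno];
                buckets[bucketno] = &tuples[i];
            }
        }
        printf("final nbuckets = %d\n", nbuckets);
        return 0;
    }

The same trigger also fires inside ExecHashIncreaseNumBatches, so when the table spills to disk the bucket array is resized during the rebatch rather than in a separate pass.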
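
Patches 2 and 3 only retrofit the return-value check that the securec memset_s call in ExecHashIncreaseNumBuckets was missing: the result is captured in rc and handed to securec_check. A minimal stand-alone illustration of that contract, using a mock memset_s rather than the real securec library (the error codes and the check here are simplified stand-ins):

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>

    /* stand-in for securec memset_s(dest, destMax, c, count); returns errno_t there */
    static int memset_s_example(void *dest, size_t destMax, int c, size_t count)
    {
        if (dest == nullptr || count > destMax)
            return -1;            /* the real library returns specific error codes */
        memset(dest, c, count);
        return 0;
    }

    int main()
    {
        void *buckets[8];
        int rc = memset_s_example(buckets, sizeof(buckets), 0, sizeof(buckets));
        if (rc != 0) {            /* what securec_check(rc, "\0", "\0") enforces */
            fprintf(stderr, "memset_s failed: %d\n", rc);
            exit(1);
        }
        printf("buckets zeroed\n");
        return 0;
    }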