From c69efe24e4a9301e1a6e0d437ecf6aea129e67c5 Mon Sep 17 00:00:00 2001 From: wanghao19920907 Date: Mon, 6 Feb 2023 00:57:37 -0800 Subject: [PATCH 1/3] =?UTF-8?q?=E5=BF=85=E8=A6=81=E6=97=B6=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0hashjoin=E7=9A=84bucket=E6=95=B0=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../optimizer/commands/explain.cpp | 19 +-- src/gausskernel/runtime/executor/nodeHash.cpp | 126 +++++++++++++++++- src/include/executor/hashjoin.h | 5 + .../cstore_replication_table_delete.out | 4 +- .../regress/expected/hw_explain_pretty2.out | 2 +- .../regress/expected/hw_explain_pretty3.out | 26 ++-- .../regress/expected/upsert_where_sublink.out | 4 +- 7 files changed, 154 insertions(+), 32 deletions(-) diff --git a/src/gausskernel/optimizer/commands/explain.cpp b/src/gausskernel/optimizer/commands/explain.cpp index 51b218c02..19dfdfed4 100755 --- a/src/gausskernel/optimizer/commands/explain.cpp +++ b/src/gausskernel/optimizer/commands/explain.cpp @@ -216,7 +216,7 @@ static void show_datanode_time(ExplainState* es, PlanState* planstate); static void ShowStreamRunNodeInfo(Stream* stream, ExplainState* es); static void ShowRunNodeInfo(const ExecNodes* en, ExplainState* es, const char* qlabel); template -static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbatch_original, int nbuckets, long spacePeakKb); +static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbuckets_original, int nbatch_original, int nbuckets, long spacePeakKb); static void ShowRoughCheckInfo(ExplainState* es, Instrumentation* instrument, int nodeIdx, int smpIdx); static void show_hashAgg_info(AggState* hashaggstate, ExplainState* es); static void ExplainPrettyList(List* data, ExplainState* es); @@ -4284,10 +4284,11 @@ static void show_sort_info(SortState* sortstate, ExplainState* es) } template -static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbatch_original, int nbuckets, long spacePeakKb) +static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbuckets_original, int nbatch_original, int nbuckets, long spacePeakKb) { if (es->format != EXPLAIN_FORMAT_TEXT) { ExplainPropertyLong("Hash Buckets", nbuckets, es); + ExplainPropertyLong("Original Hash Buckets", nbuckets_original, es); ExplainPropertyLong("Hash Batches", nbatch, es); ExplainPropertyLong("Original Hash Batches", nbatch_original, es); ExplainPropertyLong("Peak Memory Usage", spacePeakKb, es); @@ -4295,8 +4296,8 @@ static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbatch_ori es->planinfo->m_staticInfo) { if (nbatch_original != nbatch) { appendStringInfo(es->planinfo->m_staticInfo->info_str, - " Buckets: %d Batches: %d (originally %d) Memory Usage: %ldkB\n", - nbuckets, + " Buckets: %d (originally %d) Batches: %d (originally %d) Memory Usage: %ldkB\n", + nbuckets, nbuckets_original, nbatch, nbatch_original, spacePeakKb); @@ -4310,8 +4311,8 @@ static void show_datanode_hash_info(ExplainState* es, int nbatch, int nbatch_ori } else { if (nbatch_original != nbatch) { appendStringInfo(es->str, - " Buckets: %d Batches: %d (originally %d) Memory Usage: %ldkB\n", - nbuckets, + " Buckets: %d (originally %d) Batches: %d (originally %d) Memory Usage: %ldkB\n", + nbuckets, nbuckets_original, nbatch, nbatch_original, spacePeakKb); @@ -4767,9 +4768,9 @@ static void show_hash_info(HashState* hashstate, ExplainState* es) es->planinfo->m_staticInfo->set_plan_name(); appendStringInfo(es->planinfo->m_staticInfo->info_str, "%s ", node_name); - 
show_datanode_hash_info(es, nbatch, nbatch_original, nbuckets, spacePeakKb); + show_datanode_hash_info(es, nbatch, hashtable->nbuckets_original, nbatch_original, nbuckets, spacePeakKb); } - show_datanode_hash_info(es, nbatch, nbatch_original, nbuckets, spacePeakKb); + show_datanode_hash_info(es, nbatch, hashtable->nbuckets_original, nbatch_original, nbuckets, spacePeakKb); ExplainCloseGroup("Plan", NULL, true, es); } ExplainCloseGroup("Hash Detail", "Hash Detail", false, es); @@ -4888,7 +4889,7 @@ static void show_hash_info(HashState* hashstate, ExplainState* es) if (es->wlm_statistics_plan_max_digit == NULL) { if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoSpaces(es->str, es->indent * 2); - show_datanode_hash_info(es, nbatch, nbatch_original, nbuckets, spacePeakKb); + show_datanode_hash_info(es, nbatch, hashtable->nbuckets_original, nbatch_original, nbuckets, spacePeakKb); } } } diff --git a/src/gausskernel/runtime/executor/nodeHash.cpp b/src/gausskernel/runtime/executor/nodeHash.cpp index a802e5ba6..b9791f700 100644 --- a/src/gausskernel/runtime/executor/nodeHash.cpp +++ b/src/gausskernel/runtime/executor/nodeHash.cpp @@ -50,6 +50,7 @@ #include "workload/workload.h" static void ExecHashIncreaseNumBatches(HashJoinTable hashtable); +static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable); static void ExecHashBuildSkewHash(HashJoinTable hashtable, Hash* node, int mcvsToUse); static void ExecHashSkewTableInsert(HashJoinTable hashtable, TupleTableSlot* slot, uint32 hashvalue, int bucketNumber); static void ExecHashRemoveNextSkewBucket(HashJoinTable hashtable); @@ -128,6 +129,7 @@ Node* MultiExecHash(HashState* node) if (bucketNumber != INVALID_SKEW_BUCKET_NO) { /* It's a skew tuple, so put it into that hash table */ ExecHashSkewTableInsert(hashtable, slot, hashvalue, bucketNumber); + hashtable->skewTuples += 1; } else { /* Not subject to skew optimization, so insert normally */ ExecHashTableInsert(hashtable, @@ -142,13 +144,29 @@ Node* MultiExecHash(HashState* node) } (void)pgstat_report_waitstatus(oldStatus); + /* analyze hash table information for unique sql hash state */ + UpdateUniqueSQLHashStats(hashtable, &start_time); + + /* resize the hash table if needed (NTUP_PER_BUCKET exceeded) */ + if (hashtable->nbuckets != hashtable->nbuckets_optimal) { + /* We never decrease the number of buckets. 
*/ + Assert(hashtable->nbuckets_optimal > hashtable->nbuckets); + +#ifdef HJDEBUG + printf("Increasing nbuckets %d => %d\n", hashtable->nbuckets, hashtable->nbuckets_optimal); +#endif + + ExecHashIncreaseNumBuckets(hashtable); + } + /* analysis hash table information created in memory */ if (anls_opt_is_on(ANLS_HASH_CONFLICT)) ExecHashTableStats(hashtable, node->ps.plan->plan_node_id); - /* analyze hash table information for unique sql hash state */ - UpdateUniqueSQLHashStats(hashtable, &start_time); - + /* Account for the buckets in spaceUsed (reported in EXPLAIN ANALYZE) */ + hashtable->spaceUsed += hashtable->nbuckets * sizeof(HashJoinTuple); + if (hashtable->spaceUsed > hashtable->spacePeak) + hashtable->spacePeak = hashtable->spaceUsed; /* must provide our own instrumentation support */ if (node->ps.instrument) { @@ -320,7 +338,10 @@ HashJoinTable ExecHashTableCreate(Hash* node, List* hashOperators, bool keepNull */ hashtable = (HashJoinTable)palloc(sizeof(HashJoinTableData)); hashtable->nbuckets = nbuckets; + hashtable->nbuckets_original = nbuckets; + hashtable->nbuckets_optimal = nbuckets; hashtable->log2_nbuckets = log2_nbuckets; + hashtable->log2_nbuckets_optimal = log2_nbuckets; hashtable->buckets = NULL; hashtable->keepNulls = keepNulls; hashtable->skewEnabled = false; @@ -334,6 +355,7 @@ HashJoinTable ExecHashTableCreate(Hash* node, List* hashOperators, bool keepNull hashtable->nbatch_outstart = nbatch; hashtable->growEnabled = true; hashtable->totalTuples = 0; + hashtable->skewTuples = 0; hashtable->innerBatchFile = NULL; hashtable->outerBatchFile = NULL; hashtable->spaceUsed = 0; @@ -999,6 +1021,18 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable) */ ninmemory = nfreed = 0; + /* If know we need to resize nbuckets, we can do it while rebatching. */ + if (hashtable->nbuckets_optimal != hashtable->nbuckets) { + /* we never decrease the number of buckets */ + Assert(hashtable->nbuckets_optimal > hashtable->nbuckets); + + hashtable->nbuckets = hashtable->nbuckets_optimal; + hashtable->log2_nbuckets = hashtable->log2_nbuckets_optimal; + + hashtable->buckets = (struct HashJoinTupleData**) repalloc( + hashtable->buckets, sizeof(HashJoinTuple) * hashtable->nbuckets); + } + /* * We will scan through the chunks directly, so that we can reset the * buckets now and not have to keep track which tuples in the buckets have @@ -1080,6 +1114,73 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable) } } +/* + * ExecHashIncreaseNumBuckets + * increase the original number of buckets in order to reduce + * number of tuples per bucket + */ +static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable) +{ + HashMemoryChunk chunk; + + /* do nothing if not an increase (it's called increase for a reason) */ + if (hashtable->nbuckets >= hashtable->nbuckets_optimal) + return; + + /* + * We already know the optimal number of buckets, so let's just + * compute the log2_nbuckets for it. 
+ */ + hashtable->nbuckets = hashtable->nbuckets_optimal; + hashtable->log2_nbuckets = my_log2(hashtable->nbuckets_optimal); + + Assert(hashtable->nbuckets > 1); + Assert(hashtable->nbuckets <= (INT_MAX / 2)); + Assert(hashtable->nbuckets == (1 << hashtable->log2_nbuckets)); + +#ifdef HJDEBUG + printf("Increasing nbuckets to %d\n", hashtable->nbuckets); +#endif + + /* + * Just reallocate the proper number of buckets - we don't need to + * walk through them - we can walk the dense-allocated chunks + * (just like in ExecHashIncreaseNumBatches, but without all the + * copying into new chunks) + */ + hashtable->buckets = (HashJoinTuple *)repalloc(hashtable->buckets, hashtable->nbuckets * sizeof(HashJoinTuple)); + + memset_s(hashtable->buckets, + sizeof(void *) * hashtable->nbuckets, + 0, + sizeof(void *) * hashtable->nbuckets); + + /* scan through all tuples in all chunks to rebuild the hash table */ + for (chunk = hashtable->chunks; chunk != NULL; chunk = chunk->next) { + /* process all tuples stored in this chunk */ + size_t idx = 0; + while (idx < chunk->used) { + HashJoinTuple hashTuple = (HashJoinTuple)(chunk->data + idx); + int bucketno; + int batchno; + + ExecHashGetBucketAndBatch(hashtable, hashTuple->hashvalue, &bucketno, &batchno); + + /* add the tuple to the proper bucket */ + hashTuple->next = hashtable->buckets[bucketno]; + hashtable->buckets[bucketno] = hashTuple; + + /* advance index past the tuple */ + idx += MAXALIGN(HJTUPLE_OVERHEAD + HJTUPLE_MINTUPLE(hashTuple)->t_len); + } + } + +#ifdef HJDEBUG + printf("Nbuckets increased to %d, average items per bucket %.1f\n", hashtable->nbuckets, + batchTuples / hashtable->nbuckets); +#endif +} + /* * ExecHashTableInsert * insert a tuple into the hash table depending on the hash value @@ -1110,6 +1211,7 @@ void ExecHashTableInsert( */ HashJoinTuple hashTuple; int hashTupleSize; + double ntuples = (hashtable->totalTuples - hashtable->skewTuples); /* Create the HashJoinTuple */ hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len; @@ -1136,13 +1238,24 @@ void ExecHashTableInsert( hashtable->width[1] += tuple->t_len; } + /* + * Increase the (optimal) number of buckets if we just exceeded the + * NTUP_PER_BUCKET threshold, but only when there's still a single batch. + */ + if ((hashtable->nbatch == 1) && (hashtable->nbuckets_optimal <= INT_MAX / 2) && /* overflow protection */ + (ntuples >= (hashtable->nbuckets_optimal * NTUP_PER_BUCKET))) { + hashtable->nbuckets_optimal *= 2; + hashtable->log2_nbuckets_optimal += 1; + } + /* Account for space used, and back off if we've used too much */ hashtable->spaceUsed += hashTupleSize; if (hashtable->spaceUsed > hashtable->spacePeak) { hashtable->spacePeak = hashtable->spaceUsed; } bool sysBusy = gs_sysmemory_busy(hashtable->spaceUsed * dop, false); - if (hashtable->spaceUsed > hashtable->spaceAllowed || sysBusy) { + if (hashtable->spaceUsed + int64(hashtable->nbuckets_optimal * sizeof(HashJoinTuple)) > hashtable->spaceAllowed + || sysBusy) { AllocSetContext* set = (AllocSetContext*)(hashtable->hashCxt); if (sysBusy) { hashtable->causedBySysRes = true; @@ -1309,7 +1422,10 @@ bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext* econtext, List* * functions are good about randomizing all their output bits, else we are * likely to have very skewed bucket or batch occupancy.) * - * nbuckets doesn't change over the course of the join. + * nbuckets and log2_nbuckets may change while nbatch == 1 because of dynamic + * bucket count growth. 
Once we start batching, the value is fixed and does + * not change over the course of the join (making it possible to compute batch + * number the way we do here). * * nbatch is always a power of 2; we increase it only by doubling it. This * effectively adds one more bit to the top of the batchno. diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index d154f9b2a..347694ce8 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -126,6 +126,10 @@ typedef struct HashJoinTableData { struct HashJoinTupleData** buckets; /* buckets array is per-batch storage, as are all the tuples */ + int nbuckets_original; /* # buckets when starting the first hash */ + int nbuckets_optimal; /* optimal # buckets (per batch) */ + int log2_nbuckets_optimal; /* same as log2_nbuckets optimal */ + bool keepNulls; /* true to store unmatchable NULL tuples */ bool skewEnabled; /* are we using skew optimization? */ @@ -143,6 +147,7 @@ typedef struct HashJoinTableData { bool growEnabled; /* flag to shut off nbatch increases */ double totalTuples; /* # tuples obtained from inner plan */ + double skewTuples; /* # tuples inserted into skew tuples */ /* * These arrays are allocated for the life of the hash join, but only if diff --git a/src/test/regress/expected/cstore_replication_table_delete.out b/src/test/regress/expected/cstore_replication_table_delete.out index 0de6731fd..cb9cf7700 100644 --- a/src/test/regress/expected/cstore_replication_table_delete.out +++ b/src/test/regress/expected/cstore_replication_table_delete.out @@ -85,7 +85,7 @@ explain (ANALYSE on,costs off, timing off) delete from col_rep_tb2 using hash_tb Hash Cond: (hash_tb1.b = col_rep_tb2.a) -> Seq Scan on hash_tb1 (actual rows=5 loops=1) -> Hash (actual rows=9 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB -> Row Adapter (actual rows=9 loops=1) -> CStore Scan on col_rep_tb2 (actual rows=9 loops=1) Filter: (c > 2) @@ -596,7 +596,7 @@ explain (ANALYSE on,costs off, timing off) delete from row_rep_tb using hash_tb3 Hash Cond: (hash_tb3.b = row_rep_tb.a) -> Seq Scan on hash_tb3 (actual rows=5 loops=1) -> Hash (actual rows=9 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB -> Seq Scan on row_rep_tb (actual rows=9 loops=1) Filter: (c > 2) Rows Removed by Filter: 3 diff --git a/src/test/regress/expected/hw_explain_pretty2.out b/src/test/regress/expected/hw_explain_pretty2.out index 189223bdf..062af3c50 100644 --- a/src/test/regress/expected/hw_explain_pretty2.out +++ b/src/test/regress/expected/hw_explain_pretty2.out @@ -100,7 +100,7 @@ explain (analyze on, costs off, timing off) select * from row_append_table_01 jo Rows Removed by Join Filter: 9 -> Seq Scan on row_append_table_01 (actual rows=12 loops=1) -> Hash (actual rows=5 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB -> Seq Scan on row_append_table_02 (actual rows=5 loops=1) --? Total runtime: .* ms (9 rows) diff --git a/src/test/regress/expected/hw_explain_pretty3.out b/src/test/regress/expected/hw_explain_pretty3.out index b0e50d9ca..c2bc4527e 100644 --- a/src/test/regress/expected/hw_explain_pretty3.out +++ b/src/test/regress/expected/hw_explain_pretty3.out @@ -141,7 +141,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? 
-> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -162,7 +162,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -182,7 +182,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -204,7 +204,7 @@ explain analyze select aa_t1.num from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=4) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=4) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -225,7 +225,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -256,7 +256,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=31.49..31.49 rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -278,7 +278,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -326,7 +326,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? 
-> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: 1.476 ms (7 rows) @@ -435,7 +435,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -456,7 +456,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -484,7 +484,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -629,7 +629,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) @@ -654,7 +654,7 @@ explain analyze select * from aa_t1, aa_t2 where aa_t1.id = aa_t2.id; Hash Cond: (aa_t1.id = aa_t2.id) --? -> Seq Scan on aa_t1 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? -> Hash (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB --? -> Seq Scan on aa_t2 (cost=.* rows=2149 width=8) (actual time=.* rows=2 loops=1) --? Total runtime: .* ms (7 rows) diff --git a/src/test/regress/expected/upsert_where_sublink.out b/src/test/regress/expected/upsert_where_sublink.out index 6d7290f55..f9dcbb403 100644 --- a/src/test/regress/expected/upsert_where_sublink.out +++ b/src/test/regress/expected/upsert_where_sublink.out @@ -1911,7 +1911,7 @@ execute p1(5, 6, 7, 'conflict5', 5, 5); Filter: (c1 >= 5) Rows Removed by Filter: 4 -> Hash (actual rows=5 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? 
Buckets: 32768 Batches: 1 Memory Usage: .*kB -> HashAggregate (actual rows=5 loops=1) Group By Key: schema_upsert_where_sublink.tab_source.c2 -> Seq Scan on tab_source (actual rows=5 loops=1) @@ -1938,7 +1938,7 @@ execute p1(5, 6, 7, 'ERROR', 4, 5); Filter: (c1 >= 5) Rows Removed by Filter: 4 -> Hash (actual rows=4 loops=1) - Buckets: 32768 Batches: 1 Memory Usage: 1kB +--? Buckets: 32768 Batches: 1 Memory Usage: .*kB -> HashAggregate (actual rows=4 loops=1) Group By Key: schema_upsert_where_sublink.tab_source.c2 -> Seq Scan on tab_source (actual rows=4 loops=1) From 214297acef1efacda9f945636554697641065a9d Mon Sep 17 00:00:00 2001 From: wanghao19920907 Date: Mon, 6 Feb 2023 19:02:08 -0800 Subject: [PATCH 2/3] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=AE=89=E5=85=A8?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E8=BF=94=E5=9B=9E=E5=80=BC=E6=A3=80=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gausskernel/runtime/executor/nodeHash.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gausskernel/runtime/executor/nodeHash.cpp b/src/gausskernel/runtime/executor/nodeHash.cpp index b9791f700..fdf6f8e79 100644 --- a/src/gausskernel/runtime/executor/nodeHash.cpp +++ b/src/gausskernel/runtime/executor/nodeHash.cpp @@ -1154,7 +1154,8 @@ static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable) sizeof(void *) * hashtable->nbuckets, 0, sizeof(void *) * hashtable->nbuckets); - + securec_check(rc, "\0", "\0"); + /* scan through all tuples in all chunks to rebuild the hash table */ for (chunk = hashtable->chunks; chunk != NULL; chunk = chunk->next) { /* process all tuples stored in this chunk */ From 7c60335e24a3d5994ae26c1f1c216e8ee2bf6ddd Mon Sep 17 00:00:00 2001 From: wanghao19920907 Date: Mon, 6 Feb 2023 19:14:29 -0800 Subject: [PATCH 3/3] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=AE=89=E5=85=A8?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E8=BF=94=E5=9B=9E=E5=80=BC=E6=A3=80=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gausskernel/runtime/executor/nodeHash.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gausskernel/runtime/executor/nodeHash.cpp b/src/gausskernel/runtime/executor/nodeHash.cpp index fdf6f8e79..7afda4a48 100644 --- a/src/gausskernel/runtime/executor/nodeHash.cpp +++ b/src/gausskernel/runtime/executor/nodeHash.cpp @@ -1122,6 +1122,7 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable) static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable) { HashMemoryChunk chunk; + errno_t rc; /* do nothing if not an increase (it's called increase for a reason) */ if (hashtable->nbuckets >= hashtable->nbuckets_optimal) @@ -1150,12 +1151,12 @@ static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable) */ hashtable->buckets = (HashJoinTuple *)repalloc(hashtable->buckets, hashtable->nbuckets * sizeof(HashJoinTuple)); - memset_s(hashtable->buckets, + rc = memset_s(hashtable->buckets, sizeof(void *) * hashtable->nbuckets, 0, sizeof(void *) * hashtable->nbuckets); securec_check(rc, "\0", "\0"); - + /* scan through all tuples in all chunks to rebuild the hash table */ for (chunk = hashtable->chunks; chunk != NULL; chunk = chunk->next) { /* process all tuples stored in this chunk */
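
With the explain.cpp changes in patch 1, the text-format hash detail line now reports the original bucket count next to the final one in the branch taken when the batch count changed during the build. A hypothetical rendering of the new format string (all numbers invented purely for illustration; exact spacing follows the format string in show_datanode_hash_info) would look like:

    Buckets: 65536 (originally 32768) Batches: 2 (originally 1) Memory Usage: 3073kB

In the non-text EXPLAIN formats the same information is exposed through the new "Original Hash Buckets" property, alongside the existing "Hash Buckets", "Hash Batches" and "Original Hash Batches" properties.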
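
The comment updated near ExecHashGetBucketAndBatch explains why the bucket count may only grow while there is still a single batch: the batch number is taken from the bits just above the bucket bits of the hash value, so changing nbuckets after batching has started would send already-partitioned tuples to the wrong batch. A minimal standalone sketch of that bit split, assuming the usual power-of-two bucket and batch counts (names and constants here are illustrative; the real routine lives in nodeHash.cpp):

    /*
     * Sketch of splitting a hash value into bucket number and batch number,
     * in the style of ExecHashGetBucketAndBatch(). Illustrative only.
     */
    #include <cstdint>
    #include <cstdio>

    static int nbuckets = 1 << 15;   /* 32768, as in the regression output */
    static int log2_nbuckets = 15;
    static int nbatch = 4;

    static void GetBucketAndBatch(uint32_t hashvalue, int *bucketno, int *batchno)
    {
        if (nbatch > 1) {
            /* low log2_nbuckets bits pick the bucket, the next bits pick the batch */
            *bucketno = (int)(hashvalue & (uint32_t)(nbuckets - 1));
            *batchno = (int)((hashvalue >> log2_nbuckets) & (uint32_t)(nbatch - 1));
        } else {
            *bucketno = (int)(hashvalue & (uint32_t)(nbuckets - 1));
            *batchno = 0;
        }
    }

    int main()
    {
        int bucketno, batchno;
        GetBucketAndBatch(0xDEADBEEFu, &bucketno, &batchno);
        /* Doubling nbuckets here would also shift which bits feed the batch
         * number, which is why the patch only grows nbuckets_optimal while
         * nbatch == 1 and freezes nbuckets once batching begins. */
        printf("bucket=%d batch=%d\n", bucketno, batchno);
        return 0;
    }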
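
The core of patch 1 is twofold: ExecHashTableInsert doubles nbuckets_optimal whenever the in-memory tuple count would push the average chain length past NTUP_PER_BUCKET (only while nbatch == 1, with an INT_MAX/2 overflow guard), and MultiExecHash or ExecHashIncreaseNumBatches later resizes the real bucket array and re-links every tuple by walking the dense-allocated chunks. The following self-contained sketch mimics that flow with toy types; the Tuple struct, the hash function and the NTUP_PER_BUCKET_EXAMPLE value are assumptions for illustration only, not the executor's own definitions.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static const int NTUP_PER_BUCKET_EXAMPLE = 10;  /* assumed; see nodeHash.cpp for the real value */

    struct Tuple {
        uint32_t hashvalue;
        Tuple   *next;            /* bucket chain link */
    };

    int main()
    {
        int nbuckets = 4, log2_nbuckets = 2;
        int nbuckets_optimal = nbuckets, log2_nbuckets_optimal = log2_nbuckets;
        std::vector<Tuple>   tuples(100);                 /* stand-in for the dense chunks */
        std::vector<Tuple *> buckets(nbuckets, nullptr);

        /* insert phase: grow only the *target*, as ExecHashTableInsert now does */
        for (size_t i = 0; i < tuples.size(); i++) {
            tuples[i].hashvalue = (uint32_t)(i * 2654435761u);   /* toy hash */
            int bucketno = (int)(tuples[i].hashvalue & (uint32_t)(nbuckets - 1));
            tuples[i].next = buckets[bucketno];
            buckets[bucketno] = &tuples[i];
            if ((int)(i + 1) >= nbuckets_optimal * NTUP_PER_BUCKET_EXAMPLE) {
                nbuckets_optimal *= 2;
                log2_nbuckets_optimal += 1;
            }
        }

        /* resize phase: what ExecHashIncreaseNumBuckets does after the build */
        if (nbuckets_optimal > nbuckets) {
            nbuckets = nbuckets_optimal;
            log2_nbuckets = log2_nbuckets_optimal;
            buckets.assign(nbuckets, nullptr);                /* repalloc + memset_s in the patch */
            for (size_t i = 0; i < tuples.size(); i++) {      /* chunk walk in the patch */
                int bucketno = (int)(tuples[i].hashvalue & (uint32_t)(nbuckets - 1));
                tuples[i].next = buckets[bucketno];
                buckets[bucketno] = &tuples[i];
            }
        }
        printf("final nbuckets = %d\n", nbuckets);
        return 0;
    }

The same trigger also fires inside ExecHashIncreaseNumBatches, so when the table spills to disk the bucket array is resized during the rebatch rather than in a separate pass.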
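
Patches 2 and 3 only retrofit the return-value check that the securec memset_s call in ExecHashIncreaseNumBuckets was missing: the result is captured in rc and handed to securec_check. A minimal stand-alone illustration of that contract, using a mock memset_s rather than the real securec library (the error codes and the check here are simplified stand-ins):

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>

    /* stand-in for securec memset_s(dest, destMax, c, count); returns errno_t there */
    static int memset_s_example(void *dest, size_t destMax, int c, size_t count)
    {
        if (dest == nullptr || count > destMax)
            return -1;            /* the real library returns specific error codes */
        memset(dest, c, count);
        return 0;
    }

    int main()
    {
        void *buckets[8];
        int rc = memset_s_example(buckets, sizeof(buckets), 0, sizeof(buckets));
        if (rc != 0) {            /* what securec_check(rc, "\0", "\0") enforces */
            fprintf(stderr, "memset_s failed: %d\n", rc);
            exit(1);
        }
        printf("buckets zeroed\n");
        return 0;
    }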