From d95ff6d97471b634602bf7c0244a04eb7a27acb9 Mon Sep 17 00:00:00 2001 From: wanghao19920907 Date: Wed, 1 Mar 2023 04:14:32 -0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96hashjoin=E7=9A=84hash?= =?UTF-8?q?=E7=AD=96=E7=95=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gausskernel/runtime/executor/nodeHash.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/gausskernel/runtime/executor/nodeHash.cpp b/src/gausskernel/runtime/executor/nodeHash.cpp index 5bc0c4993..76fb6c0f7 100644 --- a/src/gausskernel/runtime/executor/nodeHash.cpp +++ b/src/gausskernel/runtime/executor/nodeHash.cpp @@ -37,6 +37,7 @@ #include "pgstat.h" #include "pgxc/pgxc.h" #include "instruments/instr_unique_sql.h" +#include "port/pg_bitutils.h" #include "utils/anls_opt.h" #include "utils/dynahash.h" #include "utils/lsyscache.h" @@ -1424,7 +1425,7 @@ bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext* econtext, List* * chains), and must only cause the batch number to remain the same or * increase. Our algorithm is * bucketno = hashvalue MOD nbuckets - * batchno = (hashvalue DIV nbuckets) MOD nbatch + * batchno = ROR(hashvalue, log2_nbuckets) MOD nbatch * where nbuckets and nbatch are both expected to be powers of 2, so we can * do the computations by shifting and masking. (This assumes that all hash * functions are good about randomizing all their output bits, else we are @@ -1436,7 +1437,11 @@ bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext* econtext, List* * number the way we do here). * * nbatch is always a power of 2; we increase it only by doubling it. This - * effectively adds one more bit to the top of the batchno. + * effectively adds one more bit to the top of the batchno. In very large + * joins, we might run out of bits to add, so we do this by rotating the hash + * value. This causes batchno to steal bits from bucketno when the number of + * virtual buckets exceeds 2^32. It's better to have longer bucket chains + * than to lose the ability to divide batches. */ void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int* bucketno, int* batchno) { @@ -1446,7 +1451,7 @@ void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int* b if (nbatch > 1) { /* we can do MOD by masking, DIV by shifting */ *bucketno = hashvalue & (nbuckets - 1); - *batchno = (hashvalue >> hashtable->log2_nbuckets) & (nbatch - 1); + *batchno = pg_rotate_right32(hashvalue, hashtable->log2_nbuckets) & (nbatch - 1); } else { *bucketno = hashvalue & (nbuckets - 1); *batchno = 0;