From 4e6767e708e4ec28e37b3008b71596f32cf1c8fb Mon Sep 17 00:00:00 2001 From: congzhou2603 Date: Sat, 23 Mar 2024 14:58:56 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90bugfix=E3=80=91=E4=BF=AE=E5=A4=8DBuffe?= =?UTF-8?q?rAlloc=E8=A7=A3pin=E5=90=8E=EF=BC=8Cbuffer=E5=8F=AF=E8=83=BD?= =?UTF-8?q?=E8=A2=AB=E6=B7=98=E6=B1=B0=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ondemand_extreme_rto/page_redo.cpp | 18 ++++++++++++++++++ src/gausskernel/storage/buffer/bufmgr.cpp | 19 +++++++++++++++++-- .../access/ondemand_extreme_rto/page_redo.h | 1 + 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp index d9024f084..2b66d4329 100644 --- a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp +++ b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp @@ -2126,6 +2126,24 @@ void RedoPageWorkerRedoBcmBlock(XLogRecParseState *procState) } } +LWLock* OndemandGetXLogPartitionLock(BufferDesc* bufHdr, ForkNumber forkNum, BlockNumber blockNum) { /* Returns the xlog-track mapping partition lock for the redo item tag built from bufHdr->tag.rnode, forkNum and blockNum. Reports ERROR (ERRCODE_DATA_CORRUPTED) when the hash table of the computed slot is NULL. This function only looks the lock up; it does not acquire or release it. NOTE(review): the relation node is read from bufHdr->tag at call time, so if the buffer's tag has changed since a caller took a lock, the lock returned here may be a different one -- confirm at each call site. */ + LWLock *xlog_partition_lock = NULL; + ondemand_extreme_rto::RedoItemTag redoItemTag; + INIT_REDO_ITEM_TAG(redoItemTag, bufHdr->tag.rnode, forkNum, blockNum); + uint32 slotId = GetSlotId(redoItemTag.rNode, 0, 0, GetBatchCount()); /* the slot is chosen from rNode only; the two middle arguments are passed as 0 */ + HTAB *hashMap = g_instance.comm_cxt.predo_cxt.redoItemHashCtrl[slotId]->hTab; /* NOTE(review): redoItemHashCtrl[slotId] is dereferenced before any NULL check; only hTab is validated below, and hashMap is not used after that check */ + if (hashMap == NULL) { + ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("redo item hash table corrupted, there has invalid hashtable."))); + } + + /* Hash the redo item tag and map the hash code to its partition lock. */ + unsigned int partitionLockHash = XlogTrackTableHashCode(&redoItemTag); + xlog_partition_lock = XlogTrackMappingPartitionLock(partitionLockHash); + + return xlog_partition_lock; +} + /** * Check the block if need to redo and try hashmap lock.
* There are three kinds of result as follow: diff --git a/src/gausskernel/storage/buffer/bufmgr.cpp b/src/gausskernel/storage/buffer/bufmgr.cpp index 2ac39618a..991ab1d67 100644 --- a/src/gausskernel/storage/buffer/bufmgr.cpp +++ b/src/gausskernel/storage/buffer/bufmgr.cpp @@ -3394,13 +3394,28 @@ retry_new_buffer: bool hasUnpinned = false; while (ondemand_extreme_rto::checkBlockRedoStateAndTryHashMapLock(buf, fork_num, block_num) == ONDEMAND_HASHMAP_ENTRY_REDOING) { if (!hasUnpinned) { - UnpinBuffer(buf, true); - hasUnpinned = true; + UnpinBuffer(buf, true); + hasUnpinned = true; } pg_usleep(TEN_MICROSECOND); } + + /* The buffer was unpinned while waiting in the loop above, so pin it again once the wait ends and check whether its tag still matches the requested page (the buffer may have been eliminated meanwhile). */ if (hasUnpinned) { PinBuffer(buf, strategy); + INIT_BUFFERTAG(new_tag, rel_file_node, fork_num, block_num); + if (!BUFFERTAGS_PTR_EQUAL(&buf->tag, &new_tag)) { /* tag no longer matches rel_file_node/fork_num/block_num: drop the pin, release the partition lock if the current caller holds it, log at DEBUG1 and jump to the retry label */ + UnpinBuffer(buf, true); + LWLock *xlog_partition_lock = ondemand_extreme_rto::OndemandGetXLogPartitionLock(buf, fork_num, block_num); /* NOTE(review): the lock is looked up through buf, whose tag was just found to differ from new_tag; OndemandGetXLogPartitionLock derives the lock from bufHdr->tag.rnode, so confirm this resolves to the same partition lock that may have been taken earlier -- otherwise LWLockHeldByMe is false and nothing is released before the retry */ + if (LWLockHeldByMe(xlog_partition_lock)) { + LWLockRelease(xlog_partition_lock); + } + ereport(DEBUG1, (errmodule(MOD_REDO), errcode(ERRCODE_LOG), + errmsg("buffer has been eliminated, goto retry: spc/db/rel/bucket fork-block: %u/%u/%u/%d %d-%u.", + buf->tag.rnode.spcNode, buf->tag.rnode.dbNode, buf->tag.rnode.relNode, buf->tag.rnode.bucketNode, + buf->tag.forkNum, buf->tag.blockNum))); + goto retry; + } } } diff --git a/src/include/access/ondemand_extreme_rto/page_redo.h b/src/include/access/ondemand_extreme_rto/page_redo.h index f808d03fb..d5af1845a 100644 --- a/src/include/access/ondemand_extreme_rto/page_redo.h +++ b/src/include/access/ondemand_extreme_rto/page_redo.h @@ -271,6 +271,7 @@ void BatchClearRecoveryThreadHashTbl(Oid spcNode, Oid dbNode); void RecordBadBlockAndPushToRemote(XLogBlockDataParse *datadecode, PageErrorType error_type, XLogRecPtr old_lsn, XLogPhyBlock pblk); const char *RedoWokerRole2Str(RedoRole role); +LWLock* OndemandGetXLogPartitionLock(BufferDesc* 
bufHdr, ForkNumber forkNum, BlockNumber blockNum); /* looks up the xlog partition lock for the redo item tag built from bufHdr->tag.rnode, forkNum and blockNum */ int checkBlockRedoStateAndTryHashMapLock(BufferDesc* bufHdr, ForkNumber forkNum, BlockNumber blockNum); bool checkBlockRedoDoneFromHashMapAndLock(LWLock **lock, RedoItemTag redoItemTag, RedoItemHashEntry **redoItemEntry, bool holdLock);