From dcda84c3337ef8ad508d1ff9958ee41eb5fc4259 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BE=90=E8=BE=BE=E6=A0=87?= <848833284@qq.com> Date: Thu, 13 Jun 2024 12:18:15 +0000 Subject: [PATCH] =?UTF-8?q?toast=E5=9C=BA=E6=99=AF=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/common/backend/utils/error/be_module.cpp | 1 + src/gausskernel/storage/access/heap/hio.cpp | 9 +- .../storage/access/nbtree/nbtinsert.cpp | 2 +- .../storage/access/ubtree/ubtinsert.cpp | 20 +++- .../storage/access/ubtree/ubtpage.cpp | 81 ++++++++++++- .../storage/access/ubtree/ubtrecycle.cpp | 52 +++++++-- .../storage/access/ustore/knl_uheap.cpp | 6 +- .../storage/access/ustore/knl_utuple.cpp | 4 +- .../storage/access/ustore/knl_utuptoaster.cpp | 108 ++++++++++++++++-- src/include/access/hio.h | 3 +- src/include/access/ubtree.h | 36 +++++- src/include/utils/be_module.h | 1 + 12 files changed, 287 insertions(+), 36 deletions(-) diff --git a/src/common/backend/utils/error/be_module.cpp b/src/common/backend/utils/error/be_module.cpp index c564d5264..2058d0e82 100755 --- a/src/common/backend/utils/error/be_module.cpp +++ b/src/common/backend/utils/error/be_module.cpp @@ -136,6 +136,7 @@ const module_data module_map[] = {{MOD_ALL, "ALL"}, {MOD_DSS, "DSS_API"}, {MOD_GPI, "GPI"}, {MOD_PARTITION, "PARTITION"}, + {MOD_UBT_NEWPAGE, "UBT_NEWPAGE"}, {MOD_SRF, "SRF"}, {MOD_SS_TXNSTATUS, "SS_TXNSTATUS"}, diff --git a/src/gausskernel/storage/access/heap/hio.cpp b/src/gausskernel/storage/access/heap/hio.cpp index fb71d5a7e..c559613c9 100644 --- a/src/gausskernel/storage/access/heap/hio.cpp +++ b/src/gausskernel/storage/access/heap/hio.cpp @@ -129,7 +129,7 @@ void CheckRelation(const Relation relation, int* extraBlocks, int lockWaiters) } } -static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate) +static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate, NewPageState* npstate) { int extraBlocks = 0; int 
lockWaiters = RelationExtensionLockWaiterCount(relation); @@ -137,6 +137,9 @@ static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate) return; } CheckRelation(relation, &extraBlocks, lockWaiters); + if (npstate != NULL) { + npstate->extendBlocks += (uint32)extraBlocks; + } while (extraBlocks-- >= 0) { /* Ouch - an unnecessary lseek() each time through the loop! */ Buffer buffer = ReadBufferBI(relation, P_NEW, RBM_NORMAL, bistate); @@ -145,7 +148,7 @@ static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate) } } -void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate) +void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate, NewPageState* npstate) { BlockNumber block_num = InvalidBlockNumber; BlockNumber first_block = InvalidBlockNumber; @@ -155,7 +158,7 @@ void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate) if (RelationIsUstoreIndex(relation)) { /* ubtree, use another bypass */ - UBtreeAddExtraBlocks(relation, bistate); + UBtreeAddExtraBlocks(relation, bistate, npstate); return; } diff --git a/src/gausskernel/storage/access/nbtree/nbtinsert.cpp b/src/gausskernel/storage/access/nbtree/nbtinsert.cpp index 563880059..e3437966c 100644 --- a/src/gausskernel/storage/access/nbtree/nbtinsert.cpp +++ b/src/gausskernel/storage/access/nbtree/nbtinsert.cpp @@ -2586,7 +2586,7 @@ static bool CheckItemIsAlive(ItemPointer tid, Relation relation, Snapshot snapsh bool* all_dead, CUDescScan* cudescScan) { if (!RelationIsCUFormat(relation)) { - return heap_hot_search(tid, relation, snapshot, all_dead); + return TableIndexFetchTupleCheck(relation, tid, snapshot, all_dead); } else { return cudescScan->CheckItemIsAlive(tid); } diff --git a/src/gausskernel/storage/access/ubtree/ubtinsert.cpp b/src/gausskernel/storage/access/ubtree/ubtinsert.cpp index 41d7bc907..40eea3756 100644 --- a/src/gausskernel/storage/access/ubtree/ubtinsert.cpp +++ b/src/gausskernel/storage/access/ubtree/ubtinsert.cpp @@ 
-1403,7 +1403,15 @@ static Buffer UBTreeSplit(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber fi * before we release the Exclusive lock. */ UBTRecycleQueueAddress addr; - rbuf = UBTreeGetNewPage(rel, &addr); + NewPageState *npstate = NULL; + if (module_logging_is_on(MOD_UBT_NEWPAGE)) { + npstate = (NewPageState *)palloc0(sizeof(NewPageState)); + } + rbuf = UBTreeGetNewPage(rel, &addr, npstate); + if (npstate != NULL) { + UBTreePrintNewPageState(npstate); + pfree(npstate); + } /* * origpage is the original page to be split. leftpage is a temporary @@ -2471,7 +2479,15 @@ static Buffer UBTreeNewRoot(Relation rel, Buffer lbuf, Buffer rbuf) * before we release the Exclusive lock. */ UBTRecycleQueueAddress addr; - rootbuf = UBTreeGetNewPage(rel, &addr); + NewPageState *npstate = NULL; + if (module_logging_is_on(MOD_UBT_NEWPAGE)) { + npstate = (NewPageState *)palloc0(sizeof(NewPageState)); + } + rootbuf = UBTreeGetNewPage(rel, &addr, npstate); + if (npstate != NULL) { + UBTreePrintNewPageState(npstate); + pfree(npstate); + } rootpage = BufferGetPage(rootbuf); rootblknum = BufferGetBlockNumber(rootbuf); diff --git a/src/gausskernel/storage/access/ubtree/ubtpage.cpp b/src/gausskernel/storage/access/ubtree/ubtpage.cpp index eb88afea7..43b596fc3 100644 --- a/src/gausskernel/storage/access/ubtree/ubtpage.cpp +++ b/src/gausskernel/storage/access/ubtree/ubtpage.cpp @@ -41,6 +41,7 @@ #include "storage/procarray.h" #include "utils/inval.h" #include "utils/snapmgr.h" +#include "datatype/timestamp.h" static bool UBTreeMarkPageHalfDead(Relation rel, Buffer leafbuf, BTStack stack); static bool UBTreeUnlinkHalfDeadPage(Relation rel, Buffer leafbuf, bool *rightsib_empty, BTStack del_blknos = NULL); @@ -88,6 +89,27 @@ void UBTreeInitMetaPage(Page page, BlockNumber rootbknum, uint32 level) ((PageHeader)page)->pd_lower = (uint16)(((char *)metad + sizeof(BTMetaPageData)) - (char *)page); } +void UBTreePrintNewPageState(NewPageState* npstate) +{ + ereport(LOG, 
(errmodule(MOD_UBT_NEWPAGE), (errmsg( + "NewPageState: first_get_available_page_time:%ld, count:%u; second_get_available_page_time:%ld, count:%u; " + "extend_blocks_time:%ld, count:%u, blocks:%u; extend_one_time:%ld, count:%u; " + "get_head_time:%ld; get_available_page_on_page_time:%ld, get_available_page_on_page_time_max:%ld; " + "buffer_invalid_count:%u; need_lock_count:%u; queue_count:%u, items_count:%u, items_valid_count:%u; " + "conditional_lock_count:%u; get_available_page_on_page_count:%u; goto_restart_count first:%u, second:%u;" + "new_create_pages_count check:%u, get:%u; avg_travel_queue pages:%.2f, items:%.2f.", + npstate->firstGetAvailablePageTime, npstate->firstGetAvailablePageCount, + npstate->secondGetAvailablePageTime, npstate->secondGetAvailablePageCount, + npstate->extendBlocksTime, npstate->extendBlocksCount, npstate->extendBlocks, + npstate->extendOneTime, npstate->extendOneCount, npstate->getHeadTime, + npstate->getAvailablePageOnPageTime, npstate->getAvailablePageOnPageTimeMax, + npstate->bufferInvalidCount, npstate->needLockCount, npstate->queueCount, npstate->itemsCount, + npstate->itemsValidCount, npstate->itemsValidConditionalLockCount, + npstate->getAvailablePageOnPageCount, npstate->firstGotoRestartCount, + npstate->secondGotoRestartCount, npstate->checkNewCreatePagesCount, + npstate->getFromNewCreatePagesCount, npstate->avgTravelQueuePages, npstate->avgTravelQueueItems)))); +} + /* * UBTreeGetRoot() -- Get the root page of the btree. * @@ -255,7 +277,15 @@ Buffer UBTreeGetRoot(Relation rel, int access) * before we release the Exclusive lock. 
*/ UBTRecycleQueueAddress addr; - rootbuf = UBTreeGetNewPage(rel, &addr); + NewPageState *npstate = NULL; + if (module_logging_is_on(MOD_UBT_NEWPAGE)) { + npstate = (NewPageState *)palloc0(sizeof(NewPageState)); + } + rootbuf = UBTreeGetNewPage(rel, &addr, npstate); + if (npstate != NULL) { + UBTreePrintNewPageState(npstate); + pfree(npstate); + } rootblkno = BufferGetBlockNumber(rootbuf); rootpage = BufferGetPage(rootbuf); rootopaque = (UBTPageOpaqueInternal)PageGetSpecialPointer(rootpage); @@ -1453,11 +1483,19 @@ static bool UBTreeUnlinkHalfDeadPage(Relation rel, Buffer leafbuf, bool *rightsi * page in the Recycle Queue, and we need to call UBTreeRecordUsedPage() * with this addr when the returned page is used correctly. */ -Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr) +Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr, NewPageState* npstate) { WHITEBOX_TEST_STUB("UBTreeGetNewPage-begin", WhiteboxDefaultErrorEmit); + TimestampTz startTime = 0; restart: - Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr); + if (npstate != NULL) { + startTime = GetCurrentTimestamp(); + } + Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr, npstate); + if (npstate != NULL) { + npstate->firstGetAvailablePageTime += GetCurrentTimestamp() - startTime; + npstate->firstGetAvailablePageCount++; + } if (buf == InvalidBuffer) { /* * No free page left, need to extend the relation @@ -1469,24 +1507,51 @@ restart: * page. We can skip locking for new or temp relations, however, * since no one else could be accessing them. */ + if (npstate != NULL) { + npstate->bufferInvalidCount++; + } bool needLock = !RELATION_IS_LOCAL(rel); if (needLock) { + if (npstate != NULL) { + npstate->needLockCount++; + } if (!ConditionalLockRelationForExtension(rel, ExclusiveLock)) { /* couldn't get the lock immediately; wait for it. 
*/ LockRelationForExtension(rel, ExclusiveLock); + if (npstate != NULL) { + startTime = GetCurrentTimestamp(); + } /* check again, relation may extended by other backends */ - buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr); + buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr, npstate); + if (npstate != NULL) { + npstate->secondGetAvailablePageTime += GetCurrentTimestamp() - startTime; + npstate->secondGetAvailablePageCount++; + } if (buf != InvalidBuffer) { UnlockRelationForExtension(rel, ExclusiveLock); goto out; } + if (npstate != NULL) { + startTime = GetCurrentTimestamp(); + } /* Time to bulk-extend. */ - RelationAddExtraBlocks(rel, NULL); + RelationAddExtraBlocks(rel, NULL, npstate); + if (npstate != NULL) { + npstate->extendBlocksTime += GetCurrentTimestamp() - startTime; + npstate->extendBlocksCount++; + } WHITEBOX_TEST_STUB("UBTreeGetNewPage-bulk-extend", WhiteboxDefaultErrorEmit); } } + if (npstate != NULL) { + startTime = GetCurrentTimestamp(); + } /* extend by one page */ buf = ReadBuffer(rel, P_NEW); + if (npstate != NULL) { + npstate->extendOneTime += GetCurrentTimestamp() - startTime; + npstate->extendOneCount++; + } WHITEBOX_TEST_STUB("UBTreeGetNewPage-extend", WhiteboxDefaultErrorEmit); if (!ConditionalLockBuffer(buf)) { /* lock failed. 
To avoid dead lock, we need to retry */ @@ -1494,6 +1559,9 @@ restart: UnlockRelationForExtension(rel, ExclusiveLock); } ReleaseBuffer(buf); + if (npstate != NULL) { + npstate->firstGotoRestartCount++; + } goto restart; } /* @@ -1518,6 +1586,9 @@ out: ReleaseBuffer(addr->queueBuf); addr->queueBuf = InvalidBuffer; } + if (npstate != NULL) { + npstate->secondGotoRestartCount++; + } goto restart; } diff --git a/src/gausskernel/storage/access/ubtree/ubtrecycle.cpp b/src/gausskernel/storage/access/ubtree/ubtrecycle.cpp index 03500b8cd..387840ca5 100644 --- a/src/gausskernel/storage/access/ubtree/ubtrecycle.cpp +++ b/src/gausskernel/storage/access/ubtree/ubtrecycle.cpp @@ -29,6 +29,7 @@ #include "storage/procarray.h" #include "utils/aiomem.h" #include "utils/builtins.h" +#include "datatype/timestamp.h" static uint32 BlockGetMaxItems(BlockNumber blkno); static void UBTreeInitRecycleQueuePage(Relation rel, Page page, Size size, BlockNumber blkno); @@ -37,7 +38,7 @@ static void UBTreeRecycleQueueAddPage(Relation rel, UBTRecycleForkNumber forkNum BlockNumber blkno, TransactionId xid); static Buffer StepNextPage(Relation rel, Buffer buf); static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumber, Buffer buf, - TransactionId waterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan); + TransactionId waterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan, NewPageState* npstate = NULL); static Buffer MoveToEndpointPage(Relation rel, Buffer buf, bool needHead, int access); static uint16 PageAllocateItem(Buffer buf); static void RecycleQueueLinkNewPage(Relation rel, Buffer leftBuf, Buffer newBuf); @@ -284,8 +285,8 @@ void UBTreeInitializeRecycleQueue(Relation rel) static bool UBTreeTryRecycleEmptyPageInternal(Relation rel) { UBTRecycleQueueAddress addr; - - Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_EMPTY_FORK, &addr); + NewPageState *npstate = NULL; + Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_EMPTY_FORK, &addr, npstate); if 
(!BufferIsValid(buf)) { return false; /* no available page to recycle */ } @@ -356,13 +357,16 @@ static Buffer StepNextPage(Relation rel, Buffer buf) } static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumber, Buffer buf, - TransactionId WaterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan) + TransactionId WaterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan, NewPageState* npstate) { Page page = BufferGetPage(buf); UBTRecycleQueueHeader header = GetRecycleQueueHeader(page, BufferGetBlockNumber(buf)); uint16 curOffset = header->head; while (IsNormalOffset(curOffset)) { + if (npstate != NULL) { + npstate->itemsCount++; + } UBTRecycleQueueItem item = HeaderGetItem(header, curOffset); if (TransactionIdFollowsOrEquals(item->xid, WaterLevelXid)) { *continueScan = false; @@ -372,6 +376,9 @@ static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumb curOffset = item->next; continue; } + if (npstate != NULL) { + npstate->itemsValidCount++; + } Buffer targetBuf = ReadBuffer(rel, item->blkno); _bt_checkbuffer_valid(rel, targetBuf); if (ConditionalLockBuffer(targetBuf)) { @@ -379,6 +386,9 @@ static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumb bool pageUsable = true; if (forkNumber == RECYCLE_FREED_FORK) { pageUsable = UBTreePageRecyclable(BufferGetPage(targetBuf)); + if (npstate != NULL) { + npstate->itemsValidConditionalLockCount++; + } } else if (forkNumber == RECYCLE_EMPTY_FORK) { /* make sure that it's not half-dead or the deletion is not reserved yet */ Page indexPage = BufferGetPage(targetBuf); @@ -415,8 +425,12 @@ static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumb return InvalidBuffer; } -Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress *addr) +Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress *addr, + NewPageState *npstate) { + TimestampTz 
startTime = 0; + TimestampTz elapsedTime = 0; + uint32 getAvailablePageCount = 0; TransactionId oldestXmin = u_sess->utils_cxt.RecentGlobalDataXmin; if (RelationGetNamespace(rel) == PG_TOAST_NAMESPACE) { TransactionId frozenXid = g_instance.undo_cxt.globalFrozenXid; @@ -424,12 +438,29 @@ Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBT TransactionId waterLevelXid = ((forkNumber == RECYCLE_EMPTY_FORK) ? recycleXid : frozenXid); oldestXmin = Min(oldestXmin, waterLevelXid); } + if (npstate != NULL) { + getAvailablePageCount = npstate->firstGetAvailablePageCount + npstate->secondGetAvailablePageCount + 1; + startTime = GetCurrentTimestamp(); + } Buffer queueBuf = RecycleQueueGetEndpointPage(rel, forkNumber, true, BT_READ); - + if (npstate != NULL) { + npstate->getHeadTime += GetCurrentTimestamp() - startTime; + } Buffer indexBuf = InvalidBuffer; bool continueScan = false; for (BlockNumber bufCount = 0; bufCount < URQ_MAX_GET_PAGE_TIMES; bufCount++) { - indexBuf = GetAvailablePageOnPage(rel, forkNumber, queueBuf, oldestXmin, addr, &continueScan); + if (npstate != NULL) { + npstate->getAvailablePageOnPageCount++; + npstate->avgTravelQueuePages = (npstate->getAvailablePageOnPageCount * 1.0) / getAvailablePageCount; + startTime = GetCurrentTimestamp(); + } + indexBuf = GetAvailablePageOnPage(rel, forkNumber, queueBuf, oldestXmin, addr, &continueScan, npstate); + if (npstate != NULL) { + elapsedTime = GetCurrentTimestamp() - startTime; + npstate->getAvailablePageOnPageTimeMax = Max(npstate->getAvailablePageOnPageTimeMax, elapsedTime); + npstate->getAvailablePageOnPageTime += elapsedTime; + npstate->avgTravelQueueItems = (npstate->itemsCount * 1.0) / npstate->getAvailablePageOnPageCount; + } if (!continueScan) { break; } @@ -449,6 +480,10 @@ Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBT return InvalidBuffer; } + if (npstate != NULL) { + npstate->checkNewCreatePagesCount++; + } + /* no available page found, but we
can check new created pages */ BlockNumber nblocks = RelationGetNumberOfBlocks(rel); bool metaChanged = false; @@ -471,6 +506,9 @@ Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBT indexBuf = ReadBuffer(rel, curBlkno); if (ConditionalLockBuffer(indexBuf)) { if (PageIsNew(BufferGetPage(indexBuf))) { + if (npstate != NULL) { + npstate->getFromNewCreatePagesCount++; + } break; } LockBuffer(indexBuf, BUFFER_LOCK_UNLOCK); diff --git a/src/gausskernel/storage/access/ustore/knl_uheap.cpp b/src/gausskernel/storage/access/ustore/knl_uheap.cpp index e8e0b49d3..cb76634cd 100644 --- a/src/gausskernel/storage/access/ustore/knl_uheap.cpp +++ b/src/gausskernel/storage/access/ustore/knl_uheap.cpp @@ -2645,8 +2645,8 @@ check_tup_satisfies_update: Assert(!UHeapTupleHasExternal(newtup)); needToast = false; } else { - needToast = (newtup->disk_tuple_size >= UTOAST_TUPLE_THRESHOLD || UHeapTupleHasExternal(&oldtup) || - UHeapTupleHasExternal(newtup)); + needToast = ((newtup->disk_tuple_size >= UTOAST_TUPLE_THRESHOLD && UHeapDiskTupHasVarWidth(newtup->disk_tuple)) + || UHeapTupleHasExternal(&oldtup) || UHeapTupleHasExternal(newtup)); } oldtupsize = SHORTALIGN(oldtup.disk_tuple_size); @@ -3003,7 +3003,7 @@ check_tup_satisfies_update: bool isOldTupleCopied = false; char identity; UHeapTuple oldKeyTuple = UHeapExtractReplicaIdentity(relation, &oldtup, &isOldTupleCopied, &identity); - + /* Prepare an undo record for this operation. 
*/ /* Save the previous updated information in the undo record */ TD oldTD; diff --git a/src/gausskernel/storage/access/ustore/knl_utuple.cpp b/src/gausskernel/storage/access/ustore/knl_utuple.cpp index 10629c11c..2e84ec64f 100644 --- a/src/gausskernel/storage/access/ustore/knl_utuple.cpp +++ b/src/gausskernel/storage/access/ustore/knl_utuple.cpp @@ -201,7 +201,7 @@ void UHeapFillDiskTuple(TupleDesc tupleDesc, Datum *values, const bool *isnull, /* varlena */ Pointer val = DatumGetPointer(values[i]); - diskTuple->flag |= HEAP_HASVARWIDTH; + diskTuple->flag |= UHEAP_HASVARWIDTH; if (VARATT_IS_EXTERNAL(val)) { diskTuple->flag |= HEAP_HASEXTERNAL; @@ -226,7 +226,7 @@ void UHeapFillDiskTuple(TupleDesc tupleDesc, Datum *values, const bool *isnull, securec_check(rc, "\0", "\0"); } } else if (att[i].attlen == LEN_CSTRING) { - diskTuple->flag |= HEAP_HASVARWIDTH; + diskTuple->flag |= UHEAP_HASVARWIDTH; Assert(att[i].attalign == 'c'); attrLength = strlen(DatumGetCString(values[i])) + 1; Assert(attrLength <= MaxPossibleUHeapTupleSize); diff --git a/src/gausskernel/storage/access/ustore/knl_utuptoaster.cpp b/src/gausskernel/storage/access/ustore/knl_utuptoaster.cpp index 67965033f..c6a2892aa 100644 --- a/src/gausskernel/storage/access/ustore/knl_utuptoaster.cpp +++ b/src/gausskernel/storage/access/ustore/knl_utuptoaster.cpp @@ -19,6 +19,7 @@ #include "access/genam.h" #include "access/heapam.h" +#include "access/nbtree.h" #include "access/tableam.h" #include "nodes/relation.h" #include "access/tuptoaster.h" @@ -45,20 +46,22 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde static Datum UHeapToastCompressDatum(Datum value); static bool UHeapToastIdValueIdExists(Oid toastrelid, Oid valueid, int2 bucketid); static bool UHeapToastRelValueidExists(Relation toastrel, Oid valueid); -static Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn); +static Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber 
oidcolumn, + bool *inconsistent); static Datum UHeapToastCompressDatum(Datum value) { return toast_compress_datum(value); } -Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn) +Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn, bool *inconsistent) { Oid newOid; SysScanDesc scan; ScanKeyData key; bool collides = false; Assert(RelationIsUstoreFormat(relation) || RelationIsToast(relation)); + Assert(inconsistent != NULL); TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(relation), false, relation->rd_tam_ops); /* Generate new OIDs until we find one not in the table */ do { @@ -69,13 +72,42 @@ Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn * chunk_id for toast datum to prevent wrap around. */ newOid = GetNewObjectId(IsToastNamespace(RelationGetNamespace(relation))); + /* Reset per candidate OID: a collision on an earlier attempt must not keep the loop spinning. */ + collides = false; + *inconsistent = false; ScanKeyInit(&key, oidcolumn, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(newOid)); /* see notes above about using SnapshotAny */ scan = systable_beginscan(relation, indexId, true, SnapshotAny, ATTR_FIRST, &key); - collides = UHeapSysIndexGetnextSlot(scan, ForwardScanDirection, slot); - + while (UHeapSysIndexGetnextSlot(scan, ForwardScanDirection, slot)) { + bool isnull = false; + UHeapTuple ttup = ExecGetUHeapTupleFromSlot(slot); + Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, RelationGetDescr(relation), &isnull)); + Assert(!isnull); + if (chunk_id == newOid) { + collides = true; + break; + } else { + *inconsistent = true; + if (scan->iscan != NULL && (!scan->iscan->xactStartedInRecovery)) { + scan->iscan->kill_prior_tuple = true; + BTScanOpaque so = (BTScanOpaque)scan->iscan->opaque; + if (so != NULL) { + BTScanPosItem indexItem = so->currPos.items[so->currPos.itemIndex]; + OffsetNumber indexOffset = indexItem.indexOffset; + ItemPointerData heapTid = indexItem.heapTid; + ereport(LOG, (errcode(ERRCODE_UNEXPECTED_CHUNK_VALUE), + errmsg("found toast
chunk %u is not scan toast value %u of toast relation %u, will skip." + "toast index tuple at offset %hu with ctid (%u, %u) is marked dead.", + chunk_id, newOid, relation->rd_node.relNode, indexOffset, + ItemPointerGetBlockNumber(&heapTid), ItemPointerGetOffsetNumber(&heapTid)), + errcause("found toast chunk is not scan toast value."), + erraction("Check the toast chunk."))); + } + } + } + } systable_endscan(scan); } while (collides); ExecDropSingleTupleTableSlot(slot); @@ -665,6 +695,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde Pointer dval = DatumGetPointer(value); errno_t rc; int2 bucketid = InvalidBktId; + bool inconsistent = false; Assert(!VARATT_IS_EXTERNAL(value)); rc = memset_s(&chunkData, sizeof(chunkData), 0, sizeof(chunkData)); securec_check(rc, "", ""); @@ -736,7 +767,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde */ if (!OidIsValid(rel->rd_toastoid)) { /* normal case: just choose an unused OID */ - toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1); + toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1, &inconsistent); } else { /* rewrite case: check to see if value was in old toast table */ toastPointer.va_valueid = InvalidOid; @@ -781,7 +812,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde * old or new toast table */ do { - toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1); + toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1, &inconsistent); } while (UHeapToastIdValueIdExists(rel->rd_toastoid, toastPointer.va_valueid, bucketid)); } } @@ -824,7 +855,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde * the TOAST table, since we don't bother to update anything else. 
*/ (void)index_insert(toastidx, tValues, tIsnull, &(toasttup->ctid), toastrel, - toastidx->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO); + (toastidx->rd_index->indisunique && !inconsistent) ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO); /* * Free memory @@ -882,6 +913,8 @@ static void UHeapToastDeleteDatum(Relation rel, Datum value, int options) SysScanDesc toastscan; UHeapTuple toasttup; int2 bucketid; + bool found = false; + bool isnull = false; if (!VARATT_IS_EXTERNAL_ONDISK_B(attr)) return; @@ -927,6 +960,14 @@ static void UHeapToastDeleteDatum(Relation rel, Datum value, int options) * Have a chunk, delete it */ toasttup = ExecGetUHeapTupleFromSlot(slot); + Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(toasttup, ATTR_FIRST, RelationGetDescr(toastrel), &isnull)); + Assert(!isnull); + if (chunk_id != toastPointer.va_valueid) { + ereport(LOG, (errmsg("Delete toast chunk %u is not scan toast chunk %u of toast relation is %u, will skip", + chunk_id, toastPointer.va_valueid, toastPointer.va_toastrelid))); + continue; + } + found = true; SimpleUHeapDelete(toastrel, &toasttup->ctid, SnapshotToast); Datum values[INDEX_MAX_KEYS]; @@ -940,6 +981,11 @@ static void UHeapToastDeleteDatum(Relation rel, Datum value, int options) index_delete(toastidx, values, isnulls, &toasttup->ctid, false); } + if (!found) { + ereport(LOG, (errmsg("Toast chunk %u of toast relation is %u delete 0 rows", toastPointer.va_valueid, + toastPointer.va_toastrelid))); + } + /* * End scan and close relations */ @@ -1008,6 +1054,31 @@ struct varlena *UHeapInternalToastFetchDatum(struct varatt_external toastPointer * Have a chunk, extract the sequence number and the data */ ttup = ExecGetUHeapTupleFromSlot(slot); + Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, toastTupDesc, &isnull)); + Assert(!isnull); + if (chunk_id != toastPointer.va_valueid) { + if (toastscan->iscan != NULL && (!toastscan->iscan->xactStartedInRecovery)) { + toastscan->iscan->kill_prior_tuple = true; + 
BTScanOpaque so = (BTScanOpaque)toastscan->iscan->opaque; + if (so != NULL) { + BTScanPosItem indexItem = so->currPos.items[so->currPos.itemIndex]; + OffsetNumber indexOffset = indexItem.indexOffset; + ItemPointerData heapTid = indexItem.heapTid; + ereport(LOG, (errcode(ERRCODE_UNEXPECTED_CHUNK_VALUE), + errmsg("UHeapInternalToastFetchDatum found toast chunk %u is not scan toast chunk %u of " + "toast relation %u toast size detail (%d, %d), will skip." + "toast index tuple at offset %hu with ctid (%u, %u) is marked dead," + "toast tuple ctid is (%u, %u).", + chunk_id, toastPointer.va_valueid, toastPointer.va_toastrelid, + toastPointer.va_rawsize, toastPointer.va_extsize, indexOffset, + ItemPointerGetBlockNumber(&heapTid), ItemPointerGetOffsetNumber(&heapTid), + ItemPointerGetBlockNumber(&(ttup->ctid)), ItemPointerGetOffsetNumber(&(ttup->ctid))), + errcause("found toast chunk is not scan toast value."), + erraction("Check the toast chunk."))); + } + } + continue; + } residx = DatumGetInt32(UHeapFastGetAttr(ttup, ATTR_SECOND, toastTupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(UHeapFastGetAttr(ttup, ATTR_THIRD, toastTupDesc, &isnull)); @@ -1194,6 +1265,13 @@ struct varlena *UHeapInternalToastFetchDatumSlice(struct varatt_external toastPo * Have a chunk, extract the sequence number and the data */ ttup = ExecGetUHeapTupleFromSlot(slot); + Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, toastTupDesc, &isnull)); + Assert(!isnull); + if (chunk_id != toastPointer.va_valueid) { + ereport(LOG, (errmsg("UHeapInternalToastFetchDatumSlice find toast chunk %u is not scan toast chunk %u of " + "toast relation %u, will skip", chunk_id, toastPointer.va_valueid, toastPointer.va_toastrelid))); + continue; + } residx = DatumGetInt32(UHeapFastGetAttr(ttup, CHUNK_ID_ATTR, toastTupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(UHeapFastGetAttr(ttup, CHUNK_DATA_ATTR, toastTupDesc, &isnull)); @@ -1304,8 +1382,20 @@ static bool 
UHeapToastRelValueidExists(Relation toastrel, Oid valueid) * Is there any such chunk? */ toastscan = systable_beginscan(toastrel, toastrel->rd_rel->reltoastidxid, true, SnapshotAny, 1, &toastkey); - result = UHeapSysIndexGetnextSlot(toastscan, ForwardScanDirection, slot); - + while (UHeapSysIndexGetnextSlot(toastscan, ForwardScanDirection, slot)) { + bool isnull = false; + UHeapTuple ttup = ExecGetUHeapTupleFromSlot(slot); + Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, RelationGetDescr(toastrel), &isnull)); + Assert(!isnull); + if (chunk_id == valueid) { + result = true; + break; + } + else { + ereport(LOG, (errmsg("UHeapToastRelValueidExists find toast chunk %u is not scan toast chunk %u of toast " + "relation %u, will skip", chunk_id, valueid, toastrel->rd_id))); + } + } systable_endscan(toastscan); ExecDropSingleTupleTableSlot(slot); diff --git a/src/include/access/hio.h b/src/include/access/hio.h index 432eec40d..c6ea2275a 100644 --- a/src/include/access/hio.h +++ b/src/include/access/hio.h @@ -20,6 +20,7 @@ #include "utils/relcache.h" #include "storage/buf/buf.h" #include "storage/buf/bufmgr.h" +#include "ubtree.h" /* * state for bulk inserts --- private to heapam.c and hio.c @@ -39,6 +40,6 @@ extern Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer othe BulkInsertState bistate, Buffer* vmbuffer, Buffer* vmbuffer_other, BlockNumber end_rel_block); extern Buffer RelationGetNewBufferForBulkInsert(Relation relation, Size len, Size dictSize, BulkInsertState bistate); extern Buffer ReadBufferBI(Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate); -extern void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate); +extern void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate, NewPageState* npState = NULL); #endif /* HIO_H */ diff --git a/src/include/access/ubtree.h b/src/include/access/ubtree.h index 00bb49c55..530b3b4a9 100644 --- 
a/src/include/access/ubtree.h +++ b/src/include/access/ubtree.h @@ -27,6 +27,7 @@ #include "catalog/pg_index.h" #include "lib/stringinfo.h" #include "storage/buf/bufmgr.h" +#include "datatype/timestamp.h" /* * prototypes for functions in ubtree.cpp (external entry points for ubtree) @@ -475,6 +476,34 @@ typedef struct { OffsetNumber previousdead[MaxIndexTuplesPerPage]; } IndexPruneState; +typedef struct { + TimestampTz firstGetAvailablePageTime; + TimestampTz secondGetAvailablePageTime; + TimestampTz extendBlocksTime; + TimestampTz extendOneTime; + TimestampTz getHeadTime; + TimestampTz getAvailablePageOnPageTime; + TimestampTz getAvailablePageOnPageTimeMax; + uint32 firstGetAvailablePageCount; + uint32 secondGetAvailablePageCount; + uint32 bufferInvalidCount; + uint32 needLockCount; + uint32 extendBlocksCount; + uint32 extendBlocks; + uint32 extendOneCount; + uint32 queueCount; + uint32 itemsCount; + uint32 itemsValidCount; + uint32 itemsValidConditionalLockCount; + uint32 getAvailablePageOnPageCount; + uint32 firstGotoRestartCount; + uint32 secondGotoRestartCount; + uint32 checkNewCreatePagesCount; + uint32 getFromNewCreatePagesCount; + double avgTravelQueuePages; + double avgTravelQueueItems; +} NewPageState; + #define TXNINFOSIZE (sizeof(ShortTransactionId) * 2) /* @@ -547,8 +576,8 @@ extern OffsetNumber UBTreeFindsplitloc(Relation rel, Buffer buf, OffsetNumber ne extern OffsetNumber UBTreeFindsplitlocInsertpt(Relation rel, Buffer buf, OffsetNumber newitemoff, Size newitemsz, bool *newitemonleft, IndexTuple newitem); -extern Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr); - +extern Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr, NewPageState* npState = NULL); +extern void UBTreePrintNewPageState(NewPageState* npstate); /* * prototypes for functions in ubtxlog.cpp */ @@ -628,7 +657,8 @@ extern void UBTreeTryRecycleEmptyPage(Relation rel); extern void UBTreeRecordFreePage(Relation rel, BlockNumber blkno, TransactionId 
xid); extern void UBTreeRecordEmptyPage(Relation rel, BlockNumber blkno, TransactionId xid); extern void UBTreeRecordUsedPage(Relation rel, UBTRecycleQueueAddress addr); -extern Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress* addr); +extern Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress* addr, + NewPageState* npState = NULL); extern void UBTreeRecycleQueueInitPage(Relation rel, Page page, BlockNumber blkno, BlockNumber prevBlkno, BlockNumber nextBlkno); extern void UBtreeRecycleQueueChangeChain(Buffer buf, BlockNumber newBlkno, bool setNext); diff --git a/src/include/utils/be_module.h b/src/include/utils/be_module.h index a288a1e64..d7f5af85c 100755 --- a/src/include/utils/be_module.h +++ b/src/include/utils/be_module.h @@ -150,6 +150,7 @@ enum ModuleId { MOD_GPI, /* debug info for global partition index */ MOD_PARTITION, + MOD_UBT_NEWPAGE, /* debug info for UBTreeGetNewPage */ MOD_SRF, /* debug info for SRF */ MOD_SS_TXNSTATUS, /* debug info for SS TXNSTATUS */