toast场景优化

This commit is contained in:
徐达标
2024-06-13 12:18:15 +00:00
committed by yaoxin
parent 4987c6884c
commit dcda84c333
12 changed files with 287 additions and 36 deletions

View File

@ -136,6 +136,7 @@ const module_data module_map[] = {{MOD_ALL, "ALL"},
{MOD_DSS, "DSS_API"},
{MOD_GPI, "GPI"},
{MOD_PARTITION, "PARTITION"},
{MOD_UBT_NEWPAGE, "UBT_NEWPAGE"},
{MOD_SRF, "SRF"},
{MOD_SS_TXNSTATUS, "SS_TXNSTATUS"},

View File

@ -129,7 +129,7 @@ void CheckRelation(const Relation relation, int* extraBlocks, int lockWaiters)
}
}
static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate)
static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate, NewPageState* npstate)
{
int extraBlocks = 0;
int lockWaiters = RelationExtensionLockWaiterCount(relation);
@ -137,6 +137,9 @@ static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate)
return;
}
CheckRelation(relation, &extraBlocks, lockWaiters);
if (npstate != NULL) {
npstate->extendBlocks += (uint32)extraBlocks;
}
while (extraBlocks-- >= 0) {
/* Ouch - an unnecessary lseek() each time through the loop! */
Buffer buffer = ReadBufferBI(relation, P_NEW, RBM_NORMAL, bistate);
@ -145,7 +148,7 @@ static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate)
}
}
void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate, NewPageState* npstate)
{
BlockNumber block_num = InvalidBlockNumber;
BlockNumber first_block = InvalidBlockNumber;
@ -155,7 +158,7 @@ void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
if (RelationIsUstoreIndex(relation)) {
/* ubtree, use another bypass */
UBtreeAddExtraBlocks(relation, bistate);
UBtreeAddExtraBlocks(relation, bistate, npstate);
return;
}

View File

@ -2586,7 +2586,7 @@ static bool CheckItemIsAlive(ItemPointer tid, Relation relation, Snapshot snapsh
bool* all_dead, CUDescScan* cudescScan)
{
if (!RelationIsCUFormat(relation)) {
return heap_hot_search(tid, relation, snapshot, all_dead);
return TableIndexFetchTupleCheck(relation, tid, snapshot, all_dead);
} else {
return cudescScan->CheckItemIsAlive(tid);
}

View File

@ -1403,7 +1403,15 @@ static Buffer UBTreeSplit(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber fi
* before we release the Exclusive lock.
*/
UBTRecycleQueueAddress addr;
rbuf = UBTreeGetNewPage(rel, &addr);
NewPageState *npstate = NULL;
if (module_logging_is_on(MOD_UBT_NEWPAGE)) {
npstate = (NewPageState *)palloc0(sizeof(NewPageState));
}
rbuf = UBTreeGetNewPage(rel, &addr, npstate);
if (npstate != NULL) {
UBTreePrintNewPageState(npstate);
pfree(npstate);
}
/*
* origpage is the original page to be split. leftpage is a temporary
@ -2471,7 +2479,15 @@ static Buffer UBTreeNewRoot(Relation rel, Buffer lbuf, Buffer rbuf)
* before we release the Exclusive lock.
*/
UBTRecycleQueueAddress addr;
rootbuf = UBTreeGetNewPage(rel, &addr);
NewPageState *npstate = NULL;
if (module_logging_is_on(MOD_UBT_NEWPAGE)) {
npstate = (NewPageState *)palloc0(sizeof(NewPageState));
}
rootbuf = UBTreeGetNewPage(rel, &addr, npstate);
if (npstate != NULL) {
UBTreePrintNewPageState(npstate);
pfree(npstate);
}
rootpage = BufferGetPage(rootbuf);
rootblknum = BufferGetBlockNumber(rootbuf);

View File

@ -41,6 +41,7 @@
#include "storage/procarray.h"
#include "utils/inval.h"
#include "utils/snapmgr.h"
#include "datatype/timestamp.h"
static bool UBTreeMarkPageHalfDead(Relation rel, Buffer leafbuf, BTStack stack);
static bool UBTreeUnlinkHalfDeadPage(Relation rel, Buffer leafbuf, bool *rightsib_empty, BTStack del_blknos = NULL);
@ -88,6 +89,27 @@ void UBTreeInitMetaPage(Page page, BlockNumber rootbknum, uint32 level)
((PageHeader)page)->pd_lower = (uint16)(((char *)metad + sizeof(BTMetaPageData)) - (char *)page);
}
/*
 * UBTreePrintNewPageState() -- Dump the statistics collected in *npstate.
 *
 * Emits a single LOG-level report tagged with module MOD_UBT_NEWPAGE that
 * lists every timing, counter and average stored in the NewPageState.
 * Callers in this file allocate the state only when
 * module_logging_is_on(MOD_UBT_NEWPAGE) is true and print it right after
 * UBTreeGetNewPage() returns.
 *
 * npstate: statistics to print; a NULL pointer is ignored.
 */
void UBTreePrintNewPageState(NewPageState* npstate)
{
    /* Nothing was collected (module logging off), so there is nothing to print. */
    if (npstate == NULL) {
        return;
    }

    /*
     * The TimestampTz fields are 64-bit integers whose underlying C type is
     * platform dependent, so cast them to long long and print with %lld
     * instead of relying on int64 being "long".
     */
    ereport(LOG, (errmodule(MOD_UBT_NEWPAGE), (errmsg(
        "NewPageState: first_get_available_page_time:%lld, count:%u; second_get_available_page_time:%lld, count:%u; "
        "extend_blocks_time:%lld, count:%u, blocks:%u; extend_one_time:%lld, count:%u; "
        "get_head_time:%lld; get_available_page_on_page_time:%lld, get_available_page_on_page_time_max:%lld; "
        "buffer_invalid_count:%u; need_lock_count:%u; queue_count:%u, items_count:%u, items_valid_count:%u; "
        "conditional_lock_count:%u; get_available_page_on_page_count:%u; goto_restart_count first:%u, second:%u; "
        "new_create_pages_count check:%u, get:%u; avg_travel_queue pages:%.2f, items:%.2f.",
        (long long)npstate->firstGetAvailablePageTime, npstate->firstGetAvailablePageCount,
        (long long)npstate->secondGetAvailablePageTime, npstate->secondGetAvailablePageCount,
        (long long)npstate->extendBlocksTime, npstate->extendBlocksCount, npstate->extendBlocks,
        (long long)npstate->extendOneTime, npstate->extendOneCount,
        (long long)npstate->getHeadTime,
        (long long)npstate->getAvailablePageOnPageTime, (long long)npstate->getAvailablePageOnPageTimeMax,
        npstate->bufferInvalidCount, npstate->needLockCount, npstate->queueCount, npstate->itemsCount,
        npstate->itemsValidCount, npstate->itemsValidConditionalLockCount,
        npstate->getAvailablePageOnPageCount, npstate->firstGotoRestartCount,
        npstate->secondGotoRestartCount, npstate->checkNewCreatePagesCount,
        npstate->getFromNewCreatePagesCount, npstate->avgTravelQueuePages, npstate->avgTravelQueueItems))));
}
/*
* UBTreeGetRoot() -- Get the root page of the btree.
*
@ -255,7 +277,15 @@ Buffer UBTreeGetRoot(Relation rel, int access)
* before we release the Exclusive lock.
*/
UBTRecycleQueueAddress addr;
rootbuf = UBTreeGetNewPage(rel, &addr);
NewPageState *npstate = NULL;
if (module_logging_is_on(MOD_UBT_NEWPAGE)) {
npstate = (NewPageState *)palloc0(sizeof(NewPageState));
}
rootbuf = UBTreeGetNewPage(rel, &addr, npstate);
if (npstate != NULL) {
UBTreePrintNewPageState(npstate);
pfree(npstate);
}
rootblkno = BufferGetBlockNumber(rootbuf);
rootpage = BufferGetPage(rootbuf);
rootopaque = (UBTPageOpaqueInternal)PageGetSpecialPointer(rootpage);
@ -1453,11 +1483,19 @@ static bool UBTreeUnlinkHalfDeadPage(Relation rel, Buffer leafbuf, bool *rightsi
* page in the Recycle Queue, and we need to call UBTreeRecordUsedPage()
* with this addr when the returned page is used correctly.
*/
Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr)
Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr, NewPageState* npstate)
{
WHITEBOX_TEST_STUB("UBTreeGetNewPage-begin", WhiteboxDefaultErrorEmit);
TimestampTz startTime = 0;
restart:
Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr);
if (npstate != NULL) {
startTime = GetCurrentTimestamp();
}
Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr, npstate);
if (npstate != NULL) {
npstate->firstGetAvailablePageTime += GetCurrentTimestamp() - startTime;
npstate->firstGetAvailablePageCount++;
}
if (buf == InvalidBuffer) {
/*
* No free page left, need to extend the relation
@ -1469,24 +1507,51 @@ restart:
* page. We can skip locking for new or temp relations, however,
* since no one else could be accessing them.
*/
if (npstate != NULL) {
npstate->bufferInvalidCount++;
}
bool needLock = !RELATION_IS_LOCAL(rel);
if (needLock) {
if (npstate != NULL) {
npstate->needLockCount++;
}
if (!ConditionalLockRelationForExtension(rel, ExclusiveLock)) {
/* couldn't get the lock immediately; wait for it. */
LockRelationForExtension(rel, ExclusiveLock);
if (npstate != NULL) {
startTime = GetCurrentTimestamp();
}
/* check again, the relation may have been extended by other backends */
buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr);
buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr, npstate);
if (npstate != NULL) {
npstate->secondGetAvailablePageTime += GetCurrentTimestamp() - startTime;
npstate->secondGetAvailablePageCount++;
}
if (buf != InvalidBuffer) {
UnlockRelationForExtension(rel, ExclusiveLock);
goto out;
}
if (npstate != NULL) {
startTime = GetCurrentTimestamp();
}
/* Time to bulk-extend. */
RelationAddExtraBlocks(rel, NULL);
RelationAddExtraBlocks(rel, NULL, npstate);
if (npstate != NULL) {
npstate->extendBlocksTime += GetCurrentTimestamp() - startTime;
npstate->extendBlocksCount++;
}
WHITEBOX_TEST_STUB("UBTreeGetNewPage-bulk-extend", WhiteboxDefaultErrorEmit);
}
}
if (npstate != NULL) {
startTime = GetCurrentTimestamp();
}
/* extend by one page */
buf = ReadBuffer(rel, P_NEW);
if (npstate != NULL) {
npstate->extendOneTime += GetCurrentTimestamp() - startTime;
npstate->extendOneCount++;
}
WHITEBOX_TEST_STUB("UBTreeGetNewPage-extend", WhiteboxDefaultErrorEmit);
if (!ConditionalLockBuffer(buf)) {
/* lock failed. To avoid deadlock, we need to retry */
@ -1494,6 +1559,9 @@ restart:
UnlockRelationForExtension(rel, ExclusiveLock);
}
ReleaseBuffer(buf);
if (npstate != NULL) {
npstate->firstGotoRestartCount++;
}
goto restart;
}
/*
@ -1518,6 +1586,9 @@ out:
ReleaseBuffer(addr->queueBuf);
addr->queueBuf = InvalidBuffer;
}
if (npstate != NULL) {
npstate->secondGotoRestartCount++;
}
goto restart;
}

View File

@ -29,6 +29,7 @@
#include "storage/procarray.h"
#include "utils/aiomem.h"
#include "utils/builtins.h"
#include "datatype/timestamp.h"
static uint32 BlockGetMaxItems(BlockNumber blkno);
static void UBTreeInitRecycleQueuePage(Relation rel, Page page, Size size, BlockNumber blkno);
@ -37,7 +38,7 @@ static void UBTreeRecycleQueueAddPage(Relation rel, UBTRecycleForkNumber forkNum
BlockNumber blkno, TransactionId xid);
static Buffer StepNextPage(Relation rel, Buffer buf);
static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumber, Buffer buf,
TransactionId waterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan);
TransactionId waterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan, NewPageState* npstate = NULL);
static Buffer MoveToEndpointPage(Relation rel, Buffer buf, bool needHead, int access);
static uint16 PageAllocateItem(Buffer buf);
static void RecycleQueueLinkNewPage(Relation rel, Buffer leftBuf, Buffer newBuf);
@ -284,8 +285,8 @@ void UBTreeInitializeRecycleQueue(Relation rel)
static bool UBTreeTryRecycleEmptyPageInternal(Relation rel)
{
UBTRecycleQueueAddress addr;
Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_EMPTY_FORK, &addr);
NewPageState *npstate = NULL;
Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_EMPTY_FORK, &addr, npstate);
if (!BufferIsValid(buf)) {
return false; /* no available page to recycle */
}
@ -356,13 +357,16 @@ static Buffer StepNextPage(Relation rel, Buffer buf)
}
static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumber, Buffer buf,
TransactionId WaterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan)
TransactionId WaterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan, NewPageState* npstate)
{
Page page = BufferGetPage(buf);
UBTRecycleQueueHeader header = GetRecycleQueueHeader(page, BufferGetBlockNumber(buf));
uint16 curOffset = header->head;
while (IsNormalOffset(curOffset)) {
if (npstate != NULL) {
npstate->itemsCount++;
}
UBTRecycleQueueItem item = HeaderGetItem(header, curOffset);
if (TransactionIdFollowsOrEquals(item->xid, WaterLevelXid)) {
*continueScan = false;
@ -372,6 +376,9 @@ static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumb
curOffset = item->next;
continue;
}
if (npstate != NULL) {
npstate->itemsValidCount++;
}
Buffer targetBuf = ReadBuffer(rel, item->blkno);
_bt_checkbuffer_valid(rel, targetBuf);
if (ConditionalLockBuffer(targetBuf)) {
@ -379,6 +386,9 @@ static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumb
bool pageUsable = true;
if (forkNumber == RECYCLE_FREED_FORK) {
pageUsable = UBTreePageRecyclable(BufferGetPage(targetBuf));
if (npstate != NULL) {
npstate->itemsValidConditionalLockCount++;
}
} else if (forkNumber == RECYCLE_EMPTY_FORK) {
/* make sure that it's not half-dead or the deletion is not reserved yet */
Page indexPage = BufferGetPage(targetBuf);
@ -415,8 +425,12 @@ static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumb
return InvalidBuffer;
}
Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress *addr)
Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress *addr,
NewPageState *npstate)
{
TimestampTz startTime = 0;
TimestampTz elapsedTime = 0;
uint32 getAvailablePageCount = 0;
TransactionId oldestXmin = u_sess->utils_cxt.RecentGlobalDataXmin;
if (RelationGetNamespace(rel) == PG_TOAST_NAMESPACE) {
TransactionId frozenXid = g_instance.undo_cxt.globalFrozenXid;
@ -424,12 +438,29 @@ Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBT
TransactionId waterLevelXid = ((forkNumber == RECYCLE_EMPTY_FORK) ? recycleXid : frozenXid);
oldestXmin = Min(oldestXmin, waterLevelXid);
}
if (npstate != NULL) {
getAvailablePageCount = npstate->firstGetAvailablePageCount + npstate->secondGetAvailablePageCount;
startTime = GetCurrentTimestamp();
}
Buffer queueBuf = RecycleQueueGetEndpointPage(rel, forkNumber, true, BT_READ);
if (npstate != NULL) {
npstate->getHeadTime += GetCurrentTimestamp() - startTime;
}
Buffer indexBuf = InvalidBuffer;
bool continueScan = false;
for (BlockNumber bufCount = 0; bufCount < URQ_MAX_GET_PAGE_TIMES; bufCount++) {
indexBuf = GetAvailablePageOnPage(rel, forkNumber, queueBuf, oldestXmin, addr, &continueScan);
if (npstate != NULL) {
npstate->getAvailablePageOnPageCount++;
npstate->avgTravelQueuePages = (npstate->getAvailablePageOnPageCount * 1.0) / getAvailablePageCount;
startTime = GetCurrentTimestamp();
}
indexBuf = GetAvailablePageOnPage(rel, forkNumber, queueBuf, oldestXmin, addr, &continueScan, npstate);
if (npstate != NULL) {
elapsedTime = GetCurrentTimestamp() - startTime;
npstate->getAvailablePageOnPageTimeMax = Max(npstate->getAvailablePageOnPageTimeMax, elapsedTime);
npstate->getAvailablePageOnPageTime += elapsedTime;
npstate->avgTravelQueueItems = (npstate->itemsCount * 1.0) / npstate->getAvailablePageOnPageCount;
}
if (!continueScan) {
break;
}
@ -449,6 +480,10 @@ Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBT
return InvalidBuffer;
}
if (npstate != NULL) {
npstate->checkNewCreatePagesCount++;
}
/* no available page found, but we can check new created pages */
BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
bool metaChanged = false;
@ -471,6 +506,9 @@ Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBT
indexBuf = ReadBuffer(rel, curBlkno);
if (ConditionalLockBuffer(indexBuf)) {
if (PageIsNew(BufferGetPage(indexBuf))) {
if (npstate != NULL) {
npstate->getFromNewCreatePagesCount++;
}
break;
}
LockBuffer(indexBuf, BUFFER_LOCK_UNLOCK);

View File

@ -2645,8 +2645,8 @@ check_tup_satisfies_update:
Assert(!UHeapTupleHasExternal(newtup));
needToast = false;
} else {
needToast = (newtup->disk_tuple_size >= UTOAST_TUPLE_THRESHOLD || UHeapTupleHasExternal(&oldtup) ||
UHeapTupleHasExternal(newtup));
needToast = ((newtup->disk_tuple_size >= UTOAST_TUPLE_THRESHOLD && UHeapDiskTupHasVarWidth(newtup->disk_tuple))
|| UHeapTupleHasExternal(&oldtup) || UHeapTupleHasExternal(newtup));
}
oldtupsize = SHORTALIGN(oldtup.disk_tuple_size);
@ -3003,7 +3003,7 @@ check_tup_satisfies_update:
bool isOldTupleCopied = false;
char identity;
UHeapTuple oldKeyTuple = UHeapExtractReplicaIdentity(relation, &oldtup, &isOldTupleCopied, &identity);
/* Prepare an undo record for this operation. */
/* Save the previous updated information in the undo record */
TD oldTD;

View File

@ -201,7 +201,7 @@ void UHeapFillDiskTuple(TupleDesc tupleDesc, Datum *values, const bool *isnull,
/* varlena */
Pointer val = DatumGetPointer(values[i]);
diskTuple->flag |= HEAP_HASVARWIDTH;
diskTuple->flag |= UHEAP_HASVARWIDTH;
if (VARATT_IS_EXTERNAL(val)) {
diskTuple->flag |= HEAP_HASEXTERNAL;
@ -226,7 +226,7 @@ void UHeapFillDiskTuple(TupleDesc tupleDesc, Datum *values, const bool *isnull,
securec_check(rc, "\0", "\0");
}
} else if (att[i].attlen == LEN_CSTRING) {
diskTuple->flag |= HEAP_HASVARWIDTH;
diskTuple->flag |= UHEAP_HASVARWIDTH;
Assert(att[i].attalign == 'c');
attrLength = strlen(DatumGetCString(values[i])) + 1;
Assert(attrLength <= MaxPossibleUHeapTupleSize);

View File

@ -19,6 +19,7 @@
#include "access/genam.h"
#include "access/heapam.h"
#include "access/nbtree.h"
#include "access/tableam.h"
#include "nodes/relation.h"
#include "access/tuptoaster.h"
@ -45,20 +46,22 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde
static Datum UHeapToastCompressDatum(Datum value);
static bool UHeapToastIdValueIdExists(Oid toastrelid, Oid valueid, int2 bucketid);
static bool UHeapToastRelValueidExists(Relation toastrel, Oid valueid);
static Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn);
static Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn,
bool *inconsistent);
/*
 * UHeapToastCompressDatum() -- Compress a datum for ustore toast storage.
 *
 * Thin wrapper that forwards the value to toast_compress_datum() and hands
 * back whatever that routine returns.
 */
static Datum UHeapToastCompressDatum(Datum value)
{
    Datum compressed = toast_compress_datum(value);
    return compressed;
}
Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn, bool *inconsistent)
{
Oid newOid;
SysScanDesc scan;
ScanKeyData key;
bool collides = false;
Assert(RelationIsUstoreFormat(relation) || RelationIsToast(relation));
Assert(inconsistent != NULL);
TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(relation), false, relation->rd_tam_ops);
/* Generate new OIDs until we find one not in the table */
do {
@ -69,13 +72,40 @@ Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn
* chunk_id for toast datum to prevent wrap around.
*/
newOid = GetNewObjectId(IsToastNamespace(RelationGetNamespace(relation)));
*inconsistent = false;
ScanKeyInit(&key, oidcolumn, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(newOid));
/* see notes above about using SnapshotAny */
scan = systable_beginscan(relation, indexId, true, SnapshotAny, ATTR_FIRST, &key);
collides = UHeapSysIndexGetnextSlot(scan, ForwardScanDirection, slot);
while (UHeapSysIndexGetnextSlot(scan, ForwardScanDirection, slot)) {
bool isnull = false;
UHeapTuple ttup = ExecGetUHeapTupleFromSlot(slot);
Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, RelationGetDescr(relation), &isnull));
Assert(!isnull);
if (chunk_id == newOid) {
collides = true;
break;
} else {
*inconsistent = true;
if (scan->iscan != NULL && (!scan->iscan->xactStartedInRecovery)) {
scan->iscan->kill_prior_tuple = true;
BTScanOpaque so = (BTScanOpaque)scan->iscan->opaque;
if (so != NULL) {
BTScanPosItem indexItem = so->currPos.items[so->currPos.itemIndex];
OffsetNumber indexOffset = indexItem.indexOffset;
ItemPointerData heapTid = indexItem.heapTid;
ereport(LOG, (errcode(ERRCODE_UNEXPECTED_CHUNK_VALUE),
errmsg("found toast chunk %u is not scan toast value %u of toast relation %u, will skip."
"toast index tuple at offset %hu with ctid (%u, %u) is marked dead.",
chunk_id, newOid, relation->rd_node.relNode, indexOffset,
ItemPointerGetBlockNumber(&heapTid), ItemPointerGetOffsetNumber(&heapTid)),
errcause("found toast chunk is not scan toast value."),
erraction("Check the toast chunk.")));
}
}
}
}
systable_endscan(scan);
} while (collides);
ExecDropSingleTupleTableSlot(slot);
@ -665,6 +695,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde
Pointer dval = DatumGetPointer(value);
errno_t rc;
int2 bucketid = InvalidBktId;
bool inconsistent = false;
Assert(!VARATT_IS_EXTERNAL(value));
rc = memset_s(&chunkData, sizeof(chunkData), 0, sizeof(chunkData));
securec_check(rc, "", "");
@ -736,7 +767,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde
*/
if (!OidIsValid(rel->rd_toastoid)) {
/* normal case: just choose an unused OID */
toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1);
toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1, &inconsistent);
} else {
/* rewrite case: check to see if value was in old toast table */
toastPointer.va_valueid = InvalidOid;
@ -781,7 +812,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde
* old or new toast table
*/
do {
toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1);
toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1, &inconsistent);
} while (UHeapToastIdValueIdExists(rel->rd_toastoid, toastPointer.va_valueid, bucketid));
}
}
@ -824,7 +855,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde
* the TOAST table, since we don't bother to update anything else.
*/
(void)index_insert(toastidx, tValues, tIsnull, &(toasttup->ctid), toastrel,
toastidx->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
(toastidx->rd_index->indisunique && !inconsistent) ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
/*
* Free memory
@ -882,6 +913,8 @@ static void UHeapToastDeleteDatum(Relation rel, Datum value, int options)
SysScanDesc toastscan;
UHeapTuple toasttup;
int2 bucketid;
bool found = false;
bool isnull = false;
if (!VARATT_IS_EXTERNAL_ONDISK_B(attr))
return;
@ -927,6 +960,14 @@ static void UHeapToastDeleteDatum(Relation rel, Datum value, int options)
* Have a chunk, delete it
*/
toasttup = ExecGetUHeapTupleFromSlot(slot);
Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(toasttup, ATTR_FIRST, RelationGetDescr(toastrel), &isnull));
Assert(!isnull);
if (chunk_id != toastPointer.va_valueid) {
ereport(LOG, (errmsg("Delete toast chunk %u is not scan toast chunk %u of toast relation is %u, will skip",
chunk_id, toastPointer.va_valueid, toastPointer.va_toastrelid)));
continue;
}
found = true;
SimpleUHeapDelete(toastrel, &toasttup->ctid, SnapshotToast);
Datum values[INDEX_MAX_KEYS];
@ -940,6 +981,11 @@ static void UHeapToastDeleteDatum(Relation rel, Datum value, int options)
index_delete(toastidx, values, isnulls, &toasttup->ctid, false);
}
if (!found) {
ereport(LOG, (errmsg("Toast chunk %u of toast relation is %u delete 0 rows", toastPointer.va_valueid,
toastPointer.va_toastrelid)));
}
/*
* End scan and close relations
*/
@ -1008,6 +1054,31 @@ struct varlena *UHeapInternalToastFetchDatum(struct varatt_external toastPointer
* Have a chunk, extract the sequence number and the data
*/
ttup = ExecGetUHeapTupleFromSlot(slot);
Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, toastTupDesc, &isnull));
Assert(!isnull);
if (chunk_id != toastPointer.va_valueid) {
if (toastscan->iscan != NULL && (!toastscan->iscan->xactStartedInRecovery)) {
toastscan->iscan->kill_prior_tuple = true;
BTScanOpaque so = (BTScanOpaque)toastscan->iscan->opaque;
if (so != NULL) {
BTScanPosItem indexItem = so->currPos.items[so->currPos.itemIndex];
OffsetNumber indexOffset = indexItem.indexOffset;
ItemPointerData heapTid = indexItem.heapTid;
ereport(LOG, (errcode(ERRCODE_UNEXPECTED_CHUNK_VALUE),
errmsg("UHeapInternalToastFetchDatum found toast chunk %u is not scan toast chunk %u of "
"toast relation %u toast size detail (%d, %d), will skip."
"toast index tuple at offset %hu with ctid (%u, %u) is marked dead,"
"toast tuple ctid is (%u, %u).",
chunk_id, toastPointer.va_valueid, toastPointer.va_toastrelid,
toastPointer.va_rawsize, toastPointer.va_extsize, indexOffset,
ItemPointerGetBlockNumber(&heapTid), ItemPointerGetOffsetNumber(&heapTid),
ItemPointerGetBlockNumber(&(ttup->ctid)), ItemPointerGetOffsetNumber(&(ttup->ctid))),
errcause("found toast chunk is not scan toast value."),
erraction("Check the toast chunk.")));
}
}
continue;
}
residx = DatumGetInt32(UHeapFastGetAttr(ttup, ATTR_SECOND, toastTupDesc, &isnull));
Assert(!isnull);
chunk = DatumGetPointer(UHeapFastGetAttr(ttup, ATTR_THIRD, toastTupDesc, &isnull));
@ -1194,6 +1265,13 @@ struct varlena *UHeapInternalToastFetchDatumSlice(struct varatt_external toastPo
* Have a chunk, extract the sequence number and the data
*/
ttup = ExecGetUHeapTupleFromSlot(slot);
Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, toastTupDesc, &isnull));
Assert(!isnull);
if (chunk_id != toastPointer.va_valueid) {
ereport(LOG, (errmsg("UHeapInternalToastFetchDatumSlice find toast chunk %u is not scan toast chunk %u of "
"toast relation %u, will skip", chunk_id, toastPointer.va_valueid, toastPointer.va_toastrelid)));
continue;
}
residx = DatumGetInt32(UHeapFastGetAttr(ttup, CHUNK_ID_ATTR, toastTupDesc, &isnull));
Assert(!isnull);
chunk = DatumGetPointer(UHeapFastGetAttr(ttup, CHUNK_DATA_ATTR, toastTupDesc, &isnull));
@ -1304,8 +1382,20 @@ static bool UHeapToastRelValueidExists(Relation toastrel, Oid valueid)
* Is there any such chunk?
*/
toastscan = systable_beginscan(toastrel, toastrel->rd_rel->reltoastidxid, true, SnapshotAny, 1, &toastkey);
result = UHeapSysIndexGetnextSlot(toastscan, ForwardScanDirection, slot);
while (UHeapSysIndexGetnextSlot(toastscan, ForwardScanDirection, slot)) {
bool isnull = false;
UHeapTuple ttup = ExecGetUHeapTupleFromSlot(slot);
Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, RelationGetDescr(toastrel), &isnull));
Assert(!isnull);
if (chunk_id == valueid) {
result = true;
break;
}
else {
ereport(LOG, (errmsg("UHeapToastRelValueidExists find toast chunk %u is not scan toast chunk %u of toast "
"relation %u, will skip", chunk_id, valueid, toastrel->rd_id)));
}
}
systable_endscan(toastscan);
ExecDropSingleTupleTableSlot(slot);

View File

@ -20,6 +20,7 @@
#include "utils/relcache.h"
#include "storage/buf/buf.h"
#include "storage/buf/bufmgr.h"
#include "ubtree.h"
/*
* state for bulk inserts --- private to heapam.c and hio.c
@ -39,6 +40,6 @@ extern Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer othe
BulkInsertState bistate, Buffer* vmbuffer, Buffer* vmbuffer_other, BlockNumber end_rel_block);
extern Buffer RelationGetNewBufferForBulkInsert(Relation relation, Size len, Size dictSize, BulkInsertState bistate);
extern Buffer ReadBufferBI(Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate);
extern void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate);
extern void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate, NewPageState* npState = NULL);
#endif /* HIO_H */

View File

@ -27,6 +27,7 @@
#include "catalog/pg_index.h"
#include "lib/stringinfo.h"
#include "storage/buf/bufmgr.h"
#include "datatype/timestamp.h"
/*
* prototypes for functions in ubtree.cpp (external entry points for ubtree)
@ -475,6 +476,34 @@ typedef struct {
OffsetNumber previousdead[MaxIndexTuplesPerPage];
} IndexPruneState;
/*
 * NewPageState -- diagnostic statistics gathered while UBTreeGetNewPage()
 * looks for (or creates) a page. Callers allocate it zeroed only when
 * module_logging_is_on(MOD_UBT_NEWPAGE) and print it with
 * UBTreePrintNewPageState(); every collection site is guarded by
 * "npstate != NULL".
 *
 * The *Time fields accumulate differences of GetCurrentTimestamp() values
 * (presumably microseconds -- TODO confirm against TimestampTz).
 */
typedef struct {
/* time spent in the first UBTreeGetAvailablePage() call of UBTreeGetNewPage() */
TimestampTz firstGetAvailablePageTime;
/* time spent in the re-check UBTreeGetAvailablePage() call made after LockRelationForExtension() */
TimestampTz secondGetAvailablePageTime;
/* time spent in RelationAddExtraBlocks() (bulk extension) */
TimestampTz extendBlocksTime;
/* time spent in ReadBuffer(rel, P_NEW) (extend by one page) */
TimestampTz extendOneTime;
/* time spent in RecycleQueueGetEndpointPage() inside UBTreeGetAvailablePage() */
TimestampTz getHeadTime;
/* total time spent in GetAvailablePageOnPage() calls */
TimestampTz getAvailablePageOnPageTime;
/* longest single GetAvailablePageOnPage() call */
TimestampTz getAvailablePageOnPageTimeMax;
/* number of first-attempt UBTreeGetAvailablePage() calls */
uint32 firstGetAvailablePageCount;
/* number of re-check UBTreeGetAvailablePage() calls made under the extension lock */
uint32 secondGetAvailablePageCount;
/* times the first lookup returned InvalidBuffer, so the relation had to be extended */
uint32 bufferInvalidCount;
/* times the relation was not local and the extension lock was needed */
uint32 needLockCount;
/* number of RelationAddExtraBlocks() calls */
uint32 extendBlocksCount;
/* sum of the extraBlocks values computed in UBtreeAddExtraBlocks() */
uint32 extendBlocks;
/* number of single-page extensions via ReadBuffer(rel, P_NEW) */
uint32 extendOneCount;
/* NOTE(review): printed as queue_count, but no update site is visible in this change -- confirm it is maintained */
uint32 queueCount;
/* recycle-queue items visited by GetAvailablePageOnPage() */
uint32 itemsCount;
/* visited items for which the target index page was actually read */
uint32 itemsValidCount;
/* incremented in the RECYCLE_FREED_FORK branch after ConditionalLockBuffer() on the target page */
uint32 itemsValidConditionalLockCount;
/* number of GetAvailablePageOnPage() calls (queue pages travelled) */
uint32 getAvailablePageOnPageCount;
/* restarts because ConditionalLockBuffer() failed on the freshly extended page */
uint32 firstGotoRestartCount;
/* restarts taken from the second "goto restart" in UBTreeGetNewPage() (after the "out" label) */
uint32 secondGotoRestartCount;
/* times UBTreeGetAvailablePage() fell through to checking newly created pages */
uint32 checkNewCreatePagesCount;
/* times a locked PageIsNew() page was found among the newly created pages */
uint32 getFromNewCreatePagesCount;
/*
 * getAvailablePageOnPageCount divided by (firstGetAvailablePageCount +
 * secondGetAvailablePageCount) as sampled on entry to UBTreeGetAvailablePage().
 * NOTE(review): the counters are incremented only after that call returns, so
 * the divisor looks like 0 during the very first lookup -- confirm.
 */
double avgTravelQueuePages;
/* itemsCount divided by getAvailablePageOnPageCount */
double avgTravelQueueItems;
} NewPageState;
#define TXNINFOSIZE (sizeof(ShortTransactionId) * 2)
/*
@ -547,8 +576,8 @@ extern OffsetNumber UBTreeFindsplitloc(Relation rel, Buffer buf, OffsetNumber ne
extern OffsetNumber UBTreeFindsplitlocInsertpt(Relation rel, Buffer buf, OffsetNumber newitemoff, Size newitemsz,
bool *newitemonleft, IndexTuple newitem);
extern Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr);
extern Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr, NewPageState* npState = NULL);
extern void UBTreePrintNewPageState(NewPageState* npstate);
/*
* prototypes for functions in ubtxlog.cpp
*/
@ -628,7 +657,8 @@ extern void UBTreeTryRecycleEmptyPage(Relation rel);
extern void UBTreeRecordFreePage(Relation rel, BlockNumber blkno, TransactionId xid);
extern void UBTreeRecordEmptyPage(Relation rel, BlockNumber blkno, TransactionId xid);
extern void UBTreeRecordUsedPage(Relation rel, UBTRecycleQueueAddress addr);
extern Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress* addr);
extern Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress* addr,
NewPageState* npState = NULL);
extern void UBTreeRecycleQueueInitPage(Relation rel, Page page, BlockNumber blkno, BlockNumber prevBlkno,
BlockNumber nextBlkno);
extern void UBtreeRecycleQueueChangeChain(Buffer buf, BlockNumber newBlkno, bool setNext);

View File

@ -150,6 +150,7 @@ enum ModuleId {
MOD_GPI, /* debug info for global partition index */
MOD_PARTITION,
MOD_UBT_NEWPAGE, /* debug info for UBTreeGetNewPage */
MOD_SRF, /* debug info for SRF */
MOD_SS_TXNSTATUS, /* debug info for SS TXNSTATUS */