toast场景优化
This commit is contained in:
@ -136,6 +136,7 @@ const module_data module_map[] = {{MOD_ALL, "ALL"},
|
||||
{MOD_DSS, "DSS_API"},
|
||||
{MOD_GPI, "GPI"},
|
||||
{MOD_PARTITION, "PARTITION"},
|
||||
{MOD_UBT_NEWPAGE, "UBT_NEWPAGE"},
|
||||
{MOD_SRF, "SRF"},
|
||||
{MOD_SS_TXNSTATUS, "SS_TXNSTATUS"},
|
||||
|
||||
|
||||
@ -129,7 +129,7 @@ void CheckRelation(const Relation relation, int* extraBlocks, int lockWaiters)
|
||||
}
|
||||
}
|
||||
|
||||
static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate)
|
||||
static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate, NewPageState* npstate)
|
||||
{
|
||||
int extraBlocks = 0;
|
||||
int lockWaiters = RelationExtensionLockWaiterCount(relation);
|
||||
@ -137,6 +137,9 @@ static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate)
|
||||
return;
|
||||
}
|
||||
CheckRelation(relation, &extraBlocks, lockWaiters);
|
||||
if (npstate != NULL) {
|
||||
npstate->extendBlocks += (uint32)extraBlocks;
|
||||
}
|
||||
while (extraBlocks-- >= 0) {
|
||||
/* Ouch - an unnecessary lseek() each time through the loop! */
|
||||
Buffer buffer = ReadBufferBI(relation, P_NEW, RBM_NORMAL, bistate);
|
||||
@ -145,7 +148,7 @@ static void UBtreeAddExtraBlocks(Relation relation, BulkInsertState bistate)
|
||||
}
|
||||
}
|
||||
|
||||
void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
|
||||
void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate, NewPageState* npstate)
|
||||
{
|
||||
BlockNumber block_num = InvalidBlockNumber;
|
||||
BlockNumber first_block = InvalidBlockNumber;
|
||||
@ -155,7 +158,7 @@ void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
|
||||
|
||||
if (RelationIsUstoreIndex(relation)) {
|
||||
/* ubtree, use another bypass */
|
||||
UBtreeAddExtraBlocks(relation, bistate);
|
||||
UBtreeAddExtraBlocks(relation, bistate, npstate);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -2586,7 +2586,7 @@ static bool CheckItemIsAlive(ItemPointer tid, Relation relation, Snapshot snapsh
|
||||
bool* all_dead, CUDescScan* cudescScan)
|
||||
{
|
||||
if (!RelationIsCUFormat(relation)) {
|
||||
return heap_hot_search(tid, relation, snapshot, all_dead);
|
||||
return TableIndexFetchTupleCheck(relation, tid, snapshot, all_dead);
|
||||
} else {
|
||||
return cudescScan->CheckItemIsAlive(tid);
|
||||
}
|
||||
|
||||
@ -1403,7 +1403,15 @@ static Buffer UBTreeSplit(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber fi
|
||||
* before we release the Exclusive lock.
|
||||
*/
|
||||
UBTRecycleQueueAddress addr;
|
||||
rbuf = UBTreeGetNewPage(rel, &addr);
|
||||
NewPageState *npstate = NULL;
|
||||
if (module_logging_is_on(MOD_UBT_NEWPAGE)) {
|
||||
npstate = (NewPageState *)palloc0(sizeof(NewPageState));
|
||||
}
|
||||
rbuf = UBTreeGetNewPage(rel, &addr, npstate);
|
||||
if (npstate != NULL) {
|
||||
UBTreePrintNewPageState(npstate);
|
||||
pfree(npstate);
|
||||
}
|
||||
|
||||
/*
|
||||
* origpage is the original page to be split. leftpage is a temporary
|
||||
@ -2471,7 +2479,15 @@ static Buffer UBTreeNewRoot(Relation rel, Buffer lbuf, Buffer rbuf)
|
||||
* before we release the Exclusive lock.
|
||||
*/
|
||||
UBTRecycleQueueAddress addr;
|
||||
rootbuf = UBTreeGetNewPage(rel, &addr);
|
||||
NewPageState *npstate = NULL;
|
||||
if (module_logging_is_on(MOD_UBT_NEWPAGE)) {
|
||||
npstate = (NewPageState *)palloc0(sizeof(NewPageState));
|
||||
}
|
||||
rootbuf = UBTreeGetNewPage(rel, &addr, npstate);
|
||||
if (npstate != NULL) {
|
||||
UBTreePrintNewPageState(npstate);
|
||||
pfree(npstate);
|
||||
}
|
||||
rootpage = BufferGetPage(rootbuf);
|
||||
rootblknum = BufferGetBlockNumber(rootbuf);
|
||||
|
||||
|
||||
@ -41,6 +41,7 @@
|
||||
#include "storage/procarray.h"
|
||||
#include "utils/inval.h"
|
||||
#include "utils/snapmgr.h"
|
||||
#include "datatype/timestamp.h"
|
||||
|
||||
static bool UBTreeMarkPageHalfDead(Relation rel, Buffer leafbuf, BTStack stack);
|
||||
static bool UBTreeUnlinkHalfDeadPage(Relation rel, Buffer leafbuf, bool *rightsib_empty, BTStack del_blknos = NULL);
|
||||
@ -88,6 +89,27 @@ void UBTreeInitMetaPage(Page page, BlockNumber rootbknum, uint32 level)
|
||||
((PageHeader)page)->pd_lower = (uint16)(((char *)metad + sizeof(BTMetaPageData)) - (char *)page);
|
||||
}
|
||||
|
||||
/*
 * UBTreePrintNewPageState() -- Log the statistics gathered in a NewPageState.
 *
 * Emits every field of npstate as a single LOG-level message tagged with
 * module MOD_UBT_NEWPAGE. The function only reads npstate; it does not reset
 * or free it.
 *
 * npstate: statistics to print. A NULL pointer is tolerated and produces no
 *          output, so callers that only allocate the state when module
 *          logging is enabled may call this unconditionally.
 *
 * NOTE(review): the time fields are TimestampTz values printed with %ld;
 * this assumes TimestampTz is a long-sized integer on the target platform --
 * confirm against the TimestampTz typedef.
 */
void UBTreePrintNewPageState(NewPageState* npstate)
{
    if (npstate == NULL) {
        return;
    }

    /*
     * The arguments below must stay in exactly the order of the conversion
     * specifiers in the format string. Each adjacent literal ends with a
     * separator ("; " or ", ") so the concatenated message keeps its fields
     * apart.
     */
    ereport(LOG, (errmodule(MOD_UBT_NEWPAGE), (errmsg(
        "NewPageState: first_get_available_page_time:%ld, count:%u; second_get_available_page_time:%ld, count:%u; "
        "extend_blocks_time:%ld, count:%u, blocks:%u; extend_one_time:%ld, count:%u; "
        "get_head_time:%ld; get_available_page_on_page_time:%ld, get_available_page_on_page_time_max:%ld; "
        "buffer_invalid_count:%u; need_lock_count:%u; queue_count:%u, items_count:%u, items_valid_count:%u; "
        "conditional_lock_count:%u; get_available_page_on_page_count:%u; goto_restart_count first:%u, second:%u; "
        "new_create_pages_count check:%u, get:%u; avg_travel_queue pages:%.2f, items:%.2f.",
        npstate->firstGetAvailablePageTime, npstate->firstGetAvailablePageCount,
        npstate->secondGetAvailablePageTime, npstate->secondGetAvailablePageCount,
        npstate->extendBlocksTime, npstate->extendBlocksCount, npstate->extendBlocks,
        npstate->extendOneTime, npstate->extendOneCount, npstate->getHeadTime,
        npstate->getAvailablePageOnPageTime, npstate->getAvailablePageOnPageTimeMax,
        npstate->bufferInvalidCount, npstate->needLockCount, npstate->queueCount, npstate->itemsCount,
        npstate->itemsValidCount, npstate->itemsValidConditionalLockCount,
        npstate->getAvailablePageOnPageCount, npstate->firstGotoRestartCount,
        npstate->secondGotoRestartCount, npstate->checkNewCreatePagesCount,
        npstate->getFromNewCreatePagesCount, npstate->avgTravelQueuePages, npstate->avgTravelQueueItems))));
}
|
||||
|
||||
/*
|
||||
* UBTreeGetRoot() -- Get the root page of the btree.
|
||||
*
|
||||
@ -255,7 +277,15 @@ Buffer UBTreeGetRoot(Relation rel, int access)
|
||||
* before we release the Exclusive lock.
|
||||
*/
|
||||
UBTRecycleQueueAddress addr;
|
||||
rootbuf = UBTreeGetNewPage(rel, &addr);
|
||||
NewPageState *npstate = NULL;
|
||||
if (module_logging_is_on(MOD_UBT_NEWPAGE)) {
|
||||
npstate = (NewPageState *)palloc0(sizeof(NewPageState));
|
||||
}
|
||||
rootbuf = UBTreeGetNewPage(rel, &addr, npstate);
|
||||
if (npstate != NULL) {
|
||||
UBTreePrintNewPageState(npstate);
|
||||
pfree(npstate);
|
||||
}
|
||||
rootblkno = BufferGetBlockNumber(rootbuf);
|
||||
rootpage = BufferGetPage(rootbuf);
|
||||
rootopaque = (UBTPageOpaqueInternal)PageGetSpecialPointer(rootpage);
|
||||
@ -1453,11 +1483,19 @@ static bool UBTreeUnlinkHalfDeadPage(Relation rel, Buffer leafbuf, bool *rightsi
|
||||
* page in the Recycle Queue, and we need to call UBTreeRecordUsedPage()
|
||||
* with this addr when the returned page is used correctly.
|
||||
*/
|
||||
Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr)
|
||||
Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr, NewPageState* npstate)
|
||||
{
|
||||
WHITEBOX_TEST_STUB("UBTreeGetNewPage-begin", WhiteboxDefaultErrorEmit);
|
||||
TimestampTz startTime = 0;
|
||||
restart:
|
||||
Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr);
|
||||
if (npstate != NULL) {
|
||||
startTime = GetCurrentTimestamp();
|
||||
}
|
||||
Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr, npstate);
|
||||
if (npstate != NULL) {
|
||||
npstate->firstGetAvailablePageTime += GetCurrentTimestamp() - startTime;
|
||||
npstate->firstGetAvailablePageCount++;
|
||||
}
|
||||
if (buf == InvalidBuffer) {
|
||||
/*
|
||||
* No free page left, need to extend the relation
|
||||
@ -1469,24 +1507,51 @@ restart:
|
||||
* page. We can skip locking for new or temp relations, however,
|
||||
* since no one else could be accessing them.
|
||||
*/
|
||||
if (npstate != NULL) {
|
||||
npstate->bufferInvalidCount++;
|
||||
}
|
||||
bool needLock = !RELATION_IS_LOCAL(rel);
|
||||
if (needLock) {
|
||||
if (npstate != NULL) {
|
||||
npstate->needLockCount++;
|
||||
}
|
||||
if (!ConditionalLockRelationForExtension(rel, ExclusiveLock)) {
|
||||
/* couldn't get the lock immediately; wait for it. */
|
||||
LockRelationForExtension(rel, ExclusiveLock);
|
||||
if (npstate != NULL) {
|
||||
startTime = GetCurrentTimestamp();
|
||||
}
|
||||
/* check again, relation may extended by other backends */
|
||||
buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr);
|
||||
buf = UBTreeGetAvailablePage(rel, RECYCLE_FREED_FORK, addr, npstate);
|
||||
if (npstate != NULL) {
|
||||
npstate->secondGetAvailablePageTime += GetCurrentTimestamp() - startTime;
|
||||
npstate->secondGetAvailablePageCount++;
|
||||
}
|
||||
if (buf != InvalidBuffer) {
|
||||
UnlockRelationForExtension(rel, ExclusiveLock);
|
||||
goto out;
|
||||
}
|
||||
if (npstate != NULL) {
|
||||
startTime = GetCurrentTimestamp();
|
||||
}
|
||||
/* Time to bulk-extend. */
|
||||
RelationAddExtraBlocks(rel, NULL);
|
||||
RelationAddExtraBlocks(rel, NULL, npstate);
|
||||
if (npstate != NULL) {
|
||||
npstate->extendBlocksTime += GetCurrentTimestamp() - startTime;
|
||||
npstate->extendBlocksCount++;
|
||||
}
|
||||
WHITEBOX_TEST_STUB("UBTreeGetNewPage-bulk-extend", WhiteboxDefaultErrorEmit);
|
||||
}
|
||||
}
|
||||
if (npstate != NULL) {
|
||||
startTime = GetCurrentTimestamp();
|
||||
}
|
||||
/* extend by one page */
|
||||
buf = ReadBuffer(rel, P_NEW);
|
||||
if (npstate != NULL) {
|
||||
npstate->extendOneTime += GetCurrentTimestamp() - startTime;
|
||||
npstate->extendOneCount++;
|
||||
}
|
||||
WHITEBOX_TEST_STUB("UBTreeGetNewPage-extend", WhiteboxDefaultErrorEmit);
|
||||
if (!ConditionalLockBuffer(buf)) {
|
||||
/* lock failed. To avoid dead lock, we need to retry */
|
||||
@ -1494,6 +1559,9 @@ restart:
|
||||
UnlockRelationForExtension(rel, ExclusiveLock);
|
||||
}
|
||||
ReleaseBuffer(buf);
|
||||
if (npstate != NULL) {
|
||||
npstate->firstGotoRestartCount++;
|
||||
}
|
||||
goto restart;
|
||||
}
|
||||
/*
|
||||
@ -1518,6 +1586,9 @@ out:
|
||||
ReleaseBuffer(addr->queueBuf);
|
||||
addr->queueBuf = InvalidBuffer;
|
||||
}
|
||||
if (npstate != NULL) {
|
||||
npstate->secondGotoRestartCount++;
|
||||
}
|
||||
goto restart;
|
||||
}
|
||||
|
||||
|
||||
@ -29,6 +29,7 @@
|
||||
#include "storage/procarray.h"
|
||||
#include "utils/aiomem.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "datatype/timestamp.h"
|
||||
|
||||
static uint32 BlockGetMaxItems(BlockNumber blkno);
|
||||
static void UBTreeInitRecycleQueuePage(Relation rel, Page page, Size size, BlockNumber blkno);
|
||||
@ -37,7 +38,7 @@ static void UBTreeRecycleQueueAddPage(Relation rel, UBTRecycleForkNumber forkNum
|
||||
BlockNumber blkno, TransactionId xid);
|
||||
static Buffer StepNextPage(Relation rel, Buffer buf);
|
||||
static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumber, Buffer buf,
|
||||
TransactionId waterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan);
|
||||
TransactionId waterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan, NewPageState* npstate = NULL);
|
||||
static Buffer MoveToEndpointPage(Relation rel, Buffer buf, bool needHead, int access);
|
||||
static uint16 PageAllocateItem(Buffer buf);
|
||||
static void RecycleQueueLinkNewPage(Relation rel, Buffer leftBuf, Buffer newBuf);
|
||||
@ -284,8 +285,8 @@ void UBTreeInitializeRecycleQueue(Relation rel)
|
||||
static bool UBTreeTryRecycleEmptyPageInternal(Relation rel)
|
||||
{
|
||||
UBTRecycleQueueAddress addr;
|
||||
|
||||
Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_EMPTY_FORK, &addr);
|
||||
NewPageState *npstate = NULL;
|
||||
Buffer buf = UBTreeGetAvailablePage(rel, RECYCLE_EMPTY_FORK, &addr, npstate);
|
||||
if (!BufferIsValid(buf)) {
|
||||
return false; /* no available page to recycle */
|
||||
}
|
||||
@ -356,13 +357,16 @@ static Buffer StepNextPage(Relation rel, Buffer buf)
|
||||
}
|
||||
|
||||
static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumber, Buffer buf,
|
||||
TransactionId WaterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan)
|
||||
TransactionId WaterLevelXid, UBTRecycleQueueAddress *addr, bool *continueScan, NewPageState* npstate)
|
||||
{
|
||||
Page page = BufferGetPage(buf);
|
||||
UBTRecycleQueueHeader header = GetRecycleQueueHeader(page, BufferGetBlockNumber(buf));
|
||||
|
||||
uint16 curOffset = header->head;
|
||||
while (IsNormalOffset(curOffset)) {
|
||||
if (npstate != NULL) {
|
||||
npstate->itemsCount++;
|
||||
}
|
||||
UBTRecycleQueueItem item = HeaderGetItem(header, curOffset);
|
||||
if (TransactionIdFollowsOrEquals(item->xid, WaterLevelXid)) {
|
||||
*continueScan = false;
|
||||
@ -372,6 +376,9 @@ static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumb
|
||||
curOffset = item->next;
|
||||
continue;
|
||||
}
|
||||
if (npstate != NULL) {
|
||||
npstate->itemsValidCount++;
|
||||
}
|
||||
Buffer targetBuf = ReadBuffer(rel, item->blkno);
|
||||
_bt_checkbuffer_valid(rel, targetBuf);
|
||||
if (ConditionalLockBuffer(targetBuf)) {
|
||||
@ -379,6 +386,9 @@ static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumb
|
||||
bool pageUsable = true;
|
||||
if (forkNumber == RECYCLE_FREED_FORK) {
|
||||
pageUsable = UBTreePageRecyclable(BufferGetPage(targetBuf));
|
||||
if (npstate != NULL) {
|
||||
npstate->itemsValidConditionalLockCount++;
|
||||
}
|
||||
} else if (forkNumber == RECYCLE_EMPTY_FORK) {
|
||||
/* make sure that it's not half-dead or the deletion is not reserved yet */
|
||||
Page indexPage = BufferGetPage(targetBuf);
|
||||
@ -415,8 +425,12 @@ static Buffer GetAvailablePageOnPage(Relation rel, UBTRecycleForkNumber forkNumb
|
||||
return InvalidBuffer;
|
||||
}
|
||||
|
||||
Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress *addr)
|
||||
Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress *addr,
|
||||
NewPageState *npstate)
|
||||
{
|
||||
TimestampTz startTime = 0;
|
||||
TimestampTz elapsedTime = 0;
|
||||
uint32 getAvailablePageCount = 0;
|
||||
TransactionId oldestXmin = u_sess->utils_cxt.RecentGlobalDataXmin;
|
||||
if (RelationGetNamespace(rel) == PG_TOAST_NAMESPACE) {
|
||||
TransactionId frozenXid = g_instance.undo_cxt.globalFrozenXid;
|
||||
@ -424,12 +438,29 @@ Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBT
|
||||
TransactionId waterLevelXid = ((forkNumber == RECYCLE_EMPTY_FORK) ? recycleXid : frozenXid);
|
||||
oldestXmin = Min(oldestXmin, waterLevelXid);
|
||||
}
|
||||
if (npstate != NULL) {
|
||||
getAvailablePageCount = npstate->firstGetAvailablePageCount + npstate->secondGetAvailablePageCount;
|
||||
startTime = GetCurrentTimestamp();
|
||||
}
|
||||
Buffer queueBuf = RecycleQueueGetEndpointPage(rel, forkNumber, true, BT_READ);
|
||||
|
||||
if (npstate != NULL) {
|
||||
npstate->getHeadTime += GetCurrentTimestamp() - startTime;
|
||||
}
|
||||
Buffer indexBuf = InvalidBuffer;
|
||||
bool continueScan = false;
|
||||
for (BlockNumber bufCount = 0; bufCount < URQ_MAX_GET_PAGE_TIMES; bufCount++) {
|
||||
indexBuf = GetAvailablePageOnPage(rel, forkNumber, queueBuf, oldestXmin, addr, &continueScan);
|
||||
if (npstate != NULL) {
|
||||
npstate->getAvailablePageOnPageCount++;
|
||||
npstate->avgTravelQueuePages = (npstate->getAvailablePageOnPageCount * 1.0) / getAvailablePageCount;
|
||||
startTime = GetCurrentTimestamp();
|
||||
}
|
||||
indexBuf = GetAvailablePageOnPage(rel, forkNumber, queueBuf, oldestXmin, addr, &continueScan, npstate);
|
||||
if (npstate != NULL) {
|
||||
elapsedTime = GetCurrentTimestamp() - startTime;
|
||||
npstate->getAvailablePageOnPageTimeMax = Max(npstate->getAvailablePageOnPageTimeMax, elapsedTime);
|
||||
npstate->getAvailablePageOnPageTime += elapsedTime;
|
||||
npstate->avgTravelQueueItems = (npstate->itemsCount * 1.0) / npstate->getAvailablePageOnPageCount;
|
||||
}
|
||||
if (!continueScan) {
|
||||
break;
|
||||
}
|
||||
@ -449,6 +480,10 @@ Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBT
|
||||
return InvalidBuffer;
|
||||
}
|
||||
|
||||
if (npstate != NULL) {
|
||||
npstate->checkNewCreatePagesCount++;
|
||||
}
|
||||
|
||||
/* no available page found, but we can check new created pages */
|
||||
BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
|
||||
bool metaChanged = false;
|
||||
@ -471,6 +506,9 @@ Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBT
|
||||
indexBuf = ReadBuffer(rel, curBlkno);
|
||||
if (ConditionalLockBuffer(indexBuf)) {
|
||||
if (PageIsNew(BufferGetPage(indexBuf))) {
|
||||
if (npstate != NULL) {
|
||||
npstate->getFromNewCreatePagesCount++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
LockBuffer(indexBuf, BUFFER_LOCK_UNLOCK);
|
||||
|
||||
@ -2645,8 +2645,8 @@ check_tup_satisfies_update:
|
||||
Assert(!UHeapTupleHasExternal(newtup));
|
||||
needToast = false;
|
||||
} else {
|
||||
needToast = (newtup->disk_tuple_size >= UTOAST_TUPLE_THRESHOLD || UHeapTupleHasExternal(&oldtup) ||
|
||||
UHeapTupleHasExternal(newtup));
|
||||
needToast = ((newtup->disk_tuple_size >= UTOAST_TUPLE_THRESHOLD && UHeapDiskTupHasVarWidth(newtup->disk_tuple))
|
||||
|| UHeapTupleHasExternal(&oldtup) || UHeapTupleHasExternal(newtup));
|
||||
}
|
||||
|
||||
oldtupsize = SHORTALIGN(oldtup.disk_tuple_size);
|
||||
@ -3003,7 +3003,7 @@ check_tup_satisfies_update:
|
||||
bool isOldTupleCopied = false;
|
||||
char identity;
|
||||
UHeapTuple oldKeyTuple = UHeapExtractReplicaIdentity(relation, &oldtup, &isOldTupleCopied, &identity);
|
||||
|
||||
|
||||
/* Prepare an undo record for this operation. */
|
||||
/* Save the previous updated information in the undo record */
|
||||
TD oldTD;
|
||||
|
||||
@ -201,7 +201,7 @@ void UHeapFillDiskTuple(TupleDesc tupleDesc, Datum *values, const bool *isnull,
|
||||
/* varlena */
|
||||
Pointer val = DatumGetPointer(values[i]);
|
||||
|
||||
diskTuple->flag |= HEAP_HASVARWIDTH;
|
||||
diskTuple->flag |= UHEAP_HASVARWIDTH;
|
||||
|
||||
if (VARATT_IS_EXTERNAL(val)) {
|
||||
diskTuple->flag |= HEAP_HASEXTERNAL;
|
||||
@ -226,7 +226,7 @@ void UHeapFillDiskTuple(TupleDesc tupleDesc, Datum *values, const bool *isnull,
|
||||
securec_check(rc, "\0", "\0");
|
||||
}
|
||||
} else if (att[i].attlen == LEN_CSTRING) {
|
||||
diskTuple->flag |= HEAP_HASVARWIDTH;
|
||||
diskTuple->flag |= UHEAP_HASVARWIDTH;
|
||||
Assert(att[i].attalign == 'c');
|
||||
attrLength = strlen(DatumGetCString(values[i])) + 1;
|
||||
Assert(attrLength <= MaxPossibleUHeapTupleSize);
|
||||
|
||||
@ -19,6 +19,7 @@
|
||||
|
||||
#include "access/genam.h"
|
||||
#include "access/heapam.h"
|
||||
#include "access/nbtree.h"
|
||||
#include "access/tableam.h"
|
||||
#include "nodes/relation.h"
|
||||
#include "access/tuptoaster.h"
|
||||
@ -45,20 +46,22 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde
|
||||
static Datum UHeapToastCompressDatum(Datum value);
|
||||
static bool UHeapToastIdValueIdExists(Oid toastrelid, Oid valueid, int2 bucketid);
|
||||
static bool UHeapToastRelValueidExists(Relation toastrel, Oid valueid);
|
||||
static Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn);
|
||||
static Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn,
|
||||
bool *inconsistent);
|
||||
|
||||
/*
 * UHeapToastCompressDatum() -- thin wrapper around toast_compress_datum().
 *
 * Hands value to toast_compress_datum() unchanged and returns whatever that
 * call yields; no additional processing is done here.
 */
static Datum UHeapToastCompressDatum(Datum value)
{
    Datum compressed = toast_compress_datum(value);

    return compressed;
}
|
||||
|
||||
Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
|
||||
Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn, bool *inconsistent)
|
||||
{
|
||||
Oid newOid;
|
||||
SysScanDesc scan;
|
||||
ScanKeyData key;
|
||||
bool collides = false;
|
||||
Assert(RelationIsUstoreFormat(relation) || RelationIsToast(relation));
|
||||
Assert(inconsistent != NULL);
|
||||
TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(relation), false, relation->rd_tam_ops);
|
||||
/* Generate new OIDs until we find one not in the table */
|
||||
do {
|
||||
@ -69,13 +72,40 @@ Oid UHeapGetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn
|
||||
* chunk_id for toast datum to prevent wrap around.
|
||||
*/
|
||||
newOid = GetNewObjectId(IsToastNamespace(RelationGetNamespace(relation)));
|
||||
*inconsistent = false;
|
||||
|
||||
ScanKeyInit(&key, oidcolumn, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(newOid));
|
||||
|
||||
/* see notes above about using SnapshotAny */
|
||||
scan = systable_beginscan(relation, indexId, true, SnapshotAny, ATTR_FIRST, &key);
|
||||
collides = UHeapSysIndexGetnextSlot(scan, ForwardScanDirection, slot);
|
||||
|
||||
while (UHeapSysIndexGetnextSlot(scan, ForwardScanDirection, slot)) {
|
||||
bool isnull = false;
|
||||
UHeapTuple ttup = ExecGetUHeapTupleFromSlot(slot);
|
||||
Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, RelationGetDescr(relation), &isnull));
|
||||
Assert(!isnull);
|
||||
if (chunk_id == newOid) {
|
||||
collides = true;
|
||||
break;
|
||||
} else {
|
||||
*inconsistent = true;
|
||||
if (scan->iscan != NULL && (!scan->iscan->xactStartedInRecovery)) {
|
||||
scan->iscan->kill_prior_tuple = true;
|
||||
BTScanOpaque so = (BTScanOpaque)scan->iscan->opaque;
|
||||
if (so != NULL) {
|
||||
BTScanPosItem indexItem = so->currPos.items[so->currPos.itemIndex];
|
||||
OffsetNumber indexOffset = indexItem.indexOffset;
|
||||
ItemPointerData heapTid = indexItem.heapTid;
|
||||
ereport(LOG, (errcode(ERRCODE_UNEXPECTED_CHUNK_VALUE),
|
||||
errmsg("found toast chunk %u is not scan toast value %u of toast relation %u, will skip."
|
||||
"toast index tuple at offset %hu with ctid (%u, %u) is marked dead.",
|
||||
chunk_id, newOid, relation->rd_node.relNode, indexOffset,
|
||||
ItemPointerGetBlockNumber(&heapTid), ItemPointerGetOffsetNumber(&heapTid)),
|
||||
errcause("found toast chunk is not scan toast value."),
|
||||
erraction("Check the toast chunk.")));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
systable_endscan(scan);
|
||||
} while (collides);
|
||||
ExecDropSingleTupleTableSlot(slot);
|
||||
@ -665,6 +695,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde
|
||||
Pointer dval = DatumGetPointer(value);
|
||||
errno_t rc;
|
||||
int2 bucketid = InvalidBktId;
|
||||
bool inconsistent = false;
|
||||
Assert(!VARATT_IS_EXTERNAL(value));
|
||||
rc = memset_s(&chunkData, sizeof(chunkData), 0, sizeof(chunkData));
|
||||
securec_check(rc, "", "");
|
||||
@ -736,7 +767,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde
|
||||
*/
|
||||
if (!OidIsValid(rel->rd_toastoid)) {
|
||||
/* normal case: just choose an unused OID */
|
||||
toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1);
|
||||
toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1, &inconsistent);
|
||||
} else {
|
||||
/* rewrite case: check to see if value was in old toast table */
|
||||
toastPointer.va_valueid = InvalidOid;
|
||||
@ -781,7 +812,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde
|
||||
* old or new toast table
|
||||
*/
|
||||
do {
|
||||
toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1);
|
||||
toastPointer.va_valueid = UHeapGetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber)1, &inconsistent);
|
||||
} while (UHeapToastIdValueIdExists(rel->rd_toastoid, toastPointer.va_valueid, bucketid));
|
||||
}
|
||||
}
|
||||
@ -824,7 +855,7 @@ static Datum UHeapToastSaveDatum(Relation rel, Datum value, struct varlena *olde
|
||||
* the TOAST table, since we don't bother to update anything else.
|
||||
*/
|
||||
(void)index_insert(toastidx, tValues, tIsnull, &(toasttup->ctid), toastrel,
|
||||
toastidx->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
|
||||
(toastidx->rd_index->indisunique && !inconsistent) ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
|
||||
|
||||
/*
|
||||
* Free memory
|
||||
@ -882,6 +913,8 @@ static void UHeapToastDeleteDatum(Relation rel, Datum value, int options)
|
||||
SysScanDesc toastscan;
|
||||
UHeapTuple toasttup;
|
||||
int2 bucketid;
|
||||
bool found = false;
|
||||
bool isnull = false;
|
||||
|
||||
if (!VARATT_IS_EXTERNAL_ONDISK_B(attr))
|
||||
return;
|
||||
@ -927,6 +960,14 @@ static void UHeapToastDeleteDatum(Relation rel, Datum value, int options)
|
||||
* Have a chunk, delete it
|
||||
*/
|
||||
toasttup = ExecGetUHeapTupleFromSlot(slot);
|
||||
Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(toasttup, ATTR_FIRST, RelationGetDescr(toastrel), &isnull));
|
||||
Assert(!isnull);
|
||||
if (chunk_id != toastPointer.va_valueid) {
|
||||
ereport(LOG, (errmsg("Delete toast chunk %u is not scan toast chunk %u of toast relation is %u, will skip",
|
||||
chunk_id, toastPointer.va_valueid, toastPointer.va_toastrelid)));
|
||||
continue;
|
||||
}
|
||||
found = true;
|
||||
SimpleUHeapDelete(toastrel, &toasttup->ctid, SnapshotToast);
|
||||
|
||||
Datum values[INDEX_MAX_KEYS];
|
||||
@ -940,6 +981,11 @@ static void UHeapToastDeleteDatum(Relation rel, Datum value, int options)
|
||||
index_delete(toastidx, values, isnulls, &toasttup->ctid, false);
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
ereport(LOG, (errmsg("Toast chunk %u of toast relation is %u delete 0 rows", toastPointer.va_valueid,
|
||||
toastPointer.va_toastrelid)));
|
||||
}
|
||||
|
||||
/*
|
||||
* End scan and close relations
|
||||
*/
|
||||
@ -1008,6 +1054,31 @@ struct varlena *UHeapInternalToastFetchDatum(struct varatt_external toastPointer
|
||||
* Have a chunk, extract the sequence number and the data
|
||||
*/
|
||||
ttup = ExecGetUHeapTupleFromSlot(slot);
|
||||
Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, toastTupDesc, &isnull));
|
||||
Assert(!isnull);
|
||||
if (chunk_id != toastPointer.va_valueid) {
|
||||
if (toastscan->iscan != NULL && (!toastscan->iscan->xactStartedInRecovery)) {
|
||||
toastscan->iscan->kill_prior_tuple = true;
|
||||
BTScanOpaque so = (BTScanOpaque)toastscan->iscan->opaque;
|
||||
if (so != NULL) {
|
||||
BTScanPosItem indexItem = so->currPos.items[so->currPos.itemIndex];
|
||||
OffsetNumber indexOffset = indexItem.indexOffset;
|
||||
ItemPointerData heapTid = indexItem.heapTid;
|
||||
ereport(LOG, (errcode(ERRCODE_UNEXPECTED_CHUNK_VALUE),
|
||||
errmsg("UHeapInternalToastFetchDatum found toast chunk %u is not scan toast chunk %u of "
|
||||
"toast relation %u toast size detail (%d, %d), will skip."
|
||||
"toast index tuple at offset %hu with ctid (%u, %u) is marked dead,"
|
||||
"toast tuple ctid is (%u, %u).",
|
||||
chunk_id, toastPointer.va_valueid, toastPointer.va_toastrelid,
|
||||
toastPointer.va_rawsize, toastPointer.va_extsize, indexOffset,
|
||||
ItemPointerGetBlockNumber(&heapTid), ItemPointerGetOffsetNumber(&heapTid),
|
||||
ItemPointerGetBlockNumber(&(ttup->ctid)), ItemPointerGetOffsetNumber(&(ttup->ctid))),
|
||||
errcause("found toast chunk is not scan toast value."),
|
||||
erraction("Check the toast chunk.")));
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
residx = DatumGetInt32(UHeapFastGetAttr(ttup, ATTR_SECOND, toastTupDesc, &isnull));
|
||||
Assert(!isnull);
|
||||
chunk = DatumGetPointer(UHeapFastGetAttr(ttup, ATTR_THIRD, toastTupDesc, &isnull));
|
||||
@ -1194,6 +1265,13 @@ struct varlena *UHeapInternalToastFetchDatumSlice(struct varatt_external toastPo
|
||||
* Have a chunk, extract the sequence number and the data
|
||||
*/
|
||||
ttup = ExecGetUHeapTupleFromSlot(slot);
|
||||
Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, toastTupDesc, &isnull));
|
||||
Assert(!isnull);
|
||||
if (chunk_id != toastPointer.va_valueid) {
|
||||
ereport(LOG, (errmsg("UHeapInternalToastFetchDatumSlice find toast chunk %u is not scan toast chunk %u of "
|
||||
"toast relation %u, will skip", chunk_id, toastPointer.va_valueid, toastPointer.va_toastrelid)));
|
||||
continue;
|
||||
}
|
||||
residx = DatumGetInt32(UHeapFastGetAttr(ttup, CHUNK_ID_ATTR, toastTupDesc, &isnull));
|
||||
Assert(!isnull);
|
||||
chunk = DatumGetPointer(UHeapFastGetAttr(ttup, CHUNK_DATA_ATTR, toastTupDesc, &isnull));
|
||||
@ -1304,8 +1382,20 @@ static bool UHeapToastRelValueidExists(Relation toastrel, Oid valueid)
|
||||
* Is there any such chunk?
|
||||
*/
|
||||
toastscan = systable_beginscan(toastrel, toastrel->rd_rel->reltoastidxid, true, SnapshotAny, 1, &toastkey);
|
||||
result = UHeapSysIndexGetnextSlot(toastscan, ForwardScanDirection, slot);
|
||||
|
||||
while (UHeapSysIndexGetnextSlot(toastscan, ForwardScanDirection, slot)) {
|
||||
bool isnull = false;
|
||||
UHeapTuple ttup = ExecGetUHeapTupleFromSlot(slot);
|
||||
Oid chunk_id = DatumGetObjectId(UHeapFastGetAttr(ttup, ATTR_FIRST, RelationGetDescr(toastrel), &isnull));
|
||||
Assert(!isnull);
|
||||
if (chunk_id == valueid) {
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
ereport(LOG, (errmsg("UHeapToastRelValueidExists find toast chunk %u is not scan toast chunk %u of toast "
|
||||
"relation %u, will skip", chunk_id, valueid, toastrel->rd_id)));
|
||||
}
|
||||
}
|
||||
systable_endscan(toastscan);
|
||||
ExecDropSingleTupleTableSlot(slot);
|
||||
|
||||
|
||||
@ -20,6 +20,7 @@
|
||||
#include "utils/relcache.h"
|
||||
#include "storage/buf/buf.h"
|
||||
#include "storage/buf/bufmgr.h"
|
||||
#include "ubtree.h"
|
||||
|
||||
/*
|
||||
* state for bulk inserts --- private to heapam.c and hio.c
|
||||
@ -39,6 +40,6 @@ extern Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer othe
|
||||
BulkInsertState bistate, Buffer* vmbuffer, Buffer* vmbuffer_other, BlockNumber end_rel_block);
|
||||
extern Buffer RelationGetNewBufferForBulkInsert(Relation relation, Size len, Size dictSize, BulkInsertState bistate);
|
||||
extern Buffer ReadBufferBI(Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate);
|
||||
extern void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate);
|
||||
extern void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate, NewPageState* npState = NULL);
|
||||
|
||||
#endif /* HIO_H */
|
||||
|
||||
@ -27,6 +27,7 @@
|
||||
#include "catalog/pg_index.h"
|
||||
#include "lib/stringinfo.h"
|
||||
#include "storage/buf/bufmgr.h"
|
||||
#include "datatype/timestamp.h"
|
||||
|
||||
/*
|
||||
* prototypes for functions in ubtree.cpp (external entry points for ubtree)
|
||||
@ -475,6 +476,34 @@ typedef struct {
|
||||
OffsetNumber previousdead[MaxIndexTuplesPerPage];
|
||||
} IndexPruneState;
|
||||
|
||||
typedef struct {
|
||||
TimestampTz firstGetAvailablePageTime;
|
||||
TimestampTz secondGetAvailablePageTime;
|
||||
TimestampTz extendBlocksTime;
|
||||
TimestampTz extendOneTime;
|
||||
TimestampTz getHeadTime;
|
||||
TimestampTz getAvailablePageOnPageTime;
|
||||
TimestampTz getAvailablePageOnPageTimeMax;
|
||||
uint32 firstGetAvailablePageCount;
|
||||
uint32 secondGetAvailablePageCount;
|
||||
uint32 bufferInvalidCount;
|
||||
uint32 needLockCount;
|
||||
uint32 extendBlocksCount;
|
||||
uint32 extendBlocks;
|
||||
uint32 extendOneCount;
|
||||
uint32 queueCount;
|
||||
uint32 itemsCount;
|
||||
uint32 itemsValidCount;
|
||||
uint32 itemsValidConditionalLockCount;
|
||||
uint32 getAvailablePageOnPageCount;
|
||||
uint32 firstGotoRestartCount;
|
||||
uint32 secondGotoRestartCount;
|
||||
uint32 checkNewCreatePagesCount;
|
||||
uint32 getFromNewCreatePagesCount;
|
||||
double avgTravelQueuePages;
|
||||
double avgTravelQueueItems;
|
||||
} NewPageState;
|
||||
|
||||
#define TXNINFOSIZE (sizeof(ShortTransactionId) * 2)
|
||||
|
||||
/*
|
||||
@ -547,8 +576,8 @@ extern OffsetNumber UBTreeFindsplitloc(Relation rel, Buffer buf, OffsetNumber ne
|
||||
extern OffsetNumber UBTreeFindsplitlocInsertpt(Relation rel, Buffer buf, OffsetNumber newitemoff, Size newitemsz,
|
||||
bool *newitemonleft, IndexTuple newitem);
|
||||
|
||||
extern Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr);
|
||||
|
||||
extern Buffer UBTreeGetNewPage(Relation rel, UBTRecycleQueueAddress* addr, NewPageState* npState = NULL);
|
||||
extern void UBTreePrintNewPageState(NewPageState* npstate);
|
||||
/*
|
||||
* prototypes for functions in ubtxlog.cpp
|
||||
*/
|
||||
@ -628,7 +657,8 @@ extern void UBTreeTryRecycleEmptyPage(Relation rel);
|
||||
extern void UBTreeRecordFreePage(Relation rel, BlockNumber blkno, TransactionId xid);
|
||||
extern void UBTreeRecordEmptyPage(Relation rel, BlockNumber blkno, TransactionId xid);
|
||||
extern void UBTreeRecordUsedPage(Relation rel, UBTRecycleQueueAddress addr);
|
||||
extern Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress* addr);
|
||||
extern Buffer UBTreeGetAvailablePage(Relation rel, UBTRecycleForkNumber forkNumber, UBTRecycleQueueAddress* addr,
|
||||
NewPageState* npState = NULL);
|
||||
extern void UBTreeRecycleQueueInitPage(Relation rel, Page page, BlockNumber blkno, BlockNumber prevBlkno,
|
||||
BlockNumber nextBlkno);
|
||||
extern void UBtreeRecycleQueueChangeChain(Buffer buf, BlockNumber newBlkno, bool setNext);
|
||||
|
||||
@ -150,6 +150,7 @@ enum ModuleId {
|
||||
|
||||
MOD_GPI, /* debug info for global partition index */
|
||||
MOD_PARTITION,
|
||||
MOD_UBT_NEWPAGE, /* debug info for UBTreeGetNewPage */
|
||||
MOD_SRF, /* debug info for SRF */
|
||||
MOD_SS_TXNSTATUS, /* debug info for SS TXNSTATUS */
|
||||
|
||||
|
||||
Reference in New Issue
Block a user