From c077603b7ad11b1df46bcff07923f790fb4ebe86 Mon Sep 17 00:00:00 2001 From: chendong76 <1209756284@qq.com> Date: Sat, 8 Jul 2023 16:33:55 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E6=8C=89=E9=9C=80=E5=9B=9E?= =?UTF-8?q?=E6=94=BE=E9=83=A8=E5=88=86=E9=97=AE=E9=A2=98=EF=BC=9A=201.?= =?UTF-8?q?=E5=9B=9E=E6=94=BEdrop=E7=B1=BB=E5=9E=8B=E6=93=8D=E4=BD=9C?= =?UTF-8?q?=E6=97=B6=E7=9A=84=E7=A9=BA=E6=8C=87=E9=92=88=E5=BC=82=E5=B8=B8?= =?UTF-8?q?=EF=BC=9B=202.=E6=8C=89=E9=9C=80=E5=9B=9E=E6=94=BE=E6=95=85?= =?UTF-8?q?=E9=9A=9C=E5=86=85=E5=8F=88=E6=95=85=E9=9A=9C=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E7=AC=AC=E4=BA=8C=E6=AC=A1=E6=95=85=E9=9A=9C=E7=A6=81=E7=94=A8?= =?UTF-8?q?=E6=8C=89=E9=9C=80=E5=9B=9E=E6=94=BE=EF=BC=9B=203.=E6=8C=89?= =?UTF-8?q?=E9=9C=80=E5=9B=9E=E6=94=BE=E9=98=B6=E6=AE=B5=E8=A2=AB=E6=81=A2?= =?UTF-8?q?=E5=A4=8D=E8=8A=82=E7=82=B9=E4=B8=8D=E5=85=81=E8=AE=B8=E5=8A=A0?= =?UTF-8?q?=E5=85=A5=E9=9B=86=E7=BE=A4=EF=BC=8C=E7=94=B1=E6=89=93=E5=8D=B0?= =?UTF-8?q?warning=E5=B9=B6=E5=9C=A8=E5=90=8E=E5=8F=B0=E7=AD=89=E5=BE=85?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=B8=BA=E9=80=80=E5=87=BA=EF=BC=8C=E9=81=BF?= =?UTF-8?q?=E5=85=8Dnormal=20reform=E6=97=B6=E9=9B=86=E7=BE=A4=E5=8D=A1?= =?UTF-8?q?=E4=BD=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../process/postmaster/postmaster.cpp | 7 ++-- .../access/transam/extreme_rto_redo_api.cpp | 6 ++-- .../ondemand_extreme_rto/dispatcher.cpp | 1 - .../ondemand_extreme_rto/page_redo.cpp | 32 +++++++++++++++++++ .../storage/access/transam/xlog.cpp | 21 +++++++----- .../access/ondemand_extreme_rto/page_redo.h | 6 +++- src/include/ddes/dms/ss_common_attr.h | 3 +- 7 files changed, 58 insertions(+), 18 deletions(-) diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index 40f7a3860..c569de2d5 100644 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -3034,10 +3034,9 @@ 
int PostmasterMain(int argc, char* argv[]) ereport(LOG, (errmsg("[SS reform] Success: node:%d wait for PRIMARY:%d to finish 1st reform", g_instance.attr.attr_storage.dms_attr.instance_id, src_id))); - while (SS_OFFICIAL_RECOVERY_NODE && SS_CLUSTER_NOT_NORAML) { - pg_usleep(SLEEP_ONE_SEC); - SSReadControlFile(REFORM_CTRL_PAGE); - ereport(WARNING, (errmsg("[on-demand] node%d is last primary node, waiting for on-demand recovery done", + if (SS_OFFICIAL_RECOVERY_NODE && SS_CLUSTER_ONDEMAND_NOT_NORAML) { + ereport(FATAL, (errmsg( + "[on-demand] node%d is last primary node, do not allow join cluster until on-demand recovery done", g_instance.attr.attr_storage.dms_attr.instance_id))); } } diff --git a/src/gausskernel/storage/access/transam/extreme_rto_redo_api.cpp b/src/gausskernel/storage/access/transam/extreme_rto_redo_api.cpp index 65e449757..bc5ca2582 100644 --- a/src/gausskernel/storage/access/transam/extreme_rto_redo_api.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto_redo_api.cpp @@ -114,14 +114,14 @@ static const f_extreme_rto_redo extreme_rto_redosw[] = { ondemand_extreme_rto::WaitAllReplayWorkerIdle, ondemand_extreme_rto::DispatchCleanInvalidPageMarkToAllRedoWorker, ondemand_extreme_rto::DispatchClosefdMarkToAllRedoWorker, - NULL, + ondemand_extreme_rto::RecordBadBlockAndPushToRemote, ondemand_extreme_rto::CheckCommittingCsnList, ondemand_extreme_rto::ReadNextXLogRecord, ondemand_extreme_rto::ExtremeRtoStopHere, ondemand_extreme_rto::WaitAllRedoWorkerQueueEmpty, ondemand_extreme_rto::GetSafeMinCheckPoint, - NULL, - NULL, + ondemand_extreme_rto::ClearRecoveryThreadHashTbl, + ondemand_extreme_rto::BatchClearRecoveryThreadHashTbl, ondemand_extreme_rto::RedoWorkerIsUndoSpaceWorker, ondemand_extreme_rto::StartRecoveryWorkers, ondemand_extreme_rto::DispatchRedoRecordToFile, diff --git a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/dispatcher.cpp b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/dispatcher.cpp index 
e50af152a..7f28fe310 100644 --- a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/dispatcher.cpp +++ b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/dispatcher.cpp @@ -439,7 +439,6 @@ void StartRecoveryWorkers(XLogReaderState *xlogreader, uint32 privateLen) ALLOCSET_DEFAULT_MAXSIZE, SHARED_CONTEXT); g_instance.comm_cxt.predo_cxt.redoItemHash = PRRedoItemHashInitialize(g_instance.comm_cxt.redoItemCtx); - g_dispatcher->maxItemNum = ((get_batch_redo_num() + 4) * PAGE_WORK_QUEUE_SIZE) * ITEM_QUQUE_SIZE_RATIO; uint32 maxParseBufNum = (uint32)((uint64)g_instance.attr.attr_storage.dms_attr.ondemand_recovery_mem_size * 1024 / (sizeof(XLogRecParseState) + sizeof(ParseBufferDesc) + sizeof(RedoMemSlot))); g_dispatcher->maxItemNum = 4 * PAGE_WORK_QUEUE_SIZE * ITEM_QUQUE_SIZE_RATIO + maxParseBufNum; diff --git a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp index 4ec868350..4b5a6415a 100644 --- a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp +++ b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/page_redo.cpp @@ -634,6 +634,7 @@ void RedoPageManagerDistributeToAllOneBlock(XLogRecParseState *ddlParseState) for (uint32 i = 0; i < WorkerNumPerMng; ++i) { XLogRecParseState *newState = XLogParseBufferCopy(ddlParseState); + newState->distributeStatus = XLOG_HEAD_DISTRIBUTE; AddPageRedoItem(myRedoLine->redoThd[i], newState); } } @@ -940,6 +941,7 @@ void PageManagerDistributeBcmBlock(XLogRecParseState *preState) PageRedoPipeline *myRedoLine = &g_dispatcher->pageLines[g_redoWorker->slotId]; const uint32 WorkerNumPerMng = myRedoLine->redoThdNum; uint32 workId = GetWorkerId((uint32)preState->blockparse.blockhead.forknum, WorkerNumPerMng); + preState->distributeStatus = XLOG_HEAD_DISTRIBUTE; AddPageRedoItem(myRedoLine->redoThd[workId], preState); } @@ -2931,4 +2933,34 @@ bool XactHasSegpageRelFiles(XLogReaderState *record) 
return false; } +/* RecordBadBlockAndPushToRemote + * If the bad page has been stored, record the xlog. If the bad page + * has not been stored, need push to page repair thread hash table and record to + * recovery thread hash table. + */ +void RecordBadBlockAndPushToRemote(XLogBlockDataParse *datadecode, PageErrorType error_type, + XLogRecPtr old_lsn, XLogPhyBlock pblk) +{ + return; +} + +/* ClearRecoveryThreadHashTbl + * drop table, or truncate table, need clear the page repair hashTbl, if the + * repair page Filenode match need remove. + */ +void ClearRecoveryThreadHashTbl(const RelFileNode &node, ForkNumber forknum, BlockNumber minblkno, + bool segment_shrink) +{ + return; +} + +/* BatchClearRecoveryThreadHashTbl + * drop database, or drop segmentspace, need clear the page repair hashTbl, + * if the repair page key dbNode match and spcNode match, need remove. + */ +void BatchClearRecoveryThreadHashTbl(Oid spcNode, Oid dbNode) +{ + return; +} + } // namespace ondemand_extreme_rto \ No newline at end of file diff --git a/src/gausskernel/storage/access/transam/xlog.cpp b/src/gausskernel/storage/access/transam/xlog.cpp index 7c20bf5b7..d8203dd31 100755 --- a/src/gausskernel/storage/access/transam/xlog.cpp +++ b/src/gausskernel/storage/access/transam/xlog.cpp @@ -8733,6 +8733,7 @@ void StartupXLOG(void) bool RecoveryByPending = false; /* recovery caused by pending mode */ bool ArchiveRecoveryByPending = false; /* archive recovery caused by pending mode */ bool AbnormalShutdown = true; + bool SSOndemandRecoveryExitNormal = true; /* status of last ondemand recovery */ struct stat st; errno_t rcm = 0; TransactionId latestCompletedXid; @@ -8804,15 +8805,15 @@ void StartupXLOG(void) if (ENABLE_DMS && ENABLE_DSS) { int src_id = INVALID_INSTANCEID; SSReadControlFile(REFORM_CTRL_PAGE); - if ((SS_CLUSTER_ONDEMAND_BUILD || SS_CLUSTER_ONDEMAND_RECOVERY) && SS_PRIMARY_MODE) { + if (SS_CLUSTER_ONDEMAND_NOT_NORAML && SS_PRIMARY_MODE) { if (SS_STANDBY_PROMOTING) { ereport(FATAL, 
(errmsg("Do not allow switchover if on-demand recovery is not finish"))); } - Assert(g_instance.dms_cxt.SSReformerControl.recoveryInstId != INVALID_INSTANCEID); src_id = g_instance.dms_cxt.SSReformerControl.recoveryInstId; ereport(LOG, (errmsg("[on-demand]: On-demand recovery do not finish in last reform, " "reading control file of original primary:%d", src_id))); + SSOndemandRecoveryExitNormal = false; } else { if (SS_STANDBY_FAILOVER || SS_STANDBY_PROMOTING) { src_id = SSGetPrimaryInstId(); @@ -9480,19 +9481,20 @@ void StartupXLOG(void) t_thrd.xlog_cxt.InRecovery = false; } - if (SS_PRIMARY_MODE) { - if (ENABLE_ONDEMAND_RECOVERY && (SS_STANDBY_FAILOVER || SS_PRIMARY_NORMAL_REFORM) && - t_thrd.xlog_cxt.InRecovery == true) { + if (SS_PRIMARY_MODE && ENABLE_ONDEMAND_RECOVERY && (SS_STANDBY_FAILOVER || SS_PRIMARY_NORMAL_REFORM) && + t_thrd.xlog_cxt.InRecovery == true) { + if (SSOndemandRecoveryExitNormal) { g_instance.dms_cxt.SSRecoveryInfo.in_ondemand_recovery = true; /* for other nodes in cluster and ondeamnd recovery failed */ g_instance.dms_cxt.SSReformerControl.clusterStatus = CLUSTER_IN_ONDEMAND_BUILD; g_instance.dms_cxt.SSReformerControl.recoveryInstId = g_instance.dms_cxt.SSRecoveryInfo.recovery_inst_id; + SSSaveReformerCtrl(); SetOndemandExtremeRtoMode(); ereport(LOG, (errmsg("[On-demand] replayed in extreme rto ondemand recovery mode"))); } else { - g_instance.dms_cxt.SSReformerControl.clusterStatus = CLUSTER_NORMAL; + ereport(LOG, (errmsg("[On-demand] do not allow replay in ondemand recovery if last ondemand recovery " + "crash, replayed in extreme rto recovery mode"))); } - SSSaveReformerCtrl(); } ReadRemainSegsFile(); @@ -10544,10 +10546,13 @@ void StartupXLOG(void) state->start = state->end; (void)LWLockRelease(state->recovery_queue_lock); } + g_instance.dms_cxt.SSRecoveryInfo.in_ondemand_recovery = false; + } + + if (SS_PRIMARY_MODE) { /* for other nodes in cluster */ g_instance.dms_cxt.SSReformerControl.clusterStatus = CLUSTER_NORMAL; 
SSSaveReformerCtrl(); - g_instance.dms_cxt.SSRecoveryInfo.in_ondemand_recovery = false; } ereport(LOG, (errmsg("redo done, nextXid: " XID_FMT ", startupMaxXid: " XID_FMT ", recentLocalXmin: " XID_FMT diff --git a/src/include/access/ondemand_extreme_rto/page_redo.h b/src/include/access/ondemand_extreme_rto/page_redo.h index 9d55e598c..005d76a9d 100644 --- a/src/include/access/ondemand_extreme_rto/page_redo.h +++ b/src/include/access/ondemand_extreme_rto/page_redo.h @@ -242,7 +242,11 @@ void WaitAllRedoWorkerQueueEmpty(); void WaitAllReplayWorkerIdle(); void DispatchClosefdMarkToAllRedoWorker(); void DispatchCleanInvalidPageMarkToAllRedoWorker(RepairFileKey key); - +void ClearRecoveryThreadHashTbl(const RelFileNode &node, ForkNumber forknum, BlockNumber minblkno, + bool segment_shrink); +void BatchClearRecoveryThreadHashTbl(Oid spcNode, Oid dbNode); +void RecordBadBlockAndPushToRemote(XLogBlockDataParse *datadecode, PageErrorType error_type, + XLogRecPtr old_lsn, XLogPhyBlock pblk); const char *RedoWokerRole2Str(RedoRole role); } // namespace ondemand_extreme_rto diff --git a/src/include/ddes/dms/ss_common_attr.h b/src/include/ddes/dms/ss_common_attr.h index 99fb9259c..74866823d 100644 --- a/src/include/ddes/dms/ss_common_attr.h +++ b/src/include/ddes/dms/ss_common_attr.h @@ -143,7 +143,8 @@ #define SS_PRIMARY_STANDBY_CLUSTER_NORMAL_STANDBY \ (SS_NORMAL_STANDBY && (g_instance.attr.attr_storage.xlog_file_path != 0)) -#define SS_CLUSTER_NOT_NORAML (ENABLE_DMS && (g_instance.dms_cxt.SSReformerControl.clusterStatus != CLUSTER_NORMAL)) +#define SS_CLUSTER_ONDEMAND_NOT_NORAML \ + (ENABLE_DMS && (g_instance.dms_cxt.SSReformerControl.clusterStatus != CLUSTER_NORMAL)) #define SS_CLUSTER_ONDEMAND_BUILD \ (ENABLE_DMS && (g_instance.dms_cxt.SSReformerControl.clusterStatus == CLUSTER_IN_ONDEMAND_BUILD)) #define SS_CLUSTER_ONDEMAND_RECOVERY \