diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp
index c53e8aadc..f011f6a58 100644
--- a/src/gausskernel/process/postmaster/postmaster.cpp
+++ b/src/gausskernel/process/postmaster/postmaster.cpp
@@ -481,6 +481,7 @@ static bool CheckSignalByFile(const char *filename, void *infoPtr, size_t infoSi
 
 int GaussDbThreadMain(knl_thread_arg* arg);
 const char* GetThreadName(knl_thread_role role);
+void SSOndemandProcExitIfStayWaitBackends();
 
 #ifdef EXEC_BACKEND
 
@@ -3842,6 +3843,10 @@ static int ServerLoop(void)
                             result = BackendStartup(port, isConnectHaPort);
                         }
 
+                        if (SS_CLUSTER_ONDEMAND_RECOVERY && SS_IN_REFORM &&
+                            result != STATUS_OK && pmState == PM_WAIT_BACKENDS) {
+                            SSOndemandProcExitIfStayWaitBackends();
+                        }
                         if (result != STATUS_OK) {
                             if (port->is_logic_conn) {
                                 gs_close_gsocket(&port->gs_sock);
@@ -14864,3 +14869,26 @@ void SSRestartFailoverPromote()
     pmState = PM_WAIT_BACKENDS;
     SShandle_promote_signal();
 }
+
+/*
+ * Called from ServerLoop when starting a backend fails during an on-demand-recovery reform
+ * while pmState is PM_WAIT_BACKENDS. Calls PostmasterStateMachine() up to
+ * WAIT_PMSTATE_UPDATE_TRIES times, sleeping REFORM_WAIT_LONG microseconds after each call,
+ * and raises PANIC if pmState is still PM_WAIT_BACKENDS afterwards.
+ */
+void SSOndemandProcExitIfStayWaitBackends()
+{
+    int failTimes = 0;
+    while (failTimes < WAIT_PMSTATE_UPDATE_TRIES && pmState == PM_WAIT_BACKENDS) {
+        PostmasterStateMachine();
+        pg_usleep(REFORM_WAIT_LONG);
+        failTimes++;
+    }
+    if (pmState == PM_WAIT_BACKENDS) {
+        /* PANIC aborts the process inside ereport, so no proc_exit() call is needed after it. */
+        ereport(PANIC, (errmsg("Proc exit because pmState stay %s for %d times, "
+                               "when reform failed and in ondemand recovery, "
+                               "to avoid pmState being stuck in PM_WAIT_BACKENDS.",
+                               GetPMState(pmState), WAIT_PMSTATE_UPDATE_TRIES)));
+    }
+}
diff --git a/src/include/ddes/dms/ss_reform_common.h b/src/include/ddes/dms/ss_reform_common.h
index a934c3f37..a79c1378d 100644
--- a/src/include/ddes/dms/ss_reform_common.h
+++ b/src/include/ddes/dms/ss_reform_common.h
@@ -31,6 +31,8 @@
 #define REFORM_WAIT_LONG 100000 /* 0.1 sec */
 #define WAIT_REFORM_CTRL_REFRESH_TRIES 1000
 
+#define WAIT_PMSTATE_UPDATE_TRIES 100
+
 #define REFORM_CTRL_VERSION 1
 
 typedef struct SSBroadcastCancelTrx {