【bugfix】 修复postmaster线程卡在PM_WAIT_BACKENDS状态的问题

This commit is contained in:
congzhou2603
2023-09-19 10:43:51 +08:00
parent cd2c889a10
commit e1a8b45117
2 changed files with 24 additions and 0 deletions

View File

@ -486,6 +486,7 @@ static bool CheckSignalByFile(const char *filename, void *infoPtr, size_t infoSi
/* Forward declarations. */
int GaussDbThreadMain(knl_thread_arg* arg);
const char* GetThreadName(knl_thread_role role);
/*
 * Defined near the end of this file: retries PostmasterStateMachine() while
 * pmState is PM_WAIT_BACKENDS and reports PANIC if the state never changes.
 */
void SSOndemandProcExitIfStayWaitBackends();
#ifdef EXEC_BACKEND
@ -3976,6 +3977,10 @@ static int ServerLoop(void)
result = BackendStartup(port, isConnectHaPort);
}
if (SS_CLUSTER_ONDEMAND_RECOVERY && SS_IN_REFORM &&
result != STATUS_OK && pmState == PM_WAIT_BACKENDS) {
SSOndemandProcExitIfStayWaitBackends();
}
if (result != STATUS_OK) {
if (port->is_logic_conn) {
gs_close_gsocket(&port->gs_sock);
@ -15086,3 +15091,20 @@ void SSRestartFailoverPromote()
pmState = PM_WAIT_BACKENDS;
SShandle_promote_signal();
}
/*
 * SSOndemandProcExitIfStayWaitBackends
 *
 * Give the postmaster state machine up to WAIT_PMSTATE_UPDATE_TRIES chances
 * to move pmState out of PM_WAIT_BACKENDS, pausing REFORM_WAIT_LONG between
 * attempts. If pmState is still PM_WAIT_BACKENDS afterwards, report a PANIC
 * so that the process does not stay stuck in that state.
 *
 * Does nothing when pmState is not PM_WAIT_BACKENDS on entry.
 */
void SSOndemandProcExitIfStayWaitBackends()
{
    for (int tries = 0; tries < WAIT_PMSTATE_UPDATE_TRIES && pmState == PM_WAIT_BACKENDS; tries++) {
        PostmasterStateMachine();
        if (pmState != PM_WAIT_BACKENDS) {
            /* The state machine advanced; return without delaying the caller any further. */
            return;
        }
        pg_usleep(REFORM_WAIT_LONG);
    }

    /* Re-check: pmState may have changed while we were sleeping. */
    if (pmState == PM_WAIT_BACKENDS) {
        ereport(PANIC, (errmsg("Proc exit because pmState stay %s for %d times, "
            "when reform failed and in ondemand recovery, "
            "to avoid pmState being stuck in PM_WAIT_BACKENDS.",
            GetPMState(pmState), WAIT_PMSTATE_UPDATE_TRIES)));
        /*
         * NOTE(review): if ereport(PANIC) never returns (as in stock
         * PostgreSQL, where PANIC aborts the process), this call is
         * unreachable -- confirm and drop it if so.
         */
        proc_exit(1);
    }
}

View File

@ -31,6 +31,8 @@
#define REFORM_WAIT_LONG 100000 /* 0.1 sec */
#define WAIT_REFORM_CTRL_REFRESH_TRIES 1000
/*
 * Maximum number of PostmasterStateMachine() retries (each followed by a
 * REFORM_WAIT_LONG sleep) that SSOndemandProcExitIfStayWaitBackends() makes
 * before reporting PANIC because pmState is still PM_WAIT_BACKENDS.
 */
#define WAIT_PMSTATE_UPDATE_TRIES 100
#define REFORM_CTRL_VERSION 1
typedef struct SSBroadcastCancelTrx {