From f9d7ef43fd7fa515046556f3688d72d341dda89f Mon Sep 17 00:00:00 2001 From: dongning12 Date: Mon, 12 Aug 2024 15:50:00 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90=E8=B5=84=E6=BA=90=E6=B1=A0=E5=8C=96?= =?UTF-8?q?=E3=80=91dms=E7=9A=84=E7=BA=BF=E7=A8=8B=E6=89=93=E5=8D=B0error?= =?UTF-8?q?=E6=97=A5=E5=BF=97=E9=9C=80=E8=A6=81=E4=BD=BF=E7=94=A8try-catch?= =?UTF-8?q?=E7=BB=93=E6=9E=84=EF=BC=8C=E5=90=A6=E5=88=99=E7=BA=BF=E7=A8=8B?= =?UTF-8?q?=E4=BC=9A=E9=80=80=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ddes/adapter/ss_dms_callback.cpp | 78 +++++++++++-------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/src/gausskernel/ddes/adapter/ss_dms_callback.cpp b/src/gausskernel/ddes/adapter/ss_dms_callback.cpp index 20ff559cb..58f162151 100644 --- a/src/gausskernel/ddes/adapter/ss_dms_callback.cpp +++ b/src/gausskernel/ddes/adapter/ss_dms_callback.cpp @@ -52,6 +52,8 @@ #include "storage/buf/bufmgr.h" #include "storage/ipc.h" +static void ReleaseResource(); + /* * Wake up startup process to replay WAL, or to notice that * failover has been requested. @@ -495,41 +497,55 @@ static int SetPrimaryIdOnStandby(int primary_id, unsigned long long list_stable) { char* type_string = NULL; type_string = SSGetLogHeaderTypeStr(); + int ret = DMS_SUCCESS; - for (int ntries = 0;; ntries++) { - SSReadControlFile(REFORM_CTRL_PAGE); /* need to double check */ - if (g_instance.dms_cxt.SSReformerControl.primaryInstId == primary_id && - g_instance.dms_cxt.SSReformerControl.list_stable == list_stable) { - ereport(LOG, (errmodule(MOD_DMS), - errmsg("%s Reform success, this is a standby:%d confirming new primary:%d, list_stable:%llu, " - "confirm ntries=%d.", type_string, SS_MY_INST_ID, primary_id, list_stable, ntries))); - return DMS_SUCCESS; - } else { - if (dms_reform_failed()) { - ereport(ERROR, - (errmodule(MOD_DMS), errmsg("%s Failed to confirm new primary: %d, list_stable:%llu, " - "control file indicates primary is %d, list_stable%llu; dms reform failed.", - type_string, (int)primary_id, list_stable, - g_instance.dms_cxt.SSReformerControl.primaryInstId, - g_instance.dms_cxt.SSReformerControl.list_stable))); - return DMS_ERROR; - } - if (ntries >= WAIT_REFORM_CTRL_REFRESH_TRIES) { - ereport(ERROR, - (errmodule(MOD_DMS), errmsg("%s Failed to confirm new primary: %d, list_stable:%llu, " - " control file indicates primary is %d, list_stable%llu; wait timeout.", - type_string, (int)primary_id, list_stable, - g_instance.dms_cxt.SSReformerControl.primaryInstId, - g_instance.dms_cxt.SSReformerControl.list_stable))); - return DMS_ERROR; + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + PG_TRY(); + { + for (int ntries = 0;; ntries++) { + SSReadControlFile(REFORM_CTRL_PAGE); /* need to double check */ + if (g_instance.dms_cxt.SSReformerControl.primaryInstId == primary_id && + g_instance.dms_cxt.SSReformerControl.list_stable == list_stable) { + ereport(LOG, (errmodule(MOD_DMS), + errmsg("%s Reform success, this is a standby:%d confirming new primary:%d, list_stable:%llu, " + "confirm ntries=%d.", type_string, SS_MY_INST_ID, primary_id, list_stable, ntries))); + ret = DMS_SUCCESS; + break; + } else { + if (dms_reform_failed()) { + ereport(ERROR, + (errmodule(MOD_DMS), errmsg("%s Failed to confirm new primary: %d, list_stable:%llu, " + "control file indicates primary is %d, list_stable%llu; dms reform failed.", + type_string, (int)primary_id, list_stable, + g_instance.dms_cxt.SSReformerControl.primaryInstId, + g_instance.dms_cxt.SSReformerControl.list_stable))); + ret = DMS_ERROR; + break; + } + if (ntries >= WAIT_REFORM_CTRL_REFRESH_TRIES) { + ereport(ERROR, + (errmodule(MOD_DMS), errmsg("%s Failed to confirm new primary: %d, list_stable:%llu, " + " control file indicates primary is %d, list_stable%llu; wait timeout.", + type_string, (int)primary_id, list_stable, + g_instance.dms_cxt.SSReformerControl.primaryInstId, + g_instance.dms_cxt.SSReformerControl.list_stable))); + ret = DMS_ERROR; + break; + } } + + CHECK_FOR_INTERRUPTS(); + pg_usleep(REFORM_WAIT_TIME); /* wait 0.01 sec, then retry */ } - - CHECK_FOR_INTERRUPTS(); - pg_usleep(REFORM_WAIT_TIME); /* wait 0.01 sec, then retry */ } - - return DMS_ERROR; + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + ReleaseResource(); + ret = DMS_ERROR; + } + PG_END_TRY(); + return ret; } /* called on both new primary and all standby nodes to refresh status */