From b8459b52b822543dffc4944d0e69db28b9a54886 Mon Sep 17 00:00:00 2001 From: xue_meng_en <1836611252@qq.com> Date: Tue, 9 May 2023 15:14:21 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81=E9=85=8D=E7=BD=AE=E7=AD=89?= =?UTF-8?q?=E5=BE=85=E9=9D=99=E6=80=81=E4=B8=BB=E7=9A=84=E6=97=B6=E9=97=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cm_ctl/ctl_param_check.cpp | 3 +++ src/cm_server/cm_server.centralized.conf.sample | 2 ++ src/cm_server/cms_arbitrate_datanode_pms.cpp | 2 +- src/cm_server/cms_arbitrate_datanode_pms_utils.cpp | 2 +- src/cm_server/cms_common.cpp | 6 ++++++ src/cm_server/cms_global_params.cpp | 1 + src/cm_server/cms_monitor_main.cpp | 11 +++++++++-- .../cm/cm_server/cms_arbitrate_datanode_pms_utils.h | 1 - src/include/cm/cm_server/cms_global_params.h | 1 + 9 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/cm_ctl/ctl_param_check.cpp b/src/cm_ctl/ctl_param_check.cpp index 022dfc6..a5acc7d 100644 --- a/src/cm_ctl/ctl_param_check.cpp +++ b/src/cm_ctl/ctl_param_check.cpp @@ -127,6 +127,9 @@ const char *g_cmsParamInfo[] = { "cms_enable_failover_on2nodes|bool|0,0|NULL|NULL|", "cms_enable_db_crash_recovery|bool|0,0|NULL|NULL|", "cms_network_isolation_timeout|int|10,2147483647|NULL|NULL|", +#ifndef ENABLE_PRIVATEGAUSS + "wait_static_primary_times|int|5,2147483647|NULL|NULL|", +#endif }; const char *g_valueTypeStr[] = { diff --git a/src/cm_server/cm_server.centralized.conf.sample b/src/cm_server/cm_server.centralized.conf.sample index f71e94a..fddf8c4 100644 --- a/src/cm_server/cm_server.centralized.conf.sample +++ b/src/cm_server/cm_server.centralized.conf.sample @@ -87,4 +87,6 @@ cms_enable_db_crash_recovery = false # used in 2 nodes cluster. when network re cms_network_isolation_timeout = 20 # cms judges the network is isolated when it finds ddb cluster is not sync with each other nodes, # after cms_network_isolation_timeout times. 
# default 20 +wait_static_primary_times = 6 # Time to wait for the static primary to recover after it stops unexpectedly. + # default value is 6; values below 5 are treated as 5 ############### must leave a new line at the end ################### diff --git a/src/cm_server/cms_arbitrate_datanode_pms.cpp b/src/cm_server/cms_arbitrate_datanode_pms.cpp index a9a31af..9f6463a 100644 --- a/src/cm_server/cms_arbitrate_datanode_pms.cpp +++ b/src/cm_server/cms_arbitrate_datanode_pms.cpp @@ -2248,7 +2248,7 @@ static void InitDnArbCond(DnArbCtx *ctx) ctx->cond.maxMemArbiTime = 0; ctx->cond.instMainta = IsMaintance(ctx->maintaMode); ctx->cond.switchoverIdx = INVALID_INDEX; - ctx->cond.arbitInterval = g_clusterStarting ? g_clusterStartingArbitDelay : DATANODE_ARBITE_DELAY; + ctx->cond.arbitInterval = g_clusterStarting ? g_clusterStartingArbitDelay : g_waitStaticPrimaryTimes; ctx->cond.arbitStaticInterval = 5; ctx->cond.setOffline = SetOfflineNode(); ctx->cond.snameAzDnCount = 0; diff --git a/src/cm_server/cms_arbitrate_datanode_pms_utils.cpp b/src/cm_server/cms_arbitrate_datanode_pms_utils.cpp index 40ad114..0171108 100644 --- a/src/cm_server/cms_arbitrate_datanode_pms_utils.cpp +++ b/src/cm_server/cms_arbitrate_datanode_pms_utils.cpp @@ -554,7 +554,7 @@ uint32 GetDnArbitateDelayTime(const DnArbCtx *ctx) /* if static primary has finished redo, not need to wait for 180s */ cm_local_replconninfo *status = &(ctx->dnReport[cond->staticPriIdx].local_status); if (status->local_role == INSTANCE_ROLE_STANDBY && status->disconn_mode == PROHIBIT_CONNECTION) { - return DATANODE_ARBITE_DELAY; + return g_waitStaticPrimaryTimes; } return cond->arbitInterval; } diff --git a/src/cm_server/cms_common.cpp b/src/cm_server/cms_common.cpp index 68260d7..9f19d27 100644 --- a/src/cm_server/cms_common.cpp +++ b/src/cm_server/cms_common.cpp @@ -615,6 +615,12 @@ void get_parameters_from_configfile() g_diskTimeout = get_uint32_value_from_config(configDir, "disk_timeout", 200); g_agentNetworkTimeout = 
get_uint32_value_from_config(configDir, "agent_network_timeout", 6); GetDnArbitrateMode(); +#ifndef ENABLE_PRIVATEGAUSS + g_waitStaticPrimaryTimes = get_uint32_value_from_config(configDir, "wait_static_primary_times", 6); + if (g_waitStaticPrimaryTimes < 5) { + g_waitStaticPrimaryTimes = 5; + } +#endif } void clean_init_cluster_state() diff --git a/src/cm_server/cms_global_params.cpp b/src/cm_server/cms_global_params.cpp index e3889e2..1e38d58 100644 --- a/src/cm_server/cms_global_params.cpp +++ b/src/cm_server/cms_global_params.cpp @@ -328,6 +328,7 @@ uint32 g_delayArbiTime = 0; int32 g_clusterArbiTime = 300; bool g_isPauseArbitration = false; char g_cmManualPausePath[MAX_PATH_LEN] = {0}; +uint32 g_waitStaticPrimaryTimes = 6; bool isLargerNode() { diff --git a/src/cm_server/cms_monitor_main.cpp b/src/cm_server/cms_monitor_main.cpp index f6610f5..ca8a851 100644 --- a/src/cm_server/cms_monitor_main.cpp +++ b/src/cm_server/cms_monitor_main.cpp @@ -343,11 +343,18 @@ static void ReloadParametersFromConfigfile() g_diskTimeout = get_uint32_value_from_config(configDir, "disk_timeout", 200); g_agentNetworkTimeout = get_uint32_value_from_config(configDir, "agent_network_timeout", 6); GetDnArbitrateMode(); +#ifndef ENABLE_PRIVATEGAUSS + g_waitStaticPrimaryTimes = get_uint32_value_from_config(configDir, "wait_static_primary_times", 6); + if (g_waitStaticPrimaryTimes < 5) { + g_waitStaticPrimaryTimes = 5; + } +#endif if (g_cm_server_num == CMS_ONE_PRIMARY_ONE_STANDBY) { GetTwoNodesArbitrateParams(); } + #ifdef ENABLE_MULTIPLE_NODES write_runlog(LOG, "reload cm_server parameters:\n" @@ -378,13 +385,13 @@ static void ReloadParametersFromConfigfile() "datastorage_threshold_check_interval=%d,\n" " max_datastorage_threshold_check=%d, enableSetReadOnly=%s, enableSetReadOnlyThreshold=%u, " "switch_rto=%d, force_promote=%d, cluster_starting_aribt_delay=%u, enable_e2e_rto=%u, " - "g_delayArbiTime=%u, g_clusterArbiTime=%d.\n", + "g_delayArbiTime=%u, g_clusterArbiTime=%d, 
wait_static_primary_times=%u.\n", log_min_messages, maxLogFileSize, sys_log_path, g_alarmComponentPath, g_alarmReportInterval, instance_heartbeat_timeout, g_ddbArbicfg.haHeartBeatTimeOut, cmserver_self_vote_timeout, g_ddbArbicfg.haStatusInterval, cmserver_ha_connect_timeout, instance_failover_delay_timeout, datastorage_threshold_check_interval, max_datastorage_threshold_check, g_enableSetReadOnly, g_readOnlyThreshold, switch_rto, force_promote, g_clusterStartingArbitDelay, - g_enableE2ERto, g_delayArbiTime, g_clusterArbiTime); + g_enableE2ERto, g_delayArbiTime, g_clusterArbiTime, g_waitStaticPrimaryTimes); #endif } diff --git a/src/include/cm/cm_server/cms_arbitrate_datanode_pms_utils.h b/src/include/cm/cm_server/cms_arbitrate_datanode_pms_utils.h index 8fd16ad..f1f99c1 100644 --- a/src/include/cm/cm_server/cms_arbitrate_datanode_pms_utils.h +++ b/src/include/cm/cm_server/cms_arbitrate_datanode_pms_utils.h @@ -44,7 +44,6 @@ typedef struct DnArbitInfo_t { uint32 maxTerm; } DnArbitInfo; -const uint32 DATANODE_ARBITE_DELAY = 6; extern bool CheckPotentialTermRollback(); extern void GroupStatusShow(const char *str, const uint32 groupIndex, const uint32 instanceId, diff --git a/src/include/cm/cm_server/cms_global_params.h b/src/include/cm/cm_server/cms_global_params.h index 0d7c657..d6dfa2b 100644 --- a/src/include/cm/cm_server/cms_global_params.h +++ b/src/include/cm/cm_server/cms_global_params.h @@ -475,6 +475,7 @@ extern uint32 g_delayArbiTime; extern int32 g_clusterArbiTime; extern bool g_isPauseArbitration; extern char g_cmManualPausePath[MAX_PATH_LEN]; +extern uint32 g_waitStaticPrimaryTimes; extern void clean_init_cluster_state(); extern void instance_delay_arbitrate_time_out_direct_clean(uint32 group_index, int member_index,