diff --git a/src/cm_agent/cm_agent.centralized.conf.sample b/src/cm_agent/cm_agent.centralized.conf.sample index 47a96ac..d2004db 100644 --- a/src/cm_agent/cm_agent.centralized.conf.sample +++ b/src/cm_agent/cm_agent.centralized.conf.sample @@ -44,4 +44,7 @@ agent_rhb_interval = 1000 # the heatbeat of enable_ssl = on # enable cma to cma ssl ssl_cert_expire_alert_threshold = 90 ssl_cert_expire_check_interval = 86400 +enable_fence_dn = off #enable fence the datanode when cma cannot connect to any cms. + #if set to on, restart datanode after 30 seconds. otherwise, don't restart datanode. + #default off ############### must leave a new line at the end ################### diff --git a/src/cm_agent/cm_agent.centralized_new.conf.sample b/src/cm_agent/cm_agent.centralized_new.conf.sample index 3acafd6..2e541f4 100644 --- a/src/cm_agent/cm_agent.centralized_new.conf.sample +++ b/src/cm_agent/cm_agent.centralized_new.conf.sample @@ -42,4 +42,7 @@ agent_rhb_interval = 1000 # the heatbeat of enable_ssl = on # enable cma to cma ssl ssl_cert_expire_alert_threshold = 90 ssl_cert_expire_check_interval = 86400 +enable_fence_dn = off #enable fence the datanode when cma cannot connect to any cms. + #if set to on, restart datanode after 30 seconds. otherwise, don't restart datanode. + #default off ############### must leave a new line at the end ################### diff --git a/src/cm_agent/cm_agent.conf.sample b/src/cm_agent/cm_agent.conf.sample index 221fac8..96c3e45 100644 --- a/src/cm_agent/cm_agent.conf.sample +++ b/src/cm_agent/cm_agent.conf.sample @@ -45,4 +45,7 @@ agent_rhb_interval = 1000 # the heatbeat of enable_ssl = off # enable cma to cma ssl ssl_cert_expire_alert_threshold = 90 ssl_cert_expire_check_interval = 86400 +enable_fence_dn = off #enable fence the datanode when cma cannot connect to any cms. + #if set to on, restart datanode after 30 seconds. otherwise, don't restart datanode. 
+ #default off ############### must leave a new line at the end ################### diff --git a/src/cm_agent/cma_common.cpp b/src/cm_agent/cma_common.cpp index faba925..abce493 100644 --- a/src/cm_agent/cma_common.cpp +++ b/src/cm_agent/cma_common.cpp @@ -389,6 +389,11 @@ void ReloadParametersFromConfigfile() log_saved_days = (uint32)get_int_value_from_config(configDir, "log_saved_days", 90); log_max_count = (uint32)get_int_value_from_config(configDir, "log_max_count", 10000); +#ifndef ENABLE_MULTIPLE_NODES + if (get_config_param(configDir, "enable_fence_dn", g_enableFenceDn, sizeof(g_enableFenceDn)) < 0) + write_runlog(ERROR, "get_config_param() get enable_fence_dn fail.\n"); +#endif + write_runlog(LOG, "reload cm_agent parameters:\n" " log_min_messages=%d, maxLogFileSize=%d, sys_log_path=%s, \n alarm_component=%s, " @@ -397,7 +402,11 @@ void ReloadParametersFromConfigfile() "agent_check_interval=%u, agent_kill_instance_timeout=%u,\n" " log_threshold_check_interval=%u, log_max_size=%ld, log_max_count=%u, log_saved_days=%u, upgrade_from=%u,\n" " enableLogCompress=%s, security_mode=%s, incremental_build=%d, unix_socket_directory=%s, " +#ifndef ENABLE_MULTIPLE_NODES + "enable_e2e_rto=%u, disaster_recovery_type=%d, environment_threshold=%s, enable_fence_dn=%s\n", +#else "enable_e2e_rto=%u, disaster_recovery_type=%d, environment_threshold=%s\n", +#endif log_min_messages, maxLogFileSize, sys_log_path, @@ -421,7 +430,12 @@ void ReloadParametersFromConfigfile() g_unixSocketDirectory, g_enableE2ERto, g_disasterRecoveryType, +#ifndef ENABLE_MULTIPLE_NODES + g_environmentThreshold, + g_enableFenceDn); +#else g_environmentThreshold); +#endif } int ReadDBStateFile(GaussState *state, const char *statePath) diff --git a/src/cm_agent/cma_create_conn_cms.cpp b/src/cm_agent/cma_create_conn_cms.cpp index 78af583..ec24e06 100644 --- a/src/cm_agent/cma_create_conn_cms.cpp +++ b/src/cm_agent/cma_create_conn_cms.cpp @@ -432,12 +432,20 @@ void* ConnCmsPMain(void* arg) } /* 
agentStopInstanceDelayTime: The delay time of stopping instances. - * If isToStopInstances is true, agentStopInstanceDelayTime is FENCE_TIMEOUT, 30 seconds. + * If isToStopInstances is true, and g_enableFenceDn is true, + * agentStopInstanceDelayTime is FENCE_TIMEOUT, 30 seconds. + * If isToStopInstances is true, and g_enableFenceDn is false, + * agentStopInstanceDelayTime is DISABLE_TIMEOUT, 0 seconds, never times out. * If isToStopInstances is false, agentStopInstanceDelayTime is agent_kill_instance_timeout, * 0 second by default, * and the operation of stopping instances will not be executed. */ +#ifndef ENABLE_MULTIPLE_NODES + uint32 timeout = IsBoolCmParamTrue(g_enableFenceDn) ? FENCE_TIMEOUT : DISABLE_TIMEOUT; + uint32 agentStopInstanceDelayTime = isToStopInstances ? timeout : agent_kill_instance_timeout; +#else uint32 agentStopInstanceDelayTime = isToStopInstances ? DISABLE_TIMEOUT : agent_kill_instance_timeout; +#endif if (isDisconnectTimeout(g_disconnectTime, (int)agentStopInstanceDelayTime) && !have_killed_nodes) { if ((undocumentedVersion == 0) && isMaintenanceModeDisableOperation(CMA_KILL_SELF_INSTANCES)) { have_killed_nodes = false; @@ -451,6 +459,16 @@ void* ConnCmsPMain(void* arg) "sync_dropped_coordinator change to false.\n", agentStopInstanceDelayTime); g_syncDroppedCoordinator = false; have_killed_nodes = true; + + #ifndef ENABLE_MULTIPLE_NODES + /* + * Kill datanode process, so that it can be restarted with pending mode. 
+ */ + uint32 i; + for (i = 0; i < g_currentNode->datanodeCount; i++) { + immediate_stop_one_instance(g_currentNode->datanode[i].datanodeLocalDataPath, INSTANCE_DN); + } + #endif } } } diff --git a/src/cm_agent/cma_global_params.cpp b/src/cm_agent/cma_global_params.cpp index 3bfcf23..7ffab36 100644 --- a/src/cm_agent/cma_global_params.cpp +++ b/src/cm_agent/cma_global_params.cpp @@ -225,6 +225,9 @@ char g_enableMesSsl[BOOL_STR_MAX_LEN] = {0}; uint32 g_sslCertExpireCheckInterval = SECONDS_PER_DAY; uint32 g_cmaRhbItvl = 1000; CmResConfList g_resConf[CM_MAX_RES_INST_COUNT] = {{{0}}}; +#ifndef ENABLE_MULTIPLE_NODES +char g_enableFenceDn[10] = {0}; +#endif bool &GetIsSharedStorageMode() { diff --git a/src/cm_agent/cma_main.cpp b/src/cm_agent/cma_main.cpp index 9662a92..b8d90fa 100644 --- a/src/cm_agent/cma_main.cpp +++ b/src/cm_agent/cma_main.cpp @@ -1430,6 +1430,11 @@ int get_agent_global_params_from_configfile() write_runlog(ERROR, "get_config_param() get enable_dcf fail.\n"); } +#ifndef ENABLE_MULTIPLE_NODES + if (get_config_param(configDir, "enable_fence_dn", g_enableFenceDn, sizeof(g_enableFenceDn)) < 0) + write_runlog(ERROR, "get_config_param() get enable_fence_dn fail.\n"); +#endif + #ifdef __aarch64__ agent_process_cpu_affinity = get_uint32_value_from_config(configDir, "process_cpu_affinity", 0); if (agent_process_cpu_affinity > CPU_AFFINITY_MAX) { diff --git a/src/cm_ctl/ctl_param_check.cpp b/src/cm_ctl/ctl_param_check.cpp index fa6243e..a4d78d2 100644 --- a/src/cm_ctl/ctl_param_check.cpp +++ b/src/cm_ctl/ctl_param_check.cpp @@ -53,6 +53,9 @@ const char *g_cmaParamInfo[] = { "disk_timeout|int|0,2147483647|NULL|NULL|", "voting_disk_path|string|0,0|NULL|NULL|", "agent_rhb_interval|int|0,2147483647|NULL|NULL|", +#ifndef ENABLE_MULTIPLE_NODES + "enable_fence_dn|string|0,0|NULL|NULL|", +#endif #ifdef ENABLE_MULTIPLE_NODES "enable_cn_auto_repair|bool|0,0|NULL|NULL|", "enable_gtm_phony_dead_check|int|0,1|NULL|NULL|", diff --git 
a/src/include/cm/cm_agent/cma_global_params.h b/src/include/cm/cm_agent/cma_global_params.h index a7ed69a..3a92d58 100644 --- a/src/include/cm/cm_agent/cma_global_params.h +++ b/src/include/cm/cm_agent/cma_global_params.h @@ -284,6 +284,9 @@ extern char g_agentQueryBarrier[BARRIERLEN]; extern char g_agentTargetBarrier[BARRIERLEN]; extern char g_environmentThreshold[CM_PATH_LENGTH]; extern char g_doradoIp[CM_IP_LENGTH]; +#ifndef ENABLE_MULTIPLE_NODES +extern char g_enableFenceDn[10]; +#endif extern uint32 g_diskTimeout; extern char g_enableMesSsl[BOOL_STR_MAX_LEN]; extern uint32 g_sslCertExpireCheckInterval;