!64 Datanode主节点被网络孤立后,Datanode未降备,导致datanode双主
Merge pull request !64 from alfredwang/I5TGP3
This commit is contained in:
commit
a582181c2f
@ -44,4 +44,7 @@ agent_rhb_interval = 1000 # the heatbeat of
|
||||
enable_ssl = on # enable cma to cma ssl
|
||||
ssl_cert_expire_alert_threshold = 90
|
||||
ssl_cert_expire_check_interval = 86400
|
||||
enable_fence_dn = off #whether to fence the datanode when cma cannot connect to any cms.
|
||||
#if set to on, restart datanode after 30 seconds. Otherwise, don't restart datanode.
|
||||
#default off
|
||||
############### must leave a new line at the end ###################
|
||||
|
@ -42,4 +42,7 @@ agent_rhb_interval = 1000 # the heatbeat of
|
||||
enable_ssl = on # enable cma to cma ssl
|
||||
ssl_cert_expire_alert_threshold = 90
|
||||
ssl_cert_expire_check_interval = 86400
|
||||
enable_fence_dn = off #whether to fence the datanode when cma cannot connect to any cms.
|
||||
#if set to on, restart datanode after 30 seconds. Otherwise, don't restart datanode.
|
||||
#default off
|
||||
############### must leave a new line at the end ###################
|
||||
|
@ -45,4 +45,7 @@ agent_rhb_interval = 1000 # the heatbeat of
|
||||
enable_ssl = off # enable cma to cma ssl
|
||||
ssl_cert_expire_alert_threshold = 90
|
||||
ssl_cert_expire_check_interval = 86400
|
||||
enable_fence_dn = off #whether to fence the datanode when cma cannot connect to any cms.
|
||||
#if set to on, restart datanode after 30 seconds. Otherwise, don't restart datanode.
|
||||
#default off
|
||||
############### must leave a new line at the end ###################
|
||||
|
@ -389,6 +389,11 @@ void ReloadParametersFromConfigfile()
|
||||
log_saved_days = (uint32)get_int_value_from_config(configDir, "log_saved_days", 90);
|
||||
log_max_count = (uint32)get_int_value_from_config(configDir, "log_max_count", 10000);
|
||||
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
if (get_config_param(configDir, "enable_fence_dn", g_enableFenceDn, sizeof(g_enableFenceDn)) < 0)
|
||||
write_runlog(ERROR, "get_config_param() get enable_fence_dn fail.\n");
|
||||
#endif
|
||||
|
||||
write_runlog(LOG,
|
||||
"reload cm_agent parameters:\n"
|
||||
" log_min_messages=%d, maxLogFileSize=%d, sys_log_path=%s, \n alarm_component=%s, "
|
||||
@ -397,7 +402,11 @@ void ReloadParametersFromConfigfile()
|
||||
"agent_check_interval=%u, agent_kill_instance_timeout=%u,\n"
|
||||
" log_threshold_check_interval=%u, log_max_size=%ld, log_max_count=%u, log_saved_days=%u, upgrade_from=%u,\n"
|
||||
" enableLogCompress=%s, security_mode=%s, incremental_build=%d, unix_socket_directory=%s, "
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
"enable_e2e_rto=%u, disaster_recovery_type=%d, environment_threshold=%s, enable_fence_dn=%s\n",
|
||||
#else
|
||||
"enable_e2e_rto=%u, disaster_recovery_type=%d, environment_threshold=%s\n",
|
||||
#endif
|
||||
log_min_messages,
|
||||
maxLogFileSize,
|
||||
sys_log_path,
|
||||
@ -421,7 +430,12 @@ void ReloadParametersFromConfigfile()
|
||||
g_unixSocketDirectory,
|
||||
g_enableE2ERto,
|
||||
g_disasterRecoveryType,
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
g_environmentThreshold,
|
||||
g_enableFenceDn);
|
||||
#else
|
||||
g_environmentThreshold);
|
||||
#endif
|
||||
}
|
||||
|
||||
int ReadDBStateFile(GaussState *state, const char *statePath)
|
||||
|
@ -432,12 +432,20 @@ void* ConnCmsPMain(void* arg)
|
||||
}
|
||||
|
||||
/* agentStopInstanceDelayTime: The delay time of stopping instances.
|
||||
* If isToStopInstances is true, agentStopInstanceDelayTime is FENCE_TIMEOUT, 30 seconds.
|
||||
* If isToStopInstances is true, and g_enableFenceDn is true,
|
||||
* agentStopInstanceDelayTime is FENCE_TIMEOUT, 30 seconds.
|
||||
* If isToStopInstances is true, and g_enableFenceDn is false,
|
||||
* agentStopInstanceDelayTime is DISABLE_TIMEOUT, 0 seconds, never timeout.
|
||||
* If isToStopInstances is false, agentStopInstanceDelayTime is agent_kill_instance_timeout,
|
||||
* 0 second by default,
|
||||
* and the operation of stopping instances will not be executed.
|
||||
*/
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
uint32 timeout = IsBoolCmParamTrue(g_enableFenceDn) ? FENCE_TIMEOUT : DISABLE_TIMEOUT;
|
||||
uint32 agentStopInstanceDelayTime = isToStopInstances ? timeout : agent_kill_instance_timeout;
|
||||
#else
|
||||
uint32 agentStopInstanceDelayTime = isToStopInstances ? DISABLE_TIMEOUT : agent_kill_instance_timeout;
|
||||
#endif
|
||||
if (isDisconnectTimeout(g_disconnectTime, (int)agentStopInstanceDelayTime) && !have_killed_nodes) {
|
||||
if ((undocumentedVersion == 0) && isMaintenanceModeDisableOperation(CMA_KILL_SELF_INSTANCES)) {
|
||||
have_killed_nodes = false;
|
||||
@ -451,6 +459,16 @@ void* ConnCmsPMain(void* arg)
|
||||
"sync_dropped_coordinator change to false.\n", agentStopInstanceDelayTime);
|
||||
g_syncDroppedCoordinator = false;
|
||||
have_killed_nodes = true;
|
||||
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
/*
|
||||
* Kill datanode process, so that it can be restarted with pending mode.
|
||||
*/
|
||||
uint32 i;
|
||||
for (i = 0; i < g_currentNode->datanodeCount; i++) {
|
||||
immediate_stop_one_instance(g_currentNode->datanode[i].datanodeLocalDataPath, INSTANCE_DN);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -225,6 +225,9 @@ char g_enableMesSsl[BOOL_STR_MAX_LEN] = {0};
|
||||
uint32 g_sslCertExpireCheckInterval = SECONDS_PER_DAY;
|
||||
uint32 g_cmaRhbItvl = 1000;
|
||||
CmResConfList g_resConf[CM_MAX_RES_INST_COUNT] = {{{0}}};
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
char g_enableFenceDn[10] = {0};
|
||||
#endif
|
||||
|
||||
bool &GetIsSharedStorageMode()
|
||||
{
|
||||
|
@ -1430,6 +1430,11 @@ int get_agent_global_params_from_configfile()
|
||||
write_runlog(ERROR, "get_config_param() get enable_dcf fail.\n");
|
||||
}
|
||||
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
if (get_config_param(configDir, "enable_fence_dn", g_enableFenceDn, sizeof(g_enableFenceDn)) < 0)
|
||||
write_runlog(ERROR, "get_config_param() get enable_fence_dn fail.\n");
|
||||
#endif
|
||||
|
||||
#ifdef __aarch64__
|
||||
agent_process_cpu_affinity = get_uint32_value_from_config(configDir, "process_cpu_affinity", 0);
|
||||
if (agent_process_cpu_affinity > CPU_AFFINITY_MAX) {
|
||||
|
@ -53,6 +53,9 @@ const char *g_cmaParamInfo[] = {
|
||||
"disk_timeout|int|0,2147483647|NULL|NULL|",
|
||||
"voting_disk_path|string|0,0|NULL|NULL|",
|
||||
"agent_rhb_interval|int|0,2147483647|NULL|NULL|",
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
"enable_fence_dn|string|0,0|NULL|NULL|",
|
||||
#endif
|
||||
#ifdef ENABLE_MULTIPLE_NODES
|
||||
"enable_cn_auto_repair|bool|0,0|NULL|NULL|",
|
||||
"enable_gtm_phony_dead_check|int|0,1|NULL|NULL|",
|
||||
|
@ -284,6 +284,9 @@ extern char g_agentQueryBarrier[BARRIERLEN];
|
||||
extern char g_agentTargetBarrier[BARRIERLEN];
|
||||
extern char g_environmentThreshold[CM_PATH_LENGTH];
|
||||
extern char g_doradoIp[CM_IP_LENGTH];
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
extern char g_enableFenceDn[10];
|
||||
#endif
|
||||
extern uint32 g_diskTimeout;
|
||||
extern char g_enableMesSsl[BOOL_STR_MAX_LEN];
|
||||
extern uint32 g_sslCertExpireCheckInterval;
|
||||
|
Loading…
x
Reference in New Issue
Block a user