@ -232,6 +232,8 @@ char g_enableFenceDn[10] = {0};
|
||||
#endif
|
||||
bool g_isPauseArbitration = false;
|
||||
char g_cmManualPausePath[MAX_PATH_LEN] = {0};
|
||||
/* True while the file named by g_cmManualStartingPath exists; server_loop refreshes it with access(). */
bool g_isStarting = false;
|
||||
/* Path of the manual-starting flag file; get_prog_path formats it as "<exec_path>/bin/<CM_CLUSTER_MANUAL_STARTING>". */
char g_cmManualStartingPath[MAX_PATH_LEN] = {0};
|
||||
|
||||
bool &GetIsSharedStorageMode()
|
||||
{
|
||||
|
||||
@ -398,7 +398,7 @@ void start_instance_check(void)
|
||||
|
||||
StartInstanceAndCheck(CheckAgentNicDown, "[CheckAgentNicDown]");
|
||||
|
||||
if (IsCusResExistLocal()) {
|
||||
if (IsCusResExistLocal() && !(g_isPauseArbitration && !g_isStarting)) {
|
||||
StartInstanceAndCheck(StartResourceCheck, "[StartResourceCheck]");
|
||||
}
|
||||
|
||||
|
||||
@ -488,7 +488,7 @@ void StopResourceCheck()
|
||||
if (IsInstManualStopped(g_resConf[i].cmInstanceId)) {
|
||||
OneResInstShutdown(&g_resConf[i]);
|
||||
}
|
||||
if (CmFileExist(g_cmManualStartPath) || !IsOneResInstWork(g_resConf[i].resName, g_resConf[i].cmInstanceId)) {
|
||||
if ((!IsOneResInstWork(g_resConf[i].resName, g_resConf[i].cmInstanceId) && !g_isPauseArbitration) || CmFileExist(g_cmManualStartPath)) {
|
||||
OneResInstClean(&g_resConf[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -381,6 +381,8 @@ int get_prog_path()
|
||||
securec_check_errno(rc, (void)rc);
|
||||
rc = memset_s(g_cmManualPausePath, MAX_PATH_LEN, 0, MAX_PATH_LEN);
|
||||
securec_check_errno(rc, (void)rc);
|
||||
rc = memset_s(g_cmManualStartingPath, MAX_PATH_LEN, 0, MAX_PATH_LEN);
|
||||
securec_check_errno(rc, (void)rc);
|
||||
if (GetHomePath(exec_path, sizeof(exec_path)) != 0) {
|
||||
(void)fprintf(stderr, "Get GAUSSHOME failed, please check.\n");
|
||||
return -1;
|
||||
@ -433,6 +435,9 @@ int get_prog_path()
|
||||
rcs = snprintf_s(
|
||||
g_cmManualPausePath, MAX_PATH_LEN, MAX_PATH_LEN - 1, "%s/bin/%s", exec_path, CM_CLUSTER_MANUAL_PAUSE);
|
||||
securec_check_intval(rcs, (void)rcs);
|
||||
rcs = snprintf_s(
|
||||
g_cmManualStartingPath, MAX_PATH_LEN, MAX_PATH_LEN - 1, "%s/bin/%s", exec_path, CM_CLUSTER_MANUAL_STARTING);
|
||||
securec_check_intval(rcs, (void)rcs);
|
||||
InitClientCrt(exec_path);
|
||||
}
|
||||
|
||||
@ -1049,6 +1054,12 @@ void server_loop(void)
|
||||
pauseLogTimes = 0;
|
||||
}
|
||||
|
||||
if (access(g_cmManualStartingPath, F_OK) == 0) {
|
||||
g_isStarting = true;
|
||||
} else {
|
||||
g_isStarting = false;
|
||||
}
|
||||
|
||||
(void)clock_gettime(CLOCK_MONOTONIC, &endTime);
|
||||
if (g_isStart) {
|
||||
g_suppressAlarm = true;
|
||||
|
||||
@ -41,6 +41,8 @@ static bool g_shutdownClient = false;
|
||||
static SendMsgQueue *g_sendMsg = NULL;
|
||||
static OneResStatList *g_clientStatusList = NULL;
|
||||
static volatile bool g_needReconnect = false;
|
||||
#define CLUSTER_MANUAL_PAUSE "cluster_manual_pause"
|
||||
/* Path of the manual-pause flag file; filled by get_pause_path() and probed with access() in ResLockCore(). */
char g_manualPausePath[MAX_PATH_LEN];
|
||||
|
||||
static pthread_t *g_conThreadId = NULL;
|
||||
static pthread_t *g_sendThreadId = NULL;
|
||||
@ -608,6 +610,7 @@ static void InitGlobalVariable(const char *resName)
|
||||
|
||||
status_t PreInit(uint32 instanceId, const char *resName, CmNotifyFunc func, bool *isFirstInit)
|
||||
{
|
||||
get_pause_path();
|
||||
if (isFirstInit) {
|
||||
InitGlobalVariable(resName);
|
||||
CM_RETURN_IFERR(InitLogFile());
|
||||
@ -695,3 +698,22 @@ ClientLockResult SendLockMsgAndWaitResult(char *msgPtr, uint32 msgLen)
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void get_pause_path()
|
||||
{
|
||||
char exec_path[MAX_PATH_LEN] = {0};
|
||||
errno_t rc;
|
||||
int rcs;
|
||||
|
||||
rc = memset_s(g_manualPausePath, MAX_PATH_LEN, 0, MAX_PATH_LEN);
|
||||
securec_check_errno(rc, (void)rc);
|
||||
if (GetHomePath(exec_path, sizeof(exec_path)) != 0) {
|
||||
(void)fprintf(stderr, "Get GAUSSHOME failed, please check.\n");
|
||||
return;
|
||||
} else {
|
||||
check_input_for_security(exec_path);
|
||||
rcs = snprintf_s(
|
||||
g_manualPausePath, MAX_PATH_LEN, MAX_PATH_LEN - 1, "%s/bin/%s", exec_path, CLUSTER_MANUAL_PAUSE);
|
||||
securec_check_intval(rcs, (void)rcs);
|
||||
}
|
||||
}
|
||||
@ -73,6 +73,10 @@ ClientCmLockMsg *GetLockSendMsg(const char *lockName, LockOption opt)
|
||||
|
||||
int ResLockCore(const char *lockName)
|
||||
{
|
||||
if (access(g_manualPausePath, F_OK) == 0 && strcmp(lockName, "dms_reformer_lock") == 0) {
|
||||
write_runlog(LOG, "cm is pause, don't lock(%s).\n", lockName);
|
||||
return 1;
|
||||
}
|
||||
ClientCmLockMsg *sendMsg = GetLockSendMsg(lockName, CM_RES_LOCK);
|
||||
if (sendMsg == NULL) {
|
||||
write_runlog(ERROR, "generate (%s)lock msg failed.\n", lockName);
|
||||
|
||||
@ -55,6 +55,7 @@
|
||||
#define MINORITY_AZ_ARBITRATE "minority_az_arbitrate_hist"
|
||||
#define RESUMING_CN_STOP "resuming_cn_stop"
|
||||
#define CLUSTER_MANUAL_PAUSE "cluster_manual_pause"
|
||||
#define CLUSTER_MANUAL_STARTING "cluster_manual_starting"
|
||||
|
||||
char* g_bin_name = NULL;
|
||||
char* g_bin_path = NULL;
|
||||
@ -121,6 +122,7 @@ static char* hotpatch_path = NULL;
|
||||
|
||||
char manual_start_file[MAXPGPATH];
|
||||
char instance_manual_start_file[MAXPGPATH];
|
||||
char cluster_manual_starting_file[MAXPGPATH];
|
||||
char etcd_manual_start_file[MAXPGPATH];
|
||||
static bool coordinator_dynamic_view = false;
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
@ -865,6 +867,9 @@ static void init_ctl_global_variable()
|
||||
ret = snprintf_s(
|
||||
instance_manual_start_file, MAXPGPATH, MAXPGPATH - 1, "%s/bin/%s", g_appPath, INSTANCE_MANUAL_START);
|
||||
securec_check_intval(ret, (void)ret);
|
||||
ret = snprintf_s(
|
||||
cluster_manual_starting_file, MAXPGPATH, MAXPGPATH - 1, "%s/bin/%s", g_appPath, CLUSTER_MANUAL_STARTING);
|
||||
securec_check_intval(ret, (void)ret);
|
||||
ret = snprintf_s(etcd_manual_start_file, MAXPGPATH, MAXPGPATH - 1, "%s/bin/%s", g_appPath, ETCD_MANUAL_START);
|
||||
securec_check_intval(ret, (void)ret);
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
@ -2249,6 +2254,7 @@ static void CtlCommandProcessCore(int *status, CtlOption *ctlCtx)
|
||||
#endif
|
||||
case START_COMMAND:
|
||||
*status = do_start();
|
||||
removeStartingFile();
|
||||
break;
|
||||
case CM_SWITCHOVER_COMMAND:
|
||||
*status = DoSwitchover(ctlCtx);
|
||||
|
||||
@ -52,6 +52,7 @@ extern char result_path[MAXPGPATH];
|
||||
extern char* g_command_operation_lcName;
|
||||
extern uint32 g_nodeId;
|
||||
extern char instance_manual_start_file[MAXPGPATH];
|
||||
extern char cluster_manual_starting_file[MAXPGPATH];
|
||||
extern bool switchover_all_quick;
|
||||
|
||||
#ifdef ENABLE_MULTIPLE_NODES
|
||||
|
||||
@ -90,6 +90,7 @@ static struct timespec g_endTime;
|
||||
extern char g_cmData[CM_PATH_LENGTH];
|
||||
extern char manual_start_file[MAXPGPATH];
|
||||
extern char instance_manual_start_file[MAXPGPATH];
|
||||
extern char cluster_manual_starting_file[MAXPGPATH];
|
||||
extern char etcd_manual_start_file[MAXPGPATH];
|
||||
extern char minority_az_start_file[MAX_PATH_LEN];
|
||||
extern char g_minorityAzArbitrateFile[MAX_PATH_LEN];
|
||||
@ -388,6 +389,7 @@ status_t do_start(void)
|
||||
} else {
|
||||
if (CheckOfflineInstance(g_commandOperationNodeId)) {
|
||||
write_runlog(LOG, "the instance(node:%u) is Offline, no need to start.\n", g_commandOperationNodeId);
|
||||
removeStartingFile();
|
||||
exit(0);
|
||||
}
|
||||
write_runlog(LOG, "start the node:%u,datapath:%s. \n", g_commandOperationNodeId, g_cmData);
|
||||
@ -489,12 +491,13 @@ static void start_cluster(void)
|
||||
ret = snprintf_s(command,
|
||||
MAXPGPATH,
|
||||
MAXPGPATH - 1,
|
||||
SYSTEMQUOTE "source /etc/profile;pssh -i %s -h %s \"rm -f %s %s_*\" > %s; "
|
||||
SYSTEMQUOTE "source /etc/profile;pssh -i %s -h %s \"rm -f %s %s_*; touch %s\" > %s; "
|
||||
"if [ $? -ne 0 ]; then cat %s; fi; rm -f %s" SYSTEMQUOTE,
|
||||
PSSH_TIMEOUT_OPTION,
|
||||
hosts_path,
|
||||
manual_start_file,
|
||||
instance_manual_start_file,
|
||||
cluster_manual_starting_file,
|
||||
pssh_out_path,
|
||||
pssh_out_path,
|
||||
pssh_out_path);
|
||||
@ -502,13 +505,14 @@ static void start_cluster(void)
|
||||
ret = snprintf_s(command,
|
||||
MAXPGPATH,
|
||||
MAXPGPATH - 1,
|
||||
SYSTEMQUOTE "source /etc/profile;pssh -i %s -h %s \"source %s;rm -f %s %s_*\" > %s; "
|
||||
SYSTEMQUOTE "source /etc/profile;pssh -i %s -h %s \"source %s;rm -f %s %s_*; touch %s\" > %s; "
|
||||
"if [ $? -ne 0 ]; then cat %s; fi; rm -f %s" SYSTEMQUOTE,
|
||||
PSSH_TIMEOUT_OPTION,
|
||||
hosts_path,
|
||||
mpp_env_separate_file,
|
||||
manual_start_file,
|
||||
instance_manual_start_file,
|
||||
cluster_manual_starting_file,
|
||||
pssh_out_path,
|
||||
pssh_out_path,
|
||||
pssh_out_path);
|
||||
@ -1320,8 +1324,10 @@ static void start_node(uint32 nodeid)
|
||||
char command[MAXPGPATH];
|
||||
uint32 ii;
|
||||
errno_t rc;
|
||||
rc = snprintf_s(command, MAXPGPATH, MAXPGPATH - 1, SYSTEMQUOTE "rm -f %s %s %s_* < \"%s\" 2>&1 &" SYSTEMQUOTE,
|
||||
manual_start_file, etcd_manual_start_file, instance_manual_start_file, DEVNULL);
|
||||
rc = snprintf_s(command, MAXPGPATH, MAXPGPATH - 1,
|
||||
SYSTEMQUOTE "rm -f %s %s %s_*; touch %s < \"%s\" 2>&1 &" SYSTEMQUOTE,
|
||||
manual_start_file, etcd_manual_start_file,
|
||||
instance_manual_start_file, cluster_manual_starting_file, DEVNULL);
|
||||
securec_check_intval(rc, (void)rc);
|
||||
|
||||
if (nodeid == g_currentNode->node) {
|
||||
@ -1415,6 +1421,52 @@ void start_instance(uint32 nodeid, const char* datapath)
|
||||
}
|
||||
}
|
||||
|
||||
void removeStartingFile()
|
||||
{
|
||||
int ret;
|
||||
char command[MAX_COMMAND_LEN] = {0};
|
||||
|
||||
init_hosts();
|
||||
if (mpp_env_separate_file[0] == '\0') {
|
||||
ret = snprintf_s(command,
|
||||
MAX_COMMAND_LEN,
|
||||
MAX_COMMAND_LEN - 1,
|
||||
SYSTEMQUOTE "source /etc/profile;pssh -i %s -h %s \"rm -f %s\" > %s; "
|
||||
"if [ $? -ne 0 ]; then cat %s; fi; rm -f %s" SYSTEMQUOTE,
|
||||
PSSH_TIMEOUT_OPTION,
|
||||
hosts_path,
|
||||
cluster_manual_starting_file,
|
||||
pssh_out_path,
|
||||
pssh_out_path,
|
||||
pssh_out_path);
|
||||
} else {
|
||||
ret = snprintf_s(command,
|
||||
MAX_COMMAND_LEN,
|
||||
MAX_COMMAND_LEN - 1,
|
||||
SYSTEMQUOTE "source /etc/profile;pssh -i %s -h %s \"source %s;rm -f %s\" > %s; "
|
||||
"if [ $? -ne 0 ]; then cat %s; fi; rm -f %s" SYSTEMQUOTE,
|
||||
PSSH_TIMEOUT_OPTION,
|
||||
hosts_path,
|
||||
mpp_env_separate_file,
|
||||
cluster_manual_starting_file,
|
||||
pssh_out_path,
|
||||
pssh_out_path,
|
||||
pssh_out_path);
|
||||
}
|
||||
securec_check_intval(ret, (void)ret);
|
||||
ret = system(command);
|
||||
if (ret != 0) {
|
||||
write_runlog(DEBUG1,
|
||||
"Failed to delete the startingFile with executing the command: command=\"%s\","
|
||||
" nodeId=%u, systemReturn=%d, shellReturn=%d, errno=%d.\n",
|
||||
command,
|
||||
g_currentNode->node,
|
||||
ret,
|
||||
SHELL_RETURN_CODE(ret),
|
||||
errno);
|
||||
}
|
||||
}
|
||||
|
||||
static void* check_cluster_start_status(void* arg)
|
||||
{
|
||||
int count = 0;
|
||||
@ -1428,9 +1480,11 @@ static void* check_cluster_start_status(void* arg)
|
||||
while (startingTime < g_waitSeconds) {
|
||||
if (g_cluster_start_status == CM_STATUS_NORMAL) {
|
||||
write_runlog(LOG, "start cluster successfully.\n");
|
||||
removeStartingFile();
|
||||
exit(0);
|
||||
} else if (g_cluster_start_status == CM_STATUS_NORMAL_WITH_CN_DELETED) {
|
||||
write_runlog(LOG, "start cluster successfully. There is a coordinator that has been deleted. \n");
|
||||
removeStartingFile();
|
||||
exit(0);
|
||||
} else if (g_az_start_status == CM_STATUS_NORMAL) {
|
||||
for (uint32 ii = 0; ii < g_node_num; ii++) {
|
||||
@ -1440,6 +1494,7 @@ static void* check_cluster_start_status(void* arg)
|
||||
}
|
||||
|
||||
write_runlog(LOG, "start availability zone successfully.\n");
|
||||
removeStartingFile();
|
||||
exit(0);
|
||||
} else if (g_az_start_status == CM_STATUS_NORMAL_WITH_CN_DELETED) {
|
||||
for (uint32 ii = 0; ii < g_node_num; ii++) {
|
||||
@ -1449,12 +1504,15 @@ static void* check_cluster_start_status(void* arg)
|
||||
}
|
||||
|
||||
write_runlog(LOG, "start availability zone successfully. There is a coordinator that has been deleted. \n");
|
||||
removeStartingFile();
|
||||
exit(0);
|
||||
} else if (g_node_start_status == CM_STATUS_NORMAL) {
|
||||
write_runlog(LOG, "start node successfully.\n");
|
||||
removeStartingFile();
|
||||
exit(0);
|
||||
} else if (g_node_start_status == CM_STATUS_NORMAL_WITH_CN_DELETED) {
|
||||
write_runlog(LOG, "start node successfully. There is a coordinator that has been deleted. \n");
|
||||
removeStartingFile();
|
||||
exit(0);
|
||||
} else if (g_instance_start_status == CM_STATUS_NORMAL) {
|
||||
/*
|
||||
@ -1464,16 +1522,19 @@ static void* check_cluster_start_status(void* arg)
|
||||
count++;
|
||||
if (count > INSTANCE_START_CONFIRM_TIME) {
|
||||
write_runlog(LOG, "start instance successfully.\n");
|
||||
removeStartingFile();
|
||||
exit(0);
|
||||
}
|
||||
} else if (g_dn_relation_start_status == CM_STATUS_NORMAL) {
|
||||
/* check whether the relation datanodes have been started successfully */
|
||||
write_runlog(LOG, "start relation datanodes successfully(node:%u, path:%s).\n",
|
||||
g_commandOperationNodeId, g_cmData);
|
||||
removeStartingFile();
|
||||
exit(0);
|
||||
} else if (g_resStartStatus == CM_STATUS_NORMAL) {
|
||||
write_runlog(LOG, "start resource instance successfully(nodeId:%u, instId:%u).\n",
|
||||
g_commandOperationNodeId, g_commandOperationInstanceId);
|
||||
removeStartingFile();
|
||||
exit(0);
|
||||
} else {
|
||||
count = 0;
|
||||
@ -1535,6 +1596,7 @@ static void* check_cluster_start_status(void* arg)
|
||||
g_waitSeconds);
|
||||
}
|
||||
|
||||
removeStartingFile();
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
|
||||
@ -47,8 +47,8 @@ void InitDbListsByStaticConfig()
|
||||
return;
|
||||
}
|
||||
g_dbResNodeIdxInfo.idxLists[g_dbResNodeIdxInfo.hwl] = i;
|
||||
rcs =
|
||||
snprintf_s(info, maxInfoLen, maxInfoLen - 1, " %u:[%u-%u]", g_dbResNodeIdxInfo.hwl, i, g_node[i].node);
|
||||
rcs = snprintf_s(info, maxInfoLen, maxInfoLen - 1, " %u:[%u-%u]",
|
||||
g_dbResNodeIdxInfo.hwl, i, g_node[i].node);
|
||||
securec_check_intval(rcs, (void)rcs);
|
||||
rcs = strncat_s(buf, MAX_LOG_BUFF_LEN, info, strlen(info));
|
||||
securec_check_errno(rcs, (void)rcs);
|
||||
|
||||
@ -312,6 +312,8 @@ extern bool g_dn_report_msg_ok;
|
||||
|
||||
extern bool g_isPauseArbitration;
|
||||
extern char g_cmManualPausePath[MAX_PATH_LEN];
|
||||
extern bool g_isStarting;
|
||||
extern char g_cmManualStartingPath[MAX_PATH_LEN];
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@ -47,6 +47,7 @@
|
||||
#define SYSTEM_CALL_LOG "system_call"
|
||||
#define MAX_LOGFILE_TIMESTAMP "99991231235959"
|
||||
#define CM_CLUSTER_MANUAL_PAUSE "cluster_manual_pause"
|
||||
#define CM_CLUSTER_MANUAL_STARTING "cluster_manual_starting"
|
||||
|
||||
#define CONN_FAIL_TIMES 3
|
||||
/* time style length */
|
||||
|
||||
@ -120,6 +120,9 @@ OneResStatList *GetClientStatusList();
|
||||
status_t SendInitMsg(uint32 instanceId, const char *resName);
|
||||
bool SendInitMsgAndGetResult(const char *resName, uint32 instId);
|
||||
ClientLockResult SendLockMsgAndWaitResult(char *msgPtr, uint32 msgLen);
|
||||
void get_pause_path();
|
||||
|
||||
extern char g_manualPausePath[MAX_PATH_LEN];
|
||||
|
||||
#endif
|
||||
#endif // CM_CLIENT_H
|
||||
|
||||
@ -312,5 +312,6 @@ int DoRhbPrint();
|
||||
int DoPause();
|
||||
int DoResume();
|
||||
bool CheckTrustAndNet();
|
||||
void removeStartingFile();
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user