Compare commits
38 Commits
master
...
v5.0.2-la-
Author | SHA1 | Date | |
---|---|---|---|
3f2c2949fa | |||
6017938349 | |||
![]() |
b7f7f22f2e | ||
![]() |
59584b5616 | ||
![]() |
2c01dc47b6 | ||
![]() |
29bcb6e032 | ||
![]() |
44cbc5a644 | ||
![]() |
25c5b03e2f | ||
![]() |
754296e912 | ||
![]() |
56686f265b | ||
![]() |
01003a2774 | ||
![]() |
052f131ec9 | ||
![]() |
589a0f02b6 | ||
![]() |
a27488226c | ||
![]() |
6b034ab905 | ||
![]() |
943abc13d7 | ||
![]() |
a7ad16c4e3 | ||
![]() |
a83e87d8dd | ||
![]() |
6c460b45ca | ||
![]() |
9c334cd82f | ||
![]() |
c52edcc563 | ||
![]() |
273fe7f1d5 | ||
![]() |
3f1d6bb706 | ||
![]() |
9b122f0771 | ||
![]() |
8e54841941 | ||
![]() |
2b2e46abc2 | ||
![]() |
a8041337c6 | ||
![]() |
b8459b52b8 | ||
![]() |
3563bfde1f | ||
![]() |
8f9ef9afac | ||
![]() |
c0a00dd73c | ||
![]() |
4dd55aff6f | ||
![]() |
dd9c929942 | ||
![]() |
3b53eae1c7 | ||
![]() |
d7210e29ec | ||
![]() |
8b5d4275e7 | ||
![]() |
6ba8937db0 | ||
![]() |
bdfdfd0f3b |
@ -72,7 +72,7 @@ ifeq ($(ENABLE_MEMCHECK)_$(ENABLE_UT), ON_ON)
|
||||
endif
|
||||
|
||||
PLAT_FORM_NAME = $(shell sh $(SCRIPT_PATH)/get_PlatForm_str.sh)
|
||||
BUILD_TUPLE = $(shell uname -p)
|
||||
BUILD_TUPLE = $(shell uname -m)
|
||||
|
||||
ifeq ($(BUILD_TUPLE), x86_64)
|
||||
OS_OPTIONS = -msse4.2 -mcx16
|
||||
|
@ -5,6 +5,12 @@ set -e
|
||||
SCRIPT_PATH=$(cd $(dirname $0) && pwd)
|
||||
PROJECT_ROOT_PATH=$(cd ${SCRIPT_PATH}/.. && pwd)
|
||||
|
||||
BUILD_TUPLE=$(uname -m)
|
||||
if [ x"$BUILD_TUPLE" = x"loongarch64" ]; then
|
||||
export GCC="8.3"
|
||||
else
|
||||
export GCC="7.3"
|
||||
fi
|
||||
export COMPONENT="CM"
|
||||
export VERSION_MODE="release"
|
||||
export THIRD="${PROJECT_ROOT_PATH}/binarylibs"
|
||||
@ -17,7 +23,7 @@ export MULTIPLE_NODES="ON"
|
||||
export OPEN_SOURCE_MODE="inc"
|
||||
export LIBPQ="ON"
|
||||
export KRB="OFF"
|
||||
export GCC="7.3"
|
||||
|
||||
export PRIVATEGAUSS="ON"
|
||||
export ALARM="ON"
|
||||
|
||||
@ -120,7 +126,8 @@ function gcc_env() {
|
||||
fi
|
||||
export CC=$GCC_INSTALL_HOME/bin/gcc
|
||||
export CXX=$GCC_INSTALL_HOME/bin/g++
|
||||
export LD_LIBRARY_PATH=${GCC_INSTALL_HOME}/lib64:${LD_LIBRARY_PATH}
|
||||
export LD_LIBRARY_PATH=/opt/rh/devtoolset-10/root/usr/lib:${GCC_INSTALL_HOME}/lib64:${LD_LIBRARY_PATH}
|
||||
|
||||
}
|
||||
|
||||
function compile_open_source() {
|
||||
|
@ -1,2 +1,2 @@
|
||||
PRODUCT="openGauss"
|
||||
VERSION="5.0.0"
|
||||
VERSION="5.0.2"
|
||||
|
@ -1,4 +1,4 @@
|
||||
execute_process(COMMAND uname -p OUTPUT_VARIABLE BUILD_TUPLE OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
execute_process(COMMAND uname -m OUTPUT_VARIABLE BUILD_TUPLE OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
if (${BUILD_TUPLE} STREQUAL "x86_64")
|
||||
set(OS_OPTIONS -msse4.2 -mcx16)
|
||||
add_definitions(-DUSE_SSE42_CRC32C_WITH_RUNTIME_CHECK)
|
||||
|
@ -37,7 +37,7 @@ then
|
||||
fi
|
||||
|
||||
# get cpu bit
|
||||
cpu_bit=$(uname -p)
|
||||
cpu_bit=$(uname -m)
|
||||
|
||||
# the result info
|
||||
plat_form_str=""
|
||||
|
@ -112,14 +112,14 @@ status_t SetVotingDiskNodeData(char *data, uint32 dataLen)
|
||||
return CM_SUCCESS;
|
||||
}
|
||||
|
||||
status_t UpdateAllNodeHeartBeat()
|
||||
status_t UpdateAllNodeHeartBeat(uint32 nodeNum)
|
||||
{
|
||||
uint32 dataLen = VOTING_DISK_DATA_SIZE;
|
||||
uint32 dataLen = nodeNum * VOTING_DISK_EACH_NODE_OFFSET;
|
||||
if (GetVotingDiskNodeData(g_nodeDataBuff, dataLen) != CM_SUCCESS) {
|
||||
write_runlog(ERROR, "[%s] get voting disk node data failed.\n", __FUNCTION__);
|
||||
return CM_ERROR;
|
||||
}
|
||||
for (uint32 i = 0; i < VOTING_DISK_MAX_NODE_NUM; i++) {
|
||||
for (uint32 i = 0; i < nodeNum; i++) {
|
||||
uint32 offset = i * VOTING_DISK_EACH_NODE_OFFSET;
|
||||
VotingDiskNodeInfo *nodeInfo = (VotingDiskNodeInfo*)(g_nodeDataBuff + offset);
|
||||
if (nodeInfo->nodeTime == 0) {
|
||||
|
@ -71,8 +71,14 @@ static int CusResCmdExecute(const char *scriptPath, const char *oper, uint32 tim
|
||||
|
||||
status_t StartOneResInst(const CmResConfList *conf)
|
||||
{
|
||||
int ret;
|
||||
char oper[MAX_OPTION_LEN] = {0};
|
||||
int ret = snprintf_s(oper, MAX_OPTION_LEN, MAX_OPTION_LEN - 1, "-start %u %s", conf->resInstanceId, conf->arg);
|
||||
if (conf->resType == CUSTOM_RESOURCE_DN && undocumentedVersion > 0) {
|
||||
ret = snprintf_s(oper, MAX_OPTION_LEN, MAX_OPTION_LEN - 1, "-start %u %s '-u %u'", conf->resInstanceId,
|
||||
conf->arg, undocumentedVersion);
|
||||
} else {
|
||||
ret = snprintf_s(oper, MAX_OPTION_LEN, MAX_OPTION_LEN - 1, "-start %u %s", conf->resInstanceId, conf->arg);
|
||||
}
|
||||
securec_check_intval(ret, (void)ret);
|
||||
|
||||
ret = CusResCmdExecute(conf->script, oper, (uint32)conf->checkInfo.timeOut, CM_FALSE);
|
||||
@ -118,13 +124,13 @@ void OneResInstClean(const CmResConfList *oneResConf)
|
||||
}
|
||||
}
|
||||
|
||||
status_t RegOneResInst(const CmResConfList *conf, uint32 destInstId)
|
||||
status_t RegOneResInst(const CmResConfList *conf, uint32 destInstId, bool8 needNohup)
|
||||
{
|
||||
char oper[MAX_OPTION_LEN] = {0};
|
||||
int ret = snprintf_s(oper, MAX_OPTION_LEN, MAX_OPTION_LEN - 1, "-reg %u %s", destInstId, conf->arg);
|
||||
securec_check_intval(ret, (void)ret);
|
||||
|
||||
ret = CusResCmdExecute(conf->script, oper, (uint32)conf->checkInfo.timeOut, CM_TRUE);
|
||||
ret = CusResCmdExecute(conf->script, oper, (uint32)conf->checkInfo.timeOut, needNohup);
|
||||
if (ret != 0) {
|
||||
write_runlog(ERROR, "[%s]: cmd:(%s %s) execute failed, ret=%d.\n", __FUNCTION__, conf->script, oper, ret);
|
||||
return CM_ERROR;
|
||||
@ -251,7 +257,7 @@ int CheckOneResInst(const CmResConfList *conf)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ManualStopLocalResInst(CmResConfList *conf)
|
||||
static status_t ManualStopOneLocalResInst(CmResConfList *conf)
|
||||
{
|
||||
char instanceStartFile[MAX_PATH_LEN] = {0};
|
||||
int ret = snprintf_s(instanceStartFile, MAX_PATH_LEN, MAX_PATH_LEN - 1,
|
||||
@ -260,7 +266,7 @@ static void ManualStopLocalResInst(CmResConfList *conf)
|
||||
|
||||
if (CmFileExist(instanceStartFile)) {
|
||||
write_runlog(LOG, "instanceStartFile(%s) is exist, can't create again.\n", instanceStartFile);
|
||||
return;
|
||||
return CM_SUCCESS;
|
||||
}
|
||||
|
||||
char command[MAX_PATH_LEN] = {0};
|
||||
@ -272,8 +278,28 @@ static void ManualStopLocalResInst(CmResConfList *conf)
|
||||
ret = system(command);
|
||||
if (ret != 0) {
|
||||
write_runlog(ERROR, "manual stop res(%s) inst(%u) failed, ret=%d.\n", conf->resName, conf->resInstanceId, ret);
|
||||
} else {
|
||||
write_runlog(LOG, "manual stop res(%s) inst(%u) success.\n", conf->resName, conf->resInstanceId);
|
||||
return CM_ERROR;
|
||||
}
|
||||
|
||||
write_runlog(LOG, "manual stop res(%s) inst(%u) success.\n", conf->resName, conf->resInstanceId);
|
||||
return CM_SUCCESS;
|
||||
}
|
||||
|
||||
static status_t ManuallStopAllLocalResInst()
|
||||
{
|
||||
status_t result = CM_SUCCESS;
|
||||
for (uint32 i = 0; i < GetLocalResConfCount(); ++i) {
|
||||
if (ManualStopOneLocalResInst(&g_resConf[i]) != CM_SUCCESS) {
|
||||
result = CM_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void ManualStopLocalResInst(CmResConfList *conf)
|
||||
{
|
||||
if (ManuallStopAllLocalResInst() == CM_SUCCESS) {
|
||||
CleanOneInstCheckCount(conf);
|
||||
}
|
||||
}
|
||||
@ -290,21 +316,17 @@ bool IsInstManualStopped(uint32 instId)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool CanCusInstDoRestart(const CmResConfList *conf)
|
||||
static inline void RestartOneResInst(CmResConfList *conf)
|
||||
{
|
||||
ResIsregStatus stat = IsregOneResInst(conf, conf->resInstanceId);
|
||||
if ((stat == CM_RES_ISREG_REG) || (stat == CM_RES_ISREG_NOT_SUPPORT)) {
|
||||
return true;
|
||||
if ((stat != CM_RES_ISREG_REG) && (stat != CM_RES_ISREG_NOT_SUPPORT)) {
|
||||
if (RegOneResInst(conf, conf->resInstanceId, CM_FALSE) != CM_SUCCESS) {
|
||||
write_runlog(LOG, "cur inst(%u) isreg stat=(%u), and reg failed, restart failed.\n",
|
||||
conf->cmInstanceId, (uint32)stat);
|
||||
return;
|
||||
}
|
||||
}
|
||||
write_runlog(LOG, "cur inst(%u) isreg stat=(%u), can't do restart.\n", conf->cmInstanceId, (uint32)stat);
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline status_t RestartOneResInst(CmResConfList *conf)
|
||||
{
|
||||
(void)CleanOneResInst(conf);
|
||||
CM_RETURN_IFERR(StartOneResInst(conf));
|
||||
return CM_SUCCESS;
|
||||
(void)StartOneResInst(conf);
|
||||
}
|
||||
|
||||
static void ProcessOfflineInstance(CmResConfList *conf)
|
||||
@ -312,9 +334,7 @@ static void ProcessOfflineInstance(CmResConfList *conf)
|
||||
long curTime = GetCurMonotonicTimeSec();
|
||||
|
||||
if (conf->checkInfo.restartTimes == -1) {
|
||||
if (CanCusInstDoRestart(conf)) {
|
||||
(void)RestartOneResInst(conf);
|
||||
}
|
||||
RestartOneResInst(conf);
|
||||
return;
|
||||
}
|
||||
if (conf->checkInfo.brokeTime == 0) {
|
||||
@ -338,10 +358,7 @@ static void ProcessOfflineInstance(CmResConfList *conf)
|
||||
conf->resName, conf->resInstanceId, conf->checkInfo.startTime, conf->checkInfo.restartPeriod);
|
||||
return;
|
||||
}
|
||||
if (!CanCusInstDoRestart(conf)) {
|
||||
return;
|
||||
}
|
||||
CM_RETVOID_IFERR(RestartOneResInst(conf));
|
||||
RestartOneResInst(conf);
|
||||
conf->checkInfo.startCount++;
|
||||
conf->checkInfo.startTime = curTime;
|
||||
write_runlog(LOG, "res(%s) inst(%u) has been restart (%d) times, restart more than (%d) time will manually stop.\n",
|
||||
@ -376,14 +393,12 @@ static void ProcessAbnormalInstance(CmResConfList *conf)
|
||||
write_runlog(LOG, "res(%s) inst(%u) has been abnormal (%d)s, >= timeout(%d)s, need restart.\n",
|
||||
conf->resName, conf->cmInstanceId, duration, conf->checkInfo.abnormalTimeout);
|
||||
|
||||
CM_RETVOID_IFERR(RestartOneResInst(conf));
|
||||
conf->checkInfo.startCount++;
|
||||
conf->checkInfo.startTime = curTime;
|
||||
|
||||
if (conf->checkInfo.restartTimes != -1) {
|
||||
write_runlog(LOG, "res(%s) inst(%u) has been restart (%d) times, restart more (%d) time will manually stop.\n",
|
||||
conf->resName, conf->cmInstanceId, conf->checkInfo.startCount, conf->checkInfo.restartTimes);
|
||||
if (CleanOneResInst(conf) == CM_SUCCESS) {
|
||||
write_runlog(LOG, "res(%s) inst(%u) clean abnormal time.\n", conf->resName, conf->cmInstanceId);
|
||||
} else {
|
||||
conf->checkInfo.startCount++;
|
||||
}
|
||||
conf->checkInfo.startTime = curTime;
|
||||
}
|
||||
|
||||
static inline bool NeedStopResInst(const char *resName, uint32 cmInstId)
|
||||
@ -625,10 +640,8 @@ static status_t InitLocalAllDnResInstConf(const CusResConfJson *resJson, CmResCo
|
||||
|
||||
static status_t InitLocalOneResConf(const OneCusResConfJson *oneResJson)
|
||||
{
|
||||
CmResConfList newLocalConf;
|
||||
errno_t rc = memset_s(&newLocalConf, sizeof(CmResConfList), 0, sizeof(CmResConfList));
|
||||
securec_check_errno(rc, (void)rc);
|
||||
|
||||
CmResConfList newLocalConf = {{0}};
|
||||
newLocalConf.resType = (int)oneResJson->resType;
|
||||
if (oneResJson->resType == CUSTOM_RESOURCE_APP) {
|
||||
CM_RETURN_IFERR(InitLocalCommConfOfDefRes(&oneResJson->appResConf, &newLocalConf));
|
||||
CM_RETURN_IFERR(InitLocalAllAppResInstConf(&oneResJson->appResConf, &newLocalConf));
|
||||
|
@ -20,9 +20,9 @@
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
#include "cma_mes.h"
|
||||
#include "mes_interface.h"
|
||||
|
||||
#include "mes.h"
|
||||
#include "cm_debug.h"
|
||||
|
||||
#include "cm_config.h"
|
||||
#include "cm_elog.h"
|
||||
@ -37,6 +37,14 @@
|
||||
#define AGENT_RHB_BUFF_POOL_SIZE (1024)
|
||||
#define AGENT_RHB_CHECK_SID (0)
|
||||
|
||||
typedef struct CmaMesMsgHeadT {
|
||||
uint32 version;
|
||||
uint32 cmd; // command
|
||||
char reserved[64];
|
||||
uint32 bufSize;
|
||||
char buf[0];
|
||||
} CmaMesMsgHead; // total size is 76
|
||||
|
||||
static pthread_t g_rhbThread;
|
||||
static const uint32 PASSWD_MAX_LEN = 64;
|
||||
|
||||
@ -122,10 +130,6 @@ static void InitTaskCmdGroup(mes_profile_t *pf)
|
||||
pf->task_group[MES_TASK_GROUP_ONE] = 0;
|
||||
pf->task_group[MES_TASK_GROUP_TWO] = 0;
|
||||
pf->task_group[MES_TASK_GROUP_THREE] = 0;
|
||||
|
||||
for (uint8 i = (uint8)RHB_MSG_BEGIN; i < (uint8)RHB_MSG_CEIL; i++) {
|
||||
mes_set_command_task_group(i, MES_TASK_GROUP_ZERO);
|
||||
}
|
||||
}
|
||||
|
||||
static void InitBuffPool(mes_profile_t *pf)
|
||||
@ -147,7 +151,7 @@ static void initPfile(mes_profile_t *pf, const RhbCtx *ctx)
|
||||
pf->mes_elapsed_switch = 0;
|
||||
|
||||
pf->inst_cnt = ctx->instCount;
|
||||
error_t rc = memcpy_s(
|
||||
errno_t rc = memcpy_s(
|
||||
pf->inst_net_addr, sizeof(mes_addr_t) * MES_MAX_INSTANCES, ctx->instAddrs, sizeof(mes_addr_t) * MAX_RHB_NUM);
|
||||
securec_check_errno(rc, (void)rc);
|
||||
|
||||
@ -218,7 +222,7 @@ static void LogCallBack(int logType, int logLevel, const char *codeFileName, uns
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
typedef void (*CmMesMsgProc)(mes_message_t *mgs);
|
||||
typedef void (*CmMesMsgProc)(mes_msg_t *mgs);
|
||||
|
||||
typedef struct ProcessorFunc_ {
|
||||
RhbMsgCmd cmd;
|
||||
@ -242,35 +246,42 @@ void GetHbs(time_t *hbs, unsigned int *hwl)
|
||||
securec_check_errno(rc, (void)rc);
|
||||
}
|
||||
|
||||
void CmaHdlRhbReq(mes_message_t *msg)
|
||||
void CmaHdlRhbReq(mes_msg_t *msg)
|
||||
{
|
||||
write_runlog(DEBUG1, "[RHB] receive a hb msg from inst[%hhu]!\n", msg->head->src_inst);
|
||||
if (msg->head->src_inst < g_curNodeHb.hwl) {
|
||||
g_curNodeHb.hbs[msg->head->src_inst] = time(NULL);
|
||||
write_runlog(DEBUG1, "[RHB] receive a hb msg from inst[%hhu]!\n", msg->src_inst);
|
||||
if (msg->src_inst < g_curNodeHb.hwl) {
|
||||
g_curNodeHb.hbs[msg->src_inst] = time(NULL);
|
||||
}
|
||||
}
|
||||
|
||||
void CmaHdlRhbAck(mes_message_t *msg)
|
||||
{
|
||||
mes_notify_broadcast_msg_recv_and_release(msg);
|
||||
}
|
||||
|
||||
static const ProcessorFunc g_processors[RHB_MSG_CEIL] = {
|
||||
{RHB_MSG_HB_BC, CmaHdlRhbReq, CM_FALSE, "handle cma rhb broadcast message"},
|
||||
};
|
||||
|
||||
void MesMsgProc(uint32 workThread, mes_message_t *msg)
|
||||
void MesMsgProc(unsigned int work_idx, ruid_type ruid, mes_msg_t *msg)
|
||||
{
|
||||
mes_message_head_t *head = msg->head;
|
||||
if (head->cmd >= (uint8)RHB_MSG_CEIL) {
|
||||
write_runlog(ERROR, "unknow cmd(%hhu) from inst:[%hhu], size:[%hu]!\n", head->cmd, head->src_inst, head->size);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
if (msg == NULL || msg->buffer == NULL) {
|
||||
write_runlog(ERROR, "invaild msg, when msg or buffer is null.\n");
|
||||
break;
|
||||
}
|
||||
if (msg->size < sizeof(CmaMesMsgHead)) {
|
||||
write_runlog(ERROR, "unknown msg head from inst:[%u], size:[%u].\n", msg->src_inst, msg->size);
|
||||
break;
|
||||
}
|
||||
|
||||
const ProcessorFunc *processor = &g_processors[head->cmd];
|
||||
CmaMesMsgHead *head = (CmaMesMsgHead *)msg->buffer;
|
||||
if (head->cmd >= (uint32)RHB_MSG_CEIL) {
|
||||
write_runlog(ERROR, "unknow cmd(%hhu) from inst:[%hhu], size:[%hu]!\n",
|
||||
head->cmd, msg->src_inst, head->bufSize);
|
||||
break;
|
||||
}
|
||||
|
||||
processor->proc(msg);
|
||||
mes_release_message_buf(msg);
|
||||
const ProcessorFunc *processor = &g_processors[head->cmd];
|
||||
CM_ASSERT(processor->proc != NULL);
|
||||
processor->proc(msg);
|
||||
} while (0);
|
||||
mes_release_msg(msg);
|
||||
}
|
||||
|
||||
status_t CmaRhbInit(const RhbCtx *ctx)
|
||||
@ -314,10 +325,6 @@ status_t CmaRhbInit(const RhbCtx *ctx)
|
||||
write_runlog(WARNING, "mes ssl not enable!.\n");
|
||||
}
|
||||
|
||||
for (uint32 i = (uint32)RHB_MSG_BEGIN; i < (uint32)RHB_MSG_CEIL; i++) {
|
||||
mes_set_msg_enqueue((uint32)g_processors[i].cmd, (uint32)g_processors[i].isEnqueue);
|
||||
}
|
||||
|
||||
status_t ret = (status_t)mes_init(&pf);
|
||||
if (ret != CM_SUCCESS) {
|
||||
write_runlog(ERROR, "mes init failed!.\n");
|
||||
@ -328,10 +335,11 @@ status_t CmaRhbInit(const RhbCtx *ctx)
|
||||
return CM_SUCCESS;
|
||||
}
|
||||
|
||||
static void InitMsgHead(mes_message_head_t *head, const RhbCtx *ctx)
|
||||
static void InitMsgHead(CmaMesMsgHead *head, const RhbCtx *ctx)
|
||||
{
|
||||
MES_INIT_MESSAGE_HEAD(head, RHB_MSG_HB_BC, 0, ctx->instId, 0, ctx->sid, 0xFFFF);
|
||||
head->size = sizeof(mes_message_head_t);
|
||||
head->version = 0;
|
||||
head->cmd = (uint32)RHB_MSG_HB_BC;
|
||||
head->bufSize = 0;
|
||||
}
|
||||
|
||||
static void checkMesSslCertExpire()
|
||||
@ -369,11 +377,9 @@ void *CmaRhbMain(void *args)
|
||||
(void)atexit(CmaRhbUnInit);
|
||||
|
||||
write_runlog(LOG, "RHB check is ready to work!\n");
|
||||
mes_message_head_t head = {0};
|
||||
CmaMesMsgHead head = {0};
|
||||
InitMsgHead(&head, &ctx);
|
||||
uint64 succInsts = 0;
|
||||
|
||||
uint64 bcInsts = ctx.instMap & (~((uint64)0x1 << (ctx.instId)));
|
||||
int32 ret = 0;
|
||||
int itv = 0;
|
||||
struct timespec curTime = {0, 0};
|
||||
struct timespec lastTime = {0, 0};
|
||||
@ -391,12 +397,9 @@ void *CmaRhbMain(void *args)
|
||||
}
|
||||
|
||||
write_runlog(DEBUG1, "RHB broadcast hb to all nodes.!\n");
|
||||
mes_broadcast(ctx.sid, bcInsts, &head, &succInsts);
|
||||
if (bcInsts != succInsts) {
|
||||
write_runlog(DEBUG1,
|
||||
"bc not all success, send idx:[%llu], success status:[%llu]!\n",
|
||||
(long long unsigned int)bcInsts,
|
||||
(long long unsigned int)succInsts);
|
||||
ret = mes_broadcast(0, (char*)&head, sizeof(CmaMesMsgHead));
|
||||
if (ret != 0) {
|
||||
write_runlog(DEBUG1, "bc not all success, ret=%d.\n", ret);
|
||||
}
|
||||
|
||||
const int printItv = 5;
|
||||
|
@ -356,7 +356,7 @@ static void ProcessRegResInst(const CmsNotifyAgentRegMsg *recvMsg)
|
||||
} else if ((isreg == CM_RES_ISREG_UNREG) || (isreg == CM_RES_ISREG_PENDING) || (isreg == CM_RES_ISREG_UNKNOWN)) {
|
||||
write_runlog(LOG, "before reg res inst, need clean res inst first.\n");
|
||||
if ((CheckOneResInst(local) == CUS_RES_CHECK_STAT_OFFLINE) || (CleanOneResInst(local) == CM_SUCCESS)) {
|
||||
(void)RegOneResInst(local, recvMsg->resInstId);
|
||||
(void)RegOneResInst(local, recvMsg->resInstId, CM_TRUE);
|
||||
}
|
||||
} else if (isreg == CM_RES_ISREG_NOT_SUPPORT) {
|
||||
write_runlog(LOG, "res inst[%s:%u] don't support reg, not need reg.\n", recvMsg->resName, recvMsg->resInstId);
|
||||
|
@ -174,24 +174,22 @@ int FindInstanceIdAndType(uint32 node, const char *dataPath, uint32 *instanceId,
|
||||
*/
|
||||
int ssh_exec(const staticNodeConfig* node, const char* cmd, int32 logLevel)
|
||||
{
|
||||
#define MAXLINE 1024
|
||||
char actualCmd[MAXLINE];
|
||||
char actualCmd[MAX_COMMAND_LEN] = {0};
|
||||
int rc = -1;
|
||||
int ret;
|
||||
|
||||
for (uint32 ii = 0; ii < node->sshCount; ii++) {
|
||||
if (mpp_env_separate_file[0] == '\0') {
|
||||
ret = snprintf_s(actualCmd, MAXLINE, MAXLINE - 1,
|
||||
"pssh %s -s -H %s \"( %s ) > %s 2>&1\" < %s > /dev/null 2>&1",
|
||||
PSSH_TIMEOUT_OPTION, node->sshChannel[ii], cmd, "/dev/null", "/dev/null");
|
||||
securec_check_intval(ret, (void)ret);
|
||||
ret = snprintf_s(actualCmd, MAX_COMMAND_LEN, MAX_COMMAND_LEN - 1,
|
||||
"pssh %s -s -H %s \"( %s ) > %s 2>&1\" > /dev/null 2>&1",
|
||||
PSSH_TIMEOUT_OPTION, node->sshChannel[ii], cmd, "/dev/null");
|
||||
} else {
|
||||
ret = snprintf_s(actualCmd, MAXLINE, MAXLINE - 1,
|
||||
"pssh %s -s -H %s \"( source %s;%s ) > %s 2>&1\" < %s > /dev/null 2>&1",
|
||||
ret = snprintf_s(actualCmd, MAX_COMMAND_LEN, MAX_COMMAND_LEN - 1,
|
||||
"pssh %s -s -H %s \"( source %s;%s ) > %s 2>&1\" > /dev/null 2>&1",
|
||||
PSSH_TIMEOUT_OPTION, node->sshChannel[ii], mpp_env_separate_file, cmd,
|
||||
"/dev/null", "/dev/null");
|
||||
securec_check_intval(ret, (void)ret);
|
||||
"/dev/null");
|
||||
}
|
||||
securec_check_intval(ret, (void)ret);
|
||||
rc = system(actualCmd);
|
||||
if (rc != 0) {
|
||||
write_runlog(logLevel, "ssh failed at \"%s\".\n", node->sshChannel[ii]);
|
||||
|
@ -43,6 +43,8 @@ static char g_confFile[CM_PATH_LENGTH];
|
||||
extern char g_appPath[MAXPGPATH];
|
||||
extern char mpp_env_separate_file[MAXPGPATH];
|
||||
|
||||
static status_t CheckGucOption(const GucOption &gucCtx);
|
||||
|
||||
static inline void SkipSpace(char *&ptr)
|
||||
{
|
||||
while (isspace((unsigned char)*ptr)) {
|
||||
@ -308,13 +310,31 @@ static void GetRemoteGucCommand(const CtlOption *ctx, char *cmd, size_t cmdLen)
|
||||
securec_check_intval(ret, (void)ret);
|
||||
curLen = (size_t)ret;
|
||||
|
||||
if (ctx->guc.gucCommand == SET_CONF_COMMAND && ctx->guc.value != NULL && ctx->guc.parameter != NULL) {
|
||||
if (ctx->guc.gucCommand != SET_CONF_COMMAND || ctx->guc.value == NULL || ctx->guc.parameter == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (strcmp(ctx->guc.parameter, "event_triggers") != 0) {
|
||||
ret = snprintf_s((cmd + curLen), (cmdLen - curLen), ((cmdLen - curLen) - 1),
|
||||
SYSTEMQUOTE "-k %s=\\\"%s\\\" " SYSTEMQUOTE, ctx->guc.parameter, ctx->guc.value);
|
||||
securec_check_intval(ret, (void)ret);
|
||||
} else {
|
||||
// event_triggers value contain double quotes, so an escape character is added before remote execution
|
||||
const char *value = ctx->guc.value;
|
||||
char valueCopy[cmdLen] = {0};
|
||||
int j = 0;
|
||||
for (size_t i = 0; i < strlen(value); ++i) {
|
||||
if (value[i] == '"') {
|
||||
valueCopy[j++] = '\\';
|
||||
valueCopy[j++] = '\\';
|
||||
valueCopy[j++] = '\\';
|
||||
}
|
||||
valueCopy[j++] = value[i];
|
||||
}
|
||||
ret = snprintf_s((cmd + curLen), (cmdLen - curLen), ((cmdLen - curLen) - 1),
|
||||
SYSTEMQUOTE "-k %s=\\\"%s\\\" " SYSTEMQUOTE, ctx->guc.parameter, valueCopy);
|
||||
securec_check_intval(ret, (void)ret);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void PrintOneParameterAndValue(char *line)
|
||||
@ -672,39 +692,6 @@ status_t ExeGucCommand(const GucOption *gucCtx)
|
||||
return result;
|
||||
}
|
||||
|
||||
status_t ProcessInLocalInstance(const GucOption *gucCtx)
|
||||
{
|
||||
errno_t rc;
|
||||
char cmDir[CM_PATH_LENGTH] = { 0 };
|
||||
char instanceDir[CM_PATH_LENGTH] = { 0 };
|
||||
|
||||
rc = memcpy_s(cmDir, sizeof(cmDir), g_currentNode->cmDataPath, sizeof(cmDir));
|
||||
securec_check_errno(rc, (void)rc);
|
||||
|
||||
if (cmDir[0] == '\0') {
|
||||
write_runlog(ERROR, "Failed to get cm base data path from static config file.");
|
||||
return CM_ERROR;
|
||||
}
|
||||
|
||||
if (gucCtx->nodeType == NODE_TYPE_AGENT) {
|
||||
rc = snprintf_s(instanceDir, sizeof(instanceDir), sizeof(instanceDir) - 1, "%s/cm_agent", cmDir);
|
||||
securec_check_intval(rc, (void)rc);
|
||||
} else {
|
||||
if (g_currentNode->cmServerLevel != 1) {
|
||||
write_runlog(LOG, "There is no cmserver instance on local node.");
|
||||
return CM_ERROR;
|
||||
}
|
||||
rc = snprintf_s(instanceDir, sizeof(instanceDir), sizeof(instanceDir) - 1, "%s/cm_server", cmDir);
|
||||
securec_check_intval(rc, (void)rc);
|
||||
}
|
||||
GetInstanceConfigfile(gucCtx->nodeType, instanceDir);
|
||||
if (ExeGucCommand(gucCtx) != CM_SUCCESS) {
|
||||
return CM_ERROR;
|
||||
}
|
||||
|
||||
return CM_SUCCESS;
|
||||
}
|
||||
|
||||
static uint32 GetNodeIndex(uint32 nodeId)
|
||||
{
|
||||
for (uint32 i = 0; i < g_node_num; ++i) {
|
||||
@ -766,18 +753,56 @@ static status_t ListRemoteConfMain(staticNodeConfig *node, const char *cmd)
|
||||
return CM_ERROR;
|
||||
}
|
||||
|
||||
static status_t ProcessInRemoteInstance(const CtlOption *ctx)
|
||||
status_t ProcessInLocalInstanceExec(const GucOption *gucCtx)
|
||||
{
|
||||
char remoteCmd[CM_PATH_LENGTH] = {0};
|
||||
errno_t rc;
|
||||
char cmDir[CM_PATH_LENGTH] = { 0 };
|
||||
char instanceDir[CM_PATH_LENGTH] = { 0 };
|
||||
|
||||
if (ctx->comm.nodeId == g_currentNode->node) {
|
||||
if (ProcessInLocalInstance(&ctx->guc) == CM_ERROR) {
|
||||
write_runlog(DEBUG1, "cm_ctl fail to execute in local.\n");
|
||||
return CM_ERROR;
|
||||
}
|
||||
return CM_SUCCESS;
|
||||
rc = memcpy_s(cmDir, sizeof(cmDir), g_currentNode->cmDataPath, sizeof(cmDir));
|
||||
securec_check_errno(rc, (void)rc);
|
||||
|
||||
if (cmDir[0] == '\0') {
|
||||
write_runlog(ERROR, "Failed to get cm base data path from static config file.");
|
||||
return CM_ERROR;
|
||||
}
|
||||
|
||||
if (gucCtx->nodeType == NODE_TYPE_AGENT) {
|
||||
rc = snprintf_s(instanceDir, sizeof(instanceDir), sizeof(instanceDir) - 1, "%s/cm_agent", cmDir);
|
||||
securec_check_intval(rc, (void)rc);
|
||||
} else {
|
||||
if (g_currentNode->cmServerLevel != 1) {
|
||||
write_runlog(LOG, "There is no cmserver instance on local node.");
|
||||
return CM_ERROR;
|
||||
}
|
||||
rc = snprintf_s(instanceDir, sizeof(instanceDir), sizeof(instanceDir) - 1, "%s/cm_server", cmDir);
|
||||
securec_check_intval(rc, (void)rc);
|
||||
}
|
||||
GetInstanceConfigfile(gucCtx->nodeType, instanceDir);
|
||||
if (ExeGucCommand(gucCtx) != CM_SUCCESS) {
|
||||
return CM_ERROR;
|
||||
}
|
||||
|
||||
return CM_SUCCESS;
|
||||
}
|
||||
|
||||
status_t ProcessInLocalInstance(const CtlOption *ctx)
|
||||
{
|
||||
if (CheckGucOption(ctx->guc) != CM_SUCCESS) {
|
||||
return CM_ERROR;
|
||||
}
|
||||
|
||||
if (ctx->guc.gucCommand == SET_CONF_COMMAND && CheckGucOptionValidate(ctx->guc) != CM_SUCCESS) {
|
||||
DoAdvice();
|
||||
return CM_ERROR;
|
||||
}
|
||||
|
||||
return ProcessInLocalInstanceExec(&ctx->guc);
|
||||
}
|
||||
|
||||
static status_t ProcessInRemoteInstance(const CtlOption *ctx)
|
||||
{
|
||||
char remoteCmd[MAX_COMMAND_LEN] = {0};
|
||||
GetRemoteGucCommand(ctx, remoteCmd, sizeof(remoteCmd));
|
||||
if (ctx->guc.gucCommand == LIST_CONF_COMMAND) {
|
||||
return ListRemoteConfMain(&g_node[GetNodeIndex(ctx->comm.nodeId)], remoteCmd);
|
||||
@ -799,8 +824,10 @@ static status_t ProcessInAllNodesInstance(CtlOption *ctx)
|
||||
continue;
|
||||
}
|
||||
ctx->comm.nodeId = g_node[i].node;
|
||||
if (ProcessInRemoteInstance(ctx) == CM_ERROR) {
|
||||
result = CM_ERROR;
|
||||
if (ctx->comm.nodeId == g_currentNode->node) {
|
||||
result = ProcessInLocalInstance(ctx);
|
||||
} else {
|
||||
result = ProcessInRemoteInstance(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@ -809,15 +836,21 @@ static status_t ProcessInAllNodesInstance(CtlOption *ctx)
|
||||
|
||||
status_t ProcessClusterGucOption(CtlOption *ctx)
|
||||
{
|
||||
status_t result;
|
||||
|
||||
if (ctx->comm.nodeId == 0) {
|
||||
result = ProcessInAllNodesInstance(ctx);
|
||||
} else {
|
||||
result = ProcessInRemoteInstance(ctx);
|
||||
return ProcessInAllNodesInstance(ctx);
|
||||
}
|
||||
|
||||
return result;
|
||||
if (ctx->comm.nodeId != g_currentNode->node) {
|
||||
return ProcessInRemoteInstance(ctx);
|
||||
}
|
||||
|
||||
status_t res = ProcessInLocalInstance(ctx);
|
||||
if (res == CM_ERROR) {
|
||||
write_runlog(DEBUG1, "cm_ctl fail to execute in local.\n");
|
||||
}
|
||||
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
static status_t CheckGucOption(const GucOption &gucCtx)
|
||||
@ -837,22 +870,10 @@ static status_t CheckGucOption(const GucOption &gucCtx)
|
||||
// cm_ctl integration guc set reload and check capacity
|
||||
int DoGuc(CtlOption *ctx)
|
||||
{
|
||||
if (CheckGucOption(ctx->guc) != CM_SUCCESS) {
|
||||
return 1;
|
||||
}
|
||||
status_t res = ProcessClusterGucOption(ctx);
|
||||
PrintResults(res == CM_SUCCESS, ctx);
|
||||
|
||||
if ((ctx->guc.gucCommand == SET_CONF_COMMAND) && (CheckGucOptionValidate(ctx->guc) != CM_SUCCESS)) {
|
||||
DoAdvice();
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (ProcessClusterGucOption(ctx) != CM_SUCCESS) {
|
||||
PrintResults(false, ctx);
|
||||
return 1;
|
||||
}
|
||||
PrintResults(true, ctx);
|
||||
|
||||
return 0;
|
||||
return (int)res;
|
||||
}
|
||||
|
||||
static void MemsetPassword(char **password)
|
||||
|
@ -1656,6 +1656,7 @@ char *DoConcatCmd(const CtlOption *ctx)
|
||||
int rc = memset_s(cmd, CM_PATH_LENGTH, 0, CM_PATH_LENGTH);
|
||||
securec_check_errno(rc, (void)rc);
|
||||
if (DoCheckRole(&ctx->dcfOption) == -1) {
|
||||
free(cmd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -127,6 +127,9 @@ const char *g_cmsParamInfo[] = {
|
||||
"cms_enable_failover_on2nodes|bool|0,0|NULL|NULL|",
|
||||
"cms_enable_db_crash_recovery|bool|0,0|NULL|NULL|",
|
||||
"cms_network_isolation_timeout|int|10,2147483647|NULL|NULL|",
|
||||
#ifndef ENABLE_PRIVATEGAUSS
|
||||
"wait_static_primary_times|int|5,2147483647|NULL|NULL|",
|
||||
#endif
|
||||
};
|
||||
|
||||
const char *g_valueTypeStr[] = {
|
||||
@ -304,6 +307,7 @@ char *GetParamLineInfo(const char *paramName, const char * const *paramInfos, in
|
||||
|
||||
if (paramInfos == NULL) {
|
||||
write_runlog(ERROR, "Fail to get param info.\n");
|
||||
free(info);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -1276,68 +1276,60 @@ static status_t PrintResult(uint32 *pre_node, cm_to_ctl_instance_status *cm_to_c
|
||||
|
||||
if (cm_to_ctl_instance_status_ptr->data_node_member.local_status.local_role == INSTANCE_ROLE_PRIMARY) {
|
||||
if (g_node[node_index].datanode[instance_index].datanodePeerRole == STANDBY_DN ||
|
||||
g_node[node_index].datanode[instance_index].datanodePeerRole == PRIMARY_DN) {
|
||||
g_node[node_index].datanode[instance_index].datanodePeerRole == DUMMY_STANDBY_DN) {
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"standby_node : %s\n",
|
||||
g_node[node_index].datanode[instance_index].datanodePeerHAIP[0]);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"standby_data_path : %s\n",
|
||||
g_node[node_index].datanode[instance_index].datanodePeerDataPath);
|
||||
}
|
||||
if (g_node[node_index].datanode[instance_index].datanodePeer2Role == STANDBY_DN ||
|
||||
g_node[node_index].datanode[instance_index].datanodePeer2Role == PRIMARY_DN) {
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"standby_node : %s\n",
|
||||
g_node[node_index].datanode[instance_index].datanodePeer2HAIP[0]);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"standby_data_path : %s\n",
|
||||
g_node[node_index].datanode[instance_index].datanodePeer2DataPath);
|
||||
}
|
||||
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"standby_state : %s\n",
|
||||
datanode_role_int_to_string(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].peer_role));
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_sent_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_sent_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_sent_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_write_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_write_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_write_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_flush_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_flush_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_flush_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_replay_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_replay_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_replay_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_received_location: %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_received_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_received_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_write_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_write_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_write_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_flush_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_flush_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_flush_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_replay_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_replay_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_replay_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sync_state : %s\n",
|
||||
datanode_wal_sync_state_int_to_string(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sync_state));
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"standby_state : %s\n",
|
||||
datanode_role_int_to_string(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].peer_role));
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_sent_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_sent_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_sent_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_write_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_write_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_write_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_flush_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_flush_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_flush_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_replay_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_replay_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sender_replay_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_received_location: %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_received_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_received_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_write_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_write_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_write_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_flush_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_flush_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_flush_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_replay_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_replay_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].receiver_replay_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sync_state : %s\n",
|
||||
datanode_wal_sync_state_int_to_string(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[0].sync_state));
|
||||
}
|
||||
|
||||
if (g_node[node_index].datanode[instance_index].datanodePeerRole == DUMMY_STANDBY_DN) {
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"secondary_node : %s\n",
|
||||
@ -1346,6 +1338,62 @@ static status_t PrintResult(uint32 *pre_node, cm_to_ctl_instance_status *cm_to_c
|
||||
"secondary_data_path : %s\n",
|
||||
g_node[node_index].datanode[instance_index].datanodePeerDataPath);
|
||||
}
|
||||
|
||||
if (g_node[node_index].datanode[instance_index].datanodePeer2Role == STANDBY_DN ||
|
||||
g_node[node_index].datanode[instance_index].datanodePeer2Role == DUMMY_STANDBY_DN) {
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"standby_node : %s\n",
|
||||
g_node[node_index].datanode[instance_index].datanodePeer2HAIP[0]);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"standby_data_path : %s\n",
|
||||
g_node[node_index].datanode[instance_index].datanodePeer2DataPath);
|
||||
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"secondary_state : %s\n",
|
||||
datanode_role_int_to_string(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].peer_role));
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_sent_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_sent_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_sent_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_write_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_write_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_write_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_flush_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_flush_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_flush_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_replay_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_replay_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_replay_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_received_location: %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_received_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_received_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_write_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_write_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_write_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_flush_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_flush_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_flush_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_replay_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_replay_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_replay_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sync_state : %s\n\n",
|
||||
datanode_wal_sync_state_int_to_string(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sync_state));
|
||||
}
|
||||
|
||||
if (g_node[node_index].datanode[instance_index].datanodePeer2Role == DUMMY_STANDBY_DN) {
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"secondary_node : %s\n",
|
||||
@ -1354,50 +1402,7 @@ static status_t PrintResult(uint32 *pre_node, cm_to_ctl_instance_status *cm_to_c
|
||||
"secondary_data_path : %s\n",
|
||||
g_node[node_index].datanode[instance_index].datanodePeer2DataPath);
|
||||
}
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"secondary_state : %s\n",
|
||||
datanode_role_int_to_string(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].peer_role));
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_sent_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_sent_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_sent_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_write_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_write_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_write_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_flush_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_flush_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_flush_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_replay_location : %X/%X\n",
|
||||
(uint32)(cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_replay_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sender_replay_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_received_location: %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_received_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_received_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_write_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_write_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_write_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_flush_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_flush_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_flush_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"receiver_replay_location : %X/%X\n",
|
||||
(uint32)(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_replay_location >> 32),
|
||||
(uint32)cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].receiver_replay_location);
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sync_state : %s\n\n",
|
||||
datanode_wal_sync_state_int_to_string(
|
||||
cm_to_ctl_instance_status_ptr->data_node_member.sender_status[1].sync_state));
|
||||
|
||||
} else {
|
||||
(void)fprintf(g_logFilePtr,
|
||||
"sender_sent_location : %X/%X\n",
|
||||
|
@ -146,7 +146,7 @@ static status_t CheckResNumberOptInfo(cJSON *resItem, const char *resName, const
|
||||
cJSON *objValue = cJSON_GetObjectItem(resItem, checkKey);
|
||||
CM_RETERR_IF_FALSE(CmCheckIsJsonNumber(objValue, resName, checkKey, WARNING));
|
||||
if (!IsResConfValid(checkKey, objValue->valueint)) {
|
||||
PrintCheckJsonInfo(WARNING, "resource(%s)'s %s=%d out of range, range[%u %u], default(%s).\n",
|
||||
PrintCheckJsonInfo(WARNING, "resource(%s)'s %s=%d out of range, range[%d %d], default(%s).\n",
|
||||
resName, checkKey, objValue->valueint,
|
||||
ResConfMinValue(checkKey), ResConfMaxValue(checkKey), ResConfDefValue(checkKey));
|
||||
return CM_ERROR;
|
||||
@ -353,7 +353,7 @@ static status_t CheckAndGetNumberFromJson(cJSON *resItem, const char *resName, c
|
||||
CM_RETERR_IF_FALSE(CmCheckIsJsonNumber(objValue, resName, checkKey, ERROR));
|
||||
|
||||
if (!IsResConfValid(checkKey, objValue->valueint)) {
|
||||
PrintCheckJsonInfo(ERROR, "resource(%s)'s %s=%d out of range, range[%u %u].\n", resName, checkKey,
|
||||
PrintCheckJsonInfo(ERROR, "resource(%s)'s %s=%d out of range, range[%d %d].\n", resName, checkKey,
|
||||
objValue->valueint, ResConfMinValue(checkKey), ResConfMaxValue(checkKey));
|
||||
return CM_ERROR;
|
||||
}
|
||||
@ -463,13 +463,13 @@ static void GetAllRestypeStr(char *typeStr, uint32 maxlen)
|
||||
uint32 arrLen = (uint32)(sizeof(g_resTypeMap) / sizeof(g_resTypeMap[0]));
|
||||
char tmpStr[MAX_PATH_LEN] = {0};
|
||||
for (uint32 i = 0; i < arrLen; ++i) {
|
||||
if (g_resTypeMap[i].type == RES_TYPE_UNKNOWN) {
|
||||
if (g_resTypeMap[i].type == RES_TYPE_INIT || g_resTypeMap[i].type == RES_TYPE_UNKNOWN) {
|
||||
continue;
|
||||
}
|
||||
if (strlen(typeStr) + strlen(g_resTypeMap[i].typeStr) >= maxlen) {
|
||||
return;
|
||||
}
|
||||
if (i == 0) {
|
||||
if (typeStr[0] == '\0') {
|
||||
rc = snprintf_s(tmpStr, MAX_PATH_LEN, MAX_PATH_LEN - 1, "\"%s\"", g_resTypeMap[i].typeStr);
|
||||
} else {
|
||||
rc = snprintf_s(tmpStr, MAX_PATH_LEN, MAX_PATH_LEN - 1, ", \"%s\"", g_resTypeMap[i].typeStr);
|
||||
@ -517,7 +517,7 @@ static uint32 GetResTypeIndex(cJSON *resItem, const char *resName)
|
||||
{
|
||||
cJSON *objValue = cJSON_GetObjectItem(resItem, RESOURCE_TYPE);
|
||||
if (!CmCheckIsJsonString(objValue, resName, RESOURCE_TYPE, WARNING)) {
|
||||
return 0;
|
||||
return RES_TYPE_UNKNOWN;
|
||||
}
|
||||
|
||||
uint32 index = 0;
|
||||
@ -527,8 +527,8 @@ static uint32 GetResTypeIndex(cJSON *resItem, const char *resName)
|
||||
char allResName[MAX_PATH_LEN] = {0};
|
||||
GetAllRestypeStr(allResName, MAX_PATH_LEN);
|
||||
PrintCheckJsonInfo(WARNING, "resource(%s)'s resources_type is (%s), not in range(%s), default(%s).\n",
|
||||
resName, objValue->string, allResName, ResConfDefValue(RESOURCE_TYPE));
|
||||
return 0;
|
||||
resName, objValue->valuestring, allResName, ResConfDefValue(RESOURCE_TYPE));
|
||||
return RES_TYPE_UNKNOWN;
|
||||
}
|
||||
|
||||
ResType GetResTypeFromCjson(cJSON *resItem)
|
||||
|
@ -140,7 +140,7 @@ static status_t SetResBaseInfoInArray(ResBaseInfo *info, const cJSON *resArray,
|
||||
}
|
||||
resName = GetValueStrFromCJson(item, RES_NAME);
|
||||
if (resName == NULL) {
|
||||
resName = NULL;
|
||||
resName = PRINT_NULL;
|
||||
} else {
|
||||
isCanPrint = CM_TRUE;
|
||||
}
|
||||
@ -188,7 +188,7 @@ static void PrintAllResInfoBody(const ResBaseInfo *info, const cJSON *resArray)
|
||||
}
|
||||
resName = GetValueStrFromCJson(item, RES_NAME);
|
||||
if (resName == NULL) {
|
||||
resName = NULL;
|
||||
resName = PRINT_NULL;
|
||||
}
|
||||
resType = GetValueStrFromCJson(item, RESOURCE_TYPE);
|
||||
if (resType == NULL) {
|
||||
@ -497,13 +497,15 @@ static status_t ComputeListTableItemLen(
|
||||
}
|
||||
cJSON *item;
|
||||
uint32 tmpAttrValue = valueArr->attrValue;
|
||||
uint32 tempIndex;
|
||||
cJSON_ArrayForEach(item, objArray) {
|
||||
if (!cJSON_IsObject(item)) {
|
||||
continue;
|
||||
}
|
||||
tempIndex = index;
|
||||
attrValue = CM_MAX(valueArr->attrValue, attrValue);
|
||||
valueArr->attrValue = tmpAttrValue;
|
||||
ComputeArrItemLen(item, valueArr, &index, printInfo, resCtx);
|
||||
ComputeArrItemLen(item, valueArr, &tempIndex, printInfo, resCtx);
|
||||
}
|
||||
valueArr->attrValue = CM_MAX(valueArr->attrValue, attrValue);
|
||||
|
||||
|
@ -87,4 +87,6 @@ cms_enable_db_crash_recovery = false # used in 2 nodes cluster. when network re
|
||||
cms_network_isolation_timeout = 20 # cms judges the network to be isolated when it finds the ddb cluster is out of sync with the other nodes,
|
||||
# after cms_network_isolation_timeout times.
|
||||
# default 20
|
||||
wait_static_primary_times = 6 # Time to wait for the primary recovery after the primary stopped unexpectedly.
|
||||
# default value is 6
|
||||
############### must leave a new line at the end ###################
|
||||
|
@ -80,6 +80,13 @@ typedef enum MaxClusterStatEn {
|
||||
MAX_CLUSTER_EXCLUDE,
|
||||
} MaxClusterStat;
|
||||
|
||||
/*
 * Snapshot of the heartbeat data used by this file. The visible code fills it
 * with `baseTime = time(NULL)` followed by `GetRhbStat(hbs, &hwl)`, and the
 * connectivity check and the log printer then read from it.
 */
typedef struct CurCmRhbStatSt {
|
||||
/* Second output of GetRhbStat(); used as the row/column loop bound when printing. */
/* NOTE(review): presumably the "high-water level" (number of valid entries) - confirm. */
uint32 hwl;
|
||||
/* Wall-clock time taken when the snapshot was captured; passed to IsRhbTimeout(). */
time_t baseTime;
|
||||
/* Heartbeat timestamps indexed as hbs[idx1][idx2]; a value of 0 is treated as "not yet set". */
time_t hbs[MAX_RHB_NUM][MAX_RHB_NUM];
|
||||
} CurCmRhbStat;
|
||||
|
||||
static CurCmRhbStat g_curRhbStat = {0};
|
||||
static const int32 CHECK_DELAY_IN_ROLE_CHANGING = 10;
|
||||
|
||||
static MaxNodeCluster g_curCluster = {{0}};
|
||||
@ -320,6 +327,23 @@ static status_t InitMaxNodeCluster(MaxNodeCluster *maxNodeCluster)
|
||||
return CM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Classify the connection between two entries of the g_curRhbStat snapshot.
 *
 * idx1, idx2: indices into g_curRhbStat.hbs (both directions are inspected).
 * timeout:    forwarded to IsRhbTimeout(); 0 disables the check.
 *
 * Returns:
 *   MAX_CLUSTER_STATUS_AVAIL   - timeout is 0, or neither direction timed out;
 *   MAX_CLUSTER_STATUS_INIT    - at least one direction still holds 0;
 *   MAX_CLUSTER_STATUS_UNAVAIL - IsRhbTimeout() reports a timeout for either direction.
 *
 * NOTE(review): idx1/idx2 are used as array indices without a range check
 * here - confirm callers keep them below MAX_RHB_NUM.
 */
static MaxClusterResStatus GetNodesConnStatByRhb(int idx1, int idx2, int timeout)
|
||||
{
|
||||
/* A zero timeout short-circuits: the pair is reported as available. */
if (timeout == 0) {
|
||||
return MAX_CLUSTER_STATUS_AVAIL;
|
||||
}
|
||||
|
||||
/* A zero timestamp in either direction means no value has been stored yet. */
if (g_curRhbStat.hbs[idx1][idx2] == 0 || g_curRhbStat.hbs[idx2][idx1] == 0) {
|
||||
return MAX_CLUSTER_STATUS_INIT;
|
||||
}
|
||||
|
||||
/* Both directions are judged against the snapshot's baseTime, not a fresh time(NULL). */
if (IsRhbTimeout(g_curRhbStat.hbs[idx1][idx2], g_curRhbStat.baseTime, timeout) ||
|
||||
IsRhbTimeout(g_curRhbStat.hbs[idx2][idx1], g_curRhbStat.baseTime, timeout)) {
|
||||
return MAX_CLUSTER_STATUS_UNAVAIL;
|
||||
}
|
||||
return MAX_CLUSTER_STATUS_AVAIL;
|
||||
}
|
||||
|
||||
static bool CheckPoint2PointConn(int32 resIdx1, int32 resIdx2)
|
||||
{
|
||||
MaxClusterResStatus connStatus = GetNodesConnStatByRhb(resIdx1, resIdx2, (int)g_agentNetworkTimeout);
|
||||
@ -454,6 +478,8 @@ static void FindMaxNodeCluster(MaxNodeCluster *maxCluster)
|
||||
{
|
||||
NodeCluster *nodeCluster = &(maxCluster->nodeCluster);
|
||||
nodeCluster->clusterNum = -1;
|
||||
g_curRhbStat.baseTime = time(NULL);
|
||||
GetRhbStat(g_curRhbStat.hbs, &g_curRhbStat.hwl);
|
||||
// assume that all meet the conditions.
|
||||
for (int32 i = nodeCluster->maxNodeNum - 1; i >= 0; --i) {
|
||||
if (!IsAllResAvailInNode(i)) {
|
||||
@ -885,23 +911,35 @@ static bool IsNodeInCluster(int32 resIdx, const MaxNodeCluster *nodeCluster)
|
||||
return false;
|
||||
}
|
||||
|
||||
static void PrintRhbStatus()
|
||||
static void PrintOneRhbLine(time_t *timeArr)
|
||||
{
|
||||
uint32 hwl = 0;
|
||||
time_t hbs[MAX_RHB_NUM][MAX_RHB_NUM] = {{0}};
|
||||
GetRhbStat(hbs, &hwl);
|
||||
char *rhbStr = GetRhbSimple((time_t *)hbs, MAX_RHB_NUM, hwl, time(NULL), g_agentNetworkTimeout);
|
||||
CM_RETURN_IF_NULL(rhbStr);
|
||||
size_t rhbLen = strlen(rhbStr);
|
||||
if (rhbLen >= MAX_LOG_BUFF_LEN) {
|
||||
write_runlog(LOG, "rhbStr len(%lu) is exceed max log buff len(%d), can't print network stat.\n",
|
||||
rhbLen, MAX_LOG_BUFF_LEN);
|
||||
FREE_AND_RESET(rhbStr);
|
||||
return;
|
||||
int ret;
|
||||
errno_t rc;
|
||||
char rhbStr[MAX_PATH_LEN] = {0};
|
||||
const uint32 maxInfoLen = TIME_STR_MAX_LEN + 1;
|
||||
|
||||
for (uint32 i = 0; i < g_curRhbStat.hwl; ++i) {
|
||||
char info[maxInfoLen] = {0};
|
||||
char timeBuf[TIME_STR_MAX_LEN] = {0};
|
||||
GetTimeStr(timeArr[i], timeBuf, TIME_STR_MAX_LEN);
|
||||
ret = snprintf_s(info, maxInfoLen, maxInfoLen - 1, "%s|", timeBuf);
|
||||
securec_check_intval(ret, (void)ret);
|
||||
rc = strncat_s(rhbStr, MAX_PATH_LEN, info, strlen(info));
|
||||
securec_check_errno(rc, (void)rc);
|
||||
}
|
||||
write_runlog(LOG, "[RHB] hb infos: |%s\n", rhbStr);
|
||||
}
|
||||
|
||||
static void PrintAllRhbStatus()
|
||||
{
|
||||
char timeBuf[TIME_STR_MAX_LEN] = {0};
|
||||
GetTimeStr(g_curRhbStat.baseTime, timeBuf, TIME_STR_MAX_LEN);
|
||||
|
||||
write_runlog(LOG, "Network timeout:%u\n", g_agentNetworkTimeout);
|
||||
write_runlog(LOG, "Network stat('Y' means connected, otherwise 'N'):\n%s\n", rhbStr);
|
||||
FREE_AND_RESET(rhbStr);
|
||||
write_runlog(LOG, "Network base_time:%s\n", timeBuf);
|
||||
for (uint32 i = 0; i < g_curRhbStat.hwl; ++i) {
|
||||
PrintOneRhbLine(&g_curRhbStat.hbs[i][0]);
|
||||
}
|
||||
}
|
||||
|
||||
static void PrintKickOutResult(int32 resIdx, const MaxNodeCluster *maxCluster)
|
||||
@ -927,11 +965,10 @@ static void PrintKickOutResult(int32 resIdx, const MaxNodeCluster *maxCluster)
|
||||
if (!CheckPoint2PointConn(resIdx, maxCluster->nodeCluster.cluster[i])) {
|
||||
write_runlog(LOG, "kick out result: (index=%d,nodeId=%u) disconnect with (index=%d,nodeId=%u).\n",
|
||||
resIdx, GetNodeByPoint(resIdx), i, GetNodeByPoint(i));
|
||||
PrintHbsInfo(resIdx, GetNodeByPoint(resIdx), i, GetNodeByPoint(i), LOG);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
PrintRhbStatus();
|
||||
PrintAllRhbStatus();
|
||||
}
|
||||
|
||||
static void PrintArbitrateResult(const MaxNodeCluster *lastCluster, const MaxNodeCluster *curCluster)
|
||||
@ -999,7 +1036,7 @@ static status_t CheckVotingDisk()
|
||||
const uint32 timeout = 6;
|
||||
uint32 time = timeout;
|
||||
while (time > 0) {
|
||||
if (UpdateAllNodeHeartBeat() == CM_SUCCESS) {
|
||||
if (UpdateAllNodeHeartBeat(g_node_num) == CM_SUCCESS) {
|
||||
return CM_SUCCESS;
|
||||
}
|
||||
time--;
|
||||
@ -1028,6 +1065,8 @@ void *MaxNodeClusterArbitrateMain(void *arg)
|
||||
write_runlog(FATAL, "Alloc voting disk memory failed!\n");
|
||||
exit(-1);
|
||||
}
|
||||
g_curRhbStat.baseTime = time(NULL);
|
||||
GetRhbStat(g_curRhbStat.hbs, &g_curRhbStat.hwl);
|
||||
for (;;) {
|
||||
if (got_stop) {
|
||||
g_threadProcessStatus = THREAD_PROCESS_STOP;
|
||||
|
@ -2213,11 +2213,7 @@ static void DnArbitrateInner(DnArbCtx *ctx)
|
||||
|
||||
static bool IsMaintance(maintenance_mode mode)
|
||||
{
|
||||
if (mode == MAINTENANCE_MODE_UPGRADE || mode == MAINTENANCE_MODE_UPGRADE_OBSERVATION ||
|
||||
mode == MAINTENANCE_MODE_DILATATION || mode == MAINTENANCE_NODE_DISASTER_RECOVERY) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return mode != MAINTENANCE_MODE_NONE;
|
||||
}
|
||||
|
||||
static void InitDnArbCond(DnArbCtx *ctx)
|
||||
@ -2248,7 +2244,7 @@ static void InitDnArbCond(DnArbCtx *ctx)
|
||||
ctx->cond.maxMemArbiTime = 0;
|
||||
ctx->cond.instMainta = IsMaintance(ctx->maintaMode);
|
||||
ctx->cond.switchoverIdx = INVALID_INDEX;
|
||||
ctx->cond.arbitInterval = g_clusterStarting ? g_clusterStartingArbitDelay : DATANODE_ARBITE_DELAY;
|
||||
ctx->cond.arbitInterval = g_clusterStarting ? g_clusterStartingArbitDelay : g_waitStaticPrimaryTimes;
|
||||
ctx->cond.arbitStaticInterval = 5;
|
||||
ctx->cond.setOffline = SetOfflineNode();
|
||||
ctx->cond.snameAzDnCount = 0;
|
||||
@ -2287,6 +2283,7 @@ void DatanodeInstanceArbitrate(MsgRecvInfo* recvMsgInfo, const agent_to_cm_datan
|
||||
(void)pthread_rwlock_wrlock(ctx.lock);
|
||||
ResetHeartbeat(&ctx);
|
||||
SaveDnStatusFromReport(agentRep, &ctx);
|
||||
InitDnInfo(&ctx);
|
||||
|
||||
/* skip arbitration when the cluster is pausing,
|
||||
* but cm_ctl operation is allowed, it's necessary to clean some flags.
|
||||
@ -2298,7 +2295,6 @@ void DatanodeInstanceArbitrate(MsgRecvInfo* recvMsgInfo, const agent_to_cm_datan
|
||||
return;
|
||||
}
|
||||
|
||||
InitDnInfo(&ctx);
|
||||
DnArbitrateInner(&ctx);
|
||||
(void)pthread_rwlock_unlock(ctx.lock);
|
||||
}
|
||||
|
@ -554,7 +554,7 @@ uint32 GetDnArbitateDelayTime(const DnArbCtx *ctx)
|
||||
/* if static primary has finished redo, not need to wait for 180s */
|
||||
cm_local_replconninfo *status = &(ctx->dnReport[cond->staticPriIdx].local_status);
|
||||
if (status->local_role == INSTANCE_ROLE_STANDBY && status->disconn_mode == PROHIBIT_CONNECTION) {
|
||||
return DATANODE_ARBITE_DELAY;
|
||||
return g_waitStaticPrimaryTimes;
|
||||
}
|
||||
return cond->arbitInterval;
|
||||
}
|
||||
|
@ -615,6 +615,12 @@ void get_parameters_from_configfile()
|
||||
g_diskTimeout = get_uint32_value_from_config(configDir, "disk_timeout", 200);
|
||||
g_agentNetworkTimeout = get_uint32_value_from_config(configDir, "agent_network_timeout", 6);
|
||||
GetDnArbitrateMode();
|
||||
#ifndef ENABLE_PRIVATEGAUSS
|
||||
g_waitStaticPrimaryTimes = get_uint32_value_from_config(configDir, "wait_static_primary_times", 6);
|
||||
if (g_waitStaticPrimaryTimes < 5) {
|
||||
g_waitStaticPrimaryTimes = 5;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void clean_init_cluster_state()
|
||||
|
@ -328,6 +328,7 @@ uint32 g_delayArbiTime = 0;
|
||||
int32 g_clusterArbiTime = 300;
|
||||
bool g_isPauseArbitration = false;
|
||||
char g_cmManualPausePath[MAX_PATH_LEN] = {0};
|
||||
uint32 g_waitStaticPrimaryTimes = 6;
|
||||
|
||||
bool isLargerNode()
|
||||
{
|
||||
|
@ -1727,7 +1727,7 @@ static int cm_server_process_startup_packet(int epollfd, CM_Connection* con, CM_
|
||||
|
||||
if ((con->port->user_name != NULL) && strncmp(con->port->user_name, pw->pw_name, SP_USER - 1)) {
|
||||
write_runlog(WARNING, "invalid connection\n");
|
||||
if (CmsSendAndFlushMsg(con, 'E', "invalid connection", CM_SERVER_PACKET_ERROR_MSG) != 0) {
|
||||
if (CmsSendAndFlushMsg(con, 'E', "invalid connection", sizeof("invalid connection")) != 0) {
|
||||
RemoveConnAfterSendMsgFailed(con);
|
||||
write_runlog(ERROR, "[%s][line:%d] CmsSendAndFlushMsg fail.\n", __FUNCTION__, __LINE__);
|
||||
}
|
||||
|
@ -343,11 +343,18 @@ static void ReloadParametersFromConfigfile()
|
||||
g_diskTimeout = get_uint32_value_from_config(configDir, "disk_timeout", 200);
|
||||
g_agentNetworkTimeout = get_uint32_value_from_config(configDir, "agent_network_timeout", 6);
|
||||
GetDnArbitrateMode();
|
||||
#ifndef ENABLE_PRIVATEGAUSS
|
||||
g_waitStaticPrimaryTimes = get_uint32_value_from_config(configDir, "wait_static_primary_times", 6);
|
||||
if (g_waitStaticPrimaryTimes < 5) {
|
||||
g_waitStaticPrimaryTimes = 5;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (g_cm_server_num == CMS_ONE_PRIMARY_ONE_STANDBY) {
|
||||
GetTwoNodesArbitrateParams();
|
||||
}
|
||||
|
||||
|
||||
#ifdef ENABLE_MULTIPLE_NODES
|
||||
write_runlog(LOG,
|
||||
"reload cm_server parameters:\n"
|
||||
@ -378,13 +385,13 @@ static void ReloadParametersFromConfigfile()
|
||||
"datastorage_threshold_check_interval=%d,\n"
|
||||
" max_datastorage_threshold_check=%d, enableSetReadOnly=%s, enableSetReadOnlyThreshold=%u, "
|
||||
"switch_rto=%d, force_promote=%d, cluster_starting_aribt_delay=%u, enable_e2e_rto=%u, "
|
||||
"g_delayArbiTime=%u, g_clusterArbiTime=%d.\n",
|
||||
"g_delayArbiTime=%u, g_clusterArbiTime=%d, wait_static_primary_times=%u.\n",
|
||||
log_min_messages, maxLogFileSize, sys_log_path, g_alarmComponentPath, g_alarmReportInterval,
|
||||
instance_heartbeat_timeout, g_ddbArbicfg.haHeartBeatTimeOut, cmserver_self_vote_timeout,
|
||||
g_ddbArbicfg.haStatusInterval, cmserver_ha_connect_timeout, instance_failover_delay_timeout,
|
||||
datastorage_threshold_check_interval, max_datastorage_threshold_check, g_enableSetReadOnly,
|
||||
g_readOnlyThreshold, switch_rto, force_promote, g_clusterStartingArbitDelay,
|
||||
g_enableE2ERto, g_delayArbiTime, g_clusterArbiTime);
|
||||
g_enableE2ERto, g_delayArbiTime, g_clusterArbiTime, g_waitStaticPrimaryTimes);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -95,31 +95,13 @@ void GetRhbStat(time_t hbs[MAX_RHB_NUM][MAX_RHB_NUM], unsigned int *hwl)
|
||||
securec_check_errno(rc, (void)rc);
|
||||
}
|
||||
|
||||
MaxClusterResStatus GetNodesConnStatByRhb(int resIdx1, int resIdx2, int timeout)
|
||||
{
|
||||
if (timeout == 0) {
|
||||
return MAX_CLUSTER_STATUS_AVAIL;
|
||||
}
|
||||
|
||||
if (g_hbs[resIdx1][resIdx2] == 0 || g_hbs[resIdx2][resIdx1] == 0) {
|
||||
return MAX_CLUSTER_STATUS_INIT;
|
||||
}
|
||||
|
||||
time_t curTime = time(NULL);
|
||||
if (IsRhbTimeout(g_hbs[resIdx1][resIdx2], curTime, timeout) ||
|
||||
IsRhbTimeout(g_hbs[resIdx2][resIdx1], curTime, timeout)) {
|
||||
return MAX_CLUSTER_STATUS_UNAVAIL;
|
||||
}
|
||||
return MAX_CLUSTER_STATUS_AVAIL;
|
||||
}
|
||||
|
||||
/*
 * Clear every entry of the file-level heartbeat table g_hbs by zero-filling it.
 * Takes no arguments and returns nothing.
 */
void ResetNodeConnStat()
|
||||
{
|
||||
/* sizeof(g_hbs) is used for both the destination size and the fill count. */
errno_t rc = memset_s(g_hbs, sizeof(g_hbs), 0, sizeof(g_hbs));
|
||||
/* NOTE(review): failure handling is whatever securec_check_errno does - not visible here. */
securec_check_errno(rc, (void)rc);
|
||||
}
|
||||
|
||||
void PrintOneHbInfo(int resIdx1, uint32 nodeId1, int resIdx2, uint32 nodeId2, int logLevel)
|
||||
static void PrintOneHbInfo(int resIdx1, uint32 nodeId1, int resIdx2, uint32 nodeId2, int logLevel)
|
||||
{
|
||||
struct tm result;
|
||||
GetLocalTime(&g_hbs[resIdx1][resIdx2], &result);
|
||||
@ -135,3 +117,10 @@ void PrintHbsInfo(int resIdx1, uint32 nodeId1, int resIdx2, uint32 nodeId2, int
|
||||
PrintOneHbInfo(resIdx1, nodeId1, resIdx2, nodeId2, logLevel);
|
||||
PrintOneHbInfo(resIdx2, nodeId2, resIdx1, nodeId1, logLevel);
|
||||
}
|
||||
|
||||
/*
 * Format a time_t as "YYYY-MM-DD HH:MM:SS" into a caller-supplied buffer.
 *
 * baseTime: the time value to format.
 * timeStr:  destination buffer.
 * strLen:   size of timeStr in bytes, passed to strftime() as the limit.
 *
 * NOTE(review): the strftime() result is discarded; if strLen is too small for
 * the formatted text plus terminator, strftime() returns 0 and the buffer
 * contents are unspecified - confirm every caller's buffer is large enough.
 */
void GetTimeStr(time_t baseTime, char *timeStr, uint32 strLen)
|
||||
{
|
||||
struct tm result;
|
||||
/* Break the time value down into `result`; presumably a local-time conversion - confirm. */
GetLocalTime(&baseTime, &result);
|
||||
(void)strftime(timeStr, strLen, "%Y-%m-%d %H:%M:%S", &result);
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ status_t GetVotingDiskData(char *data, uint32 dataLen, uint32 offset);
|
||||
status_t GetVotingDiskSingleNodeInfo(VotingDiskNodeInfo *nodeInfo, uint32 nodeIndex);
|
||||
status_t InitVotingDiskHandler(const char *scsiDev, uint32 offset);
|
||||
status_t InitVotingDisk(const char *votingDiskPath);
|
||||
status_t UpdateAllNodeHeartBeat();
|
||||
status_t UpdateAllNodeHeartBeat(uint32 nodeNum);
|
||||
void ResetVotingdiskHeartBeat();
|
||||
VotingDiskStatus GetNodeHeartbeatStat(uint32 nodeIndex, uint32 diskTimeout, int logLevel);
|
||||
status_t AllocVotingDiskMem();
|
||||
|
@ -30,7 +30,7 @@
|
||||
status_t StartOneResInst(const CmResConfList *conf);
|
||||
void StopOneResInst(const CmResConfList *conf);
|
||||
void OneResInstShutdown(const CmResConfList *oneResConf);
|
||||
status_t RegOneResInst(const CmResConfList *conf, uint32 destInstId);
|
||||
status_t RegOneResInst(const CmResConfList *conf, uint32 destInstId, bool8 needNohup);
|
||||
status_t UnregOneResInst(const CmResConfList *conf, uint32 destInstId);
|
||||
ResIsregStatus IsregOneResInst(const CmResConfList *conf, uint32 destInstId);
|
||||
status_t CleanOneResInst(const CmResConfList *conf);
|
||||
|
@ -38,6 +38,7 @@
|
||||
#define DYNAMIC_PRIMARY 0
|
||||
#define DYNAMIC_STANDBY 1
|
||||
#define RELOAD_WAIT_TIME 60
|
||||
#define MAX_COMMAND_LEN 2048
|
||||
|
||||
#define ETCD_BIN_NAME "etcd"
|
||||
#ifndef ENABLE_MULTIPLE_NODES
|
||||
|
@ -168,6 +168,7 @@ typedef struct CmResConfListSt {
|
||||
uint32 cmInstanceId;
|
||||
uint32 resInstanceId;
|
||||
ResStatusCheckInfo checkInfo;
|
||||
int resType;
|
||||
} CmResConfList;
|
||||
|
||||
// instance types before INST_TYPE_UNKNOWN shouldn't be changed
|
||||
|
@ -44,7 +44,6 @@ typedef struct DnArbitInfo_t {
|
||||
uint32 maxTerm;
|
||||
} DnArbitInfo;
|
||||
|
||||
const uint32 DATANODE_ARBITE_DELAY = 6;
|
||||
|
||||
extern bool CheckPotentialTermRollback();
|
||||
extern void GroupStatusShow(const char *str, const uint32 groupIndex, const uint32 instanceId,
|
||||
|
@ -475,6 +475,7 @@ extern uint32 g_delayArbiTime;
|
||||
extern int32 g_clusterArbiTime;
|
||||
extern bool g_isPauseArbitration;
|
||||
extern char g_cmManualPausePath[MAX_PATH_LEN];
|
||||
extern uint32 g_waitStaticPrimaryTimes;
|
||||
|
||||
extern void clean_init_cluster_state();
|
||||
extern void instance_delay_arbitrate_time_out_direct_clean(uint32 group_index, int member_index,
|
||||
|
@ -26,13 +26,14 @@
|
||||
#define CMS_RHB_H
|
||||
|
||||
#include <time.h>
|
||||
#include "cms_arbitrate_cluster.h"
|
||||
|
||||
#define TIME_STR_MAX_LEN 20
|
||||
|
||||
void InitDbListsByStaticConfig();
|
||||
void RefreshNodeRhbInfo(unsigned int nodeId, const time_t *hbs, unsigned int hwl);
|
||||
MaxClusterResStatus GetNodesConnStatByRhb(int resIdx1, int resIdx2, int timeout);
|
||||
void GetRhbStat(time_t hbs[MAX_RHB_NUM][MAX_RHB_NUM], unsigned int *hwl);
|
||||
void ResetNodeConnStat();
|
||||
void PrintHbsInfo(int resIdx1, uint32 nodeId1, int resIdx2, uint32 nodeId2, int logLevel);
|
||||
void GetTimeStr(time_t baseTime, char *timeStr, uint32 strLen);
|
||||
|
||||
#endif
|
||||
|
@ -50,7 +50,7 @@ typedef struct st_spin_statis {
|
||||
uint64 fails;
|
||||
} spin_statis_t;
|
||||
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
#if defined(__arm__) || defined(__aarch64__) || defined(__loongarch__)
|
||||
#define fas_cpu_pause() \
|
||||
do { \
|
||||
__asm__ volatile("nop"); \
|
||||
|
@ -379,6 +379,22 @@ static __inline__ int tas(volatile slock_t* lock)
|
||||
#endif /* HAVE_GCC_INT_ATOMICS */
|
||||
#endif /* __arm__ */
|
||||
|
||||
/*
 * LoongArch spinlock primitives, built on the GCC __sync builtins rather
 * than hand-written assembly.
 */
#if defined(__loongarch__)
|
||||
#define HAS_TEST_AND_SET
|
||||
|
||||
#define TAS(lock) tas(lock)
|
||||
|
||||
typedef int slock_t;
|
||||
|
||||
/*
 * Atomically store 1 into *lock and return the value it held before.
 * A return of 0 means the lock was free and is now held by the caller;
 * a non-zero return means it was already held.
 * Per the GCC documentation, __sync_lock_test_and_set is an acquire
 * barrier, not a full barrier.
 */
static __inline__ int
|
||||
tas(volatile slock_t *lock)
|
||||
{
|
||||
return __sync_lock_test_and_set(lock, 1);
|
||||
}
|
||||
|
||||
/* Release the lock: __sync_lock_release stores 0 to *lock with release semantics (per GCC documentation). */
#define S_UNLOCK(lock) __sync_lock_release(lock)
|
||||
#endif /* __loongarch__ */
|
||||
|
||||
/* S/390 and S/390x Linux (32- and 64-bit zSeries) */
|
||||
#if defined(__s390__) || defined(__s390x__)
|
||||
#define HAS_TEST_AND_SET
|
||||
|
@ -288,7 +288,7 @@ General options:
|
||||
isLocal = False
|
||||
if host == self.localhostName:
|
||||
isLocal = True
|
||||
findPrimaryCmd = "source %s; gs_ctl query -D %s | grep 'local_role.*Primary' > /dev/null" % \
|
||||
findPrimaryCmd = "source %s; gs_ctl query -D %s | grep -i 'local_role.*Primary' > /dev/null" % \
|
||||
(self.envFile, self.nodesInfo[host]["dataPath"])
|
||||
notPrimary, output = executeCmdOnHost(host, findPrimaryCmd, isLocal)
|
||||
if notPrimary == 0:
|
||||
|
Loading…
x
Reference in New Issue
Block a user