cm支持双集群中备集群switchover

This commit is contained in:
liuzhanfeng2 2024-04-18 14:12:27 +08:00
parent 0e0bd1c6e1
commit 7299703207
14 changed files with 132 additions and 38 deletions

View File

@ -130,6 +130,7 @@ const char *g_cmsParamInfo[] = {
#ifndef ENABLE_PRIVATEGAUSS
"wait_static_primary_times|int|5,2147483647|NULL|NULL|",
#endif
"ss_double_cluster_mode|int|0,2|NULL|NULL|",
};
const char *g_valueTypeStr[] = {

View File

@ -28,6 +28,8 @@
#include "ctl_common.h"
#include "cm/libpq-int.h"
#include "cm/cm_agent/cma_main.h"
#include "cm_elog.h"
#include "cm_msg.h"
/* If a DN switchover takes a long time and does not complete, it times out and pending_command is cleared in
server_main.cpp CM_ThreadMonitorMain(). The default g_wait_seconds is 180s, so we need to increase g_wait_seconds to 1200s. */
@ -59,6 +61,7 @@ extern bool wait_seconds_set;
extern int g_waitSeconds;
extern CM_Conn* CmServer_conn;
extern char *g_command_operation_azName;
SSDoubleClusterMode g_ssDoubleClusterMode = SS_DOUBLE_NULL;
static int QueryNeedQuickSwitchInstances(int* need_quick_switchover_instance,
NeedQuickSwitchoverInstanceArray* needQuickSwitchoverInstance, bool* is_cluster_balance,
@ -69,11 +72,16 @@ static int GetDatapathByInstanceId(uint32 instanceId, int instanceType, char* da
static int JudgeInstanceRole(int instanceType, int member_index, int instance_role, const CommonOption *commCtx);
static int JudgeDatanodeStatus(uint32 node_id, const char *data_path, int db_state);
static int JudgeGtmStatus(uint32 node_id, const char *data_path, int gtm_state);
static void GetClusterMode();
static void SetSwitchoverOper(SwitchoverOper *oper, int32 localRole, uint32 instanceId)
{
if (localRole == INSTANCE_ROLE_STANDBY) {
oper->localRole = INSTANCE_ROLE_PRIMARY;
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
oper->localRole = INSTANCE_ROLE_MAIN_STANDBY;
} else {
oper->localRole = INSTANCE_ROLE_PRIMARY;
}
oper->peerRole = INSTANCE_ROLE_STANDBY;
} else if (localRole == INSTANCE_ROLE_CASCADE_STANDBY) {
oper->localRole = INSTANCE_ROLE_STANDBY;
@ -101,7 +109,12 @@ static int DoSwitchoverBase(const CtlOption *ctx)
cm_to_ctl_command_ack *ackMsg = NULL;
cm_to_ctl_instance_status *instStatusPtr = NULL;
cm_switchover_incomplete_msg *switchoverIncompletePtr = NULL;
SwitchoverOper oper = {INSTANCE_ROLE_PRIMARY, INSTANCE_ROLE_STANDBY};
SwitchoverOper oper;
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
oper = {INSTANCE_ROLE_MAIN_STANDBY, INSTANCE_ROLE_STANDBY};
} else {
oper = {INSTANCE_ROLE_PRIMARY, INSTANCE_ROLE_STANDBY};
}
// return conn to cm_server
do_conn_cmserver(false, 0);
@ -1489,6 +1502,7 @@ static int GetDatapathByInstanceId(uint32 instanceId, int instanceType, char* da
int DoSwitchover(const CtlOption *ctx)
{
GetClusterMode();
if (ctx->switchover.switchoverAll) {
if (switchover_all_quick && g_clusterType != V3SingleInstCluster) {
return DoSwitchoverAllQuick();
@ -1510,3 +1524,24 @@ int DoSwitchover(const CtlOption *ctx)
return DoSwitchoverBase(ctx);
}
static void GetClusterMode()
{
errno_t rc;
char cmDir[CM_PATH_LENGTH] = { 0 };
char configDir[CM_PATH_LENGTH] = { 0 };
rc = memcpy_s(cmDir, sizeof(cmDir), g_currentNode->cmDataPath, sizeof(cmDir));
securec_check_errno(rc, (void)rc);
if (cmDir[0] == '\0') {
write_runlog(ERROR, "Failed to get cm base data path from static config file.");
exit(-1);
}
rc = snprintf_s(configDir, sizeof(configDir), sizeof(configDir) - 1, "%s/cm_agent/cm_agent.conf", cmDir);
securec_check_intval(rc, (void)rc);
g_ssDoubleClusterMode =
(SSDoubleClusterMode)get_uint32_value_from_config(configDir, "ss_double_cluster_mode", SS_DOUBLE_NULL);
}

View File

@ -89,4 +89,5 @@ cms_network_isolation_timeout = 20 # cms judges the network is isolated when i
# default 20
wait_static_primary_times = 6 # Time to wait for the primary recovery after the primary stopped unexpectedly.
# default value is 6
ss_double_cluster_mode = 0 # cluster run mode for the ss double-cluster scenario; valid values: 0-2
############### must leave a new line at the end ###################

View File

@ -84,4 +84,5 @@ cms_enable_db_crash_recovery = false # used in 2 nodes cluster. when network re
cms_network_isolation_timeout = 20 # cms judges the network is isolated when it finds ddb cluster is not sync with each other nodes,
# after cms_network_isolation_timeout times.
# default 20
ss_double_cluster_mode = 0 # cluster run mode for the ss double-cluster scenario; valid values: 0-2
############### must leave a new line at the end ###################

View File

@ -84,4 +84,5 @@ cms_enable_db_crash_recovery = false # used in 2 nodes cluster. when network re
cms_network_isolation_timeout = 20 # cms judges the network is isolated when it finds ddb cluster is not sync with each other nodes,
# after cms_network_isolation_timeout times.
# default 20
ss_double_cluster_mode = 0 # cluster run mode for the ss double-cluster scenario; valid values: 0-2
############### must leave a new line at the end ###################

View File

@ -590,7 +590,7 @@ static void DnWillChangeStaticRole(const DnArbCtx *ctx, const char *str)
if (ctx->localRole->role != cmdSour) {
return;
}
if (cmdPur == INSTANCE_ROLE_PRIMARY) {
if (cmdPur == INSTANCE_ROLE_PRIMARY || cmdPur == INSTANCE_ROLE_MAIN_STANDBY) {
ChangeStaticRoleAndNotifyCn(ctx->groupIdx, ctx->memIdx);
} else {
ChangeDnMemberIndex(str, ctx->groupIdx, ctx->memIdx, cmdPur, cmdSour);

View File

@ -614,6 +614,8 @@ void get_parameters_from_configfile()
SECONDS_PER_DAY);
g_diskTimeout = get_uint32_value_from_config(configDir, "disk_timeout", 200);
g_agentNetworkTimeout = get_uint32_value_from_config(configDir, "agent_network_timeout", 6);
g_ssDoubleClusterMode =
(SSDoubleClusterMode)get_uint32_value_from_config(configDir, "ss_double_cluster_mode", SS_DOUBLE_NULL);
GetDnArbitrateMode();
#ifndef ENABLE_PRIVATEGAUSS
g_waitStaticPrimaryTimes = get_uint32_value_from_config(configDir, "wait_static_primary_times", 6);

View File

@ -251,6 +251,7 @@ char g_cmStaticConfigurePath[MAX_PATH_LEN] = {0};
cm_fenced_UDF_report_status *g_fenced_UDF_report_status_ptr = NULL;
int *cn_dn_disconnect_times = NULL;
int *g_lastCnDnDisconnectTimes = NULL;
SSDoubleClusterMode g_ssDoubleClusterMode = SS_DOUBLE_NULL;
volatile switchover_az_mode cm_switchover_az_mode = AUTOSWITCHOVER_AZ;
volatile logic_cluster_restart_mode cm_logic_cluster_restart_mode = INITIAL_LOGIC_CLUSTER_RESTART;

View File

@ -48,8 +48,8 @@ void ChangeDnMemberIndex(const char *str, uint32 groupIdx, int32 memIdx, int32 i
datanode_role_int_to_string(instTypePur));
instMem[i].role = instTypePur;
cmd[i].role_changed = INSTANCE_ROLE_CHANGED;
} else if ((instTypePur == INSTANCE_ROLE_PRIMARY || peerInstId == instMem[i].instanceId) &&
(i != memIdx) && instMem[i].role == instTypePur) {
} else if (((instTypePur == INSTANCE_ROLE_PRIMARY || instTypePur == INSTANCE_ROLE_MAIN_STANDBY)
|| peerInstId == instMem[i].instanceId) && (i != memIdx) && instMem[i].role == instTypePur) {
write_runlog(LOG, "%s: %d: instance(%u) static role(%s) will change to be %s.\n",
str, __LINE__, instMem[i].instanceId, datanode_role_int_to_string(instMem[i].role),
datanode_role_int_to_string(instTypeSor));
@ -63,8 +63,13 @@ void ChangeDnMemberIndex(const char *str, uint32 groupIdx, int32 memIdx, int32 i
/*
 * Dispatch a primary-member change for one datanode group.
 *
 * group_index:          index of the instance group to update.
 * primary_member_index: index of the member inside that group that the change targets.
 *
 * When g_one_master_multi_slave is set, the work is delegated to ChangeDnMemberIndex(); the
 * g_ssDoubleClusterMode branch passes INSTANCE_ROLE_MAIN_STANDBY instead of INSTANCE_ROLE_PRIMARY
 * when the mode is SS_DOUBLE_STANDBY. Otherwise change_primary_member_index() is called.
 *
 * NOTE(review): this text comes from a flattened diff, so the unconditional
 * ChangeDnMemberIndex(..., INSTANCE_ROLE_PRIMARY, INSTANCE_ROLE_STANDBY) call directly below is
 * presumably the removed pre-change line and only the mode-dependent branch is live - confirm
 * against the real file.
 */
void ChangeDnPrimaryMemberIndex(uint32 group_index, int primary_member_index)
{
if (g_one_master_multi_slave) {
ChangeDnMemberIndex("[ChangeDnPrimaryMemberIndex]",
group_index, primary_member_index, INSTANCE_ROLE_PRIMARY, INSTANCE_ROLE_STANDBY);
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
ChangeDnMemberIndex("[ChangeDnPrimaryMemberIndex]",
group_index, primary_member_index, INSTANCE_ROLE_MAIN_STANDBY, INSTANCE_ROLE_STANDBY);
} else {
ChangeDnMemberIndex("[ChangeDnPrimaryMemberIndex]",
group_index, primary_member_index, INSTANCE_ROLE_PRIMARY, INSTANCE_ROLE_STANDBY);
}
} else {
change_primary_member_index(group_index, primary_member_index);
}
@ -79,11 +84,17 @@ void change_primary_member_index(uint32 group_index, int primary_member_index)
for (int i = 0; i < count; i++) {
/* Do not change the dummy standby member index; only change the primary and standby member indexes. */
if (i == primary_member_index && instanceMember[i].role != INSTANCE_ROLE_PRIMARY) {
instanceMember[i].role = INSTANCE_ROLE_PRIMARY;
if (i == primary_member_index &&
(instanceMember[i].role != INSTANCE_ROLE_PRIMARY && instanceMember[i].role != INSTANCE_ROLE_MAIN_STANDBY)) {
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
instanceMember[i].role = INSTANCE_ROLE_MAIN_STANDBY;
} else {
instanceMember[i].role = INSTANCE_ROLE_PRIMARY;
}
status[i].role_changed = INSTANCE_ROLE_CHANGED;
SetDynamicConfigChangeToDdb(group_index, i);
} else if (i != primary_member_index && instanceMember[i].role == INSTANCE_ROLE_PRIMARY) {
} else if (i != primary_member_index &&
(instanceMember[i].role == INSTANCE_ROLE_PRIMARY || instanceMember[i].role == INSTANCE_ROLE_MAIN_STANDBY)) {
instanceMember[i].role = INSTANCE_ROLE_STANDBY;
status[i].role_changed = INSTANCE_ROLE_CHANGED;
SetDynamicConfigChangeToDdb(group_index, i);
@ -386,7 +397,11 @@ void SetSwitchoverCmd(cm_instance_command_status *cmd, int32 localRole, uint32 i
cmd->command_status = INSTANCE_COMMAND_WAIT_EXEC;
cmd->pengding_command = (int)MSG_CM_AGENT_SWITCHOVER;
if (localRole == INSTANCE_ROLE_STANDBY) {
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
cmd->cmdPur = INSTANCE_ROLE_MAIN_STANDBY;
} else {
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
}
cmd->cmdSour = INSTANCE_ROLE_STANDBY;
} else if (localRole == INSTANCE_ROLE_CASCADE_STANDBY) {
cmd->cmdPur = INSTANCE_ROLE_STANDBY;

View File

@ -343,6 +343,8 @@ static void ReloadParametersFromConfigfile()
GetDelayArbitClusterTimeFromConf();
g_diskTimeout = get_uint32_value_from_config(configDir, "disk_timeout", 200);
g_agentNetworkTimeout = get_uint32_value_from_config(configDir, "agent_network_timeout", 6);
g_ssDoubleClusterMode =
(SSDoubleClusterMode)get_uint32_value_from_config(configDir, "ss_double_cluster_mode", SS_DOUBLE_NULL);
GetDnArbitrateMode();
#ifndef ENABLE_PRIVATEGAUSS
g_waitStaticPrimaryTimes = get_uint32_value_from_config(configDir, "wait_static_primary_times", 6);
@ -389,13 +391,15 @@ static void ReloadParametersFromConfigfile()
"datastorage_threshold_check_interval=%d,\n"
" max_datastorage_threshold_check=%d, enableSetReadOnly=%s, enableSetReadOnlyThreshold=%u, "
"switch_rto=%d, force_promote=%d, cluster_starting_aribt_delay=%u, enable_e2e_rto=%u, "
"g_delayArbiTime=%u, g_clusterArbiTime=%d, wait_static_primary_times=%u, backup_open=%d.\n",
"g_delayArbiTime=%u, g_clusterArbiTime=%d, wait_static_primary_times=%u, backup_open=%d, "
"g_ssDoubleClusterMode=%d.\n",
log_min_messages, maxLogFileSize, sys_log_path, g_alarmComponentPath, g_alarmReportInterval,
instance_heartbeat_timeout, g_ddbArbicfg.haHeartBeatTimeOut, cmserver_self_vote_timeout,
g_ddbArbicfg.haStatusInterval, cmserver_ha_connect_timeout, instance_failover_delay_timeout,
datastorage_threshold_check_interval, max_datastorage_threshold_check, g_enableSetReadOnly,
g_readOnlyThreshold, switch_rto, force_promote, g_clusterStartingArbitDelay,
g_enableE2ERto, g_delayArbiTime, g_clusterArbiTime, g_waitStaticPrimaryTimes, backup_open);
g_enableE2ERto, g_delayArbiTime, g_clusterArbiTime, g_waitStaticPrimaryTimes, backup_open,
g_ssDoubleClusterMode);
#endif
}

View File

@ -1064,14 +1064,16 @@ int isNodeBalanced(uint32 *switchedInstance)
logicClusterId = get_logicClusterId_by_dynamic_dataNodeId(
g_instance_role_group_ptr[i].instanceMember[0].instanceId);
if (g_single_node_cluster && dnStat->local_role == INSTANCE_ROLE_NORMAL &&
g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_PRIMARY) {
(g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_PRIMARY ||
g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_MAIN_STANDBY)) {
break;
}
if ((dnStat->local_role == INSTANCE_ROLE_PRIMARY &&
if (((dnStat->local_role == INSTANCE_ROLE_PRIMARY || dnStat->local_role == INSTANCE_ROLE_MAIN_STANDBY) &&
g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_STANDBY) ||
(dnStat->local_role != INSTANCE_ROLE_PRIMARY &&
g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_PRIMARY)) {
((dnStat->local_role != INSTANCE_ROLE_PRIMARY && dnStat->local_role != INSTANCE_ROLE_MAIN_STANDBY) &&
(g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_PRIMARY ||
g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_MAIN_STANDBY))) {
if (switchedInstance != NULL) {
switchedInstance[switchedCount] = g_instance_role_group_ptr[i].instanceMember[j].instanceId;
}
@ -1156,8 +1158,10 @@ int switchoverFullDone(void)
case INSTANCE_TYPE_DATANODE:
if (g_instance_group_report_status_ptr[group_index].instance_status.command_member[member_index]
.pengding_command != (int32)MSG_CM_AGENT_SWITCHOVER &&
(g_instance_group_report_status_ptr[group_index].instance_status.data_node_member[member_index]
.local_status.local_role != INSTANCE_ROLE_PRIMARY &&
g_instance_group_report_status_ptr[group_index].instance_status.data_node_member[member_index]
.local_status.local_role != INSTANCE_ROLE_PRIMARY) {
.local_status.local_role != INSTANCE_ROLE_MAIN_STANDBY)) {
(void)pthread_rwlock_unlock(&(g_instance_group_report_status_ptr[group_index].lk_lock));
write_runlog(LOG, "the instance(node = %u instanceid = %u) switchover fail\n",
switchOverInstances[i].node, switchOverInstances[i].instanceId);
@ -1166,7 +1170,8 @@ int switchoverFullDone(void)
if (g_instance_group_report_status_ptr[group_index].instance_status.command_member[member_index]
.pengding_command == (int32)MSG_CM_AGENT_SWITCHOVER) {
for (int ii = 0; ii < g_instance_role_group_ptr[group_index].count; ii++) {
if (g_instance_role_group_ptr[group_index].instanceMember[ii].role == INSTANCE_ROLE_PRIMARY &&
if ((g_instance_role_group_ptr[group_index].instanceMember[ii].role == INSTANCE_ROLE_PRIMARY ||
g_instance_role_group_ptr[group_index].instanceMember[ii].role == INSTANCE_ROLE_MAIN_STANDBY) &&
g_instance_group_report_status_ptr[group_index].instance_status.data_node_member[ii]
.local_status.db_state != INSTANCE_HA_STATE_NORMAL) {
(void)pthread_rwlock_unlock(&(g_instance_group_report_status_ptr[group_index].lk_lock));
@ -1180,7 +1185,9 @@ int switchoverFullDone(void)
if ((g_instance_group_report_status_ptr[group_index].instance_status.command_member[member_index]
.pengding_command == MSG_CM_AGENT_SWITCHOVER) ||
(g_instance_group_report_status_ptr[group_index].instance_status.data_node_member[member_index]
.local_status.local_role != INSTANCE_ROLE_PRIMARY)) {
.local_status.local_role != INSTANCE_ROLE_PRIMARY &&
g_instance_group_report_status_ptr[group_index].instance_status.data_node_member[member_index]
.local_status.local_role != INSTANCE_ROLE_MAIN_STANDBY)) {
(void)pthread_rwlock_unlock(&(g_instance_group_report_status_ptr[group_index].lk_lock));
write_runlog(LOG, "the instance(node = %u instanceid = %u) is executing switchover.\n",
switchOverInstances[i].node, switchOverInstances[i].instanceId);
@ -1224,7 +1231,11 @@ void SwitchOverSetting(int time_out, int instanceType, uint32 ptrIndex, int memb
&(g_instance_group_report_status_ptr[ptrIndex].instance_status.command_member[memberIndex]);
cmd->command_status = INSTANCE_COMMAND_WAIT_EXEC;
cmd->pengding_command = (int)MSG_CM_AGENT_SWITCHOVER;
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
cmd->cmdPur = INSTANCE_ROLE_MAIN_STANDBY;
} else {
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
}
cmd->cmdSour = INSTANCE_ROLE_STANDBY;
cmd->time_out = time_out;
cmd->peerInstId = GetPeerInstId(ptrIndex, memberIndex);

View File

@ -110,6 +110,7 @@ void ProcessCtlToCmSwitchoverMsg(MsgRecvInfo* recvMsgInfo, const ctl_to_cm_switc
// tell cm_ctl will switchover to primary or standby
ackMsg.pengding_command = localRole;
write_runlog(LOG, "ackMsg.pengding_command: %d\n", localRole);
(void)RespondMsg(recvMsgInfo, 'S', (char *)(&ackMsg), sizeof(ackMsg));
if (ackMsg.command_result == CM_INVALID_COMMAND) {
return;
@ -435,7 +436,11 @@ static void process_single_instance_switchover_info(switchover_instance *instanc
cm_instance_command_status *cmd = &(instReport->command_member[j]);
cmd->command_status = INSTANCE_COMMAND_WAIT_EXEC;
cmd->pengding_command = (int)MSG_CM_AGENT_SWITCHOVER;
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
cmd->cmdPur = INSTANCE_ROLE_MAIN_STANDBY;
} else {
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
}
cmd->cmdSour = INSTANCE_ROLE_STANDBY;
cmd->peerInstId = GetPeerInstId(i, j);
cmd->time_out = ctl_to_cm_swithover_ptr->wait_seconds;
@ -701,7 +706,9 @@ void ProcessCtlToCmSwitchoverAzMsg(MsgRecvInfo* recvMsgInfo, ctl_to_cm_switchove
break;
} else if (g_instance_role_group_ptr[i].instanceMember[j].instanceType == INSTANCE_TYPE_DATANODE &&
((g_instance_group_report_status_ptr[i].instance_status.data_node_member[j]
.local_status.local_role == INSTANCE_ROLE_PRIMARY && sameAz))) {
.local_status.local_role == INSTANCE_ROLE_PRIMARY ||
g_instance_group_report_status_ptr[i].instance_status.data_node_member[j]
.local_status.local_role == INSTANCE_ROLE_MAIN_STANDBY) && sameAz)) {
primaryInstanceInTargetAZ = true;
noNeedDoDnNum++;
checkSwitchoverInstance = true;
@ -849,7 +856,8 @@ static int SwitchoverDone(void)
int dnLocalRole = g_instance_group_report_status_ptr[i].instance_status.data_node_member[j]
.local_status.local_role;
bool enCheck = (CheckInstInSyncList(i, j, str) == SYNCLIST_IS_FINISTH);
if (initRole == INSTANCE_ROLE_PRIMARY && dnLocalRole != INSTANCE_ROLE_PRIMARY &&
if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) &&
(dnLocalRole != INSTANCE_ROLE_PRIMARY && dnLocalRole != INSTANCE_ROLE_MAIN_STANDBY) &&
*command != (int)MSG_CM_AGENT_SWITCHOVER && enCheck) {
if (localStatus == INSTANCE_HA_STATE_NORMAL) {
set_pending_command(i, j, MSG_CM_AGENT_SWITCHOVER, SWITCHOVER_DEFAULT_WAIT);
@ -864,23 +872,26 @@ static int SwitchoverDone(void)
}
}
if (initRole == INSTANCE_ROLE_PRIMARY && dnLocalRole == INSTANCE_ROLE_STANDBY &&
if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) && dnLocalRole == INSTANCE_ROLE_STANDBY &&
localStatus == INSTANCE_HA_STATE_PROMOTING && *command == MSG_CM_AGENT_SWITCHOVER) {
anyInitPrimarySwitchover = true;
}
/* must keep three or in this if condition, otherwise will result to some problem. */
if (*command == MSG_CM_AGENT_SWITCHOVER &&
((dnLocalRole != INSTANCE_ROLE_PRIMARY && initRole == INSTANCE_ROLE_PRIMARY) ||
(g_instance_role_group_ptr[i].instanceMember[j].role == INSTANCE_ROLE_PRIMARY &&
(((dnLocalRole != INSTANCE_ROLE_PRIMARY && dnLocalRole != INSTANCE_ROLE_MAIN_STANDBY) &&
(initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY)) ||
((g_instance_role_group_ptr[i].instanceMember[j].role == INSTANCE_ROLE_PRIMARY ||
g_instance_role_group_ptr[i].instanceMember[j].role == INSTANCE_ROLE_MAIN_STANDBY) &&
localStatus != INSTANCE_HA_STATE_NORMAL))) {
(void)pthread_rwlock_unlock(&(g_instance_group_report_status_ptr[i].lk_lock));
write_runlog(LOG, "%s: inst(%u) is doing switchover.\n", str, instanceId);
return SWITCHOVER_EXECING;
}
if (*command != MSG_CM_AGENT_SWITCHOVER && dnLocalRole != INSTANCE_ROLE_PRIMARY &&
initRole == INSTANCE_ROLE_PRIMARY) {
if (*command != MSG_CM_AGENT_SWITCHOVER &&
(dnLocalRole != INSTANCE_ROLE_PRIMARY && dnLocalRole != INSTANCE_ROLE_MAIN_STANDBY) &&
(initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY)) {
write_runlog(LOG, "line %d: instanceId(%u) has not do switchover.\n", __LINE__, instanceId);
dnCount++;
partlySwitchover = true;
@ -2197,38 +2208,44 @@ void ProcessCtlToCmSwitchoverAllMsg(MsgRecvInfo* recvMsgInfo, const ctl_to_cm_sw
bool isCatchUp = IsInCatchUpState(i, j);
bool isCheckSyncList = (CheckInstInSyncList(i, j, str) == SYNCLIST_IS_FINISTH);
if ((dnLocalRole == INSTANCE_ROLE_STANDBY || dnLocalRole == INSTANCE_ROLE_CASCADE_STANDBY) &&
initRole == INSTANCE_ROLE_PRIMARY && localStatus == INSTANCE_HA_STATE_NORMAL && !isInVoteAz &&
(initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) &&
localStatus == INSTANCE_HA_STATE_NORMAL && !isInVoteAz &&
!isCatchUp && isCheckSyncList) {
SetSwitchoverInSwitchoverProcess(i, j, switchoverMsg->wait_seconds);
needDoDnNum++;
} else if (initRole == INSTANCE_ROLE_PRIMARY && localStatus != INSTANCE_HA_STATE_NORMAL) {
} else if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) &&
localStatus != INSTANCE_HA_STATE_NORMAL) {
write_runlog(LOG, "dn instance=%u status=%s, will not switchover for status is unNormal.\n",
instanceId, datanode_dbstate_int_to_string(localStatus));
msgBalanceResult.instances[imbalanceIndex++] = instanceId;
noNeedDoDnNum++;
} else if (initRole == INSTANCE_ROLE_PRIMARY && dnLocalRole == INSTANCE_ROLE_PRIMARY) {
} else if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) &&
(dnLocalRole == INSTANCE_ROLE_PRIMARY || dnLocalRole == INSTANCE_ROLE_MAIN_STANDBY)) {
write_runlog(LOG,
"dn instance=%u status=%s, will not switchover for status is already primary.\n",
instanceId, datanode_dbstate_int_to_string(localStatus));
noNeedDoDnNum++;
} else if (initRole == INSTANCE_ROLE_PRIMARY && isInVoteAz && isCheckSyncList) {
} else if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) &&
isInVoteAz && isCheckSyncList) {
write_runlog(LOG, "dn instance=%u status=%s, will not switchover in vote AZ.\n", instanceId,
datanode_dbstate_int_to_string(localStatus));
noNeedDoDnNum++;
} else if (initRole == INSTANCE_ROLE_PRIMARY && isCatchUp) {
} else if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) && isCatchUp) {
write_runlog(LOG,
"dn instance=%u status=%s, will not switchover for the xlog location gap"
"between the primary and standby is too large.\n",
instanceId, datanode_dbstate_int_to_string(localStatus));
if (dnLocalRole == INSTANCE_ROLE_STANDBY && initRole == INSTANCE_ROLE_PRIMARY) {
if (dnLocalRole == INSTANCE_ROLE_STANDBY &&
(initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY)) {
msgBalanceResult.instances[imbalanceIndex++] = instanceId;
}
noNeedDoDnNum++;
} else if (initRole == INSTANCE_ROLE_PRIMARY && isCheckSyncList) {
} else if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) && isCheckSyncList) {
write_runlog(LOG,
"dn instance=%u status=%s, will not switchover for the inst not in synclist.\n", instanceId,
datanode_dbstate_int_to_string(localStatus));
if (dnLocalRole == INSTANCE_ROLE_STANDBY && initRole == INSTANCE_ROLE_PRIMARY) {
if (dnLocalRole == INSTANCE_ROLE_STANDBY &&
(initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY)) {
msgBalanceResult.instances[imbalanceIndex++] = instanceId;
}
noNeedDoDnNum++;

View File

@ -143,7 +143,11 @@ void SetSwitchoverPendingCmd(uint32 groupIdx, int32 memIdx, int32 waitSecond, co
cmd->cmdSour = INSTANCE_ROLE_CASCADE_STANDBY;
cmd->cmdRealPur = INSTANCE_ROLE_PRIMARY;
} else {
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
cmd->cmdPur = INSTANCE_ROLE_MAIN_STANDBY;
} else {
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
}
cmd->cmdSour = INSTANCE_ROLE_STANDBY;
cmd->cmdRealPur = INSTANCE_ROLE_INIT;
if (isNeedDelay) {

View File

@ -476,6 +476,7 @@ extern int32 g_clusterArbiTime;
extern bool g_isPauseArbitration;
extern char g_cmManualPausePath[MAX_PATH_LEN];
extern uint32 g_waitStaticPrimaryTimes;
extern SSDoubleClusterMode g_ssDoubleClusterMode;
extern void clean_init_cluster_state();
extern void instance_delay_arbitrate_time_out_direct_clean(uint32 group_index, int member_index,