cm支持双集群中备集群switchover
This commit is contained in:
parent
0e0bd1c6e1
commit
7299703207
@ -130,6 +130,7 @@ const char *g_cmsParamInfo[] = {
|
||||
#ifndef ENABLE_PRIVATEGAUSS
|
||||
"wait_static_primary_times|int|5,2147483647|NULL|NULL|",
|
||||
#endif
|
||||
"ss_double_cluster_mode|int|0,2|NULL|NULL|",
|
||||
};
|
||||
|
||||
const char *g_valueTypeStr[] = {
|
||||
|
@ -28,6 +28,8 @@
|
||||
#include "ctl_common.h"
|
||||
#include "cm/libpq-int.h"
|
||||
#include "cm/cm_agent/cma_main.h"
|
||||
#include "cm_elog.h"
|
||||
#include "cm_msg.h"
|
||||
|
||||
/* If a DN switchover takes a long time and does not complete, it will time out and pending_command will be cleared
   in CM_ThreadMonitorMain() (server_main.cpp). The default g_wait_seconds is 180s; we need to increase it to 1200s. */
|
||||
@ -59,6 +61,7 @@ extern bool wait_seconds_set;
|
||||
extern int g_waitSeconds;
|
||||
extern CM_Conn* CmServer_conn;
|
||||
extern char *g_command_operation_azName;
|
||||
SSDoubleClusterMode g_ssDoubleClusterMode = SS_DOUBLE_NULL;
|
||||
|
||||
static int QueryNeedQuickSwitchInstances(int* need_quick_switchover_instance,
|
||||
NeedQuickSwitchoverInstanceArray* needQuickSwitchoverInstance, bool* is_cluster_balance,
|
||||
@ -69,11 +72,16 @@ static int GetDatapathByInstanceId(uint32 instanceId, int instanceType, char* da
|
||||
static int JudgeInstanceRole(int instanceType, int member_index, int instance_role, const CommonOption *commCtx);
|
||||
static int JudgeDatanodeStatus(uint32 node_id, const char *data_path, int db_state);
|
||||
static int JudgeGtmStatus(uint32 node_id, const char *data_path, int gtm_state);
|
||||
static void GetClusterMode();
|
||||
|
||||
static void SetSwitchoverOper(SwitchoverOper *oper, int32 localRole, uint32 instanceId)
|
||||
{
|
||||
if (localRole == INSTANCE_ROLE_STANDBY) {
|
||||
oper->localRole = INSTANCE_ROLE_PRIMARY;
|
||||
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
|
||||
oper->localRole = INSTANCE_ROLE_MAIN_STANDBY;
|
||||
} else {
|
||||
oper->localRole = INSTANCE_ROLE_PRIMARY;
|
||||
}
|
||||
oper->peerRole = INSTANCE_ROLE_STANDBY;
|
||||
} else if (localRole == INSTANCE_ROLE_CASCADE_STANDBY) {
|
||||
oper->localRole = INSTANCE_ROLE_STANDBY;
|
||||
@ -101,7 +109,12 @@ static int DoSwitchoverBase(const CtlOption *ctx)
|
||||
cm_to_ctl_command_ack *ackMsg = NULL;
|
||||
cm_to_ctl_instance_status *instStatusPtr = NULL;
|
||||
cm_switchover_incomplete_msg *switchoverIncompletePtr = NULL;
|
||||
SwitchoverOper oper = {INSTANCE_ROLE_PRIMARY, INSTANCE_ROLE_STANDBY};
|
||||
SwitchoverOper oper;
|
||||
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
|
||||
oper = {INSTANCE_ROLE_MAIN_STANDBY, INSTANCE_ROLE_STANDBY};
|
||||
} else {
|
||||
oper = {INSTANCE_ROLE_PRIMARY, INSTANCE_ROLE_STANDBY};
|
||||
}
|
||||
|
||||
// return conn to cm_server
|
||||
do_conn_cmserver(false, 0);
|
||||
@ -1489,6 +1502,7 @@ static int GetDatapathByInstanceId(uint32 instanceId, int instanceType, char* da
|
||||
|
||||
int DoSwitchover(const CtlOption *ctx)
|
||||
{
|
||||
GetClusterMode();
|
||||
if (ctx->switchover.switchoverAll) {
|
||||
if (switchover_all_quick && g_clusterType != V3SingleInstCluster) {
|
||||
return DoSwitchoverAllQuick();
|
||||
@ -1510,3 +1524,24 @@ int DoSwitchover(const CtlOption *ctx)
|
||||
|
||||
return DoSwitchoverBase(ctx);
|
||||
}
|
||||
|
||||
static void GetClusterMode()
|
||||
{
|
||||
errno_t rc;
|
||||
char cmDir[CM_PATH_LENGTH] = { 0 };
|
||||
char configDir[CM_PATH_LENGTH] = { 0 };
|
||||
|
||||
rc = memcpy_s(cmDir, sizeof(cmDir), g_currentNode->cmDataPath, sizeof(cmDir));
|
||||
securec_check_errno(rc, (void)rc);
|
||||
|
||||
if (cmDir[0] == '\0') {
|
||||
write_runlog(ERROR, "Failed to get cm base data path from static config file.");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
rc = snprintf_s(configDir, sizeof(configDir), sizeof(configDir) - 1, "%s/cm_agent/cm_agent.conf", cmDir);
|
||||
securec_check_intval(rc, (void)rc);
|
||||
|
||||
g_ssDoubleClusterMode =
|
||||
(SSDoubleClusterMode)get_uint32_value_from_config(configDir, "ss_double_cluster_mode", SS_DOUBLE_NULL);
|
||||
}
|
@ -89,4 +89,5 @@ cms_network_isolation_timeout = 20 # cms judges the network is isolated when i
|
||||
# default 20
|
||||
wait_static_primary_times = 6 # Time to wait for the primary recovery after the primary stopped unexpectedly.
|
||||
# default value is 6
|
||||
ss_double_cluster_mode = 0 # cluster run mode for the ss double cluster scenario. Valid values: 0-2
|
||||
############### must leave a new line at the end ###################
|
||||
|
@ -84,4 +84,5 @@ cms_enable_db_crash_recovery = false # used in 2 nodes cluster. when network re
|
||||
cms_network_isolation_timeout = 20 # cms judges the network is isolated when it finds ddb cluster is not sync with each other nodes,
|
||||
# after cms_network_isolation_timeout times.
|
||||
# default 20
|
||||
ss_double_cluster_mode = 0 # cluster run mode for the ss double cluster scenario. Valid values: 0-2
|
||||
############### must leave a new line at the end ###################
|
||||
|
@ -84,4 +84,5 @@ cms_enable_db_crash_recovery = false # used in 2 nodes cluster. when network re
|
||||
cms_network_isolation_timeout = 20 # cms judges the network is isolated when it finds ddb cluster is not sync with each other nodes,
|
||||
# after cms_network_isolation_timeout times.
|
||||
# default 20
|
||||
ss_double_cluster_mode = 0 # cluster run mode for the ss double cluster scenario. Valid values: 0-2
|
||||
############### must leave a new line at the end ###################
|
||||
|
@ -590,7 +590,7 @@ static void DnWillChangeStaticRole(const DnArbCtx *ctx, const char *str)
|
||||
if (ctx->localRole->role != cmdSour) {
|
||||
return;
|
||||
}
|
||||
if (cmdPur == INSTANCE_ROLE_PRIMARY) {
|
||||
if (cmdPur == INSTANCE_ROLE_PRIMARY || cmdPur == INSTANCE_ROLE_MAIN_STANDBY) {
|
||||
ChangeStaticRoleAndNotifyCn(ctx->groupIdx, ctx->memIdx);
|
||||
} else {
|
||||
ChangeDnMemberIndex(str, ctx->groupIdx, ctx->memIdx, cmdPur, cmdSour);
|
||||
|
@ -614,6 +614,8 @@ void get_parameters_from_configfile()
|
||||
SECONDS_PER_DAY);
|
||||
g_diskTimeout = get_uint32_value_from_config(configDir, "disk_timeout", 200);
|
||||
g_agentNetworkTimeout = get_uint32_value_from_config(configDir, "agent_network_timeout", 6);
|
||||
g_ssDoubleClusterMode =
|
||||
(SSDoubleClusterMode)get_uint32_value_from_config(configDir, "ss_double_cluster_mode", SS_DOUBLE_NULL);
|
||||
GetDnArbitrateMode();
|
||||
#ifndef ENABLE_PRIVATEGAUSS
|
||||
g_waitStaticPrimaryTimes = get_uint32_value_from_config(configDir, "wait_static_primary_times", 6);
|
||||
|
@ -251,6 +251,7 @@ char g_cmStaticConfigurePath[MAX_PATH_LEN] = {0};
|
||||
cm_fenced_UDF_report_status *g_fenced_UDF_report_status_ptr = NULL;
|
||||
int *cn_dn_disconnect_times = NULL;
|
||||
int *g_lastCnDnDisconnectTimes = NULL;
|
||||
SSDoubleClusterMode g_ssDoubleClusterMode = SS_DOUBLE_NULL;
|
||||
|
||||
volatile switchover_az_mode cm_switchover_az_mode = AUTOSWITCHOVER_AZ;
|
||||
volatile logic_cluster_restart_mode cm_logic_cluster_restart_mode = INITIAL_LOGIC_CLUSTER_RESTART;
|
||||
|
@ -48,8 +48,8 @@ void ChangeDnMemberIndex(const char *str, uint32 groupIdx, int32 memIdx, int32 i
|
||||
datanode_role_int_to_string(instTypePur));
|
||||
instMem[i].role = instTypePur;
|
||||
cmd[i].role_changed = INSTANCE_ROLE_CHANGED;
|
||||
} else if ((instTypePur == INSTANCE_ROLE_PRIMARY || peerInstId == instMem[i].instanceId) &&
|
||||
(i != memIdx) && instMem[i].role == instTypePur) {
|
||||
} else if (((instTypePur == INSTANCE_ROLE_PRIMARY || instTypePur == INSTANCE_ROLE_MAIN_STANDBY)
|
||||
|| peerInstId == instMem[i].instanceId) && (i != memIdx) && instMem[i].role == instTypePur) {
|
||||
write_runlog(LOG, "%s: %d: instance(%u) static role(%s) will change to be %s.\n",
|
||||
str, __LINE__, instMem[i].instanceId, datanode_role_int_to_string(instMem[i].role),
|
||||
datanode_role_int_to_string(instTypeSor));
|
||||
@ -63,8 +63,13 @@ void ChangeDnMemberIndex(const char *str, uint32 groupIdx, int32 memIdx, int32 i
|
||||
void ChangeDnPrimaryMemberIndex(uint32 group_index, int primary_member_index)
|
||||
{
|
||||
if (g_one_master_multi_slave) {
|
||||
ChangeDnMemberIndex("[ChangeDnPrimaryMemberIndex]",
|
||||
group_index, primary_member_index, INSTANCE_ROLE_PRIMARY, INSTANCE_ROLE_STANDBY);
|
||||
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
|
||||
ChangeDnMemberIndex("[ChangeDnPrimaryMemberIndex]",
|
||||
group_index, primary_member_index, INSTANCE_ROLE_MAIN_STANDBY, INSTANCE_ROLE_STANDBY);
|
||||
} else {
|
||||
ChangeDnMemberIndex("[ChangeDnPrimaryMemberIndex]",
|
||||
group_index, primary_member_index, INSTANCE_ROLE_PRIMARY, INSTANCE_ROLE_STANDBY);
|
||||
}
|
||||
} else {
|
||||
change_primary_member_index(group_index, primary_member_index);
|
||||
}
|
||||
@ -79,11 +84,17 @@ void change_primary_member_index(uint32 group_index, int primary_member_index)
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
/* Does not change dummy standby member index, only change primary and standby member index */
|
||||
if (i == primary_member_index && instanceMember[i].role != INSTANCE_ROLE_PRIMARY) {
|
||||
instanceMember[i].role = INSTANCE_ROLE_PRIMARY;
|
||||
if (i == primary_member_index &&
|
||||
(instanceMember[i].role != INSTANCE_ROLE_PRIMARY && instanceMember[i].role != INSTANCE_ROLE_MAIN_STANDBY)) {
|
||||
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
|
||||
instanceMember[i].role = INSTANCE_ROLE_MAIN_STANDBY;
|
||||
} else {
|
||||
instanceMember[i].role = INSTANCE_ROLE_PRIMARY;
|
||||
}
|
||||
status[i].role_changed = INSTANCE_ROLE_CHANGED;
|
||||
SetDynamicConfigChangeToDdb(group_index, i);
|
||||
} else if (i != primary_member_index && instanceMember[i].role == INSTANCE_ROLE_PRIMARY) {
|
||||
} else if (i != primary_member_index &&
|
||||
(instanceMember[i].role == INSTANCE_ROLE_PRIMARY || instanceMember[i].role == INSTANCE_ROLE_MAIN_STANDBY)) {
|
||||
instanceMember[i].role = INSTANCE_ROLE_STANDBY;
|
||||
status[i].role_changed = INSTANCE_ROLE_CHANGED;
|
||||
SetDynamicConfigChangeToDdb(group_index, i);
|
||||
@ -386,7 +397,11 @@ void SetSwitchoverCmd(cm_instance_command_status *cmd, int32 localRole, uint32 i
|
||||
cmd->command_status = INSTANCE_COMMAND_WAIT_EXEC;
|
||||
cmd->pengding_command = (int)MSG_CM_AGENT_SWITCHOVER;
|
||||
if (localRole == INSTANCE_ROLE_STANDBY) {
|
||||
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
|
||||
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
|
||||
cmd->cmdPur = INSTANCE_ROLE_MAIN_STANDBY;
|
||||
} else {
|
||||
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
|
||||
}
|
||||
cmd->cmdSour = INSTANCE_ROLE_STANDBY;
|
||||
} else if (localRole == INSTANCE_ROLE_CASCADE_STANDBY) {
|
||||
cmd->cmdPur = INSTANCE_ROLE_STANDBY;
|
||||
|
@ -343,6 +343,8 @@ static void ReloadParametersFromConfigfile()
|
||||
GetDelayArbitClusterTimeFromConf();
|
||||
g_diskTimeout = get_uint32_value_from_config(configDir, "disk_timeout", 200);
|
||||
g_agentNetworkTimeout = get_uint32_value_from_config(configDir, "agent_network_timeout", 6);
|
||||
g_ssDoubleClusterMode =
|
||||
(SSDoubleClusterMode)get_uint32_value_from_config(configDir, "ss_double_cluster_mode", SS_DOUBLE_NULL);
|
||||
GetDnArbitrateMode();
|
||||
#ifndef ENABLE_PRIVATEGAUSS
|
||||
g_waitStaticPrimaryTimes = get_uint32_value_from_config(configDir, "wait_static_primary_times", 6);
|
||||
@ -389,13 +391,15 @@ static void ReloadParametersFromConfigfile()
|
||||
"datastorage_threshold_check_interval=%d,\n"
|
||||
" max_datastorage_threshold_check=%d, enableSetReadOnly=%s, enableSetReadOnlyThreshold=%u, "
|
||||
"switch_rto=%d, force_promote=%d, cluster_starting_aribt_delay=%u, enable_e2e_rto=%u, "
|
||||
"g_delayArbiTime=%u, g_clusterArbiTime=%d, wait_static_primary_times=%u, backup_open=%d.\n",
|
||||
"g_delayArbiTime=%u, g_clusterArbiTime=%d, wait_static_primary_times=%u, backup_open=%d, "
|
||||
"g_ssDoubleClusterMode=%d.\n",
|
||||
log_min_messages, maxLogFileSize, sys_log_path, g_alarmComponentPath, g_alarmReportInterval,
|
||||
instance_heartbeat_timeout, g_ddbArbicfg.haHeartBeatTimeOut, cmserver_self_vote_timeout,
|
||||
g_ddbArbicfg.haStatusInterval, cmserver_ha_connect_timeout, instance_failover_delay_timeout,
|
||||
datastorage_threshold_check_interval, max_datastorage_threshold_check, g_enableSetReadOnly,
|
||||
g_readOnlyThreshold, switch_rto, force_promote, g_clusterStartingArbitDelay,
|
||||
g_enableE2ERto, g_delayArbiTime, g_clusterArbiTime, g_waitStaticPrimaryTimes, backup_open);
|
||||
g_enableE2ERto, g_delayArbiTime, g_clusterArbiTime, g_waitStaticPrimaryTimes, backup_open,
|
||||
g_ssDoubleClusterMode);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1064,14 +1064,16 @@ int isNodeBalanced(uint32 *switchedInstance)
|
||||
logicClusterId = get_logicClusterId_by_dynamic_dataNodeId(
|
||||
g_instance_role_group_ptr[i].instanceMember[0].instanceId);
|
||||
if (g_single_node_cluster && dnStat->local_role == INSTANCE_ROLE_NORMAL &&
|
||||
g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_PRIMARY) {
|
||||
(g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_PRIMARY ||
|
||||
g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_MAIN_STANDBY)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if ((dnStat->local_role == INSTANCE_ROLE_PRIMARY &&
|
||||
if (((dnStat->local_role == INSTANCE_ROLE_PRIMARY || dnStat->local_role == INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_STANDBY) ||
|
||||
(dnStat->local_role != INSTANCE_ROLE_PRIMARY &&
|
||||
g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_PRIMARY)) {
|
||||
((dnStat->local_role != INSTANCE_ROLE_PRIMARY && dnStat->local_role != INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
(g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_PRIMARY ||
|
||||
g_instance_role_group_ptr[i].instanceMember[j].instanceRoleInit == INSTANCE_ROLE_MAIN_STANDBY))) {
|
||||
if (switchedInstance != NULL) {
|
||||
switchedInstance[switchedCount] = g_instance_role_group_ptr[i].instanceMember[j].instanceId;
|
||||
}
|
||||
@ -1156,8 +1158,10 @@ int switchoverFullDone(void)
|
||||
case INSTANCE_TYPE_DATANODE:
|
||||
if (g_instance_group_report_status_ptr[group_index].instance_status.command_member[member_index]
|
||||
.pengding_command != (int32)MSG_CM_AGENT_SWITCHOVER &&
|
||||
(g_instance_group_report_status_ptr[group_index].instance_status.data_node_member[member_index]
|
||||
.local_status.local_role != INSTANCE_ROLE_PRIMARY &&
|
||||
g_instance_group_report_status_ptr[group_index].instance_status.data_node_member[member_index]
|
||||
.local_status.local_role != INSTANCE_ROLE_PRIMARY) {
|
||||
.local_status.local_role != INSTANCE_ROLE_MAIN_STANDBY)) {
|
||||
(void)pthread_rwlock_unlock(&(g_instance_group_report_status_ptr[group_index].lk_lock));
|
||||
write_runlog(LOG, "the instance(node = %u instanceid = %u) switchover fail\n",
|
||||
switchOverInstances[i].node, switchOverInstances[i].instanceId);
|
||||
@ -1166,7 +1170,8 @@ int switchoverFullDone(void)
|
||||
if (g_instance_group_report_status_ptr[group_index].instance_status.command_member[member_index]
|
||||
.pengding_command == (int32)MSG_CM_AGENT_SWITCHOVER) {
|
||||
for (int ii = 0; ii < g_instance_role_group_ptr[group_index].count; ii++) {
|
||||
if (g_instance_role_group_ptr[group_index].instanceMember[ii].role == INSTANCE_ROLE_PRIMARY &&
|
||||
if ((g_instance_role_group_ptr[group_index].instanceMember[ii].role == INSTANCE_ROLE_PRIMARY ||
|
||||
g_instance_role_group_ptr[group_index].instanceMember[ii].role == INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
g_instance_group_report_status_ptr[group_index].instance_status.data_node_member[ii]
|
||||
.local_status.db_state != INSTANCE_HA_STATE_NORMAL) {
|
||||
(void)pthread_rwlock_unlock(&(g_instance_group_report_status_ptr[group_index].lk_lock));
|
||||
@ -1180,7 +1185,9 @@ int switchoverFullDone(void)
|
||||
if ((g_instance_group_report_status_ptr[group_index].instance_status.command_member[member_index]
|
||||
.pengding_command == MSG_CM_AGENT_SWITCHOVER) ||
|
||||
(g_instance_group_report_status_ptr[group_index].instance_status.data_node_member[member_index]
|
||||
.local_status.local_role != INSTANCE_ROLE_PRIMARY)) {
|
||||
.local_status.local_role != INSTANCE_ROLE_PRIMARY &&
|
||||
g_instance_group_report_status_ptr[group_index].instance_status.data_node_member[member_index]
|
||||
.local_status.local_role != INSTANCE_ROLE_MAIN_STANDBY)) {
|
||||
(void)pthread_rwlock_unlock(&(g_instance_group_report_status_ptr[group_index].lk_lock));
|
||||
write_runlog(LOG, "the instance(node = %u instanceid = %u) is executing switchover.\n",
|
||||
switchOverInstances[i].node, switchOverInstances[i].instanceId);
|
||||
@ -1224,7 +1231,11 @@ void SwitchOverSetting(int time_out, int instanceType, uint32 ptrIndex, int memb
|
||||
&(g_instance_group_report_status_ptr[ptrIndex].instance_status.command_member[memberIndex]);
|
||||
cmd->command_status = INSTANCE_COMMAND_WAIT_EXEC;
|
||||
cmd->pengding_command = (int)MSG_CM_AGENT_SWITCHOVER;
|
||||
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
|
||||
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
|
||||
cmd->cmdPur = INSTANCE_ROLE_MAIN_STANDBY;
|
||||
} else {
|
||||
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
|
||||
}
|
||||
cmd->cmdSour = INSTANCE_ROLE_STANDBY;
|
||||
cmd->time_out = time_out;
|
||||
cmd->peerInstId = GetPeerInstId(ptrIndex, memberIndex);
|
||||
|
@ -110,6 +110,7 @@ void ProcessCtlToCmSwitchoverMsg(MsgRecvInfo* recvMsgInfo, const ctl_to_cm_switc
|
||||
|
||||
// tell cm_ctl will switchover to primary or standby
|
||||
ackMsg.pengding_command = localRole;
|
||||
write_runlog(LOG, "ackMsg.pengding_command: %d\n", localRole);
|
||||
(void)RespondMsg(recvMsgInfo, 'S', (char *)(&ackMsg), sizeof(ackMsg));
|
||||
if (ackMsg.command_result == CM_INVALID_COMMAND) {
|
||||
return;
|
||||
@ -435,7 +436,11 @@ static void process_single_instance_switchover_info(switchover_instance *instanc
|
||||
cm_instance_command_status *cmd = &(instReport->command_member[j]);
|
||||
cmd->command_status = INSTANCE_COMMAND_WAIT_EXEC;
|
||||
cmd->pengding_command = (int)MSG_CM_AGENT_SWITCHOVER;
|
||||
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
|
||||
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
|
||||
cmd->cmdPur = INSTANCE_ROLE_MAIN_STANDBY;
|
||||
} else {
|
||||
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
|
||||
}
|
||||
cmd->cmdSour = INSTANCE_ROLE_STANDBY;
|
||||
cmd->peerInstId = GetPeerInstId(i, j);
|
||||
cmd->time_out = ctl_to_cm_swithover_ptr->wait_seconds;
|
||||
@ -701,7 +706,9 @@ void ProcessCtlToCmSwitchoverAzMsg(MsgRecvInfo* recvMsgInfo, ctl_to_cm_switchove
|
||||
break;
|
||||
} else if (g_instance_role_group_ptr[i].instanceMember[j].instanceType == INSTANCE_TYPE_DATANODE &&
|
||||
((g_instance_group_report_status_ptr[i].instance_status.data_node_member[j]
|
||||
.local_status.local_role == INSTANCE_ROLE_PRIMARY && sameAz))) {
|
||||
.local_status.local_role == INSTANCE_ROLE_PRIMARY ||
|
||||
g_instance_group_report_status_ptr[i].instance_status.data_node_member[j]
|
||||
.local_status.local_role == INSTANCE_ROLE_MAIN_STANDBY) && sameAz)) {
|
||||
primaryInstanceInTargetAZ = true;
|
||||
noNeedDoDnNum++;
|
||||
checkSwitchoverInstance = true;
|
||||
@ -849,7 +856,8 @@ static int SwitchoverDone(void)
|
||||
int dnLocalRole = g_instance_group_report_status_ptr[i].instance_status.data_node_member[j]
|
||||
.local_status.local_role;
|
||||
bool enCheck = (CheckInstInSyncList(i, j, str) == SYNCLIST_IS_FINISTH);
|
||||
if (initRole == INSTANCE_ROLE_PRIMARY && dnLocalRole != INSTANCE_ROLE_PRIMARY &&
|
||||
if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
(dnLocalRole != INSTANCE_ROLE_PRIMARY && dnLocalRole != INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
*command != (int)MSG_CM_AGENT_SWITCHOVER && enCheck) {
|
||||
if (localStatus == INSTANCE_HA_STATE_NORMAL) {
|
||||
set_pending_command(i, j, MSG_CM_AGENT_SWITCHOVER, SWITCHOVER_DEFAULT_WAIT);
|
||||
@ -864,23 +872,26 @@ static int SwitchoverDone(void)
|
||||
}
|
||||
}
|
||||
|
||||
if (initRole == INSTANCE_ROLE_PRIMARY && dnLocalRole == INSTANCE_ROLE_STANDBY &&
|
||||
if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) && dnLocalRole == INSTANCE_ROLE_STANDBY &&
|
||||
localStatus == INSTANCE_HA_STATE_PROMOTING && *command == MSG_CM_AGENT_SWITCHOVER) {
|
||||
anyInitPrimarySwitchover = true;
|
||||
}
|
||||
|
||||
/* must keep three or in this if condition, otherwise will result to some problem. */
|
||||
if (*command == MSG_CM_AGENT_SWITCHOVER &&
|
||||
((dnLocalRole != INSTANCE_ROLE_PRIMARY && initRole == INSTANCE_ROLE_PRIMARY) ||
|
||||
(g_instance_role_group_ptr[i].instanceMember[j].role == INSTANCE_ROLE_PRIMARY &&
|
||||
(((dnLocalRole != INSTANCE_ROLE_PRIMARY && dnLocalRole != INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
(initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY)) ||
|
||||
((g_instance_role_group_ptr[i].instanceMember[j].role == INSTANCE_ROLE_PRIMARY ||
|
||||
g_instance_role_group_ptr[i].instanceMember[j].role == INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
localStatus != INSTANCE_HA_STATE_NORMAL))) {
|
||||
(void)pthread_rwlock_unlock(&(g_instance_group_report_status_ptr[i].lk_lock));
|
||||
write_runlog(LOG, "%s: inst(%u) is doing switchover.\n", str, instanceId);
|
||||
return SWITCHOVER_EXECING;
|
||||
}
|
||||
|
||||
if (*command != MSG_CM_AGENT_SWITCHOVER && dnLocalRole != INSTANCE_ROLE_PRIMARY &&
|
||||
initRole == INSTANCE_ROLE_PRIMARY) {
|
||||
if (*command != MSG_CM_AGENT_SWITCHOVER &&
|
||||
(dnLocalRole != INSTANCE_ROLE_PRIMARY && dnLocalRole != INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
(initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY)) {
|
||||
write_runlog(LOG, "line %d: instanceId(%u) has not do switchover.\n", __LINE__, instanceId);
|
||||
dnCount++;
|
||||
partlySwitchover = true;
|
||||
@ -2197,38 +2208,44 @@ void ProcessCtlToCmSwitchoverAllMsg(MsgRecvInfo* recvMsgInfo, const ctl_to_cm_sw
|
||||
bool isCatchUp = IsInCatchUpState(i, j);
|
||||
bool isCheckSyncList = (CheckInstInSyncList(i, j, str) == SYNCLIST_IS_FINISTH);
|
||||
if ((dnLocalRole == INSTANCE_ROLE_STANDBY || dnLocalRole == INSTANCE_ROLE_CASCADE_STANDBY) &&
|
||||
initRole == INSTANCE_ROLE_PRIMARY && localStatus == INSTANCE_HA_STATE_NORMAL && !isInVoteAz &&
|
||||
(initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
localStatus == INSTANCE_HA_STATE_NORMAL && !isInVoteAz &&
|
||||
!isCatchUp && isCheckSyncList) {
|
||||
SetSwitchoverInSwitchoverProcess(i, j, switchoverMsg->wait_seconds);
|
||||
needDoDnNum++;
|
||||
} else if (initRole == INSTANCE_ROLE_PRIMARY && localStatus != INSTANCE_HA_STATE_NORMAL) {
|
||||
} else if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
localStatus != INSTANCE_HA_STATE_NORMAL) {
|
||||
write_runlog(LOG, "dn instance=%u status=%s, will not switchover for status is unNormal.\n",
|
||||
instanceId, datanode_dbstate_int_to_string(localStatus));
|
||||
msgBalanceResult.instances[imbalanceIndex++] = instanceId;
|
||||
noNeedDoDnNum++;
|
||||
} else if (initRole == INSTANCE_ROLE_PRIMARY && dnLocalRole == INSTANCE_ROLE_PRIMARY) {
|
||||
} else if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
(dnLocalRole == INSTANCE_ROLE_PRIMARY || dnLocalRole == INSTANCE_ROLE_MAIN_STANDBY)) {
|
||||
write_runlog(LOG,
|
||||
"dn instance=%u status=%s, will not switchover for status is already primary.\n",
|
||||
instanceId, datanode_dbstate_int_to_string(localStatus));
|
||||
noNeedDoDnNum++;
|
||||
} else if (initRole == INSTANCE_ROLE_PRIMARY && isInVoteAz && isCheckSyncList) {
|
||||
} else if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) &&
|
||||
isInVoteAz && isCheckSyncList) {
|
||||
write_runlog(LOG, "dn instance=%u status=%s, will not switchover in vote AZ.\n", instanceId,
|
||||
datanode_dbstate_int_to_string(localStatus));
|
||||
noNeedDoDnNum++;
|
||||
} else if (initRole == INSTANCE_ROLE_PRIMARY && isCatchUp) {
|
||||
} else if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) && isCatchUp) {
|
||||
write_runlog(LOG,
|
||||
"dn instance=%u status=%s, will not switchover for the xlog location gap"
|
||||
"between the primary and standby is too large.\n",
|
||||
instanceId, datanode_dbstate_int_to_string(localStatus));
|
||||
if (dnLocalRole == INSTANCE_ROLE_STANDBY && initRole == INSTANCE_ROLE_PRIMARY) {
|
||||
if (dnLocalRole == INSTANCE_ROLE_STANDBY &&
|
||||
(initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY)) {
|
||||
msgBalanceResult.instances[imbalanceIndex++] = instanceId;
|
||||
}
|
||||
noNeedDoDnNum++;
|
||||
} else if (initRole == INSTANCE_ROLE_PRIMARY && isCheckSyncList) {
|
||||
} else if ((initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY) && isCheckSyncList) {
|
||||
write_runlog(LOG,
|
||||
"dn instance=%u status=%s, will not switchover for the inst not in synclist.\n", instanceId,
|
||||
datanode_dbstate_int_to_string(localStatus));
|
||||
if (dnLocalRole == INSTANCE_ROLE_STANDBY && initRole == INSTANCE_ROLE_PRIMARY) {
|
||||
if (dnLocalRole == INSTANCE_ROLE_STANDBY &&
|
||||
(initRole == INSTANCE_ROLE_PRIMARY || initRole == INSTANCE_ROLE_MAIN_STANDBY)) {
|
||||
msgBalanceResult.instances[imbalanceIndex++] = instanceId;
|
||||
}
|
||||
noNeedDoDnNum++;
|
||||
|
@ -143,7 +143,11 @@ void SetSwitchoverPendingCmd(uint32 groupIdx, int32 memIdx, int32 waitSecond, co
|
||||
cmd->cmdSour = INSTANCE_ROLE_CASCADE_STANDBY;
|
||||
cmd->cmdRealPur = INSTANCE_ROLE_PRIMARY;
|
||||
} else {
|
||||
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
|
||||
if (g_ssDoubleClusterMode == SS_DOUBLE_STANDBY) {
|
||||
cmd->cmdPur = INSTANCE_ROLE_MAIN_STANDBY;
|
||||
} else {
|
||||
cmd->cmdPur = INSTANCE_ROLE_PRIMARY;
|
||||
}
|
||||
cmd->cmdSour = INSTANCE_ROLE_STANDBY;
|
||||
cmd->cmdRealPur = INSTANCE_ROLE_INIT;
|
||||
if (isNeedDelay) {
|
||||
|
@ -476,6 +476,7 @@ extern int32 g_clusterArbiTime;
|
||||
extern bool g_isPauseArbitration;
|
||||
extern char g_cmManualPausePath[MAX_PATH_LEN];
|
||||
extern uint32 g_waitStaticPrimaryTimes;
|
||||
extern SSDoubleClusterMode g_ssDoubleClusterMode;
|
||||
|
||||
extern void clean_init_cluster_state();
|
||||
extern void instance_delay_arbitrate_time_out_direct_clean(uint32 group_index, int member_index,
|
||||
|
Loading…
x
Reference in New Issue
Block a user