!110 refresh replconninfo to identify cascade standby after switchover/failover

Merge pull request !110 from LiHeng/master
This commit is contained in:
opengauss-bot
2021-03-17 10:27:02 +08:00
committed by Gitee
4 changed files with 235 additions and 13 deletions

View File

@ -5359,7 +5359,9 @@ class ClusterInstanceConfig():
(dbInst.port + 4), pj.haIps[i], (dbInst.port + 4), pj.haIps[i],
pj.haPort, pj.port + 5, pj.haPort, pj.port + 5,
pj.port + 4) pj.port + 4)
if pj.instanceType == DefaultValue.CASCADE_STANDBY:
chanalInfo += " iscascade=true"
connInfo1.append(chanalInfo) connInfo1.append(chanalInfo)
else: else:
for pj in iter(peerInsts): for pj in iter(peerInsts):
@ -5381,7 +5383,8 @@ class ClusterInstanceConfig():
(dbInst.port + 4), pj.haIps[i], (dbInst.port + 4), pj.haIps[i],
pj.haPort, pj.port + 5, pj.haPort, pj.port + 5,
(pj.port + 4)) (pj.port + 4))
if pj.instanceType == DefaultValue.CASCADE_STANDBY:
chanalInfo += " iscascade=true"
connInfo1.append(chanalInfo) connInfo1.append(chanalInfo)
return connInfo1, nodename return connInfo1, nodename

View File

@ -5979,7 +5979,12 @@ class dbClusterInfo():
def isSingleNode(self): def isSingleNode(self):
return (self.__getDnInstanceNum() <= 1) return (self.__getDnInstanceNum() <= 1)
def createDynamicConfig(self, user, localHostName, sshtool): def doRefreshConf(self, user, localHostName, sshtool):
self.__createDynamicConfig(user, localHostName, sshtool)
self.__create_simple_datanode_config(user, localHostName, sshtool)
self.__reset_replconninfo(user, sshtool)
def __createDynamicConfig(self, user, localHostName, sshtool):
""" """
function : Save cluster info into to dynamic config function : Save cluster info into to dynamic config
input : String,int input : String,int
@ -6045,6 +6050,8 @@ class dbClusterInfo():
raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % \ raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % \
"dynamic configuration file" + "dynamic configuration file" +
" Error: \n%s" % str(e)) " Error: \n%s" % str(e))
def __create_simple_datanode_config(self, user, localhostname, sshtool):
simpleDNConfig = self.__getDynamicSimpleDNConfig(user) simpleDNConfig = self.__getDynamicSimpleDNConfig(user)
if os.path.exists(simpleDNConfig): if os.path.exists(simpleDNConfig):
cmd = "rm -f %s" % simpleDNConfig cmd = "rm -f %s" % simpleDNConfig
@ -6058,7 +6065,7 @@ class dbClusterInfo():
try: try:
with open(simpleDNConfig, "w") as fp: with open(simpleDNConfig, "w") as fp:
for dninfo in tempstatus: for dninfo in tempstatus:
dnstatus = dninfo.split()[-2] dnstatus = dninfo.split()[6]
dnname = dninfo.split()[1] dnname = dninfo.split()[1]
if dnstatus not in statusdic: if dnstatus not in statusdic:
fp.write("%s=%d\n" % fp.write("%s=%d\n" %
@ -6073,7 +6080,7 @@ class dbClusterInfo():
"dynamic configuration file" "dynamic configuration file"
+ " Error: \n%s" % str(e)) + " Error: \n%s" % str(e))
try: try:
self.__sendDynamicCfgToAllNodes(localHostName, self.__sendDynamicCfgToAllNodes(localhostname,
simpleDNConfig, simpleDNConfig,
simpleDNConfig) simpleDNConfig)
except Exception as e: except Exception as e:
@ -6083,6 +6090,18 @@ class dbClusterInfo():
"dynamic configuration file" + "dynamic configuration file" +
" Error: \n%s" % str(e)) " Error: \n%s" % str(e))
def __reset_replconninfo(self, user, sshtool):
    """
    function : Run local/Resetreplconninfo.py with "-t reset" on every
               cluster node through the given ssh tool (added for cascade
               standby support).
    input : user    - cluster user passed to the script via -U
            sshtool - object providing getSshStatusOutput(cmd, hosts)
    output : NA
    raises : Exception (GAUSS_51400) when any node does not report
             'Success' for the command
    """
    # The script path is resolved relative to this source file.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    local_script = script_dir + '/../../local/Resetreplconninfo.py'
    cmd = "python3 %s -U %s -t reset" % (local_script, user)
    status, output = sshtool.getSshStatusOutput(
        cmd, self.getClusterNodeNames())
    for node in self.getClusterNodeNames():
        if status[node] == 'Success':
            continue
        # One failed node is enough to abort the whole refresh.
        raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"]
                        % cmd + "Error:\n%s" % output)
def __packDynamicNodeInfo(self, dbNode, localHostName, sshtool): def __packDynamicNodeInfo(self, dbNode, localHostName, sshtool):
# node id # node id
info = struct.pack("I", dbNode.id) info = struct.pack("I", dbNode.id)

View File

@ -203,7 +203,7 @@ class OmImplOLAP(OmImpl):
self.logger.log("Starting %s." % startType) self.logger.log("Starting %s." % startType)
self.logger.log("=========================================") self.logger.log("=========================================")
hostName = DefaultValue.GetHostIpOrName() hostName = DefaultValue.GetHostIpOrName()
#get the newest dynaminc config and send to other node # get the newest dynaminc config and send to other node
self.clusterInfo.checkClusterDynamicConfig(self.context.user, hostName) self.clusterInfo.checkClusterDynamicConfig(self.context.user, hostName)
if self.context.g_opts.nodeName == "": if self.context.g_opts.nodeName == "":
hostList = self.clusterInfo.getClusterNodeNames() hostList = self.clusterInfo.getClusterNodeNames()
@ -217,10 +217,10 @@ class OmImplOLAP(OmImpl):
else: else:
time_out = self.time_out time_out = self.time_out
cmd = "source %s; %s -U %s -R %s -t %s --security-mode=%s" % ( cmd = "source %s; %s -U %s -R %s -t %s --security-mode=%s" % (
self.context.g_opts.mpprcFile, self.context.g_opts.mpprcFile,
OMCommand.getLocalScript("Local_StartInstance"), OMCommand.getLocalScript("Local_StartInstance"),
self.context.user, self.context.clusterInfo.appPath, time_out, self.context.user, self.context.clusterInfo.appPath, time_out,
self.context.g_opts.security_mode) self.context.g_opts.security_mode)
if self.dataDir != "": if self.dataDir != "":
cmd += " -D %s" % self.dataDir cmd += " -D %s" % self.dataDir
failedOutput = '' failedOutput = ''
@ -342,8 +342,9 @@ class OmImplOLAP(OmImpl):
"No need to generate dynamic configuration file for one node.") "No need to generate dynamic configuration file for one node.")
return return
self.logger.log("Generating dynamic configuration file for all nodes.") self.logger.log("Generating dynamic configuration file for all nodes.")
hostName = DefaultValue.GetHostIpOrName() hostname = DefaultValue.GetHostIpOrName()
sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames()) sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames())
self.context.clusterInfo.createDynamicConfig(self.context.user, self.context.clusterInfo.doRefreshConf(self.context.user, hostname,
hostName, sshtool) sshtool)
self.logger.log("Successfully generated dynamic configuration file.") self.logger.log("Successfully generated dynamic configuration file.")

View File

@ -0,0 +1,199 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
# http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description : Resetreplconninfo.py is a utility to reset local replconninfo.
#############################################################################
import getopt
import os
import sys
import subprocess
sys.path.append(sys.path[0] + "/../")
from gspylib.common.GaussLog import GaussLog
from gspylib.common.Common import DefaultValue
from gspylib.common.ErrorCode import ErrorCode
########################################################################
# Global variables define
########################################################################
g_opts = None
########################################################################
class CmdOptions():
    """
    class: CmdOptions
    Plain holder for the values parsed from the command line.
    """

    def __init__(self):
        """
        function: start with every option unset (empty string)
        """
        # -U : cluster user
        self.clusterUser = ""
        # -t : action to perform
        self.action = ""
def usage():
    """
Resetreplconninfo.py is a utility to reset replconninfos on local node.
Usage:
python3 Resetreplconninfo.py --help
python3 Resetreplconninfo.py -U omm -t reset
General options:
-U Cluster user.
-t reset.
--help Show help information for this utility,
and exit the command line mode.
"""
    # The docstring above doubles as the help text: it is printed verbatim,
    # so any edit to it changes what the user sees.
    print(usage.__doc__)
def parseCommandLine():
    """
    function: parse command line
    Reads sys.argv and stores the result in the module level g_opts.
    Recognised options: -U <user>, -t <action>, -h / --help.
    input : NA
    output: NA
    """
    global g_opts

    try:
        opts, args = getopt.getopt(sys.argv[1:], "U:t:h", ["help"])
    except Exception as e:
        # Unknown option or missing option value: show help, then report.
        usage()
        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e))

    # Positional arguments are not supported; complain about the first one.
    if args:
        usage()
        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"]
                               % str(args[0]))

    g_opts = CmdOptions()
    for key, value in opts:
        if key in ("-h", "--help"):
            usage()
            sys.exit(0)
        if key == "-t":
            g_opts.action = value
        elif key == "-U":
            g_opts.clusterUser = value
def checkParameter():
    """
    function: check parameter
    Both -U and -t must be given, and "reset" is the only action accepted
    for -t. Each violation is reported through GaussLog.exitWithError.
    input : NA
    output: NA
    """
    # Mandatory options, checked in the order -U, -t.
    required = (('U', g_opts.clusterUser), ('t', g_opts.action))
    for flag, value in required:
        if value == "":
            GaussLog.exitWithError(
                ErrorCode.GAUSS_500["GAUSS_50001"] % flag + ".")

    if g_opts.action != "reset":
        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % 't')
class Resetreplconninfo():
    """
    class: Resetreplconninfo
    Keeps the "iscascade=true" marker of the local datanode's replconninfoN
    settings in line with the roles currently reported by
    "gs_om -t status --detail".
    """

    # Role text compared against the status output for a cascade standby.
    CASCADE_ROLE = "Cascade"
    # Marker appended to / removed from a replconninfo value.
    CASCADE_FLAG = "iscascade=true"

    def __init__(self):
        """
        function: work out which environment file has to be sourced before
                  calling the cluster tools
        """
        # get mpprc file
        envfile = os.getenv('MPPDB_ENV_SEPARATE_PATH')
        if envfile is not None and envfile != "":
            # Escape backslashes and double quotes before the path is
            # embedded in a shell command line.
            self.userProfile = \
                envfile.replace("\\", "\\\\").replace('"', '\\"\\"')
        else:
            self.userProfile = "~/.bashrc"

    def __getStatusByOM(self):
        """
        function : Run "gs_om -t status --detail" and return the last line
                   of its output.
        output : String
        raises : Exception (GAUSS_51400) when the command fails
        """
        cmd = "source %s;gs_om -t status --detail" % self.userProfile
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"]
                            % cmd + " Error: \n%s" % output)
        # NOTE(review): the last line is assumed to hold all datanode
        # entries separated by '|' - confirm against the gs_om output.
        return output.split("\n")[-1]

    @classmethod
    def _refreshCascadeFlag(cls, replinfo, remote_ip_dict):
        """
        function : Decide whether one replconninfo value needs its
                   "iscascade=true" marker added or removed.
        input : replinfo       - the text between the quotes of a
                                 replconninfoN setting
                remote_ip_dict - {peer ip: role text}
        output : the corrected value (items joined by single spaces), or
                 None when no peer is referenced or nothing has to change
        """
        items = replinfo.split()

        # Compare whole "key=value" values with the peer addresses. A plain
        # substring test would let peer 10.0.0.1 match a setting that only
        # mentions 10.0.0.11 and apply the wrong role.
        role = None
        for item in items:
            peer_ip = item.partition("=")[2]
            if peer_ip in remote_ip_dict:
                role = remote_ip_dict[peer_ip]
                break
        if role is None:
            return None

        has_flag = cls.CASCADE_FLAG in items
        if role == cls.CASCADE_ROLE and not has_flag:
            items.append(cls.CASCADE_FLAG)
        elif role != cls.CASCADE_ROLE and has_flag:
            # Drop every copy of the marker, not just the first one.
            items = [item for item in items if item != cls.CASCADE_FLAG]
        else:
            return None
        return " ".join(items)

    def resetRepl(self):
        """
        function: reset Repl
                  For every replconninfoN of the local datanode, read the
                  current value with "gs_guc check", fix the iscascade
                  marker according to the peer's role and, only when the
                  value changed, apply it with "gs_guc reload".
        input : NA
        output: NA
        raises: Exception when the local node is missing from the status
                line, or when a gs_guc command fails or returns output
                that cannot be parsed
        """
        # Ignore blank segments so a stray '|' does not break the parsing.
        status_list = [item for item in self.__getStatusByOM().split('|')
                       if item.strip()]
        # One replconninfo per peer: replconninfo1 .. replconninfo(N-1).
        repl_list = ['replconninfo' + str(i) for i in
                     range(1, len(status_list))]

        localhost = DefaultValue.GetHostIpOrName()
        local_dndir = None
        remote_ip_dict = {}
        for info_all in status_list:
            info = info_all.split()
            # NOTE(review): field positions are assumed to be 1 = node
            # name, 2 = ip, 4 = data directory, 6 = role - confirm against
            # the gs_om status format.
            if info[1] == localhost:
                local_dndir = info[4]
            else:
                remote_ip_dict[info[2]] = info[6]
        if local_dndir is None:
            raise Exception("Failed to find the local node [%s] in the "
                            "cluster status." % localhost)

        head_cmd = "source %s;" % self.userProfile
        for repl in repl_list:
            cmd = head_cmd + 'gs_guc check -N %s -D %s -c "%s"' % \
                (localhost, local_dndir, repl)
            status, output = subprocess.getstatusoutput(cmd)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                                cmd + " Error:\n%s" % output)

            # The second to last output line is expected to look like
            # name='value'; splitting on the quote yields [name=, value, ..].
            lines = output.split('\n')
            replinfo_all = \
                lines[-2].strip().split("'") if len(lines) > 1 else []
            if len(replinfo_all) < 2:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                                cmd + " Error:\n%s" % output)

            # get remote ip and check iscascade
            new_value = self._refreshCascadeFlag(replinfo_all[1],
                                                 remote_ip_dict)
            if new_value is None:
                continue

            new_setting = replinfo_all[0] + "'" + new_value + "'"
            cmd = head_cmd + 'gs_guc reload -N %s -D %s -c "%s"' % \
                (localhost, local_dndir, new_setting)
            status, output = subprocess.getstatusoutput(cmd)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                                cmd + " Error:\n%s" % output)
if __name__ == '__main__':
    # Script entry: validate the command line, then refresh the local
    # replconninfo settings. Any failure is reported through GaussLog.
    try:
        parseCommandLine()
        checkParameter()
        Resetreplconninfo().resetRepl()
    except Exception as err:
        GaussLog.exitWithError(str(err))

    sys.exit(0)