!87 修复在线扩容bug及优化代码

Merge pull request !87 from 薛蒙恩/master
This commit is contained in:
opengauss-bot
2021-03-09 17:40:14 +08:00
committed by Gitee
4 changed files with 558 additions and 567 deletions

View File

@ -22,6 +22,8 @@
import os
import sys
import subprocess
import socket
package_path = os.path.dirname(os.path.realpath(__file__))
ld_path = package_path + "/gspylib/clib"
if 'LD_LIBRARY_PATH' not in os.environ:
@ -92,7 +94,7 @@ General options:
-V, --version Show version information.
"""
print(self.usage.__doc__)
def parseCommandLine(self):
"""
parse parameter from command line
@ -126,47 +128,34 @@ General options:
if (ParaDict.__contains__("nodename")):
self.newHostList = ParaDict.get("nodename")
def checkParameters(self):
    """
    function: Check parameter from command line
    input: NA
    output: NA

    Reports GAUSS_35701 through GaussLog.exitWithError for the first of
    user (-U), group (-G), xml file (-X) and new host list (-h) that is
    empty, and GAUSS_52936 when DefaultValue.isUnderUpgrade(self.user)
    is true.
    """
    # check user | group | xmlfile | node
    # Attributes are looked up by name, one at a time, so the checks run
    # in the same order (and as lazily) as individual "if" statements.
    requiredOptions = (
        ("user", "-U"),
        ("group", "-G"),
        ("xmlFile", "-X"),
        ("newHostList", "-h"),
    )
    for attrName, optionFlag in requiredOptions:
        # "not value" treats an unset (None) option like an empty one
        # instead of failing inside len()
        if not getattr(self, attrName):
            GaussLog.exitWithError(
                ErrorCode.GAUSS_357["GAUSS_35701"] % optionFlag)
    # check if upgrade action is exist
    if DefaultValue.isUnderUpgrade(self.user):
        GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"])
clusterInfo = ExpansipnClusterInfo()
def _getClusterInfoDict(self):
clusterInfo = ExpansionClusterInfo()
self.clusterInfo = clusterInfo
hostNameIpDict = clusterInfo.initFromXml(self.xmlFile)
clusterDict = clusterInfo.getClusterDirectorys()
backIpList = clusterInfo.getClusterBackIps()
nodeNameList = clusterInfo.getClusterNodeNames()
self.localIp = backIpList[0]
self.nodeNameList = nodeNameList
self.backIpNameMap = {}
for backip in backIpList:
self.backIpNameMap[backip] = clusterInfo.getNodeNameByBackIp(backip)
self.nodeNameList = clusterInfo.getClusterNodeNames()
# check parameter node must in xml config file
for nodeid in self.newHostList:
if nodeid not in backIpList:
GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35702"] % \
nodeid)
# get corepath and toolpath from xml file
# get corepath and toolpath from xml file
corePath = clusterInfo.readClustercorePath(self.xmlFile)
toolPath = clusterInfo.getToolPath(self.xmlFile)
# parse xml file and cache node info
@ -175,20 +164,16 @@ General options:
clusterInfoDict["logPath"] = clusterDict["logPath"][0]
clusterInfoDict["corePath"] = corePath
clusterInfoDict["toolPath"] = toolPath
for nodeName in nodeNameList:
for nodeName in self.nodeNameList:
hostInfo = hostNameIpDict[nodeName]
ipList = hostInfo[0]
portList = hostInfo[1]
backIp = ""
sshIp = ""
if len(ipList) == 1:
backIp = sshIp = ipList[0]
elif len(ipList) == 2:
backIp = ipList[0]
sshIp = ipList[1]
backIp = ipList[0]
sshIp = ipList[1]
port = portList[0]
cluster = clusterDict[nodeName]
dataNode = cluster[2]
dbNode = clusterInfo.getDbNodeByName(nodeName)
clusterInfoDict[nodeName] = {
"backIp": backIp,
"sshIp": sshIp,
@ -197,9 +182,10 @@ General options:
"localservice": int(port) + 4,
"heartBeatPort": int(port) + 3,
"dataNode": dataNode,
"instanceType": -1
"instanceType": -1,
"azPriority": dbNode.azPriority
}
nodeIdList = clusterInfo.getClusterNodeIds()
for id in nodeIdList:
insType = clusterInfo.getdataNodeInstanceType(id)
@ -207,30 +193,6 @@ General options:
clusterInfoDict[hostName]["instanceType"] = insType
self.clusterInfoDict = clusterInfoDict
for dbnode in clusterInfo.dbNodes:
# get azName of all hosts
self.hostAzNameMap[dbnode.backIps[0]] = dbnode.azName
# get cascadeRole of newHosts
if dbnode.backIps[0] in self.newHostList:
self.newHostCasRoleMap[dbnode.backIps[0]] = dbnode.cascadeRole
# check trust between the primary and other hosts
sshTool = SshTool(nodeNameList, timeout = 0)
retmap, output = sshTool.getSshStatusOutput("pwd")
for nodeName in nodeNameList:
# check root's trust
if retmap[nodeName] != DefaultValue.SUCCESS:
GaussLog.exitWithError("SSH could not connect to %s by root." % nodeName)
try:
sshTool.clenSshResultFiles()
except Exception as e:
self.logger.debug(str(e))
# check individual user's trust
checkUserTrustCmd = "su - %s -c 'ssh %s \"pwd\"'" % (self.user, nodeName)
(status, output) = subprocess.getstatusoutput(checkUserTrustCmd)
if status != 0:
GaussLog.exitWithError("SSH could not connect to %s by individual user." % nodeName)
def initLogs(self):
"""
init log file
@ -246,12 +208,102 @@ General options:
self.initLogger("gs_expansion")
self.logger.ignoreErr = True
def getExpansionInfo(self):
    """
    function: run the information collectors of this object, in order:
              cluster info dict, back-ip/name map, host/az-name map and
              new-host cascade-role map
    input: NA
    output: NA
    """
    collectorNames = (
        "_getClusterInfoDict",
        "_getBackIpNameMap",
        "_getHostAzNameMap",
        "_getNewHostCasRoleMap",
    )
    for collectorName in collectorNames:
        # resolve each collector only when it is about to run
        getattr(self, collectorName)()
class ExpansipnClusterInfo(dbClusterInfo):
def checkXmlIncludeNewHost(self):
    """
    check parameter node must in xml config file

    Each entry of self.newHostList is looked up in the back IP list
    returned by self.clusterInfo.getClusterBackIps(); an entry that is
    not found is reported with GAUSS_35702.
    """
    knownBackIps = self.clusterInfo.getClusterBackIps()
    for candidate in self.newHostList:
        if candidate in knownBackIps:
            continue
        errorText = ErrorCode.GAUSS_357["GAUSS_35702"] % candidate
        GaussLog.exitWithError(errorText)
def _getBackIpNameMap(self):
    """
    Store, for every back IP returned by
    self.clusterInfo.getClusterBackIps(), the node name returned by
    getNodeNameByBackIp in self.backIpNameMap (back IP -> node name).
    """
    cluster = self.clusterInfo
    for address in cluster.getClusterBackIps():
        self.backIpNameMap[address] = cluster.getNodeNameByBackIp(address)
def checkExecutingUserAndHost(self):
    """
    Check who runs the command and where it is run.

    GAUSS_50104 is reported when os.getuid() is not 0. GAUSS_50110 is
    reported when socket.gethostname() differs from the first name in
    self.nodeNameList whose "instanceType" in self.clusterInfoDict is 0
    (an empty string is used when no such node exists).
    """
    # check whether current user executing this command is root
    runByRoot = (os.getuid() == 0)
    if not runByRoot:
        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50104"])
    # check whether current host is primary host
    localName = socket.gethostname()
    # lazily stop at the first node flagged with instanceType 0
    primaryName = next(
        (candidate for candidate in self.nodeNameList
         if self.clusterInfoDict[candidate]["instanceType"] == 0),
        "")
    if localName == primaryName:
        return
    GaussLog.exitWithError(
        ErrorCode.GAUSS_501["GAUSS_50110"]
        % (localName + ", which is not primary."))
def checkTrust(self, hostList=None):
    """
    check trust between primary/current host and every host in hostList

    input : hostList - hosts to probe; when None (the default),
            self.nodeNameList is used
    output: NA

    Each host is probed twice: once through SshTool ("root's trust") and
    once as self.user via "su - <user> -c 'ssh <host> "pwd"'". Failing
    hosts are collected for both probes and then reported together in a
    single GAUSS_51100 error.
    """
    # Imported here so the method does not rely on the module-level
    # import block providing shlex.
    import shlex

    # identity test: only an omitted argument selects the default list
    if hostList is None:
        hostList = self.nodeNameList
    rootSSHExceptionHosts = []
    individualSSHExceptionHosts = []
    sshTool = SshTool(hostList, timeout=0)
    retmap, _ = sshTool.getSshStatusOutput("pwd")
    for host in hostList:
        # check root's trust
        if retmap[host] != DefaultValue.SUCCESS:
            rootSSHExceptionHosts.append(host)
        # Cleanup failures are only logged at debug level.
        # NOTE(review): this runs once per host; presumably it could be
        # hoisted out of the loop - confirm against SshTool.
        try:
            sshTool.clenSshResultFiles()
        except Exception as e:
            self.logger.debug(str(e))
        # check individual user's trust
        # Quote user and host so unusual characters cannot change the
        # shell command; for plain names the command text is unchanged.
        remoteCmd = 'ssh %s "pwd"' % shlex.quote(host)
        checkUserTrustCmd = "su - %s -c %s" % (
            shlex.quote(self.user), shlex.quote(remoteCmd))
        status, _ = subprocess.getstatusoutput(checkUserTrustCmd)
        if status != 0:
            individualSSHExceptionHosts.append(host)
    # output ssh exception info if ssh connect failed
    sections = []
    if rootSSHExceptionHosts:
        sections.append(
            "\n" + ", ".join(rootSSHExceptionHosts) + " by root")
    if individualSSHExceptionHosts:
        sections.append(
            "\n" + ", ".join(individualSSHExceptionHosts)
            + " by individual user")
    if sections:
        GaussLog.exitWithError(ErrorCode.GAUSS_511["GAUSS_51100"] %
                               "".join(sections))
def _getHostAzNameMap(self):
    """
    get azName of all hosts

    For every node in self.clusterInfo.dbNodes the node's azName is
    stored in self.hostAzNameMap under the node's first back IP.
    """
    for node in self.clusterInfo.dbNodes:
        firstBackIp = node.backIps[0]
        self.hostAzNameMap[firstBackIp] = node.azName
def _getNewHostCasRoleMap(self):
    """
    get cascadeRole of newHosts

    Only nodes whose first back IP is listed in self.newHostList are
    recorded; self.newHostCasRoleMap maps that IP to the node's
    cascadeRole.
    """
    for node in self.clusterInfo.dbNodes:
        firstBackIp = node.backIps[0]
        if firstBackIp not in self.newHostList:
            continue
        self.newHostCasRoleMap[firstBackIp] = node.cascadeRole
class ExpansionClusterInfo(dbClusterInfo):
def __init__(self):
dbClusterInfo.__init__(self)
def getToolPath(self, xmlFile):
"""
function : Read tool path from default xml file
@ -269,6 +321,7 @@ class ExpansipnClusterInfo(dbClusterInfo):
checkPathVaild(toolPath)
return toolPath
if __name__ == "__main__":
"""
"""
@ -276,6 +329,10 @@ if __name__ == "__main__":
expansion.parseCommandLine()
expansion.checkParameters()
expansion.initLogs()
expansion.getExpansionInfo()
expansion.checkXmlIncludeNewHost()
expansion.checkExecutingUserAndHost()
expansion.checkTrust()
expImpl = ExpansionImpl(expansion)
expImpl.run()

View File

@ -3184,36 +3184,24 @@ class dbClusterInfo():
"with cm and etcd") + errMsg)
# create a dictionary
nodeipport[dbNode.name] = [nodeips, nodeports]
# delete redundant records
self.__Deduplication(nodeports)
self.__Deduplication(nodeips)
# check port and ip
self.__checkPortandIP(nodeips, nodeports, dbNode.name)
return nodeipport
def __Deduplication(self, currentlist):
    """
    function : Delete the deduplication.
    input : []
    output : NA

    The list is sorted and de-duplicated IN PLACE, so every other
    reference to the same list object sees the change. Nothing is
    returned.
    """
    currentlist.sort()
    # After sorting, equal items are adjacent, so one pass comparing
    # each item with the last kept one removes all duplicates without
    # the repeated list.count()/del scans.
    uniqueItems = []
    for item in currentlist:
        if not uniqueItems or uniqueItems[-1] != item:
            uniqueItems.append(item)
    # slice assignment keeps the caller's list object
    currentlist[:] = uniqueItems
def __checkPortandIP(self, ips, ports, name):
"""
function : Check port and IP.
input : String,int,string
output : NA
"""
for port in ports:
ipsCopy = list(set(ips))
portsCopy = list(set(ports))
for port in portsCopy:
if (not self.__isPortValid(port)):
raise Exception(ErrorCode.GAUSS_512["GAUSS_51233"]
% (port, name) + " Please check it.")
for ip in ips:
for ip in ipsCopy:
if (not self.__isIpValid(ip)):
raise Exception(ErrorCode.GAUSS_506["GAUSS_50603"] + \
"The IP address is: %s." % ip + " Please "

View File

@ -1112,8 +1112,8 @@ class ErrorCode():
"detail.",
"GAUSS_35704": "[GAUSS-35704] %s [%s] does not exist on node [%s].",
"GAUSS_35705": "[GAUSS-35705] Error, the database version is "
"inconsistent in %s: %s"
"inconsistent in %s: %s",
"GAUSS_35706": "[GAUSS-35706] All new hosts %s failed."
}
##########################################################################

File diff suppressed because it is too large Load Diff