check cluster status before expansion

This commit is contained in:
zhang_xubo
2020-12-26 17:02:19 +08:00
parent 081713c378
commit 84f246fef2
2 changed files with 99 additions and 27 deletions

View File

@ -175,9 +175,10 @@ class ExpansionImpl():
logPath = self.context.clusterInfoDict["logPath"]
corePath = self.context.clusterInfoDict["corePath"]
toolPath = self.context.clusterInfoDict["toolPath"]
mppdbconfig = ""
tmpMppdbPath = DefaultValue.getEnv("PGHOST")
if not tmpMppdbPath:
tmpMppdbPath = toolPath
if tmpMppdbPath:
mppdbconfig = '<PARAM name="tmpMppdbPath" value="%s" />' % tmpMppdbPath
xmlConfig = """\
<?xml version="1.0" encoding="UTF-8"?>
@ -189,7 +190,7 @@ class ExpansionImpl():
<PARAM name="gaussdbAppPath" value="{appPath}" />
<PARAM name="gaussdbLogPath" value="{logPath}" />
<PARAM name="gaussdbToolPath" value="{toolPath}" />
<PARAM name="tmpMppdbPath" value="{mppdbPath}" />
{mappdbConfig}
<PARAM name="corePath" value="{corePath}"/>
<PARAM name="clusterType" value="single-inst"/>
</CLUSTER>
@ -210,7 +211,7 @@ class ExpansionImpl():
""".format(nodeName=nodeName,backIp=backIp,appPath=appPath,
logPath=logPath,toolPath=toolPath,corePath=corePath,
sshIp=sshIp,port=port,dataNode=dataNode,azName=self.context.azName,
mppdbPath=tmpMppdbPath)
mappdbConfig=mppdbconfig)
return xmlConfig
def changeUser(self):
@ -221,11 +222,15 @@ class ExpansionImpl():
GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50300"] % user)
user_name = pw_record.pw_name
user_uid = pw_record.pw_uid
user_gid = pw_record.pw_gid
env = os.environ.copy()
user_uid = pw_record.pw_uid
user_gid = pw_record.pw_gid
os.setgid(user_gid)
os.setuid(user_uid)
os.environ["HOME"] = pw_record.pw_dir
os.environ["USER"] = user_name
os.environ["LOGNAME"] = user_name
os.environ["SHELL"] = pw_record.pw_shell
def initSshConnect(self, host, user='root'):
@ -583,25 +588,34 @@ retry for %s times" % start_retry_num)
"""
self.logger.debug("Start to generate and send cluster static file.\n")
primaryHosts = self.getPrimaryHostName()
command = "gs_om -t generateconf -X %s --distribute" % self.context.xmlFile
sshTool = SshTool([primaryHosts])
resultMap, outputCollect = sshTool.getSshStatusOutput(command,
[primaryHosts], self.envFile)
self.logger.debug(outputCollect)
self.cleanSshToolFile(sshTool)
primaryHost = self.getPrimaryHostName()
result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
for nodeName in self.context.nodeNameList:
nodeInfo = self.context.clusterInfoDict[nodeName]
nodeIp = nodeInfo["backIp"]
dataNode = nodeInfo["dataNode"]
exist_reg = r"(.*)%s[\s]*%s(.*)%s(.*)" % (nodeName, nodeIp, dataNode)
if not re.search(exist_reg, result) and nodeIp not in self.context.newHostList:
self.logger.debug("The node ip [%s] will not be added to cluster." % nodeIp)
dbNode = self.context.clusterInfo.getDbNodeByName(nodeName)
self.context.clusterInfo.dbNodes.remove(dbNode)
toolPath = self.context.clusterInfoDict["toolPath"]
appPath = self.context.clusterInfoDict["appPath"]
nodeNameList = self.context.nodeNameList
for hostName in nodeNameList:
hostSsh = SshTool([hostName])
toolPath = self.context.clusterInfoDict["toolPath"]
appPath = self.context.clusterInfoDict["appPath"]
srcFile = "%s/script/static_config_files/cluster_static_config_%s" \
% (toolPath, hostName)
static_config_dir = "%s/script/static_config_files" % toolPath
if not os.path.exists(static_config_dir):
os.makedirs(static_config_dir)
for dbNode in self.context.clusterInfo.dbNodes:
hostName = dbNode.name
staticConfigPath = "%s/script/static_config_files/cluster_static_config_%s" % \
(toolPath, hostName)
self.context.clusterInfo.saveToStaticConfig(staticConfigPath, dbNode.id)
srcFile = staticConfigPath
if not os.path.exists(srcFile):
GaussLog.exitWithError("Generate static file [%s] not found." \
% srcFile)
GaussLog.exitWithError("Generate static file [%s] not found." % srcFile)
hostSsh = SshTool([hostName])
targetFile = "%s/bin/cluster_static_config" % appPath
hostSsh.scpFiles(srcFile, targetFile, [hostName], self.envFile)
self.cleanSshToolFile(hostSsh)
@ -611,11 +625,11 @@ retry for %s times" % start_retry_num)
# Single-node database need start cluster after expansion
if self.isSingleNodeInstance:
primaryHost = self.getPrimaryHostName()
self.logger.debug("Single-Node instance need restart.\n")
self.commonGsCtl.queryOmCluster(primaryHosts, self.envFile)
self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
# if primary database not normal, restart it
primaryHost = self.getPrimaryHostName()
dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost,
dataNode, self.envFile)
@ -633,7 +647,7 @@ retry for %s times" % start_retry_num)
self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
MODE_STANDBY, self.envFile)
self.commonGsCtl.startOmCluster(primaryHosts, self.envFile)
self.commonGsCtl.startOmCluster(primaryHost, self.envFile)
def setGUCOnClusterHosts(self, hostNames=[]):
"""
@ -835,6 +849,63 @@ standby nodes.")
"""
self.checkUserAndGroupExists()
self.checkXmlFileAccessToUser()
self.checkClusterStatus()
self.validNodeInStandbyList()
def checkClusterStatus(self):
    """
    Check whether the cluster status is normal before expansion.

    Runs 'gs_om -t status --detail' on the current host as the cluster
    user and requires the output to report a normal primary instance.
    Exits the process via GaussLog.exitWithError when the status cannot
    be queried or the primary is not Normal.
    """
    self.logger.debug("Start to check cluster status.\n")
    curHostName = socket.gethostname()
    command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
        (self.user, self.envFile)
    sshTool = SshTool([curHostName])
    resultMap, outputCollect = sshTool.getSshStatusOutput(command,
        [curHostName], self.envFile)
    # Log the raw status output for troubleshooting, mirroring
    # validNodeInStandbyList.
    self.logger.debug(outputCollect)
    # Clean up the ssh tool before any possible exit path; the original
    # code leaked it because exitWithError never returns.
    self.cleanSshToolFile(sshTool)
    if outputCollect.find("Primary Normal") == -1:
        GaussLog.exitWithError("Unable to query current cluster status. " + \
            "Please import environment variables or " +\
            "check whether the cluster status is normal.")
    self.logger.debug("The primary database is normal.\n")
def validNodeInStandbyList(self):
    """
    check if the node has been installed in the cluster.

    Queries cluster status via 'gs_om -t status --detail', then splits
    self.context.newHostList into hosts already present in the cluster
    (skipped, with a log message) and hosts still to be expanded.
    Rewrites self.context.newHostList to contain only the latter and
    exits the process when nothing is left to expand.
    """
    self.logger.debug("Start to check if the nodes in standby list\n")
    curHostName = socket.gethostname()
    command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
        (self.user, self.envFile)
    sshTool = SshTool([curHostName])
    resultMap, outputCollect = sshTool.getSshStatusOutput(command,
        [curHostName], self.envFile)
    self.logger.debug(outputCollect)
    # NOTE: newHosts aliases self.context.newHostList; the pop() loop
    # below empties that list in place before it is reassigned.
    newHosts = self.context.newHostList
    standbyHosts = []
    existHosts = []
    while len(newHosts) > 0:
        hostIp = newHosts.pop()
        nodeName = self.context.backIpNameMap[hostIp]
        nodeInfo = self.context.clusterInfoDict[nodeName]
        # dataNode is looked up but not used in the match below;
        # kept as-is (doc-only change).
        dataNode = nodeInfo["dataNode"]
        # A node already in the cluster shows up in the status output
        # as "<nodeName> <hostIp>" — absence means it can be expanded.
        exist_reg = r"(.*)%s[\s]*%s(.*)" % (nodeName, hostIp)
        if not re.search(exist_reg, outputCollect):
            standbyHosts.append(hostIp)
        else:
            existHosts.append(hostIp)
    self.context.newHostList = standbyHosts
    if len(existHosts) > 0:
        self.logger.log("The nodes [%s] are already in the cluster. Skip expand these nodes." \
            % ",".join(existHosts))
    self.cleanSshToolFile(sshTool)
    if len(standbyHosts) == 0:
        self.logger.log("There is no node can be expanded.")
        sys.exit(0)
def checkXmlFileAccessToUser(self):
"""