check cluster status before expansion
@@ -175,9 +175,10 @@ class ExpansionImpl():
        logPath = self.context.clusterInfoDict["logPath"]
        corePath = self.context.clusterInfoDict["corePath"]
        toolPath = self.context.clusterInfoDict["toolPath"]
        mppdbconfig = ""
        tmpMppdbPath = DefaultValue.getEnv("PGHOST")
        if not tmpMppdbPath:
            tmpMppdbPath = toolPath
        if tmpMppdbPath:
            mppdbconfig = '<PARAM name="tmpMppdbPath" value="%s" />' % tmpMppdbPath

        xmlConfig = """\
<?xml version="1.0" encoding="UTF-8"?>
@@ -189,7 +190,7 @@ class ExpansionImpl():
<PARAM name="gaussdbAppPath" value="{appPath}" />
<PARAM name="gaussdbLogPath" value="{logPath}" />
<PARAM name="gaussdbToolPath" value="{toolPath}" />
<PARAM name="tmpMppdbPath" value="{mppdbPath}" />
{mappdbConfig}
<PARAM name="corePath" value="{corePath}"/>
<PARAM name="clusterType" value="single-inst"/>
</CLUSTER>
@@ -210,7 +211,7 @@ class ExpansionImpl():
        """.format(nodeName=nodeName,backIp=backIp,appPath=appPath,
                   logPath=logPath,toolPath=toolPath,corePath=corePath,
                   sshIp=sshIp,port=port,dataNode=dataNode,azName=self.context.azName,
                   mppdbPath=tmpMppdbPath)
                   mappdbConfig=mppdbconfig)
        return xmlConfig
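A standalone sketch (not part of this commit) of the tmpMppdbPath handling above: prefer the PGHOST environment variable and fall back to the tool path, emitting the extra <PARAM> element only when a path is available. os.environ.get stands in for DefaultValue.getEnv, and the helper names and sample path are made up.

import os

def resolve_tmp_mppdb_path(tool_path):
    # Prefer $PGHOST; fall back to the tool path when the variable is unset or empty.
    return os.environ.get("PGHOST") or tool_path

def build_mppdb_param(tmp_mppdb_path):
    # Emit the optional <PARAM> element only when a usable path was resolved.
    if not tmp_mppdb_path:
        return ""
    return '<PARAM name="tmpMppdbPath" value="%s" />' % tmp_mppdb_path

if __name__ == "__main__":
    print(build_mppdb_param(resolve_tmp_mppdb_path("/opt/gauss/tool")))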

    def changeUser(self):
@@ -221,11 +222,15 @@ class ExpansionImpl():
            GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50300"] % user)

        user_name = pw_record.pw_name
        user_uid = pw_record.pw_uid
        user_gid = pw_record.pw_gid
        env = os.environ.copy()
        user_uid = pw_record.pw_uid
        user_gid = pw_record.pw_gid
        os.setgid(user_gid)
        os.setuid(user_uid)
        os.environ["HOME"] = pw_record.pw_dir
        os.environ["USER"] = user_name
        os.environ["LOGNAME"] = user_name
        os.environ["SHELL"] = pw_record.pw_shell


    def initSshConnect(self, host, user='root'):

@@ -583,25 +588,34 @@ retry for %s times" % start_retry_num)
        """
        self.logger.debug("Start to generate and send cluster static file.\n")

        primaryHosts = self.getPrimaryHostName()
        command = "gs_om -t generateconf -X %s --distribute" % self.context.xmlFile
        sshTool = SshTool([primaryHosts])
        resultMap, outputCollect = sshTool.getSshStatusOutput(command,
            [primaryHosts], self.envFile)
        self.logger.debug(outputCollect)
        self.cleanSshToolFile(sshTool)
        primaryHost = self.getPrimaryHostName()
        result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
        for nodeName in self.context.nodeNameList:
            nodeInfo = self.context.clusterInfoDict[nodeName]
            nodeIp = nodeInfo["backIp"]
            dataNode = nodeInfo["dataNode"]
            exist_reg = r"(.*)%s[\s]*%s(.*)%s(.*)" % (nodeName, nodeIp, dataNode)
            if not re.search(exist_reg, result) and nodeIp not in self.context.newHostList:
                self.logger.debug("The node ip [%s] will not be added to cluster." % nodeIp)
                dbNode = self.context.clusterInfo.getDbNodeByName(nodeName)
                self.context.clusterInfo.dbNodes.remove(dbNode)

        toolPath = self.context.clusterInfoDict["toolPath"]
        appPath = self.context.clusterInfoDict["appPath"]

        nodeNameList = self.context.nodeNameList

        for hostName in nodeNameList:
            hostSsh = SshTool([hostName])
            toolPath = self.context.clusterInfoDict["toolPath"]
            appPath = self.context.clusterInfoDict["appPath"]
            srcFile = "%s/script/static_config_files/cluster_static_config_%s" \
                % (toolPath, hostName)
        static_config_dir = "%s/script/static_config_files" % toolPath
        if not os.path.exists(static_config_dir):
            os.makedirs(static_config_dir)

        for dbNode in self.context.clusterInfo.dbNodes:
            hostName = dbNode.name
            staticConfigPath = "%s/script/static_config_files/cluster_static_config_%s" % \
                (toolPath, hostName)
            self.context.clusterInfo.saveToStaticConfig(staticConfigPath, dbNode.id)
            srcFile = staticConfigPath
            if not os.path.exists(srcFile):
                GaussLog.exitWithError("Generate static file [%s] not found." \
                    % srcFile)
            GaussLog.exitWithError("Generate static file [%s] not found." % srcFile)
            hostSsh = SshTool([hostName])
            targetFile = "%s/bin/cluster_static_config" % appPath
            hostSsh.scpFiles(srcFile, targetFile, [hostName], self.envFile)
            self.cleanSshToolFile(hostSsh)
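An illustrative sketch of the membership test used above when pruning dbNodes: a node is kept in the static configuration only if the gs_om status output already lists its name, back IP and data directory. The helper name and the sample status line are invented.

import re

def node_in_cluster(status_output, node_name, node_ip, data_node):
    exist_reg = r"(.*)%s[\s]*%s(.*)%s(.*)" % (node_name, node_ip, data_node)
    return re.search(exist_reg, status_output) is not None

sample = "1  host01 192.168.0.1  /data/dn1  P Primary Normal"
print(node_in_cluster(sample, "host01", "192.168.0.1", "/data/dn1"))  # True
print(node_in_cluster(sample, "host02", "192.168.0.2", "/data/dn2"))  # False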
@@ -611,11 +625,11 @@ retry for %s times" % start_retry_num)

        # Single-node database need start cluster after expansion
        if self.isSingleNodeInstance:
            primaryHost = self.getPrimaryHostName()
            self.logger.debug("Single-Node instance need restart.\n")
            self.commonGsCtl.queryOmCluster(primaryHosts, self.envFile)
            self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)

            # if primary database not normal, restart it
            primaryHost = self.getPrimaryHostName()
            dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
            insType, dbStat = self.commonGsCtl.queryInstanceStatus(primaryHost,
                dataNode, self.envFile)
@@ -633,7 +647,7 @@ retry for %s times" % start_retry_num)
                self.commonGsCtl.startInstanceWithMode(hostName, dataNode,
                    MODE_STANDBY, self.envFile)

            self.commonGsCtl.startOmCluster(primaryHosts, self.envFile)
            self.commonGsCtl.startOmCluster(primaryHost, self.envFile)
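A hedged sketch of the restart decision in this hunk, with the gs_ctl wrapper replaced by injected callables; the "Normal" status string, the MODE_* values and the function name are stand-ins rather than the module's real constants.

MODE_PRIMARY = "primary"
MODE_STANDBY = "standby"

def ensure_instance_running(query_status, start_with_mode, host, data_node, is_primary):
    # query_status returns (instance_type, db_state); restart only when the state is not normal.
    ins_type, db_state = query_status(host, data_node)
    if db_state != "Normal":
        start_with_mode(host, data_node, MODE_PRIMARY if is_primary else MODE_STANDBY)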

    def setGUCOnClusterHosts(self, hostNames=[]):
        """
@@ -835,6 +849,63 @@ standby nodes.")
        """
        self.checkUserAndGroupExists()
        self.checkXmlFileAccessToUser()
        self.checkClusterStatus()
        self.validNodeInStandbyList()

    def checkClusterStatus(self):
        """
        Check whether the cluster status is normal before expand.
        """
        self.logger.debug("Start to check cluster status.\n")

        curHostName = socket.gethostname()
        command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
            (self.user, self.envFile)
        sshTool = SshTool([curHostName])
        resultMap, outputCollect = sshTool.getSshStatusOutput(command,
            [curHostName], self.envFile)
        if outputCollect.find("Primary Normal") == -1:
            GaussLog.exitWithError("Unable to query current cluster status. " + \
                "Please import environment variables or " +\
                "check whether the cluster status is normal.")

        self.logger.debug("The primary database is normal.\n")

    def validNodeInStandbyList(self):
        """
        check if the node has been installed in the cluster.
        """
        self.logger.debug("Start to check if the nodes in standby list\n")

        curHostName = socket.gethostname()
        command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
            (self.user, self.envFile)
        sshTool = SshTool([curHostName])
        resultMap, outputCollect = sshTool.getSshStatusOutput(command,
            [curHostName], self.envFile)
        self.logger.debug(outputCollect)

        newHosts = self.context.newHostList
        standbyHosts = []
        existHosts = []
        while len(newHosts) > 0:
            hostIp = newHosts.pop()
            nodeName = self.context.backIpNameMap[hostIp]
            nodeInfo = self.context.clusterInfoDict[nodeName]
            dataNode = nodeInfo["dataNode"]
            exist_reg = r"(.*)%s[\s]*%s(.*)" % (nodeName, hostIp)
            if not re.search(exist_reg, outputCollect):
                standbyHosts.append(hostIp)
            else:
                existHosts.append(hostIp)
        self.context.newHostList = standbyHosts
        if len(existHosts) > 0:
            self.logger.log("The nodes [%s] are already in the cluster. Skip expand these nodes." \
                % ",".join(existHosts))
        self.cleanSshToolFile(sshTool)
        if len(standbyHosts) == 0:
            self.logger.log("There is no node can be expanded.")
            sys.exit(0)
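Finally, a sketch of how validNodeInStandbyList() partitions the new hosts: any host whose node name and back IP already appear in the status output is skipped, and the rest stay queued for expansion. The sample map and status line below are invented.

import re

def split_new_hosts(new_hosts, back_ip_name_map, status_output):
    standby, existing = [], []
    for host_ip in new_hosts:
        node_name = back_ip_name_map[host_ip]
        exist_reg = r"(.*)%s[\s]*%s(.*)" % (node_name, host_ip)
        (existing if re.search(exist_reg, status_output) else standby).append(host_ip)
    return standby, existing

status = "2  host02 192.168.0.2  /data/dn1  S Standby Normal"
print(split_new_hosts(["192.168.0.2", "192.168.0.3"],
                      {"192.168.0.2": "host02", "192.168.0.3": "host03"},
                      status))
# (['192.168.0.3'], ['192.168.0.2'])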

    def checkXmlFileAccessToUser(self):
        """