Revert "去除root权限"

This reverts commit a50fa50821b8d0700810616ac428938be01dc93e.
liuheng 2024-08-01 16:34:26 +08:00
parent fa929cc406
commit 22add618a9
52 changed files with 505 additions and 1093 deletions
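
The hunks below repeatedly change the host argument handed to the SSH helpers: the reverted commit addressed cluster nodes by their primary ssh IPs (getClusterSshIps()[0]), while this revert goes back to addressing them by node name (getClusterNodeNames()). A minimal sketch of the two call shapes, using stand-in classes because gspylib is not importable here; all host values are made up for illustration.

class FakeClusterInfo:
    """Stand-in for dbClusterInfo, only for illustration."""
    def getClusterNodeNames(self):
        return ["node1", "node2"]
    def getClusterSshIps(self):
        # one list per ssh-IP slot; slot 0 holds each node's primary ssh IP,
        # which is how these hunks use it (an assumption based on the call sites)
        return [["192.168.1.11", "192.168.1.12"]]

class FakeSshTool:
    """Stand-in for gspylib.threads.SshTool.SshTool."""
    def __init__(self, host_list, log_file=None, timeout=None):
        self.host_list = host_list

cluster = FakeClusterInfo()
by_name = FakeSshTool(cluster.getClusterNodeNames())   # behaviour restored by this revert
by_ip = FakeSshTool(cluster.getClusterSshIps()[0])     # behaviour being reverted
print(by_name.host_list, by_ip.host_list)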

View File

@ -78,7 +78,7 @@ def initGlobals():
g_clusterInfo = dbClusterInfo()
g_clusterInfo.initFromStaticConfig(g_clusterUser)
# Init sshtool
g_sshTool = SshTool(g_clusterInfo.getClusterSshIps()[0], g_logger.logFile)
g_sshTool = SshTool(g_clusterInfo.getClusterNodeNames(), g_logger.logFile)
def checkSrcFile(srcFile):
@ -133,11 +133,11 @@ def parseCommandLine():
def scpFileToAllHost(srcFile, drcpath):
try:
g_logger.log("Transfer C function file to all hosts.")
g_sshTool.scpFiles(srcFile, drcpath, g_clusterInfo.getClusterSshIps()[0])
g_sshTool.scpFiles(srcFile, drcpath, g_clusterInfo.getClusterNodeNames())
cmd = "chmod 600 '%s'" % drcpath
g_sshTool.executeCommand(cmd,
DefaultValue.SUCCESS,
g_clusterInfo.getClusterSshIps()[0])
g_clusterInfo.getClusterNodeNames())
except Exception as e:
raise Exception(ErrorCode.GAUSS_536["GAUSS_53611"] % str(e))

View File

@ -69,18 +69,6 @@ class CrontabUtil(object):
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
" Error:\n%s" % output)
@staticmethod
def check_user_crontab_permission():
"""
function : Check user crontab permission
input : NA
output: True or False
"""
cmd = CmdUtil.getAllCrontabCmd()
(_, output) = subprocess.getstatusoutput(cmd)
if output.find("not allowed") >= 0:
return False
return True
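
For reference, a self-contained sketch of what the removed permission check does; the real listing command comes from CmdUtil.getAllCrontabCmd, and "crontab -l" here is only an assumed stand-in.

import subprocess

def user_may_use_crontab(list_cmd="crontab -l"):
    # Mirrors the removed helper: if listing the crontab reports "not allowed"
    # (e.g. the user is denied via /etc/cron.deny), report False.
    _, output = subprocess.getstatusoutput(list_cmd)
    return output.find("not allowed") < 0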
@staticmethod

View File

@ -458,11 +458,10 @@ class NetUtil(object):
output: str
"""
try:
cmd = "hostname -I | awk '{print $1}'"
(status, output) = subprocess.getstatusoutput(cmd)
if not status:
return output.strip()
env_dist = os.environ
if "HOST_IP" not in list(env_dist.keys()):
host_name = NetUtil.getHostName()
return host_name
host_ip = env_dist.get("HOST_IP")
if host_ip is not None:
if NetUtil.isIpValid(host_ip):

View File

@ -31,7 +31,6 @@ from impl.backup.OLAP.BackupImplOLAP import BackupImplOLAP
from domain_utils.cluster_file.cluster_log import ClusterLog
from domain_utils.domain_common.cluster_constants import ClusterConstants
from domain_utils.cluster_os.cluster_user import ClusterUser
from gspylib.threads.SshTool import SshTool
ACTION_BACKUP = "backup"
ACTION_RESTORE = "restore"
@ -56,7 +55,6 @@ class Backup(ParallelBaseOM):
self.isParameter = False
self.isBinary = False
self.isForce = False
self.node_ip = ""
####################################################################################
# Help context. U:R:oC:v:
@ -211,16 +209,6 @@ General options:
" Only parameter files will be backed up.")
self.isParameter = True
def init_globals(self):
"""
init cluster info from static file
"""
# init the log file
self.initLogger(self.action)
# init the cluster info
self.initClusterInfoFromStaticFile(self.user)
if self.nodename:
self.node_ip = self.clusterInfo.getDbNodeByName(self.nodename)
if __name__ == '__main__':
@ -240,8 +228,6 @@ if __name__ == '__main__':
# check the parameters is not OK
backupObj.checkParameter()
# init globals
backupObj.init_globals()
# set action flag file
DefaultValue.setActionFlagFile("gs_backup")

View File

@ -611,9 +611,9 @@ def checkuser():
return
# Check cluster user trust
dbNameList = g_opts.cluster.getClusterSshIps()[0]
dbNameList = g_opts.cluster.getClusterNodeNames()
if (len(dbNameList) == 1 and
dbNameList[0] == NetUtil.getLocalIp()):
dbNameList[0] == NetUtil.GetHostIpOrName()):
return
appPath = EnvUtil.getEnv('GPHOME', g_opts.cluster.appPath)
psshPath = os.path.join(appPath, 'script/gspylib/pssh/bin/pssh')

View File

@ -23,7 +23,6 @@ import os
import sys
import time
import subprocess
import pwd
from datetime import datetime, timedelta
sys.path.append(sys.path[0] + '/../lib')
@ -149,7 +148,6 @@ class CmdOptions():
self.itemstr = ""
self.hostlistfile = ""
self.hostnameList = []
self.host_ip_list = []
self.outputfile = ""
self.logFile = ""
self.localLog = ""
@ -167,27 +165,20 @@ class CmdOptions():
#########################################################
def initGlobals():
"""
init the global parameter g_logger and g_sshTool and g_clusterInfo
init the global parameter g_logger and g_sshTool
"""
global g_logger
global g_sshTool
global g_clusterInfo
g_logger = GaussLog(g_opts.logFile, "gs_checkos")
dirName = os.path.dirname(g_opts.logFile)
g_opts.localLog = os.path.join(dirName, ClusterConstants.LOCAL_LOG_FILE)
global g_clusterInfo
g_sshTool = SshTool(g_opts.hostnameList, g_logger.logFile,
DefaultValue.TIMEOUT_PSSH_CHECK)
g_clusterInfo = dbClusterInfo()
if (g_opts.confFile != ""):
g_clusterInfo.initFromXml(g_opts.confFile)
if not g_opts.localMode:
for name in g_opts.hostnameList:
node = g_clusterInfo.getDbNodeByName(name)
g_opts.host_ip_list.append(node.sshIps[0])
global g_sshTool
g_sshTool = SshTool(g_opts.host_ip_list, g_logger.logFile,
DefaultValue.TIMEOUT_PSSH_CHECK)
#############################################################################
# Parse and check parameters
@ -313,15 +304,15 @@ def parseCommandLine():
g_opts.detail = ParaDict.get("show_detail")
def get_user_info():
"""
function: get user
input: NA
output: NA
"""
if os.getuid() != 0:
user_info = UserUtil.getUserInfo()
g_opts.user = user_info.get("name")
DefaultValue.checkPathVaild(g_opts.user)
"""
function: get user
input: NA
output: NA
"""
if os.getuid() != 0:
user_info = UserUtil.getUserInfo()
g_opts.user = user_info.get("name")
DefaultValue.checkPathVaild(g_opts.user)
def readHostFile(hostfile):
"""
@ -343,7 +334,6 @@ def checkHostList():
"""
if (g_opts.hostnamestr == "" and g_opts.hostlistfile == ""):
g_opts.hostnameList = [NetUtil.GetHostIpOrName()]
g_opts.localMode = True
elif (g_opts.hostnamestr != "" and g_opts.hostlistfile != ""):
GaussLog.exitWithError(
ErrorCode.GAUSS_500["GAUSS_50005"] % ('h', 'f'))
@ -355,8 +345,7 @@ def checkHostList():
readHostFile(g_opts.hostlistfile)
else:
parseHostnameOpts(g_opts.hostnamestr)
if g_opts.localMode and not g_opts.confFile:
GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % "X")
def checkConfigFile():
"""
@ -419,7 +408,7 @@ def setLogFile():
else:
create_log_file()
UserUtil.check_path_owner(g_opts.logFile)
UserUtil.check_path_owner(g_opts.logFile)
def checkItems():
"""
@ -437,12 +426,6 @@ def checkOutputFile():
except Exception as e:
GaussLog.exitWithError(str(e))
def check_config_file():
"""
check parameters
"""
if not g_opts.localMode and not g_opts.confFile:
raise Exception(ErrorCode.GAUSS_500["GAUSS_50001"] % "X")
def checkParameter():
"""
@ -459,7 +442,6 @@ def checkParameter():
g_opts.hostnameList = [NetUtil.GetHostIpOrName()]
g_opts.hostnameList.sort()
checkHostnameList()
check_config_file()
##########################################
# set logfile
############################################
@ -987,7 +969,7 @@ def getLocalIPAddr():
Ips = ""
if (g_opts.confFile == ""):
localHostIp = NetUtil.getLocalIp()
localHostIp = DefaultValue.getIpByHostName()
Ips = localHostIp
else:
for node in g_clusterInfo.dbNodes:
@ -1560,8 +1542,7 @@ def main():
parseCommandLine()
checkParameter()
initGlobals()
gpHome = os.path.normpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), ".."))
os.environ['GPHOME'] = gpHome
gpHome = os.path.dirname(os.path.realpath(__file__))
Local_CheckOs = OMCommand.getLocalScript("Local_CheckOS")
Local_Check = OMCommand.getLocalScript("Local_Check")
except Exception as e:
@ -1587,7 +1568,7 @@ def main():
if ((g_opts.localMode != True) and
DefaultValue.checkInList(['A6', 'A7', 'A11', 'B1', 'B2', 'B5'],
itemList)):
g_sshTool.scpFiles(configFile, dirName, [], "", gpHome)
g_sshTool.scpFiles(configFile, dirName, [], "", "%s/../" % gpHome)
except Exception as ex:
g_logger.logExit(str(ex))

View File

@ -246,7 +246,7 @@ General options:
% ("local install path[" +
self.clusterInfo.appPath + "]"))
# initialize sshTool
g_sshTool = SshTool(self.clusterInfo.getClusterSshIps()[0],
g_sshTool = SshTool(self.clusterInfo.getClusterNodeNames(),
g_logger.logFile,
DefaultValue.TIMEOUT_PSSH_CHECK)

View File

@ -70,7 +70,6 @@ class Collect(ParallelBaseOM):
self.inFile = ""
self.outFile = ""
self.nodeName = []
self.node_ips = []
self.config = {}
self.appPath = ""

View File

@ -281,7 +281,7 @@ General options:
self.failureHosts = '.'.join(re.findall(r"\[FAILURE\] .*:.*\n",
outputCollect))
for host in list(self.hostMapForExist.keys()):
if self.hostMapForExist[host]['ipaddr'] in self.failureHosts:
if host in self.failureHosts:
GaussLog.exitWithError(
ErrorCode.GAUSS_358["GAUSS_35807"] % host)
@ -342,7 +342,7 @@ if __name__ == "__main__":
DefaultValue.check_is_streaming_dr_cluster()
dropNode.check_repeat_process()
dropNode.checkParameters()
dropNode.checkConnection(list(dropNode.backIpNameMap.values()),
dropNode.checkConnection(list(dropNode.backIpNameMap.keys()),
dropNode.envFile)
dropNode.check_cluster_status()
dropNode.flagForOnlyPrimaryLeft()

View File

@ -23,7 +23,6 @@ import os
import sys
import subprocess
import socket
import pwd
package_path = os.path.dirname(os.path.realpath(__file__))
ld_path = package_path + "/gspylib/clib"
if 'LD_LIBRARY_PATH' not in os.environ:
@ -97,8 +96,6 @@ class Expansion(ParallelBaseOM):
self.standbyLocalMode = False
self.time_out = None
self.envFile = EnvUtil.getEnv("MPPDB_ENV_SEPARATE_PATH")
self.nodeNameList = []
self.node_ip_list = []
def usage(self):
"""
@ -313,33 +310,13 @@ General options:
subprocess.getstatusoutput("chown {}:{} {}".format(self.user, self.group, self.logger.logFile))
self.logger.ignoreErr = True
# init cluster info from xml or static file
if self.xmlFile:
self.initClusterInfo()
else:
self.initClusterInfoFromStaticFile(self.g_opts.user)
# init node ip list
self.node_ip_list = self.clusterInfo.getClusterSshIps()[0]
for ip in self.newHostList:
if ip not in self.node_ip_list:
self.node_ip_list.append(ip)
def global_init(self):
"""
init node name list
"""
self.nodeNameList = self.clusterInfo.getClusterNodeNames()
def check_env_variable(self):
"""
check whether env file is sourced
"""
self.logger.debug("Checking environment variable.")
if not self.envFile:
# get user home
user_path = pwd.getpwnam(self.user).pw_dir
self.envFile = os.path.normpath(os.path.join(user_path, ".bashrc"))
self.envFile = "/home/%s/.bashrc" % self.user
cmd = "source %s" % self.envFile
(status, output) = subprocess.getstatusoutput(cmd)
if status != 0:
@ -365,16 +342,15 @@ General options:
# get new hostname and hostip
hostname_str, hostip_str = self.get_new_hostname_and_hostip()
# execute gs_om -t generate_xml
user_path = pwd.getpwnam(self.user).pw_dir
if not self.envFile:
self.envFile = os.path.normpath(os.path.join(user_path, ".bashrc"))
self.envFile = "/home/%s/.bashrc" % self.user
cmd = "source %s; %s -t generate_xml --add-hostname=%s --add-hostip=%s" % (self.envFile, gs_om, hostname_str, hostip_str)
if os.getuid() == 0:
cmd = "su - %s -c '%s'" % (self.user, cmd)
status, output = subprocess.getstatusoutput(cmd)
if status != 0:
GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50231"] % self.xmlFile)
xml_tmp_file = os.path.normpath(os.path.join(user_path, "tmp_generate_xml"))
xml_tmp_file = "/home/%s/tmp_generate_xml" % self.user
if not os.path.exists(xml_tmp_file):
GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"] % xml_tmp_file)
self.xmlFile = FileUtil.readFile(xml_tmp_file)[0].strip()
@ -385,14 +361,9 @@ General options:
def get_new_hostname_and_hostip(self):
hostip_str = ",".join(self.newHostList)
hostname_list = []
failed_output = ""
for ip in self.newHostList:
cmd = "hostname"
(status_map, output) = self.sshTool.getSshStatusOutput(cmd, [ip])
if status_map[ip] != "Success":
failed_output += output
else:
hostname_list.append(str(output).strip())
hostname = socket.gethostbyaddr(ip)[0]
hostname_list.append(hostname)
hostname_str = ",".join(hostname_list)
return hostname_str, hostip_str
@ -432,42 +403,61 @@ General options:
if currentHost != primaryHost:
GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50110"] %
(currentHost + ", which is not primary"))
def init_cluster_info_all_node(self):
"""
init cluster info for all node
"""
clusterInfo = dbClusterInfo()
clusterInfo.initFromXml(self.xmlFile)
self.clusterInfo = clusterInfo
self.nodeNameList = clusterInfo.getClusterNodeNames()
def checkTrust(self):
def checkTrust(self, hostList = None):
"""
check trust between primary/current host and every host in hostList
"""
if hostList == None:
hostList = list(self.nodeNameList)
backIpList = self.clusterInfo.getClusterBackIps()
hostList += backIpList
gpHome = EnvUtil.getEnv("GPHOME")
psshPath = "python3 %s/script/gspylib/pssh/bin/pssh" % gpHome
create_ssh = False
for host in self.node_ip_list:
ssh_exception_hosts = []
for host in hostList:
if os.getuid() == 0:
# check individual user's trust
check_user_trust_cmd = "su - %s -c '%s -s -H %s pwd'" % (self.user, psshPath, host)
(status, output) = subprocess.getstatusoutput(check_user_trust_cmd)
if status != 0:
create_ssh = True
ssh_exception_hosts.append(host)
# check current user's trust
check_user_trust_cmd = "%s -s -H %s 'pwd'" % (psshPath, host)
(status, output) = subprocess.getstatusoutput(check_user_trust_cmd)
if status != 0:
create_ssh = True
ssh_exception_hosts.append(host)
# output ssh exception info if ssh connect failed
if create_ssh:
if ssh_exception_hosts:
self.logger.log("The cluster need create ssh trust")
self.create_trust(self.node_ip_list)
self.create_trust()
else:
self.logger.log("The cluster no need create ssh trust")
def create_trust(self, node_ips):
def create_trust(self):
cluster_info = dbClusterInfo()
cluster_info.initFromXml(self.xmlFile)
all_ips = []
sships = cluster_info.getClusterSshIps()
for ips in sships:
all_ips.extend(ips)
if os.getuid() == 0:
self.create_trust_for_user("root", node_ips)
self.create_trust_for_user(self.user, node_ips)
self.create_trust_for_user("root", all_ips)
self.create_trust_for_user(self.user, all_ips)
def create_trust_for_user(self, user, all_ips):
self.logger.log("Please enter password for %s" % user)
self.sshTool = SshTool(all_ips, self.logFile, DefaultValue.TIMEOUT_PSSH_PREINSTALL)
self.sshTool = SshTool(self.nodeNameList, self.logFile, DefaultValue.TIMEOUT_PSSH_PREINSTALL)
self.sshTool.createTrust(user, all_ips)
self.logger.debug("Successfully created SSH trust for the %s" % user)
@ -526,13 +516,13 @@ General options:
This is expansion frame start
"""
if self.check_cm_component() and self.standbyLocalMode:
expand_impl = ExpansionImplWithCmLocal(expansion)
expand_impl = ExpansionImplWithCmLocal(self)
self.logger.log("Start expansion with cluster manager component on standby node.")
elif self.check_cm_component():
expand_impl = ExpansionImplWithCm(expansion)
expand_impl = ExpansionImplWithCm(self)
self.logger.log("Start expansion with cluster manager component.")
else:
expand_impl = ExpansionImpl(expansion)
expand_impl = ExpansionImpl(self)
self.logger.log("Start expansion without cluster manager component.")
expand_impl.run()
@ -694,9 +684,9 @@ if __name__ == "__main__":
expansion.checkParameters()
expansion.initLogs()
expansion.check_env_variable()
expansion.checkTrust()
expansion.generate_xml()
expansion.global_init()
expansion.init_cluster_info_all_node()
expansion.checkTrust()
expansion.getExpansionInfo()
expansion.check_xml_env_consistent()
expansion.checkXmlIncludeNewHost()

View File

@ -114,7 +114,7 @@ General options:
self.initClusterInfo(refreshCN=False)
self.initComponent()
# Initialize self.sshTool
self.initSshTool(self.clusterInfo.getClusterSshIps()[0],
self.initSshTool(self.clusterInfo.getClusterNodeNames(),
DefaultValue.TIMEOUT_PSSH_INSTALL)
if (len(self.clusterInfo.getClusterNodeNames()) == 1 and
self.clusterInfo.getClusterNodeNames()[0]

View File

@ -69,7 +69,6 @@ class CmdOptions():
# cluster node names passed by the
# command line option "-h".
self.nodeName = ""
self.node_ip = ""
self.time_out = None
# if action is "express", use this parameter to store whether to
# show the detail message of cluster
@ -273,11 +272,6 @@ Install options:
# init components
if self.g_opts.action != ACTION_STATUS:
self.initComponent()
# query node ip by node name
if self.g_opts.nodeName:
node = self.clusterInfo.getDbNodeByName(self.g_opts.nodeName)
self.g_opts.node_ip = node.sshIps[0]
except Exception as e:
GaussLog.exitWithError(str(e))

View File

@ -350,7 +350,7 @@ General options:
self.initClusterInfo()
os.environ[ClusterConstants.TOOL_PATH_ENV] = self.clusterToolPath
# Initialize the self.sshTool variable
self.initSshTool(self.clusterInfo.getClusterSshIps()[0],
self.initSshTool(self.clusterInfo.getClusterNodeNames(),
DefaultValue.TIMEOUT_PSSH_POSTPREINSTALL)
self.logger.debug("The cluster's information:\n%s."
% str(self.clusterInfo))

View File

@ -507,7 +507,7 @@ General options:
cmd += 'cp ./lib/libasan.so.6 {} &&'.format(memcheck_root_lib)
cmd += '\mv {} {} && '.format(' '.join(bin_files + memcheck_files), clib)
cmd += 'cd {} && rm -rf bin'.format(root)
status, output = subprocess.getstatusoutput(cmd)
status, _ = subprocess.getstatusoutput(cmd)
if status != 0:
GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50217"] %
"version.cfg" + "The cmd is %s. " % cmd +
@ -534,7 +534,7 @@ General options:
self.logger.log("Parsing the configuration file.", "addStep")
try:
# parse the configuration file
self.sshTool = SshTool(self.clusterInfo.getClusterSshIps()[0],
self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(),
self.logFile,
DefaultValue.TIMEOUT_PSSH_PREINSTALL)

View File

@ -127,8 +127,12 @@ General options:
self.clusterInfo = dbClusterInfo()
self.clusterInfo.initFromStaticConfig(self.user)
nodeSshIps = self.clusterInfo.getClusterSshIps()[0]
self.sshTool = SshTool(nodeSshIps)
nodeNames = self.clusterInfo.getClusterNodeNames()
if self.check_nodename_recognized(nodeNames):
self.sshTool = SshTool(nodeNames)
else:
nodeSshIps = self.clusterInfo.getClusterSshIps()[0]
self.sshTool = SshTool(nodeSshIps)
except Exception as e:
GaussLog.exitWithError(str(e))

View File

@ -51,9 +51,6 @@ from subprocess import PIPE
from base_utils.common.fast_popen import FastPopen
from gspylib.common.copy_python_lib import copy_lib
from base_utils.os.user_util import UserUtil
from base_utils.executor.local_remote_cmd import LocalRemoteCmd
from domain_utils.cluster_file.profile_file import ProfileFile
from base_utils.os.crontab_util import CrontabUtil
copy_lib()
DefaultValue.doConfigForParamiko()
@ -153,7 +150,6 @@ class GaussCreateTrust():
self.hosts_paswd_list = []
self.logFile = ""
self.localHost = ""
self.local_ip = ""
self.flag = False
self.localID = ""
self.user = pwd.getpwuid(os.getuid()).pw_name
@ -658,7 +654,6 @@ General options:
self.parseCommandLine()
self.checkParameter()
self.localHost = socket.gethostname()
self.local_ip = socket.gethostbyname(self.localHost)
self.init_sshtool()
self.initLogger()
global tmp_files
@ -688,115 +683,33 @@ General options:
self.synchronizationLicenseFile(result)
self.retry_register_other_ssh_agent()
self.verifyTrust()
self.init_global()
self.set_user_ssh_alive(result)
self.set_user_crontab(result)
self.logger.log("Successfully created SSH trust.")
except Exception as e:
self.logger.logExit(str(e))
finally:
self.passwd = []
def set_user_ssh_alive(self, hostname_ips):
"""
set user ssh alive
"""
package_path = os.path.dirname(os.path.realpath(__file__))
gp_home = os.path.join(package_path, '../')
if EnvUtil.getEnv("GPHOME"):
gp_home = EnvUtil.getEnv("GPHOME")
else:
os.environ['GPHOME'] = gp_home
if os.getuid() == 0:
self.set_user_crontab(hostname_ips)
else:
if CrontabUtil.check_user_crontab_permission():
self.set_user_crontab(hostname_ips)
else:
self.set_user_ssh_service(gp_home)
def init_global(self):
"""
init ssh tools
"""
self.ssh_tool = SshTool(self.hostList)
def getUserProfile(self):
"""
function: set env into /etc/profile
input : OSEnvConfig
output: NA
"""
if self.mpprcFile != "":
# have check its exists when check parameters,
# so it should exist here
user_profile = self.mpprcFile
if not os.path.exists(user_profile):
FileUtil.createFile(user_profile)
FileUtil.changeMode(DefaultValue.DIRECTORY_MODE, user_profile)
FileUtil.changeOwner(self.user, user_profile)
else:
# check if os profile exist
user_profile = ProfileFile.get_user_bashrc(self.user)
if not os.path.exists(user_profile):
self.logger.debug(
"Profile does not exist. Please create %s." % user_profile)
FileUtil.createFile(user_profile)
FileUtil.changeMode(DefaultValue.DIRECTORY_MODE, user_profile)
return user_profile
def set_user_ssh_service(self, gp_home):
"""
set user ssh service
"""
def set_user_crontab(self, hostnames_ips):
if os.getuid() == 0:
return
self.logger.log("Start set ssh service for %s" % self.user)
# copy self.hostList
host_ip = self.hostList[:]
host_ip.remove(self.local_ip)
package_path = os.path.dirname(os.path.realpath(__file__))
gp_home = os.path.join(package_path, '../')
ssh_service_local_file = os.path.normpath(os.path.join(gp_home, "script/local/create_ssh_service.sh"))
ssh_service_file = os.path.normpath(os.path.join(gp_home, "script/local/create_ssh_service.sh"))
# cp ssh service file to remote
ssh_service_dir = os.path.dirname(ssh_service_file)
LocalRemoteCmd.checkRemoteDir(self.ssh_tool, ssh_service_dir, host_ip, "")
self.ssh_tool.scpFiles(ssh_service_file, ssh_service_file, [], "", gp_path=gp_home)
# execute ssh service file
cmd = "sh %s %s %s" % (ssh_service_local_file, self.user, gp_home)
self.ssh_tool.executeCommand(cmd, DefaultValue.SUCCESS, self.hostList)
self.logger.log("Successfully to set ssh service for %s" % self.user)
def set_user_crontab(self, hostnames_ips):
hostnames = list(hostnames_ips.values())
self.logger.log("Start set cron for %s" % self.user)
# hostnames_ips key is ip; value is hostname
ips = list(hostnames_ips.keys())
package_path = os.path.dirname(os.path.realpath(__file__))
gp_home = os.path.join(package_path, '../')
check_cron = True
# create cron tmp file
cron_file = "/tmp/gauss_cron_%s" % self.user
if os.getuid() == 0:
set_cron_cmd = "crontab -u %s -l > %s && " % (self.user, cron_file)
else:
set_cron_cmd = "crontab -l > %s && " % cron_file
set_cron_cmd += "sed -i '/CheckSshAgent.py/d' %s;" % cron_file
set_cron_cmd += "echo '*/1 * * * * source ~/.bashrc;python3 %s/script/local/CheckSshAgent.py >>/dev/null 2>&1 &' >> %s" % \
(gp_home, cron_file)
if os.getuid() == 0:
set_cron_cmd += "crontab -u %s %s ;" % (self.user, cron_file)
else:
set_cron_cmd += "crontab %s ;" % cron_file
set_cron_cmd += "rm -f '%s';" % cron_file
try:
ssh_tool = SshTool(ips)
os.environ['GPHOME'] = gp_home
# create cron tmp file
cron_file = "/tmp/gauss_cron_%s" % self.user
set_cron_cmd = "crontab -l > %s && " % cron_file
set_cron_cmd += "sed -i '/CheckSshAgent.py/d' %s;" % cron_file
set_cron_cmd += "echo '*/1 * * * * source ~/.bashrc;python3 %s/script/local/CheckSshAgent.py >>/dev/null 2>&1 &' >> %s" % \
(gp_home, cron_file)
set_cron_cmd += "&& crontab %s" % cron_file
set_cron_cmd += "&& rm -f '%s';" % cron_file
self.logger.debug("Command for setting CRON: %s" % set_cron_cmd)
ssh_tool = SshTool(hostnames)
os.environ['GPHOME'] = gp_home
ssh_tool.executeCommand(set_cron_cmd, DefaultValue.SUCCESS, '', '')
ssh_tool.clenSshResultFiles()
except Exception as e:
@ -905,13 +818,20 @@ General options:
output: NA
"""
self._log("Updating the known_hosts file.", "addStep")
for ip in self.hostList:
cmd = '%s;/usr/bin/ssh-keyscan -t ed25519 %s >> %s ' % (SYSTEM_SSH_ENV, ip, self.known_hosts_fname)
hostnameList = []
hostnameList.extend(self.hostList)
for(key, value) in list(result.items()):
hostnameList.append(value)
for hostname in hostnameList:
cmd = '%s;/usr/bin/ssh-keyscan -t ed25519 %s >> %s ' % (SYSTEM_SSH_ENV, hostname, self.known_hosts_fname)
cmd += '&& sed -i "$ s/$/ #OM/" %s ' % self.known_hosts_fname
cmd += "&& chmod %s %s" % (DefaultValue.KEY_FILE_MODE, self.known_hosts_fname)
(status, output) = subprocess.getstatusoutput(cmd)
if status != 0 or "Name or service not known".lower() in output.lower():
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error:\n%s" % output)
(status, output) = self.checkAuthentication(self.localHost)
if not status:
raise Exception(ErrorCode.GAUSS_511["GAUSS_51100"] % self.localHost)
self._log("Successfully updated the known_hosts file.", "constant")
def tryParamikoConnect(self, hostname, client, pswd = None, silence = False):
@ -954,23 +874,23 @@ General options:
self.logger.logExit(ErrorCode.GAUSS_511["GAUSS_51111"] + " Error:%s." % str(e))
self._log("Successfully appended authorized_key on all remote node.", "constant")
def sendRemoteAuthorization(self, ip):
def sendRemoteAuthorization(self, hostname):
"""
function: send remote authorization
input : hostname
output: NA
"""
if ip != self.localHost:
if hostname != self.localHost:
p = None
cin = cout = cerr = None
try:
#ssh Remote Connection other node
p = paramiko.SSHClient()
p.load_system_host_keys()
ok = self.tryParamikoConnect(ip, p, self.correct_passwd_map[ip], silence = True)
ok = self.tryParamikoConnect(hostname, p, self.correct_passwd_map[hostname], silence = True)
if not ok:
self.incorrectPasswdInfo += "Without this node[%s] of the correct password.\n"\
% ip
% hostname
return
# Create .ssh directory and ensure content meets permission requirements
# for password-less SSH
@ -1010,12 +930,12 @@ General options:
if line.find("ok ok ok") < 0:
self.failedToAppendInfo += "...send to %s\nFailed to append local ID to " \
"authorized_keys on remote node %s.\n" % \
(ip, ip)
(hostname, hostname)
return
cout.close()
cerr.close()
self.logger.debug("Send to %s\nSuccessfully appended authorized_key on"
" remote node %s." % (ip, ip))
" remote node %s." % (hostname, hostname))
finally:
if cin:
cin.close()
@ -1175,10 +1095,9 @@ General options:
continue
max_try_times = 3
status = -1
status, output, cmd = self.send_trust_file(hostip)
while status != 0 and max_try_times > 0:
status, output, cmd = self.send_trust_file(hostip)
self.logger.debug(f"send_trust_file failed, countdown {max_try_times}, retry again.")
self.logger.debug("errorinfo: hostip: %s, status: %d, output: %s, "
% (hostip, status, output))
@ -1188,6 +1107,7 @@ General options:
))
time.sleep(10)
max_try_times -= 1
status, output, cmd = self.send_trust_file(hostip)
if status != 0 and max_try_times == 0:
raise Exception(ErrorCode.GAUSS_502["GAUSS_50223"] %"the authentication"
@ -1365,7 +1285,7 @@ General options:
"gspylib",
"clib"))
encrypt_path = os.path.join(encrypt_dir_path, "encrypt")
if (not os.path.exists(encrypt_path)):
if (not os.path.exists(encrypt_path)) :
root = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
clib = os.path.join(root, "script/gspylib/clib")
bin_files = ['./bin/encrypt']

View File

@ -136,7 +136,7 @@ General options:
# OLAP
self.initClusterInfoFromStaticFile(self.user)
# Initialize the self.sshTool variable
self.initSshTool(self.clusterInfo.getClusterSshIps()[0],
self.initSshTool(self.clusterInfo.getClusterNodeNames(),
DefaultValue.TIMEOUT_PSSH_UNINSTALL)
except Exception as e:
self.logger.logExit(str(e))

View File

@ -95,11 +95,8 @@ class Upgrade(ParallelBaseOM):
self.newClusterAppPath = ""
self.oldClusterAppPath = ""
self.clusterNodes = []
self.cluster_ip = []
self.cluster_hostname_ip_map = {}
self.nodesNum = -1
self.nodeNames = []
self.node_ips = []
##static parameter
self.binTarName = "binary_%s.tar" % NetUtil.GetHostIpOrName()
self.rollback = False
@ -275,7 +272,7 @@ Option for grey upgrade
def execCommandInSpecialNode(self, cmd, hosts, retry_times=2, time_out=0):
if not hosts:
host_list = copy.deepcopy(self.cluster_hostname_ip_map.values())
host_list = copy.deepcopy(self.clusterNodes)
else:
host_list = copy.deepcopy(hosts)
self.logger.debug("Commanded: exec cmd in the hosts {0}".format(host_list))
@ -328,8 +325,6 @@ Option for grey upgrade
# init clusterNodes
for dbNode in self.clusterInfo.dbNodes:
self.clusterNodes.append(dbNode.name)
self.cluster_ip.append(dbNode.backIps[0])
self.cluster_hostname_ip_map[dbNode.name] = dbNode.backIps[0]
if len(self.clusterNodes) == 0:
raise Exception(ErrorCode.GAUSS_512["GAUSS_51201"])
@ -340,8 +335,6 @@ Option for grey upgrade
for nodeName in self.nodeNames:
if nodeName not in self.clusterNodes:
raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % nodeName)
node = self.clusterInfo.getDbNodeByName(nodeName)
self.node_ips = node.sshIps[0]
self.logger.debug("Successfully init global infos")
# If it is a dual-cluster, initialize the related information of the dual-cluster
@ -416,14 +409,11 @@ Option for grey upgrade
:return:
"""
if not hostList:
hostList = copy.deepcopy(self.cluster_ip)
hostList = copy.deepcopy(self.clusterNodes)
else:
hostList = copy.deepcopy(hostList)
local_ip = NetUtil.getLocalIp()
if local_ip in hostList:
hostList.remove(local_ip)
if "127.0.0.1" in hostList:
hostList.remove("127.0.0.1")
if NetUtil.GetHostIpOrName() in hostList:
hostList.remove(NetUtil.GetHostIpOrName())
self.logger.debug("Start copy file:{0} to hosts:{1}.".format(
file, hostList))
@ -485,7 +475,7 @@ if __name__ == '__main__':
impl.run()
except Exception as e:
if REPEAT:
upgrade.sshTool = SshTool(upgrade.cluster_ip,
upgrade.sshTool = SshTool(upgrade.clusterNodes,
DefaultValue.TIMEOUT_PSSH_COMMON)
GaussLog.exitWithError(str(e))
finally:

View File

@ -529,11 +529,6 @@ class DefaultValue():
SSH_AUTHORIZED_KEYS = os.path.expanduser("~/.ssh/authorized_keys")
SSH_KNOWN_HOSTS = os.path.expanduser("~/.ssh/known_hosts")
# os parameter
MAX_REMAIN_SEM = 240000
MIN_REMAIN_SEM = 10000
NOFILE_LIMIT = 640000
@staticmethod
def encodeParaline(cmd, keyword):
"""
@ -1954,12 +1949,12 @@ class DefaultValue():
cooInst.hostname, pid, currentTime)
tmpDir = EnvUtil.getTmpDirFromEnv()
filepath = os.path.join(tmpDir, outputfile)
ClusterCommand.executeSQLOnRemoteHost(cooInst.listenIps[0],
ClusterCommand.executeSQLOnRemoteHost(cooInst.hostname,
cooInst.port,
sql,
filepath)
(status, result, error_output) = \
SqlExecutor.getSQLResult(cooInst.listenIps[0],
SqlExecutor.getSQLResult(cooInst.hostname,
outputfile)
if (status != 2):
return 1, "[%s]: Error: %s result: %s status: " \
@ -1971,7 +1966,7 @@ class DefaultValue():
else:
for cooInst in cnList:
(status, output) = ClusterCommand.remoteSQLCommand(
sql, user, cooInst.listenIps[0], cooInst.port)
sql, user, cooInst.hostname, cooInst.port)
resList = output.split('\n')
if (status != 0 or len(resList) < 1):
return 1, "[%s]: %s" % (cooInst.hostname, output)
@ -2998,7 +2993,7 @@ class DefaultValue():
rm_dynamic_cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (cluster_dynamic_config,
cluster_dynamic_config)
perform_cmd = "{0} && {1}".format(rm_dynamic_cmd, rm_meta_data_cmd)
CmdExecutor.execCommandWithMode(perform_cmd, ssh_tool, host_list=[node.backIps[0]])
CmdExecutor.execCommandWithMode(perform_cmd, ssh_tool, host_list=[node.name])
logger.debug("Remove dynamic_config_file and CM metadata directory "
"on node [{0}] successfully.".format(node.name))
logger.log("Remove dynamic_config_file and CM metadata directory on all nodes.")
@ -3042,11 +3037,11 @@ class DefaultValue():
raise Exception(ErrorCode.GAUSS_514['GAUSS_51400'] % cmd + "Error:\n%s" % stderr)
cm_agent_conf_file = stdout.strip() + "/cm_agent/cm_agent.conf"
if not os.path.isfile(cm_agent_conf_file):
host_list = clusterinfo.getClusterSshIps()[0]
host_list = clusterinfo.getClusterNodeNames()
cm_agent_conf_temp_file = os.path.join(EnvUtil.getTmpDirFromEnv(), "cm_agent_tmp.conf")
for host_ip in host_list:
get_file_cmd = g_file.SHELL_CMD_DICT["scpFileFromRemote"] % \
(host_ip, NetUtil.getLocalIp(), cm_agent_conf_file, cm_agent_conf_temp_file)
(host_ip, NetUtil.GetHostIpOrName(), cm_agent_conf_file, cm_agent_conf_temp_file)
proc = FastPopen(get_file_cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = proc.communicate()
if not os.path.isfile(cm_agent_conf_temp_file):
@ -3136,7 +3131,7 @@ class DefaultValue():
for inst in instances:
logger.debug("Obtain hadr user info string on node:%s with port:%s."
% (inst.hostname, inst.port))
status, output = ClusterCommand.remoteSQLCommand(sql, db_user, inst.listenIps[0],
status, output = ClusterCommand.remoteSQLCommand(sql, db_user, inst.hostname,
inst.port, maintenance_mode=mode)
if status == 0 and output:
logger.debug("Successfully obtain hadr user info string.")
@ -3181,7 +3176,7 @@ class DefaultValue():
for inst in instances:
logger.debug("Decrypt hadr user info on node:%s with port:%s."
% (inst.hostname, inst.port))
status, output = ClusterCommand.remoteSQLCommand(sql, db_user, inst.listenIps[0],
status, output = ClusterCommand.remoteSQLCommand(sql, db_user, inst.hostname,
inst.port, maintenance_mode=mode)
if status == 0 and output and "|" in output and len(output.split("|")) == 2:
logger.debug("Successfully decrypt hadr user info string.")
@ -3335,35 +3330,6 @@ class DefaultValue():
# failed to read the upgrade_step.csv in isgreyUpgradeNodeSpecify
logger.logExit(str(e))
@staticmethod
def get_remain_kernel_sem():
"""
get remain kernel sem
"""
# get total sem
cmd = "cat /proc/sys/kernel/sem"
(status, output) = subprocess.getstatusoutput(cmd)
if status:
raise Exception(ErrorCode.GAUSS_501["GAUSS_50110"] % cmd)
parts = output.split()
semmns = int(parts[1])
# get used sem
cmd = "ipcs -s"
(status, output) = subprocess.getstatusoutput(cmd)
if status:
raise Exception(ErrorCode.GAUSS_501["GAUSS_50110"] % cmd)
current_sems_lines = output.split('\n')
# skip the first three lines and process the remaining lines
current_sems = [int(line.split()[3]) for line in current_sems_lines[3:] if line.strip()]
# Calculate the number of semaphores currently in use
used_sems = sum(current_sems)
# Calculate the number of remaining semaphores
remaining_sems = semmns - used_sems
return remaining_sems
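
A hedged, self-contained sketch of the accounting the removed get_remain_kernel_sem performs: take the system-wide semaphore cap (SEMMNS, the second field of /proc/sys/kernel/sem) and subtract the semaphores already allocated according to the `ipcs -s` listing. The sample values below are made up.

def remaining_kernel_sems(proc_sem_line, ipcs_sem_counts):
    # proc_sem_line: contents of /proc/sys/kernel/sem, e.g. "250 32000 32 128"
    # ipcs_sem_counts: per-array semaphore counts parsed from `ipcs -s`
    semmns = int(proc_sem_line.split()[1])   # system-wide cap (SEMMNS)
    return semmns - sum(ipcs_sem_counts)

# e.g. a 32000-semaphore cap with arrays of 250 and 17 semaphores in use:
print(remaining_kernel_sems("250 32000 32 128", [250, 17]))   # 31733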
class ClusterCommand():
'''
Common for cluster command
@ -3671,8 +3637,8 @@ class ClusterCommand():
LocalRemoteCmd.cleanFile(sqlFile)
return (1, str(e))
# send new sql file to remote node if needed
localHost = NetUtil.getLocalIp()
if str(localHost) != str(host) and str(host) != "127.0.0.1":
localHost = NetUtil.GetHostIpOrName()
if str(localHost) != str(host):
cmd = LocalRemoteCmd.getRemoteCopyCmd(sqlFile, sqlFile, host)
if os.getuid() == 0 and user != "":
cmd = "su - %s \"%s\"" % (user, cmd)
@ -3691,7 +3657,7 @@ class ClusterCommand():
user_pwd=user_pwd)
if maintenance_mode:
gsql_cmd += " -m "
if str(localHost) != str(host) and str(host) != "127.0.0.1":
if str(localHost) != str(host):
sshCmd = CmdUtil.getSshCmd(host)
if os.getuid() == 0 and user != "":
cmd = " %s 'su - %s -c \"" % (sshCmd, user)
@ -3774,7 +3740,7 @@ class ClusterCommand():
if (status1 != 0):
LocalRemoteCmd.cleanFile("%s,%s" % (queryResultFile, sqlFile))
return (status1, output1)
if (str(localHost) != str(host) and str(host) != "127.0.0.1"):
if (str(localHost) != str(host)):
remoteCmd = LocalRemoteCmd.getRemoteCopyCmd(
queryResultFile,
EnvUtil.getTmpDirFromEnv(user) + "/", str(localHost))
@ -3792,14 +3758,14 @@ class ClusterCommand():
rowList = fp.readlines()
except Exception as e:
LocalRemoteCmd.cleanFile("%s,%s" % (queryResultFile, sqlFile))
if (str(localHost) != str(host) and str(host) != "127.0.0.1"):
if (str(localHost) != str(host)):
LocalRemoteCmd.cleanFile("%s,%s" % (queryResultFile, sqlFile),
host)
return (1, str(e))
# remove local sqlFile
LocalRemoteCmd.cleanFile("%s,%s" % (queryResultFile, sqlFile))
# remove remote sqlFile
if (str(localHost) != str(host) and str(host) != "127.0.0.1"):
if (str(localHost) != str(host)):
LocalRemoteCmd.cleanFile("%s,%s" % (queryResultFile, sqlFile), host)
return (0, "".join(rowList)[:-1])

View File

@ -1334,17 +1334,14 @@ class dbClusterInfo():
elif querytype == "port":
querycmd = "gs_guc check -D %s -c port" % dnInst.datadir
dbName = dbNode.name
db_ssh_ip = dbNode.sshIps[0]
dbInfoList.append({
"name": dbName,
"ip": db_ssh_ip,
"command": querycmd,
"sshtool": sshtool
})
def queryInstance(dbInfo):
dnName = dbInfo["name"]
dn_ssh_ip = dbInfo["ip"]
command = dbInfo["command"]
sshtool = dbInfo["sshtool"]
status = 0
@ -1352,13 +1349,13 @@ class dbClusterInfo():
if dnName != hostName:
(statusMap, output) = sshtool.getSshStatusOutput(
command, [dn_ssh_ip], mpprcFile)
if statusMap[dn_ssh_ip] != 'Success':
command, [dnName], mpprcFile)
if statusMap[dnName] != 'Success':
status = -1
else:
(status, output) = subprocess.getstatusoutput(command)
global_cls_query_rst[dn_ssh_ip+command.split()[-1]] = [status, output]
global_cls_query_rst[dnName+command.split()[-1]] = [status, output]
global global_cls_query_rst
parallelTool.parallelExecute(queryInstance, dbInfoList)
@ -1385,7 +1382,7 @@ class dbClusterInfo():
for dbNode in self.dbNodes:
for dnInst in dbNode.datanodes:
(status, output) = queryClsResult.get(dbNode.sshIps[0] + dnInst.datadir)
(status, output) = queryClsResult.get(dbNode.name + dnInst.datadir)
if status != 0 or output.find("exc_sql failed") > 0:
if output.find(
"could not connect to the local server") \
@ -1555,8 +1552,8 @@ class dbClusterInfo():
"%s -A -t -c \"%s\"" % \
(dnInst.port, sql_get)
(statusMap, output) = sshtool.getSshStatusOutput(cmd, [
dbNode.sshIps[0]])
if statusMap[dbNode.sshIps[0]] != 'Success' or output.find(
dbNode.name])
if statusMap[dbNode.name] != 'Success' or output.find(
"failed to connect") >= 0:
continue
else:
@ -1585,8 +1582,8 @@ class dbClusterInfo():
"%s -A -t -c \"%s\"" % \
(dnInst.port, subsql)
(statusMap, cascadeOutput) = sshtool.getSshStatusOutput(cmd, [
dbNode.sshIps[0]])
if statusMap[dbNode.sshIps[0]] != 'Success' or cascadeOutput.find(
dbNode.name])
if statusMap[dbNode.name] != 'Success' or cascadeOutput.find(
"failed to connect") >= 0:
continue
else:
@ -1633,9 +1630,9 @@ class dbClusterInfo():
"\"%s\"" % (
dnInst.port, sql)
(statusMap, output) = sshtool.getSshStatusOutput(cmd,
[dbNode.sshIps[0]])
[dbNode.name])
dnDown = output.find("failed to connect") >= 0
if statusMap[dbNode.sshIps[0]] != 'Success' or dnDown:
if statusMap[dbNode.name] != 'Success' or dnDown:
dnInst.localRole = "Down" if dnDown else "Unknown"
dnInst.staticConnections = 0
dnInst.state = "Manually stopped" if dnDown else "Unknown"
@ -4617,7 +4614,7 @@ class dbClusterInfo():
dynamicConfigFile)
except Exception as e:
cmd = "rm -f %s" % dynamicConfigFile
sshtool.getSshStatusOutput(cmd, self.getClusterSshIps()[0])
sshtool.getSshStatusOutput(cmd, self.getClusterNodeNames())
raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % \
"dynamic configuration file" +
" Error: \n%s" % str(e))
@ -4666,7 +4663,7 @@ class dbClusterInfo():
simpleDNConfig)
except Exception as e:
cmd = "rm -f %s" % simpleDNConfig
sshtool.getSshStatusOutput(cmd, self.getClusterSshIps()[0])
sshtool.getSshStatusOutput(cmd, self.getClusterNodeNames())
raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] %
"dynamic configuration file" +
" Error: \n%s" % str(e))
@ -4677,7 +4674,7 @@ class dbClusterInfo():
+ '/../../local/Resetreplconninfo.py'
cmd = "python3 %s -U %s -t reset" % (local_script, user)
sshtool.setTimeOut(120)
for node in self.getClusterSshIps()[0]:
for node in self.getClusterNodeNames():
(status, output) = sshtool.getSshStatusOutput(cmd, [node])
if status[node] != 'Success':
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"]
@ -4791,7 +4788,7 @@ class dbClusterInfo():
% (gaussHome, dbNode.name)
if dbNode.name != localHostName:
cmd = "export LD_LIBRARY_PATH=/usr/lib64;/usr/bin/scp %s:%s %s" % (
dbNode.sshIps[0], dynamicConfigFile, remoteDynamicConfigFile)
dbNode.name, dynamicConfigFile, remoteDynamicConfigFile)
status, output = subprocess.getstatusoutput(cmd)
if status:
if output.find("No such file or directory") >= 0:
@ -4831,7 +4828,7 @@ class dbClusterInfo():
cmd = "cp -f %s %s" % (sourceFile, targetFile)
status, output = subprocess.getstatusoutput(cmd)
else:
cmd = "export LD_LIBRARY_PATH=/usr/lib64;/usr/bin/scp %s %s:%s" % (sourceFile, dbNode.sshIps[0], targetFile)
cmd = "export LD_LIBRARY_PATH=/usr/lib64;/usr/bin/scp %s %s:%s" % (sourceFile, dbNode.name, targetFile)
status, output = subprocess.getstatusoutput(cmd)
if status:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +

View File

@ -1786,7 +1786,7 @@ class GaussStat():
f.writelines(output)
if (f):
f.close()
tmp_sshTool = SshTool(g_clusterInfo.getClusterSshIps()[0],
tmp_sshTool = SshTool(g_clusterInfo.getClusterNodeNames(),
self.logger.logFile)
tmp_sshTool.scpFiles(databaseSizeFile, binPath)
else:
@ -1809,7 +1809,7 @@ class GaussStat():
f.writelines(output)
if (f):
f.close()
tmp_sshTool = SshTool(g_clusterInfo.getClusterSshIps()[0],
tmp_sshTool = SshTool(g_clusterInfo.getClusterNodeNames(),
self.logger.logFile)
tmp_sshTool.scpFiles(databaseSizeFile, binPath)
except Exception as e:

View File

@ -335,11 +335,11 @@ class ParallelBaseOM(object):
self.logger.debug("Distributing files.")
try:
# get the all nodes
hosts = self.clusterInfo.getClusterSshIps()[0]
if NetUtil.getLocalIp() not in hosts:
hosts = self.clusterInfo.getClusterNodeNames()
if NetUtil.GetHostIpOrName() not in hosts:
raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] %
NetUtil.getLocalIp())
hosts.remove(NetUtil.getLocalIp())
NetUtil.GetHostIpOrName())
hosts.remove(NetUtil.GetHostIpOrName())
# Send xml file to every host
DefaultValue.distributeXmlConfFile(self.sshTool, self.xmlFile,
hosts, self.mpprcFile)
@ -432,7 +432,7 @@ class ParallelBaseOM(object):
killSnapshotSQL = "select * from kill_snapshot();"
(status, output) = ClusterCommand.remoteSQLCommand(
killSnapshotSQL, self.user, dnInst.listenIps[0], dnInst.port,
killSnapshotSQL, self.user, dnInst.hostname, dnInst.port,
False, DefaultValue.DEFAULT_DB_NAME)
if (status != 0):
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
@ -454,7 +454,7 @@ class ParallelBaseOM(object):
self.logger.debug("The ca file dir is: %s." % caPath)
if (len(hostList) == 0):
for dbNode in self.clusterInfo.dbNodes:
hostList.append(dbNode.sshIps[0])
hostList.append(dbNode.name)
# Create CA dir and prepare files for using.
self.logger.debug("Create CA file directory.")
try:
@ -492,7 +492,7 @@ class ParallelBaseOM(object):
self.logger.debug("The ca file dir is: %s." % caPath)
if (len(hostList) == 0):
for dbNode in self.clusterInfo.dbNodes:
hostList.append(dbNode.sshIps[0])
hostList.append(dbNode.name)
# Create CA dir and prepare files for using.
self.logger.debug("Create CA file directory.")
try:
@ -514,7 +514,7 @@ class ParallelBaseOM(object):
FileUtil.removeFile(certFile)
DefaultValue.cleanCaDir(caPath)
raise Exception(str(e))
if len(hostList) == 1 and hostList[0] == NetUtil.getLocalIp():
if len(hostList) == 1 and hostList[0] == socket.gethostname():
self.logger.debug("Local host database, no need transform files.")
else:
for certFile in DefaultValue.GRPC_CERT_LIST:
@ -568,7 +568,7 @@ class ParallelBaseOM(object):
"'%s'/server.key.rand" % binPath)
if len(hostList) == 0:
for dbNode in self.clusterInfo.dbNodes:
hostList.append(dbNode.sshIps[0])
hostList.append(dbNode.name)
if not self.isSingle:
# localhost no need scp files
for certFile in DefaultValue.BIN_CERT_LIST:

View File

@ -436,10 +436,6 @@ class DN_OLAP(Kernel):
dynamicDict = {}
dynamicDict = DefaultValue.dynamicGuc("dn", tmpGucFile,
gucXml)
# get os remain sem
remain_sem = DefaultValue.get_remain_kernel_sem()
# calc max_connections for remain sem
self.calc_max_connections_for_sems(remain_sem, dynamicDict)
if gucXml:
dynamicDict["log_line_prefix"] = "'%s'" % \
dynamicDict["log_line_prefix"]
@ -480,18 +476,6 @@ class DN_OLAP(Kernel):
self.modifyDummpyStandbyConfigItem()
def calc_max_connections_for_sems(self, remaining_sems, guc_dict):
"""
calc max connetions for remain sem
"""
if int(remaining_sems) >= DefaultValue.MAX_REMAIN_SEM:
return
elif int(remaining_sems) < DefaultValue.MIN_REMAIN_SEM:
raise Exception("Error: The remaining signal quantity of the current system is less than %s" % DefaultValue.MIN_REMAIN_SEM)
else:
# Number of connections with 1w semaphore=200
guc_dict["max_connections"] = int((600 / 30000 * int(remaining_sems)))
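
Worked numbers for the removed scaling rule (constants copied from the hunk above; the wrapper function is only for illustration): 600 / 30000 gives 0.02 connections per remaining semaphore, so 10,000 remaining semaphores ("1w" in the removed comment, i.e. 10,000) map to 200 connections and 30,000 map to 600, while 240,000 or more leaves the configured default untouched.

MAX_REMAIN_SEM = 240000
MIN_REMAIN_SEM = 10000

def scaled_max_connections(remaining_sems):
    # Same branches as the removed calc_max_connections_for_sems.
    if remaining_sems >= MAX_REMAIN_SEM:
        return None                      # keep the value already in the GUC dict
    if remaining_sems < MIN_REMAIN_SEM:
        raise ValueError("fewer than %d semaphores remain" % MIN_REMAIN_SEM)
    return int(600 / 30000 * remaining_sems)

print(scaled_max_connections(10000))    # 200
print(scaled_max_connections(30000))    # 600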
def setPghbaConfig(self, clusterAllIpList, try_reload=False, float_ips=None):
"""
"""

View File

@ -38,7 +38,6 @@ import select
sys.path.append(sys.path[0] + "/../../")
from gspylib.common.ErrorCode import ErrorCode
from base_utils.os.net_util import NetUtil
localDirPath = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, localDirPath + "/../../../lib/netifaces/")
@ -620,20 +619,7 @@ class GenericPlatform:
input: NA
output: str
"""
try:
cmd = "hostname -I | awk '{print $1}'"
(status, output) = subprocess.getstatusoutput(cmd)
if not status:
return output.strip()
env_dist = os.environ
host_ip = env_dist.get("HOST_IP")
if host_ip is not None:
if NetUtil.isIpValid(host_ip):
return host_ip
host_ip = socket.gethostbyname(socket.gethostname())
except Exception as e:
raise Exception(str(e))
return host_ip
return self.getHostName()
def getScpCmd(self):
"""

View File

@ -444,7 +444,7 @@ class SshTool():
# single cluster or execute only in local node.
if (len(hostList) == 1 and
(hostList[0] == "127.0.0.1" or hostList[0] == NetUtil.getLocalIp())
hostList[0] == NetUtil.GetHostIpOrName()
and cmd.find(" --lock-cluster ") < 0):
localMode = True
if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile):
@ -610,7 +610,7 @@ class SshTool():
# single cluster or execute only in local node.
if (len(hostList) == 1 and
(hostList[0] == "127.0.0.1" or hostList[0] == NetUtil.getLocalIp())):
hostList[0] == NetUtil.GetHostIpOrName()):
localMode = True
if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile):
sshCmd = "source %s ; %s 2>&1" % (osProfile, cmd)
@ -766,28 +766,23 @@ class SshTool():
sshHosts.append("[" + host + "]")
else:
sshHosts.append(host)
if len(sshHosts) == 1 and (sshHosts[0] == NetUtil.getLocalIp() or sshHosts[0] == "127.0.0.1") and \
if len(sshHosts) == 1 and sshHosts[0] == socket.gethostname() and \
srcFile != targetDir and \
srcFile != os.path.join(targetDir, os.path.split(srcFile)[1]):
localMode = True
scpCmd = "cp -r %s %s" % (srcFile, targetDir)
else:
# cp file on local node
if NetUtil.getLocalIp() in sshHosts or "127.0.0.1" in sshHosts:
if NetUtil.getLocalIp() in sshHosts:
localhost_idx = sshHosts.index(NetUtil.getLocalIp())
local_host = NetUtil.getLocalIp()
if "127.0.0.1" in sshHosts:
localhost_idx = sshHosts.index("127.0.0.1")
local_host = "127.0.0.1"
if socket.gethostname() in sshHosts:
localhost_idx = sshHosts.index(socket.gethostname())
sshHosts.pop(localhost_idx)
cpcmd = "cp -r %s %s" % (srcFile, targetDir)
if srcFile != targetDir and srcFile != os.path.join(targetDir, os.path.basename(srcFile)):
(status, output) = subprocess.getstatusoutput(cpcmd)
if status == 0:
resultMap[local_host] = DefaultValue.SUCCESS
resultMap[socket.gethostname()] = DefaultValue.SUCCESS
else:
resultMap[local_host] = DefaultValue.FAILURE
resultMap[socket.gethostname()] = DefaultValue.FAILURE
if not sshHosts:
return
scpCmd = "%s -r -v -t %s -p %s -H %s -o %s -e %s %s %s" \

View File

@ -45,6 +45,11 @@ class BackupImpl:
input : NA
output: NA
'''
try:
self.context.initLogger(self.context.action)
except Exception as e:
self.context.logger.closeLog()
raise Exception(str(e))
try:
self.parseConfigFile()

View File

@ -56,7 +56,7 @@ class BackupImplOLAP(BackupImpl):
self.context.logger.log("Parsing configuration files.")
if self.context.isForce and self.context.nodename != "" \
and self.context.action == BackupImpl.ACTION_RESTORE:
self.context.initSshTool([self.context.node_ip],
self.context.initSshTool([self.context.nodename],
DefaultValue.TIMEOUT_PSSH_BACKUP)
self.context.logger.log(
"Successfully init restore nodename: %s."
@ -65,18 +65,18 @@ class BackupImplOLAP(BackupImpl):
try:
self.context.initClusterInfoFromStaticFile(self.context.user)
host_ip_list = []
nodeNames = self.context.clusterInfo.getClusterNodeNames()
if self.context.nodename == "":
host_ip_list = self.context.clusterInfo.getClusterBackIps()
self.context.nodename = nodeNames
else:
remoteNode = self.context.clusterInfo.getDbNodeByName(
self.context.nodename)
if remoteNode is None:
raise Exception(ErrorCode.GAUSS_512["GAUSS_51209"] % (
"the node", self.context.nodename))
host_ip_list = [remoteNode.backIps]
self.context.nodename = [self.context.nodename]
self.context.initSshTool(host_ip_list,
self.context.initSshTool(self.context.nodename,
DefaultValue.TIMEOUT_PSSH_BACKUP)
except Exception as e:
@ -86,7 +86,7 @@ class BackupImplOLAP(BackupImpl):
def doRemoteBackup(self):
"""
function: backup opengauss
function: Get user and group
input : NA
output: NA
"""
@ -157,14 +157,14 @@ class BackupImplOLAP(BackupImpl):
cmd = g_file.SHELL_CMD_DICT["createDir"] \
% (self.context.backupDir, self.context.backupDir,
DefaultValue.KEY_DIRECTORY_MODE)
self._runCmd(cmd, self.context.node_ip)
self._runCmd(cmd, self.context.nodename)
# send backup package to the specified node from the local node
originalFile = "'%s'/%s.tar" % (tmp_backupDir, flag)
if flag == "parameter":
if self.context.nodename != [NetUtil.getHostName()]:
self.context.sshTool.scpFiles(
originalFile,
self.context.backupDir, self.context.node_ip)
self.context.backupDir, self.context.nodename)
else:
FileUtil.cpFile(originalFile, self.context.backupDir)
else:

View File

@ -188,7 +188,7 @@ class CheckperfImplOLAP(CheckperfImpl):
self.logger.debug(
"Successfully collected statistics on all hosts.")
def getMetaData(self, hostName, host, port):
def getMetaData(self, hostName, port):
"""
function: get meta data of PMK(curr_collect_start_time,
last_collect_start_time, last_snapshot_id)
@ -221,10 +221,10 @@ class CheckperfImplOLAP(CheckperfImpl):
filepath = os.path.join(tmpDir, outputfile)
# execute SQL on remote host
ClusterCommand.executeSQLOnRemoteHost(
host, port, querySql, filepath)
hostName, port, querySql, filepath)
# get sql result from outputfile
(status, result, error_output) = \
SqlExecutor.getSQLResult(host, outputfile)
SqlExecutor.getSQLResult(hostName, outputfile)
if (status != 2 or error_output != ""):
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] \
@ -249,7 +249,7 @@ class CheckperfImplOLAP(CheckperfImpl):
else:
(status, output) = ClusterCommand.remoteSQLCommand(
querySql, self.opts.user,
host, port, False, DefaultValue.DEFAULT_DB_NAME)
hostName, port, False, DefaultValue.DEFAULT_DB_NAME)
if (status != 0):
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"]
% querySql + " Error: \n%s" % str(output))
@ -271,7 +271,7 @@ class CheckperfImplOLAP(CheckperfImpl):
except Exception as e:
raise Exception(str(e))
def deleteExpiredSnapShots(self, hostName, host, port):
def deleteExpiredSnapShots(self, hostName, port):
"""
function: delete expired snapshots records
input : hostName, port
@ -298,10 +298,10 @@ class CheckperfImplOLAP(CheckperfImpl):
filepath = os.path.join(tmpDir, outputfile)
# execute SQL on remote host
ClusterCommand.executeSQLOnRemoteHost( \
host, port, querySql, filepath)
hostName, port, querySql, filepath)
# get sql result from outputfile
(status, result, error_output) = \
SqlExecutor.getSQLResult(host, outputfile)
SqlExecutor.getSQLResult(hostName, outputfile)
if (status != 2):
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] \
% querySql \
@ -313,7 +313,7 @@ class CheckperfImplOLAP(CheckperfImpl):
querySql = "SELECT * FROM pmk.delete_expired_snapshots();"
(status, output) = ClusterCommand.remoteSQLCommand(
querySql, self.opts.user,
host, port, False, DefaultValue.DEFAULT_DB_NAME)
hostName, port, False, DefaultValue.DEFAULT_DB_NAME)
if (status != 0):
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] \
% querySql \
@ -430,8 +430,6 @@ class CheckperfImplOLAP(CheckperfImpl):
hostNames = self.clusterInfo.getClusterNodeNames()
# traversing host name
for hostName in hostNames:
node = self.clusterInfo.getDbNodeByName(hostName)
node_ip = node.sshIps[0]
recordTempFile = os.path.join(
EnvUtil.getTmpDirFromEnv(self.opts.user),
"recordTempFile_%d_%s" % (os.getpid(), hostName))
@ -442,9 +440,9 @@ class CheckperfImplOLAP(CheckperfImpl):
else:
if (self.clusterInfo.isSingleInstCluster()):
continue
if (hostName != NetUtil.getHostName()):
if (hostName != NetUtil.GetHostIpOrName()):
scpcmd = "pssh -s -H %s 'pscp -H %s %s %s' " \
% (node_ip, NetUtil.getHostName(),
% (hostName, NetUtil.GetHostIpOrName(),
recordTempFile, recordTempFile)
(status, output) = subprocess.getstatusoutput(scpcmd)
if (status != 0):
@ -648,7 +646,7 @@ class CheckperfImplOLAP(CheckperfImpl):
FileUtil.removeFile(tempFile)
raise Exception(str(e))
def getAllHostsPrevNodeStat(self, hostName, host, port, snapshotId):
def getAllHostsPrevNodeStat(self, hostName, port, snapshotId):
"""
function: get prev node stat of all hosts
input : hostName, port, snapshotId
@ -698,9 +696,9 @@ class CheckperfImplOLAP(CheckperfImpl):
filepath = os.path.join(tmpDir, outputfile)
# execute SQL on remote host
ClusterCommand.executeSQLOnRemoteHost(
host, port, querySql, filepath, snapshotId)
hostName, port, querySql, filepath, snapshotId)
(status, result, error_output) = \
SqlExecutor.getSQLResult(host, outputfile)
SqlExecutor.getSQLResult(hostName, outputfile)
if (status != 2):
raise Exception(
ErrorCode.GAUSS_513["GAUSS_51300"] \
@ -750,9 +748,9 @@ class CheckperfImplOLAP(CheckperfImpl):
tmpDir = EnvUtil.getTmpDirFromEnv(self.opts.user)
filepath = os.path.join(tmpDir, outputfile)
ClusterCommand.executeSQLOnRemoteHost(
host, port, querySql, filepath, snapshotId)
hostName, port, querySql, filepath, snapshotId)
(status, result, error_output) = \
SqlExecutor.getSQLResult(host, outputfile)
SqlExecutor.getSQLResult(hostName, outputfile)
if (status != 2):
raise Exception(
ErrorCode.GAUSS_513["GAUSS_51300"] \
@ -1031,7 +1029,7 @@ class CheckperfImplOLAP(CheckperfImpl):
except Exception as e:
raise Exception(str(e))
def handleSessionCpuStat(self, hostname, host):
def handleSessionCpuStat(self, hostname):
"""
function: handle session cpu stat of all hosts
input : hostname
@ -1078,7 +1076,7 @@ class CheckperfImplOLAP(CheckperfImpl):
self.sshTool.scpFiles(
sessionCpuTempResult,
EnvUtil.getTmpDirFromEnv(self.opts.user) \
+ "/", [host])
+ "/", [hostname])
FileUtil.removeFile(sessionCpuTempResult)
else:
@ -1090,7 +1088,7 @@ class CheckperfImplOLAP(CheckperfImpl):
FileUtil.removeFile(sessionCpuTempResult)
raise Exception(str(e))
def handleSessionMemoryStat(self, hostname, host):
def handleSessionMemoryStat(self, hostname):
"""
function: handle session memory stat of all hosts
input : hostname
@ -1139,7 +1137,7 @@ class CheckperfImplOLAP(CheckperfImpl):
self.sshTool.scpFiles(
sessionMemTempResult,
EnvUtil.getTmpDirFromEnv(self.opts.user) \
+ "/", [host])
+ "/", [hostname])
FileUtil.removeFile(sessionMemTempResult)
else:
@ -1151,7 +1149,7 @@ class CheckperfImplOLAP(CheckperfImpl):
FileUtil.removeFile(sessionMemTempResult)
raise Exception(str(e))
def handleSessionIOStat(self, hostname, host):
def handleSessionIOStat(self, hostname):
"""
function: handle session IO stat of all hosts
input : hostname
@ -1198,7 +1196,7 @@ class CheckperfImplOLAP(CheckperfImpl):
self.sshTool.scpFiles(
sessionIOTempResult,
EnvUtil.getTmpDirFromEnv(self.opts.user) \
+ "/", [host])
+ "/", [hostname])
# close and remove temporary file
FileUtil.removeFile(sessionIOTempResult)
@ -1306,7 +1304,7 @@ class CheckperfImplOLAP(CheckperfImpl):
"The total database size is [%s]." \
% str(self.opts.databaseSize))
def insertNodeStat(self, hostName, host, port, currTime, lastTime, snapshotId):
def insertNodeStat(self, hostName, port, currTime, lastTime, snapshotId):
"""
function: insert the node stat of all hosts into the cluster
input : hostname, port, currTime, lastTime, snapshotId
@ -1405,27 +1403,27 @@ class CheckperfImplOLAP(CheckperfImpl):
hostName, pid, currentTime)
tmpDir = EnvUtil.getTmpDirFromEnv()
filepath = os.path.join(tmpDir, outputfile)
ClusterCommand.executeSQLOnRemoteHost(dnInst.listenIps[0],
ClusterCommand.executeSQLOnRemoteHost(dnInst.hostname,
dnInst.port,
tempSql,
filepath)
(status, result,
error_output1) = SqlExecutor.getSQLResult(
dnInst.listenIps[0], outputfile)
ClusterCommand.executeSQLOnRemoteHost(dnInst.listenIps[0],
dnInst.hostname, outputfile)
ClusterCommand.executeSQLOnRemoteHost(dnInst.hostname,
dnInst.port,
insertSql,
filepath)
(status, result,
error_output2) = SqlExecutor.getSQLResult(
dnInst.listenIps[0], outputfile)
ClusterCommand.executeSQLOnRemoteHost(dnInst.listenIps[0],
dnInst.hostname, outputfile)
ClusterCommand.executeSQLOnRemoteHost(dnInst.hostname,
dnInst.port,
updateSql,
filepath)
(status, result,
error_output3) = SqlExecutor.getSQLResult(
dnInst.listenIps[0], outputfile)
dnInst.hostname, outputfile)
if error_output1 != "":
self.logger.debug(
"Failed to execute SQL: %s" % startSql
@ -1455,7 +1453,7 @@ class CheckperfImplOLAP(CheckperfImpl):
+ updateSql + commitSql
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.opts.user,
host, port, False, DefaultValue.DEFAULT_DB_NAME)
hostName, port, False, DefaultValue.DEFAULT_DB_NAME)
if status != 0:
self.logger.debug(
"Failed to execute SQL: %s" % sql
@ -1503,7 +1501,7 @@ class CheckperfImplOLAP(CheckperfImpl):
if (self.opts.mpprcFile != ""):
cmd = "source %s; %s" % (self.opts.mpprcFile, cmd)
if (host != NetUtil.getLocalIp()):
if (host != NetUtil.GetHostIpOrName()):
cmd = "pssh -s -H %s \'%s\'" % (str(host), cmd)
if (os.getuid() == 0):
@ -1530,7 +1528,7 @@ class CheckperfImplOLAP(CheckperfImpl):
"""
try:
querySql = "DROP SCHEMA IF EXISTS pmk CASCADE;"
local_host = NetUtil.getLocalIp()
local_host = NetUtil.GetHostIpOrName()
if (self.DWS_mode):
if (host == local_host):
(status, result,
@ -1574,7 +1572,7 @@ class CheckperfImplOLAP(CheckperfImpl):
querySql = "SELECT * FROM pmk.pmk_meta_data " \
"WHERE last_snapshot_collect_time >= " \
"date_trunc('second', current_timestamp);"
local_host = NetUtil.getLocalIp()
local_host = NetUtil.GetHostIpOrName()
if (self.DWS_mode):
if (host == local_host):
(status, result, error_output) = \
@ -1743,7 +1741,6 @@ class CheckperfImplOLAP(CheckperfImpl):
self.checkClusterStatus()
nodeNames = self.clusterInfo.getClusterNodeNames()
node_ips = self.clusterInfo.getClusterSshIps()[0]
tmpDir = EnvUtil.getTmpDirFromEnv(self.opts.user)
pid = os.getpid()
for nodeName in nodeNames:
@ -1773,10 +1770,9 @@ class CheckperfImplOLAP(CheckperfImpl):
normalDNs = self.getNormalDatanodes()
hostname = normalDNs[0].hostname
host = normalDNs[0].listenIps[0]
data_dir = normalDNs[0].datadir
port = normalDNs[0].port
dn_ip = NetUtil.getLocalIp()
dn_ip = NetUtil.GetHostIpOrName()
if self.check_enable_dcf(dn_ip, data_dir):
paxos_logger_role_ip = self.get_paxos_role(dn_ip, data_dir)
self.logger.debug("the paxos logger role ip is %s" % paxos_logger_role_ip)
@ -1784,25 +1780,24 @@ class CheckperfImplOLAP(CheckperfImpl):
for logger_ip in paxos_logger_role_ip:
logger_hostname = self.clusterInfo.getNodeNameByBackIp(logger_ip)
nodeNames.remove(logger_hostname)
node_ips.remove(logger_ip)
# install pmk schema
self.installPMKSchema(dn_ip, port)
self.installPMKSchema(hostname, port)
# check pmk_meta_data
self.checkPMKMetaData(dn_ip, port)
self.checkPMKMetaData(hostname, port)
# get pmk meta data
(pmk_curr_collect_start_time,
pmk_last_collect_start_time, last_snapshot_id) = \
self.getMetaData(hostname, host, port)
self.deleteExpiredSnapShots(hostname, host, port)
self.getMetaData(hostname, port)
self.deleteExpiredSnapShots(hostname, port)
# collect pmk stat
self.collectPMKData(pmk_curr_collect_start_time,
pmk_last_collect_start_time,
last_snapshot_id, port, node_ips)
last_snapshot_id, port, nodeNames)
# launch asynchronous collection
self.launchAsynCollection(host, port)
self.launchAsynCollection(hostname, port)
# get database size from previous collection
self.getPreviousDbSize()
@ -1815,20 +1810,20 @@ class CheckperfImplOLAP(CheckperfImpl):
# get memory stat of all sessions
self.getAllSessionMemoryStat()
# handle session cpu stat of all hosts
self.handleSessionCpuStat(str(hostname), host)
self.handleSessionCpuStat(str(hostname))
# Handle session IO stat of all hosts
self.handleSessionIOStat(str(hostname), host)
self.handleSessionIOStat(str(hostname))
# handle session memory stat of all hosts
self.handleSessionMemoryStat(str(hostname), host)
self.handleSessionMemoryStat(str(hostname))
# get node stat of all hosts
self.getAllHostsNodeStat()
# get prev node stat of all hosts
self.getAllHostsPrevNodeStat(hostname, host, port, last_snapshot_id)
self.getAllHostsPrevNodeStat(hostname, port, last_snapshot_id)
# handle the node stat of all hosts
self.handleNodeStat()
# insert the node stat of all hosts into the cluster
self.insertNodeStat(hostname, host, port,
self.insertNodeStat(hostname, port,
pmk_curr_collect_start_time,
pmk_last_collect_start_time, last_snapshot_id)
@ -1858,7 +1853,7 @@ class CheckperfImplOLAP(CheckperfImpl):
cmd += " --database-size=%s" % str(self.opts.databaseSize)
if (str(hostname) != NetUtil.GetHostIpOrName()):
cmd = "pssh -s -H %s \'%s\'" % (str(host), cmd)
cmd = "pssh -s -H %s \'%s\'" % (str(hostname), cmd)
if (os.getuid() == 0):
cmd = """su - %s -c "%s" """ % (self.opts.user, cmd)


@ -73,17 +73,12 @@ class CollectImplOLAP(CollectImpl):
if not self.context.clusterInfo.getDbNodeByName(nodename):
self.context.logger.logExit(
ErrorCode.GAUSS_516["GAUSS_51619"] % nodename)
else:
node = self.context.clusterInfo.getDbNodeByName(nodename)
node_ip = node.sshIps[0]
self.context.node_ips.append(node_ip)
if (len(self.context.nodeName) == 0):
self.context.nodeName = \
self.context.clusterInfo.getClusterNodeNames()
self.context.node_ips = self.context.clusterInfo.getClusterSshIps()[0]
self.context.initSshTool(self.context.node_ips,
self.context.initSshTool(self.context.nodeName,
DefaultValue.TIMEOUT_PSSH_COLLECTOR)
if (len(self.context.nodeName) == 1 and self.context.nodeName[
0] == NetUtil.GetHostIpOrName()):
@ -154,10 +149,10 @@ class CollectImplOLAP(CollectImpl):
(self.context.nodeName[0], output))
else:
(status, output) = self.context.sshTool.getSshStatusOutput(
cmd, self.context.node_ips)
self.context.sshTool.parseSshOutput(self.context.node_ips)
cmd, self.context.nodeName)
self.context.sshTool.parseSshOutput(self.context.nodeName)
# Gets the execution result
for node in self.context.node_ips:
for node in self.context.nodeName:
if (status[node] != DefaultValue.SUCCESS):
flag = 1
failedNodeList.append(node)
@ -224,11 +219,11 @@ class CollectImplOLAP(CollectImpl):
else:
(status, output) = self.context.sshTool.getSshStatusOutput(
cmd,
self.context.node_ips)
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.node_ips)
self.context.nodeName)
# Gets the execution result
for node in self.context.node_ips:
for node in self.context.nodeName:
if (status[node] != DefaultValue.SUCCESS):
flag = 1
self.context.logger.log(
@ -405,13 +400,13 @@ class CollectImplOLAP(CollectImpl):
if self.context.isSingle or self.context.localMode:
if len(json.loads(output)["failedTask"]) > 0:
isFailed = 1
failedNodeList.append(self.context.node_ips[0])
failedNodeList.append(self.context.nodeName[0])
else:
successNodeList.append(self.context.node_ips[0])
self.generalDetailInfo(self.context.node_ips[0], output)
successNodeList.append(self.context.nodeName[0])
self.generalDetailInfo(self.context.nodeName[0], output)
jobName = json.loads(output)["jobName"]
else:
for node in self.context.node_ips:
for node in self.context.nodeName:
if len(json.loads(str(output[node]))["failedTask"]) > 0:
isFailed = 1
failedNodeList.append(node)
@ -438,14 +433,14 @@ class CollectImplOLAP(CollectImpl):
if self.context.isSingle or self.context.localMode:
if len(json.loads(output)["failedTask"]) == 0:
isFailed = 0
successNodeList.append(self.context.node_ips[0])
successNodeList.append(self.context.nodeName[0])
else:
failedNodeList.append(self.context.node_ips[0])
failedNodeList.append(self.context.nodeName[0])
self.generalDetailInfo(self.context.node_ips[0], output)
self.generalDetailInfo(self.context.nodeName[0], output)
jobName = json.loads(output)["jobName"]
else:
for node in self.context.node_ips:
for node in self.context.nodeName:
if len(json.loads(str(output[node]))["failedTask"]) == 0:
isFailed = 0
successNodeList.append(node)
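For reference, the per-node output parsed above is JSON; a toy example with the two keys the code actually reads (the payload itself is hypothetical):

import json

# Hypothetical collector result; only the "failedTask" and "jobName" keys come from the code above.
output = '{"jobName": "collect_20240801", "failedTask": []}'
parsed = json.loads(output)
is_failed = 0 if len(parsed["failedTask"]) == 0 else 1
job_name = parsed["jobName"]
print(is_failed, job_name)  # -> 0 collect_20240801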
@ -482,9 +477,9 @@ class CollectImplOLAP(CollectImpl):
else:
self.context.sshTool.getSshStatusOutput(
cmd,
self.context.node_ips)
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.node_ips)
self.context.nodeName)
# Gets the execution result
flag = self.resultCheck(outputMap)
if (flag == 0):
@ -513,9 +508,9 @@ class CollectImplOLAP(CollectImpl):
else:
self.context.sshTool.getSshStatusOutput(
cmd,
self.context.node_ips)
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.node_ips)
self.context.nodeName)
# Gets the execution result
flag = self.resultCheck(outputMap)
if (flag == 0):
@ -621,9 +616,9 @@ class CollectImplOLAP(CollectImpl):
"Collection will be timeout in %ds." % timeout)
self.context.sshTool.getSshStatusOutput(
cmd,
self.context.node_ips)
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.node_ips)
self.context.nodeName)
# Gets the execution result
flag = self.resultCheck(outputMap)
if (flag == 0):
@ -655,9 +650,9 @@ class CollectImplOLAP(CollectImpl):
else:
self.context.sshTool.getSshStatusOutput(
cmd,
self.context.node_ips)
self.context.nodeName)
output_map = self.context.sshTool.parseSshOutput(
self.context.node_ips)
self.context.nodeName)
# Gets the execution result
flag = self.resultCheck(output_map)
if (flag == 0):
@ -688,9 +683,9 @@ class CollectImplOLAP(CollectImpl):
else:
(status, output) = self.context.sshTool.getSshStatusOutput(
cmd,
self.context.node_ips)
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.node_ips)
self.context.nodeName)
# Gets the execution result
flag = self.planResultCheck(outputMap)
if (flag == 0):
@ -706,9 +701,9 @@ class CollectImplOLAP(CollectImpl):
else:
self.context.sshTool.getSshStatusOutput(
cmd,
self.context.node_ips)
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.node_ips)
self.context.nodeName)
# Gets the execution result
flag = self.resultCheck(outputMap)
if (flag == 0):
@ -773,12 +768,12 @@ class CollectImplOLAP(CollectImpl):
"Copy logs will be timeout in %ds." % timeout)
(status, output) = self.context.sshTool.getSshStatusOutput(
cmd,
self.context.node_ips,
self.context.nodeName,
parallel_num=parallelNum)
self.context.sshTool.parseSshOutput(
self.context.node_ips)
self.context.nodeName)
# Gets the execution result
for node in self.context.node_ips:
for node in self.context.nodeName:
if (status[node] == DefaultValue.SUCCESS):
flag = 1


@ -67,7 +67,6 @@ class DropnodeImpl():
self.group = self.context.group
self.backupFilePrimary = ''
self.localhostname = NetUtil.GetHostIpOrName()
self.local_ip = NetUtil.getLocalIp()
self.logger = self.context.logger
self.resultDictOfPrimary = []
self.replSlot = ''
@ -113,25 +112,23 @@ class DropnodeImpl():
check the state of all standbys to see whether a switchover is happening
"""
for hostNameLoop in self.context.hostMapForExist.keys():
host_ip = self.context.backIpNameMap[hostNameLoop]
sshtool_host = SshTool([host_ip])
sshtool_host = SshTool([hostNameLoop])
for i in self.context.hostMapForExist[hostNameLoop]['datadir']:
# check whether switchover/failover is happening
self.commonOper.checkStandbyState(host_ip, i,
self.commonOper.checkStandbyState(hostNameLoop, i,
sshtool_host,
self.userProfile)
self.cleanSshToolFile(sshtool_host)
for hostNameLoop in self.context.hostMapForDel.keys():
host_ip = self.context.backIpNameMap[hostNameLoop]
if hostNameLoop not in self.context.failureHosts:
sshtool_host = SshTool([host_ip])
sshtool_host = SshTool([hostNameLoop])
for i in self.context.hostMapForDel[hostNameLoop]['datadir']:
# check whether switchover/failover is happening
self.commonOper.checkStandbyState(host_ip, i,
self.commonOper.checkStandbyState(hostNameLoop, i,
sshtool_host,
self.userProfile, True)
self.commonOper.stopInstance(host_ip, sshtool_host, i,
self.commonOper.stopInstance(hostNameLoop, sshtool_host, i,
self.userProfile)
self.cleanSshToolFile(sshtool_host)
else:
@ -142,12 +139,11 @@ class DropnodeImpl():
drop the target node on the other host
"""
for hostNameLoop in self.context.hostMapForExist.keys():
host_ip = self.context.backIpNameMap[hostNameLoop]
sshtool_host = SshTool([host_ip])
sshtool_host = SshTool([hostNameLoop])
# backup
backupfile = self.commonOper.backupConf(
self.gphomepath, self.user, hostNameLoop,
host_ip, self.userProfile, sshtool_host, self.pghostPath)
self.gphomepath, self.user,
hostNameLoop, self.userProfile, sshtool_host, self.pghostPath)
self.logger.log(
"[gs_dropnode]The backup file of " + hostNameLoop + " is " + backupfile)
if hostNameLoop == self.localhostname:
@ -155,13 +151,13 @@ class DropnodeImpl():
indexForuse = 0
for i in self.context.hostMapForExist[hostNameLoop]['datadir']:
# parse
resultDict = self.commonOper.parseConfigFile(host_ip, i,
resultDict = self.commonOper.parseConfigFile(hostNameLoop, i,
self.dnIdForDel,
self.context.hostIpListForDel,
sshtool_host,
self.envFile)
resultDictForRollback = self.commonOper.parseBackupFile(
hostNameLoop, host_ip, backupfile,
hostNameLoop, backupfile,
self.context.hostMapForExist[hostNameLoop][
'dn_id'][indexForuse],
resultDict['replStr'], sshtool_host,
@ -171,7 +167,7 @@ class DropnodeImpl():
# try set
try:
self.commonOper.SetPgsqlConf(resultDict['replStr'],
host_ip, i,
hostNameLoop, i,
resultDict['syncStandbyStr'],
sshtool_host,
self.userProfile,
@ -180,7 +176,7 @@ class DropnodeImpl():
except ValueError:
self.logger.log("[gs_dropnode]Rollback pgsql process.")
self.commonOper.SetPgsqlConf(resultDict['replStr'],
host_ip, i,
hostNameLoop, i,
resultDict['syncStandbyStr'],
sshtool_host,
self.userProfile,
@ -194,26 +190,24 @@ class DropnodeImpl():
these operations only need to be executed on the primary node
"""
for hostNameLoop in self.context.hostMapForExist.keys():
data_dir = self.context.hostMapForExist[hostNameLoop]['datadir']
host_ip = self.context.backIpNameMap[hostNameLoop]
try:
self.commonOper.SetPghbaConf(self.userProfile, host_ip,
self.commonOper.SetPghbaConf(self.userProfile, hostNameLoop,
self.resultDictOfPrimary[0][
'pghbaStr'], data_dir, False)
'pghbaStr'], False)
except ValueError:
self.logger.log("[gs_dropnode]Rollback pghba conf.")
self.commonOper.SetPghbaConf(self.userProfile, host_ip,
self.commonOper.SetPghbaConf(self.userProfile, hostNameLoop,
self.resultDictOfPrimary[0][
'pghbaStr'], data_dir, True)
'pghbaStr'], True)
indexLoop = 0
for i in self.context.hostMapForExist[self.localhostname]['datadir']:
try:
self.commonOper.SetReplSlot(self.local_ip, self.gsql_path,
self.commonOper.SetReplSlot(self.localhostname, self.gsql_path,
self.context.hostMapForExist[self.localhostname]['port'][indexLoop],
self.dnIdForDel)
except ValueError:
self.logger.log("[gs_dropnode]Rollback replslot")
self.commonOper.SetReplSlot(self.local_ip, self.gsql_path,
self.commonOper.SetReplSlot(self.localhostname, self.gsql_path,
self.context.hostMapForExist[self.localhostname]['port'][indexLoop],
self.dnIdForDel, True)
indexLoop += 1
@ -260,13 +254,12 @@ class DropnodeImpl():
cmd = "%s/script/gs_om -t refreshconf" % self.gphomepath
subprocess.getstatusoutput(cmd)
for hostName in self.context.hostMapForExist.keys():
host_ip = self.context.backIpNameMap[hostName]
hostSsh = SshTool([host_ip])
hostSsh = SshTool([hostName])
if hostName != self.localhostname:
staticConfigPath_name = "%s/cluster_static_config_%s" % (
tmpDir, hostName)
hostSsh.scpFiles(staticConfigPath_name, staticConfigPath,
[host_ip], self.envFile)
[hostName], self.envFile)
try:
os.unlink(staticConfigPath_name)
except FileNotFoundError:
@ -353,9 +346,9 @@ class DropnodeImpl():
"restart the node.\nDo you want to restart the primary " \
"node now (yes/no)? "
self.context.checkInput(msgPrint)
sshTool = SshTool([self.local_ip])
sshTool = SshTool([self.localhostname])
for i in self.context.hostMapForExist[self.localhostname]['datadir']:
self.commonOper.stopInstance(self.local_ip, sshTool, i,
self.commonOper.stopInstance(self.localhostname, sshTool, i,
self.userProfile)
self.commonOper.startInstance(i, self.userProfile)
self.cleanSshToolFile(sshTool)
@ -406,7 +399,7 @@ class OperCommon:
if dbState in ['Promoting', 'Wait', 'Demoting']:
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35808"] % host)
def backupConf(self, appPath, user, host, host_ip, envfile, sshTool, pghostPath):
def backupConf(self, appPath, user, host, envfile, sshTool, pghostPath):
"""
backup the configuration file (postgresql.conf and pg_hba.conf)
The Backup.py can do this
@ -419,12 +412,12 @@ class OperCommon:
cmd = "(find %s -type d | grep gs_dropnode_backup | xargs rm -rf;" \
"if [ ! -d '%s' ]; then mkdir -p '%s' -m %s;fi)" \
% (pghostPath, tmpPath, tmpPath, DefaultValue.KEY_DIRECTORY_MODE)
sshTool.executeCommand(cmd, DefaultValue.SUCCESS, [host_ip], envfile)
sshTool.executeCommand(cmd, DefaultValue.SUCCESS, [host], envfile)
logfile = os.path.join(tmpPath, 'gs_dropnode_call_Backup_py.log')
cmd = "python3 %s -U %s -P %s -p --nodeName=%s -l %s" \
% (backupPyPath, user, tmpPath, host, logfile)
(statusMap, output) = sshTool.getSshStatusOutput(cmd, [host_ip], envfile)
if statusMap[host_ip] != 'Success':
(statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile)
if statusMap[host] != 'Success':
self.logger.debug(
"[gs_dropnode]Backup parameter config file failed." + output)
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"])
@ -594,7 +587,7 @@ class OperCommon:
output_result = output_result.replace(output_no, output_new_no, 1)
return output_result
def parseBackupFile(self, host, host_ip, backupfile, dnId, replstr, sshTool,
def parseBackupFile(self, host, backupfile, dnId, replstr, sshTool,
envfile):
"""
parse the backup file, e.g. parameter_host.tar, to get the values for rollback
@ -608,8 +601,8 @@ class OperCommon:
% (
backupfile, backupdir, backupdir, 'parameter_' + host, dnId[3:],
backupdir, 'parameter_' + host, dnId[3:])
(statusMap, output) = sshTool.getSshStatusOutput(cmd, [host_ip], envfile)
if statusMap[host_ip] != 'Success':
(statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile)
if statusMap[host] != 'Success':
self.logger.log(
"[gs_dropnode]Parse backup parameter config file failed:" + output)
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"])
@ -659,7 +652,7 @@ class OperCommon:
self.logger.log(
"[gs_dropnode]End of set openGauss config file on %s." % host)
def SetPghbaConf(self, envProfile, host, pgHbaValue, data_dir,
def SetPghbaConf(self, envProfile, host, pgHbaValue,
flagRollback=False):
"""
Set the value of pg_hba.conf
@ -667,7 +660,7 @@ class OperCommon:
self.logger.log(
"[gs_dropnode]Start of set pg_hba config file on %s." % host)
cmd = 'source %s;' % envProfile
ssh_tool = SshTool([host])
if len(pgHbaValue):
ip_entries = pgHbaValue[:-1].split('|')
for entry in ip_entries:
@ -675,16 +668,26 @@ class OperCommon:
if not flagRollback:
if NetUtil.get_ip_version(entry) == NetUtil.NET_IPV4:
v = entry[0:entry.find('/32') + 3]
cmd += "gs_guc set -D %s -h '%s';" % (data_dir[0], v)
cmd += "gs_guc set -N %s -I all -h '%s';" % (host, v)
elif NetUtil.get_ip_version(entry) == NetUtil.NET_IPV6:
v = entry[0:entry.find('/128') + 4]
cmd += "gs_guc set -D %s -h '%s';" % (data_dir[0], v)
cmd += "gs_guc set -N %s -I all -h '%s';" % (host, v)
elif NetUtil.get_ip_version(entry) == "":
raise ValueError(f"Invalid IP address format: {entry}")
else:
cmd += "gs_guc set -D %s -h '%s';" % (data_dir[0], entry)
(status, output) = ssh_tool.getSshStatusOutput(cmd, [host])
if not status[host]:
cmd += "gs_guc set -N %s -I all -h '%s';" % (host, entry)
(status, output) = subprocess.getstatusoutput(cmd)
result_v = re.findall(r'Failed instances: (\d)\.', output)
if status:
self.logger.debug(
"[gs_dropnode]Set pg_hba config file failed:" + output)
raise ValueError(output)
if len(result_v):
if result_v[0] != '0':
self.logger.debug(
"[gs_dropnode]Set pg_hba config file failed:" + output)
raise ValueError(output)
else:
self.logger.debug(
"[gs_dropnode]Set pg_hba config file failed:" + output)
raise ValueError(output)
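A hedged sketch of the success check used above: gs_guc prints a "Failed instances: N." summary line, and anything other than 0 is treated as a failure (the sample output below is hypothetical):

import re

def guc_output_ok(output):
    # Mirror of the check above: require gs_guc to report zero failed instances.
    result_v = re.findall(r'Failed instances: (\d)\.', output)
    return bool(result_v) and result_v[0] == '0'

sample = "Total instances: 3. Failed instances: 0."  # hypothetical gs_guc output
assert guc_output_ok(sample)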


@ -67,7 +67,7 @@ class DropNodeWithCmImpl(DropnodeImpl):
self.drop_nodes = [node for node in self.context.clusterInfo.dbNodes
for drop_ip in self.context.hostIpListForDel
if drop_ip in node.backIps]
self.ssh_tool = SshTool([node.backIps[0] for node in self.context.clusterInfo.dbNodes])
self.ssh_tool = SshTool([node.name for node in self.context.clusterInfo.dbNodes])
self.cm_component = CM_OLAP()
self.cm_component.binPath = os.path.realpath(os.path.join(
@ -157,10 +157,8 @@ class DropNodeWithCmImpl(DropnodeImpl):
OMCommand.getLocalScript("Local_Config_CM_Res"),
ACTION_DROP_NODE, self.user, del_hosts, self.context.localLog)
self.logger.debug("Command for updating cm resource file: %s" % cmd)
for name in self.context.hostMapForExist.keys():
host_ip = self.backIpNameMap[name]
CmdExecutor.execCommandWithMode(cmd, self.ssh_tool,
host_list=host_ip)
host_list=self.context.hostMapForExist.keys())
self.logger.log("Successfully updated cm resource file.")
def update_dss_inst(self):
@ -240,7 +238,7 @@ class DropNodeWithCmImpl(DropnodeImpl):
"bin", "drop_node_flag"))
cmd = g_file.SHELL_CMD_DICT["createFile"] % (flag_file,
DefaultValue.FILE_MODE, flag_file)
CmdExecutor.execCommandWithMode(cmd, self.ssh_tool, host_list=[drop_node.backIps[0]])
CmdExecutor.execCommandWithMode(cmd, self.ssh_tool, host_list=[drop_node.name])
self.logger.log("Generate drop flag file on "
"drop node {0} successfully.".format(drop_node.name))
@ -258,7 +256,7 @@ class DropNodeWithCmImpl(DropnodeImpl):
self.logger.debug("stopCMProcessesCmd: " + stopCMProcessesCmd)
gaussHome = EnvUtil.getEnv("GAUSSHOME")
gaussLog = EnvUtil.getEnv("GAUSSLOG")
hostList = [node.backIps[0] for node in self.context.clusterInfo.dbNodes]
hostList = [node.name for node in self.context.clusterInfo.dbNodes]
CmdExecutor.execCommandWithMode(stopCMProcessesCmd, self.ssh_tool, host_list=hostList)
# for flush dcc configuration
DefaultValue.remove_metadata_and_dynamic_config_file(self.user, self.ssh_tool, self.logger)
@ -266,7 +264,7 @@ class DropNodeWithCmImpl(DropnodeImpl):
dataPath = self.context.hostMapForExist[self.localhostname]['datadir'][0]
gsctlReloadCmd = "source %s; gs_ctl reload -N all -D %s" % (self.envFile, dataPath)
self.logger.debug("gsctlReloadCmd: " + gsctlReloadCmd)
CmdExecutor.execCommandWithMode(gsctlReloadCmd, self.ssh_tool, host_list=[self.local_ip])
CmdExecutor.execCommandWithMode(gsctlReloadCmd, self.ssh_tool, host_list=[self.localhostname])
# start CM processes
startCMProcessedCmd = "source %s; nohup %s/bin/om_monitor -L %s/cm/om_monitor >> /dev/null 2>&1 &" % \
(self.envFile, gaussHome, gaussLog)


@ -144,10 +144,9 @@ class ExpansionImpl():
rollback primary's wal_keep_segments, when current user is root
"""
self.logger.debug("Start to rollback primary's wal_keep_segments")
primaryHostName = self.getPrimaryHostName()
primary_host_ip = self.context.clusterInfoDict[primaryHostName]["backIp"]
primaryDataNode = self.context.clusterInfoDict[primaryHostName]["dataNode"]
status = self.commonGsCtl.setGucPara(primary_host_ip, self.envFile, primaryDataNode,
primary = self.getPrimaryHostName()
primaryDataNode = self.context.clusterInfoDict[primary]["dataNode"]
status = self.commonGsCtl.setGucPara(primary, self.envFile, primaryDataNode,
"wal_keep_segments", self.walKeepSegments, self.user)
if status != DefaultValue.SUCCESS:
self.logger.log("Failed to rollback wal_keep_segments, please manually "
@ -501,7 +500,8 @@ class ExpansionImpl():
if EnvUtil.getEnv("MPPDB_ENV_SEPARATE_PATH"):
preinstallCmd += " --sep-env-file={envFile}".format(envFile = self.envFile)
if not os.listdir(os.path.join(EnvUtil.getEnv("GPHOME"),"lib")):
preinstallCmd += " --unused-third-party 2>&1"
preinstallCmd += " --unused-third-party"
preinstallCmd += " --skip-hostname-set 2>&1"
failedPreinstallHosts = []
for host in self.context.newHostList:
@ -548,7 +548,6 @@ class ExpansionImpl():
"""
self.logger.debug("Get the existing hosts.")
primaryHost = self.getPrimaryHostName()
primary_host_ip = self.context.clusterInfoDict[primaryHost]["sshIp"]
command = ""
if EnvUtil.getEnv("MPPDB_ENV_SEPARATE_PATH"):
command = "source %s;gs_om -t status --detail" % self.envFile
@ -558,13 +557,13 @@ class ExpansionImpl():
if isRootUser and self.context.current_user_root:
command = "su - %s -c '%s'" % (self.user, command)
self.logger.debug(command)
sshTool = SshTool([primary_host_ip])
sshTool = SshTool([primaryHost])
resultMap, outputCollect = sshTool.getSshStatusOutput(command,
[primary_host_ip], self.envFile)
[primaryHost], self.envFile)
self.cleanSshToolFile(sshTool)
self.logger.debug("Expansion cluster status result:{0}".format(resultMap))
self.logger.debug("Expansion cluster status output:{0}".format(outputCollect))
if resultMap[primary_host_ip] != DefaultValue.SUCCESS:
if resultMap[primaryHost] != DefaultValue.SUCCESS:
GaussLog.exitWithError(ErrorCode.GAUSS_516["GAUSS_51600"])
instances = re.split('(?:\|)|(?:\n)', outputCollect)
self.existingHosts = []
@ -585,8 +584,7 @@ class ExpansionImpl():
nodeNames = self.context.nodeNameList
# get gs_om node info
primaryHost = self.getPrimaryHostName()
pri_host_ip = self.context.clusterInfoDict[primaryHost]["sshIp"]
result = self.commonGsCtl.queryOmCluster(pri_host_ip, self.envFile)
result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
instances = re.split('(?:\|)|(?:\n)', result)
pattern_ip = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b|\b(?:[0-9a-fA-F]{0,4}(?::[0-9a-fA-F]{0,4}){7})\b')
host_ip = []
@ -614,8 +612,6 @@ class ExpansionImpl():
if self.expansionSuccess[host]:
hostIpList.append(host)
self.logger.debug("hostIpList=%s" % hostIpList)
self.logger.debug("self.expansionSuccess=%s" % self.expansionSuccess)
nodeDict = self.context.clusterInfoDict
backIpNameMap = self.context.backIpNameMap
hostAzNameMap = self.context.hostAzNameMap
@ -717,13 +713,12 @@ gs_guc set -D {dn} -c "available_zone='{azName}'"
generate GRPC cert for single node
"""
primaryHost = self.getPrimaryHostName()
primary_host_ip = self.context.clusterInfoDict[primaryHost]["sshIp"]
dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
needGRPCHosts = []
for host in self.expansionSuccess:
if self.expansionSuccess[host]:
needGRPCHosts.append(host)
insType, _ = self.commonGsCtl.queryInstanceStatus(primary_host_ip,
insType, _ = self.commonGsCtl.queryInstanceStatus(primaryHost,
dataNode,self.envFile)
if insType != MODE_PRIMARY:
primaryHostIp = self.context.clusterInfoDict[primaryHost]["backIp"]
@ -770,11 +765,10 @@ gs_guc set -D {dn} -c "available_zone='{azName}'"
primaryHost = self.getPrimaryHostName()
dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
command = "source %s; gs_ctl reload -D %s " % (self.envFile, dataNode)
primary_host_ip = self.context.clusterInfoDict[primaryHost]["sshIp"]
sshTool = SshTool([primary_host_ip])
sshTool = SshTool([primaryHost])
self.logger.debug(command)
resultMap, outputCollect = sshTool.getSshStatusOutput(command,
[primary_host_ip], self.envFile)
[primaryHost], self.envFile)
self.logger.debug(outputCollect)
self.cleanSshToolFile(sshTool)
@ -852,20 +846,21 @@ gs_guc set -D {dn} -c "available_zone='{azName}'"
# reset current standby's application name before started
self.resetStandbyAppName(hostName=hostName, sshIp=host)
# start new host as standby mode
self.commonGsCtl.stopInstance(host, dataNode, self.envFile)
self.commonGsCtl.stopInstance(hostName, dataNode, self.envFile)
result, output = self.commonGsCtl.startInstanceWithMode(host,
dataNode, MODE_STANDBY, self.envFile)
if result[host] != DefaultValue.SUCCESS:
if "Uncompleted build is detected" not in output:
self.expansionSuccess[host] = False
self.logger.log("Failed to start %s as standby "
"before building. Error: %s" % (host, output))
"before building." % host)
continue
else:
self.logger.debug("Uncompleted build is detected on %s. Error: %s" % (host, output))
self.logger.debug("Uncompleted build is detected on %s." %
host)
else:
insType, dbState = self.commonGsCtl.queryInstanceStatus(
host, dataNode, self.envFile)
hostName, dataNode, self.envFile)
if insType != ROLE_STANDBY:
self.logger.log("Build %s failed." % host)
self.expansionSuccess[host] = False
@ -921,12 +916,12 @@ gs_guc set -D {dn} -c "available_zone='{azName}'"
print("\rThe program is running {}".format(waitChars[index]), end="")
time.sleep(timeFlush)
insType, dbState = self.commonGsCtl.queryInstanceStatus(
host, dataNode, self.envFile)
hostName, dataNode, self.envFile)
if dbState not in [STATE_STARTING, STATE_CATCHUP]:
self.logger.debug("%s starting and catchup complete." % host)
break
insType, dbState = self.commonGsCtl.queryInstanceStatus(
host, dataNode, self.envFile)
hostName, dataNode, self.envFile)
if insType == hostRole and dbState == STATE_NORMAL:
if self.context.newHostCasRoleMap[host] == "off":
existingStandbys.append(host)
@ -1035,8 +1030,7 @@ gs_guc set -D {dn} -c "available_zone='{azName}'"
self.logger.log("Start to generate and send cluster static file.")
primaryHost = self.getPrimaryHostName()
primary_host_ip = self.context.clusterInfoDict[primaryHost]["sshIp"]
result = self.commonGsCtl.queryOmCluster(primary_host_ip, self.envFile)
result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
for nodeName in self.context.nodeNameList:
nodeInfo = self.context.clusterInfoDict[nodeName]
nodeIp = nodeInfo["backIp"]
@ -1064,7 +1058,6 @@ gs_guc set -D {dn} -c "available_zone='{azName}'"
for dbNode in self.context.clusterInfo.dbNodes:
hostName = dbNode.name
host_ip = dbNode.sshIps[0]
staticConfigPath = "%s/script/static_config_files/cluster_static_config_%s" % \
(toolPath, hostName)
self.context.clusterInfo.saveToStaticConfig(staticConfigPath, dbNode.id)
@ -1083,9 +1076,9 @@ gs_guc set -D {dn} -c "available_zone='{azName}'"
(dynamic_file, dynamic_file)
if hostName != socket.gethostname():
hostSsh = SshTool([host_ip], timeout=300)
hostSsh.scpFiles(srcFile, targetFile, [host_ip], self.envFile)
hostSsh.getSshStatusOutput(dynamic_opt_cmd, [host_ip], self.envFile)
hostSsh = SshTool([hostName], timeout=300)
hostSsh.scpFiles(srcFile, targetFile, [hostName], self.envFile)
hostSsh.getSshStatusOutput(dynamic_opt_cmd, [hostName], self.envFile)
self.cleanSshToolFile(hostSsh)
else:
scpcmd = "cp %s %s" % (srcFile, targetFile)
@ -1216,13 +1209,12 @@ remoteservice={remoteservice}'"\
gaussdbVersionPattern = re.compile("gaussdb \((.*)\) .*")
gsomVersionPattern = re.compile("gs_om \(.*\) .*")
primaryHostName = self.getPrimaryHostName()
primary_host_ip = self.context.clusterInfoDict[primaryHostName]["sshIp"]
sshPrimary = SshTool([primary_host_ip])
sshPrimary = SshTool([primaryHostName])
resultMap, outputCollect = sshPrimary.getSshStatusOutput(
getGaussdbVersionCmd, [], envFile)
self.logger.debug(resultMap)
self.logger.debug(outputCollect)
if resultMap[primary_host_ip] != DefaultValue.SUCCESS:
if resultMap[primaryHostName] != DefaultValue.SUCCESS:
GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35707"] %
("gaussdb", "primary"))
primaryGaussdbVersion = gaussdbVersionPattern.findall(outputCollect)[0]
@ -1230,7 +1222,7 @@ remoteservice={remoteservice}'"\
getGsomVersionCmd, [], envFile)
self.logger.debug(resultMap)
self.logger.debug(outputCollect)
if resultMap[primary_host_ip] != DefaultValue.SUCCESS:
if resultMap[primaryHostName] != DefaultValue.SUCCESS:
GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35707"] %
("gs_om", "primary"))
primaryGsomVersion = gsomVersionPattern.findall(outputCollect)[0]
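A small, self-contained illustration of the version matching above; the sample output string is hypothetical, only the regular expression is taken from the code:

import re

gaussdb_version_pattern = re.compile(r"gaussdb \((.*)\) .*")
sample = "gaussdb (openGauss 5.0.0 build aabbccd) compiled at 2024-08-01 12:00:00"  # hypothetical version output
print(gaussdb_version_pattern.findall(sample)[0])  # -> openGauss 5.0.0 build aabbccd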
@ -1240,9 +1232,10 @@ remoteservice={remoteservice}'"\
failCheckGsomVersionHosts = []
wrongGaussdbVersionHosts = []
wrongGsomVersionHosts = []
for host in standbyHosts:
if not self.expansionSuccess[host]:
for backIp in standbyHosts:
if not self.expansionSuccess[backIp]:
continue
host = self.context.backIpNameMap[backIp]
sshTool = SshTool([host])
# get gaussdb version
resultMap, outputCollect = sshTool.getSshStatusOutput(
@ -1362,7 +1355,7 @@ remoteservice={remoteservice}'"\
fo.close()
# send guc command bashfile to each host and execute it.
if NetUtil.getLocalIp() != host_name:
if socket.gethostname() != host_name:
ssh_tool.scpFiles("%s" % temp_sh_file, "%s" % temp_sh_file, [host_name],
self.envFile)
result_map, output_collect = \
@ -1467,15 +1460,14 @@ remoteservice={remoteservice}'"\
"""
self.logger.debug("Checking the consistence of datanodes.")
primaryName = self.getPrimaryHostName()
primary_host_ip = self.context.clusterInfoDict[primaryName]["sshIp"]
cmd = "source %s;gs_om -t status --detail" % (self.envFile)
cmd = CmdUtil.get_user_exec_cmd(self.context.current_user_root, self.user, cmd)
sshTool = SshTool([primary_host_ip])
sshTool = SshTool([primaryName])
resultMap, outputCollect = sshTool.getSshStatusOutput(cmd,
[primary_host_ip], self.envFile)
[primaryName], self.envFile)
self.logger.debug(resultMap)
self.logger.debug(outputCollect)
if resultMap[primary_host_ip] != DefaultValue.SUCCESS:
if resultMap[primaryName] != DefaultValue.SUCCESS:
GaussLog.exitWithError(ErrorCode.GAUSS_516["GAUSS_51600"])
self.cleanSshToolFile(sshTool)
pos = outputCollect.rfind("-----")
@ -1539,17 +1531,16 @@ remoteservice={remoteservice}'"\
"""
self.logger.debug("Start to check cluster status.")
# curHostName = socket.gethostname()
curHostName = socket.gethostname()
command = "source %s; gs_om -t status --detail" % (self.envFile)
command = CmdUtil.get_user_exec_cmd(self.context.current_user_root, self.user, command)
(status, output) = subprocess.getstatusoutput(command)
# sshTool = SshTool([curHostName])
# resultMap, outputCollect = sshTool.getSshStatusOutput(command,
# [curHostName], self.envFile)
# self.logger.debug(resultMap)
# self.logger.debug(outputCollect)
# self.cleanSshToolFile(sshTool)
if output.find("Primary Normal") == -1:
sshTool = SshTool([curHostName])
resultMap, outputCollect = sshTool.getSshStatusOutput(command,
[curHostName], self.envFile)
self.logger.debug(resultMap)
self.logger.debug(outputCollect)
self.cleanSshToolFile(sshTool)
if outputCollect.find("Primary Normal") == -1:
GaussLog.exitWithError((ErrorCode.GAUSS_357["GAUSS_35709"] %
("status", "primary", "Normal")) + "\nExpansion failed.")
@ -1809,8 +1800,9 @@ class GsCtlCommon:
"""
"""
command = "source %s ; gs_ctl query -D %s" % (env, datanode)
sshTool = SshTool([host])
resultMap, outputCollect = sshTool.getSshStatusOutput(command, [host], env)
sshTool = SshTool([datanode])
resultMap, outputCollect = sshTool.getSshStatusOutput(command,
[host], env)
self.logger.debug(outputCollect)
localRole = re.findall(r"local_role.*: (.*?)\n", outputCollect)
db_state = re.findall(r"db_state.*: (.*?)\n", outputCollect)
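The two findall calls above pull the role and state out of the plain-text gs_ctl report; a sketch with a hypothetical fragment of that output (only the regular expressions come from the code):

import re

output = ("HA state:\n"
          "        local_role                     : Standby\n"
          "        db_state                       : Normal\n")  # hypothetical gs_ctl query output
local_role = re.findall(r"local_role.*: (.*?)\n", output)
db_state = re.findall(r"db_state.*: (.*?)\n", output)
print(local_role[0], db_state[0])  # -> Standby Normal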


@ -79,7 +79,6 @@ class ExpansionImplWithCm(ExpansionImpl):
self.new_nodes = list()
self.app_names = list()
self._init_global()
self.logger = expansion.logger
def _init_global(self):
"""
@ -89,7 +88,7 @@ class ExpansionImplWithCm(ExpansionImpl):
self.static_cluster_info.initFromStaticConfig(self.context.user)
self.xml_cluster_info.initFromXml(self.context.xmlFile)
self.ssh_tool = SshTool(self.xml_cluster_info.getClusterSshIps()[0])
self.ssh_tool = SshTool(self.xml_cluster_info.getClusterNodeNames())
self.new_nodes = [node for node in self.xml_cluster_info.dbNodes
for back_ip in node.backIps if back_ip in self.context.newHostList]
@ -100,13 +99,6 @@ class ExpansionImplWithCm(ExpansionImpl):
Get node names from db_node_info object
"""
return [node.name for node in nodes]
@staticmethod
def get_node_ip(nodes):
"""
Get node ssh IPs from db_node_info objects
"""
return [node.sshIps[0] for node in nodes]
def _change_user_without_root(self):
"""
@ -116,7 +108,7 @@ class ExpansionImplWithCm(ExpansionImpl):
return
self.ssh_tool.clenSshResultFiles()
self.changeUser()
self.ssh_tool = SshTool(self.xml_cluster_info.getClusterSshIps()[0])
self.ssh_tool = SshTool(self.xml_cluster_info.getClusterNodeNames())
self.logger.log("Success to change user to [{0}]".format(self.user))
def send_xml(self):
@ -152,7 +144,7 @@ class ExpansionImplWithCm(ExpansionImpl):
self.logger.log("Preinstall command is: {0}".format(cmd))
failed_preinstall_hosts = []
for host in ExpansionImplWithCm.get_node_ip(self.new_nodes):
for host in ExpansionImplWithCm.get_node_names(self.new_nodes):
sshTool = SshTool([host], timeout=300)
result_map, output = sshTool.getSshStatusOutput(cmd, [])
self.logger.debug(result_map)
@ -187,7 +179,7 @@ class ExpansionImplWithCm(ExpansionImpl):
# exec the cmd for install application on all nodes
result_map, output = \
self.ssh_tool.getSshStatusOutput(cmd,
ExpansionImplWithCm.get_node_ip(self.new_nodes))
ExpansionImplWithCm.get_node_names(self.new_nodes))
self.logger.log("Install on new node output: {0}".format(output))
if "Failure" in result_map.values():
self.logger.debug(ErrorCode.GAUSS_527["GAUSS_52707"] %
@ -209,7 +201,7 @@ class ExpansionImplWithCm(ExpansionImpl):
self.ssh_tool.scpFiles(ca_file_dir,
os.path.dirname(ca_file_dir),
ExpansionImplWithCm.get_node_ip(self.new_nodes))
ExpansionImplWithCm.get_node_names(self.new_nodes))
self.logger.log("success to send all CA file.")
def _get_local_cm_agent_dir(self):
@ -230,7 +222,7 @@ class ExpansionImplWithCm(ExpansionImpl):
DefaultValue.MAX_DIRECTORY_MODE)
cmd += " && chown -R {0}:{1} {2}".format(self.user, self.group,
self._get_local_cm_agent_dir())
self.ssh_tool.getSshStatusOutput(cmd, ExpansionImplWithCm.get_node_ip(self.new_nodes))
self.ssh_tool.getSshStatusOutput(cmd, ExpansionImplWithCm.get_node_names(self.new_nodes))
self.logger.debug("Success to create CM directory on nodes "
"{0}".format(ExpansionImplWithCm.get_node_names(self.new_nodes)))
@ -248,7 +240,7 @@ class ExpansionImplWithCm(ExpansionImpl):
"Command for set node crontab: %s." % cmd)
CmdExecutor.execCommandWithMode(
cmd, self.ssh_tool,
host_list=ExpansionImplWithCm.get_node_ip(self.new_nodes))
host_list=ExpansionImplWithCm.get_node_names(self.new_nodes))
self.logger.debug("Success to set om_monitor crontab on nodes "
"{0}".format(ExpansionImplWithCm.get_node_names(self.new_nodes)))
@ -292,7 +284,7 @@ class ExpansionImplWithCm(ExpansionImpl):
"Command for initializing instances: %s" % cmd)
CmdExecutor.execCommandWithMode(
cmd, self.ssh_tool,
host_list=ExpansionImplWithCm.get_node_ip(self.new_nodes))
host_list=ExpansionImplWithCm.get_node_names(self.new_nodes))
self.logger.log("Success to init instance on nodes "
"{0}".format(ExpansionImplWithCm.get_node_names(self.new_nodes)))
@ -324,14 +316,12 @@ class ExpansionImplWithCm(ExpansionImpl):
Set guc parameter.
"""
node_name, inst_dir, para_name, para_value = para_list
# node_name to node_ip
node_ip = self.context.clusterInfoDict[node_name]['backIp']
guc_path = os.path.join(os.path.realpath(self.static_cluster_info.appPath),
"bin", "gs_guc")
para_str = " -c \"{0}='{1}'\" ".format(para_name, para_value)
cmd = "{0} set -D {1} {2}".format(guc_path, inst_dir, para_str)
self.logger.debug("Set guc parameter command: {0}".format(cmd))
self.guc_executor(self.ssh_tool, cmd, node_ip)
self.guc_executor(self.ssh_tool, cmd, node_name)
self.logger.debug("Successfully set guc param [{0}] "
"on node [{1}]".format(para_name, node_name))
@ -382,7 +372,7 @@ class ExpansionImplWithCm(ExpansionImpl):
"Parameter is: {1}".format(cmd, new_node_line))
CmdExecutor.execCommandWithMode(cmd, self.ssh_tool,
host_list=self.get_node_ip(self.new_nodes))
host_list=self.get_node_names(self.new_nodes))
self.logger.log("Update dcf config on new nodes successfully.")
old_node_cmd = "source {0}; " \
"{1} {2}".format(self.envFile,
@ -391,7 +381,7 @@ class ExpansionImplWithCm(ExpansionImpl):
DefaultValue.BASE_ENCODE))
CmdExecutor.execCommandWithMode(old_node_cmd, self.ssh_tool,
host_list=self.get_node_ip(self.static_cluster_info.dbNodes))
host_list=self.get_node_names(self.static_cluster_info.dbNodes))
self.logger.log("Update dcf config on old nodes successfully.")
self.logger.debug("Set other guc parameters successfully.")
@ -444,7 +434,7 @@ class ExpansionImplWithCm(ExpansionImpl):
(self.xml_cluster_info.float_ips[new_inst.float_ips[0]], submask_length)
self.logger.log("Ready to perform command on node [{0}]. "
"Command is : {1}".format(new_node.name, cmd))
CmdExecutor.execCommandWithMode(cmd, self.ssh_tool, host_list=[new_node.sshIps[0]])
CmdExecutor.execCommandWithMode(cmd, self.ssh_tool, host_list=[new_node.name])
def _config_pg_hba(self):
"""
@ -677,8 +667,8 @@ class ExpansionImplWithCm(ExpansionImpl):
stopCMProcessesCmd = "pkill -9 om_monitor -U {user}; pkill -9 cm_agent -U {user}; " \
"pkill -9 cm_server -U {user};".format(user=self.user)
self.logger.debug("stopCMProcessesCmd: " + stopCMProcessesCmd)
hostList = [node.sshIps[0] for node in clusterInfo.dbNodes]
newNodesList = [node.sshIps[0] for node in self.new_nodes]
hostList = [node.name for node in clusterInfo.dbNodes]
newNodesList = [node.name for node in self.new_nodes]
existingHosts = [host for host in hostList if host not in newNodesList]
gaussHome = EnvUtil.getEnv("GAUSSHOME")
gaussLog = EnvUtil.getEnv("GAUSSLOG")
@ -713,12 +703,11 @@ class ExpansionImplWithCm(ExpansionImpl):
# execute gs_ctl reload
ctlPath = os.path.join(os.path.realpath(self.static_cluster_info.appPath), "bin", "gs_ctl")
nodeDict = self.context.clusterInfoDict
localHost = NetUtil.getHostName()
local_ip = NetUtil.getLocalIp()
localHost = socket.gethostname()
dataPath = nodeDict[localHost]["dataNode"]
ctlReloadCmd = "source %s; %s reload -N all -D %s" % (self.envFile, ctlPath, dataPath)
self.logger.debug("ctlReloadCmd: " + ctlReloadCmd)
CmdExecutor.execCommandWithMode(ctlReloadCmd, self.ssh_tool, host_list=[local_ip])
CmdExecutor.execCommandWithMode(ctlReloadCmd, self.ssh_tool, host_list=[localHost])
def ss_restart_cluster(self):
"""


@ -62,7 +62,7 @@ class ExpansionImplWithCmLocal(ExpansionImplWithCm):
gsql_cmd = "source {0} ; gsql -V".format(self.envFile)
result_map, output_collect = \
self.ssh_tool.getSshStatusOutput(gsql_cmd,
hostList=self.get_node_ip(self.new_nodes))
hostList=self.get_node_names(self.new_nodes))
self.logger.debug("Check remote nodes commit ID , "
"result_map is : {0}".format(result_map))
@ -103,14 +103,14 @@ class ExpansionImplWithCmLocal(ExpansionImplWithCm):
cm_agent_conf = os.path.realpath(os.path.join(new_node.cmagents[0].datadir,
"cm_agent.conf"))
cmd = "ls {0} | wc -l".format(cm_agent_conf)
_, output_collect = self.ssh_tool.getSshStatusOutput(cmd, hostList=[new_node.sshIps[0]])
_, output_collect = self.ssh_tool.getSshStatusOutput(cmd, hostList=[new_node.name])
result_dict = self._parse_ssh_tool_output_collect(output_collect)
if new_node.sshIps[0] not in result_dict:
if new_node.name not in result_dict:
self.logger.error("Check remote node [{0}] cm_agent.conf failed. "
"output: {1}".format(new_node.name, result_dict))
raise Exception("Check remote node [{0}] cm_agent.conf failed. "
"output: {1}".format(new_node.name, result_dict))
if result_dict.get(new_node.sshIps[0]) != '1':
if result_dict.get(new_node.name) != '1':
self.logger.error("Check remote node [{0}] result failed. "
"output: {1}".format(new_node.name, result_dict))
raise Exception("Check remote node [{0}] result failed. "

View File

@ -574,7 +574,7 @@ class InstallImpl:
DefaultValue.enableWhiteList(
self.context.sshTool,
self.context.mpprcFile,
self.context.clusterInfo.getClusterSshIps()[0],
self.context.clusterInfo.getClusterNodeNames(),
self.context.logger)
except Exception as e:
# failed to clear the backup directory


@ -353,7 +353,7 @@ class InstallImplOLAP(InstallImpl):
memCheck = "cat /proc/cpuinfo | grep processor | wc -l"
coresCheck = "env -u LANGUAGE LC_ALL=C free -g --si | grep 'Mem' | awk -F ' ' '{print \$2}'"
cmd = "pssh -s -H %s \"%s & %s\"" % (
dbNode.sshIps[0], memCheck, coresCheck)
dbNode.name, memCheck, coresCheck)
(status, output) = subprocess.getstatusoutput(cmd)
if status != 0 or len(output.strip().split()) != 2:
self.context.logger.debug(


@ -224,11 +224,11 @@ class OmImplOLAP(OmImpl):
# get the newest dynaminc config and send to other node
self.clusterInfo.checkClusterDynamicConfig(self.context.user, hostName)
if self.context.g_opts.nodeName == "":
hostList = self.clusterInfo.getClusterSshIps()[0]
hostList = self.clusterInfo.getClusterNodeNames()
else:
hostList = []
hostList.append(self.context.g_opts.node_ip)
self.sshTool = SshTool(self.clusterInfo.getClusterSshIps()[0], None,
hostList.append(self.context.g_opts.nodeName)
self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None,
DefaultValue.TIMEOUT_CLUSTER_START)
if self.time_out is None:
time_out = DefaultValue.TIMEOUT_CLUSTER_START
@ -329,11 +329,11 @@ class OmImplOLAP(OmImpl):
self.logger.log("Stopping %s." % stop_type)
self.logger.log("=========================================")
if self.context.g_opts.nodeName == "":
host_list = self.clusterInfo.getClusterSshIps()[0]
host_list = self.clusterInfo.getClusterNodeNames()
else:
host_list = []
host_list.append(self.context.g_opts.node_ip)
self.sshTool = SshTool(self.clusterInfo.getClusterSshIps()[0], None,
host_list.append(self.context.g_opts.nodeName)
self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None,
DefaultValue.TIMEOUT_CLUSTER_START)
if self.time_out is None:
time_out = DefaultValue.TIMEOUT_CLUSTER_STOP
@ -400,7 +400,7 @@ class OmImplOLAP(OmImpl):
return
self.logger.log("Generating dynamic configuration file for all nodes.")
hostname = NetUtil.GetHostIpOrName()
sshtool = SshTool(self.context.clusterInfo.getClusterSshIps()[0])
sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames())
self.context.clusterInfo.doRefreshConf(self.context.user, hostname,
sshtool)


@ -73,7 +73,6 @@ class OmImpl:
self.clusterInfo = OperationManager.clusterInfo
self.dataDir = OperationManager.g_opts.dataDir
self.sshTool = None
self.node_ip = OperationManager.g_opts.node_ip
def doStopCluster(self):
"""
@ -246,7 +245,7 @@ class OmImpl:
output:NA
"""
host_name = NetUtil.GetHostIpOrName()
sshtool = SshTool(self.context.clusterInfo.getClusterSshIps()[0], timeout=self.time_out)
sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames(), timeout=self.time_out)
node_id = 0
if self.context.g_opts.nodeName != "":
for db_node in self.context.clusterInfo.dbNodes:
@ -410,7 +409,7 @@ class OmImpl:
self.context.clusterInfo.initFromStaticConfig(
UserUtil.getPathOwner(self.context.g_opts.certFile)[0])
self.sshTool = SshTool(
self.context.clusterInfo.getClusterSshIps()[0],
self.context.clusterInfo.getClusterNodeNames(),
self.logger.logFile)
except Exception as e:
raise Exception(str(e))
@ -733,7 +732,7 @@ class OmImpl:
self.context.clusterInfo = dbClusterInfo()
self.context.clusterInfo.initFromStaticConfig(
pwd.getpwuid(os.getuid()).pw_name)
self.sshTool = SshTool(self.context.clusterInfo.getClusterSshIps()[0],
self.sshTool = SshTool(self.context.clusterInfo.getClusterNodeNames(),
self.logger.logFile)
backupList = DefaultValue.CERT_FILES_LIST[:]
@ -1244,7 +1243,7 @@ class OmImpl:
output:dictionary
"""
existNodes = []
for nodeName in self.context.clusterInfo.getClusterSshIps()[0]:
for nodeName in self.context.clusterInfo.getClusterNodeNames():
if (nodeName == NetUtil.GetHostIpOrName()):
continue
if (self.sshTool.checkRemoteFileExist(nodeName, filepath, "")):


@ -157,7 +157,7 @@ class PostUninstallImpl:
self.logger.log("Checking unpreinstallation.")
if not self.localMode:
ProfileFile.checkAllNodesMpprcFile(
self.clusterInfo.getClusterSshIps()[0], self.mpprcFile)
self.clusterInfo.getClusterNodeNames(), self.mpprcFile)
cmd = "%s -t %s -u %s -l '%s' -X '%s'" % (
OMCommand.getLocalScript("Local_UnPreInstall"),
@ -302,7 +302,7 @@ class PostUninstallImpl:
# get other nodes
hostName = NetUtil.GetHostIpOrName()
otherNodes = self.clusterInfo.getClusterSshIps()[0]
otherNodes = self.clusterInfo.getClusterNodeNames()
for otherNode in otherNodes:
if (otherNode == hostName):
otherNodes.remove(otherNode)
@ -348,7 +348,7 @@ class PostUninstallImpl:
try:
# get other nodes
hostName = NetUtil.GetHostIpOrName()
otherNodes = self.clusterInfo.getClusterSshIps()[0]
otherNodes = self.clusterInfo.getClusterNodeNames()
for otherNode in otherNodes:
if (otherNode == hostName):
otherNodes.remove(otherNode)
@ -399,7 +399,7 @@ class PostUninstallImpl:
try:
# get other nodes
hostName = NetUtil.GetHostIpOrName()
otherNodes = self.clusterInfo.getClusterSshIps()[0]
otherNodes = self.clusterInfo.getClusterNodeNames()
for otherNode in otherNodes:
if (otherNode == hostName):
otherNodes.remove(otherNode)
@ -647,7 +647,7 @@ class PostUninstallImpl:
% "MPPRC file"
+ " Command: %s. Error: \n%s" % (baseCmd, output))
else:
dbNodeNames = self.clusterInfo.getClusterSshIps()[0]
dbNodeNames = self.clusterInfo.getClusterNodeNames()
for dbNodeName in dbNodeNames:
cmd = "pssh -s -H %s '%s'" % (dbNodeName, baseCmd)
(status, output) = subprocess.getstatusoutput(cmd)
@ -703,7 +703,7 @@ class PostUninstallImpl:
self.sshTool,
self.localMode,
self.mpprcFile,
self.clusterInfo.getClusterSshIps()[0])
self.clusterInfo.getClusterNodeNames())
except Exception as e:
self.logger.logExit(str(e))
@ -869,11 +869,10 @@ class PostUninstallImpl:
# get the user name
username = pwd.getpwuid(os.getuid()).pw_name
# get the user sshIps
sshIps = self.clusterInfo.getClusterSshIps()[0]
Ips.extend(sshIps)
sshIps = self.clusterInfo.getClusterSshIps()
# save the sshIps to Ips
# for ips in sshIps:
# Ips.extend(ips)
for ips in sshIps:
Ips.extend(ips)
self.logger.log("Creating SSH trust for the root permission user.")
# Ask to create trust for root
@ -944,7 +943,7 @@ class PostUninstallImpl:
cmd = "%s" + delete_line_cmd + delete_shell_cmd
# get remote node and local node
host_list = self.clusterInfo.getClusterSshIps()[0]
host_list = self.clusterInfo.getClusterNodeNames()
host_list.remove(local_host)
# delete remote root mutual trust


@ -18,7 +18,6 @@ import subprocess
import os
import sys
import time
import socket
sys.path.append(sys.path[0] + "/../../")
@ -572,11 +571,8 @@ class PreinstallImplOLAP(PreinstallImpl):
if not self.context.is_new_root_path:
current_path = self.get_package_path()
script = os.path.join(current_path, "script")
hostList = self.context.clusterInfo.getClusterSshIps()[0]
if NetUtil.getLocalIp() in hostList:
hostList.remove(NetUtil.getLocalIp())
if "127.0.0.1" in hostList:
hostList.remove("127.0.0.1")
hostList = self.context.clusterInfo.getClusterNodeNames()
hostList.remove(NetUtil.GetHostIpOrName())
if not self.context.localMode and hostList:
cmd = "rm -f %s/gs_*" % script
self.context.sshTool.executeCommand(cmd,
@ -610,7 +606,7 @@ class PreinstallImplOLAP(PreinstallImpl):
# exec the cmd
CmdExecutor.execCommandWithMode(cmd,
self.context.sshTool,
self.context.localMode or self.context.isSingle,
self.context.localMode,
self.context.mpprcFile)
self.del_remote_pkgpath()


@ -20,7 +20,6 @@ import pwd
import sys
import re
import getpass
import socket
sys.path.append(sys.path[0] + "/../")
@ -40,8 +39,6 @@ from base_utils.os.net_util import NetUtil
from base_utils.os.env_util import EnvUtil
from domain_utils.cluster_file.profile_file import ProfileFile
from domain_utils.cluster_file.version_info import VersionInfo
from base_utils.executor.local_remote_cmd import LocalRemoteCmd
from base_utils.os.crontab_util import CrontabUtil
# action name
# prepare cluster tool package path
@ -86,8 +83,6 @@ ACTION_CHANGE_TOOL_ENV = "change_tool_env"
ACTION_CHECK_CONFIG = "check_config"
# check cpu
ACTION_CHECK_CPU_INSTRUCTIONS = "check_cpu_instructions"
# check nofile limit
ACTION_CHECK_NOFILE_LIMIT = "check_nofile_limit"
#############################################################################
# Global variables
# self.context.logger: global logger
@ -173,7 +168,7 @@ class PreinstallImpl:
hideninfo:NA
"""
if self.context.localMode or self.context.isSingle:
if not self.context.skipHostnameSet or self.context.current_user_root:
if not self.context.skipHostnameSet:
self.writeLocalHosts({"127.0.0.1": "localhost"})
self.writeLocalHosts({"::1": "localhost"})
return
@ -276,8 +271,8 @@ class PreinstallImpl:
cmd = "%s" + delete_line_cmd + delete_shell_cmd
# get remote node and local node
host_list = self.context.clusterInfo.getClusterSshIps()[0]
local_host = NetUtil.getLocalIp()
host_list = self.context.clusterInfo.getClusterNodeNames()
local_host = NetUtil.GetHostIpOrName()
host_list.remove(local_host)
# delete remote root mutual trust
@ -358,9 +353,9 @@ class PreinstallImpl:
self.context.logger.log("Distributing package.", "addStep")
try:
# get the all node names in xml file
hosts = self.context.clusterInfo.getClusterSshIps()[0]
hosts = self.context.clusterInfo.getClusterNodeNames()
# remove the local node name
hosts.remove(socket.gethostbyname(socket.gethostname()))
hosts.remove(NetUtil.GetHostIpOrName())
self.getTopToolPath(self.context.sshTool,
self.context.clusterToolPath, hosts,
self.context.mpprcFile)
@ -711,10 +706,10 @@ class PreinstallImpl:
try:
# the IP for create trust
allIps = []
# return list
sshIps = self.context.clusterInfo.getClusterSshIps()[0]
sshIps = self.context.clusterInfo.getClusterSshIps()
# get all IPs
allIps.extend(sshIps)
for ips in sshIps:
allIps.extend(ips)
# create trust
self.context.sshTool.createTrust(self.context.user, allIps)
self.context.user_ssh_agent_flag = True
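The loop above flattens the nested list returned by getClusterSshIps() before handing it to createTrust; a toy illustration (the addresses are made up, and the exact grouping inside the nested list is an assumption):

# Assumed nested shape of getClusterSshIps(); the real grouping is not shown here.
ssh_ips = [["192.168.1.11"], ["192.168.1.12"], ["192.168.1.13"]]
all_ips = []
for ips in ssh_ips:
    all_ips.extend(ips)
print(all_ips)  # -> ['192.168.1.11', '192.168.1.12', '192.168.1.13']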
@ -776,32 +771,6 @@ class PreinstallImpl:
self.context.logger.log("Warning: This cluster is missing the rdtscp or avx instruction.")
self.context.logger.log("Successfully checked cpu instructions.", "constant")
def check_nofile_limit(self):
"""
function: Check if the nofile limit is more than 640000
input:NA
output:NA
"""
if self.context.localMode or self.context.isSingle:
return
if not self.context.clusterInfo.hasNoCm():
self.context.logger.log("Checking nofile limit.", "addStep")
try:
# Checking OS version
cmd = "%s -t %s -u %s -l %s" % (
OMCommand.getLocalScript("Local_PreInstall"),
ACTION_CHECK_NOFILE_LIMIT,
self.context.user,
self.context.localLog)
CmdExecutor.execCommandWithMode(
cmd,
self.context.sshTool,
self.context.localMode or self.context.isSingle,
self.context.mpprcFile)
except Exception as e:
raise Exception(str(e))
self.context.logger.log("Successfully checked nofile limit.", "constant")
def createOSUser(self):
"""
function:
@ -1637,20 +1606,6 @@ class PreinstallImpl:
package_dir = os.path.join(dir_name, "./../../../")
return os.path.realpath(package_dir)
def set_user_ssh_alive(self):
"""
set user ssh alive
"""
gp_home = EnvUtil.getEnv("GPHOME")
if os.getuid() == 0:
self.set_user_crontab()
else:
if CrontabUtil.check_user_crontab_permission():
self.set_user_crontab()
else:
self.set_user_ssh_service(gp_home)
def set_user_crontab(self):
"""
:return:
@ -1678,32 +1633,6 @@ class PreinstallImpl:
self.context.mpprcFile)
self.context.logger.debug("Successfully to set cron for %s" %self.context.user)
def set_user_ssh_service(self, gp_home):
"""
set user ssh service
"""
if os.getuid() == 0:
return
self.logger.log("Start set ssh service for %s" % self.user)
# copy self.hostList
host_ips = self.context.clusterInfo.getClusterSshIps()[0]
host_ip = host_ips[:]
host_ip.remove(self.local_ip)
ssh_service_local_file = os.path.normpath(os.path.join(gp_home, "script/local/create_ssh_service.sh"))
ssh_service_file = os.path.normpath(os.path.join(gp_home, "script/local/create_ssh_service.sh"))
# cp ssh service file to remote
ssh_service_dir = os.path.dirname(ssh_service_file)
LocalRemoteCmd.checkRemoteDir(self.context.ssh_tool, ssh_service_dir, host_ip, "")
self.context.ssh_tool.scpFiles(ssh_service_file, ssh_service_file, [], "", gp_path=gp_home)
# execute ssh service file
cmd = "sh %s %s %s" % (ssh_service_local_file, self.user, gp_home)
self.context.ssh_tool.executeCommand(cmd, DefaultValue.SUCCESS, host_ips)
self.logger.log("Successfully to set ssh service for %s" % self.user)
def do_perf_config(self):
"""
run gs_perfconfig to tune the OS configuration.
@ -1766,8 +1695,6 @@ class PreinstallImpl:
self.checkOSVersion()
# check cpu instructions
self.check_cpu_instructions()
# check nofile limit
self.check_nofile_limit()
# create path and set mode
self.createDirs()
# set os parameters
@ -1792,8 +1719,8 @@ class PreinstallImpl:
self.fixServerPackageOwner()
# unreg the disk of the dss and about
self.dss_init()
# set user ssh alive: crontab or systemd service
self.set_user_ssh_alive()
# set user cron
self.set_user_crontab()
# set user env and a flag,
# indicate that the preinstall.py has been execed succeed
self.doPreInstallSucceed()


@ -1644,7 +1644,7 @@ class StreamingBase(object):
cmd = g_file.SHELL_CMD_DICT["deleteDir"] % (self.streaming_file_dir,
self.streaming_file_dir)
try:
self.ssh_tool.executeCommand(cmd, hostList=self.cluster_info.getClusterSshIps()[0])
self.ssh_tool.executeCommand(cmd, hostList=self.cluster_info.getClusterNodeNames())
except Exception as error:
self.logger.debug(
"Failed to remove streaming dir with error:%s" % error)
@ -1972,7 +1972,7 @@ class StreamingBase(object):
StreamingConstants.STREAMING_CLUSTER_CONF_RECORD)
cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (cluster_info_file, cluster_info_file)
try:
self.ssh_tool.executeCommand(cmd, hostList=self.cluster_info.getClusterSshIps()[0])
self.ssh_tool.executeCommand(cmd, hostList=self.cluster_info.getClusterNodeNames())
except Exception as error:
self.logger.debug(
"Failed to remove cluster file with error:%s" % error)
@ -2502,8 +2502,8 @@ class StreamingBase(object):
if node_list:
p_node_list = " -H ".join(node_list)
elif self.cluster_info.getClusterSshIps()[0]:
p_node_list = " -H ".join(self.cluster_info.getClusterSshIps()[0])
elif self.cluster_info.getClusterNodeNames():
p_node_list = " -H ".join(self.cluster_info.getClusterNodeNames())
else:
raise Exception("Failed to delivery file: %s, node information does not exits"
% file_name)


@ -61,7 +61,7 @@ class UninstallImpl:
# Initialize the unstallation.clusterInfo variable
unstallation.initClusterInfoFromStaticFile(self.user)
self.clusterInfo = unstallation.clusterInfo
nodeNames = self.clusterInfo.getClusterSshIps()[0]
nodeNames = self.clusterInfo.getClusterNodeNames()
# Initialize the self.sshTool variable
unstallation.initSshTool(nodeNames,
DefaultValue.TIMEOUT_PSSH_UNINSTALL)
@ -208,7 +208,7 @@ class UninstallImpl:
"""
try:
failedNodes = []
validNodeName = self.clusterInfo.getClusterSshIps()[0]
validNodeName = self.clusterInfo.getClusterNodeNames()
# the command for killing all process
cmd_check_kill = DefaultValue.killInstProcessCmd(procFileName,
True, 9, False)

View File

@ -152,7 +152,7 @@ class UpgradeImpl:
"""
self.context.logger.debug("Init global infos", "addStep")
self.context.sshTool = SshTool(
self.context.cluster_ip, self.context.localLog,
self.context.clusterNodes, self.context.localLog,
DefaultValue.TIMEOUT_PSSH_BINARY_UPGRADE)
self.initVersionInfo()
self.initClusterConfig()
@ -375,7 +375,7 @@ class UpgradeImpl:
self.context.logger.debug(traceback.format_exc() + str(e))
if not self.context.sshTool:
self.context.sshTool = SshTool(
self.context.cluster_ip, self.context.logger,
self.context.clusterNodes, self.context.logger,
DefaultValue.TIMEOUT_PSSH_BINARY_UPGRADE)
if action == const.ACTION_AUTO_ROLLBACK and \
self.checkBakPathNotExists():
@ -776,7 +776,7 @@ class UpgradeImpl:
const.TMP_DYNAMIC_DN_INFO)
self.generateDynamicInfoFile(tmp_file)
self.context.logger.debug("Cmd for setting parameter: %s." % cmd)
host_list = copy.deepcopy(self.context.cluster_ip)
host_list = copy.deepcopy(self.context.clusterNodes)
self.context.execCommandInSpecialNode(cmd, host_list)
self.context.logger.debug("Successfully set guc value.")
except Exception as er:
@ -788,7 +788,7 @@ class UpgradeImpl:
if os.path.exists(tmp_file):
delete_cmd = "(if [ -f '%s' ]; then rm -f '%s'; fi) " % \
(tmp_file, tmp_file)
host_list = copy.deepcopy(self.context.cluster_ip)
host_list = copy.deepcopy(self.context.clusterNodes)
self.context.execCommandInSpecialNode(delete_cmd, host_list)
def setClusterReadOnlyMode(self):
@ -920,7 +920,7 @@ class UpgradeImpl:
if not self.context.isSingle:
self.context.sshTool.scpFiles(fileName, os.path.dirname(fileName) + "/",
hostList=self.context.cluster_ip)
hostList=self.context.clusterNodes)
self.context.logger.debug("Successfully written and send file %s. "
"The list context is %s." % (fileName, cmsParaDict))
except Exception as er:
@ -1041,7 +1041,7 @@ class UpgradeImpl:
"Maybe we should keep guc consistent manually "
"if failed")
cmd = "%s '%s'" % (CmdUtil.getRemoveCmd("file"), cmsGucFile)
hostList = copy.deepcopy(self.context.cluster_ip)
hostList = copy.deepcopy(self.context.clusterNodes)
self.context.execCommandInSpecialNode(cmd, hostList)
oldGUCParas = self.getCMServerGUC(OriginalGUCparas)
else:
@ -1060,7 +1060,7 @@ class UpgradeImpl:
self.set_cm_server_guc(closeGUCparas)
cmd = "mv '%s' '%s'" % (cmsGucFile, cmsGucFileSet)
hostList = copy.deepcopy(self.context.cluster_ip)
hostList = copy.deepcopy(self.context.clusterNodes)
self.context.execCommandInSpecialNode(cmd, hostList)
# make sure all cm_server child process has been killed. Example: gs_check
@ -1146,7 +1146,7 @@ class UpgradeImpl:
cmd += " && {0}".format(g_file.SHELL_CMD_DICT["deleteFile"] % (cms_guc_file, cms_guc_file))
if len(hostList) == 0:
hosts = copy.deepcopy(self.context.cluster_ip)
hosts = copy.deepcopy(self.context.clusterNodes)
self.context.execCommandInSpecialNode(cmd, hosts)
else:
self.context.execCommandInSpecialNode(cmd, copy.deepcopy(hostList))
@ -1453,7 +1453,7 @@ class UpgradeImpl:
first_cms_inst = all_cm_server_inst[0]
server_conf_file = os.path.join(first_cms_inst.datadir, "cm_server.conf")
remote_cmd = "grep -E '^enable_ssl = ' {0}".format(server_conf_file)
ssh_cmd = "pssh -s -H {0} \"{1}\"".format(first_cms_inst.listenIps[0] , remote_cmd)
ssh_cmd = "pssh -s -H {0} \"{1}\"".format(first_cms_inst.hostname, remote_cmd)
status, output = subprocess.getstatusoutput(ssh_cmd)
if status != 0 or "=" not in output:
self.context.logger.warn("Get enable_ssl failed. Output:: [{0}]".format(output))
@ -1475,7 +1475,7 @@ class UpgradeImpl:
flag_file_name = "enable_ssl_on" if enable_ssl_value == "on" else "enable_ssl_off"
flag_file_path = os.path.join(EnvUtil.getTmpDirFromEnv(), flag_file_name)
generate_cmd = "touch {0} && chmod 400 {0}".format(flag_file_path)
self.context.sshTool.executeCommand(generate_cmd, hostList=self.context.clusterInfo.getClusterSshIps()[0])
self.context.sshTool.executeCommand(generate_cmd, hostList=self.context.clusterInfo.getClusterNodeNames())
self.context.logger.debug("Generate enable_ssl flag file [{0}] successfully.".format(flag_file_path))
def set_enable_ssl(self, value):
@ -1497,13 +1497,13 @@ class UpgradeImpl:
self.context.logger.debug("Old cluster turn off enable_ssl.")
rm_flag_cmd = "rm -f {0}".format(ssl_off_flag)
self.context.sshTool.executeCommand(rm_flag_cmd,
hostList=self.context.clusterInfo.getClusterSshIps()[0])
hostList=self.context.clusterInfo.getClusterNodeNames())
return
if os.path.isfile(ssl_on_flag):
self.context.logger.debug("Old cluster turn on enable_ssl [{0}].".format(ssl_on_flag))
rm_flag_cmd = "rm -f {0}".format(ssl_on_flag)
self.context.sshTool.executeCommand(rm_flag_cmd,
hostList=self.context.clusterInfo.getClusterSshIps()[0])
hostList=self.context.clusterInfo.getClusterNodeNames())
else:
self.context.logger.debug("Old cluster not set enable_ssl parameter.")
return
@ -1522,7 +1522,7 @@ class UpgradeImpl:
self.context.sshTool.executeCommand(cmd, hostList=cm_node_names)
cmd = "sed -i 's/enable_ssl = {0}/enable_ssl = {1}/g' {2}".format(origin_value, value, cms_conf_file)
self.context.sshTool.executeCommand(cmd, hostList=self.context.clusterInfo.getClusterSshIps()[0])
self.context.sshTool.executeCommand(cmd, hostList=self.context.clusterInfo.getClusterNodeNames())
self.reload_cmserver()
self.context.logger.debug("Turn {0} enable_ssl parameter.".format(value))
@ -1713,7 +1713,7 @@ class UpgradeImpl:
self.prepareSql("upgrade-post")
self.execRollbackUpgradedCatalog(scriptType="upgrade-post")
self.getLsnInfo()
hosts = copy.deepcopy(self.context.cluster_ip)
hosts = copy.deepcopy(self.context.clusterNodes)
self.recordNodeStep(
GreyUpgradeStep.STEP_PRE_COMMIT, nodes=hosts)
self.recordDualClusterStage(self.newCommitId, DualClusterStage.STEP_UPGRADE_FINISH)
@ -1775,7 +1775,7 @@ class UpgradeImpl:
self.context.upgradeBackupPath,
self.context.localLog)
self.context.logger.debug("Command for sync GUC in upgrade: %s" % cmd)
hostList = copy.deepcopy(self.context.node_ips)
hostList = copy.deepcopy(self.context.nodeNames)
self.context.sshTool.executeCommand(cmd, hostList=hostList)
self.context.logger.debug("Successfully sync guc.")
@ -1807,14 +1807,14 @@ class UpgradeImpl:
self.context.localLog)
self.context.logger.debug("Command for syncing config files: %s"
% cmd)
hostList = copy.deepcopy(self.context.node_ips)
hostList = copy.deepcopy(self.context.nodeNames)
self.context.sshTool.executeCommand(cmd, hostList=hostList)
# change the owner of application
cmd = "chown -R %s:%s '%s'" % \
(self.context.user, self.context.group,
self.context.newClusterAppPath)
hostList = copy.deepcopy(self.context.node_ips)
hostList = copy.deepcopy(self.context.nodeNames)
self.context.sshTool.executeCommand(cmd, hostList=hostList)
except Exception as e:
raise Exception(str(e) + " Failed to sync configuration.")
@ -1959,7 +1959,7 @@ class UpgradeImpl:
if self.need_rolling(is_rollback) or is_rolling:
cmd += " --rolling"
self.context.logger.debug("Command for waiting for om_monitor: %s." % cmd)
hostList = copy.deepcopy(self.context.cluster_ip)
hostList = copy.deepcopy(self.context.nodeNames)
self.context.sshTool.executeCommand(cmd, hostList=hostList)
elapsed = timeit.default_timer() - start_time
self.context.logger.debug("Time to wait for om_monitor: %s." %
@ -2007,7 +2007,7 @@ class UpgradeImpl:
self.context.logger.debug(
"Command for switching DN processes: %s." % cmd)
hostList = copy.deepcopy(self.context.cluster_ip)
hostList = copy.deepcopy(self.context.nodeNames)
self.context.sshTool.executeCommand(cmd, hostList=hostList)
start_cluster_time = timeit.default_timer()
self.greyStartCluster()
@ -2093,7 +2093,7 @@ class UpgradeImpl:
try:
# prepare dynamic cluster info file in every node
self.getOneDNInst(checkNormal=True)
execHosts = [self.dnInst.listenIps[0]]
execHosts = [self.dnInst.hostname]
cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \
(OMCommand.getLocalScript("Local_Upgrade_Utility"),
const.ACTION_GET_LSN_INFO,
@ -2310,7 +2310,7 @@ class UpgradeImpl:
"""
cmd = "if [ -d '%s' ]; then echo 'True'; else echo 'False'; fi" %\
self.context.upgradeBackupPath
hostList = copy.deepcopy(self.context.cluster_ip)
hostList = copy.deepcopy(self.context.clusterNodes)
(resultMap, outputCollect) = self.context.sshTool.getSshStatusOutput(
cmd, hostList)
self.context.logger.debug(
@ -2799,7 +2799,7 @@ class UpgradeImpl:
cmd += "%s -m install -U %s --krb-client " % (
OMCommand.getLocalScript("Local_Kerberos"), self.context.user)
self.context.sshTool.executeCommand(
cmd, hostList=self.context.cluster_ip)
cmd, hostList=self.context.clusterNodes)
self.context.logger.log("Successfully install Kerberos.")
cmd = "source %s && gs_om -t start" % self.context.userProfile
(status, output) = subprocess.getstatusoutput(cmd)
@ -2947,7 +2947,7 @@ class UpgradeImpl:
(status, output) = ClusterCommand.remoteSQLCommand(
sql,
self.context.user,
self.dnInst.listenIps[0],
self.dnInst.hostname,
self.dnInst.port,
False,
DefaultValue.DEFAULT_DB_NAME,
@ -2968,7 +2968,7 @@ class UpgradeImpl:
(status, output) = ClusterCommand.remoteSQLCommand(
sql,
self.context.user,
self.dnInst.listenIps[0],
self.dnInst.hostname,
self.dnInst.port,
False,
DefaultValue.DEFAULT_DB_NAME,
@ -3014,7 +3014,7 @@ class UpgradeImpl:
if self.context.standbyCluster:
self.context.logger.debug("no need prepare upgrade sql folder under force upgrade")
return
hosts = self.context.cluster_ip
hosts = self.context.clusterNodes
cmd = "%s -t %s -U %s --upgrade_bak_path=%s -X %s -l %s" % \
(OMCommand.getLocalScript("Local_Upgrade_Utility"),
const.ACTION_UPGRADE_SQL_FOLDER,
@ -3069,7 +3069,7 @@ class UpgradeImpl:
(status, output) = ClusterCommand.remoteSQLCommand(
sql,
self.context.user,
host.listenIps[0],
host.hostname,
host.port,
False,
DefaultValue.DEFAULT_DB_NAME,
@ -3192,7 +3192,7 @@ class UpgradeImpl:
self.context.upgradeBackupPath, "oldClusterDBAndRel")
cmd = "rm -rf '%s' && mkdir '%s' -m '%s' " % \
(backup_path, backup_path, DefaultValue.KEY_DIRECTORY_MODE)
hostList = copy.deepcopy(self.context.cluster_ip)
hostList = copy.deepcopy(self.context.clusterNodes)
self.context.sshTool.executeCommand(cmd, hostList=hostList)
# prepare dynamic cluster info file in every node
self.generateDynamicInfoFile(tmpFile)
@ -3221,7 +3221,7 @@ class UpgradeImpl:
if os.path.exists(tmpFile):
deleteCmd = "(if [ -f '%s' ]; then rm -f '%s'; fi) " % \
(tmpFile, tmpFile)
hostList = copy.deepcopy(self.context.cluster_ip)
hostList = copy.deepcopy(self.context.clusterNodes)
self.context.sshTool.executeCommand(
deleteCmd, hostList=hostList)
@ -3241,7 +3241,7 @@ class UpgradeImpl:
raise Exception("Can not genetate dynamic info file")
self.context.distributeFileToSpecialNode(tmpFile,
os.path.dirname(tmpFile),
self.context.cluster_ip)
self.context.clusterNodes)
self.context.logger.debug(
"Success to generate dynamic info file and send to every node.")
except Exception as er:
@ -3270,7 +3270,7 @@ class UpgradeImpl:
if instance.status == 'Primary':
for staticDBNode in self.context.clusterInfo.dbNodes:
if staticDBNode.id == instance.nodeId:
cnAndPrimaryDnNodes.append(staticDBNode.sshIps[0])
cnAndPrimaryDnNodes.append(staticDBNode.name)
result = list(set(cnAndPrimaryDnNodes))
self.context.logger.debug("Success to get primary dn list from "
"dynamic file: {0}.".format(result))
@ -3370,7 +3370,7 @@ class UpgradeImpl:
self.context.logger.debug(
"Start to update pg_proc in inplace large upgrade ")
# generate new csv file
execHosts = [self.dnInst.listenIps[0]]
execHosts = [self.dnInst.hostname]
# send cmd to all node and exec
cmd = "%s -t %s -U %s -R '%s' -l %s" % (
OMCommand.getLocalScript("Local_Upgrade_Utility"),
@ -3463,7 +3463,7 @@ class UpgradeImpl:
for eachdb in database_list:
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.context.user,
self.dnInst.listenIps[0], self.dnInst.port, False,
self.dnInst.hostname, self.dnInst.port, False,
eachdb, IsInplaceUpgrade=True)
if status != 0:
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
@ -3484,7 +3484,7 @@ class UpgradeImpl:
for eachdb in database_list:
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.context.user,
self.dnInst.listenIps, self.dnInst.port, False,
self.dnInst.hostname, self.dnInst.port, False,
eachdb, IsInplaceUpgrade=True)
if status != 0:
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
@ -3500,7 +3500,7 @@ class UpgradeImpl:
mode = True if "dual-standby" in self.context.clusterType else False
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.context.user,
self.dnInst.listenIps, self.dnInst.port, False,
self.dnInst.hostname, self.dnInst.port, False,
DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True, maintenance_mode=mode)
if status != 0:
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
@ -3526,7 +3526,7 @@ class UpgradeImpl:
for eachdb in database_list:
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.context.user,
self.dnInst.listenIps[0], self.dnInst.port, False,
self.dnInst.hostname, self.dnInst.port, False,
eachdb, IsInplaceUpgrade=True, maintenance_mode=mode)
if status != 0:
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
@ -3564,22 +3564,18 @@ class UpgradeImpl:
if (scriptType == "rollback-post"):
self.getPrimaryDN(checkNormal=True)
dnNodeName = self.primaryDn.hostname
dn_node_ip = self.primaryDn.listenIps[0]
else:
dnNodeName = self.primaryDn.hostname
dn_node_ip = self.primaryDn.listenIps[0]
self.context.logger.debug("Primary dn {0} from cm_ctl query".format(
dnNodeName))
elif self.operate_action == const.ACTION_AUTO_ROLLBACK and \
self.get_cms_num(old_cluster_config_file) > 0 and scriptType == "rollback":
self.getPrimaryDN(checkNormal=True)
dnNodeName = self.primaryDn.hostname
dn_node_ip = self.primaryDn.listenIps[0]
self.context.logger.debug("Primary dn {0} from cm_ctl query".format(
dnNodeName))
else:
dnNodeName = self.dnInst.hostname
dn_node_ip = self.primaryDn.listenIps[0]
self.context.logger.debug("Primary dn {0} from config file".format(
dnNodeName))
@ -3600,7 +3596,7 @@ class UpgradeImpl:
self.context.logger.debug("Scp {0} file to nodes {1}".format(
check_upgrade_sql, dnNodeName))
if not self.context.isSingle:
LocalRemoteCmd.scpFile(dn_node_ip, check_upgrade_sql,
LocalRemoteCmd.scpFile(dnNodeName, check_upgrade_sql,
self.context.upgradeBackupPath)
if not os.path.isfile(maindb_sql):
raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % maindb_sql)
@ -3608,9 +3604,9 @@ class UpgradeImpl:
raise Exception(
ErrorCode.GAUSS_502["GAUSS_50210"] % otherdb_sql)
if (not self.context.isSingle):
LocalRemoteCmd.scpFile(dn_node_ip, maindb_sql,
LocalRemoteCmd.scpFile(dnNodeName, maindb_sql,
self.context.upgradeBackupPath)
LocalRemoteCmd.scpFile(dn_node_ip, otherdb_sql,
LocalRemoteCmd.scpFile(dnNodeName, otherdb_sql,
self.context.upgradeBackupPath)
self.context.logger.debug(
"Scp {0} file and {1} file to nodes {2}".format(
@ -3629,7 +3625,7 @@ class UpgradeImpl:
self.context.sshTool,
self.context.isSingle,
self.context.userProfile,
[dn_node_ip])
[dnNodeName])
self.context.logger.debug(
"Successfully {0} catalog.".format(scriptType))
except Exception as e:
@ -3669,7 +3665,7 @@ class UpgradeImpl:
sql += "COMMIT;"
self.context.logger.debug("Current sql %s." % sql)
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.context.user, dn.listenIps[0], dn.port,
sql, self.context.user, dn.hostname, dn.port,
False, DefaultValue.DEFAULT_DB_NAME,
IsInplaceUpgrade=True)
if status != 0:
@ -4023,7 +4019,7 @@ END$$;"""
self.context.logger.debug(f"rebuild_pg_proc_index at database {eachdb}")
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.context.user,
self.dnInst.listenIps[0], self.dnInst.port, False,
self.dnInst.hostname, self.dnInst.port, False,
eachdb, IsInplaceUpgrade=True)
if status != 0:
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
@ -4064,7 +4060,7 @@ END;"""
for eachdb in database_list:
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.context.user,
self.dnInst.listenIps[0], self.dnInst.port, False,
self.dnInst.hostname, self.dnInst.port, False,
eachdb, IsInplaceUpgrade=True)
if status != 0:
self.context.logger.debug("re-create sha2 functoin failed. Error: %s" % str(output))
@ -4093,7 +4089,7 @@ END;"""
for eachdb in database_list:
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.context.user,
self.dnInst.listenIps[0], self.dnInst.port, False,
self.dnInst.hostname, self.dnInst.port, False,
eachdb, IsInplaceUpgrade=True)
if status != 0:
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
@ -4111,7 +4107,7 @@ END;"""
for eachdb in database_list:
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.context.user,
self.dnInst.listenIps[0], self.dnInst.port, False,
self.dnInst.hostname, self.dnInst.port, False,
eachdb, IsInplaceUpgrade=True)
if status != 0:
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
@ -4192,12 +4188,12 @@ END;"""
" else echo 'NoThisFile'; fi" % dirFile
self.context.logger.debug("Command for checking file: %s" % cmd)
(status, output) = self.context.sshTool.getSshStatusOutput(
cmd, self.context.cluster_ip, self.context.mpprcFile)
cmd, self.context.clusterNodes, self.context.mpprcFile)
outputMap = self.context.sshTool.parseSshOutput(
self.context.cluster_ip)
self.context.clusterNodes)
self.context.logger.debug("Output: %s" % output)
copyNode = ""
for node in self.context.cluster_ip:
for node in self.context.clusterNodes:
if status[node] == DefaultValue.SUCCESS:
if 'GetFile' in outputMap[node]:
copyNode = node
@ -4396,7 +4392,7 @@ END;"""
if cmNode.name.strip() == NetUtil.GetHostIpOrName():
executeCmd = cmd
else:
sshCmd = "%s " % CmdUtil.getSshCmd(cmNode.backIps[0])
sshCmd = "%s " % CmdUtil.getSshCmd(cmNode.name)
executeCmd = "%s \"%s\"" % (sshCmd, cmd)
self.context.logger.debug(
"Command for getting CMServer parameters: %s." % executeCmd)
@ -4441,7 +4437,7 @@ END;"""
const.ACTION_CLEAN_CONF_BAK_OLD,
self.context.user,
self.context.localLog)
hostList = copy.deepcopy(self.context.node_ips)
hostList = copy.deepcopy(self.context.nodeNames)
self.context.sshTool.executeCommand(cmd, hostList=hostList)
except Exception as e:
raise Exception(str(e))
@ -4722,7 +4718,7 @@ END;"""
if self.context.forceRollback:
cmd += " --force"
self.context.logger.debug("Command for restoring config: %s" % cmd)
rollbackList = copy.deepcopy(self.context.cluster_ip)
rollbackList = copy.deepcopy(self.context.clusterNodes)
self.context.sshTool.executeCommand(cmd, hostList=rollbackList)
self.context.logger.debug("Successfully restore config.")
@ -4741,7 +4737,7 @@ END;"""
if self.context.forceRollback:
cmd += " --force"
self.context.logger.debug("Command for restoring GUC: %s" % cmd)
rollbackList = copy.deepcopy(self.context.cluster_ip)
rollbackList = copy.deepcopy(self.context.clusterNodes)
self.context.sshTool.executeCommand(cmd, hostList=rollbackList)
self.context.logger.debug("Successfully restore guc.")
@ -4897,7 +4893,7 @@ END;"""
sql = "select count(*) from pg_class where relname = '%s';" % name
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.context.user,
self.dnInst.listenIps[0], self.dnInst.port, False,
self.dnInst.hostname, self.dnInst.port, False,
eachdb, IsInplaceUpgrade=True, maintenance_mode=mode)
if status != 0 or SqlResult.findErrorInSql(output):
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
@ -4945,7 +4941,7 @@ END;"""
if self.check_table_or_index_exist(table_name, eachdb):
(status, output) = ClusterCommand.remoteSQLCommand(
delete_table_sql, self.context.user,
self.dnInst.listenIps[0], self.dnInst.port, False,
self.dnInst.hostname, self.dnInst.port, False,
eachdb, IsInplaceUpgrade=True)
if status != 0:
raise Exception(
@ -4957,7 +4953,7 @@ END;"""
"drop index %s;commit;" % index
(status, output) = ClusterCommand.remoteSQLCommand(
sql, self.context.user,
self.dnInst.listenIps[0], self.dnInst.port, False,
self.dnInst.hostname, self.dnInst.port, False,
eachdb, IsInplaceUpgrade=True)
if status != 0:
raise Exception(
@ -5690,11 +5686,8 @@ END;"""
try:
hosts = self.context.clusterInfo.getClusterSshIps()[0]
if NetUtil.getLocalIp() in hosts:
hosts.remove(NetUtil.getLocalIp())
if "127.0.0.1" in hosts:
hosts.remove("127.0.0.1")
hosts = self.context.clusterInfo.getClusterNodeNames()
hosts.remove(NetUtil.GetHostIpOrName())
# Send xml file to every host
DefaultValue.distributeXmlConfFile(self.context.sshTool,
@ -5741,12 +5734,8 @@ END;"""
"""
self.context.logger.debug("Distribute the file %s" % step_file)
# send the file to each node
hosts = self.context.clusterInfo.getClusterSshIps()
if NetUtil.getLocalIp() in hosts:
hosts.remove(NetUtil.getLocalIp())
if "127.0.0.1" in hosts:
hosts.remove("127.0.0.1")
hosts = self.context.clusterInfo.getClusterNodeNames()
hosts.remove(NetUtil.GetHostIpOrName())
if not self.context.isSingle:
stepDir = os.path.normpath(os.path.dirname(step_file))
self.context.sshTool.scpFiles(step_file, stepDir, hosts)
@ -5885,7 +5874,7 @@ END;"""
raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME")
versionFile = os.path.join(gaussHome, "bin/upgrade_version")
cmd = "sed -n \'3,1p\' %s" % versionFile
hostList = copy.deepcopy(self.context.cluster_ip)
hostList = copy.deepcopy(self.context.clusterNodes)
(resultMap, outputCollect) = \
self.context.sshTool.getSshStatusOutput(cmd, hostList)
for key, val in resultMap.items():
@ -6301,7 +6290,7 @@ END;"""
self.context.logger.debug("No need to create CA for CM.")
return
hostList = copy.deepcopy(self.context.node_ips)
hostList = copy.deepcopy(self.context.nodeNames)
cmd = "%s -t %s -U %s --new_cluster_app_path=%s -l %s" % \
(OMCommand.getLocalScript("Local_Upgrade_Utility"),
@ -6322,7 +6311,7 @@ END;"""
self.context.logger.debug("Start to restart cmagent and cmserver")
kill_cm_proc = "pkill -9 cm_agent -U {user}; " \
"pkill -9 cm_server -U {user};".format(user=self.context.user)
host_list = copy.deepcopy(self.context.cluster_ip)
host_list = copy.deepcopy(self.context.clusterNodes)
self.context.logger.debug(f"stopCMProcessesCmd: {kill_cm_proc} on {host_list}")
self.context.sshTool.getSshStatusOutput(kill_cm_proc, host_list)
self.context.logger.debug("End to restart cmagent and cmserver")
@ -6353,7 +6342,7 @@ END;"""
self.context.localLog)
self.context.logger.debug("reloading all cmagent process: %s" % cmd)
try:
hostList = copy.deepcopy(self.context.cluster_ip)
hostList = copy.deepcopy(self.context.clusterNodes)
self.context.execCommandInSpecialNode(cmd, hostList)
# wait the cluster be normal
self.waitClusterNormalDegrade()
@ -6378,7 +6367,7 @@ END;"""
# Get all the nodes that contain the CMSERVER instance
for dbNode in self.context.clusterInfo.dbNodes:
if len(dbNode.cmservers) > 0:
cm_nodes.append(dbNode.sshIps[0])
cm_nodes.append(dbNode.name)
cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \
(OMCommand.getLocalScript("Local_Upgrade_Utility"),
const.ACTION_RELOAD_CMSERVER,
@ -6734,9 +6723,9 @@ END;"""
output += "\n Cluster is pausing."
elif checkPosition == const.OPTION_POSTCHECK:
if len(self.context.nodeNames) != 0:
checknodes = self.context.node_ips
checknodes = self.context.nodeNames
else:
checknodes = self.context.clusterInfo.getClusterSshIps()
checknodes = self.context.clusterInfo.getClusterNodeNames()
if self.checkClusterStatus(checkPosition) != 0:
output += "\n Cluster status is Abnormal."
if not self.checkVersion(
@ -6927,7 +6916,7 @@ END;"""
sql = "SELECT 1;"
(status, output) = \
ClusterCommand.remoteSQLCommand(
sql, self.context.user, dnInst.listenIps[0], dnInst.port,
sql, self.context.user, dnInst.hostname, dnInst.port,
False, DefaultValue.DEFAULT_DB_NAME,
IsInplaceUpgrade=True, maintenance_mode=mode)
if status != 0 or not output.isdigit():
@ -7233,7 +7222,7 @@ END;"""
const.ACTION_CLEAN_CM,
self.context.localLog)
self.context.logger.debug("Roll back CM install command: {0}".format(cmd))
self.context.sshTool.executeCommand(cmd, hostList=self.context.node_ips)
self.context.sshTool.executeCommand(cmd, hostList=self.context.nodeNames)
self.context.logger.debug("Clean cm directory successfully.")
else:
self.context.logger.debug("No need clean CM instance directory.")
@ -7268,7 +7257,7 @@ END;"""
self.context.sshTool,
self.context.isSingle,
self.context.mpprcFile,
self.context.node_ips)
self.context.nodeNames)
else:
CmdExecutor.execCommandWithMode(cmd,
self.context.sshTool,
@ -7352,11 +7341,11 @@ END;"""
"else echo 'NoThisFile'; fi" % packFilePath
self.context.logger.debug("Command for checking file: %s" % cmd)
(status, output) = self.context.sshTool.getSshStatusOutput(
cmd, self.context.cluster_ip, self.context.mpprcFile)
cmd, self.context.clusterNodes, self.context.mpprcFile)
outputMap = self.context.sshTool.parseSshOutput(
self.context.cluster_ip)
self.context.clusterNodes)
self.context.logger.debug("Output: %s" % output)
for node in self.context.cluster_ip:
for node in self.context.clusterNodes:
if status[node] == DefaultValue.SUCCESS:
if 'GetFile' in outputMap[node]:
copyNode = node
@ -7364,7 +7353,7 @@ END;"""
if copyNode:
self.context.logger.debug("Copy the file %s from node %s." %
(packFilePath, copyNode))
for node in self.context.cluster_ip:
for node in self.context.clusterNodes:
if status[node] == DefaultValue.SUCCESS:
if 'NoThisFile' in outputMap[node]:
cmd = LocalRemoteCmd.getRemoteCopyCmd(
@ -7504,7 +7493,7 @@ END;"""
execHost = self.primaryDn
self.context.logger.debug("Exec sql in dn node {0}".format(execHost.hostname))
(status, output) = ClusterCommand.remoteSQLCommand(sql, self.context.user,
execHost.listenIps[0], execHost.port,
execHost.hostname, execHost.port,
False,
DefaultValue.DEFAULT_DB_NAME,
IsInplaceUpgrade=True,
@ -7826,7 +7815,7 @@ END;"""
self.context.sshTool.scpFiles(self.context.upgradePhaseInfoPath,
self.context.tmpDir,
hostList=self.context.cluster_ip)
hostList=self.context.clusterNodes)
def getDisasterRecoveryUser(self):
"""
@ -7873,7 +7862,7 @@ END;"""
self.context.user,
self.context.localLog)
self.context.logger.debug("clean gs_secure_files folder:{0}".format(cmd))
host_list = copy.deepcopy(self.context.cluster_ip)
host_list = copy.deepcopy(self.context.clusterNodes)
self.context.execCommandInSpecialNode(cmd, host_list)
except Exception as er:
raise Exception(str(er))

View File

@ -57,7 +57,7 @@ class UpgradeCmImpl(UpgradeImpl):
Initialize global value
"""
self.upgrade_context.initClusterInfoFromStaticFile(self.upgrade_context.user)
self.upgrade_context.initSshTool(self.upgrade_context.clusterInfo.getClusterBackIps()[0],
self.upgrade_context.initSshTool(self.upgrade_context.clusterInfo.getClusterNodeNames(),
timeout=300)
self.cluster_info = self.upgrade_context.clusterInfo
self.ssh_tool = self.upgrade_context.sshTool

View File

@ -77,7 +77,7 @@ class LocalBackup(LocalBaseOM):
def __init__(self, logFile="", user="", tmpBackupDir="", \
backupPara=False, backupBin=False, nodeName=""):
"""
function: initialize variable
function: initialize variable
input : user, tmpBackupDir, backupPara,
backupBin, logFile, nodeName
output: parameter

View File

@ -99,7 +99,7 @@ def initGlobals():
g_logger.debug("Cluster information: \n%s." % str(g_clusterInfo))
global g_sshTool
nodenames = g_clusterInfo.getClusterSshIps()[0]
nodenames = g_clusterInfo.getClusterNodeNames()
g_sshTool = SshTool(nodenames)
try:

View File

@ -1054,10 +1054,7 @@ def formatTime(filename):
output : str
"""
try:
if 'dms' in filename or 'dss' in filename:
timelist = re.findall(r"\d\d\d\d\d\d\d\d\d\d\d\d\d\d", filename)
else:
timelist = re.findall(r"\d\d\d\d-\d\d-\d\d_\d\d\d\d\d\d", filename)
timelist = re.findall(r"\d\d\d\d-\d\d-\d\d_\d\d\d\d\d\d", filename)
if not timelist:
with open(filename, 'r') as f:
lines = f.readlines()
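The branch removed above only matters for dms/dss log names, which carry a bare 14-digit timestamp instead of the usual YYYY-MM-DD_HHMMSS form; a quick illustration with invented file names:
import re

om_log = "gs_local-2024-08-01_163426-current.log"   # invented name in the usual format
dms_log = "dms_instance_20240801163426.rlog"        # invented dms-style name
print(re.findall(r"\d\d\d\d-\d\d-\d\d_\d\d\d\d\d\d", om_log))   # ['2024-08-01_163426']
print(re.findall(r"\d\d\d\d\d\d\d\d\d\d\d\d\d\d", dms_log))     # ['20240801163426']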

View File

@ -89,7 +89,6 @@ ACTION_SET_CGROUP = "set_cgroup"
ACTION_CHECK_CONFIG = "check_config"
ACTION_DSS_NIT = "dss_init"
ACTION_CHECK_CPU_INSTRUCTIONS = "check_cpu_instructions"
ACTION_CHECK_NOFILE_LIMIT = "check_nofile_limit"
g_nodeInfo = None
envConfig = {}
@ -281,8 +280,7 @@ Common options:
ACTION_SET_ARM_OPTIMIZATION,
ACTION_CHECK_DISK_SPACE, ACTION_SET_WHITELIST,
ACTION_FIX_SERVER_PACKAGE_OWNER, ACTION_DSS_NIT,
ACTION_CHANGE_TOOL_ENV, ACTION_CHECK_CONFIG, ACTION_CHECK_CPU_INSTRUCTIONS,
ACTION_CHECK_NOFILE_LIMIT]
ACTION_CHANGE_TOOL_ENV, ACTION_CHECK_CONFIG, ACTION_CHECK_CPU_INSTRUCTIONS]
if self.action == "":
GaussLog.exitWithError(
ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + ".")
@ -638,7 +636,7 @@ Common options:
self.logger.debug("Change file[/etc/hosts] mode.")
FileUtil.changeMode(DefaultValue.HOSTS_FILE, "/etc/hosts")
try:
node_names = self.clusterInfo.getClusterSshIps()[0]
node_names = self.clusterInfo.getClusterNodeNames()
pool = ThreadPool(DefaultValue.getCpuSet())
pool.map(self.check_hostname, node_names)
pool.close()
@ -2020,25 +2018,6 @@ Common options:
self.logger.debug(cpu_mission)
raise Exception(cpu_mission)
def check_nofile_limit(self):
"""
function: Check nofile limit
input:NA
output:NA
"""
self.logger.debug("Checking nofile limit.")
if os.getuid() == 0:
cmd = "su - %s -c \'ulimit -n\'" % self.user
else:
cmd = "ulimit -n"
(status, output) = subprocess.getstatusoutput(cmd)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error: \n%s" % output)
ulimit_value = int(output.strip())
if ulimit_value < DefaultValue.NOFILE_LIMIT:
raise Exception("Deploy cm, the number of file handles for %s user must be greater than %s" % (self.user, DefaultValue.NOFILE_LIMIT))
self.logger.debug("Successfully checked nofile limit.")
def checkPlatformArm(self):
"""
function: Setting ARM Optimization
@ -2814,7 +2793,7 @@ Common options:
toolPath = self.clusterToolPath
self.logger.log("change '%s' files permission and owner." % toolPath)
FileUtil.changeOwner(self.user, toolPath, recursive=True, link=True)
FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE,
FileUtil.changeMode(DefaultValue.MAX_DIRECTORY_MODE,
toolPath, recursive=True)
FileUtil.changeMode(DefaultValue.SPE_FILE_MODE,
"%s/script/gs_*" % toolPath)
@ -3170,8 +3149,6 @@ Common options:
self.check_config()
elif self.action == ACTION_CHECK_CPU_INSTRUCTIONS:
self.check_cpu_instructions()
elif self.action == ACTION_CHECK_NOFILE_LIMIT:
self.check_nofile_limit()
else:
self.logger.logExit(ErrorCode.GAUSS_500["GAUSS_50000"]
% self.action)

View File

@ -23,7 +23,6 @@ import getopt
import os
import sys
import re
import subprocess
sys.path.append(sys.path[0] + "/../")
from gspylib.common.GaussLog import GaussLog
@ -165,22 +164,7 @@ class Uninstall(LocalBaseOM):
self.logger.log("Deleting monitor.")
if not CrontabUtil.check_user_crontab_permission():
self.logger.log("Warning: The user has no permission to delete crontab task.")
self.query_om_monitor_service()
self.clean_om_monitor_service()
else:
self.clean_om_monitor_crontab()
# clean om_monitor,cm_agent,cm_server process
for progname in ["om_monitor", "cm_agent", "cm_server"]:
ProcessUtil.killallProcess(self.user, progname, '9')
self.logger.log("Successfully deleted OM Monitor.")
def clean_om_monitor_crontab(self):
"""
function: clean om_monitor crontab
input : NA
output: NA
"""
self.logger.log("Deleting om monitor crontab.")
return
try:
# get all content by crontab command
(status, output) = CrontabUtil.getAllCrontab()
@ -190,43 +174,18 @@ class Uninstall(LocalBaseOM):
FileUtil.createFile(crontabFile, True)
content_CronTabFile = [output]
FileUtil.writeFile(crontabFile, content_CronTabFile)
FileUtil.deleteLine(crontabFile, "\/bin\/om_monitor")
CrontabUtil.execCrontab(crontabFile)
FileUtil.removeFile(crontabFile)
# clean om_monitor,cm_agent,cm_server process
for progname in ["om_monitor", "cm_agent", "cm_server"]:
ProcessUtil.killallProcess(self.user, progname, '9')
except Exception as e:
if os.path.exists(crontabFile):
FileUtil.removeFile(crontabFile)
raise Exception(str(e))
self.logger.log("Successfully deleted om monitor.")
def clean_om_monitor_service(self):
"""
function: clean om_monitor systemd service
input : NA
output: NA
"""
self.logger.log("Deleting om monitor service.")
cmd = "systemctl --user stop om_monitor; systemctl --user disable om_monitor"
(status, output) = subprocess.getstatusoutput(cmd)
if status != 0:
self.logger.log("Failed to stop om_monitor service.")
clean_systemd_cmd = "ps ux|grep dbus-daemon |grep -v grep | awk '{print $2}'|xargs -r kill -9 \
ps ux|grep /usr/lib/systemd/systemd |grep -v grep | awk '{print $2}'|xargs -r kill -9 "
(status, output) = subprocess.getstatusoutput(clean_systemd_cmd)
if status != 0:
self.logger.log("Failed to clean systemd service.")
self.logger.log("Deleting om monitor service.")
def query_om_monitor_service(self):
"""
function: query om_monitor systemd service
input : NA
output: True False
"""
self.logger.log("Querying om monitor service.")
cmd = "systemctl --user status om_monitro"
(status, output) = subprocess.getstatusoutput(cmd)
if output.find("om_monitor") == -1:
self.logger.log("Warning: The om monitor service is not running.")
return False
self.logger.log("Querying om monitor service succefully.")
return True
self.logger.log("Successfully deleted OMMonitor.")
def checkParameters(self):
"""

View File

@ -1,187 +0,0 @@
#!/bin/bash
service_name=ssh
user=$1
gphome=$2
user_home=$(eval echo ~$user)
service_dir="${user_home}/.config/systemd/user"
service_file="${service_dir}/${service_name}.service"
commond=${gphome}script/local/CheckSshAgent.py
ssh_query_flag=false
ssh_content_flag=false
db_status_flag=false
ssh_start_flag=false
# Create the service dir
if [ ! -d ${service_dir} ]; then
mkdir -p ${service_dir}
fi
write_ssh_file() {
cat <<EOL > $service_file
[Unit]
Description=ssh service
After=network.target
[Service]
ExecStart=${commond}
Restart=always
RestartSec=1s
StartLimitInterval=0
[Install]
WantedBy=default.target
EOL
}
# 1.query ssh service status
query_ssh() {
res=$(systemctl --user status $service_name.service)
if [[ $res =~ ".config/systemd/user/ssh.service" ]]; then
echo "query ssh successfully"
ssh_query_flag=true
else
echo "query ssh failed"
ssh_query_flag=false
fi
}
# 2.query ssh service content
query_ssh_content() {
content=$(cat $service_file | grep ExecStart)
if [[ $content =~ "${commond}" ]]; then
echo "query ssh content successfully"
ssh_content_flag=true
else
echo "query ssh content failed"
ssh_content_flag=false
fi
}
# 3.create ssh service file
create_ssh_file() {
local max_retries=3
local count=0
while [ $count -lt $max_retries ]; do
query_ssh_content
if [ $ssh_content_flag = "true" ]; then
echo "create ssh service file successfully"
break
else
write_ssh_file
echo "create ssh service file failed, retrying..."
fi
count=$(( $count + 1 ))
done
}
check_dbus() {
if ! dbus-send --session --dest=org.freedesktop.DBus --type=method_call --print-reply / org.freedesktop.DBus.ListNames > /dev/null 2>&1; then
echo "check dbus failed"
$db_status_flag=false
else
echo "check dbus sucessfully"
$db_status_flag=true
fi
}
start_dbus() {
# XDG_RUNTIME_DIR is an environment variable that points to the user's runtime directory, which typically holds per-session temporary files and socket files
# start the D-Bus session
export XDG_RUNTIME_DIR=/run/user/$(id -u)
mkdir -p /run/user/$(id -u)
chmod 700 /run/user/$(id -u)
eval $(dbus-launch --sh-syntax)
}
clean_dbus() {
dbus_id=$(dbus-launch) | grep DBUS_SESSION_BUS_PID | awk -F'=' '{print $2}'
kill -9 ${dbus_id}
ps ux|grep dbus-daemon |grep -v grep | awk '{print $2}'|xargs -r kill -9
ps ux|grep /usr/lib/systemd/systemd |grep -v grep | awk '{print $2}'|xargs -r kill -9
# Delete the files under the /run/user/${id -u} directory
# Kill the /usr/bin/dbus-daemon processes that are still running
# Then re-create the D-Bus session, exporting the environment variables first
rm -rf /run/user/${id -u}/*
}
# 3.create dbus
create_dbus() {
local max_retries=3
local count=0
while [ $count -lt $max_retries ]; do
check_dbus
if [ $db_status_flag = "true" ]; then
echo "dbus is running"
break
else
echo "dbus is not running"
clean_dbus
start_dbus
sleep 1s
fi
count=$((count+1))
done
}
# 4.reload daemon
reload_daemon() {
local max_retries=3
local count=0
while [ $count -lt $max_retries ]; do
chmod +x ${service_file}
# Reload systemd, start and enable the service
res=$(systemctl --user daemon-reload)
if [ $? -ne 0 ]; then
echo "systemctl --user daemon-reload failed"
create_dbus
else
echo "systemctl --user daemon-reload successfully"
break
fi
count=$((count+1))
done
}
# 5.start ssh service
start_ssh() {
systemctl --user start $service_name.service
systemctl --user enable $service_name.service
}
query_ssh
if [ $ssh_query_flag = "true" ]; then
query_ssh_content
if [ $ssh_content_flag = "true" ]; then
echo "ssh service is running and content is correct"
exit 0
else
echo "ssh service is running but content is incorrect"
create_ssh_file
if [ $ssh_content_flag = "true" ]; then
echo "ssh service is running and content is correct"
exit 0
else
echo "ssh service is running but content is incorrect"
exit 1
fi
fi
else
echo "ssh service is not running"
check_dbus
if [ $db_status_flag = "true" ]; then
echo "dbus is running"
else
echo "dbus is not running"
create_dbus
if [ $db_status_flag = "true" ]; then
echo "dbus is running"
else
echo "dbus is not running"
exit 1
fi
fi
reload_daemon
start_ssh
fi
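Assuming a working systemd user session, the unit written by the script above can also be checked from Python; "ssh" matches the service_name at the top of the script, everything else is generic:
import subprocess

status, output = subprocess.getstatusoutput("systemctl --user is-active ssh.service")
print(status, output)  # 0 / 'active' once the keep-alive unit is running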

View File

@ -52,7 +52,7 @@ class Collector(object):
return [param.database.value]
# Otherwise auto-detect all databases; only A-compatibility databases are supported
if param.action == Action.VERIFY:
if action == Action.VERIFY:
search_db_sql = "select datname from pg_database " \
"where datname not in ('template0', 'template1') and " \
" upper(datcompatibility) != 'A'"
@ -69,7 +69,7 @@ class Collector(object):
logger.err('没有需要校验的库。')
return [row[0] for row in qres]
elif param.action == Action.EXPORT:
elif action == Action.EXPORT:
search_db_sql = "select datname from pg_database " \
"where datname = 'postgres' and upper(datcompatibility) = 'A'"
qres = og.query(search_db_sql)