From 62b5d4e434cdbafd4f0b94fb7e2b2ed827ebdf28 Mon Sep 17 00:00:00 2001 From: liuheng Date: Thu, 1 Aug 2024 18:09:41 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8E=BB=E9=99=A4root=E6=9D=83=E9=99=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../base_utils/executor/local_remote_cmd.py | 15 +- script/base_utils/os/crontab_util.py | 37 ++++ script/base_utils/os/file_util.py | 61 ++++++ script/base_utils/os/hosts_util.py | 57 ++++++ .../base_utils/security/security_checker.py | 9 + script/gs_check | 3 +- script/gs_checkos | 2 +- script/gs_expansion | 108 ++++++---- script/gs_preinstall | 23 +++ script/gs_sshexkey | 111 +++++++--- script/gspylib/common/Common.py | 118 ++++++++++- script/gspylib/common/DbClusterInfo.py | 4 +- .../component/Kernel/DN_OLAP/DN_OLAP.py | 17 ++ .../gspylib/inspection/common/SharedFuncs.py | 5 +- script/gspylib/threads/SshTool.py | 193 +++++++++++++----- .../impl/checkperf/OLAP/CheckperfImplOLAP.py | 4 + script/impl/dropnode/DropnodeImpl.py | 55 ++--- .../impl/dropnode/drop_node_with_cm_impl.py | 1 + script/impl/expansion/ExpansionImpl.py | 32 +++ .../expansion/expansion_impl_with_cm_local.py | 2 +- script/impl/install/OLAP/InstallImplOLAP.py | 8 +- .../preinstall/OLAP/PreinstallImplOLAP.py | 10 +- script/impl/preinstall/PreinstallImpl.py | 41 +++- script/local/PreInstallUtility.py | 31 ++- 24 files changed, 774 insertions(+), 173 deletions(-) create mode 100644 script/base_utils/os/hosts_util.py diff --git a/script/base_utils/executor/local_remote_cmd.py b/script/base_utils/executor/local_remote_cmd.py index b71d5e8..c02789b 100644 --- a/script/base_utils/executor/local_remote_cmd.py +++ b/script/base_utils/executor/local_remote_cmd.py @@ -20,6 +20,7 @@ import os import subprocess import threading +import re from base_utils.common.constantsbase import ConstantsBase from gspylib.common.ErrorCode import ErrorCode @@ -27,6 +28,9 @@ from base_utils.os.cmd_util import CmdUtil from base_utils.os.net_util import NetUtil from gspylib.os.gsfile import g_file from base_utils.executor.cmd_executor import CmdExecutor +from base_utils.security.security_checker import SecurityChecker +from base_utils.os.file_util import FileUtil +from base_utils.os.hosts_util import HostsUtil class LocalRemoteCmd(object): @@ -136,13 +140,20 @@ class LocalRemoteCmd(object): if path_type == "directory": opts = "-x -r" + if not SecurityChecker.check_is_ip(remote_host): + remote_host = HostsUtil.hostname_to_ip(remote_host) if copy_to: - return "%s;pscp --trace-id %s %s -H %s %s %s " % \ - (ENV_SOURCE_CMD, trace_id, opts.strip(), remote_host, src, dest) + cmd = "%s;pscp --trace-id %s %s -H %s %s %s " % \ + (ENV_SOURCE_CMD, trace_id, opts.strip(), remote_host, src, dest) + return cmd else: localhost = NetUtil.getLocalIp() if other_host is not None: localhost = other_host + if not SecurityChecker.check_is_ip(remote_host): + remote_host = HostsUtil.hostname_to_ip(remote_host) + if not SecurityChecker.check_is_ip(localhost): + localhost = HostsUtil.hostname_to_ip(localhost) return "%s;pssh --trace-id %s -s -H %s \" pscp %s -H %s %s %s \" " % \ (ENV_SOURCE_CMD, trace_id, remote_host, opts.strip(), localhost, src, dest) diff --git a/script/base_utils/os/crontab_util.py b/script/base_utils/os/crontab_util.py index 5d8c2f5..28b7b8f 100644 --- a/script/base_utils/os/crontab_util.py +++ b/script/base_utils/os/crontab_util.py @@ -20,6 +20,8 @@ import os import subprocess +import sys +import time from gspylib.common.ErrorCode import ErrorCode from base_utils.os.cmd_util import CmdUtil @@ -83,3 +85,38 @@ class CrontabUtil(object): if output.find("not allowed") >= 0: return False return True + + @staticmethod + def user_custom_cron_task(cmd, pid_file): + """ + function : Check user custom cron task + input : NA + output: True or False + """ + CrontabUtil.deamonize(cmd, pid_file) + + @staticmethod + def job(cmd): + """ + custem user cron + """ + subprocess.run(cmd, shell=True) + + @staticmethod + def run_timer(cmd): + while True: + CrontabUtil.job(cmd) + time.sleep(1) + + @staticmethod + def deamonize(cmd, pid_file): + import daemon + from daemon import pidfile + with daemon.DaemonContext( + working_directory='.', + umask=0o002, + pidfile=pidfile.TimeoutPIDLockFile(pid_file), + stdout=sys.stdout, + stderr=sys.stderr + ): + CrontabUtil.run_timer(cmd) \ No newline at end of file diff --git a/script/base_utils/os/file_util.py b/script/base_utils/os/file_util.py index 55e20b9..2062b69 100644 --- a/script/base_utils/os/file_util.py +++ b/script/base_utils/os/file_util.py @@ -956,3 +956,64 @@ class FileUtil(object): output: NA """ return os.path.exists(file) + + @staticmethod + def get_hosts_file(): + """ + get hosts file + """ + gphome = os.environ.get("GPHOME") + if gphome: + host_file = os.path.normpath(os.path.join(gphome, "hosts")) + if host_file and os.path.exists(host_file) and os.path.isfile(host_file): + return host_file + + dir_name = os.path.dirname(os.path.realpath(__file__)) + package_dir = os.path.join(dir_name, "./../../") + host_file = os.path.normpath(os.path.join(package_dir, "..", "hosts")) + + if not os.path.exists(host_file) or not os.path.isfile(host_file): + raise Exception(ErrorCode.GAUSS_522["GAUSS_52200"] % host_file) + return host_file + + @staticmethod + def write_hosts_file(path, content=None, mode='w'): + lock = _thread.allocate_lock() + if content is None: + content = {} + # check if not exists. + if not os.path.exists(path): + FileUtil.createFileInSafeMode(path) + # check if is a file. + if os.path.exists(path) and not os.path.isfile(path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % path) + # if no context, return + if not content: + return False + with open(path, mode) as f: + for ip, hostname in content.items(): + f.write("%s %s" % (ip, hostname) + os.linesep) + lock.acquire() + try: + # write context. + f.flush() + except Exception as excep: + lock.release() + raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % path + + "Error:\n%s" % str(excep)) + lock.release() + return True + + @staticmethod + def read_hosts_file(path, mode='r'): + if not os.path.exists(path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % path) + content = {} + with open(path, mode) as f: + for line in f.readlines(): + if len(line.strip().split()) != 2: + raise Exception("Error: %s format error" % path) + ip = line.strip().split()[0] + hostname = line.strip().split()[1] + content[ip] = hostname + return content diff --git a/script/base_utils/os/hosts_util.py b/script/base_utils/os/hosts_util.py new file mode 100644 index 0000000..7e766c2 --- /dev/null +++ b/script/base_utils/os/hosts_util.py @@ -0,0 +1,57 @@ +import os +import re + +from base_utils.os.file_util import FileUtil + +class HostsUtil: + + @staticmethod + def hostname_to_ip(hostname): + """ + function: get ip from hostname + input: hostname + output: ip + """ + ip_str = "" + hosts_file = FileUtil.get_hosts_file() + contents = FileUtil.read_hosts_file(hosts_file) + for ip, name in contents.items(): + if name == hostname: + ip_str = ip + return ip_str + if ip_str == "": + raise Exception("hostname %s not found in hosts file %s" % (hostname, hosts_file)) + + @staticmethod + def hostname_list_to_ip_list(host_list): + if not host_list: + return [] + hosts_file = FileUtil.get_hosts_file() + if not os.path.isfile(hosts_file): + raise Exception("hosts file is not exist") + # key:value name:ip + contents = FileUtil.read_hosts_file(hosts_file) + ip_list = [] + for hostname in host_list: + for ip, name in contents.items(): + if hostname == name: + ip_list.append(ip) + return ip_list + + @staticmethod + def ip_to_hostname(host_ip): + if not host_ip: + return "" + hosts_file = FileUtil.get_hosts_file() + if not os.path.isfile(hosts_file): + raise Exception("hosts file is not exist") + contents = FileUtil.read_hosts_file(hosts_file) + name = "" + for ip, hostname in contents.items(): + if host_ip == ip: + name = hostname + if name == "": + raise Exception("ip to hostname failed,ip is %s" % host_ip) + return name + + \ No newline at end of file diff --git a/script/base_utils/security/security_checker.py b/script/base_utils/security/security_checker.py index aaee8cf..bdb5715 100644 --- a/script/base_utils/security/security_checker.py +++ b/script/base_utils/security/security_checker.py @@ -124,3 +124,12 @@ class SecurityChecker(object): raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50022'] % (description, 'between 0 and {}{}'.format( str(max_value), des2))) + + @staticmethod + def check_is_ip(value): + """ + check ip valid + """ + if not re.match(SecurityChecker.IP_PATTERN, value): + return False + return True \ No newline at end of file diff --git a/script/gs_check b/script/gs_check index 8387ade..8a99784 100644 --- a/script/gs_check +++ b/script/gs_check @@ -615,9 +615,10 @@ def checkuser(): if (len(dbNameList) == 1 and dbNameList[0] == NetUtil.GetHostIpOrName()): return + node_ips = g_opts.cluster.getClusterSshIps()[0] appPath = EnvUtil.getEnv('GPHOME', g_opts.cluster.appPath) psshPath = os.path.join(appPath, 'script/gspylib/pssh/bin/pssh') - cmd = "%s -H %s 'id' " % (psshPath, " -H ".join(dbNameList)) + cmd = "%s -H %s 'id' " % (psshPath, " -H ".join(node_ips)) (status, output) = subprocess.getstatusoutput(cmd) if status != 0: errorNode = [] diff --git a/script/gs_checkos b/script/gs_checkos index 8b4a2aa..d166cf0 100644 --- a/script/gs_checkos +++ b/script/gs_checkos @@ -1504,7 +1504,7 @@ def getCmdOutput(cmd, ssh_conf=""): (status, output) = g_sshTool.getSshStatusOutput(cmd, [], "", "%s/../" % gp_path, ssh_config=ssh_conf) - outputMap = g_sshTool.parseSshOutput(g_sshTool.hostNames) + outputMap = g_sshTool.parseSshOutput(g_opts.hostnameList) return (status, output, outputMap) diff --git a/script/gs_expansion b/script/gs_expansion index fc1ca52..0c7ea6b 100644 --- a/script/gs_expansion +++ b/script/gs_expansion @@ -23,6 +23,7 @@ import os import sys import subprocess import socket +import pwd package_path = os.path.dirname(os.path.realpath(__file__)) ld_path = package_path + "/gspylib/clib" if 'LD_LIBRARY_PATH' not in os.environ: @@ -96,6 +97,8 @@ class Expansion(ParallelBaseOM): self.standbyLocalMode = False self.time_out = None self.envFile = EnvUtil.getEnv("MPPDB_ENV_SEPARATE_PATH") + self.new_hostname_ip_map = {} + self.new_hostname_list = [] def usage(self): """ @@ -310,6 +313,35 @@ General options: subprocess.getstatusoutput("chown {}:{} {}".format(self.user, self.group, self.logger.logFile)) self.logger.ignoreErr = True + # init cluster info from xml or static file + if self.xmlFile: + self.initClusterInfo() + else: + self.initClusterInfoFromStaticFile(self.user) + + # init node ip list + self.node_ip_list = self.clusterInfo.getClusterSshIps()[0] + for ip in self.newHostList: + if ip not in self.node_ip_list: + self.node_ip_list.append(ip) + + def global_init(self): + """ + init node name list + """ + self.nodeNameList = self.clusterInfo.getClusterNodeNames() + + # write new hosts to host file + self.write_new_hosts_to_hosts_file() + + def write_new_hosts_to_hosts_file(self): + hosts_file = FileUtil.get_hosts_file() + contents = FileUtil.read_hosts_file(hosts_file) + if os.path.exists(hosts_file): + FileUtil.removeFile(hosts_file) + contents.update(self.new_hostname_ip_map) + FileUtil.write_hosts_file(hosts_file, contents) + def check_env_variable(self): """ check whether env file is sourced @@ -340,17 +372,19 @@ General options: currentPath = os.path.dirname(os.path.realpath(__file__)) gs_om = os.path.join(currentPath, "gs_om") # get new hostname and hostip - hostname_str, hostip_str = self.get_new_hostname_and_hostip() + hostip_str = ",".join(self.newHostList) + hostname_str = ",".join(self.new_hostname_list) # execute gs_om -t generate_xml + user_path = pwd.getpwnam(self.user).pw_dir if not self.envFile: - self.envFile = "/home/%s/.bashrc" % self.user + self.envFile = os.path.normpath(os.path.join(user_path, ".bashrc")) cmd = "source %s; %s -t generate_xml --add-hostname=%s --add-hostip=%s" % (self.envFile, gs_om, hostname_str, hostip_str) if os.getuid() == 0: cmd = "su - %s -c '%s'" % (self.user, cmd) status, output = subprocess.getstatusoutput(cmd) if status != 0: GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50231"] % self.xmlFile) - xml_tmp_file = "/home/%s/tmp_generate_xml" % self.user + xml_tmp_file = os.path.normpath(os.path.join(user_path, "tmp_generate_xml")) if not os.path.exists(xml_tmp_file): GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"] % xml_tmp_file) self.xmlFile = FileUtil.readFile(xml_tmp_file)[0].strip() @@ -359,13 +393,17 @@ General options: self.logger.log("Successfully generate xml, the xml file is %s" % self.xmlFile) def get_new_hostname_and_hostip(self): - hostip_str = ",".join(self.newHostList) - hostname_list = [] + gpHome = EnvUtil.getEnv("GPHOME") + pssh_path = "python3 %s/script/gspylib/pssh/bin/pssh" % gpHome for ip in self.newHostList: - hostname = socket.gethostbyaddr(ip)[0] - hostname_list.append(hostname) - hostname_str = ",".join(hostname_list) - return hostname_str, hostip_str + cmd = "source ~/.bashrc && source %s; %s -s -H %s 'hostname'" \ + %(self.envFile, pssh_path, ip) + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception("Failed to pssh -s -H %s 'hostname'" % ip) + else: + self.new_hostname_list.append(str(output).strip()) + self.new_hostname_ip_map[ip] = str(output).strip() def getExpansionInfo(self): self._getClusterInfoDict() @@ -403,61 +441,42 @@ General options: if currentHost != primaryHost: GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50110"] % (currentHost + ", which is not primary")) - - def init_cluster_info_all_node(self): - """ - init cluster info for all node - """ - clusterInfo = dbClusterInfo() - clusterInfo.initFromXml(self.xmlFile) - self.clusterInfo = clusterInfo - self.nodeNameList = clusterInfo.getClusterNodeNames() - def checkTrust(self, hostList = None): + def checkTrust(self): """ check trust between primary/current host and every host in hostList """ - if hostList == None: - hostList = list(self.nodeNameList) - backIpList = self.clusterInfo.getClusterBackIps() - hostList += backIpList gpHome = EnvUtil.getEnv("GPHOME") psshPath = "python3 %s/script/gspylib/pssh/bin/pssh" % gpHome - ssh_exception_hosts = [] - for host in hostList: + create_ssh = False + for host in self.node_ip_list: if os.getuid() == 0: # check individual user's trust check_user_trust_cmd = "su - %s -c '%s -s -H %s pwd'" % (self.user, psshPath, host) (status, output) = subprocess.getstatusoutput(check_user_trust_cmd) if status != 0: - ssh_exception_hosts.append(host) + create_ssh = True # check current user's trust check_user_trust_cmd = "%s -s -H %s 'pwd'" % (psshPath, host) (status, output) = subprocess.getstatusoutput(check_user_trust_cmd) if status != 0: - ssh_exception_hosts.append(host) + create_ssh = True # output ssh exception info if ssh connect failed - if ssh_exception_hosts: + if create_ssh: self.logger.log("The cluster need create ssh trust") - self.create_trust() + self.create_trust(self.node_ip_list) else: self.logger.log("The cluster no need create ssh trust") - def create_trust(self): - cluster_info = dbClusterInfo() - cluster_info.initFromXml(self.xmlFile) - all_ips = [] - sships = cluster_info.getClusterSshIps() - for ips in sships: - all_ips.extend(ips) + def create_trust(self, node_ips): if os.getuid() == 0: - self.create_trust_for_user("root", all_ips) - self.create_trust_for_user(self.user, all_ips) + self.create_trust_for_user("root", node_ips) + self.create_trust_for_user(self.user, node_ips) def create_trust_for_user(self, user, all_ips): self.logger.log("Please enter password for %s" % user) - self.sshTool = SshTool(self.nodeNameList, self.logFile, DefaultValue.TIMEOUT_PSSH_PREINSTALL) + self.sshTool = SshTool(all_ips, self.logFile, DefaultValue.TIMEOUT_PSSH_PREINSTALL) self.sshTool.createTrust(user, all_ips) self.logger.debug("Successfully created SSH trust for the %s" % user) @@ -516,13 +535,13 @@ General options: This is expansion frame start """ if self.check_cm_component() and self.standbyLocalMode: - expand_impl = ExpansionImplWithCmLocal(self) + expand_impl = ExpansionImplWithCmLocal(expansion) self.logger.log("Start expansion with cluster manager component on standby node.") elif self.check_cm_component(): - expand_impl = ExpansionImplWithCm(self) + expand_impl = ExpansionImplWithCm(expansion) self.logger.log("Start expansion with cluster manager component.") else: - expand_impl = ExpansionImpl(self) + expand_impl = ExpansionImpl(expansion) self.logger.log("Start expansion without cluster manager component.") expand_impl.run() @@ -684,9 +703,10 @@ if __name__ == "__main__": expansion.checkParameters() expansion.initLogs() expansion.check_env_variable() - expansion.generate_xml() - expansion.init_cluster_info_all_node() expansion.checkTrust() + expansion.get_new_hostname_and_hostip() + expansion.generate_xml() + expansion.global_init() expansion.getExpansionInfo() expansion.check_xml_env_consistent() expansion.checkXmlIncludeNewHost() diff --git a/script/gs_preinstall b/script/gs_preinstall index 53633dc..2805fc3 100644 --- a/script/gs_preinstall +++ b/script/gs_preinstall @@ -523,6 +523,11 @@ General options: # init the log file self.initLogger("gs_preinstall") + + current_path = os.path.dirname(os.path.realpath(__file__)) + package_dir = os.path.normpath(os.path.join(current_path, "../")) + self.write_host_file(package_dir) + # get the clusterToolPath self.clusterToolPath = ClusterDir.getPreClusterToolPath(self.xmlFile) temp_nodes = ClusterConfigFile.getOneClusterConfigItem("nodeNames", self.xmlFile) @@ -532,6 +537,7 @@ General options: self.cluster_core_path = self.clusterInfo.readClustercorePath(self.xmlFile) self.logger.log("Parsing the configuration file.", "addStep") + try: # parse the configuration file self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), @@ -549,6 +555,23 @@ General options: self.logger.log("Successfully parsed the configuration file.", "constant") + def write_host_file(self, package_dir): + hosts_file = os.path.normpath(os.path.join(package_dir, "hosts")) + if os.path.exists(hosts_file): + FileUtil.removeFile(hosts_file) + FileUtil.createFile(hosts_file) + + # hosts contents + contents = {} + hostname_list = self.clusterInfo.getClusterNodeNames() + for hostname in hostname_list: + node = self.clusterInfo.getDbNodeByName(hostname) + ip = node.sshIps[0] + contents[ip] = hostname + + FileUtil.write_hosts_file(hosts_file, contents) + FileUtil.changeMode(DefaultValue.FILE_MODE, hosts_file, cmd_type="shell") + # check expect for cm/create trust def check_expect(self): """ diff --git a/script/gs_sshexkey b/script/gs_sshexkey index f746a9f..95ef1ed 100644 --- a/script/gs_sshexkey +++ b/script/gs_sshexkey @@ -51,6 +51,7 @@ from subprocess import PIPE from base_utils.common.fast_popen import FastPopen from gspylib.common.copy_python_lib import copy_lib from base_utils.os.user_util import UserUtil +from base_utils.os.crontab_util import CrontabUtil copy_lib() DefaultValue.doConfigForParamiko() @@ -667,6 +668,8 @@ General options: Ips.extend(self.hostList) result = self.getAllHosts(Ips) self.checkNetworkInfo() + # write hosts file + self.write_hosts_file(result) if not self.skipHostnameSet: self.writeLocalHosts(result) @@ -683,40 +686,62 @@ General options: self.synchronizationLicenseFile(result) self.retry_register_other_ssh_agent() self.verifyTrust() - self.set_user_crontab(result) + self.set_user_ssh_crontab(result) self.logger.log("Successfully created SSH trust.") except Exception as e: self.logger.logExit(str(e)) finally: self.passwd = [] - def set_user_crontab(self, hostnames_ips): - if os.getuid() == 0: - return - hostnames = list(hostnames_ips.values()) - self.logger.log("Start set cron for %s" % self.user) - package_path = os.path.dirname(os.path.realpath(__file__)) + def get_package_dir(self): + """ + get package dir + """ + return os.path.dirname(os.path.realpath(__file__)) + + def get_gp_home(self): + """ + get pg home + """ + package_path = self.get_package_dir() gp_home = os.path.join(package_path, '../') - check_cron = True - try: - # create cron tmp file - cron_file = "/tmp/gauss_cron_%s" % self.user + if EnvUtil.getEnv("GPHOME"): + gp_home = EnvUtil.getEnv("GPHOME") + return gp_home + + def set_user_ssh_crontab(self, hostnames_ips): + """ + set user cron + """ + # The current user is not root and does not have cron task permissions, exit + if os.getuid() != 0 and not CrontabUtil.check_user_crontab_permission(): + return + self.logger.log("Start set cron for %s" % self.user) + # hostnames_ips key is ip; value is hostname + hostnames = list(hostnames_ips.values()) + + gp_home = self.get_gp_home() + + # create cron tmp file + cron_file = "/tmp/gauss_cron_%s" % self.user + if os.getuid() == 0: + set_cron_cmd = "crontab -u %s -l > %s && " % (self.user, cron_file) + else: set_cron_cmd = "crontab -l > %s && " % cron_file - set_cron_cmd += "sed -i '/CheckSshAgent.py/d' %s;" % cron_file - set_cron_cmd += "echo '*/1 * * * * source ~/.bashrc;python3 %s/script/local/CheckSshAgent.py >>/dev/null 2>&1 &' >> %s" % \ + set_cron_cmd += "sed -i '/CheckSshAgent.py/d' %s;" % cron_file + set_cron_cmd += "echo '*/1 * * * * source ~/.bashrc;python3 %s/script/local/CheckSshAgent.py >>/dev/null 2>&1 &' >> %s" % \ (gp_home, cron_file) - set_cron_cmd += "&& crontab %s" % cron_file - set_cron_cmd += "&& rm -f '%s';" % cron_file - self.logger.debug("Command for setting CRON: %s" % set_cron_cmd) - ssh_tool = SshTool(hostnames) - os.environ['GPHOME'] = gp_home - ssh_tool.executeCommand(set_cron_cmd, DefaultValue.SUCCESS, '', '') - ssh_tool.clenSshResultFiles() - except Exception as e: - check_cron = False - self.logger.log("Warning: The %s user does not have permission for crontab." % self.user) - if check_cron: - self.logger.log("Successfully to set cron for %s" % self.user) + if os.getuid() == 0: + set_cron_cmd += "crontab -u %s %s ;" % (self.user, cron_file) + else: + set_cron_cmd += "crontab %s ;" % cron_file + set_cron_cmd += "rm -f '%s';" % cron_file + ssh_tool = SshTool(hostnames) + os.environ['GPHOME'] = gp_home + self.logger.debug("Command for setting CRON: %s" % set_cron_cmd) + ssh_tool.executeCommand(set_cron_cmd, DefaultValue.SUCCESS) + ssh_tool.clenSshResultFiles() + self.logger.log("Successfully to set cron for %s" % self.user) def createPublicPrivateKeyFile(self): """ @@ -820,8 +845,13 @@ General options: self._log("Updating the known_hosts file.", "addStep") hostnameList = [] hostnameList.extend(self.hostList) - for(key, value) in list(result.items()): - hostnameList.append(value) + # add hostname when using root user + os_hosts_file = "/etc/hosts" + for _, hostname in result.items(): + cmd = "cat %s | grep %s | wc -l" % (os_hosts_file, hostname) + (status, output) = subprocess.getstatusoutput(cmd) + if status == 0 and int(output) > 0: + hostnameList.append(hostname) for hostname in hostnameList: cmd = '%s;/usr/bin/ssh-keyscan -t ed25519 %s >> %s ' % (SYSTEM_SSH_ENV, hostname, self.known_hosts_fname) cmd += '&& sed -i "$ s/$/ #OM/" %s ' % self.known_hosts_fname @@ -829,9 +859,10 @@ General options: (status, output) = subprocess.getstatusoutput(cmd) if status != 0 or "Name or service not known".lower() in output.lower(): raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error:\n%s" % output) - (status, output) = self.checkAuthentication(self.localHost) - if not status: - raise Exception(ErrorCode.GAUSS_511["GAUSS_51100"] % self.localHost) + if os.getuid() == 0: + (status, output) = self.checkAuthentication(self.localHost) + if not status: + raise Exception(ErrorCode.GAUSS_511["GAUSS_51100"] % self.localHost) self._log("Successfully updated the known_hosts file.", "constant") def tryParamikoConnect(self, hostname, client, pswd = None, silence = False): @@ -1120,6 +1151,26 @@ General options: self.logger.logExit(str(e)) self._log("Successfully distributed SSH trust file to all node.", "constant") + def write_hosts_file(self, contents): + """ + function: Write hosts file + input : contents + output: NA + """ + if os.getuid() == 0: + return + # write hosts file + current_path = os.path.dirname(os.path.realpath(__file__)) + package_dir = os.path.normpath(os.path.join(current_path, "../")) + hosts_file = os.path.normpath(os.path.join(package_dir, "hosts")) + if os.path.exists(hosts_file): + FileUtil.removeFile(hosts_file) + FileUtil.createFile(hosts_file) + + FileUtil.write_hosts_file(hosts_file, contents) + FileUtil.changeMode(DefaultValue.FILE_MODE, hosts_file, cmd_type="shell") + FileUtil.changeOwner(self.user, hosts_file, cmd_type="shell") + def write_hostname_to_list(self, ip_hostname): """ function: Add hostname to the list diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index 9644bf4..ebe5260 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -277,6 +277,8 @@ class DefaultValue(): ALARM_COMPONENT_PATH = "/opt/huawei/snas/bin/snas_cm_cmd" # root scripts path ROOT_SCRIPTS_PATH = "/root/gauss_om" + # preinstall flag path + PREINSTALL_FLAG_PATH = "/tmp/preinstall_flag" # package bak file name list PACKAGE_BACK_LIST = ["Gauss200-OLAP-Package-bak.tar.gz", @@ -529,6 +531,12 @@ class DefaultValue(): SSH_AUTHORIZED_KEYS = os.path.expanduser("~/.ssh/authorized_keys") SSH_KNOWN_HOSTS = os.path.expanduser("~/.ssh/known_hosts") + # os parameter + MAX_REMAIN_SEM = 240000 + MIN_REMAIN_SEM = 10000 + NOFILE_LIMIT = 640000 + DEFAULT_SEM = 32000 + @staticmethod def encodeParaline(cmd, keyword): """ @@ -540,6 +548,20 @@ class DefaultValue(): cmd = base64.b64decode(cmd.encode()).decode() return cmd + @staticmethod + def read_preinstall_flag(): + preinstall_flag_file = DefaultValue.PREINSTALL_FLAG_PATH + if not os.path.exists(preinstall_flag_file): + return True + with open(preinstall_flag_file, 'r') as f: + content = f.read().strip() + if len(content.split()) != 2: + raise Exception("The content of the preinstall flag file is incorrect.") + if content.split()[0] == "preinstall" and content.split()[1] == "root": + return True + else: + return False + @staticmethod def CheckNetWorkBonding(serviceIP, isCheckOS=True): """ @@ -712,12 +734,23 @@ class DefaultValue(): # get hostname hostname = socket.gethostname() + hostIp = "" + preinstall_flag = DefaultValue.read_preinstall_flag() + if preinstall_flag: # get local host in /etc/hosts - cmd = "grep -E \"^[1-9 \\t].*%s[ \\t]*#Gauss.* IP Hosts Mapping$\" " \ - "/etc/hosts | grep -E \" %s \"" % (hostname, hostname) - (status, output) = subprocess.getstatusoutput(cmd) - if (status == 0 and output != ""): - hostIp = output.strip().split(' ')[0].strip() + cmd = "grep -E \"^[1-9 \\t].*%s[ \\t]*#Gauss.* IP Hosts Mapping$\" " \ + "/etc/hosts | grep -E \" %s \"" % (hostname, hostname) + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0 and output != ""): + hostIp = output.strip().split(' ')[0].strip() + else: + cmd = "grep -E \"^[1-9 \\t].*%s\" " \ + "%s | grep -E \" %s \"" % (hostname, DefaultValue.PREINSTALL_FLAG_PATH, hostname) + (status, output) = subprocess.getstatusoutput(cmd) + if (status == 0 and output != ""): + hostIp = output.strip().split(' ')[0].strip() + + if hostIp: return hostIp # get local host by os function @@ -851,7 +884,6 @@ class DefaultValue(): + " Error:\n%s" % output) return output.strip() - @staticmethod def getOSInitFile(): """ @@ -1883,6 +1915,42 @@ class DefaultValue(): except Exception as e: raise Exception(str(e)) + @staticmethod + def distribute_hosts_file(g_sshTool, hosts_file, hostname=None, + mpprc_file="", local_mode=False): + ''' + function: distribute the hosts file to remote nodes + input: g_sshTool, hostname, hosts file, mpprcFile + output:NA + ''' + if hostname is None: + hostname = [] + try: + # distribute xml file + # check and create xml file path + hosts_dir = os.path.dirname(hosts_file) + hosts_dir = os.path.normpath(hosts_dir) + LocalRemoteCmd.checkRemoteDir(g_sshTool, hosts_dir, hostname, mpprc_file, + local_mode) + local_node = NetUtil.GetHostIpOrName() + # Skip local file overwriting + if not hostname: + hostname = g_sshTool.hostNames[:] + if local_node in hostname: + hostname.remove(local_node) + if (not local_mode): + # Send xml file to every host + g_sshTool.scpFiles(hosts_file, hosts_dir, hostname, mpprc_file) + # change owner and mode of xml file + cmd = CmdUtil.getChmodCmd(str(DefaultValue.FILE_MODE), hosts_file) + CmdExecutor.execCommandWithMode(cmd, + g_sshTool, + local_mode, + mpprc_file, + hostname) + except Exception as e: + raise Exception(str(e)) + @staticmethod def getSecurityMode(): """ @@ -3330,6 +3398,44 @@ class DefaultValue(): # failed to read the upgrade_step.csv in isgreyUpgradeNodeSpecify logger.logExit(str(e)) + @staticmethod + def get_remain_kernel_sem(): + """ + get remain kernel sem + """ + # get total sem + cmd = "cat /proc/sys/kernel/sem" + (status, output) = subprocess.getstatusoutput(cmd) + + if status != 0: + raise Exception(ErrorCode.GAUSS_501["GAUSS_50110"] % cmd) + + if output == "": + return None + if len(output.split()) > 1: + if int(output.split()[1]) > DefaultValue.DEFAULT_SEM: + return None + else: + raise Exception("cat /proc/sys/kernel/sem failed") + parts = output.split() + semmns = int(parts[1]) + + # get used sem + cmd = "ipcs -s" + (status, output) = subprocess.getstatusoutput(cmd) + if status: + raise Exception(ErrorCode.GAUSS_501["GAUSS_50110"] % cmd) + current_sems_lines = output.split('\n') + # skip the first three lines and process the remaining lines + current_sems = [int(line.split()[3]) for line in current_sems_lines[3:] if line.strip()] + + # Calculate the number of semaphores currently in use + used_sems = sum(current_sems) + + # Calculate the number of remaining semaphores + remaining_sems = semmns - used_sems + return remaining_sems + class ClusterCommand(): ''' Common for cluster command diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index 269d7b5..53731e1 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -4828,7 +4828,9 @@ class dbClusterInfo(): cmd = "cp -f %s %s" % (sourceFile, targetFile) status, output = subprocess.getstatusoutput(cmd) else: - cmd = "export LD_LIBRARY_PATH=/usr/lib64;/usr/bin/scp %s %s:%s" % (sourceFile, dbNode.name, targetFile) + node = self.getDbNodeByName(dbNode.name) + node_ip = node.sshIps[0] + cmd = "export LD_LIBRARY_PATH=/usr/lib64;/usr/bin/scp %s %s:%s" % (sourceFile, node_ip, targetFile) status, output = subprocess.getstatusoutput(cmd) if status: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index 05cccea..b06aa60 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -436,6 +436,11 @@ class DN_OLAP(Kernel): dynamicDict = {} dynamicDict = DefaultValue.dynamicGuc("dn", tmpGucFile, gucXml) + # get os remain sem + remain_sem = DefaultValue.get_remain_kernel_sem() + if remain_sem is not None: + # calc max_connections for remain sem + self.calc_max_connections_for_sems(remain_sem, dynamicDict) if gucXml: dynamicDict["log_line_prefix"] = "'%s'" % \ dynamicDict["log_line_prefix"] @@ -476,6 +481,18 @@ class DN_OLAP(Kernel): self.modifyDummpyStandbyConfigItem() + def calc_max_connections_for_sems(self, remaining_sems, guc_dict): + """ + calc max connetions for remain sem + """ + if int(remaining_sems) >= DefaultValue.MAX_REMAIN_SEM: + return + elif int(remaining_sems) < DefaultValue.MIN_REMAIN_SEM: + raise Exception("Error: The remaining signal quantity of the current system is less than %s" % DefaultValue.MIN_REMAIN_SEM) + else: + # Number of connections with 1w semaphore=200 + guc_dict["max_connections"] = int(600 / 30000 * int(remaining_sems)) + def setPghbaConfig(self, clusterAllIpList, try_reload=False, float_ips=None): """ """ diff --git a/script/gspylib/inspection/common/SharedFuncs.py b/script/gspylib/inspection/common/SharedFuncs.py index b40977a..c5e3768 100644 --- a/script/gspylib/inspection/common/SharedFuncs.py +++ b/script/gspylib/inspection/common/SharedFuncs.py @@ -32,6 +32,7 @@ from domain_utils.sql_handler.sql_file import SqlFile from base_utils.os.net_util import NetUtil from os_platform.linux_distro import LinuxDistro from base_diff.sql_commands import SqlCommands +from base_utils.os.hosts_util import HostsUtil localPath = os.path.dirname(__file__) sys.path.insert(0, localPath + "/../lib") @@ -340,7 +341,6 @@ def runSqlSimplely(sql, user, host, port, tmpPath, database="postgres", return output - def cleanFile(fileName, hostname=""): """ function : remove file @@ -361,7 +361,8 @@ def cleanFile(fileName, hostname=""): + " Error: \n%s." % output + "The cmd is %s " % cmd) else: - sshCmd = "pssh -s -H %s '%s'" % (hostname, cmd) + ip = HostsUtil.hostname_to_ip(hostname) + sshCmd = "pssh -s -H %s '%s'" % (ip, cmd) (status, output) = subprocess.getstatusoutput(sshCmd) if (status != 0): raise Exception(ErrorCode.GAUSS_502["GAUSS_50207"] % "file" diff --git a/script/gspylib/threads/SshTool.py b/script/gspylib/threads/SshTool.py index 8432193..0f5cfd5 100644 --- a/script/gspylib/threads/SshTool.py +++ b/script/gspylib/threads/SshTool.py @@ -27,6 +27,7 @@ import weakref import time from random import sample import copy +import re sys.path.append(sys.path[0] + "/../../") from gspylib.common.ErrorCode import ErrorCode from gspylib.common.Common import DefaultValue @@ -37,6 +38,8 @@ from base_utils.os.cmd_util import CmdUtil from domain_utils.domain_common.cluster_constants import ClusterConstants from base_utils.security.sensitive_mask import SensitiveMask from gspylib.common.Constants import Constants +from base_utils.security.security_checker import SecurityChecker +from base_utils.os.hosts_util import HostsUtil try: import paramiko @@ -79,6 +82,19 @@ def get_sshexkey_file(): trust_file = os.path.normpath(os.path.join(package_path, "gs_sshexkey")) return trust_file +def check_local_mode(host): + """ + function: check single host valid + input : host + output: NA + """ + if host is None or not host: + host = [] + if len(host) == 1: + if host[0] == NetUtil.GetHostIpOrName() or host[0] in NetUtil.getIpAddressList(): + return True + return False + class SshTool(): """ Class for controling multi-hosts @@ -95,6 +111,19 @@ class SshTool(): self.__timeout = timeout + 10 self._finalizer = weakref.finalize(self, self.clenSshResultFiles) self.__sessions = {} + self.is_ip = False + + if hostNames: + # if not ip, convert hostname to ip + if not SecurityChecker.check_is_ip(hostNames[0]): + self.is_ip = False + # key:value hostname:ip + host_ip_list = HostsUtil.hostname_list_to_ip_list(hostNames) + if not host_ip_list: + raise Exception("Failed to hostname to ip.") + self.hostNames = host_ip_list + else: + self.is_ip = True current_time = str(datetime.datetime.now()).replace(" ", "_").replace( ".", "_") @@ -296,9 +325,20 @@ class SshTool(): outputCollect = "" prefix = "" resultMap = self.__readCmdResult(self.__resultFile, len(hostList)) + + if not self.is_ip: + res_map = {} + for key, value in resultMap.items(): + name = HostsUtil.ip_to_hostname(key) + res_map[name] = value + resultMap = res_map + for host in hostList: sshOutPutFile = "%s/%s" % (self.__outputPath, host) sshErrorPutFile = "%s/%s" % (self.__errorPath, host) + # ip to hostname + if not self.is_ip: + host = HostsUtil.ip_to_hostname(host) if resultMap[host] == DefaultValue.SUCCESS: prefix = "SUCCESS" else: @@ -323,8 +363,9 @@ class SshTool(): """ function: timeout clean """ - if hostList is None: - hostList = [] + localMode = False + localMode = check_local_mode(hostList) + hostList = self.check_host_ip_list(hostList) pstree = "python3 %s -sc" % os.path.realpath(os.path.dirname( os.path.realpath(__file__)) + "/../../py_pstree.py") mpprcFile, userProfile, osProfile = self.getUserOSProfile(env_file) @@ -354,6 +395,7 @@ class SshTool(): self.__errorPath, osProfile, userProfile, timeOutCmd, self.__resultFile) + hostList = self.hostNames else: if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): sshCmd = "source %s && %s -t %s -H %s -P -p %s -o %s -e" \ @@ -371,11 +413,38 @@ class SshTool(): self.__outputPath, self.__errorPath, osProfile, userProfile, timeOutCmd, self.__resultFile) - (status, output) = CmdUtil.getstatusoutput_by_fast_popen(sshCmd) + if localMode: + if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): + sshCmd = "source %s ; %s 2>&1" % (osProfile, cmd) + else: + sshCmd = "source %s ; source %s; %s 2>&1" \ + % (osProfile, userProfile, cmd) + (status, output) = subprocess.getstatusoutput(sshCmd) + + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % sshCmd + " Error:\n%s" % SensitiveMask.mask_pwd(output)) + if logger: logger.debug("{timeout clean} status: %s, output: %s" % ( status, SensitiveMask.mask_pwd(output))) + def check_host_ip_list(self, host_list): + """ + function: check host valid + input : host_list + output: NA + """ + host = [] + if host_list is None or not host_list: + return host + else: + if SecurityChecker.check_is_ip(host_list[0]): + self.is_ip = True + if not self.is_ip: + host = HostsUtil.hostname_list_to_ip_list(host_list) + return host + def executeCommand(self, cmd, cmdReturn=DefaultValue.SUCCESS, hostList=None, env_file="", parallel_num=300, checkenv=False, parallelism=True ): @@ -389,8 +458,8 @@ class SshTool(): resultMap = {} outputCollect = "" isTimeOut = False - if hostList is None: - hostList = [] + localMode = check_local_mode(hostList) + hostList = self.check_host_ip_list(hostList) try: mpprcFile, userProfile, osProfile = self.getUserOSProfile( env_file) @@ -425,6 +494,7 @@ class SshTool(): sshCmd = sshCmd.replace('parallelism_flag', '-h ' + self.__hostsFile) hostList = self.hostNames + localMode = check_local_mode(hostList) else: if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): sshCmd = "source %s && %s -t %s -H %s -P -p %s -o %s -e" \ @@ -443,10 +513,7 @@ class SshTool(): self.__resultFile) # single cluster or execute only in local node. - if (len(hostList) == 1 and - hostList[0] == NetUtil.GetHostIpOrName() - and cmd.find(" --lock-cluster ") < 0): - localMode = True + if localMode: if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): sshCmd = "source %s ; %s 2>&1" % (osProfile, cmd) else: @@ -494,6 +561,8 @@ class SshTool(): " Error:\n%s" % SensitiveMask.mask_pwd(output)) if localMode: + if not self.is_ip: + hostList = [HostsUtil.ip_to_hostname(hostList[0])] resultMap[hostList[0]] = DefaultValue.SUCCESS if status == 0 \ else DefaultValue.FAILURE outputCollect = "[%s] %s:\n%s" \ @@ -506,8 +575,10 @@ class SshTool(): if not isTimeOut: self.clenSshResultFiles() raise Exception(str(e)) - + for host in hostList: + if not localMode and not self.is_ip: + host = HostsUtil.ip_to_hostname(host) if resultMap.get(host) != cmdReturn: if outputCollect.find("GAUSS-5") == -1: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] @@ -542,9 +613,8 @@ class SshTool(): outputCollect = "" isTimeOut = False need_replace_quotes = False - - if hostList is None: - hostList = [] + localMode = check_local_mode(hostList) + hostList = self.check_host_ip_list(hostList) if cmd.find("[need_replace_quotes]") != -1: cmd = cmd.replace("[need_replace_quotes]", "") @@ -587,6 +657,7 @@ class SshTool(): osProfile, userProfile, cmd, self.__resultFile) hostList = self.hostNames + localMode = check_local_mode(hostList) else: if need_replace_quotes: remote_cmd = cmd.replace("\"", "\\\"") @@ -609,9 +680,7 @@ class SshTool(): self.__resultFile) # single cluster or execute only in local node. - if (len(hostList) == 1 and - hostList[0] == NetUtil.GetHostIpOrName()): - localMode = True + if localMode: if os.getuid() == 0 and (mpprcFile == "" or not mpprcFile): sshCmd = "source %s ; %s 2>&1" % (osProfile, cmd) else: @@ -635,6 +704,8 @@ class SshTool(): if localMode: dir_permission = 0o700 + if not self.is_ip: + hostList = [HostsUtil.ip_to_hostname(hostList[0])] if status == 0: resultMap[hostList[0]] = DefaultValue.SUCCESS outputCollect = "[%s] %s:\n%s" % ("SUCCESS", hostList[0], @@ -669,8 +740,10 @@ class SshTool(): if not isTimeOut: self.clenSshResultFiles() raise Exception(str(e)) - + for host in hostList: + if not localMode and not self.is_ip: + host = HostsUtil.ip_to_hostname(host) if resultMap.get(host) != DefaultValue.SUCCESS: if outputCollect.find("GAUSS-5") == -1: outputCollect = ErrorCode.GAUSS_514["GAUSS_51400"] \ @@ -703,11 +776,15 @@ class SshTool(): """ resultMap = {} try: + if not SecurityChecker.check_is_ip(hostList[0]): + hostList = HostsUtil.hostname_list_to_ip_list(hostList) for host in hostList: context = "" sshOutPutFile = "%s/%s" % (self.__outputPath, host) sshErrorPutFile = "%s/%s" % (self.__errorPath, host) + if not self.is_ip: + host = HostsUtil.ip_to_hostname(host) if os.path.isfile(sshOutPutFile): with open(sshOutPutFile, "r") as fp: context = fp.read() @@ -737,9 +814,10 @@ class SshTool(): outputCollect = "" localMode = False resultMap = {} - sshHosts = [] - if hostList is None: - hostList = [] + ssh_hosts = [] + localMode = False + localMode = check_local_mode(hostList) + hostList = self.check_host_ip_list(hostList) try: if env_file != "": mpprcFile = env_file @@ -758,37 +836,23 @@ class SshTool(): pscppre = "python3 %s/script/gspylib/pssh/bin/pscp" % gp_home if len(hostList) == 0: - hostSshs = copy.deepcopy(self.hostNames) + ssh_hosts = copy.deepcopy(self.hostNames) + localMode = check_local_mode(ssh_hosts) else: - hostSshs = copy.deepcopy(hostList) - for host in hostSshs: - if NetUtil.get_ip_version(host) == NetUtil.NET_IPV6: - sshHosts.append("[" + host + "]") - else: - sshHosts.append(host) - if len(sshHosts) == 1 and sshHosts[0] == socket.gethostname() and \ + ssh_hosts = copy.deepcopy(hostList) + if localMode and \ srcFile != targetDir and \ srcFile != os.path.join(targetDir, os.path.split(srcFile)[1]): - localMode = True scpCmd = "cp -r %s %s" % (srcFile, targetDir) else: # cp file on local node - if socket.gethostname() in sshHosts: - localhost_idx = sshHosts.index(socket.gethostname()) - sshHosts.pop(localhost_idx) - cpcmd = "cp -r %s %s" % (srcFile, targetDir) - if srcFile != targetDir and srcFile != os.path.join(targetDir, os.path.basename(srcFile)): - (status, output) = subprocess.getstatusoutput(cpcmd) - if status == 0: - resultMap[socket.gethostname()] = DefaultValue.SUCCESS - else: - resultMap[socket.gethostname()] = DefaultValue.FAILURE - if not sshHosts: + self.cp_file_on_local_node(srcFile, targetDir, resultMap, ssh_hosts) + if not ssh_hosts: return scpCmd = "%s -r -v -t %s -p %s -H %s -o %s -e %s %s %s" \ " 2>&1 | tee %s" % (pscppre, self.__timeout, parallel_num, - " -H ".join(sshHosts), + " -H ".join(ssh_hosts), self.__outputPath, self.__errorPath, srcFile, targetDir, self.__resultFile) @@ -815,39 +879,43 @@ class SshTool(): # ip and host name should match here if localMode: dir_permission = 0o700 + if not self.is_ip: + ssh_hosts = [HostsUtil.ip_to_hostname(ssh_hosts[0])] if status == 0: - resultMap[sshHosts[0]] = DefaultValue.SUCCESS - outputCollect = "[%s] %s:\n%s" % ("SUCCESS", sshHosts[0], + resultMap[ssh_hosts[0]] = DefaultValue.SUCCESS + outputCollect = "[%s] %s:\n%s" % ("SUCCESS", ssh_hosts[0], SensitiveMask.mask_pwd(output)) if not os.path.exists(self.__outputPath): os.makedirs(self.__outputPath, mode=dir_permission) - file_path = os.path.join(self.__outputPath, sshHosts[0]) + file_path = os.path.join(self.__outputPath, ssh_hosts[0]) FileUtil.createFileInSafeMode(file_path) with open(file_path, "w") as fp: fp.write(SensitiveMask.mask_pwd(output)) fp.flush() fp.close() else: - resultMap[sshHosts[0]] = DefaultValue.FAILURE - outputCollect = "[%s] %s:\n%s" % ("FAILURE", sshHosts[0], + resultMap[ssh_hosts[0]] = DefaultValue.FAILURE + outputCollect = "[%s] %s:\n%s" % ("FAILURE", ssh_hosts[0], SensitiveMask.mask_pwd(output)) if not os.path.exists(self.__errorPath): os.makedirs(self.__errorPath, mode=dir_permission) - file_path = os.path.join(self.__errorPath, sshHosts[0]) + file_path = os.path.join(self.__errorPath, ssh_hosts[0]) FileUtil.createFileInSafeMode(file_path) with open(file_path, "w") as fp: fp.write(SensitiveMask.mask_pwd(output)) fp.flush() fp.close() else: - resultMap, outputCollect = self.parseSshResult(sshHosts) + resultMap, outputCollect = self.parseSshResult(ssh_hosts) except Exception as e: self.clenSshResultFiles() raise Exception(str(e)) - - for host in sshHosts: + + for host in ssh_hosts: + if not self.is_ip: + host = HostsUtil.ip_to_hostname(host) if resultMap.get(host) != DefaultValue.SUCCESS: raise Exception(ErrorCode.GAUSS_502["GAUSS_50216"] % ("file [%s]" % srcFile) + @@ -855,6 +923,27 @@ class SshTool(): " Command: %s.\nError:\n%s" % (SensitiveMask.mask_pwd(scpCmd), SensitiveMask.mask_pwd(outputCollect))) + def cp_file_on_local_node(self, src_file, target_dir, result_map, ssh_hosts): + """" + copy file on local node + """ + local_name_or_ip = "" + local_name = socket.gethostname() + if not self.is_ip: + local_name_or_ip = HostsUtil.hostname_list_to_ip_list([local_name])[0] + else: + local_name_or_ip = local_name + if local_name_or_ip in ssh_hosts: + localhost_idx = ssh_hosts.index(local_name_or_ip) + ssh_hosts.pop(localhost_idx) + cpcmd = "cp -r %s %s" % (src_file, target_dir) + if src_file != target_dir and src_file != os.path.join(target_dir, os.path.basename(src_file)): + (status, output) = subprocess.getstatusoutput(cpcmd) + if status == 0: + result_map[local_name_or_ip] = DefaultValue.SUCCESS + else: + result_map[local_name_or_ip] = DefaultValue.FAILURE + def checkRemoteFileExist(self, node, fileAbsPath, mpprcFile): """ check remote node exist file @@ -915,8 +1004,12 @@ class SshTool(): with open(resultFile, "r") as fp: lines = fp.readlines() context = "".join(lines) + for line in lines: resultPair = line.strip().split(" ") + if len(resultPair) < 4: + continue + if len(resultPair) >= 4 and resultPair[2] == "[FAILURE]": resultMap[resultPair[3]] = "Failure" if len(resultPair) >= 4 and resultPair[2] == "[SUCCESS]": diff --git a/script/impl/checkperf/OLAP/CheckperfImplOLAP.py b/script/impl/checkperf/OLAP/CheckperfImplOLAP.py index 2b9b85e..684031a 100644 --- a/script/impl/checkperf/OLAP/CheckperfImplOLAP.py +++ b/script/impl/checkperf/OLAP/CheckperfImplOLAP.py @@ -32,6 +32,8 @@ from base_utils.os.file_util import FileUtil from domain_utils.sql_handler.sql_executor import SqlExecutor from base_utils.os.net_util import NetUtil from base_utils.os.cmd_util import CmdUtil +from base_utils.security.security_checker import SecurityChecker +from base_utils.os.hosts_util import HostsUtil # Database size inspection interval DB_SIZE_CHECK_INTERVAL = 21600 @@ -1250,6 +1252,8 @@ class CheckperfImplOLAP(CheckperfImpl): return # launch asyn collection for database size + if not SecurityChecker.check_is_ip(host): + host = HostsUtil.hostname_to_ip(host) cmd = "pssh -s -H %s \'" % (str(host)) if (self.opts.mpprcFile != ""): diff --git a/script/impl/dropnode/DropnodeImpl.py b/script/impl/dropnode/DropnodeImpl.py index c2a5dda..788cf43 100644 --- a/script/impl/dropnode/DropnodeImpl.py +++ b/script/impl/dropnode/DropnodeImpl.py @@ -37,6 +37,8 @@ from gspylib.common.OMCommand import OMCommand from base_utils.os.env_util import EnvUtil from base_utils.os.net_util import NetUtil from domain_utils.domain_common.cluster_constants import ClusterConstants +from base_utils.os.file_util import FileUtil +from gspylib.common.DbClusterInfo import dbClusterInfo # master @@ -190,15 +192,16 @@ class DropnodeImpl(): operation only need to be executed on primary node """ for hostNameLoop in self.context.hostMapForExist.keys(): + data_dir = self.context.hostMapForExist[hostNameLoop]['datadir'] try: self.commonOper.SetPghbaConf(self.userProfile, hostNameLoop, self.resultDictOfPrimary[0][ - 'pghbaStr'], False) + 'pghbaStr'], data_dir, False) except ValueError: self.logger.log("[gs_dropnode]Rollback pghba conf.") self.commonOper.SetPghbaConf(self.userProfile, hostNameLoop, self.resultDictOfPrimary[0][ - 'pghbaStr'], True) + 'pghbaStr'], data_dir, True) indexLoop = 0 for i in self.context.hostMapForExist[self.localhostname]['datadir']: try: @@ -355,6 +358,23 @@ class DropnodeImpl(): else: pass + def rewrite_hosts_file(self): + """ + Rewrite hosts file + """ + cluster_info = dbClusterInfo() + cluster_info.initFromStaticConfig(self.context.user) + cluster_hostname_ip_map = {} + for name in cluster_info.getClusterNodeNames(): + node = cluster_info.getDbNodeByName(name) + ip = node.sshIps[0] + cluster_hostname_ip_map[ip] = name + + hosts_file = FileUtil.get_hosts_file() + if os.path.exists(hosts_file): + FileUtil.removeFile(hosts_file) + FileUtil.write_hosts_file(hosts_file, cluster_hostname_ip_map) + def run(self): """ start dropnode @@ -366,6 +386,7 @@ class DropnodeImpl(): self.operationOnlyOnPrimary() self.modifyStaticConf() self.restartInstance() + self.rewrite_hosts_file() self.logger.log("[gs_dropnode]Success to drop the target nodes.") @@ -652,7 +673,7 @@ class OperCommon: self.logger.log( "[gs_dropnode]End of set openGauss config file on %s." % host) - def SetPghbaConf(self, envProfile, host, pgHbaValue, + def SetPghbaConf(self, envProfile, host, pgHbaValue, data_dir, flagRollback=False): """ Set the value of pg_hba.conf @@ -660,34 +681,18 @@ class OperCommon: self.logger.log( "[gs_dropnode]Start of set pg_hba config file on %s." % host) cmd = 'source %s;' % envProfile - + ssh_tool = SshTool([host]) if len(pgHbaValue): ip_entries = pgHbaValue[:-1].split('|') for entry in ip_entries: entry = entry.strip() + v = entry[0:len(entry) - 9] if not flagRollback: - if NetUtil.get_ip_version(entry) == NetUtil.NET_IPV4: - v = entry[0:entry.find('/32') + 3] - cmd += "gs_guc set -N %s -I all -h '%s';" % (host, v) - elif NetUtil.get_ip_version(entry) == NetUtil.NET_IPV6: - v = entry[0:entry.find('/128') + 4] - cmd += "gs_guc set -N %s -I all -h '%s';" % (host, v) - elif NetUtil.get_ip_version(entry) == "": - raise ValueError(f"Invalid IP address format: {entry}") + cmd += "gs_guc set -D %s -h '%s';" % (data_dir[0], v) else: - cmd += "gs_guc set -N %s -I all -h '%s';" % (host, entry) - (status, output) = subprocess.getstatusoutput(cmd) - result_v = re.findall(r'Failed instances: (\d)\.', output) - if status: - self.logger.debug( - "[gs_dropnode]Set pg_hba config file failed:" + output) - raise ValueError(output) - if len(result_v): - if result_v[0] != '0': - self.logger.debug( - "[gs_dropnode]Set pg_hba config file failed:" + output) - raise ValueError(output) - else: + cmd += "gs_guc set -D %s -h '%s';" % (data_dir[0], v) + (status, output) = ssh_tool.getSshStatusOutput(cmd, [host]) + if status[host] != DefaultValue.SUCCESS: self.logger.debug( "[gs_dropnode]Set pg_hba config file failed:" + output) raise ValueError(output) diff --git a/script/impl/dropnode/drop_node_with_cm_impl.py b/script/impl/dropnode/drop_node_with_cm_impl.py index bddf239..fdb0f9a 100644 --- a/script/impl/dropnode/drop_node_with_cm_impl.py +++ b/script/impl/dropnode/drop_node_with_cm_impl.py @@ -442,4 +442,5 @@ class DropNodeWithCmImpl(DropnodeImpl): self.clean_del_dss() self.restart_new_cluster() self.remove_cm_res_backup() + self.rewrite_hosts_file() self.logger.log("[gs_dropnode] Success to drop the target nodes.") \ No newline at end of file diff --git a/script/impl/expansion/ExpansionImpl.py b/script/impl/expansion/ExpansionImpl.py index 0b36874..4d1255a 100644 --- a/script/impl/expansion/ExpansionImpl.py +++ b/script/impl/expansion/ExpansionImpl.py @@ -52,6 +52,7 @@ from domain_utils.cluster_file.version_info import VersionInfo from domain_utils.cluster_file.package_info import PackageInfo from base_utils.os.net_util import NetUtil from base_diff.comm_constants import CommConstants +from base_utils.os.file_util import FileUtil #boot/build mode MODE_PRIMARY = "primary" @@ -1703,9 +1704,24 @@ remoteservice={remoteservice}'"\ self.checkGaussdbAndGsomVersionOfStandby() self.logger.log("Start to establish the relationship.") self.buildStandbyRelation() + self.write_hosts_file() # process success pvalue.value = 1 + def write_hosts_file(self): + cluster_info = dbClusterInfo() + cluster_info.initFromStaticConfig(self.context.user) + cluster_hostname_ip_map = {} + for name in cluster_info.getClusterNodeNames(): + node = cluster_info.getDbNodeByName(name) + ip = node.sshIps[0] + cluster_hostname_ip_map[ip] = name + + hosts_file = FileUtil.get_hosts_file() + if os.path.exists(hosts_file): + FileUtil.removeFile(hosts_file) + FileUtil.write_hosts_file(hosts_file, cluster_hostname_ip_map) + def rollback(self): """ rollback all hosts' replconninfo about failed hosts @@ -1778,6 +1794,22 @@ remoteservice={remoteservice}'"\ self.logger.debug("Parse result is: {0}".format(parse_result)) return parse_result + def rewrite_hosts_file(self): + """ + Rewrite hosts file + """ + gp_home = EnvUtil.getEnv("GPHOME") + hosts_file = os.path.join(gp_home, "hosts") + if os.path.isfile(hosts_file): + os.remove(hosts_file) + hosts_content = {} + for name in self.context.clusterInfo.getClusterNodeNames: + node = self.context.clusterInfo.getDbNodeByName(name) + node_ip = node.sshIps[0] + hosts_content[node_ip] = name + + FileUtil.write_hosts_file(hosts_file, hosts_content) + def run(self): """ start expansion diff --git a/script/impl/expansion/expansion_impl_with_cm_local.py b/script/impl/expansion/expansion_impl_with_cm_local.py index 7777be7..f49d1a1 100644 --- a/script/impl/expansion/expansion_impl_with_cm_local.py +++ b/script/impl/expansion/expansion_impl_with_cm_local.py @@ -105,7 +105,7 @@ class ExpansionImplWithCmLocal(ExpansionImplWithCm): cmd = "ls {0} | wc -l".format(cm_agent_conf) _, output_collect = self.ssh_tool.getSshStatusOutput(cmd, hostList=[new_node.name]) result_dict = self._parse_ssh_tool_output_collect(output_collect) - if new_node.name not in result_dict: + if new_node.name not in result_dict.keys(): self.logger.error("Check remote node [{0}] cm_agent.conf failed. " "output: {1}".format(new_node.name, result_dict)) raise Exception("Check remote node [{0}] cm_agent.conf failed. " diff --git a/script/impl/install/OLAP/InstallImplOLAP.py b/script/impl/install/OLAP/InstallImplOLAP.py index 7b2b1a2..7d8a544 100644 --- a/script/impl/install/OLAP/InstallImplOLAP.py +++ b/script/impl/install/OLAP/InstallImplOLAP.py @@ -353,17 +353,15 @@ class InstallImplOLAP(InstallImpl): memCheck = "cat /proc/cpuinfo | grep processor | wc -l" coresCheck = "env -u LANGUAGE LC_ALL=C free -g --si | grep 'Mem' | awk -F ' ' '{print \$2}'" cmd = "pssh -s -H %s \"%s & %s\"" % ( - dbNode.name, memCheck, coresCheck) + dbNode.sshIps[0], memCheck, coresCheck) (status, output) = subprocess.getstatusoutput(cmd) if status != 0 or len(output.strip().split()) != 2: self.context.logger.debug( ErrorCode.GAUSS_514["GAUSS_51400"] % cmd - + " Error: \n%s" % str( - output)) + + " Error: \n%s" % str(output)) raise Exception( ErrorCode.GAUSS_514["GAUSS_51400"] % cmd - + " Error: \n%s" % str( - output)) + + " Error: \n%s" % str(output)) data_check_info[dbNode.name] = str(output).strip().split() self.context.logger.debug( "The check info on each node. \nNode : Info(MemSize | CPUCores)") diff --git a/script/impl/preinstall/OLAP/PreinstallImplOLAP.py b/script/impl/preinstall/OLAP/PreinstallImplOLAP.py index aa79961..77b65d7 100644 --- a/script/impl/preinstall/OLAP/PreinstallImplOLAP.py +++ b/script/impl/preinstall/OLAP/PreinstallImplOLAP.py @@ -221,11 +221,19 @@ class PreinstallImplOLAP(PreinstallImpl): Current_Path = os.path.dirname(os.path.realpath(__file__)) DefaultValue.checkPathVaild(os.path.normpath(Current_Path)) + # cp hosts file to tool dir + self.cp_host_file_to_tool_dir(packageDir) + except Exception as e: raise Exception(str(e)) self.context.logger.log( "Successfully installed the tools on the local node.", "constant") + + def cp_host_file_to_tool_dir(self, package_dir): + hosts_file = os.path.normpath(os.path.join(package_dir, "hosts")) + target_hosts_file = os.path.normpath(os.path.join(self.context.clusterToolPath, "hosts")) + FileUtil.cpFile(hosts_file, target_hosts_file) def checkDiskSpace(self): """ @@ -606,7 +614,7 @@ class PreinstallImplOLAP(PreinstallImpl): # exec the cmd CmdExecutor.execCommandWithMode(cmd, self.context.sshTool, - self.context.localMode, + self.context.localMode or self.context.isSingle, self.context.mpprcFile) self.del_remote_pkgpath() diff --git a/script/impl/preinstall/PreinstallImpl.py b/script/impl/preinstall/PreinstallImpl.py index c2e0e5b..f0dc3e9 100644 --- a/script/impl/preinstall/PreinstallImpl.py +++ b/script/impl/preinstall/PreinstallImpl.py @@ -39,6 +39,7 @@ from base_utils.os.net_util import NetUtil from base_utils.os.env_util import EnvUtil from domain_utils.cluster_file.profile_file import ProfileFile from domain_utils.cluster_file.version_info import VersionInfo +from base_utils.os.crontab_util import CrontabUtil # action name # prepare cluster tool package path @@ -83,6 +84,8 @@ ACTION_CHANGE_TOOL_ENV = "change_tool_env" ACTION_CHECK_CONFIG = "check_config" # check cpu ACTION_CHECK_CPU_INSTRUCTIONS = "check_cpu_instructions" +# check nofile limit +ACTION_CHECK_NOFILE_LIMIT = "check_nofile_limit" ############################################################################# # Global variables # self.context.logger: globle logger @@ -427,7 +430,7 @@ class PreinstallImpl: "Successfully distribute package to package path.") break # 3.distribute xml file - DefaultValue.distributeXmlConfFile(self.context.sshTool, + DefaultValue.distribute_hosts_file(self.context.sshTool, self.context.xmlFile, hosts, self.context.mpprcFile) cmd = "%s -t %s -u %s -X %s" % (OMCommand.getLocalScript("Local_PreInstall"), @@ -439,6 +442,12 @@ class PreinstallImpl: False, self.context.mpprcFile, hosts) + + # 4.distribute hosts file + hosts_file = os.path.normpath(os.path.join(self.context.clusterToolPath, "hosts")) + DefaultValue.distribute_hosts_file(self.context.sshTool, + hosts_file, hosts, + self.context.mpprcFile) except Exception as e: raise Exception(str(e)) @@ -771,6 +780,32 @@ class PreinstallImpl: self.context.logger.log("Warning: This cluster is missing the rdtscp or avx instruction.") self.context.logger.log("Successfully checked cpu instructions.", "constant") + def check_nofile_limit(self): + """ + function: Check if nofile limit more then 640000 + input:NA + output:NA + """ + if self.context.localMode or self.context.isSingle: + return + if not self.context.clusterInfo.hasNoCm(): + self.context.logger.log("Checking nofile limit.", "addStep") + try: + # Checking OS version + cmd = "%s -t %s -u %s -l %s" % ( + OMCommand.getLocalScript("Local_PreInstall"), + ACTION_CHECK_NOFILE_LIMIT, + self.context.user, + self.context.localLog) + CmdExecutor.execCommandWithMode( + cmd, + self.context.sshTool, + self.context.localMode or self.context.isSingle, + self.context.mpprcFile) + except Exception as e: + raise Exception(str(e)) + self.context.logger.log("Successfully checked nofile limit.", "constant") + def createOSUser(self): """ function: @@ -1610,6 +1645,9 @@ class PreinstallImpl: """ :return: """ + if not self.context.current_user_root and not CrontabUtil.check_user_crontab_permission(): + self.context.logger.log("Warning: The %s user does not have permission to set crontab." % self.context.user) + return False self.context.logger.debug("Start set cron for %s" %self.context.user) tmp_path = ClusterConfigFile.readClusterTmpMppdbPath( self.context.user, self.context.xmlFile) @@ -1632,6 +1670,7 @@ class PreinstallImpl: self.context.localMode or self.context.isSingle, self.context.mpprcFile) self.context.logger.debug("Successfully to set cron for %s" %self.context.user) + return True def do_perf_config(self): """ diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 5b4d2a2..53ef767 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -89,6 +89,7 @@ ACTION_SET_CGROUP = "set_cgroup" ACTION_CHECK_CONFIG = "check_config" ACTION_DSS_NIT = "dss_init" ACTION_CHECK_CPU_INSTRUCTIONS = "check_cpu_instructions" +ACTION_CHECK_NOFILE_LIMIT = "check_nofile_limit" g_nodeInfo = None envConfig = {} @@ -280,7 +281,8 @@ Common options: ACTION_SET_ARM_OPTIMIZATION, ACTION_CHECK_DISK_SPACE, ACTION_SET_WHITELIST, ACTION_FIX_SERVER_PACKAGE_OWNER, ACTION_DSS_NIT, - ACTION_CHANGE_TOOL_ENV, ACTION_CHECK_CONFIG, ACTION_CHECK_CPU_INSTRUCTIONS] + ACTION_CHANGE_TOOL_ENV, ACTION_CHECK_CONFIG, ACTION_CHECK_CPU_INSTRUCTIONS, + ACTION_CHECK_NOFILE_LIMIT] if self.action == "": GaussLog.exitWithError( ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + ".") @@ -2017,6 +2019,25 @@ Common options: except Exception as e: self.logger.debug(cpu_mission) raise Exception(cpu_mission) + + def check_nofile_limit(self): + """ + function: Check nofile limit + input:NA + output:NA + """ + self.logger.debug("Checking nofile limit.") + if os.getuid() == 0: + cmd = "su - %s -c \'ulimit -n\'" % self.user + else: + cmd = "ulimit -n" + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error: \n%s" % output) + ulimit_value = int(output.strip()) + if ulimit_value < DefaultValue.NOFILE_LIMIT: + raise Exception("Deploy cm, the number of file handles for %s user must be greater than %s" % (self.user, DefaultValue.NOFILE_LIMIT)) + self.logger.debug("Successfully checked nofile limit.") def checkPlatformArm(self): """ @@ -2717,6 +2738,11 @@ Common options: # Change owner to appropriate user FileUtil.changeOwner(self.user, dest_path) + # cp hosts file to /home/user/gauss_om/ + self.logger.debug("cp hosts file to /home/user/gauss_om.") + cmd = "cp -rf %s/hosts %s" % (self.clusterToolPath, dest_path) + CmdExecutor.execCommandLocally(cmd) + # cp cgroup to /home/user/gauss_om/script self.logger.debug("cp cgroup to /home/user/gauss_om/script.") backup_lib_dir = os.path.join(dest_path, "lib") @@ -2738,7 +2764,6 @@ Common options: "gs_checkos"] common_scripts = ["gs_sshexkey", "killall", "gs_checkperf"] # the script files are not stored in the env path - not_in_env_scripts = ["gs_expansion"] backup_save_files = backup_scripts + common_scripts self.logger.debug("Delete user scripts in om backup_om path.") # delete user scripts in om backup_om path @@ -2764,7 +2789,7 @@ Common options: user_om_files = os.listdir(om_user_path) for user_file in user_om_files: if user_file.startswith("gs_"): - if user_file in backup_scripts or user_file in not_in_env_scripts: + if user_file in backup_scripts: FileUtil.removeFile("%s/%s" % (om_user_path, user_file)) FileUtil.changeOwner(self.user, dest_path, recursive=True) self.logger.debug("Delete cluster decompress package in user path.")