#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description  : gs_dropnode is a utility to drop a standby node from the cluster
#############################################################################

import datetime
import os
import re
import subprocess
import sys
import pwd
import grp

# Make sure the bundled C libraries are first on LD_LIBRARY_PATH. The dynamic
# loader only reads the variable at process start, so the script re-executes
# itself after changing it.
package_path = os.path.dirname(os.path.realpath(__file__))
ld_path = package_path + "/gspylib/clib"
if 'LD_LIBRARY_PATH' not in os.environ:
    os.environ['LD_LIBRARY_PATH'] = ld_path
    os.execve(os.path.realpath(__file__), sys.argv, os.environ)
if not os.environ.get('LD_LIBRARY_PATH').startswith(ld_path):
    os.environ['LD_LIBRARY_PATH'] = \
        ld_path + ":" + os.environ['LD_LIBRARY_PATH']
    os.execve(os.path.realpath(__file__), sys.argv, os.environ)

sys.path.append(sys.path[0])
from gspylib.common.DbClusterInfo import dbClusterInfo
from gspylib.common.DbClusterStatus import DbClusterStatus
from gspylib.common.GaussLog import GaussLog
from gspylib.common.Common import DefaultValue, ClusterCommand
from gspylib.common.ErrorCode import ErrorCode
from gspylib.common.ParallelBaseOM import ParallelBaseOM
from gspylib.common.ParameterParsecheck import Parameter
from gspylib.threads.SshTool import SshTool
from impl.dropnode.DropnodeImpl import DropnodeImpl
from impl.dropnode.drop_node_with_cm_impl import DropNodeWithCmImpl
from base_utils.os.env_util import EnvUtil
from base_utils.os.net_util import NetUtil
from domain_utils.domain_common.cluster_constants import ClusterConstants

ENV_LIST = ["MPPDB_ENV_SEPARATE_PATH", "GPHOME", "PATH",
            "LD_LIBRARY_PATH", "PYTHONPATH", "GAUSS_WARNING_TYPE",
            "GAUSSHOME", "PATH", "LD_LIBRARY_PATH",
            "S3_CLIENT_CRT_FILE", "GAUSS_VERSION", "PGHOST",
            "GS_CLUSTER_NAME", "GAUSSLOG", "GAUSS_ENV", "umask"]


class Dropnode(ParallelBaseOM):
    """
    Command-line front end of gs_dropnode.

    Parses and validates the command line, checks node connectivity and
    cluster status, asks the operator for confirmation and then delegates
    the actual removal to DropnodeImpl or DropNodeWithCmImpl.
    """

    def __init__(self):
        """
        function: Initialise the containers filled in by checkParameters
                  and resolve the environment/profile files.
        input: NA
        output: NA
        """
        ParallelBaseOM.__init__(self)
        # Add the standby node backip list which need to be deleted
        self.hostIpListForDel = []
        # node name -> description of the nodes to be dropped / to be kept
        self.hostMapForDel = {}
        self.hostMapForExist = {}
        self.clusterInfo = dbClusterInfo()
        # node name -> first back IP of that node
        self.backIpNameMap = {}
        # starts as an empty list; checkConnection replaces it with a string
        self.failureHosts = []
        self.flagOnlyPrimary = False
        envFile = EnvUtil.getEnv("MPPDB_ENV_SEPARATE_PATH")
        if envFile:
            self.envFile = envFile
            self.userProfile = envFile
        else:
            self.envFile = ClusterConstants.ETC_PROFILE
            # Expand "~<user>" in-process instead of spawning a shell for
            # "echo ~<user>"; an unknown user is left unexpanded either way.
            homeDir = os.path.expanduser("~%s" % self.user)
            self.userProfile = os.path.join(homeDir, ".bashrc")

    def usage(self):
        """
gs_dropnode is a utility to delete the standby node from a cluster, streaming cluster does not yet support.

Usage:
    gs_dropnode -? | --help
    gs_dropnode -V | --version
    gs_dropnode -U USER -G GROUP -h nodeList
General options:
    -U                                 Cluster user.
    -G                                 Group of the cluster user.
    -h                                 The standby node backip list which need to be deleted
                                       Separate multiple nodes with commas (,).
                                       such as '-h 192.168.0.1,192.168.0.2'
    -?, --help                         Show help information for this
                                       utility, and exit the command line mode.
    -V, --version                      Show version information.
        """
        # The docstring above doubles as the help text shown to the user.
        print(self.usage.__doc__)

    def parseCommandLine(self):
        """
        function: Parse parameters from the command line and store
                  -U, -G and -h on the instance. Prints the usage and
                  exits with status 0 when help was requested.
        input: NA
        output: NA
        """
        ParaObj = Parameter()
        ParaDict = ParaObj.ParameterCommandLine("dropnode")

        # parameter -h or -?
        if "helpFlag" in ParaDict:
            self.usage()
            sys.exit(0)
        # Resolves command line arguments
        # parameter -U
        if "user" in ParaDict:
            self.user = ParaDict.get("user")
            DefaultValue.checkPathVaild(self.user)
        # parameter -G
        if "group" in ParaDict:
            self.group = ParaDict.get("group")
        # parameter -h
        if "nodename" in ParaDict:
            self.hostIpListForDel = ParaDict.get("nodename")

    @staticmethod
    def _describeNode(node):
        """
        function: Build the ip/datadir/dn_id/port description of one node
        input: node, an entry of clusterInfo.dbNodes
        output: dict with keys 'ipaddr', 'datadir', 'dn_id', 'port'
        """
        return {
            'ipaddr': node.backIps[0],
            'datadir': [dn.datadir for dn in node.datanodes],
            'dn_id': ['dn_' + str(dn.instanceId) for dn in node.datanodes],
            'port': [str(dn.port) for dn in node.datanodes],
        }

    def checkParameters(self):
        """
        function: Check parameter from command line and build the maps of
                  nodes to be dropped (hostMapForDel) and nodes to be kept
                  (hostMapForExist). Exits with an error on any invalid
                  input.
        input: NA
        output: NA
        """
        # check user | group | node
        if not self.user:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-U")
        if not self.group:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-G")
        if not self.hostIpListForDel:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-h")
        # check if upgrade action is exist
        if DefaultValue.isUnderUpgrade(self.user):
            GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"])

        # Look the user and the group up separately. Deciding which lookup
        # failed from the text of a shared KeyError is unreliable: the user
        # name can occur inside the "group not found" message.
        try:
            pw_user = pwd.getpwnam(self.user)
        except KeyError:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_503["GAUSS_50300"] % self.user)
        try:
            gr_group = grp.getgrnam(self.group)
        except KeyError:
            self.logger.log("Group %s not exist." % self.group)
            sys.exit(1)

        # get dbcluster info from static config file
        self.clusterInfo.initFromStaticConfig(self.user)
        # the install directory must be owned by the given user and group
        appStat = os.stat(self.clusterInfo.appPath)
        if appStat.st_uid != pw_user.pw_uid or \
                appStat.st_gid != gr_group.gr_gid:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_503["GAUSS_50323"] % self.user)

        self.backIpNameMap = {}
        for node in self.clusterInfo.dbNodes:
            self.backIpNameMap[node.name] = node.backIps[0]
            nodeDesc = self._describeNode(node)
            if node.backIps[0] in self.hostIpListForDel:
                self.hostMapForDel[node.name] = nodeDesc
            else:
                # remaining nodes also carry the lists of settings to clean
                nodeDesc.update({'replToBeDel': [],
                                 'syncStandbyDel': [],
                                 'pghbaDel': []})
                self.hostMapForExist[node.name] = nodeDesc

        # the node running this command must not be in the drop list
        localIp = self.backIpNameMap[NetUtil.GetHostIpOrName()]
        if localIp in self.hostIpListForDel:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_358["GAUSS_35803"] % localIp)

        # every requested IP must be a back IP of some cluster node
        for ipLoop in self.hostIpListForDel:
            if ipLoop not in self.backIpNameMap.values():
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_358["GAUSS_35802"]
                    % self.hostIpListForDel)

        if not self.hostMapForDel:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_358["GAUSS_35802"] % self.hostIpListForDel)

    def check_repeat_process(self):
        """
        function: Check whether the same gs_dropnode command be run at the
                  same time. Exits with an error when "ps" reports more
                  than one matching process line.
        input: NA
        output: NA
        """
        cmd = "ps -ef | grep 'gs_dropnode -U %s -G %s' | grep -v grep" \
              % (self.user, self.group)
        (status, output) = subprocess.getstatusoutput(cmd)
        # one matching line is this process itself
        if status == 0 and len(output.split('\n')) > 1:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35810"])

    def flagForOnlyPrimaryLeft(self):
        """
        function: Check whether only one node be left in the cluster after
                  the drop; if so ask for confirmation and set
                  self.flagOnlyPrimary.
        input: NA
        output: NA
        """
        countClusterNodes = len(self.backIpNameMap)
        if (countClusterNodes - len(self.hostIpListForDel)) == 1:
            msgPrint = "The cluster will have only one standalone node left " \
                       "after the operation!\nDo you want to continue to drop " \
                       "the target node (yes/no)? "
            self.checkInput(msgPrint)
            self.flagOnlyPrimary = True

    def check_cluster_status(self):
        """
        function: Check whether the status of cluster is normal, verify
                  that every local datanode is Primary and ask the operator
                  to confirm the drop.
        input: NA
        output: NA
        """
        tmpDir = EnvUtil.getTmpDirFromEnv()
        tmpFile = os.path.join(
            tmpDir,
            "gauss_cluster_status.dat_"
            + datetime.datetime.now().strftime('%Y%m%d%H%M%S')
            + "_" + str(os.getpid()))
        # the cluster-wide query is skipped when checkConnection recorded
        # unreachable hosts
        if not self.failureHosts:
            cmd = ClusterCommand.getQueryStatusCmd("", tmpFile, False)
            try:
                (status, output) = subprocess.getstatusoutput(cmd)
                if status != 0:
                    self.logger.debug("The cmd is %s " % cmd)
                    raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                                    cmd + "Error: \n%s" % output)
                clusterStatus = DbClusterStatus()
                clusterStatus.initFromFile(tmpFile)
            finally:
                # do not leave the temporary status file behind
                if os.path.isfile(tmpFile):
                    try:
                        os.remove(tmpFile)
                    except OSError as err:
                        self.logger.debug(
                            "Failed to remove %s: %s" % (tmpFile, err))
            clsStatus = clusterStatus.clusterStatusDetail
            if clsStatus in ["Unknown", "Unavailable"]:
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_358["GAUSS_35806"] % clsStatus)

        # [9:] strips the leading "dict_keys" of the keys-view repr
        statusDelHost = "The target node to be dropped is %s \n" % str(
            self.hostMapForDel.keys())[9:]
        localHost = NetUtil.GetHostIpOrName()
        for dndir_loop in self.hostMapForExist[localHost]['datadir']:
            # Restrict the output to the local_role line. With an empty grep
            # pattern every line passes, so any other line mentioning
            # "Primary" would satisfy the check below.
            # NOTE(review): assumes gs_ctl query prints a
            # "local_role : <role>" line - confirm against gs_ctl output.
            cmd = "gs_ctl query -D %s|grep -w 'local_role'| " \
                  "awk -F ':' '{print $2}'" % dndir_loop
            (status, output) = subprocess.getstatusoutput(cmd)
            if 'Primary' not in output:
                GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35804"])

        msgPrint = "%sDo you want to continue to drop the target node (yes/no)?" \
                   % statusDelHost
        self.checkInput(msgPrint)

    def checkConnection(self, hostnames, env):
        """
        function: Check the node connection by running "echo 1" on every
                  host. If a node which will not be deleted can't be
                  connected, report an error, else continue. The failure
                  lines of the ssh output are kept in self.failureHosts.
        input: hostnames, list of host names to probe
               env, environment file handed to the ssh tool
        output: NA
        """
        command = "echo 1"
        # NOTE(review): the original comment says this changes the timeout
        # to 30s as 330s is too long; how -20 maps to that is decided inside
        # SshTool - confirm.
        sshTool = SshTool(hostnames, None, -20)
        resultMap, outputCollect = sshTool.getSshStatusOutput(command,
                                                              hostnames, env)
        self.logger.debug(outputCollect)
        self.failureHosts = '.'.join(re.findall(r"\[FAILURE\] .*:.*\n",
                                                outputCollect))
        # NOTE(review): failureHosts is a string here, so this is a substring
        # test - a host name that is a prefix of a failed host's name also
        # matches.
        for host in list(self.hostMapForExist.keys()):
            if host in self.failureHosts:
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_358["GAUSS_35807"] % host)

    def initLogs(self):
        """
        function: Init log file below $GAUSSLOG/om and create the logger
        input: NA
        output: NA
        """
        if not os.path.isfile(self.userProfile):
            raise Exception(
                ErrorCode.GAUSS_502["GAUSS_50210"] % self.userProfile)
        log_path = EnvUtil.getEnvironmentParameterValue("GAUSSLOG",
                                                        self.user,
                                                        self.userProfile)
        self.logFile = os.path.realpath(
            "%s/om/%s" % (log_path, DefaultValue.DROPNODE_LOG_FILE))
        # if not absolute path
        if not os.path.isabs(self.logFile):
            GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log")
        self.initLogger("gs_dropnode")
        self.logger.ignoreErr = True

    def checkInput(self, msgPrint):
        """
        function: Ask a yes/no question; an invalid answer is re-prompted
                  at most twice. Exits with an error unless the final
                  answer is "yes" or "y" (case-insensitive).
        input: msgPrint, the prompt shown to the operator
        output: NA
        """
        validAnswers = ("YES", "NO", "Y", "N")
        flag = input(msgPrint)
        retries = 2
        while retries and flag.upper() not in validAnswers:
            retries -= 1
            flag = input("Please type 'yes' or 'no': ")
        if flag.upper() not in ("YES", "Y"):
            GaussLog.exitWithError(
                ErrorCode.GAUSS_358["GAUSS_35805"] % flag.upper())

    def drop_run(self):
        """
        function: This is factory method of drop node operation. It picks
                  the CM-aware implementation when the static configuration
                  reports at least one CM server, then runs it.
        input: NA
        output: NA
        """
        if DefaultValue.get_cm_server_num_from_static(self.clusterInfo) > 0:
            self.logger.log("Drop node start with CM node.")
            drop_node_impl = DropNodeWithCmImpl(self)
        else:
            self.logger.log("Drop node start without CM node.")
            drop_node_impl = DropnodeImpl(self)
        drop_node_impl.run()


if __name__ == "__main__":
    # check if user is root
    if (os.getuid() == 0):
        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"])
    dropNode = Dropnode()
    dropNode.parseCommandLine()
    dropNode.initLogs()
    DefaultValue.check_is_streaming_dr_cluster()
    dropNode.check_repeat_process()
    dropNode.checkParameters()
    dropNode.checkConnection(list(dropNode.backIpNameMap.keys()),
                             dropNode.envFile)
    dropNode.check_cluster_status()
    dropNode.flagForOnlyPrimaryLeft()
    dropNode.drop_run()