Files
openGauss-OM/script/gs_dropnode
coolany eae422baf3 适配CM组件
Signed-off-by: coolany <kyosang@163.com>

support cgroup

追加合入
2022-03-05 18:51:52 +08:00

346 lines
14 KiB
Python

#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
# http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description : gs_dropnode is a utility to drop a standby node from the cluster
#############################################################################
import datetime
import os
import re
import subprocess
import sys
import pwd
import grp
# Make sure the bundled C libraries under <script dir>/gspylib/clib come first
# in LD_LIBRARY_PATH. The dynamic loader only reads that variable at process
# start, so whenever it has to be changed the script re-executes itself with
# the updated environment.
package_path = os.path.dirname(os.path.realpath(__file__))
ld_path = package_path + "/gspylib/clib"
_current_ld_path = os.environ.get('LD_LIBRARY_PATH')
if _current_ld_path is None or not _current_ld_path.startswith(ld_path):
    if _current_ld_path is None:
        # Variable not set at all: use the bundled library directory alone.
        os.environ['LD_LIBRARY_PATH'] = ld_path
    else:
        # Variable set but not led by the bundled directory: prepend it.
        os.environ['LD_LIBRARY_PATH'] = ld_path + ":" + _current_ld_path
    # Replaces the current process image; nothing below runs in this process.
    os.execve(os.path.realpath(__file__), sys.argv, os.environ)
# Append the script directory (sys.path[0]) once more at the end of sys.path.
sys.path.append(sys.path[0])
from gspylib.common.DbClusterInfo import dbClusterInfo
from gspylib.common.DbClusterStatus import DbClusterStatus
from gspylib.common.GaussLog import GaussLog
from gspylib.common.Common import DefaultValue, ClusterCommand
from gspylib.common.ErrorCode import ErrorCode
from gspylib.common.ParallelBaseOM import ParallelBaseOM
from gspylib.common.ParameterParsecheck import Parameter
from gspylib.threads.SshTool import SshTool
from impl.dropnode.DropnodeImpl import DropnodeImpl
from impl.dropnode.drop_node_with_cm_impl import DropNodeWithCmImpl
from base_utils.os.env_util import EnvUtil
from base_utils.os.net_util import NetUtil
from domain_utils.domain_common.cluster_constants import ClusterConstants
# List of environment-variable names (plus the literal "umask").
# NOTE(review): "PATH" and "LD_LIBRARY_PATH" each appear twice, and nothing in
# the visible part of this file reads ENV_LIST - confirm which consumer, if
# any, still depends on it before changing its contents.
ENV_LIST = ["MPPDB_ENV_SEPARATE_PATH", "GPHOME", "PATH",
"LD_LIBRARY_PATH", "PYTHONPATH", "GAUSS_WARNING_TYPE",
"GAUSSHOME", "PATH", "LD_LIBRARY_PATH",
"S3_CLIENT_CRT_FILE", "GAUSS_VERSION", "PGHOST",
"GS_CLUSTER_NAME", "GAUSSLOG", "GAUSS_ENV", "umask"]
class Dropnode(ParallelBaseOM):
    """
    Command line context of gs_dropnode.

    The object parses and validates the command line, splits the cluster
    nodes into the ones to be dropped (hostMapForDel) and the ones that stay
    (hostMapForExist), runs the pre-checks and finally hands itself to the
    drop-node implementation selected in drop_run().
    """

    def __init__(self):
        """
        function: Initialise the empty drop-node state and work out which
                  environment file / user profile to use
        input: NA
        output: NA
        """
        ParallelBaseOM.__init__(self)
        # Add the standby node backip list which need to be deleted
        self.hostIpListForDel = []
        # node name -> details of the nodes that will be dropped
        self.hostMapForDel = {}
        # node name -> details of the nodes that stay in the cluster
        self.hostMapForExist = {}
        self.clusterInfo = dbClusterInfo()
        # node name -> first back IP of that node
        self.backIpNameMap = {}
        # Replaced by checkConnection() with a string of "[FAILURE]" lines.
        self.failureHosts = []
        self.flagOnlyPrimary = False
        envFile = EnvUtil.getEnv("MPPDB_ENV_SEPARATE_PATH")
        if envFile:
            self.envFile = envFile
            self.userProfile = envFile
        else:
            self.envFile = ClusterConstants.ETC_PROFILE
            # Same tilde expansion the former "echo ~<user>" shell call did,
            # without spawning a shell.
            # NOTE(review): self.user is not assigned in this class before
            # this point; presumably ParallelBaseOM.__init__ provides it -
            # confirm.
            home_dir = os.path.expanduser("~%s" % self.user)
            self.userProfile = os.path.join(home_dir, ".bashrc")

    # The docstring of usage() is the help text shown to the user (it is
    # printed verbatim), so it is deliberately written at column 0.
    def usage(self):
        """
gs_dropnode is a utility to delete the standby node from a cluster.
Usage:
    gs_dropnode -? | --help
    gs_dropnode -V | --version
    gs_dropnode -U USER -G GROUP -h nodeList
General options:
    -U                  Cluster user.
    -G                  Group of the cluster user.
    -h                  The standby node backip list which need to be deleted
                        Separate multiple nodes with commas (,).
                        such as '-h 192.168.0.1,192.168.0.2'
    -?, --help          Show help information for this
                        utility, and exit the command line mode.
    -V, --version       Show version information.
        """
        print(self.usage.__doc__)

    def parseCommandLine(self):
        """
        function: Parse parameter from command line and store user, group
                  and the list of back IPs to drop on the object.
                  Prints the usage and exits with 0 when help is requested.
        input: NA
        output: NA
        """
        ParaObj = Parameter()
        ParaDict = ParaObj.ParameterCommandLine("dropnode")
        # parameter -h or -?
        if "helpFlag" in ParaDict:
            self.usage()
            sys.exit(0)
        # Resolves command line arguments
        # parameter -U
        if "user" in ParaDict:
            self.user = ParaDict.get("user")
            DefaultValue.checkPathVaild(self.user)
        # parameter -G
        if "group" in ParaDict:
            self.group = ParaDict.get("group")
        # parameter -h
        if "nodename" in ParaDict:
            self.hostIpListForDel = ParaDict.get("nodename")

    def checkParameters(self):
        """
        function: Check parameter from command line, load the static
                  cluster configuration and build hostMapForDel,
                  hostMapForExist and backIpNameMap.
                  Every failed check terminates the process.
        input: NA
        output: NA
        """
        # check user | group | node
        if not self.user:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-U")
        if not self.group:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-G")
        if not self.hostIpListForDel:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-h")
        # check if upgrade action is exist
        if DefaultValue.isUnderUpgrade(self.user):
            GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"])
        pw_user, gr_group = self._lookup_user_and_group()
        # get dbcluster info from static config file
        self.clusterInfo.initFromStaticConfig(self.user)
        # The application directory must be owned by the given user:group.
        app_stat = os.stat(self.clusterInfo.appPath)
        if app_stat.st_uid != pw_user.pw_uid or \
                app_stat.st_gid != gr_group.gr_gid:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_503["GAUSS_50323"] % self.user)
        self._build_host_maps()
        self._check_hosts_for_del()

    def _lookup_user_and_group(self):
        """
        function: Resolve self.user and self.group in the OS account
                  databases; exit when either of them does not exist.
        input: NA
        output: (passwd entry of the user, group entry of the group)
        """
        # Two separate lookups: which name is missing is then unambiguous
        # instead of being guessed from the KeyError message text.
        try:
            pw_user = pwd.getpwnam(self.user)
        except KeyError:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_503["GAUSS_50300"] % self.user)
        try:
            gr_group = grp.getgrnam(self.group)
        except KeyError:
            self.logger.log("Group %s not exist." % self.group)
            sys.exit(1)
        return pw_user, gr_group

    def _build_host_maps(self):
        """
        function: Fill backIpNameMap and sort every cluster node into
                  hostMapForDel or hostMapForExist, depending on whether
                  its first back IP was given with -h.
        input: NA
        output: NA
        """
        self.backIpNameMap = {}
        for node in self.clusterInfo.dbNodes:
            back_ip = node.backIps[0]
            self.backIpNameMap[node.name] = back_ip
            host_entry = {'ipaddr': back_ip,
                          'datadir': [], 'dn_id': [],
                          'port': []}
            if back_ip in self.hostIpListForDel:
                self.hostMapForDel[node.name] = host_entry
            else:
                # Remaining nodes carry three extra, initially empty lists.
                host_entry['replToBeDel'] = []
                host_entry['syncStandbyDel'] = []
                host_entry['pghbaDel'] = []
                self.hostMapForExist[node.name] = host_entry
            for dn_inst in node.datanodes:
                host_entry['datadir'].append(dn_inst.datadir)
                host_entry['dn_id'].append('dn_' + str(dn_inst.instanceId))
                host_entry['port'].append(str(dn_inst.port))

    def _check_hosts_for_del(self):
        """
        function: Refuse to drop the local node and reject -h values that
                  do not belong to the cluster.
        input: NA
        output: NA
        """
        # NOTE(review): raises KeyError when the local host name is not a
        # node name of the static configuration - confirm this cannot happen.
        localIp = self.backIpNameMap[NetUtil.GetHostIpOrName()]
        if localIp in self.hostIpListForDel:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_358["GAUSS_35803"] % localIp)
        known_ips = set(self.backIpNameMap.values())
        for ipLoop in self.hostIpListForDel:
            if ipLoop not in known_ips:
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_358["GAUSS_35802"] %
                    self.hostIpListForDel)
        if not self.hostMapForDel:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_358["GAUSS_35802"] % self.hostIpListForDel)

    def check_repeat_process(self):
        """
        function: Exit when more than one process whose command line
                  contains 'gs_dropnode -U <user> -G <group>' shows up in
                  the process list
        input: NA
        output: NA
        """
        cmd = "ps -ef | grep 'gs_dropnode -U %s -G %s' | grep -v grep" \
              % (self.user, self.group)
        (status, output) = subprocess.getstatusoutput(cmd)
        # More than one matching line means another matching process exists
        # (presumably one of the lines is this process itself).
        if status == 0 and len(output.split('\n')) > 1:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35810"])

    def flagForOnlyPrimaryLeft(self):
        """
        function: Check whether only one node be left in the cluster.
                  If so, ask the user for confirmation and set
                  self.flagOnlyPrimary to True (nothing is returned).
        input: NA
        output: NA
        """
        countClusterNodes = len(self.backIpNameMap)
        # Count distinct IPs so that a back IP repeated on the command line
        # is not subtracted twice.
        countDelNodes = len(set(self.hostIpListForDel))
        if (countClusterNodes - countDelNodes) == 1:
            msgPrint = "The cluster will have only one standalone node left " \
                       "after the operation!\nDo you want to continue to drop " \
                       "the target node (yes/no)? "
            self.checkInput(msgPrint)
            self.flagOnlyPrimary = True

    def check_cluster_status(self):
        """
        function: Check whether the status of cluster is normal, make sure
                  every local datanode reports the Primary role and ask the
                  user to confirm the drop
        input: NA
        output: NA
        """
        tmpDir = EnvUtil.getTmpDirFromEnv()
        tmpFile = os.path.join(
            tmpDir,
            "gauss_cluster_status.dat_" +
            str(datetime.datetime.now().strftime('%Y%m%d%H%M%S')) +
            "_" + str(os.getpid()))
        # The cluster-wide status query only runs when checkConnection()
        # recorded no unreachable host.
        if not self.failureHosts:
            cmd = ClusterCommand.getQueryStatusCmd("", tmpFile, False)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                self.logger.debug("The cmd is %s " % cmd)
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % \
                                cmd + "Error: \n%s" % output)
            # NOTE(review): tmpFile is not removed after it has been read.
            clusterStatus = DbClusterStatus()
            clusterStatus.initFromFile(tmpFile)
            clsStatus = clusterStatus.clusterStatusDetail
            if clsStatus in ["Unknown", "Unavailable"]:
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_358["GAUSS_35806"] % clsStatus)
        # Renders the node names as "(['name1', 'name2'])".
        statusDelHost = "The target node to be dropped is %s \n" % (
            "(%s)" % list(self.hostMapForDel))
        localDataDirs = \
            self.hostMapForExist[NetUtil.GetHostIpOrName()]['datadir']
        for dndir_loop in localDataDirs:
            # Raw literal: \< and \> are word anchors for grep, not Python
            # escape sequences.
            cmd = r"gs_ctl query -D %s|grep '\<local_role\>'| " \
                  "awk -F ':' '{print $2}'" % dndir_loop
            (status, output) = subprocess.getstatusoutput(cmd)
            if 'Primary' not in output:
                GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35804"])
        msgPrint = "%sDo you want to continue to drop the target node (yes/no)?" \
                   % statusDelHost
        self.checkInput(msgPrint)

    def checkConnection(self, hostnames, env):
        """
        check the node connection, change the timeout to 30s as 330s is too long
        if the node which will not be deleted can't be connected, report ERR
        else continue

        hostnames: hosts on which the probe command "echo 1" is run
        env: environment file handed to the ssh tool
        The "[FAILURE]" lines of the ssh output are kept in
        self.failureHosts as one string.
        """
        command = "echo 1"
        # NOTE(review): the docstring mentions a 30s timeout while -20 is
        # passed here; how SshTool interprets that value is not visible in
        # this file - confirm.
        sshTool = SshTool(hostnames, None, -20)
        resultMap, outputCollect = sshTool.getSshStatusOutput(command,
                                                              hostnames, env)
        self.logger.debug(outputCollect)
        self.failureHosts = '.'.join(re.findall(r"\[FAILURE\] .*:.*\n",
                                                outputCollect))
        for host in self.hostMapForExist:
            if self._is_failure_host(host):
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_358["GAUSS_35807"] % host)

    def _is_failure_host(self, host):
        """
        function: Tell whether host occurs as a whole name in the collected
                  "[FAILURE]" lines
        input: host name
        output: True / False
        """
        # A plain substring test would also report "node1" when only
        # "node10" failed, so the name must not be surrounded by further
        # word characters or hyphens.
        pattern = r"(?<![\w-])%s(?![\w-])" % re.escape(host)
        return re.search(pattern, self.failureHosts) is not None

    def initLogs(self):
        """
        function: init log file. Resolves <GAUSSLOG>/om/<drop node log file>
                  from the user profile and creates the logger.
        input: NA
        output: NA
        Raises Exception when the user profile file does not exist.
        """
        if not os.path.isfile(self.userProfile):
            raise Exception(
                ErrorCode.GAUSS_502["GAUSS_50210"] % self.userProfile)
        log_path = EnvUtil.getEnvironmentParameterValue("GAUSSLOG",
                                                        self.user,
                                                        self.userProfile)
        self.logFile = os.path.realpath(
            "%s/om/%s" % (log_path, DefaultValue.DROPNODE_LOG_FILE))
        # if not absolute path
        if not os.path.isabs(self.logFile):
            GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log")
        self.initLogger("gs_dropnode")
        self.logger.ignoreErr = True

    def checkInput(self, msgPrint):
        """
        function: Ask the user a yes/no question. An answer that is none of
                  yes/no/y/n (case-insensitive) is asked again, at most
                  twice; anything but yes/y in the end terminates the
                  process.
        input: msgPrint, the prompt of the first question
        output: NA
        """
        flag = input(msgPrint)
        for _ in range(2):
            if flag.upper() in ("YES", "NO", "Y", "N"):
                break
            flag = input("Please type 'yes' or 'no': ")
        if flag.upper() not in ("YES", "Y"):
            GaussLog.exitWithError(
                ErrorCode.GAUSS_358["GAUSS_35805"] % flag.upper())

    def drop_run(self):
        """
        This is factory method of drop node operation

        Picks the CM-aware implementation when the static configuration
        reports at least one CM server, the plain one otherwise, and runs
        it.
        """
        if DefaultValue.get_cm_server_num_from_static(self.clusterInfo) > 0:
            self.logger.log("Drop node start with CM node.")
            drop_node_impl = DropNodeWithCmImpl(self)
        else:
            self.logger.log("Drop node start without CM node.")
            drop_node_impl = DropnodeImpl(self)
        drop_node_impl.run()
if __name__ == "__main__":
    # The tool refuses to run as root (uid 0).
    if os.getuid() == 0:
        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"])
    drop_node = Dropnode()
    # 1. read the command line, 2. set up logging, 3. run the pre-checks
    drop_node.parseCommandLine()
    drop_node.initLogs()
    drop_node.check_repeat_process()
    drop_node.checkParameters()
    # Probe every node name recorded by checkParameters().
    cluster_host_names = list(drop_node.backIpNameMap)
    drop_node.checkConnection(cluster_host_names, drop_node.envFile)
    drop_node.check_cluster_status()
    drop_node.flagForOnlyPrimaryLeft()
    # 4. perform the drop
    drop_node.drop_run()