346 lines
14 KiB
Python
346 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding:utf-8 -*-
|
|
#############################################################################
|
|
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
|
|
#
|
|
# openGauss is licensed under Mulan PSL v2.
|
|
# You can use this software according to the terms
|
|
# and conditions of the Mulan PSL v2.
|
|
# You may obtain a copy of Mulan PSL v2 at:
|
|
#
|
|
# http://license.coscl.org.cn/MulanPSL2
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OF ANY KIND,
|
|
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
# See the Mulan PSL v2 for more details.
|
|
# ----------------------------------------------------------------------------
|
|
# Description : gs_dropnode is a utility to drop a standby node from the cluster
|
|
#############################################################################
|
|
|
|
import datetime
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import pwd
|
|
import grp
|
|
# Make sure the libraries shipped under <script dir>/gspylib/clib come first
# in LD_LIBRARY_PATH. The variable is set in os.environ and the script is
# re-executed with that environment; os.execve replaces the current process,
# so on success nothing after the call runs in the old process.
package_path = os.path.dirname(os.path.realpath(__file__))
ld_path = package_path + "/gspylib/clib"
if os.environ.get('LD_LIBRARY_PATH') is None:
    # Variable absent: set it to the bundled library directory and restart.
    os.environ['LD_LIBRARY_PATH'] = ld_path
    os.execve(os.path.realpath(__file__), sys.argv, os.environ)
if not os.environ['LD_LIBRARY_PATH'].startswith(ld_path):
    # Variable present but not led by the bundled directory: prepend it
    # and restart.
    os.environ['LD_LIBRARY_PATH'] = ":".join(
        (ld_path, os.environ['LD_LIBRARY_PATH']))
    os.execve(os.path.realpath(__file__), sys.argv, os.environ)

# Appends the first sys.path entry to the end of sys.path as well.
sys.path.append(sys.path[0])
|
|
from gspylib.common.DbClusterInfo import dbClusterInfo
|
|
from gspylib.common.DbClusterStatus import DbClusterStatus
|
|
from gspylib.common.GaussLog import GaussLog
|
|
from gspylib.common.Common import DefaultValue, ClusterCommand
|
|
from gspylib.common.ErrorCode import ErrorCode
|
|
from gspylib.common.ParallelBaseOM import ParallelBaseOM
|
|
from gspylib.common.ParameterParsecheck import Parameter
|
|
from gspylib.threads.SshTool import SshTool
|
|
from impl.dropnode.DropnodeImpl import DropnodeImpl
|
|
from impl.dropnode.drop_node_with_cm_impl import DropNodeWithCmImpl
|
|
from base_utils.os.env_util import EnvUtil
|
|
from base_utils.os.net_util import NetUtil
|
|
from domain_utils.domain_common.cluster_constants import ClusterConstants
|
|
|
|
# Names of environment settings. "PATH" and "LD_LIBRARY_PATH" appear twice;
# the duplicates are kept so the list content and length stay unchanged.
ENV_LIST = [
    "MPPDB_ENV_SEPARATE_PATH",
    "GPHOME",
    "PATH",
    "LD_LIBRARY_PATH",
    "PYTHONPATH",
    "GAUSS_WARNING_TYPE",
    "GAUSSHOME",
    "PATH",
    "LD_LIBRARY_PATH",
    "S3_CLIENT_CRT_FILE",
    "GAUSS_VERSION",
    "PGHOST",
    "GS_CLUSTER_NAME",
    "GAUSSLOG",
    "GAUSS_ENV",
    "umask",
]
|
|
|
|
|
|
class Dropnode(ParallelBaseOM):
    """
    Command-line front end of gs_dropnode.

    Parses and validates the request, checks connectivity and cluster
    state, asks the operator for confirmation and then delegates the
    actual removal to DropnodeImpl or DropNodeWithCmImpl.
    """

    def __init__(self):
        """
        function: Initialise bookkeeping containers and resolve the
                  environment file and user profile paths
        input: NA
        output: NA
        """
        ParallelBaseOM.__init__(self)
        # Back IPs (from -h) of the standby nodes that are to be deleted
        self.hostIpListForDel = []
        # node name -> datanode details, for nodes that will be dropped
        self.hostMapForDel = {}
        # node name -> datanode details, for nodes that stay in the cluster
        self.hostMapForExist = {}
        self.clusterInfo = dbClusterInfo()
        # node name -> first back IP of that node (filled in checkParameters)
        self.backIpNameMap = {}
        # Starts as an empty list; checkConnection replaces it with a string
        self.failureHosts = []
        self.flagOnlyPrimary = False
        envFile = EnvUtil.getEnv("MPPDB_ENV_SEPARATE_PATH")
        if envFile:
            self.envFile = envFile
            self.userProfile = envFile
        else:
            self.envFile = ClusterConstants.ETC_PROFILE
            # Home directory of self.user (as left by ParallelBaseOM.__init__
            # -- not visible here, TODO confirm it is already set). This is
            # the same expansion a shell performs for "echo ~<user>", without
            # spawning a subprocess.
            homeDir = os.path.expanduser("~%s" % self.user)
            self.userProfile = os.path.join(homeDir, ".bashrc")

    def usage(self):
        """
gs_dropnode is a utility to delete the standby node from a cluster.

Usage:
    gs_dropnode -? | --help
    gs_dropnode -V | --version
    gs_dropnode -U USER -G GROUP -h nodeList
General options:
    -U                                 Cluster user.
    -G                                 Group of the cluster user.
    -h                                 The standby node backip list which need to be deleted
                                       Separate multiple nodes with commas (,).
                                       such as '-h 192.168.0.1,192.168.0.2'
    -?, --help                         Show help information for this
                                       utility, and exit the command line mode.
    -V, --version                      Show version information.
        """
        # The docstring above doubles as the help text shown to the user.
        print(self.usage.__doc__)

    def parseCommandLine(self):
        """
        function: Parse parameters from the command line and store them
                  on the instance; prints the usage and exits with status
                  0 when the help flag is present
        input: NA
        output: NA
        """
        ParaObj = Parameter()
        ParaDict = ParaObj.ParameterCommandLine("dropnode")

        # parameter -? or --help
        if "helpFlag" in ParaDict:
            self.usage()
            sys.exit(0)
        # parameter -U
        if "user" in ParaDict:
            self.user = ParaDict.get("user")
            DefaultValue.checkPathVaild(self.user)
        # parameter -G
        if "group" in ParaDict:
            self.group = ParaDict.get("group")
        # parameter -h
        if "nodename" in ParaDict:
            self.hostIpListForDel = ParaDict.get("nodename")

    def checkParameters(self):
        """
        function: Check the parameters from the command line against the
                  system accounts and the static cluster configuration,
                  and build hostMapForDel / hostMapForExist /
                  backIpNameMap
        input: NA
        output: NA
        """
        # check user | group | node
        if not self.user:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-U")
        if not self.group:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-G")
        if not self.hostIpListForDel:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35801"] % "-h")
        # check if upgrade action is exist
        if DefaultValue.isUnderUpgrade(self.user):
            GaussLog.exitWithError(ErrorCode.GAUSS_529["GAUSS_52936"])

        # Look the user and the group up separately, so that a missing
        # group is never mistaken for a missing user (which a substring
        # test on the KeyError text does when the user name is contained
        # in the group name).
        try:
            pw_user = pwd.getpwnam(self.user)
        except KeyError:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_503["GAUSS_50300"] % self.user)
        try:
            gr_group = grp.getgrnam(self.group)
        except KeyError:
            self.logger.log("Group %s not exist." % self.group)
            sys.exit(1)

        # get dbcluster info from static config file
        self.clusterInfo.initFromStaticConfig(self.user)
        appPath = self.clusterInfo.appPath
        appStat = os.stat(appPath)
        # The application directory must be owned by the given user/group.
        if appStat.st_uid != pw_user.pw_uid \
                or appStat.st_gid != gr_group.gr_gid:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_503["GAUSS_50323"] % self.user)

        self.backIpNameMap = {}
        for node in self.clusterInfo.dbNodes:
            backIp = node.backIps[0]
            self.backIpNameMap[node.name] = backIp
            nodeInfo = {'ipaddr': backIp,
                        'datadir': [], 'dn_id': [],
                        'port': []}
            for dnInst in node.datanodes:
                nodeInfo['datadir'].append(dnInst.datadir)
                nodeInfo['dn_id'].append('dn_' + str(dnInst.instanceId))
                nodeInfo['port'].append(str(dnInst.port))
            if backIp in self.hostIpListForDel:
                self.hostMapForDel[node.name] = nodeInfo
            else:
                # Surviving nodes carry three extra, initially empty lists.
                nodeInfo['replToBeDel'] = []
                nodeInfo['syncStandbyDel'] = []
                nodeInfo['pghbaDel'] = []
                self.hostMapForExist[node.name] = nodeInfo

        # The node this tool runs on must not be in the drop list.
        localIp = self.backIpNameMap[NetUtil.GetHostIpOrName()]
        if localIp in self.hostIpListForDel:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_358["GAUSS_35803"] % localIp)

        # Every requested IP has to be the back IP of a cluster node.
        knownIps = self.backIpNameMap.values()
        for ipLoop in self.hostIpListForDel:
            if ipLoop not in knownIps:
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_358["GAUSS_35802"] %
                    self.hostIpListForDel)

        if not self.hostMapForDel:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_358["GAUSS_35802"] % self.hostIpListForDel)

    def check_repeat_process(self):
        """
        function: Check whether another gs_dropnode with the same -U/-G
                  arguments shows up in the process list, and exit with
                  GAUSS_35810 when more than one matching line is found
        input: NA
        output: NA
        """
        # presumably the current process accounts for one matching line,
        # hence the "> 1" threshold below -- TODO confirm
        cmd = "ps -ef | grep 'gs_dropnode -U %s -G %s' | grep -v grep" \
              % (self.user, self.group)
        (status, output) = subprocess.getstatusoutput(cmd)
        if status == 0 and len(output.split('\n')) > 1:
            GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35810"])

    def flagForOnlyPrimaryLeft(self):
        """
        function: Check whether only one node be left in the cluster;
                  if so, ask for confirmation and set flagOnlyPrimary
        input: NA
        output: NA
        """
        countClusterNodes = len(self.backIpNameMap)
        if (countClusterNodes - len(self.hostIpListForDel)) == 1:
            msgPrint = "The cluster will have only one standalone node left " \
                       "after the operation!\nDo you want to continue to drop " \
                       "the target node (yes/no)? "
            self.checkInput(msgPrint)
            self.flagOnlyPrimary = True

    def check_cluster_status(self):
        """
        function: Check whether the status of cluster is normal, verify
                  that every local datanode reports the Primary role and
                  ask the operator to confirm the drop
        input: NA
        output: NA
        """
        tmpDir = EnvUtil.getTmpDirFromEnv()
        timeStamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
        tmpFile = os.path.join(
            tmpDir,
            "gauss_cluster_status.dat_" + str(timeStamp) + "_"
            + str(os.getpid()))
        # The cluster-wide status query only runs when no host failed the
        # connection check.
        # NOTE(review): tmpFile is not removed anywhere in this method.
        if not self.failureHosts:
            cmd = ClusterCommand.getQueryStatusCmd("", tmpFile, False)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                self.logger.debug("The cmd is %s " % cmd)
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                                cmd + "Error: \n%s" % output)
            clusterStatus = DbClusterStatus()
            clusterStatus.initFromFile(tmpFile)
            clsStatus = clusterStatus.clusterStatusDetail
            if clsStatus in ["Unknown", "Unavailable"]:
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_358["GAUSS_35806"] % clsStatus)

        # Renders as "(['node1', 'node2'])", the same text as slicing the
        # "dict_keys" prefix off str(dict.keys()).
        statusDelHost = "The target node to be dropped is %s \n" % (
            "(%s)" % (list(self.hostMapForDel),))
        localDataDirs = \
            self.hostMapForExist[NetUtil.GetHostIpOrName()]['datadir']
        for dndir_loop in localDataDirs:
            # Raw string keeps the \< and \> word anchors for grep without
            # relying on an invalid Python escape sequence.
            cmd = r"gs_ctl query -D %s|grep '\<local_role\>'| " \
                  "awk -F ':' '{print $2}'" % dndir_loop
            (status, output) = subprocess.getstatusoutput(cmd)
            if 'Primary' not in output:
                GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35804"])
        msgPrint = "%sDo you want to continue to drop the target node (yes/no)?" \
                   % statusDelHost
        self.checkInput(msgPrint)

    def checkConnection(self, hostnames, env):
        """
        check the node connection, change the timeout to 30s as 330s is too long
        if the node which will not be deleted can't be connected, report ERR
        else continue

        input: hostnames - hosts to probe over ssh
               env       - environment file handed to the ssh tool
        output: NA (stores the failure lines in self.failureHosts)
        """
        command = "echo 1"
        # NOTE(review): the third argument (-20) is presumably what yields
        # the 30s timeout mentioned above -- confirm against SshTool.
        sshTool = SshTool(hostnames, None, -20)
        resultMap, outputCollect = sshTool.getSshStatusOutput(command,
                                                              hostnames, env)
        self.logger.debug(outputCollect)
        # All "[FAILURE] ...:...\n" lines joined into ONE string, so the
        # membership test below is a substring test.
        # NOTE(review): a host whose name is contained in another failing
        # host's line would match as well.
        self.failureHosts = '.'.join(re.findall(r"\[FAILURE\] .*:.*\n",
                                                outputCollect))
        for host in list(self.hostMapForExist.keys()):
            if host in self.failureHosts:
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_358["GAUSS_35807"] % host)

    def initLogs(self):
        """
        function: Init log file; raises when the user profile is missing
                  and exits when the log path is not absolute
        input: NA
        output: NA
        """
        if not os.path.isfile(self.userProfile):
            raise Exception(
                ErrorCode.GAUSS_502["GAUSS_50210"] % self.userProfile)
        log_path = EnvUtil.getEnvironmentParameterValue("GAUSSLOG",
                                                        self.user,
                                                        self.userProfile)
        self.logFile = os.path.realpath(
            "%s/om/%s" % (log_path, DefaultValue.DROPNODE_LOG_FILE))
        # if not absolute path
        if not os.path.isabs(self.logFile):
            GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log")
        self.initLogger("gs_dropnode")
        self.logger.ignoreErr = True

    def checkInput(self, msgPrint):
        """
        function: Ask the operator a yes/no question; an unrecognised
                  answer is re-prompted at most twice, and anything other
                  than yes/y (case-insensitive) ends with GAUSS_35805
        input: msgPrint - prompt shown for the first question
        output: NA
        """
        flag = input(msgPrint)
        for _ in range(2):
            if flag.upper() in ("YES", "NO", "Y", "N"):
                break
            flag = input("Please type 'yes' or 'no': ")
        if flag.upper() not in ("YES", "Y"):
            GaussLog.exitWithError(
                ErrorCode.GAUSS_358["GAUSS_35805"] % flag.upper())

    def drop_run(self):
        """
        This is factory method of drop node operation: picks the CM-aware
        implementation when the static configuration reports at least one
        CM server, otherwise the plain one, and runs it.
        """
        if DefaultValue.get_cm_server_num_from_static(self.clusterInfo) > 0:
            self.logger.log("Drop node start with CM node.")
            drop_node_impl = DropNodeWithCmImpl(self)
        else:
            self.logger.log("Drop node start without CM node.")
            drop_node_impl = DropnodeImpl(self)
        drop_node_impl.run()
|
|
|
|
|
|
if __name__ == "__main__":
    # The tool refuses to run as root (uid 0).
    if os.getuid() == 0:
        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"])

    dropNode = Dropnode()

    # Gather input and set up logging before any validation step runs.
    dropNode.parseCommandLine()
    dropNode.initLogs()

    # Validation steps, in the order they depend on each other:
    # checkParameters fills backIpNameMap, which the connection check uses.
    dropNode.check_repeat_process()
    dropNode.checkParameters()
    clusterHostNames = list(dropNode.backIpNameMap.keys())
    dropNode.checkConnection(clusterHostNames, dropNode.envFile)
    dropNode.check_cluster_status()
    dropNode.flagForOnlyPrimaryLeft()

    # All checks passed: perform the drop.
    dropNode.drop_run()
|
|
|