# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
# http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description : DropnodeImpl.py
#############################################################################
import subprocess
import sys
import re
import os
import pwd
import datetime
import grp
import socket
sys.path.append(sys.path[0] + "/../../../../")
from gspylib.threads.SshTool import SshTool
from gspylib.common.ErrorCode import ErrorCode
from gspylib.common.Common import DefaultValue
from gspylib.common.GaussLog import GaussLog
from gspylib.inspection.common.Exception import CheckException
from gspylib.common.OMCommand import OMCommand
from base_utils.os.env_util import EnvUtil
from base_utils.os.net_util import NetUtil
from domain_utils.domain_common.cluster_constants import ClusterConstants
# master
MASTER_INSTANCE = 0
# standby
STANDBY_INSTANCE = 1
# status failed
STATUS_FAIL = "Failure"
class DropnodeImpl():
"""
class for drop a standby node.
step:
1. check whether all standby can be reached or the switchover/failover is happening
2. shutdown the program of the target node if it can be reached
3. flush the configuration on all nodes if it is still a HA cluster
4. flush the configuration on primary if it is the only one left
"""
def __init__(self, dropnode):
"""
"""
self.context = dropnode
self.user = self.context.user
self.userProfile = self.context.userProfile
self.group = self.context.group
self.backupFilePrimary = ''
self.localhostname = NetUtil.GetHostIpOrName()
self.logger = self.context.logger
self.resultDictOfPrimary = []
self.replSlot = ''
envFile = EnvUtil.getEnv("MPPDB_ENV_SEPARATE_PATH")
if envFile:
self.envFile = envFile
else:
self.envFile = ClusterConstants.ETC_PROFILE
gphomepath = EnvUtil.getEnv("GPHOME")
if gphomepath:
self.gphomepath = gphomepath
else:
(status, output) = subprocess.getstatusoutput("which gs_om")
if "no gs_om in" in output:
raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GPHOME")
self.gphomepath = os.path.normpath(output.replace("/gs_om", ""))
if not EnvUtil.getEnv("PGHOST"):
GaussLog.exitWithError(ErrorCode.GAUSS_518["GAUSS_51802"] % (
"\"PGHOST\", please import environment variable"))
self.pghostPath = EnvUtil.getEnv("PGHOST")
self.appPath = self.context.clusterInfo.appPath
self.gsql_path = "source %s;%s/bin/gsql" % (self.userProfile, self.appPath)
self.dnIdForDel = []
for hostDelName in self.context.hostMapForDel.keys():
self.dnIdForDel += self.context.hostMapForDel[hostDelName]['dn_id']
self.commonOper = OperCommon(dropnode)
def change_user(self):
if os.getuid() == 0:
user = self.user
            try:
                pw_record = pwd.getpwnam(user)
            except KeyError:
                # pwd.getpwnam raises KeyError when the user does not exist
                GaussLog.exitWithError(ErrorCode.GAUSS_503["GAUSS_50300"] % user)
user_uid = pw_record.pw_uid
user_gid = pw_record.pw_gid
os.setgid(user_gid)
os.setuid(user_uid)
def checkAllStandbyState(self):
"""
check all standby state whether switchover is happening
"""
for hostNameLoop in self.context.hostMapForExist.keys():
sshtool_host = SshTool([hostNameLoop])
for i in self.context.hostMapForExist[hostNameLoop]['datadir']:
# check whether switchover/failover is happening
self.commonOper.checkStandbyState(hostNameLoop, i,
sshtool_host,
self.userProfile)
self.cleanSshToolFile(sshtool_host)
for hostNameLoop in self.context.hostMapForDel.keys():
if hostNameLoop not in self.context.failureHosts:
sshtool_host = SshTool([hostNameLoop])
for i in self.context.hostMapForDel[hostNameLoop]['datadir']:
# check whether switchover/failover is happening
self.commonOper.checkStandbyState(hostNameLoop, i,
sshtool_host,
self.userProfile, True)
self.commonOper.stopInstance(hostNameLoop, sshtool_host, i,
self.userProfile)
cmdDelCert = "ls %s/share/sslcert/grpc/* | " \
"grep -v openssl.cnf | xargs rm -rf" % self.appPath
result, output = sshtool_host.getSshStatusOutput(cmdDelCert,
[hostNameLoop], self.userProfile)
if result[hostNameLoop] != 'Success':
self.logger.debug(output)
self.logger.log("[gs_dropnode]Failed to delete the GRPC "
"sslcert of %s." % hostNameLoop)
self.logger.log("[gs_dropnode]Please check and delete the "
"GRPC sslcert of %s manually." % hostNameLoop)
self.cleanSshToolFile(sshtool_host)
else:
self.logger.log("[gs_dropnode]Cannot connect %s. Please check "
"and delete the GRPC sslcert of %s manually."
% (hostNameLoop, hostNameLoop))
def dropNodeOnAllHosts(self):
"""
drop the target node on the other host
"""
for hostNameLoop in self.context.hostMapForExist.keys():
sshtool_host = SshTool([hostNameLoop])
# backup
backupfile = self.commonOper.backupConf(
self.gphomepath, self.user,
hostNameLoop, self.userProfile, sshtool_host, self.pghostPath)
self.logger.log(
"[gs_dropnode]The backup file of " + hostNameLoop + " is " + backupfile)
if hostNameLoop == self.localhostname:
self.backupFilePrimary = backupfile
indexForuse = 0
for i in self.context.hostMapForExist[hostNameLoop]['datadir']:
# parse
resultDict = self.commonOper.parseConfigFile(hostNameLoop, i,
self.dnIdForDel,
self.context.hostIpListForDel,
sshtool_host,
self.envFile)
resultDictForRollback = self.commonOper.parseBackupFile(
hostNameLoop, backupfile,
self.context.hostMapForExist[hostNameLoop][
'dn_id'][indexForuse],
resultDict['replStr'], sshtool_host,
self.envFile)
if hostNameLoop == self.localhostname:
self.resultDictOfPrimary.append(resultDict)
# try set
try:
self.commonOper.SetPgsqlConf(resultDict['replStr'],
hostNameLoop, i,
resultDict['syncStandbyStr'],
sshtool_host,
self.userProfile,
'',
self.context.flagOnlyPrimary)
except ValueError:
self.logger.log("[gs_dropnode]Rollback pgsql process.")
self.commonOper.SetPgsqlConf(resultDict['replStr'],
hostNameLoop, i,
resultDict['syncStandbyStr'],
sshtool_host,
self.userProfile,
resultDictForRollback[
'rollbackReplStr'])
indexForuse += 1
self.cleanSshToolFile(sshtool_host)
def operationOnlyOnPrimary(self):
"""
operation only need to be executed on primary node
"""
for hostNameLoop in self.context.hostMapForExist.keys():
try:
self.commonOper.SetPghbaConf(self.userProfile, hostNameLoop,
self.resultDictOfPrimary[0][
'pghbaStr'], False)
except ValueError:
self.logger.log("[gs_dropnode]Rollback pghba conf.")
self.commonOper.SetPghbaConf(self.userProfile, hostNameLoop,
self.resultDictOfPrimary[0][
'pghbaStr'], True)
indexLoop = 0
for i in self.context.hostMapForExist[self.localhostname]['datadir']:
try:
self.commonOper.SetReplSlot(self.localhostname, self.gsql_path,
self.context.hostMapForExist[self.localhostname]['port'][indexLoop],
self.dnIdForDel)
except ValueError:
self.logger.log("[gs_dropnode]Rollback replslot")
self.commonOper.SetReplSlot(self.localhostname, self.gsql_path,
self.context.hostMapForExist[self.localhostname]['port'][indexLoop],
self.dnIdForDel, True)
indexLoop += 1
def modifyStaticConf(self):
"""
Modify the cluster static conf and save it
"""
self.logger.log("[gs_dropnode]Start to modify the cluster static conf.")
staticConfigPath = "%s/bin/cluster_static_config" % self.appPath
        # back up the static config first; this only needs to be done on the primary node
tmpDir = EnvUtil.getEnvironmentParameterValue("PGHOST", self.user,
self.userProfile)
cmd = "cp %s %s/%s_BACKUP" % (
staticConfigPath, tmpDir, 'cluster_static_config')
(status, output) = subprocess.getstatusoutput(cmd)
if status:
self.logger.debug("[gs_dropnode]Backup cluster_static_config failed"
+ output)
backIpDict = self.context.backIpNameMap
backIpDict_values = list(backIpDict.values())
backIpDict_keys = list(backIpDict.keys())
for ipLoop in self.context.hostIpListForDel:
nameLoop = backIpDict_keys[backIpDict_values.index(ipLoop)]
dnLoop = self.context.clusterInfo.getDbNodeByName(nameLoop)
self.context.clusterInfo.dbNodes.remove(dnLoop)
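        # regenerate the static config for every remaining node: the local
        # node's copy is written in place, the others go to temporary
        # per-node files that are distributed by scp further below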
for dbNode in self.context.clusterInfo.dbNodes:
if dbNode.name == self.localhostname:
self.context.clusterInfo.saveToStaticConfig(staticConfigPath,
dbNode.id)
continue
staticConfigPath_dn = "%s/cluster_static_config_%s" % (
tmpDir, dbNode.name)
self.context.clusterInfo.saveToStaticConfig(staticConfigPath_dn,
dbNode.id)
self.logger.debug(
"[gs_dropnode]Start to scp the cluster static conf to any other node.")
if not self.context.flagOnlyPrimary:
cmd = "%s/script/gs_om -t refreshconf" % self.gphomepath
subprocess.getstatusoutput(cmd)
for hostName in self.context.hostMapForExist.keys():
hostSsh = SshTool([hostName])
if hostName != self.localhostname:
staticConfigPath_name = "%s/cluster_static_config_%s" % (
tmpDir, hostName)
hostSsh.scpFiles(staticConfigPath_name, staticConfigPath,
[hostName], self.envFile)
try:
os.unlink(staticConfigPath_name)
except FileNotFoundError:
pass
self.cleanSshToolFile(hostSsh)
self.logger.log("[gs_dropnode]End of modify the cluster static conf.")
def cleanSshToolFile(self, sshTool):
"""
"""
try:
sshTool.clenSshResultFiles()
except Exception as e:
self.logger.debug(str(e))
def checkUserAndGroupExists(self):
"""
check system user and group exists and be same
on primary and standby nodes
"""
inputUser = self.user
inputGroup = self.group
user_group_id = ""
isUserExits = False
localHost = socket.gethostname()
for user in pwd.getpwall():
if user.pw_name == self.user:
user_group_id = user.pw_gid
isUserExits = True
break
if not isUserExits:
GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \
% ("User", self.user, localHost))
isGroupExits = False
group_id = ""
for group in grp.getgrall():
if group.gr_name == self.group:
group_id = group.gr_gid
isGroupExits = True
if not isGroupExits:
GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \
% ("Group", self.group, localHost))
if user_group_id != group_id:
GaussLog.exitWithError("User [%s] is not in the group [%s]." \
% (self.user, self.group))
hostNames = list(self.context.hostMapForExist.keys())
envfile = self.envFile
sshTool = SshTool(hostNames)
        # get the user name on the other standby nodes
getUserNameCmd = "cat /etc/passwd | grep -w %s" % inputUser
resultMap, outputCollect = sshTool.getSshStatusOutput(getUserNameCmd,
[], envfile)
for hostKey in resultMap:
if resultMap[hostKey] == STATUS_FAIL:
self.cleanSshToolFile(sshTool)
GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \
% ("User", self.user, hostKey))
        # get the group name on the other standby nodes
getGroupNameCmd = "cat /etc/group | grep -w %s" % inputGroup
resultMap, outputCollect = sshTool.getSshStatusOutput(getGroupNameCmd,
[], envfile)
for hostKey in resultMap:
if resultMap[hostKey] == STATUS_FAIL:
self.cleanSshToolFile(sshTool)
GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \
% ("Group", self.group, hostKey))
self.cleanSshToolFile(sshTool)
def restartInstance(self):
if self.context.flagOnlyPrimary:
self.logger.log("[gs_dropnode]Remove the dynamic conf.")
dynamicConfigPath = "%s/bin/cluster_dynamic_config" % self.appPath
try:
os.unlink(dynamicConfigPath)
except FileNotFoundError:
pass
msgPrint = "Only one primary node is left. It is recommended to " \
"restart the node.\nDo you want to restart the primary " \
"node now (yes/no)? "
self.context.checkInput(msgPrint)
sshTool = SshTool([self.localhostname])
for i in self.context.hostMapForExist[self.localhostname]['datadir']:
self.commonOper.stopInstance(self.localhostname, sshTool, i,
self.userProfile)
self.commonOper.startInstance(i, self.userProfile)
self.cleanSshToolFile(sshTool)
else:
pass
def run(self):
"""
start dropnode
"""
self.change_user()
self.logger.log("[gs_dropnode]Start to drop nodes of the cluster.")
self.checkAllStandbyState()
self.dropNodeOnAllHosts()
self.operationOnlyOnPrimary()
self.modifyStaticConf()
self.restartInstance()
self.logger.log("[gs_dropnode]Success to drop the target nodes.")
class OperCommon:
def __init__(self, dropnode):
"""
"""
self.logger = dropnode.logger
self.user = dropnode.user
def checkStandbyState(self, host, dirDn, sshTool, envfile, isForDel=False):
"""
check the existed standby node state
Exit if the role is not standby or the state of database is not normal
"""
sshcmd = "gs_ctl query -D %s" % dirDn
(statusMap, output) = sshTool.getSshStatusOutput(sshcmd, [host],
envfile)
if 'Is server running?' in output and not isForDel:
GaussLog.exitWithError(ErrorCode.GAUSS_516["GAUSS_51651"] % host)
elif 'Is server running?' in output and isForDel:
return
else:
res = re.findall(r'db_state\s*:\s*(\w+)', output)
if not len(res) and isForDel:
return
elif not len(res):
GaussLog.exitWithError(ErrorCode.GAUSS_516["GAUSS_51651"] % host)
dbState = res[0]
if dbState in ['Promoting', 'Wait', 'Demoting']:
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35808"] % host)
def backupConf(self, appPath, user, host, envfile, sshTool, pghostPath):
"""
backup the configuration file (postgresql.conf and pg_hba.conf)
The Backup.py can do this
"""
self.logger.log(
"[gs_dropnode]Start to backup parameter config file on %s." % host)
tmpPath = "%s/gs_dropnode_backup%s" % (pghostPath,
str(datetime.datetime.now().strftime('%Y%m%d%H%M%S')))
backupPyPath = os.path.join(appPath, './script/local/Backup.py')
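        # remove any leftover backup directories from previous runs,
        # then create a fresh timestamped backup directory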
cmd = "(find %s -type d | grep gs_dropnode_backup | xargs rm -rf;" \
"if [ ! -d '%s' ]; then mkdir -p '%s' -m %s;fi)" \
% (pghostPath, tmpPath, tmpPath, DefaultValue.KEY_DIRECTORY_MODE)
sshTool.executeCommand(cmd, DefaultValue.SUCCESS, [host], envfile)
logfile = os.path.join(tmpPath, 'gs_dropnode_call_Backup_py.log')
cmd = "python3 %s -U %s -P %s -p --nodeName=%s -l %s" \
% (backupPyPath, user, tmpPath, host, logfile)
(statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile)
if statusMap[host] != 'Success':
self.logger.debug(
"[gs_dropnode]Backup parameter config file failed." + output)
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"])
self.logger.log(
"[gs_dropnode]End to backup parameter config file on %s." % host)
return '%s/parameter_%s.tar' % (tmpPath, host)
def check_is_vip_mode(self):
"""
Check whether the current mode is VIP
"""
cmd = "cm_ctl res --list | awk -F \"|\" '{print $2}' | grep -w \"VIP\""
self.logger.log("Command for Checking VIP mode: %s" % cmd)
stat, out= subprocess.getstatusoutput(cmd)
if stat != 0 or not out:
return False
return True
def get_float_ip_from_json(self, base_ip, host_ips_for_del):
"""
Get float IP from json file by cmd
"""
cmd = "cm_ctl res --list | grep \"VIP\" | awk -F \"|\" '{print $1}' | " \
"xargs -i cm_ctl res --list --res_name={} --list_inst |grep \"base_ip=%s\""\
" | awk -F \"|\" '{print $1}' | xargs -i cm_ctl res --list --res_name={}" \
" | grep \"VIP\" | awk -F \"|\" '{print $3}'" % base_ip
        stat, out = subprocess.getstatusoutput(cmd)
if stat != 0:
GaussLog.exitWithError(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd)
if not out:
self.logger.log("Failed to get float IP from json. Cmd: %s" % cmd)
return ""
        float_ip = re.findall(r"float_ip=([\.\d]+)", out.strip())[0]
cmd = "cm_ctl res --list | grep \"VIP\" | awk -F \"|\" '{print $1}' | " \
"xargs -i cm_ctl res --list --res_name={} | grep \"float_ip=%s\" | " \
"awk -F \"|\" '{print $1}' | xargs -i cm_ctl res --list --res_name={} " \
"--list_inst | grep \"VIP\" | awk -F \"|\" '{print $5}'" % float_ip
        stat, out = subprocess.getstatusoutput(cmd)
if stat != 0 or not out:
raise Exception("Failed to get base IP list from json. Cmd: %s" % cmd)
for item in out.split('\n'):
            _ip = re.findall(r"base_ip=([\.\d]+)", item.strip())[0]
if _ip not in host_ips_for_del:
return ""
self.logger.log("Successfully get float IP from json, %s." % float_ip)
return float_ip
def get_float_ip_config(self, host, dn_dir, host_ips_for_del, ssh_tool, env_file):
"""
        Collect the pg_hba.conf entries of float IPs whose base IPs all
        belong to the nodes being dropped
"""
if not self.check_is_vip_mode():
self.logger.log("The current cluster does not support VIP.")
return ""
float_ips_for_del = []
for _ip in host_ips_for_del:
float_ip = self.get_float_ip_from_json(_ip, host_ips_for_del)
if float_ip and float_ip not in float_ips_for_del:
float_ips_for_del.append(float_ip)
cmd = "grep '^host.*sha256' %s" % os.path.join(dn_dir, 'pg_hba.conf')
stat_map, output = ssh_tool.getSshStatusOutput(cmd, [host], env_file)
if stat_map[host] != 'Success':
self.logger.debug("[gs_dropnode]Parse pg_hba file failed:" + output)
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"])
ret = ""
for float_ip in float_ips_for_del:
if float_ip in output:
s = output.rfind('host', 0, output.find(float_ip))
e = output.find('\n', output.find(float_ip), len(output))
ret += output[s:e] + '|'
return ret
def parseConfigFile(self, host, dirDn, dnId, hostIpListForDel, sshTool,
envfile):
"""
        Parse postgresql.conf and pg_hba.conf and collect the replication
        info that references the nodes being dropped
"""
self.logger.log(
"[gs_dropnode]Start to parse parameter config file on %s." % host)
resultDict = {'replStr': '', 'syncStandbyStr': '*', 'pghbaStr': ''}
pgConfName = os.path.join(dirDn, 'postgresql.conf')
pghbaConfName = os.path.join(dirDn, 'pg_hba.conf')
cmd = "grep -o '^replconninfo.*' %s | egrep -o '^replconninfo.*'" \
% pgConfName
(statusMap, output1) = sshTool.getSshStatusOutput(cmd, [host], envfile)
if statusMap[host] != 'Success':
self.logger.debug("[gs_dropnode]Parse replconninfo failed:" + output1)
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"])
cmd = "grep -o '^synchronous_standby_names.*' %s" % pgConfName
(statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile)
if statusMap[host] != 'Success':
self.logger.debug(
"[gs_dropnode]Parse synchronous_standby_names failed:" + output)
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"])
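        # take the value between the quotes of "synchronous_standby_names = '...'"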
output_v = output.split("'")[-2]
if output_v == '*':
resultDict['syncStandbyStr'] = output_v
else:
resultDict['syncStandbyStr'] = self.check_syncStandbyStr(dnId,
output_v)
cmd = "grep '^host.*trust' %s" % pghbaConfName
(statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile)
if statusMap[host] != 'Success':
self.logger.debug("[gs_dropnode]Parse pg_hba file failed:" + output)
for ip in hostIpListForDel:
if ip in output1:
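                # 12 == len('replconninfo'); grab the digit that follows it,
                # i.e. the index N of the replconninfo<N> entry to be cleared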
i = output1.rfind('replconninfo', 0, output1.find(ip)) + 12
resultDict['replStr'] += output1[i]
if ip in output:
s = output.rfind('host', 0, output.find(ip))
e = output.find('\n', output.find(ip), len(output))
resultDict['pghbaStr'] += output[s:e] + '|'
resultDict['pghbaStr'] += self.get_float_ip_config(host, dirDn, hostIpListForDel,
sshTool, envfile)
self.logger.log(
"[gs_dropnode]End to parse parameter config file on %s." % host)
return resultDict
def check_syncStandbyStr(self, dnlist, output):
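        """
        Recalculate the value of synchronous_standby_names after the DN ids
        in dnlist are removed from it, adjusting the standby count (if any)
        to fit the remaining standbys.
        For example, dnlist=['dn_6003'] and output="ANY 2 (dn_6002,dn_6003)"
        returns "ANY 1 (dn_6002)".
        """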
output_no = '0'
output_result = output
output_new_no = '1'
if '(' in output:
output_dn = re.findall(r'\((.*)\)', output)[0]
output_no = re.findall(r'.*(\d) *\(.*\)', output)[0]
else:
output_dn = output
output_dn_nospace = re.sub(' *', '', output_dn)
init_no = len(output_dn_nospace.split(','))
quorum_no = int(init_no / 2) + 1
half_no = quorum_no - 1
count_dn = 0
list_output1 = '*'
for dninst in dnlist:
if dninst in output_dn_nospace:
list_output1 = output_dn_nospace.split(',')
list_output1.remove(dninst)
list_output1 = ','.join(list_output1)
output_dn_nospace = list_output1
init_no -= 1
count_dn += 1
if count_dn == 0:
return output_result
if list_output1 == '':
return ''
if list_output1 != '*':
output_result = output.replace(output_dn, list_output1)
if output_no == '0':
return output_result
if int(output_no) == quorum_no:
output_new_no = str(int(init_no / 2) + 1)
output_result = output_result.replace(output_no, output_new_no, 1)
return output_result
elif int(output_no) > half_no and (int(output_no) - count_dn) > 0:
output_new_no = str(int(output_no) - count_dn)
elif int(output_no) > half_no and (int(output_no) - count_dn) <= 0:
output_new_no = '1'
elif int(output_no) < half_no and int(output_no) <= init_no:
output_new_no = output_no
elif half_no > int(output_no) > init_no:
output_new_no = str(init_no)
output_result = output_result.replace(output_no, output_new_no, 1)
return output_result
def parseBackupFile(self, host, backupfile, dnId, replstr, sshTool,
envfile):
"""
        Parse the backup file (e.g. parameter_<host>.tar) to get the values
        needed for rollback
"""
self.logger.log(
"[gs_dropnode]Start to parse backup parameter config file on %s." % host)
resultDict = {'rollbackReplStr': '', 'syncStandbyStr': ''}
backupdir = os.path.dirname(backupfile)
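        # dnId is expected to look like 'dn_<instance_id>' (e.g. 'dn_6002');
        # the backed-up files are named '<instance_id>_postgresql.conf',
        # hence the dnId[3:] slice in the command below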
cmd = "tar xf %s -C %s;grep -o '^replconninfo.*' %s/%s/%s_postgresql.conf;" \
"grep -o '^synchronous_standby_names.*' %s/%s/%s_postgresql.conf;" \
% (
backupfile, backupdir, backupdir, 'parameter_' + host, dnId[3:],
backupdir, 'parameter_' + host, dnId[3:])
(statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile)
if statusMap[host] != 'Success':
self.logger.log(
"[gs_dropnode]Parse backup parameter config file failed:" + output)
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"])
for i in replstr:
tmp_v = 'replconninfo' + i
s = output.index(tmp_v)
e = output.find('\n', s, len(output))
resultDict['rollbackReplStr'] += output[s:e].split("'")[-2] + '|'
s = output.index('synchronous_standby_names')
resultDict['syncStandbyStr'] = output[s:].split("'")[-2]
self.logger.log(
"[gs_dropnode]End to parse backup parameter config file %s." % host)
return resultDict
def SetPgsqlConf(self, replNo, host, dndir, syncStandbyValue, sshTool, envfile,
replValue='', singleLeft=False):
"""
Set the value of postgresql.conf
"""
self.logger.log(
"[gs_dropnode]Start to set openGauss config file on %s." % host)
setvalue = ''
if not replValue and replNo != '':
for i in replNo:
setvalue += " -c \"replconninfo%s = ''\"" % i
if len(replValue) > 0:
count = 0
for i in replNo:
setvalue += " -c \"replconninfo%s = '%s'\"" % (
i, replValue[:-1].split('|')[count])
count += 1
if not singleLeft and syncStandbyValue != '*':
setvalue += " -c \"synchronous_standby_names = '%s'\"" \
% syncStandbyValue
if singleLeft:
setvalue += " -c \"synchronous_standby_names = ''\""
if setvalue != '':
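            # the leading '[need_replace_quotes]' tag is a marker consumed by
            # SshTool so the embedded double quotes survive remote execution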
cmd = "[need_replace_quotes] source %s;gs_guc reload -D %s%s" % \
(envfile, dndir, setvalue)
self.logger.debug(
"[gs_dropnode]Start to set pgsql by guc on %s:%s" % (host, cmd))
(statusMap, output) = sshTool.getSshStatusOutput(cmd, [host], envfile)
if statusMap[host] != 'Success' or "Failure to perform gs_guc" in output:
self.logger.debug(
"[gs_dropnode]Failed to set pgsql by guc on %s:%s" % (host, output))
raise ValueError(output)
self.logger.log(
"[gs_dropnode]End of set openGauss config file on %s." % host)
def SetPghbaConf(self, envProfile, host, pgHbaValue,
flagRollback=False):
"""
Set the value of pg_hba.conf
"""
self.logger.log(
"[gs_dropnode]Start of set pg_hba config file on %s." % host)
cmd = 'source %s;' % envProfile
if len(pgHbaValue):
if not flagRollback:
for i in pgHbaValue[:-1].split('|'):
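                    # keep only the part up to and including the '/32' mask
                    # (the auth method is stripped) before handing the entry
                    # to gs_guc; the rollback branch passes the full line back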
v = i[0:i.find('/32') + 3]
cmd += "gs_guc set -N %s -I all -h '%s';" % (host, v)
if flagRollback:
for i in pgHbaValue[:-1].split('|'):
cmd += "gs_guc set -N %s -I all -h '%s';" \
% (host, i.strip())
(status, output) = subprocess.getstatusoutput(cmd)
result_v = re.findall(r'Failed instances: (\d)\.', output)
if status:
self.logger.debug(
"[gs_dropnode]Set pg_hba config file failed:" + output)
raise ValueError(output)
if len(result_v):
if result_v[0] != '0':
self.logger.debug(
"[gs_dropnode]Set pg_hba config file failed:" + output)
raise ValueError(output)
else:
self.logger.debug(
"[gs_dropnode]Set pg_hba config file failed:" + output)
raise ValueError(output)
else:
self.logger.log(
"[gs_dropnode]Nothing need to do with pg_hba config file.")
self.logger.log(
"[gs_dropnode]End of set pg_hba config file on %s." % host)
def get_repl_slot(self, host, gsql_path, port):
"""
Get the replication slot (need to do it on standby for cascade_standby)
But can't do it on standby which enabled extreme rto
"""
self.logger.log("[gs_dropnode]Start to get repl slot on %s." % host)
selectSQL = "SELECT slot_name,plugin,slot_type FROM pg_replication_slots;"
sqlcmd = "%s -p %s postgres -A -t -c '%s'" % (gsql_path, port, selectSQL)
(status, output) = subprocess.getstatusoutput(sqlcmd)
if status or "ERROR" in output:
self.logger.debug(
"[gs_dropnode]Get repl slot failed:" + output)
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"])
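        # gsql -A -t prints one 'slot_name|plugin|slot_type' row per line;
        # join the rows with ',' so SetReplSlot() can split them again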
return ','.join(output.split('\n'))
def SetReplSlot(self, host, gsqlPath, port, dnid,
flag_rollback=False):
self.logger.log("[gs_dropnode]Start to set repl slot on %s." % host)
replslot = self.get_repl_slot(host, gsqlPath, port)
setcmd = ''
sql = ''
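        # normal path: drop the replication slots of the removed DNs;
        # rollback path: recreate the dropped slots (physical or logical)
        # from the information returned by get_repl_slot()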
if not flag_rollback:
for i in dnid:
if i in replslot:
sql += "SELECT pg_drop_replication_slot('%s');" % i
sql = "SET enable_slot_log TO 1;" + sql
setcmd = "sleep 5;%s -p %s postgres -A -t -c \"%s\";" % (gsqlPath, port, sql)
if flag_rollback:
list_o = [i.split('|') for i in replslot.split(',')]
for r in list_o:
if r[0] in dnid and r[2] == 'physical':
sql += "SELECT * FROM pg_create_physical_replication_slot('%s', " \
"false);" % r[0]
elif r[0] in dnid and r[2] == 'logical':
sql += "SELECT * FROM pg_create_logical_replication_slot('%s', " \
"'%s');" % (r[0], r[1])
setcmd = "%s -p %s postgres -A -t -c \"%s\";" % (gsqlPath, port, sql)
if sql != '':
(status, output) = subprocess.getstatusoutput(setcmd)
if status or "ERROR" in output:
self.logger.debug("[gs_dropnode]Set repl slot failed:" + output)
raise ValueError(output)
self.logger.log("[gs_dropnode]End of set repl slot on %s." % host)
def stopInstance(self, host, sshTool, dirDn, env):
"""
"""
self.logger.log("[gs_dropnode]Start to stop the target node %s." % host)
command = "source %s ; gs_ctl stop -D %s -M immediate" % (env, dirDn)
resultMap, outputCollect = sshTool.getSshStatusOutput(command, [host],
env)
if 'Is server running?' in outputCollect:
self.logger.log("[gs_dropnode]End of stop the target node %s."
% host)
return
elif resultMap[host] != 'Success':
self.logger.debug(outputCollect)
self.logger.log(
"[gs_dropnode]Cannot connect the target node %s." % host)
self.logger.log(
"[gs_dropnode]It may be still running.")
return
self.logger.log("[gs_dropnode]End of stop the target node %s." % host)
def startInstance(self, dirDn, env):
"""
"""
self.logger.log("[gs_dropnode]Start to start the target node.")
command = "source %s ; %s -U %s -D %s" % (env,
OMCommand.getLocalScript("Local_StartInstance"), self.user, dirDn)
(status, output) = subprocess.getstatusoutput(command)
self.logger.debug(output)
if status:
self.logger.debug("[gs_dropnode]Failed to start the node.")
GaussLog.exitWithError(ErrorCode.GAUSS_358["GAUSS_35809"])
elif re.search("another server might be running", output):
self.logger.log(output)
elif re.search("] WARNING:", output):
tmp = '\n'.join(re.findall(".*] WARNING:.*", output))
self.logger.log(tmp)
self.logger.debug("[gs_dropnode]End to start the node.")