openGauss-OM/script/local/UpgradeUtility.py
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
# http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description :
# UpgradeUtility.py is a utility to execute upgrade on each local node
#############################################################################
import getopt
import sys
import os
import subprocess
import pwd
import re
import time
import timeit
import traceback
import json
import platform
import shutil
import copy
import csv
import fcntl
from multiprocessing.dummy import Pool as ThreadPool
sys.path.append(sys.path[0] + "/../")
from gspylib.common.GaussLog import GaussLog
from gspylib.common.Common import DefaultValue, ClusterCommand, \
ClusterInstanceConfig
from gspylib.common.ParameterParsecheck import Parameter
from gspylib.common.DbClusterInfo import dbClusterInfo, \
MASTER_INSTANCE, STANDBY_INSTANCE, CASCADE_STANDBY
from gspylib.common.ErrorCode import ErrorCode
from gspylib.common.DbClusterStatus import DbClusterStatus
from gspylib.component.CM.CM_OLAP.CM_OLAP import CM_OLAP
import impl.upgrade.UpgradeConst as const
from base_utils.executor.cmd_executor import CmdExecutor
from base_utils.os.cmd_util import CmdUtil
from base_utils.os.compress_util import CompressUtil
from base_utils.os.env_util import EnvUtil
from base_utils.os.file_util import FileUtil
from base_utils.os.net_util import NetUtil
from domain_utils.cluster_file.cluster_dir import ClusterDir
from domain_utils.cluster_file.cluster_log import ClusterLog
from domain_utils.sql_handler.sql_result import SqlResult
from domain_utils.sql_handler.sql_file import SqlFile
from domain_utils.domain_common.cluster_constants import ClusterConstants
from base_diff.sql_commands import SqlCommands
INSTANCE_TYPE_UNDEFINED = -1
DUMMY_STANDBY_INSTANCE = 2
# init value
INSTANCE_ROLE_UNDEFINED = -1
# cn
INSTANCE_ROLE_COODINATOR = 3
# dn
INSTANCE_ROLE_DATANODE = 4
# Global parameter
g_oldVersionModules = None
g_clusterInfo = None
g_oldClusterInfo = None
g_logger = None
g_dbNode = None
g_opts = None
g_DWS_mode = False
g_gausshome = None
class CmdOptions():
"""
Class to define some cmd options
"""
def __init__(self):
"""
function: constructor
"""
# action value
self.action = ""
# user value
self.user = ""
# app install path
self.appPath = ""
# env file
self.mpprcFile = ""
self.userProfile = ""
# log file
self.logFile = ""
# backup path
self.bakPath = ""
# old cluster version
self.oldVersion = ""
# xml file
self.xmlFile = ""
# inplace upgrade bak path or grey upgrade path
self.upgrade_bak_path = ""
self.scriptType = ""
self.rollback = False
self.forceRollback = False
self.rolling = False
self.oldClusterAppPath = ""
self.newClusterAppPath = ""
self.gucStr = ""
self.oldclusternum = ""
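# maps PostGIS-related file name patterns to the sub-directories
# (relative to the install path) they live in; used by backupConfig()
# and the restore logic to copy these files to and from the backup path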
self.postgisSOFileList = \
{"postgis-*.*.so": "lib/postgresql/",
"libgeos_c.so.*": "lib/",
"libproj.so.*": "lib/",
"libjson-c.so.*": "lib/",
"libgeos-*.*.*so": "lib/",
"postgis--*.*.*.sql": "share/postgresql/extension/",
"postgis.control": "share/postgresql/extension/",
"pgsql2shp": "bin/",
"shp2pgsql": "bin/",
"libgcc_s.so.*": "lib/",
"libstdc++.so.*": "lib/"}
self.fromFile = False
self.setType = "reload"
self.isSingleInst = False
class OldVersionModules():
"""
Class for providing some functions to apply old version cluster
"""
def __init__(self):
"""
function: constructor
"""
# old cluster information module
self.oldDbClusterInfoModule = None
# old cluster status module
self.oldDbClusterStatusModule = None
def importOldVersionModules():
"""
function: import some needed modules from the old cluster.
currently needed are: DbClusterInfo
input: NA
output:NA
"""
# get install directory by user name
installDir = ClusterDir.getInstallDir(g_opts.user)
if installDir == "":
GaussLog.exitWithError(
ErrorCode.GAUSS_503["GAUSS_50308"] + " User: %s." % g_opts.user)
# import DbClusterInfo module
global g_oldVersionModules
g_oldVersionModules = OldVersionModules()
sys.path.append("%s/bin/script/util" % installDir)
g_oldVersionModules.oldDbClusterInfoModule = __import__('DbClusterInfo')
def initGlobals():
"""
function: init global variables
input: NA
output: NA
"""
global g_oldVersionModules
global g_clusterInfo
global g_oldClusterInfo
global g_logger
global g_dbNode
# make sure which env file we use
g_opts.userProfile = g_opts.mpprcFile
# init g_logger
g_logger = GaussLog(g_opts.logFile, g_opts.action)
if g_opts.action in [const.ACTION_RESTORE_CONFIG,
const.ACTION_SWITCH_BIN,
const.ACTION_GREY_UPGRADE_CONFIG_SYNC,
const.ACTION_CLEAN_INSTALL_PATH,
const.ACTION_GREY_RESTORE_CONFIG]:
g_logger.debug(
"No need to init cluster information under action %s."
% g_opts.action)
return
# init g_clusterInfo
# not all actions need to init g_clusterInfo
try:
g_clusterInfo = dbClusterInfo()
if g_opts.xmlFile == "" or not os.path.exists(g_opts.xmlFile):
g_clusterInfo.initFromStaticConfig(g_opts.user)
else:
g_clusterInfo.initFromXml(g_opts.xmlFile)
except Exception as e:
g_logger.debug(traceback.format_exc())
g_logger.error(str(e))
# init cluster info from install path failed
# try to do it from backup path again
g_opts.bakPath = EnvUtil.getTmpDirFromEnv() + "/"
staticConfigFile = "%s/cluster_static_config" % g_opts.bakPath
if os.path.isfile(staticConfigFile):
try:
# import old module
g_oldVersionModules = OldVersionModules()
sys.path.append(os.path.dirname(g_opts.bakPath))
g_oldVersionModules.oldDbClusterInfoModule = __import__(
'OldDbClusterInfo')
# init old cluster config
g_clusterInfo = \
g_oldVersionModules.oldDbClusterInfoModule.dbClusterInfo()
g_clusterInfo.initFromStaticConfig(g_opts.user,
staticConfigFile)
except Exception as e:
g_logger.error(str(e))
# maybe the old cluster is V1R5C00 TR5, which does not
# support specifying a static config file path for
# initFromStaticConfig, so try again with the new cluster format
try:
g_clusterInfo = dbClusterInfo()
g_clusterInfo.initFromStaticConfig(g_opts.user,
staticConfigFile)
except Exception as e:
g_logger.error(str(e))
try:
# import old module
importOldVersionModules()
# init old cluster config
g_clusterInfo = \
g_oldVersionModules \
.oldDbClusterInfoModule.dbClusterInfo()
g_clusterInfo.initFromStaticConfig(g_opts.user)
except Exception as e:
raise Exception(str(e))
elif g_opts.xmlFile and os.path.exists(g_opts.xmlFile):
try:
sys.path.append(sys.path[0] + "/../../gspylib/common")
curDbClusterInfoModule = __import__('DbClusterInfo')
g_clusterInfo = curDbClusterInfoModule.dbClusterInfo()
g_clusterInfo.initFromXml(g_opts.xmlFile)
except Exception as e:
raise Exception(str(e))
else:
try:
# import old module
importOldVersionModules()
# init old cluster config
g_clusterInfo = \
g_oldVersionModules.oldDbClusterInfoModule.dbClusterInfo()
g_clusterInfo.initFromStaticConfig(g_opts.user)
except Exception as e:
raise Exception(str(e))
# init g_dbNode
localHost = NetUtil.GetHostIpOrName()
g_dbNode = g_clusterInfo.getDbNodeByName(localHost)
if g_dbNode is None:
raise Exception(
ErrorCode.GAUSS_512["GAUSS_51209"] % ("NODE", localHost))
def usage():
"""
Usage:
python3 UpgradeUtility.py -t action [-U user] [-R path] [-l log]
Common options:
-t the type of action
-U the user of old cluster
-R the install path of cluster
-l the path of log file
-V original Version
-X the xml configure file
--help show this help, then exit
--upgrade_bak_path always the $PGHOST/binary_upgrade directory
--script_type upgrade script type
--old_cluster_app_path absolute path with old commit id
--new_cluster_app_path absolute path with new commit id
--rollback is rollback
--guc_string check that the guc string has been successfully
written to the configuration file, format is guc:value;
only upgrade_from and upgrade_mode can be checked
--oldcluster_num old cluster number
--rolling is rolling upgrade or rollback
"""
print(usage.__doc__)
def parseCommandLine():
"""
function: Parse command line and save to global variables
input: NA
output: NA
"""
try:
opts, args = getopt.getopt(sys.argv[1:], "t:U:R:l:V:X:",
["help", "upgrade_bak_path=", "script_type=",
"old_cluster_app_path=", "new_cluster_app_path=", "rollback",
"force", "rolling", "oldcluster_num=", "guc_string=",
"fromFile", "setType=", "HA"])
except Exception as er:
usage()
raise Exception(ErrorCode.GAUSS_500["GAUSS_50000"] % str(er))
if len(args) > 0:
raise Exception(ErrorCode.GAUSS_500["GAUSS_50000"] % str(args[0]))
for (key, value) in opts:
initCommandPara(key, value)
Parameter.checkParaVaild(key, value)
def initCommandPara(key, value):
"""
function: The given value save to global variables
:param key:
:param value:
"""
if key == "--help":
usage()
sys.exit(0)
parseShortOptions(key, value)
parseLongOptions(key, value)
def parseShortOptions(key, value):
"""
parse short options like "-X"
"""
if key == "-t":
g_opts.action = value
elif key == "-U":
g_opts.user = value
elif key == "-R":
g_opts.appPath = value
elif key == "-l":
g_opts.logFile = os.path.realpath(value)
elif key == "-V":
g_opts.oldVersion = value
elif key == "-X":
g_opts.xmlFile = os.path.realpath(value)
def parseLongOptions(key, value):
"""
parse short options like "--force"
"""
if key == "--upgrade_bak_path":
g_opts.upgrade_bak_path = os.path.normpath(value)
elif key == "--script_type":
g_opts.scriptType = os.path.normpath(value)
elif key == "--old_cluster_app_path":
g_opts.oldClusterAppPath = os.path.normpath(value)
elif key == "--oldcluster_num":
g_opts.oldclusternum = value
elif key == "--new_cluster_app_path":
g_opts.newClusterAppPath = os.path.normpath(value)
elif key == "--rollback":
g_opts.rollback = True
elif key == "--rolling":
g_opts.rolling = True
elif key == "--guc_string":
if "=" in value and len(value.split("=")) == 2 and "'" not in value.split("=")[1]:
value = value.split("=")[0] + "=" + "'%s'" % value.split("=")[1]
g_opts.gucStr = value
elif key == "--fromFile":
g_opts.fromFile = True
elif key == "--setType":
g_opts.setType = value
elif key == "--HA":
g_opts.isSingleInst = True
def checkParameter():
"""
function: check parameter for different action
input: NA
output: NA
"""
# check mpprc file path
g_opts.mpprcFile = EnvUtil.getMpprcFile()
# the value of "-t" can not be ""
if g_opts.action == "":
GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % "t" + ".")
# check the value of "-t"
if g_opts.action in [const.ACTION_SWITCH_PROCESS,
const.ACTION_COPY_CERTS,
const.ACTION_GREY_UPGRADE_CONFIG_SYNC,
const.ACTION_SWITCH_DN,
const.ACTION_GREY_RESTORE_CONFIG] and \
(not g_opts.newClusterAppPath or not g_opts.oldClusterAppPath):
GaussLog.exitWithError(
ErrorCode.GAUSS_500["GAUSS_50001"]
% "-new_cluster_app_path and --old_cluster_app_path")
elif g_opts.action in \
[const.ACTION_SYNC_CONFIG,
const.ACTION_RESTORE_CONFIG] and not g_opts.newClusterAppPath:
GaussLog.exitWithError(
ErrorCode.GAUSS_500["GAUSS_50001"] % "-new_cluster_app_path")
elif g_opts.action in \
[const.ACTION_SWITCH_BIN,
const.ACTION_CLEAN_INSTALL_PATH] and not g_opts.appPath:
GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % "R")
elif g_opts.action in [
const.ACTION_GREY_SYNC_GUC, const.ACTION_UPGRADE_SQL_FOLDER] and\
not g_opts.upgrade_bak_path:
GaussLog.exitWithError(
ErrorCode.GAUSS_500["GAUSS_50001"] % "-upgrade_bak_path")
elif g_opts.action in [const.ACTION_GREY_RESTORE_GUC] and\
not g_opts.oldClusterAppPath:
raise Exception(
ErrorCode.GAUSS_500["GAUSS_50001"] % "-old_cluster_app_path")
# Check the incoming parameter -U
if g_opts.user == "":
g_opts.user = pwd.getpwuid(os.getuid()).pw_name
# Check the incoming parameter -l
if g_opts.logFile == "":
g_opts.logFile = ClusterLog.getOMLogPath(ClusterConstants.LOCAL_LOG_FILE,
g_opts.user, "")
global g_gausshome
g_gausshome = ClusterDir.getInstallDir(g_opts.user)
if g_gausshome == "":
GaussLog.exitWithError(
ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME")
g_gausshome = os.path.normpath(g_gausshome)
def switchBin():
"""
function: switch link bin from old to new
input : NA
output : NA
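note: $GAUSSHOME is expected to be a symbolic link; the function
repoints it to the new application path with 'ln -snf'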
"""
if g_opts.forceRollback:
if not os.path.exists(g_opts.appPath):
FileUtil.createDirectory(g_opts.appPath, True,
DefaultValue.KEY_DIRECTORY_MODE)
g_logger.log("Switch to %s." % g_opts.appPath)
if g_opts.appPath == g_gausshome:
raise Exception(ErrorCode.GAUSS_502["GAUSS_50233"] % (
"install path", "$GAUSSHOME"))
if os.path.exists(g_gausshome):
if os.path.samefile(g_opts.appPath, g_gausshome):
g_logger.log(
"$GAUSSHOME points to %s. No need to switch." % g_opts.appPath)
cmd = "ln -snf %s %s" % (g_opts.appPath, g_gausshome)
g_logger.log("Command for switching binary directory: '%s'." % cmd)
(status, output) = subprocess.getstatusoutput(cmd)
if status != 0:
raise Exception(
ErrorCode.GAUSS_508["GAUSS_50803"] + " Error: \n%s" % str(output))
def readPostgresqlConfig(filePath):
"""
function: read GUC parameters from postgresql.conf
input filepath
output gucParamDict
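example: a line such as "shared_buffers = 1GB  # comment" yields
{"shared_buffers": "1GB"}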
"""
GUC_PARAM_PATTERN = "^\\s*.*=.*$"
pattern = re.compile(GUC_PARAM_PATTERN)
gucParamDict = {}
try:
with open(filePath, 'r') as fp:
resList = fp.readlines()
for oneLine in resList:
# skip blank line
if oneLine.strip() == "":
continue
# skip comment line
if (oneLine.strip()).startswith('#'):
continue
# search valid line
result = pattern.match(oneLine)
if result is not None:
paramAndValue = oneLine
# remove a trailing comment if it exists
pos = oneLine.find(' #')
if pos >= 0:
paramAndValue = oneLine[:pos]
# also handle a tab before the comment
pos = oneLine.find('\t#')
if pos >= 0:
paramAndValue = oneLine[:pos]
# if the value contains "$",
# replace it with "\\\\\\$" for later shell escaping
resList = paramAndValue.split('=')
if len(resList) == 2:
param = resList[0]
value = resList[1].replace("$", "\\\\\\$")
gucParamDict[param.strip()] = value.strip()
elif len(resList) > 2:
# values containing '=' are only supported for
# replconninfo parameters; skip any other line
if not resList[0].strip().startswith("replconninfo"):
continue
pos = paramAndValue.find('=')
param = paramAndValue[:pos]
value = paramAndValue[pos + 1:].replace("$", "\\\\\\$")
gucParamDict[param.strip()] = value.strip()
else:
continue
except Exception as e:
g_logger.debug(str(e))
raise Exception(
ErrorCode.GAUSS_502["GAUSS_50204"] % "postgressql.conf file")
return gucParamDict
def syncPostgresqlconf(dbInstance):
"""
function: syncPostgresqlconf during inplace upgrade
input: dbInstance
output: NA
"""
# get config info of current node
try:
# get guc param info from old cluster
gucCmd = "source %s" % g_opts.userProfile
oldPostgresConf = "%s/postgresql.conf" % dbInstance.datadir
gucParamDict = readPostgresqlConfig(oldPostgresConf)
synchronousStandbyNames = ""
# synchronous_standby_names can only be set by writing the file directly
if "synchronous_standby_names" in gucParamDict.keys():
synchronousStandbyNames = gucParamDict["synchronous_standby_names"]
del gucParamDict["synchronous_standby_names"]
# internal parameters are not supported, so skip them when running gs_guc
internalGucList = ['block_size', 'current_logic_cluster',
'integer_datetimes', 'lc_collate',
'lc_ctype', 'max_function_args',
'max_identifier_length', 'max_index_keys',
'node_group_mode', 'segment_size',
'server_encoding', 'server_version',
'server_version_num', 'sql_compatibility',
'wal_block_size', 'wal_segment_size', 'enable_beta_nestloop_fusion',
'enable_upsert_to_merge', 'gs_clean_timeout', 'force_parallel_mode',
'max_background_workers', 'max_parallel_workers_per_gather',
'min_parallel_table_scan_size', 'pagewriter_threshold',
'parallel_leader_participation', 'parallel_setup_cost',
'parallel_tuple_cost', 'parctl_min_cost', 'tcp_recv_timeout',
'transaction_sync_naptime', 'transaction_sync_timeout',
'twophase_clean_workers', 'wal_compression']
temp_del_guc = readDeleteGuc()
g_logger.debug("readDeleteGuc: %s" % temp_del_guc)
if 'datanode' in temp_del_guc:
internalGucList += temp_del_guc['datanode']
for gucName in internalGucList:
if gucName in gucParamDict.keys():
del gucParamDict[gucName]
if dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE:
# rebuild replconninfo
connInfo1 = None
peerInsts = g_clusterInfo.getPeerInstance(dbInstance)
if len(peerInsts) > 0:
(connInfo1, _) = ClusterInstanceConfig.\
setReplConninfoForSinglePrimaryMultiStandbyCluster(
dbInstance, peerInsts, g_clusterInfo)
for i in range(len(connInfo1)):
connInfo = "replconninfo" + "%d" % (i + 1)
gucParamDict[connInfo] = "'%s'" % connInfo1[i]
if len(gucParamDict) > 0:
gucStr = ""
for key, value in gucParamDict.items():
gucStr += " -c \\\"%s=%s\\\" " % (key, value)
gucCmd += "&& gs_guc set -D %s %s" % (dbInstance.datadir, gucStr)
# set guc parameters about DummyStandbyConfig at DN
if dbInstance.instanceType == DUMMY_STANDBY_INSTANCE:
gucstr = ""
for entry in DefaultValue.getPrivateGucParamList().items():
gucstr += " -c \"%s=%s\"" % (entry[0], entry[1])
gucCmd += "&& gs_guc set -D %s %s " % (dbInstance.datadir, gucstr)
g_logger.debug("Command for setting [%s] guc parameter:%s" % (
dbInstance.datadir, gucCmd))
# save guc parameter to temp file
gucTempFile = "%s/setGucParam_%s.sh" % (
g_opts.upgrade_bak_path, dbInstance.instanceId)
# Do not modify the write file operation.
# Escape processing of special characters in the content
cmd = "echo \"%s\" > %s" % (gucCmd, gucTempFile)
(status, output) = CmdUtil.retryGetstatusoutput(cmd)
if status != 0:
g_logger.debug("Command: %s. Error: \n%s" % (cmd, output))
g_logger.logExit(
ErrorCode.GAUSS_502["GAUSS_50205"] % gucTempFile
+ " Error: \n%s" % str(
output))
FileUtil.changeOwner(g_opts.user, gucTempFile)
FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, gucTempFile)
# replace old guc file with sample file
newPostgresConf = "%s/share/postgresql/postgresql.conf.sample" \
% g_opts.newClusterAppPath
if os.path.exists(newPostgresConf):
FileUtil.cpFile(newPostgresConf, oldPostgresConf)
FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, oldPostgresConf)
# set guc param
cmd = "sh %s" % gucTempFile
(status, output) = subprocess.getstatusoutput(cmd)
if status != 0:
g_logger.debug("Command: %s. Error: \n%s" % (cmd, output))
g_logger.logExit(
ErrorCode.GAUSS_514["GAUSS_51401"] % gucTempFile[:-3]
+ " Error: \n%s" % str(output))
if synchronousStandbyNames != "":
g_logger.debug(
"Set the GUC value %s to synchronous_standby_names for %s" % (
synchronousStandbyNames, oldPostgresConf))
FileUtil.deleteLine(oldPostgresConf,
"^\\s*synchronous_standby_names\\s*=.*$")
FileUtil.writeFile(
oldPostgresConf,
["synchronous_standby_names "
"= %s # standby servers that provide sync rep"
% synchronousStandbyNames])
# clean temp file
if os.path.isfile(gucTempFile):
os.remove(gucTempFile)
except Exception as e:
g_logger.logExit(str(e))
def syncClusterConfig():
"""
function: sync newly added guc during upgrade,
for now we only sync CN/DN, gtm, cm_agent and cm_server
input: NA
output: NA
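note: on this node only the local datanode postgresql.conf files are
regenerated from the new sample file and repopulated through gs_guc
(see syncPostgresqlconf)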
"""
DnInstances = g_dbNode.datanodes
if len(DnInstances) > 0:
try:
# sync postgresql.conf in parallel
pool = ThreadPool(DefaultValue.getCpuSet())
pool.map(syncPostgresqlconf, DnInstances)
pool.close()
pool.join()
except Exception as e:
g_logger.logExit(str(e))
def touchInstanceInitFile():
"""
function: touch upgrade init file for every primary and standby instance
input: NA
output: NA
"""
g_logger.log("Touch init file.")
try:
InstanceList = []
# find all DB instances need to touch
if len(g_dbNode.datanodes) != 0:
for eachInstance in g_dbNode.datanodes:
if eachInstance.instanceType in \
[MASTER_INSTANCE, STANDBY_INSTANCE, CASCADE_STANDBY]:
InstanceList.append(eachInstance)
# touch each instance in parallel
if len(InstanceList) != 0:
pool = ThreadPool(len(InstanceList))
pool.map(touchOneInstanceInitFile, InstanceList)
pool.close()
pool.join()
else:
g_logger.debug(
"No instance found on this node, nothing need to do.")
return
g_logger.log(
"Successfully created all instances init file on this node.")
except Exception as e:
g_logger.logExit(str(e))
def reloadCmagent():
"""
reload the cm_agent instance, make the guc parameter working
"""
cmd = "ps ux | grep '%s/bin/cm_agent' | grep -v grep | awk '{print $2}' | " \
"xargs -r -n 100 kill -1" % g_clusterInfo.appPath
g_logger.debug("Command for reload cm_agent:%s" % cmd)
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 3, 5)
if status == 0:
g_logger.log("Successfully reload cmagent.")
else:
raise Exception("Failed to reload cmagent.")
def reload_cmserver():
"""
reload the cm_server instance, make the guc parameter working
"""
# reload the cm_server instance, make the guc parameter working
cmd = "ps ux | grep '%s/bin/cm_server' | grep -v grep | awk '{print $2}' | " \
"xargs -r -n 100 kill -1" % g_clusterInfo.appPath
g_logger.debug("Command for reload cm_server:%s" % cmd)
status, _ = CmdUtil.retryGetstatusoutput(cmd, 3, 5)
if status == 0:
g_logger.log("Successfully reload cmserver.")
else:
raise Exception("Failed to reload cmserver.")
def initDbInfo():
"""
function: create a init dbInfo dict
input: NA
output: NA
"""
tmpDbInfo = {}
tmpDbInfo['dbname'] = ""
tmpDbInfo['dboid'] = -1
tmpDbInfo['spclocation'] = ""
tmpDbInfo['CatalogList'] = []
tmpDbInfo['CatalogNum'] = 0
return tmpDbInfo
def initCatalogInfo():
"""
function: create a init catalog dict
input: NA
output: NA
"""
tmpCatalogInfo = {}
tmpCatalogInfo['relname'] = ""
tmpCatalogInfo['oid'] = -1
tmpCatalogInfo['relfilenode'] = -1
return tmpCatalogInfo
def cpDirectory(srcDir, destDir):
"""
function: copy directory
input : NA
output : NA
"""
cmd = "rm -rf '%s' && cp -r -p '%s' '%s'" % (destDir, srcDir, destDir)
g_logger.debug("Backup commad:[%s]." % cmd)
(status, output) = subprocess.getstatusoutput(cmd)
if status != 0:
raise Exception(
ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output)
def touchOneInstanceInitFile(instance):
"""
function: touch upgrade init file for this instance
input: NA
output: NA
"""
g_logger.debug(
"Touch instance init file. Instance data dir: %s" % instance.datadir)
dbInfoDict = {}
dbInfoDict["dblist"] = []
dbInfoDict["dbnum"] = 0
try:
# we touch init file by executing a simple query for every database
get_db_list_sql = """
SELECT d.datname, d.oid, pg_catalog.pg_tablespace_location(t.oid)
AS spclocation
FROM pg_catalog.pg_database d
LEFT OUTER JOIN pg_catalog.pg_tablespace t
ON d.dattablespace = t.oid
ORDER BY 2;"""
g_logger.debug("Get database info command: \n%s" % get_db_list_sql)
(status, output) = ClusterCommand.execSQLCommand(get_db_list_sql,
g_opts.user, "",
instance.port,
"postgres",
"-m",
IsInplaceUpgrade=True)
if status != 0:
raise Exception(
ErrorCode.GAUSS_513["GAUSS_51300"] % get_db_list_sql
+ " Error:\n%s" % output)
if output == "":
raise Exception(ErrorCode.GAUSS_529["GAUSS_52938"]
% "any database!!")
g_logger.debug("Get database info result: \n%s." % output)
resList = output.split('\n')
for each_line in resList:
tmpDbInfo = initDbInfo()
(datname, oid, spclocation) = each_line.split('|')
tmpDbInfo['dbname'] = datname.strip()
tmpDbInfo['dboid'] = oid.strip()
tmpDbInfo['spclocation'] = spclocation.strip()
dbInfoDict["dblist"].append(tmpDbInfo)
dbInfoDict["dbnum"] += 1
# connect each database, run a simple query
touch_sql = "SELECT 1;"
for each_db in dbInfoDict["dblist"]:
(status, output) = ClusterCommand.execSQLCommand(
touch_sql,
g_opts.user, "",
instance.port,
each_db["dbname"],
"-m",
IsInplaceUpgrade=True)
if status != 0 or not output.isdigit():
raise Exception(
ErrorCode.GAUSS_513["GAUSS_51300"] % touch_sql
+ " Error:\n%s" % output)
except Exception as e:
raise Exception(str(e))
g_logger.debug(
"Successfully created instance init file. Instance data dir: %s"
% instance.datadir)
def is_dcf_mode():
"""
is dcf mode or not
"""
try:
if g_clusterInfo.enable_dcf != "on":
g_logger.debug("Current cluster is not in dcf mode. ")
return False
else:
g_logger.debug("Current cluster is in dcf mode")
return True
except Exception as er:
raise Exception(str(er))
def getInstanceName(instance):
"""
get master instance name
"""
instance_name = ""
if instance.instanceRole == INSTANCE_ROLE_DATANODE:
if g_clusterInfo.isSingleInstCluster():
# the instance type must be master or standby dn
peerInsts = g_clusterInfo.getPeerInstance(instance)
(instance_name, masterInst, _) = \
ClusterInstanceConfig.\
getInstanceInfoForSinglePrimaryMultiStandbyCluster(
instance, peerInsts)
else:
# if dn, it should be master or standby dn
if instance.instanceType == DUMMY_STANDBY_INSTANCE:
raise Exception(
"Invalid instance type:%s" % instance.instanceType)
peerInsts = g_clusterInfo.getPeerInstance(instance)
if len(peerInsts) != 2 and len(peerInsts) != 1:
raise Exception(ErrorCode.GAUSS_516["GAUSS_51620"] % "peer")
for i in range(len(peerInsts)):
if peerInsts[i].instanceType == MASTER_INSTANCE:
masterInst = peerInsts[i]
standbyInst = instance
instance_name = "dn_%d_%d" % (masterInst.instanceId,
standbyInst.instanceId)
elif peerInsts[i].instanceType == STANDBY_INSTANCE:
standbyInst = peerInsts[i]
masterInst = instance
instance_name = "dn_%d_%d" % (masterInst.instanceId,
standbyInst.instanceId)
else:
# we are searching master or standby dn instance,
# if dummy dn, just continue
continue
if instance_name == "":
raise Exception("Can not get instance name!")
else:
raise Exception("Invalid node type:%s" % instance.instanceRole)
return instance_name.strip()
def getJsonFile(instance, backup_path):
"""
function: get json file
input : instance, backup_path
output : db_and_catalog_info_file_name: str
"""
try:
instance_name = getInstanceName(instance)
# load db and catalog info from json file
if instance.instanceRole == INSTANCE_ROLE_COODINATOR:
db_and_catalog_info_file_name = \
"%s/cn_db_and_catalog_info_%s.json" % (
backup_path, instance_name)
elif instance.instanceRole == INSTANCE_ROLE_DATANODE:
if instance.instanceType in [MASTER_INSTANCE, STANDBY_INSTANCE, CASCADE_STANDBY]:
db_and_catalog_info_file_name = \
"%s/dn_db_and_catalog_info_%s.json" % (
backup_path, instance_name)
else:
raise Exception(
"Invalid instance type:%s" % instance.instanceType)
else:
raise Exception("Invalid instance role:%s" % instance.instanceRole)
return db_and_catalog_info_file_name
except Exception as e:
raise Exception(str(e))
def __backup_base_folder(instance):
"""
"""
g_logger.debug("Backup instance catalog physical files. "
"Instance data dir: %s" % instance.datadir)
backup_path = "%s/oldClusterDBAndRel/" % g_opts.upgrade_bak_path
db_and_catalog_info_file_name = getJsonFile(instance, backup_path)
fp = open(db_and_catalog_info_file_name, 'r')
dbInfoStr = fp.read()
fp.close()
dbInfoDict = json.loads(dbInfoStr)
# get instance name
instance_name = getInstanceName(instance)
# backup base folder
for each_db in dbInfoDict["dblist"]:
if each_db["spclocation"] != "":
if each_db["spclocation"].startswith('/'):
tbsBaseDir = each_db["spclocation"]
else:
tbsBaseDir = "%s/pg_location/%s" % (instance.datadir,
each_db["spclocation"])
pg_catalog_base_dir = "%s/%s_%s/%d" % (
tbsBaseDir, DefaultValue.TABLESPACE_VERSION_DIRECTORY,
instance_name, int(each_db["dboid"]))
else:
pg_catalog_base_dir = "%s/base/%d" % (instance.datadir,
int(each_db["dboid"]))
# for base folder, template0 needs to be handled specially
if each_db["dbname"] == 'template0':
pg_catalog_base_back_dir = "%s_bak" % pg_catalog_base_dir
cpDirectory(pg_catalog_base_dir, pg_catalog_base_back_dir)
g_logger.debug(
"Template0 has been backed up from {0} to {1}".format(
pg_catalog_base_dir, pg_catalog_base_back_dir))
continue
# handle other db's base folder
if len(each_db["CatalogList"]) <= 0:
raise Exception(
"Can not find any catalog in database %s" % each_db["dbname"])
for each_catalog in each_db["CatalogList"]:
cmd = ""
# main/vm/fsm -- main.1 ..
main_file = "%s/%d" % (
pg_catalog_base_dir, int(each_catalog['relfilenode']))
# unlogged tables may have no data file on a standby DN
if os.path.isfile(main_file):
cmd = "cp -f -p '%s' '%s_bak'" % (main_file, main_file)
g_logger.debug("{0} needs to be backed up to {0}_bak".format(main_file))
elif each_catalog['relpersistence'] != 'u':
raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % main_file)
# for unlogged tables, such as statement_history, the init file must be copied
if each_catalog['relpersistence'] == 'u':
main_init_file = main_file + '_init'
if not os.path.isfile(main_init_file):
raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % main_init_file)
if cmd == "":
cmd = "cp -f -p '%s' '%s_bak'" % (main_init_file, main_init_file)
else:
cmd += "&& cp -f -p '%s' '%s_bak'" % (main_init_file, main_init_file)
g_logger.debug("{0} needs to be backed up to {0}_bak".format(main_init_file))
seg_idx = 1
while True:
seg_file = "%s/%d.%d" % (pg_catalog_base_dir,
int(each_catalog['relfilenode']),
seg_idx)
if os.path.isfile(seg_file):
cmd += "&& cp -f -p '%s' '%s_bak'" % (seg_file, seg_file)
seg_idx += 1
else:
break
g_logger.debug("seg_file needs to be backed up")
vm_file = "%s/%d_vm" % (pg_catalog_base_dir,
int(each_catalog['relfilenode']))
if os.path.isfile(vm_file):
cmd += "&& cp -f -p '%s' '%s_bak'" % (vm_file, vm_file)
g_logger.debug(
"{0} needs to be backed up to {0}_bak".format(vm_file))
fsm_file = "%s/%d_fsm" % (pg_catalog_base_dir,
int(each_catalog['relfilenode']))
if os.path.isfile(fsm_file):
cmd += "&& cp -f -p '%s' '%s_bak'" % (fsm_file, fsm_file)
g_logger.debug(
"{0} needs to be backed up to {0}_bak".format(fsm_file))
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
"\nOutput:%s" % output)
# special files pg_filenode.map pg_internal.init
cmd = ""
pg_filenode_map_file = "%s/pg_filenode.map" % pg_catalog_base_dir
if os.path.isfile(pg_filenode_map_file):
if cmd == "":
cmd = "cp -f -p '%s' '%s_bak'" % (
pg_filenode_map_file, pg_filenode_map_file)
else:
cmd += "&& cp -f -p '%s' '%s_bak'" % (
pg_filenode_map_file, pg_filenode_map_file)
g_logger.debug("{0} needs to be backed up to {0}_bak".format(
pg_filenode_map_file))
pg_internal_init_file = "%s/pg_internal.init" % pg_catalog_base_dir
if os.path.isfile(pg_internal_init_file):
if cmd == "":
cmd = "cp -f -p '%s' '%s_bak'" % (
pg_internal_init_file, pg_internal_init_file)
else:
cmd += "&& cp -f -p '%s' '%s_bak'" % (
pg_internal_init_file, pg_internal_init_file)
g_logger.debug("{0} needs to be backed up to {0}_bak".format(
pg_internal_init_file))
if cmd != "":
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
"\nOutput:%s" % output)
g_logger.debug("Successfully backuped instance catalog physical files."
" Instance data dir: %s" % instance.datadir)
def __restore_base_folder(instance):
"""
"""
g_logger.debug("Restore instance base folders. Instance data dir: {0}".format(instance.datadir))
backup_path = "%s/oldClusterDBAndRel/" % g_opts.upgrade_bak_path
# get instance name
instance_name = getInstanceName(instance)
# load db and catalog info from json file
if instance.instanceRole == INSTANCE_ROLE_DATANODE:
if instance.instanceType in [MASTER_INSTANCE, STANDBY_INSTANCE, CASCADE_STANDBY]:
db_and_catalog_info_file_name = "%s/dn_db_and_catalog_info_%s.json" % \
(backup_path, instance_name)
else:
raise Exception("Invalid instance type:%s" % instance.instanceType)
else:
raise Exception("Invalid instance role:%s" % instance.instanceRole)
fp = open(db_and_catalog_info_file_name, 'r')
db_info_str = fp.read()
fp.close()
db_info_dict = json.loads(db_info_str)
# restore base folder
for each_db in db_info_dict["dblist"]:
if each_db["spclocation"] != "":
if each_db["spclocation"].startswith('/'):
tbsBaseDir = each_db["spclocation"]
else:
tbsBaseDir = "%s/pg_location/%s" % (instance.datadir, each_db["spclocation"])
pg_catalog_base_dir = "%s/%s_%s/%d" % (tbsBaseDir,
DefaultValue.TABLESPACE_VERSION_DIRECTORY,
instance_name, int(each_db["dboid"]))
else:
pg_catalog_base_dir = "%s/base/%d" % (instance.datadir, int(each_db["dboid"]))
# for base folder, template0 needs to be handled specially
if each_db["dbname"] == 'template0':
pg_catalog_base_back_dir = "%s_bak" % pg_catalog_base_dir
cpDirectory(pg_catalog_base_back_dir, pg_catalog_base_dir)
g_logger.debug(
"Template0 has been restored from {0} to {1}".format(
pg_catalog_base_back_dir, pg_catalog_base_dir))
continue
# handle other db's base folder
if len(each_db["CatalogList"]) <= 0:
raise Exception("Can not find any catalog in database %s" % each_db["dbname"])
for each_catalog in each_db["CatalogList"]:
cmd = ""
# main/vm/fsm -- main.1 ..
main_file = "%s/%d" % (pg_catalog_base_dir, int(each_catalog['relfilenode']))
if not os.path.isfile(main_file):
g_logger.debug("Instance data dir: %s, database: %s, relnodefile: %s does not "
"exists." % (instance.datadir, each_db["dbname"], main_file))
# unlogged tables may have no data file on a standby DN
if os.path.isfile(main_file + '_bak'):
cmd = "cp -f -p '%s_bak' '%s'" % (main_file, main_file)
g_logger.debug("{0} needs to be restored from {0}_bak".format(main_file))
elif each_catalog['relpersistence'] != 'u':
raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % (main_file + '_bak'))
# for unlogged tables, such as statement_history, the init file must be copied back
if each_catalog['relpersistence'] == 'u':
main_init_file = main_file + '_init'
if not os.path.isfile(main_init_file):
g_logger.debug("Instance data dir: %s, database: %s, "
"relnodefile: %s does not exists." %
(instance.datadir, each_db["dbname"], main_init_file))
if cmd == "":
cmd = "cp -f -p '%s_bak' '%s'" % (main_init_file, main_init_file)
else:
cmd += "&& cp -f -p '%s_bak' '%s'" % (main_init_file, main_init_file)
g_logger.debug("{0} needs to be restored from {0}_bak".format(main_init_file))
seg_idx = 1
while True:
seg_file = "%s/%d.%d" % (pg_catalog_base_dir,
int(each_catalog['relfilenode']), seg_idx)
seg_file_bak = "%s_bak" % seg_file
if os.path.isfile(seg_file):
if os.path.isfile(seg_file_bak):
cmd += "&& cp -f -p '%s' '%s'" % (seg_file_bak, seg_file)
else:
cmd += "&& rm -f '%s'" % seg_file
seg_idx += 1
else:
break
g_logger.debug("seg_file needs to be restored")
vm_file = "%s/%d_vm" % (pg_catalog_base_dir, int(each_catalog['relfilenode']))
vm_file_bak = "%s_bak" % vm_file
if os.path.isfile(vm_file):
if os.path.isfile(vm_file_bak):
cmd += "&& cp -f -p '%s' '%s'" % (vm_file_bak, vm_file)
else:
cmd += "&& rm -f '%s'" % vm_file
g_logger.debug("{0} needs to be restored from {0}_bak".format(vm_file))
fsm_file = "%s/%d_fsm" % (pg_catalog_base_dir, int(each_catalog['relfilenode']))
fsm_file_bak = "%s_bak" % fsm_file
if os.path.isfile(fsm_file):
if os.path.isfile(fsm_file_bak):
cmd += "&& cp -f -p '%s' '%s'" % (fsm_file_bak, fsm_file)
else:
cmd += "&& rm -f '%s'" % fsm_file
g_logger.debug("{0} needs to be restored from {0}_bak".format(fsm_file))
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output)
# special files pg_filenode.map pg_internal.init
cmd = ""
pg_filenode_map_file = "%s/pg_filenode.map" % pg_catalog_base_dir
if os.path.isfile(pg_filenode_map_file):
if cmd == "":
cmd = "cp -f -p '%s_bak' '%s'" % (pg_filenode_map_file, pg_filenode_map_file)
else:
cmd += "&& cp -f -p '%s_bak' '%s'" % (pg_filenode_map_file, pg_filenode_map_file)
g_logger.debug("{0} needs to be restored from {0}_bak".format(pg_filenode_map_file))
pg_internal_init_file = "%s/pg_internal.init" % pg_catalog_base_dir
if os.path.isfile(pg_internal_init_file):
if cmd == "":
cmd = "cp -f -p '%s_bak' '%s'" % (pg_internal_init_file, pg_internal_init_file)
else:
cmd += "&& cp -f -p '%s_bak' '%s'" % (pg_internal_init_file, pg_internal_init_file)
g_logger.debug("{0} needs to be restored from {0}_bak".format(pg_internal_init_file))
if cmd != "":
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output)
g_logger.debug("Successfully restore instance base folders. "
"Instance data dir: {0}".format(instance.datadir))
def cleanBackUpDir(backupDir):
"""
function: clean backup dir
input : backupDir
output : NA
"""
# clean backupDir folder. First, we kill any pending backup process
bakDir = "%s_bak" % backupDir
backcmd = "cp -r -p %s %s" % (backupDir, bakDir)
killCmd = DefaultValue.killInstProcessCmd(backcmd, False, 9, False)
CmdExecutor.execCommandLocally(killCmd)
# Then do clean
if os.path.isdir(bakDir):
FileUtil.removeDirectory(bakDir)
def checkExistsVersion(cooInst, curCommitid):
"""
function: check whether any instance reports a version whose
commit id differs from curCommitid
input : cooInst, curCommitid
output : needKill False/True
"""
needKill = False
sql = "select version();"
(status, output) = ClusterCommand.remoteSQLCommand(
sql, g_opts.user,
cooInst.hostname,
cooInst.port, False,
DefaultValue.DEFAULT_DB_NAME,
IsInplaceUpgrade=True)
g_logger.debug("Command to check version: %s" % sql)
if status != 0 or SqlResult.findErrorInSql(output):
raise Exception(
ErrorCode.GAUSS_513["GAUSS_51300"] % sql + " Error: \n%s" % str(
output))
if not output:
raise Exception(ErrorCode.GAUSS_516["GAUSS_51654"])
resList = output.split('\n')
pattern = re.compile(r'[(](.*?)[)]')
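# version() output contains a parenthesized build description; the
# commit id is taken as the last space-separated token inside the
# first pair of parentheses of each result row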
for record in resList:
versionInBrackets = re.findall(pattern, record)
commitid = versionInBrackets[0].split(" ")[-1]
g_logger.debug(
"checkExistsVersion commitid {0} record {1} brackets {2}".format(commitid, record,
versionInBrackets))
if commitid != curCommitid:
needKill = True
break
return needKill
def getTimeFormat(seconds):
"""
format seconds to h-m-s, e.g. 3661 -> "1h1m1s"
input: int seconds
output: formatted time string (0 if seconds is 0)
"""
seconds = int(seconds)
if seconds == 0:
return 0
# Converts the seconds to standard time
hour = seconds // 3600
minute = (seconds - hour * 3600) // 60
s = seconds % 60
resultstr = ""
if hour != 0:
resultstr += "%dh" % hour
if minute != 0:
resultstr += "%dm" % minute
return "%s%ds" % (resultstr, s)
def backupConfig():
"""
function: backup config
output: none
"""
try:
bakPath = g_opts.upgrade_bak_path
clusterAppPath = g_clusterInfo.appPath
# Backup cluster_static_config and cluster_dynamic_config,
# logic_cluster_name.txt
# cluster_static_config* at least one
cmd = "cp -f -p '%s'/bin/*cluster_static_config* '%s'" % (
clusterAppPath, bakPath)
dynamic_config = "%s/bin/cluster_dynamic_config" % clusterAppPath
logicalNameFile = "%s/bin/logic_cluster_name.txt" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
dynamic_config, dynamic_config, bakPath)
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
logicalNameFile, logicalNameFile, bakPath)
g_logger.debug("Backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# Backup libcgroup config
MAX_PARA_NUMBER = 20
cgroup_file_list = []
gs_cgroup_path = "%s/etc" % clusterAppPath
file_name_list = os.listdir(gs_cgroup_path)
for file_name in file_name_list:
if file_name.endswith('.cfg'):
gs_cgroup_config_file = "%s/%s" % (gs_cgroup_path, file_name)
cgroup_file_list.append(gs_cgroup_config_file)
# build cmd string list
# Every 20 records merged into one
i = 0
cmdCgroup = ""
cmdList = []
for gs_cgroup_config_file in cgroup_file_list:
i += 1
cmdCgroup += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
gs_cgroup_config_file, gs_cgroup_config_file, bakPath)
if i % MAX_PARA_NUMBER == 0:
cmdList.append(cmdCgroup)
i = 0
cmdCgroup = ""
if cmdCgroup != "":
cmdList.append(cmdCgroup)
for exeCmd in cmdList:
g_logger.debug("Backup command: %s" % exeCmd)
CmdExecutor.execCommandLocally(exeCmd[3:])
# Backup libsimsearch etc files and libs files
searchConfigFile = "%s/etc/searchletConfig.yaml" % clusterAppPath
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
searchConfigFile, searchConfigFile, bakPath)
searchIniFile = "%s/etc/searchServer.ini" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
searchIniFile, searchIniFile, bakPath)
cmd += " && (if [ -d '%s/lib/libsimsearch' ];" \
"then cp -r '%s/lib/libsimsearch' '%s';fi)" % (
clusterAppPath, clusterAppPath, bakPath)
g_logger.debug("Backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# Backup library file and database size file
cmd = "cp -r '%s'/lib/postgresql/pg_plugin '%s'" % (
clusterAppPath, bakPath)
backup_dbsize = "%s/bin/%s" % (
clusterAppPath, DefaultValue.DB_SIZE_FILE)
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
backup_dbsize, backup_dbsize, bakPath)
g_logger.debug("Backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# sync kerberos conf files
krbConfigFile = "%s/kerberos" % clusterAppPath
cmd = "(if [ -d '%s' ];then cp -r '%s' '%s';fi)" % (
krbConfigFile, krbConfigFile, bakPath)
cmd += "&& (if [ -d '%s/var/krb5kdc' ];then mkdir %s/var;" \
" cp -r '%s/var/krb5kdc' '%s/var/';fi)" % (
clusterAppPath, bakPath, clusterAppPath, bakPath)
g_logger.debug("Grey upgrade sync command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# backup obsserver.key.cipher/obsserver.key.rand and server.key.
# cipher/server.key.rand and datasource.key.cipher/datasource.key.rand.
# usermapping.key.cipher/usermapping.key.rand and subscription.key.cipher
# subscription.key.rand
OBS_cipher_key_bak_file = \
"%s/bin/obsserver.key.cipher" % clusterAppPath
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
OBS_cipher_key_bak_file, OBS_cipher_key_bak_file, bakPath)
OBS_rand_key_bak_file = "%s/bin/obsserver.key.rand" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
OBS_rand_key_bak_file, OBS_rand_key_bak_file, bakPath)
trans_encrypt_cipher_key_bak_file = \
"%s/bin/trans_encrypt.key.cipher" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
trans_encrypt_cipher_key_bak_file,
trans_encrypt_cipher_key_bak_file,
bakPath)
trans_encrypt_rand_key_bak_file = \
"%s/bin/trans_encrypt.key.rand" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
trans_encrypt_rand_key_bak_file, trans_encrypt_rand_key_bak_file,
bakPath)
trans_encrypt_cipher_ak_sk_key_bak_file = \
"%s/bin/trans_encrypt_ak_sk.key" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
trans_encrypt_cipher_ak_sk_key_bak_file,
trans_encrypt_cipher_ak_sk_key_bak_file, bakPath)
server_cipher_key_bak_file = \
"%s/bin/server.key.cipher" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
server_cipher_key_bak_file, server_cipher_key_bak_file, bakPath)
server_rand_key_bak_file = "%s/bin/server.key.rand" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
server_rand_key_bak_file, server_rand_key_bak_file, bakPath)
datasource_cipher = "%s/bin/datasource.key.cipher" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
datasource_cipher, datasource_cipher, bakPath)
datasource_rand = "%s/bin/datasource.key.rand" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
datasource_rand, datasource_rand, bakPath)
usermapping_cipher = "%s/bin/usermapping.key.cipher" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
usermapping_cipher, usermapping_cipher, bakPath)
usermapping_rand = "%s/bin/usermapping.key.rand" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
usermapping_rand, usermapping_rand, bakPath)
subscription_cipher = "%s/bin/subscription.key.cipher" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
subscription_cipher, subscription_cipher, bakPath)
subscription_rand = "%s/bin/subscription.key.rand" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
subscription_rand, subscription_rand, bakPath)
tde_key_cipher = "%s/bin/gs_tde_keys.cipher" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
tde_key_cipher, tde_key_cipher, bakPath)
g_logger.debug("Backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# backup utilslib
utilslib = "%s/utilslib" % clusterAppPath
cmd = "if [ -d '%s' ];then cp -r '%s' '%s';fi" % (
utilslib, utilslib, bakPath)
g_logger.debug("Backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# backup ca.key,etcdca.crt, client.key and client.crt
CA_key_file = "%s/share/sslcert/etcd/ca.key" % clusterAppPath
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
CA_key_file, CA_key_file, bakPath)
CA_cert_file = "%s/share/sslcert/etcd/etcdca.crt" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
CA_cert_file, CA_cert_file, bakPath)
client_key_file = "%s/share/sslcert/etcd/client.key" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
client_key_file, client_key_file, bakPath)
client_cert_file = "%s/share/sslcert/etcd/client.crt" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
client_cert_file, client_cert_file, bakPath)
if int(g_opts.oldVersion) >= 92019:
client_key_cipher_file = \
"%s/share/sslcert/etcd/client.key.cipher" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
client_key_cipher_file, client_key_cipher_file, bakPath)
client_key_rand_file = \
"%s/share/sslcert/etcd/client.key.rand" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
client_key_rand_file, client_key_rand_file, bakPath)
etcd_key_cipher_file = \
"%s/share/sslcert/etcd/etcd.key.cipher" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
etcd_key_cipher_file, etcd_key_cipher_file, bakPath)
etcd_key_rand_file = \
"%s/share/sslcert/etcd/etcd.key.rand" % clusterAppPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
etcd_key_rand_file, etcd_key_rand_file, bakPath)
g_logger.debug("Backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# backup java UDF
javadir = "'%s'/lib/postgresql/java" % clusterAppPath
cmd = "if [ -d '%s' ];then cp -r '%s' '%s';fi" % (
javadir, javadir, bakPath)
g_logger.debug("Backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# backup postGIS
cmdPostGis = ""
for sofile in g_opts.postgisSOFileList.keys():
absPath = os.path.join(clusterAppPath,
g_opts.postgisSOFileList[sofile])
srcFile = "'%s'/%s" % (absPath, sofile)
cmdPostGis += " && (if [ -f %s ];then cp -f -p %s '%s';fi)" % (
srcFile, srcFile, bakPath)
# skip " &&"
cmd = cmdPostGis[3:]
g_logger.debug("Backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# backup extension library and config files
hadoop_odbc_connector = \
"%s/lib/postgresql/hadoop_odbc_connector.so" % clusterAppPath
extension_config01 = \
"%s/share/postgresql/extension/hadoop_odbc_connector--1.0.sql" \
% clusterAppPath
extension_config02 = \
"%s/share/postgresql/extension/hadoop_odbc_connector.control" \
% clusterAppPath
extension_config03 = \
"%s/share/postgresql/extension/" \
"hadoop_odbc_connector--unpackaged--1.0.sql" % clusterAppPath
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
hadoop_odbc_connector, hadoop_odbc_connector, bakPath)
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
extension_config01, extension_config01, bakPath)
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
extension_config02, extension_config02, bakPath)
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s';fi)" % (
extension_config03, extension_config03, bakPath)
g_logger.debug("Backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# backup dict file and grpc files
dictFileDir = "'%s'/share/postgresql/tsearch_data" % clusterAppPath
grpcFileDir = "'%s'/share/sslcert/grpc" % clusterAppPath
cmd = "if [ -d '%s' ];then cp -r '%s' '%s';fi && " % (dictFileDir,
dictFileDir,
bakPath)
cmd += "if [ -d '%s' ];then cp -r '%s' '%s';fi" % (grpcFileDir,
grpcFileDir,
bakPath)
g_logger.debug("Backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# backup gtm.control and gtm.sequence
if len(g_dbNode.gtms) > 0:
gtm_control = "%s/gtm.control" % g_dbNode.gtms[0].datadir
gtm_sequence = "%s/gtm.sequence" % g_dbNode.gtms[0].datadir
cmd = "(if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/gtm.control.bak';fi)" % \
(gtm_control, gtm_control, bakPath)
cmd += " && (if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/gtm.sequence.bak';fi)" % \
(gtm_sequence, gtm_sequence, bakPath)
g_logger.debug("Backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
cm_cert_file_dir = "'%s'/share/sslcert/cm" % clusterAppPath
back_up_cm_cert_file_cmd = "if [ -d '%s' ]; " \
"then cp -r '%s' '%s'; fi" % (cm_cert_file_dir,
cm_cert_file_dir, bakPath)
g_logger.debug("Backup CM cert files command: %s" % back_up_cm_cert_file_cmd)
CmdExecutor.execCommandLocally(back_up_cm_cert_file_cmd)
g_logger.debug("Backup CM cert files successfully.")
om_cert_file_dir = "'%s'/share/sslcert/om" % clusterAppPath
back_up_om_cert_file_cmd = "if [ -d '%s' ]; " \
"then cp -r '%s' '%s'; fi" % (om_cert_file_dir,
om_cert_file_dir, bakPath)
g_logger.debug("Backup OM cert files command: %s" % back_up_om_cert_file_cmd)
CmdExecutor.execCommandLocally(back_up_om_cert_file_cmd)
g_logger.debug("Backup OM cert files successfully.")
except Exception as e:
raise Exception(str(e))
def get_local_node(cluster_info):
"""
Get local node information object from DbClusterInfo object.
"""
for node in cluster_info.dbNodes:
if node.name == NetUtil.GetHostIpOrName():
return node
return None
def install_cm_agent(node_info, gauss_home):
"""
Install CM agent instance
"""
g_logger.debug("Start install cm_agent instance.")
agent_component = CM_OLAP()
agent_component.instInfo = node_info.cmagents[0]
agent_component.logger = g_logger
agent_component.binPath = os.path.realpath(os.path.join(gauss_home, "bin"))
agent_component.setMonitor(g_opts.user)
agent_component.initInstance()
g_logger.debug("Install cm_agent instance successfully.")
def install_cm_server(node_info, gauss_home):
"""
Install CM server instance
"""
g_logger.debug("Start install cm_server instance.")
server_component = CM_OLAP()
server_component.instInfo = node_info.cmservers[0]
server_component.logger = g_logger
server_component.binPath = os.path.realpath(os.path.join(gauss_home, "bin"))
server_component.initInstance()
g_logger.debug("Install cm_server instance successfully.")
def set_manual_start(node_info, gauss_home):
"""
create manual_start file
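note: the presence of bin/cluster_manual_start (and the per-datanode
instance_manual_start_<id> files) is assumed to keep the cluster
management components from auto-starting these instances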
"""
all_inst_id_list = [dn_inst.instanceId for dn_inst in node_info.datanodes]
manual_start_file = os.path.realpath(os.path.join(gauss_home, "bin", "cluster_manual_start"))
if not os.path.isfile(manual_start_file):
FileUtil.createFile(manual_start_file)
FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, manual_start_file)
g_logger.debug("Create %s file successfully." % manual_start_file)
for dn_inst_id in all_inst_id_list:
dn_manual_start_file = \
os.path.realpath(os.path.join(gauss_home,
"bin",
"instance_manual_start_%s" % dn_inst_id))
if not os.path.isfile(dn_manual_start_file):
FileUtil.createFile(dn_manual_start_file)
FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, dn_manual_start_file)
g_logger.debug("Create %s file for datanode instance "
"successfully." % dn_manual_start_file)
g_logger.debug("Set manual start file successfully.")
def install_cm_instance(static_config_file):
"""
Install CM on node.
"""
# 1. create cluster_manual_start file
# 2. set monitor cron tab
# 3. init cm instance
g_logger.debug("Start install cluster management instance.")
new_cluster_info = dbClusterInfo()
new_cluster_info.initFromStaticConfig(g_opts.user, static_config_file)
local_node = get_local_node(new_cluster_info)
if not local_node:
raise Exception("Cluster Information object error. Not obtain local node.")
# Set manual start
set_manual_start(local_node, g_opts.newClusterAppPath)
install_cm_agent(local_node, g_opts.newClusterAppPath)
install_cm_server(local_node, g_opts.newClusterAppPath)
g_logger.debug("Cluster management instance install successfully.")
def restoreConfig():
"""
function: restore config
output: none
"""
try:
bakPath = g_opts.upgrade_bak_path
clusterAppPath = g_opts.newClusterAppPath
# init old cluster config
old_static_config_file = os.path.join(
g_opts.oldClusterAppPath, "bin/cluster_static_config")
oldStaticClusterInfo = dbClusterInfo()
oldStaticClusterInfo.initFromStaticConfig(g_opts.user,
old_static_config_file)
# flush new static configuration
new_static_config_file = os.path.join(
clusterAppPath, "bin/cluster_static_config")
if not os.path.isfile(new_static_config_file):
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
os.path.realpath(new_static_config_file))
if DefaultValue.check_add_cm(old_static_config_file, new_static_config_file, g_logger):
install_cm_instance(new_static_config_file)
else:
FileUtil.removeFile(new_static_config_file)
newStaticClusterInfo = dbClusterInfo()
newStaticClusterInfo.saveToStaticConfig(
new_static_config_file, oldStaticClusterInfo.localNodeId,
oldStaticClusterInfo.dbNodes, upgrade=True)
# restore dynamic configuration
dynamic_config = "%s/cluster_dynamic_config" % bakPath
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
dynamic_config, dynamic_config, clusterAppPath)
# no need to restore alarm.conf at here,
# because it has been done on upgradeNodeApp
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# restore libsimsearch etc files and libsimsearch libs files
searchConfigFile = "%s/searchletConfig.yaml" % bakPath
cmd = "(if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/etc/searchletConfig.yaml'; fi)" % (
searchConfigFile, searchConfigFile, clusterAppPath)
searchIniFile = "%s/searchServer.ini" % bakPath
cmd += " && (if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/etc/searchServer.ini'; fi)" % (
searchIniFile, searchIniFile, clusterAppPath)
cmd += " && (if [ -d '%s/libsimsearch' ];" \
"then cp -r '%s/libsimsearch' '%s/lib/';fi)" % (
bakPath, bakPath, clusterAppPath)
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# restore library file,
# database size file and initialized configuration parameters files
cmd = "cp -r '%s/pg_plugin' '%s'/lib/postgresql" % (
bakPath, clusterAppPath)
backup_dbsize = os.path.join(bakPath, DefaultValue.DB_SIZE_FILE)
cmd += " && (if [ -f '%s' ];then cp '%s' '%s/bin';fi)" % (
backup_dbsize, backup_dbsize, clusterAppPath)
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# sync kerberos conf files
cmd = "(if [ -d '%s/kerberos' ];then cp -r '%s/kerberos' '%s/';fi)" % (
bakPath, bakPath, clusterAppPath)
cmd += "&& (if [ -d '%s/var/krb5kdc' ];" \
"then mkdir %s/var; cp -r '%s/var/krb5kdc' '%s/var/';fi)" % (
bakPath, clusterAppPath, bakPath, clusterAppPath)
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# restore obsserver.key.cipher/obsserver.key.rand
# and server.key.cipher/server.key.rand
# and datasource.key.cipher/datasource.key.rand
# and usermapping.key.cipher/usermapping.key.rand
# and subscription.key.cipher/subscription.key.rand
OBS_cipher_key_bak_file = "%s/obsserver.key.cipher" % bakPath
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
OBS_cipher_key_bak_file, OBS_cipher_key_bak_file, clusterAppPath)
OBS_rand_key_bak_file = "%s/obsserver.key.rand" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
OBS_rand_key_bak_file, OBS_rand_key_bak_file, clusterAppPath)
trans_encrypt_cipher_key_bak_file = \
"%s/trans_encrypt.key.cipher" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
trans_encrypt_cipher_key_bak_file,
trans_encrypt_cipher_key_bak_file,
clusterAppPath)
trans_encrypt_rand_key_bak_file = "%s/trans_encrypt.key.rand" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
trans_encrypt_rand_key_bak_file, trans_encrypt_rand_key_bak_file,
clusterAppPath)
trans_encrypt_cipher_ak_sk_key_bak_file = \
"%s/trans_encrypt_ak_sk.key" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
trans_encrypt_cipher_ak_sk_key_bak_file,
trans_encrypt_cipher_ak_sk_key_bak_file, clusterAppPath)
server_cipher_key_bak_file = "%s/server.key.cipher" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
server_cipher_key_bak_file, server_cipher_key_bak_file,
clusterAppPath)
server_rand_key_bak_file = "%s/server.key.rand" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
server_rand_key_bak_file, server_rand_key_bak_file, clusterAppPath)
datasource_cipher = "%s/datasource.key.cipher" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
datasource_cipher, datasource_cipher, clusterAppPath)
datasource_rand = "%s/datasource.key.rand" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
datasource_rand, datasource_rand, clusterAppPath)
usermapping_cipher = "%s/usermapping.key.cipher" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
usermapping_cipher, usermapping_cipher, clusterAppPath)
usermapping_rand = "%s/usermapping.key.rand" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
usermapping_rand, usermapping_rand, clusterAppPath)
subscription_cipher = "%s/subscription.key.cipher" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
subscription_cipher, subscription_cipher, clusterAppPath)
subscription_rand = "%s/subscription.key.rand" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
subscription_rand, subscription_rand, clusterAppPath)
tde_key_cipher = "%s/gs_tde_keys.cipher" % bakPath
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
tde_key_cipher, tde_key_cipher, clusterAppPath)
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# restore utilslib
utilslib = "%s/utilslib" % bakPath
cmd = "if [ -d '%s' ];then cp -r '%s' '%s'/;" % (
utilslib, utilslib, clusterAppPath)
        # create new $GAUSSHOME/utilslib if it does not exist.
        # no need to do chown here, it will be done after all restores finish
cmd += " else mkdir -p '%s'/utilslib -m %s; fi " % (
clusterAppPath, DefaultValue.DIRECTORY_MODE)
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# restore ca.key,etcdca.crt, client.key and client.crt
CA_key_file = "%s/ca.key" % bakPath
cmd = "(if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
CA_key_file, CA_key_file, clusterAppPath)
CA_cert_file = "%s/etcdca.crt" % bakPath
cmd += " && (if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
CA_cert_file, CA_cert_file, clusterAppPath)
client_key_file = "%s/client.key" % bakPath
cmd += " && (if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
client_key_file, client_key_file, clusterAppPath)
client_cert_file = "%s/client.crt" % bakPath
cmd += " && (if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
client_cert_file, client_cert_file, clusterAppPath)
if int(g_opts.oldVersion) >= 92019:
client_key_cipher_file = "%s/client.key.cipher" % bakPath
cmd += " && (if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
client_key_cipher_file, client_key_cipher_file,
clusterAppPath)
client_key_rand_file = "%s/client.key.rand" % bakPath
cmd += " && (if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
client_key_rand_file, client_key_rand_file,
clusterAppPath)
etcd_key_cipher_file = "%s/etcd.key.cipher" % bakPath
cmd += " && (if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
etcd_key_cipher_file, etcd_key_cipher_file,
clusterAppPath)
etcd_key_rand_file = "%s/etcd.key.rand" % bakPath
cmd += " && (if [ -f '%s' ];" \
"then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
etcd_key_rand_file, etcd_key_rand_file, clusterAppPath)
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# restore javaUDF
        # lib/postgresql/java/pljava.jar comes from the new package, no need to restore.
javadir = "%s/java" % bakPath
desPath = "%s/lib/postgresql/" % clusterAppPath
cmd = "if [ -d '%s' ];" \
"then rm -f '%s/pljava.jar'&&cp -r '%s' '%s' ;fi" % (
javadir, javadir, javadir, desPath)
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# restore postGIS
cmdPostGis = ""
machineType = platform.machine()
for sofile in g_opts.postgisSOFileList.keys():
            # To solve the dependency problem on the ARM platform,
            # the dependent libraries libgcc_s.so.* and libstdc++.so.*
            # are bundled in the ARM package. The libgcc_s.so.* used on
            # the ARM platform is the database built-in library, so it
            # does not need to be restored.
if machineType == "aarch64" and sofile.find('libgcc_s.so') >= 0:
continue
desPath = os.path.join(clusterAppPath,
g_opts.postgisSOFileList[sofile])
srcFile = "'%s'/%s" % (bakPath, sofile)
cmdPostGis += " && (if [ -f %s ];then cp -f -p %s '%s';fi)" % (
srcFile, srcFile, desPath)
# skip " &&"
cmd = cmdPostGis[3:]
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# restore extension library and config files
hadoop_odbc_connector = \
"%s/lib/postgresql/hadoop_odbc_connector.so" % bakPath
extension_config01 = \
"%s/share/postgresql/extension/hadoop_odbc_connector--1.0.sql" \
% bakPath
extension_config02 = \
"%s/share/postgresql/extension/hadoop_odbc_connector.control" \
% bakPath
extension_config03 = \
"%s/share/postgresql/extension/" \
"hadoop_odbc_connector--unpackaged--1.0.sql" % bakPath
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/lib/postgresql/';fi)" % (
hadoop_odbc_connector, hadoop_odbc_connector, clusterAppPath)
        cmd += \
            " && (if [ -f '%s' ];then cp -f " \
            "-p '%s' '%s/share/postgresql/extension/';fi)" % (
                extension_config01, extension_config01, clusterAppPath)
        cmd += \
            " && (if [ -f '%s' ];then cp " \
            "-f -p '%s' '%s/share/postgresql/extension/';fi)" % (
                extension_config02, extension_config02, clusterAppPath)
        cmd += \
            " && (if [ -f '%s' ];then cp -f " \
            "-p '%s' '%s/share/postgresql/extension/';fi)" % (
                extension_config03, extension_config03, clusterAppPath)
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# restore dict file and grpc file
        dictFileDir = "%s/tsearch_data" % bakPath
        dictDesPath = "%s/share/postgresql" % clusterAppPath
        grpcFileDir = "%s/grpc" % bakPath
        grpcDesPath = "%s/share/sslcert" % clusterAppPath
        cmd = "if [ -d '%s' ];then cp -r '%s' '%s/' ;fi &&" % (
            dictFileDir, dictFileDir, dictDesPath)
        cmd += "if [ -d '%s' ];then cp -r '%s' '%s/' ;fi" % (
            grpcFileDir, grpcFileDir, grpcDesPath)
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
cm_cert_backup_dir = os.path.realpath(os.path.join(bakPath, "cm"))
cm_cert_dest_dir = os.path.realpath(os.path.join(clusterAppPath, "share", "sslcert"))
restore_cm_cert_file_cmd = "if [ -d '%s' ]; " \
"then cp -r '%s' '%s'; fi" % (cm_cert_backup_dir,
cm_cert_backup_dir,
cm_cert_dest_dir)
g_logger.debug("Restore CM cert files command: %s" % restore_cm_cert_file_cmd)
CmdExecutor.execCommandLocally(restore_cm_cert_file_cmd)
g_logger.debug("Restore CM cert files successfully.")
om_cert_backup_dir = os.path.realpath(os.path.join(bakPath, "om"))
om_cert_dest_dir = os.path.realpath(os.path.join(clusterAppPath, "share", "sslcert"))
restore_om_cert_file_cmd = "if [ -d '%s' ]; " \
"then cp -r '%s' '%s'; fi" % (om_cert_backup_dir,
om_cert_backup_dir,
om_cert_dest_dir)
g_logger.debug("Restore OM cert files command: %s" % restore_om_cert_file_cmd)
CmdExecutor.execCommandLocally(restore_om_cert_file_cmd)
g_logger.debug("Restore OM cert files successfully.")
except Exception as e:
raise Exception(str(e))
def restoreDynamicConfigFile():
"""
function: restore dynamic config file
output: None
:return:
"""
bakPath = g_opts.upgrade_bak_path
newClusterAppPath = g_opts.newClusterAppPath
oldClusterAppPath = g_opts.oldClusterAppPath
    # cp the current (old) cluster's dynamic config file to the new app path
newDynamicConfigFile = "%s/bin/cluster_dynamic_config" % oldClusterAppPath
FileUtil.removeFile("%s/bin/cluster_dynamic_config" % newClusterAppPath)
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
newDynamicConfigFile, newDynamicConfigFile, newClusterAppPath)
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# cp old dynamic config file to old app path
dynamic_config = "%s/cluster_dynamic_config" % bakPath
FileUtil.removeFile(newDynamicConfigFile)
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
dynamic_config, dynamic_config, oldClusterAppPath)
g_logger.debug("Restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
def inplaceBackup():
"""
function: backup config
output: none
"""
try:
# backup gds files
bakPath = g_opts.upgrade_bak_path
gdspath = "%s/share/sslcert/gds" % g_clusterInfo.appPath
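        # tighten the source cert file permissions to 600 before copying
        # the directory into the backup path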
cmd = "(if [ -d '%s' ];" \
"then chmod 600 -R '%s'/*; cp -r '%s' '%s';fi)" % (
gdspath, gdspath, gdspath, bakPath)
g_logger.debug("Inplace backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# backup gsql files
bakPath = g_opts.upgrade_bak_path
gsqlpath = "%s/share/sslcert/gsql" % g_clusterInfo.appPath
cmd = "(if [ -d '%s' ];then chmod 600 -R '%s'/*; cp -r '%s' '%s';fi)" %\
(gsqlpath, gsqlpath, gsqlpath, bakPath)
g_logger.debug("Inplace backup command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
except Exception as e:
raise Exception(str(e))
def inplaceRestore():
"""
function: restore config
output: none
"""
try:
# restore gds files
gdspath = "%s/share/sslcert/" % g_clusterInfo.appPath
gdsbackup = "%s/gds" % g_opts.upgrade_bak_path
cmd = "(if [ -d '%s' ];then cp -r '%s' '%s';fi)" % (
gdsbackup, gdsbackup, gdspath)
g_logger.debug("Inplace restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
except Exception as e:
raise Exception(str(e))
def checkGucValue():
"""
function: check guc value
input : NA
output : NA
"""
try:
if g_opts.fromFile:
checkGucValueFromFile()
else:
checkGucValueByShowing()
except Exception as e:
g_logger.debug("Failed to check dn guc paramter by "
"showing. Error is:{0}."
"Trying to check form file".format(str(e)))
checkGucValueFromFile()
def checkGucValueByShowing():
"""
check dn guc value by "show guc" in database in all nodes
"""
instance_list = getDnInstance()
if len(instance_list) != 0:
pool = ThreadPool(len(instance_list))
pool.map(checkOneInstanceGucValueByShowing, instance_list)
pool.close()
pool.join()
def checkOneInstanceGucValueByShowing(instance):
"""
check dn guc value by "show guc" in database in every node
:param instance:
:return:
"""
key = g_opts.gucStr.split(':')[0].strip()
value = g_opts.gucStr.split(':')[1].strip().split(",")
g_logger.debug(
"Check if the value of guc {0} is {1}. "
"Instance data dir is: {2}".format(key, value, instance.datadir))
sql = "show %s;" % key
g_logger.debug("Command to check value is: %s" % sql)
retryTimes = 300
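    # query the instance repeatedly and return as soon as SHOW reports
    # one of the expected values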
for _ in range(retryTimes):
(status, output) = \
ClusterCommand.execSQLCommand(
sql, g_opts.user, "", instance.port, "postgres",
"-m", IsInplaceUpgrade=True)
g_logger.debug("SQL [{0}] perform output: {1}".format(sql, output))
if status == 0 and output != "":
g_logger.debug("Output is: %s" % output)
checkValue = output.strip()
if str(checkValue) in value:
return
raise Exception(ErrorCode.GAUSS_521["GAUSS_52102"] % key +
" expect value %s" % (str(value)))
def getDnInstance():
"""
get all dn instance
"""
instance_list = []
if len(g_dbNode.datanodes) != 0:
for eachInstance in g_dbNode.datanodes:
if eachInstance.instanceType in [MASTER_INSTANCE, STANDBY_INSTANCE, CASCADE_STANDBY]:
instance_list.append(eachInstance)
return instance_list
def checkGucValueFromFile():
"""
    check the guc value from the instance config file;
    for now, it is only used for upgrade_from
"""
try:
key = g_opts.gucStr.split(':')[0].strip()
value = g_opts.gucStr.split(':')[1].strip()
value, instances, fileName = getGucInfo(key, value)
if key in [const.ENABLE_STREAM_REPLICATION_NAME]:
g_logger.debug("Jump to check paremeter: {0}".format(key))
return
if key in ["upgrade_mode"]:
sql = "show {0};".format(key)
cmd = "gsql -p {0} -d postgres -c '{1}'".format(instances[0].port, sql)
(status, output) = CmdUtil.retryGetstatusoutput(cmd)
if status != 0:
g_logger.debug("Gsql check GUC parameter [{0}] failed. "
"output:{1}".format(key, output))
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd)
gsql_result_list = output.strip().split("\n")
g_logger.debug("Check GUC parameter from database result: "
"{0}".format(gsql_result_list))
if gsql_result_list and gsql_result_list[0].strip() != key:
g_logger.debug("Check GUC parameter result error, first line is : "
"{0}".format(gsql_result_list[0].strip()))
raise Exception("Check GUC parameter result error, first line is : "
"{0}".format(gsql_result_list[0].strip()))
if str(gsql_result_list[2].strip()) not in str(value):
raise Exception(ErrorCode.GAUSS_521["GAUSS_52102"] % key +
" Real value %s, expect value %s" % (
str(gsql_result_list[2].strip()), str(value)))
return
for inst in instances:
configFile = "%s/%s" % (inst.datadir, fileName)
cmd = "sed 's/\t/ /g' %s | grep '^[ ]*\<%s\>[ ]*=' | awk -F '=' '{print $2}'" % \
(configFile, key)
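            # normalize tabs to spaces, match the uncommented '<key> =' line
            # and print everything after '='; any trailing '#' comment is
            # stripped from the value below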
g_logger.debug("Command for checking guc:%s" % cmd)
retryTimes = 10
for _ in range(retryTimes):
(status, output) = CmdUtil.retryGetstatusoutput(cmd)
if status != 0:
time.sleep(3)
g_logger.debug(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
" Output: \n%s" % output)
continue
if "" == output:
raise Exception(
ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " There is no %s in %s" %
(key, configFile))
realValue = output.split('\n')[0].strip().strip("'")
if '#' in realValue:
realValue = realValue.split('#')[0].strip().strip("'")
g_logger.debug("[key:%s]: Realvalue %s, ExpectValue %s" % (key, str(realValue),
str(value)))
if str(realValue) not in str(value):
raise Exception(ErrorCode.GAUSS_521["GAUSS_52102"] % key +
" Real value %s, expect value %s" % (
str(realValue), str(value)))
break
except Exception as er:
raise Exception(str(er))
def get_dn_instance():
"""
    get all dn instances on this node
"""
try:
InstanceList = []
if len(g_dbNode.datanodes) != 0:
for eachInstance in g_dbNode.datanodes:
if eachInstance.instanceType in \
[MASTER_INSTANCE, STANDBY_INSTANCE, CASCADE_STANDBY]:
InstanceList.append(eachInstance)
return InstanceList
except Exception as er:
raise Exception(str(er))
def backupInstanceHotpatchConfig(instanceDataDir):
"""
function: backup
input : instanceDataDir
output : NA
"""
hotpatch_info_file = "%s/hotpatch/patch.info" % instanceDataDir
hotpatch_info_file_bak = "%s/hotpatch/patch.info.bak" % instanceDataDir
cmd = "(if [ -f '%s' ];then mv -f '%s' '%s';fi)" % (
hotpatch_info_file, hotpatch_info_file, hotpatch_info_file_bak)
(status, output) = subprocess.getstatusoutput(cmd)
if status != 0:
raise Exception(
ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output)
def backupHotpatch():
"""
    function: back up hotpatch info files, so that if the upgrade process
              fails while checking the cluster status, the user can
              re-enter the upgrade process
"""
if os.path.samefile(g_gausshome, g_opts.newClusterAppPath):
g_logger.debug("Has switched to new version, no need to backup again.")
return
for dbInstance in g_dbNode.cmservers:
backupInstanceHotpatchConfig(dbInstance.datadir)
for dbInstance in g_dbNode.coordinators:
backupInstanceHotpatchConfig(dbInstance.datadir)
for dbInstance in g_dbNode.datanodes:
backupInstanceHotpatchConfig(dbInstance.datadir)
for dbInstance in g_dbNode.gtms:
backupInstanceHotpatchConfig(dbInstance.datadir)
def rollbackInstanceHotpatchConfig(instanceDataDir):
"""
function: rollback
input : instanceDataDir
output : NA
"""
hotpatch_info_file = "%s/hotpatch/patch.info" % instanceDataDir
hotpatch_info_file_bak = "%s/hotpatch/patch.info.bak" % instanceDataDir
cmd = "(if [ -f '%s' ];then mv -f '%s' '%s';fi)" % (
hotpatch_info_file_bak, hotpatch_info_file_bak, hotpatch_info_file)
(status, output) = subprocess.getstatusoutput(cmd)
if status != 0:
raise Exception(
ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output)
def rollbackHotpatch():
"""
function: rollback
input : NA
output : NA
"""
for dbInstance in g_dbNode.cmservers:
rollbackInstanceHotpatchConfig(dbInstance.datadir)
for dbInstance in g_dbNode.coordinators:
rollbackInstanceHotpatchConfig(dbInstance.datadir)
for dbInstance in g_dbNode.datanodes:
rollbackInstanceHotpatchConfig(dbInstance.datadir)
for dbInstance in g_dbNode.gtms:
rollbackInstanceHotpatchConfig(dbInstance.datadir)
def readDeleteGuc():
"""
    function: get the GUC parameters to delete from file
    input: NA
    output: return the dict gucContent[instanceName]: [guc_name, ...];
            the key instanceName is one of gtm, coordinator,
            datanode, cmserver, cmagent
"""
deleteGucFile = os.path.join(g_opts.upgrade_bak_path,
"upgrade_sql/set_guc/delete_guc")
# Create tmp dir for delete_guc
delete_guc_tmp = "%s/upgrade_sql/set_guc" % g_opts.upgrade_bak_path
FileUtil.createDirectory(delete_guc_tmp)
FileUtil.createFileInSafeMode(deleteGucFile)
if not os.path.isfile(deleteGucFile):
raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % deleteGucFile)
g_logger.debug("Get the delete GUC from file %s." % deleteGucFile)
gucContent = {}
with open(deleteGucFile, 'r') as fp:
resList = fp.readlines()
for oneLine in resList:
oneLine = oneLine.strip()
# skip blank line and comment line
if not oneLine or oneLine.startswith('#'):
continue
result = oneLine.split()
if len(result) != 2:
raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"] % deleteGucFile)
gucName = result[0]
instanceName = result[1]
gucContent.setdefault(instanceName, []).append(gucName)
g_logger.debug("Successfully get the delete GUC from file.")
return gucContent
def cleanInstallPath():
"""
function: clean install path
input : NA
output : NA
"""
installPath = g_opts.appPath
if not os.path.exists(installPath):
g_logger.debug(ErrorCode.GAUSS_502[
"GAUSS_50201"] % installPath + " No need to clean.")
return
if not os.listdir(installPath):
g_logger.debug("The path %s is empty." % installPath)
cmd = "(if [ -d '%s' ]; then rm -rf '%s'; fi)" % (
installPath, installPath)
g_logger.log("Command for cleaning install path: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
return
if g_opts.forceRollback and not os.path.islink(g_gausshome):
g_logger.log(
"Under force rollback mode, "
"$GAUSSHOME is not symbolic link. No need to clean.")
return
elif os.path.samefile(installPath, g_gausshome):
g_logger.log("The install path is $GAUSSHOME, cannot clean.")
return
tmpDir = EnvUtil.getTmpDirFromEnv(g_opts.user)
if tmpDir == "":
raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$PGHOST")
    # during the upgrade the directories were set to read-and-execute mode
    # to keep them from being modified, so after switching to the new
    # version we restore the original permissions; only then do we have
    # the permission to clean appPath during commit-upgrade.
    # under rollback, we also need to restore the permissions
pluginPath = "%s/lib/postgresql/pg_plugin" % installPath
cmd = "(if [ -d '%s' ]; then chmod -R %d '%s'; fi)" % (
pluginPath, DefaultValue.KEY_DIRECTORY_MODE, pluginPath)
cm_cert_dir = os.path.realpath(os.path.join(installPath, "share", "sslcert", "cm"))
cmd += " && (if [ -d '%s' ]; then chmod -R %d '%s'; fi)" % (cm_cert_dir,
DefaultValue.KEY_DIRECTORY_MODE,
cm_cert_dir)
appBakPath = "%s/to_be_delete" % tmpDir
cmd += " && (if [ -d '%s' ]; then chmod -R %d '%s'; fi)" % (appBakPath,
DefaultValue.KEY_DIRECTORY_MODE,
appBakPath)
cmd += " && (if [ ! -d '%s' ]; then mkdir -p '%s'; fi)" % (
appBakPath, appBakPath)
cmd += " && (if [ -d '%s' ]; then cp -r '%s/' '%s/to_be_delete/'; fi)" % (
installPath, installPath, tmpDir)
g_logger.debug(
"Command for change permission and backup install path: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
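    # remove only the known openGauss sub-directories and files; anything
    # the user placed in the install path is preserved and reported below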
cmd = "(if [ -d '%s/bin' ]; then rm -rf '%s/bin'; fi) &&" % \
(installPath, installPath)
cmd += "(if [ -d '%s/etc' ]; then rm -rf '%s/etc'; fi) &&" % \
(installPath, installPath)
cmd += "(if [ -d '%s/include' ]; then rm -rf '%s/include'; fi) &&" % \
(installPath, installPath)
cmd += "(if [ -d '%s/lib' ]; then rm -rf '%s/lib'; fi) &&" % \
(installPath, installPath)
cmd += "(if [ -d '%s/share' ]; then rm -rf '%s/share'; fi) &&" % \
(installPath, installPath)
cmd += "(if [ -d '%s/logs' ]; then rm -rf '%s/logs'; fi) &&" % \
(installPath, installPath)
cmd += "(if [ -d '%s/utilslib' ]; then rm -rf '%s/utilslib'; fi) && " % \
(installPath, installPath)
cmd += "(if [ -d '%s/jre' ]; then rm -rf '%s/jre'; fi) && " % \
(installPath, installPath)
cmd += "(if [ -d '%s/jdk' ]; then rm -rf '%s/jdk'; fi) && " % \
(installPath, installPath)
cmd += "(if [ -d '%s/kerberos' ]; then rm -rf '%s/kerberos'; fi) &&" % \
(installPath, installPath)
cmd += "(if [ -d '%s/var/krb5kdc' ]; then rm -rf '%s/var/krb5kdc'; fi) &&" \
% (installPath, installPath)
cmd += "(if [ -d '%s/simpleInstall' ]; then rm -rf '%s/simpleInstall';" \
" fi) &&" % (installPath, installPath)
cmd += "(if [ -e '%s/version.cfg' ]; then rm -rf '%s/version.cfg'; fi) &&"\
% (installPath, installPath)
cmd += "(if [ -e '%s/.gaussUDF.socket' ]; then rm -rf '%s/.gaussUDF.socket'; fi)" \
% (installPath, installPath)
CmdExecutor.execCommandLocally(cmd)
if os.listdir(installPath):
        g_logger.log(
            "The path %s has personal files or directories, "
            "please remove them." % installPath)
else:
cmd = "(if [ -d '%s' ]; then rm -rf '%s'; fi)" % (
installPath, installPath)
g_logger.log("Command for cleaning install path: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
def copyCerts():
"""
function: copy certs
input : NA
output : NA
"""
g_logger.debug("Starting copy Certs")
oldBinPath = os.path.join(g_opts.oldClusterAppPath, "bin")
newBinPath = os.path.join(g_opts.newClusterAppPath, "bin")
oldOmSslCerts = os.path.join(g_opts.oldClusterAppPath, "share/sslcert/om")
newOmSslCerts = os.path.join(g_opts.newClusterAppPath, "share/sslcert/om")
if FileUtil.checkFileExists("%s/server.key.cipher" % oldBinPath):
FileUtil.cpFile("%s/server.key.cipher" % oldBinPath, "%s/" % newBinPath)
FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, "%s/server.key.cipher" %
newBinPath)
if FileUtil.checkFileExists("%s/server.key.rand" % oldBinPath):
FileUtil.cpFile("%s/server.key.rand" % oldBinPath, "%s/" % newBinPath)
FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, "%s/server.key.rand" %
newBinPath)
for certFile in DefaultValue.SERVER_CERT_LIST:
if FileUtil.checkFileExists("%s/%s" % (oldOmSslCerts, certFile)):
FileUtil.cpFile("%s/%s" % (oldOmSslCerts, certFile), "%s/" %
newOmSslCerts)
FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, "%s/*" %
newOmSslCerts)
def prepareUpgradeSqlFolder():
"""
    function: verify upgrade_sql.tar.gz and extract it to the binary backup
        path; if gs_upgradectl is executed again, the sql folder is
        decompressed again in case the files in the backup path were damaged
input : NA
output: NA
"""
g_logger.debug("Preparing upgrade sql folder.")
# verify upgrade_sql.tar.gz
dirName = os.path.dirname(os.path.realpath(__file__))
packageDir = os.path.join(dirName, "./../../")
packageDir = os.path.normpath(packageDir)
upgrade_sql_gz_file = "%s/%s" % (packageDir, const.UPGRADE_SQL_FILE)
upgrade_sql_sha256_file = "%s/%s" % (packageDir, const.UPGRADE_SQL_SHA)
if not os.path.isfile(upgrade_sql_gz_file):
raise Exception(
ErrorCode.GAUSS_502["GAUSS_50201"] % upgrade_sql_gz_file)
if not os.path.isfile(upgrade_sql_sha256_file):
raise Exception(
ErrorCode.GAUSS_502["GAUSS_50201"] % upgrade_sql_sha256_file)
g_logger.debug(
"The SQL file is %s, the sha256 file is %s." % (
upgrade_sql_gz_file, upgrade_sql_sha256_file))
g_logger.debug("Checking the SHA256 value of upgrade sql folder.")
sha256Actual = FileUtil.getFileSHA256(upgrade_sql_gz_file)
sha256Record = FileUtil.readFile(upgrade_sql_sha256_file)
if sha256Actual.strip() != sha256Record[0].strip():
raise Exception(ErrorCode.GAUSS_516["GAUSS_51635"] + \
" The SHA256 value is different: \nTar file: "
"%s \nSHA256 file: %s " % \
(upgrade_sql_gz_file, upgrade_sql_sha256_file))
# extract it to binary backup path
    # the backup path was just recreated in the previous step, so it should
    # not contain an upgrade_sql folder and no cleanup is needed
g_logger.debug("Extracting upgrade sql folder.")
CompressUtil.decompressFiles(upgrade_sql_gz_file, g_opts.upgrade_bak_path)
g_logger.debug("Successfully prepared upgrade sql folder.")
def backupOldClusterDBAndRel():
"""
backup old cluster db and rel info
get database list
connect to each cn and master dn
connect to each database, and get rel info
"""
g_logger.log("Backing up old cluster database and catalog.")
try:
InstanceList = []
# find all instances need to do backup
if len(g_dbNode.coordinators) != 0:
InstanceList.append(g_dbNode.coordinators[0])
        primaryDnInstance = getLocalPrimaryDNInstance()
        if primaryDnInstance:
            InstanceList.extend(primaryDnInstance)
# do backup parallelly
if len(InstanceList) != 0:
pool = ThreadPool(len(InstanceList))
pool.map(backupOneInstanceOldClusterDBAndRel, InstanceList)
pool.close()
pool.join()
else:
g_logger.debug("No master instance found on this node, "
"nothing need to do.")
return
g_logger.log("Successfully backed up old cluster database and catalog.")
except Exception as e:
g_logger.logExit(str(e))
def getLocalPrimaryDNInstance():
"""
function: Get local primary DN instance
input: NA
output: NA
"""
g_logger.log("We will find all primary dn instance in the local node.")
tmpFile = os.path.join(EnvUtil.getTmpDirFromEnv(
g_opts.user), const.TMP_DYNAMIC_DN_INFO)
primaryDNList = []
try:
# Match query results and cluster configuration
clusterStatus = DbClusterStatus()
clusterStatus.initFromFile(tmpFile)
# Find the master DN instance
for dbNode in clusterStatus.dbNodes:
for instance in dbNode.datanodes:
if instance.status == 'Primary' and \
instance.nodeId == g_dbNode.id:
for eachInstance in g_dbNode.datanodes:
if eachInstance.instanceId == instance.instanceId:
primaryDNList.append(eachInstance)
                            g_logger.log(
                                "Successfully got the primary dn instance:"
                                "{0}.".format(instance.__dict__))
return primaryDNList
except Exception as er:
raise Exception(str(er))
def getGucInfo(key, value):
"""
:return:
"""
if value in const.VALUE_OFF:
value = const.VALUE_OFF
if value in const.VALUE_ON:
value = const.VALUE_ON
if key in const.CMA_GUC:
instances = g_dbNode.cmagents
fileName = "cm_agent.conf"
elif key in const.CMS_GUC:
instances = g_dbNode.cmservers
fileName = "cm_server.conf"
elif key in const.DN_GUC:
instances = g_dbNode.datanodes
fileName = "postgresql.conf"
else:
raise Exception("No such key to check guc value.")
return value, instances, fileName
def backupOneInstanceOldClusterDBAndRel(instance):
"""
backup db and catalog info for one old cluster instance
do checkpoint
get database info list
remove template0
connect each database, get catalog info
save to file
"""
tmpDir = EnvUtil.getTmpDirFromEnv(g_opts.user)
if tmpDir == "":
raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$PGHOST")
g_logger.debug(
"Obtaining instance catalog information. Instance data dir: %s" %
instance.datadir)
dbInfoDict = {}
dbInfoDict["dblist"] = []
dbInfoDict["dbnum"] = 0
backup_path = "%s/oldClusterDBAndRel/" % g_opts.upgrade_bak_path
try:
# get database info
get_db_list_sql = """SELECT d.datname, d.oid,
pg_catalog.pg_tablespace_location(t.oid) AS spclocation
FROM pg_catalog.pg_database d LEFT OUTER JOIN
pg_catalog.pg_tablespace t ON d.dattablespace = t.oid ORDER BY 2;"""
g_logger.debug("Get database info command: \n%s" % get_db_list_sql)
(status, output) = ClusterCommand.execSQLCommand(get_db_list_sql,
g_opts.user, "",
instance.port,
"postgres",
"-m",
IsInplaceUpgrade=True)
if status != 0:
raise Exception(ErrorCode.GAUSS_513[
"GAUSS_51300"] % get_db_list_sql +
" Error:\n%s" % output)
if output == "":
raise Exception("can not find any database!!")
g_logger.debug("Get database info result: \n%s." % output)
resList = output.split('\n')
for each_line in resList:
tmpDbInfo = initDbInfo()
(datname, oid, spclocation) = each_line.split('|')
tmpDbInfo['dbname'] = datname.strip()
tmpDbInfo['dboid'] = oid.strip()
tmpDbInfo['spclocation'] = spclocation.strip()
dbInfoDict["dblist"].append(tmpDbInfo)
dbInfoDict["dbnum"] += 1
# connect each database, get catalog info
get_catalog_list_sql = """SELECT p.oid, n.nspname, p.relname,
pg_catalog.pg_relation_filenode(p.oid) AS relfilenode,
p.reltablespace, pg_catalog.pg_tablespace_location(t.oid) AS
spclocation, p.relpersistence
FROM pg_catalog.pg_class p INNER JOIN pg_catalog.pg_namespace n ON
(p.relnamespace = n.oid)
LEFT OUTER JOIN pg_catalog.pg_tablespace t ON (p.reltablespace = t.oid)
WHERE p.oid < 16384 AND
p.relkind IN ('r', 'i', 't') AND
p.relisshared= false
ORDER BY 1;"""
g_logger.debug("Get catalog info command: \n%s" % get_catalog_list_sql)
for each_db in dbInfoDict["dblist"]:
# template0 need handle specially, skip it here
if each_db["dbname"] == 'template0':
continue
(status, output) = ClusterCommand.execSQLCommand(
get_catalog_list_sql, g_opts.user, "", instance.port,
each_db["dbname"], "-m", IsInplaceUpgrade=True)
if status != 0:
raise Exception(ErrorCode.GAUSS_513[
"GAUSS_51300"] % get_catalog_list_sql +
" Error:\n%s" % output)
if output == "":
raise Exception("can not find any catalog!!")
g_logger.debug("Get catalog info result of %s: \n%s." % (
each_db["dbname"], output))
resList = output.split('\n')
for each_line in resList:
tmpCatalogInfo = initCatalogInfo()
(oid, nspname, relname, relfilenode, reltablespace, spclocation, relpersistence) = \
each_line.split('|')
tmpCatalogInfo['oid'] = oid.strip()
tmpCatalogInfo['relname'] = relname.strip()
tmpCatalogInfo['relfilenode'] = relfilenode.strip()
tmpCatalogInfo['relpersistence'] = relpersistence.strip()
each_db["CatalogList"].append(tmpCatalogInfo)
each_db["CatalogNum"] += 1
        # save db and catalog info into file
instance_name = getInstanceName(instance)
# handle master dn instance
dn_db_and_catalog_info_file_name = \
"%s/dn_db_and_catalog_info_%s.json" % (
backup_path, instance_name)
DbInfoStr = json.dumps(dbInfoDict, indent=2)
fp = open(dn_db_and_catalog_info_file_name, 'w')
fp.write(DbInfoStr)
fp.flush()
fp.close()
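        # the standby and cascade standby peers of this DN also need the
        # catalog snapshot, so push the json file to each of them with pscp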
standbyInstLst = []
peerInsts = g_clusterInfo.getPeerInstance(instance)
for i in range(len(peerInsts)):
if peerInsts[i].instanceType in \
[MASTER_INSTANCE, STANDBY_INSTANCE, CASCADE_STANDBY]:
standbyInstLst.append(peerInsts[i])
for standbyInstance in standbyInstLst:
cmd = "pscp -H %s %s %s" % (
standbyInstance.hostname, dn_db_and_catalog_info_file_name,
dn_db_and_catalog_info_file_name)
g_logger.debug("exec cmd is: %s" % cmd)
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514[
"GAUSS_51400"] % cmd +
"\nOutput:%s" % output)
except Exception as e:
raise Exception(str(e))
g_logger.debug(
"Successfully obtained instance catalog information. "
"Instance data dir: %s" % instance.datadir)
def updateCatalog():
"""
    connect to each database and update the catalog one by one
1.get database list
2.connect each database, and exec update sql/check sql
"""
g_logger.log("Updating catalog.")
try:
update_catalog_maindb_sql = "{0}/{1}_catalog_maindb_tmp.sql".format(
g_opts.upgrade_bak_path, g_opts.scriptType)
update_catalog_otherdb_sql = "{0}/{1}_catalog_otherdb_tmp.sql".format(
g_opts.upgrade_bak_path,
g_opts.scriptType)
check_upgrade_sql = ""
if "upgrade" == g_opts.scriptType:
check_upgrade_sql = "{0}/check_upgrade_tmp.sql".format(
g_opts.upgrade_bak_path)
if not os.path.isfile(check_upgrade_sql):
raise Exception(
ErrorCode.GAUSS_502["GAUSS_50210"] % check_upgrade_sql)
if not os.path.isfile(update_catalog_maindb_sql):
raise Exception(
ErrorCode.GAUSS_502["GAUSS_50210"] % update_catalog_maindb_sql)
if not os.path.isfile(update_catalog_otherdb_sql):
raise Exception(
ErrorCode.GAUSS_502["GAUSS_50210"] % update_catalog_otherdb_sql)
        # locate a primary datanode instance, then get the database list
clusterNodes = g_clusterInfo.dbNodes
for dbNode in clusterNodes:
if len(dbNode.datanodes) == 0:
continue
dnInst = dbNode.datanodes[0]
primaryDnNode, _ = DefaultValue.getPrimaryNode(g_opts.userProfile)
if dnInst.hostname not in primaryDnNode:
continue
break
reslines = get_database_list(dnInst)
# connect each database, and exec update sql/check sql
maindb = "postgres"
otherdbs = reslines
otherdbs.remove("postgres")
# 1.handle maindb first
upgrade_one_database([maindb, dnInst.port,
update_catalog_maindb_sql, check_upgrade_sql])
# 2.handle otherdbs
upgrade_info = []
for eachdb in otherdbs:
g_logger.debug("Updating catalog for database %s." % eachdb)
upgrade_info.append([eachdb, dnInst.port,
update_catalog_otherdb_sql, check_upgrade_sql])
if len(upgrade_info) != 0:
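            # ThreadPool(1) processes the remaining databases one at a time,
            # so the catalog updates effectively run serially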
pool = ThreadPool(1)
pool.map(upgrade_one_database, upgrade_info)
pool.close()
pool.join()
g_logger.log("Successfully updated catalog.")
except Exception as e:
g_logger.logExit(str(e))
def get_database_list(dnInst):
"""
get database list
:return:
"""
# get database list
sqlSelect = "select datname from pg_database;"
g_logger.debug("Command for getting database list: %s" % sqlSelect)
(status, output) = ClusterCommand.execSQLCommand(
sqlSelect, g_opts.user, "", dnInst.port, IsInplaceUpgrade=True)
g_logger.debug("The result of database list: %s." % output)
if 0 != status:
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] %
sqlSelect + " Error:\n%s" % output)
if "" == output:
raise Exception(
"No database objects were found in the cluster!")
reslines = (output.strip()).split('\n')
if (len(reslines) < 3
or "template1" not in reslines
or "template0" not in reslines
or "postgres" not in reslines):
raise Exception(
"The database list is invalid:%s." % str(reslines))
return reslines
def upgrade_one_database(upgrade_info):
"""
upgrade catalog for one database
"""
try:
db_name = upgrade_info[0]
port = upgrade_info[1]
update_catalog_file = upgrade_info[2]
check_upgrade_file = upgrade_info[3]
g_logger.debug("Updating catalog for database %s" % db_name)
execSQLFile(db_name, update_catalog_file, port)
if "" != check_upgrade_file:
execSQLFile(db_name, check_upgrade_file, port)
except Exception as e:
raise Exception(str(e))
def execSQLFile(dbname, sqlFile, cn_port):
"""
exec sql file
"""
gsql_cmd = SqlCommands.getSQLCommandForInplaceUpgradeBackup(
cn_port, dbname.replace('$', '\$'))
cmd = "%s -X --echo-queries --set ON_ERROR_STOP=on -f %s" % (
gsql_cmd, sqlFile)
(status, output) = subprocess.getstatusoutput(cmd)
g_logger.debug("Catalog modification log for database %s:\n%s." % (
dbname, output))
if status != 0 or SqlFile.findErrorInSqlFile(sqlFile, output):
g_logger.debug(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd)
raise Exception("Failed to update catalog. Error: %s" % str(output))
def backupOldClusterCatalogPhysicalFiles():
"""
backup old cluster catalog physical files
get database list
connect to each cn and dn,
connect to each database, and do backup
"""
g_logger.log("Backing up old cluster catalog physical files.")
try:
InstanceList = []
# find all instances need to do backup
if len(g_dbNode.coordinators) != 0:
InstanceList.append(g_dbNode.coordinators[0])
if len(g_dbNode.datanodes) != 0:
for eachInstance in g_dbNode.datanodes:
InstanceList.append(eachInstance)
# do backup parallelly
if len(InstanceList) != 0:
pool = ThreadPool(len(InstanceList))
pool.map(
backupOneInstanceOldClusterCatalogPhysicalFiles, InstanceList)
pool.close()
pool.join()
else:
g_logger.debug("No master instance found on this node,"
" nothing need to do.")
return
g_logger.log(
"Successfully backed up old cluster catalog physical files.")
except Exception as e:
g_logger.logExit(str(e))
def backupOneInstanceOldClusterCatalogPhysicalFiles(instance):
"""
backup catalog physical files for one old cluster instance
read database and catalog info from file
connect each database, do backup
"""
g_logger.debug("Backup instance catalog physical files and xlog. "
"Instance data dir: %s" % instance.datadir)
try:
        # back up the directories in the backup dir list
__backup_global_dir(instance)
if instance.instanceRole == INSTANCE_ROLE_DATANODE and \
instance.instanceType in [DUMMY_STANDBY_INSTANCE, CASCADE_STANDBY]:
g_logger.debug("There is no need to backup catalog. "
"Instance data dir: %s" % instance.datadir)
return
if is_dcf_mode():
__backup_dcf_file(instance)
__backup_xlog_file(instance)
__backup_cbm_file(instance)
__backup_base_folder(instance)
except Exception as e:
raise Exception(str(e))
    g_logger.debug(
        "Successfully backed up instance catalog physical files and xlog. "
        "Instance data dir: %s" % instance.datadir)
def __backup_global_dir(instance):
"""
"""
g_logger.debug("Start to back up global_dir")
try:
        # copy the list so that the shared constant is not modified
        backup_dir_list = list(const.BACKUP_DIR_LIST_BASE)
        if float(g_opts.oldclusternum) < float(const.UPGRADE_VERSION_64bit_xid):
            backup_dir_list.extend(const.BACKUP_DIR_LIST_64BIT_XID)
for name in backup_dir_list:
srcDir = "%s/%s" % (instance.datadir, name)
destDir = "%s_bak" % srcDir
if os.path.isdir(srcDir):
cpDirectory(srcDir, destDir)
g_logger.debug("Successfully backed up global_dir")
except Exception as e:
raise Exception(str(e))
def __backup_dcf_file(instance):
"""
backup dcf files for in-place upgrade.
"""
try:
g_logger.debug("Backup instance dcf files. Instance data_dir: %s" % instance.datadir)
dcf_back_dir = os.path.join(instance.datadir, "dcf_data_bak")
cmd = "rm -rf '%s' " % dcf_back_dir
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output)
dcf_dir = os.path.join(instance.datadir, "dcf_data")
if not os.path.exists(dcf_dir):
g_logger.debug("There is no dcf dir to backup for %d." % instance.instanceId)
return
# backup dcf data dir
cpDirectory(dcf_dir, dcf_back_dir)
# backup dcf apply index
dcf_index = "paxosindex"
dcf_index_back = dcf_index + ".backup"
dcf_index_file = os.path.join(instance.datadir, dcf_index)
dcf_index_back_file = os.path.join(instance.datadir, dcf_index_back)
if not os.path.exists(dcf_index_file) or not os.path.exists(dcf_index_back_file):
raise Exception("These is no paxos index file for instance %d, dir is: %s" \
% (instance.instanceId, instance.datadir))
dest_dcf_index_file = os.path.join(instance.datadir, dcf_index + "_upgrade_backup")
dest_dcf_index_back_file = os.path.join(instance.datadir, \
dcf_index_back + "_upgrade_backup")
shutil.copy2(dcf_index_file, dest_dcf_index_file)
shutil.copy2(dcf_index_back_file, dest_dcf_index_back_file)
g_logger.debug("Successfully backuped instance dcf files. "
"Instance data dir: %s" % instance.datadir)
except Exception as er:
raise Exception(str(er))
def __backup_xlog_file(instance):
"""
"""
try:
g_logger.debug("Backup instance xlog files. "
"Instance data dir: %s" % instance.datadir)
# get Latest checkpoint location
pg_xlog_info = __get_latest_checkpoint_location(instance)
xlog_back_file = os.path.join(
instance.datadir, "pg_xlog", pg_xlog_info.get(
'latest_checkpoint_redo_xlog_file'))
if not os.path.exists(xlog_back_file):
raise Exception("There is no xlog to backup for %d."
% instance.instanceId)
xlog_dir = os.path.join(instance.datadir, "pg_xlog")
xlog_file_list = os.listdir(xlog_dir)
xlog_file_list.sort()
backup_xlog_list = []
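        # segment file names are 24 hex characters and sort in LSN order,
        # so keep every segment at or after the latest checkpoint's REDO
        # segment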
for one_file in xlog_file_list:
if not os.path.isfile(os.path.join(xlog_dir, one_file)):
continue
if len(one_file) != 24:
continue
if one_file >= pg_xlog_info.get('latest_checkpoint_redo_xlog_file'):
backup_xlog_list.append(one_file)
if len(backup_xlog_list) == 0:
raise Exception("There is no xlog to backup for %d." %
instance.instanceId)
for one_file in backup_xlog_list:
src_file = os.path.join(xlog_dir, one_file)
dst_file = os.path.join(xlog_dir, one_file + "_upgrade_backup")
shutil.copy2(src_file, dst_file)
g_logger.debug("file {0} has been backed up to {1}".format(
src_file, dst_file))
xlog_backup_info = copy.deepcopy(pg_xlog_info)
xlog_backup_info['backup_xlog_list'] = backup_xlog_list
xlog_backup_info_target_file = os.path.join(xlog_dir,
const.XLOG_BACKUP_INFO)
FileUtil.createFileInSafeMode(xlog_backup_info_target_file)
with open(xlog_backup_info_target_file, "w") as fp:
json.dump(xlog_backup_info, fp)
g_logger.debug("XLOG backup info:%s." % xlog_backup_info)
g_logger.debug("Successfully backuped instance xlog files. "
"Instance data dir: %s" % instance.datadir)
except Exception as e:
raise Exception(str(e))
def __get_latest_checkpoint_location(instance):
try:
result = dict()
cmd = "pg_controldata '%s'" % instance.datadir
if g_opts.mpprcFile != "" and g_opts.mpprcFile is not None:
cmd = "source %s; %s" % (g_opts.mpprcFile, cmd)
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
g_logger.debug("Command for get control data:%s.Output:\n%s." % (
cmd, output))
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
"\nOutput:%s" % output)
time_line_id = ""
latest_checkpoint_redo_location = ""
for one_line in output.split('\n'):
one_line = one_line.strip()
if len(one_line.split(':')) == 2:
if one_line.split(':')[0].strip() == \
"Latest checkpoint's TimeLineID":
time_line_id = one_line.split(':')[1].strip()
elif one_line.split(':')[0].strip() == \
"Latest checkpoint's REDO location":
latest_checkpoint_redo_location = \
one_line.split(':')[1].strip()
if time_line_id != "" and latest_checkpoint_redo_location != "":
break
if time_line_id == "":
raise Exception(
"Failed to get Latest checkpoint's TimeLineID for %d." %
instance.instanceId)
if latest_checkpoint_redo_location == "":
raise Exception("Failed to get Latest checkpoint' "
"REDO location for %d." % instance.instanceId)
redo_log_id = latest_checkpoint_redo_location.split('/')[0]
redo_tmp_log_seg = latest_checkpoint_redo_location.split('/')[1]
if len(redo_tmp_log_seg) > 6:
redo_log_seg = redo_tmp_log_seg[0:-6]
else:
redo_log_seg = 0
latest_checkpoint_redo_xlog_file = \
"%08d%s%s" % (int(time_line_id, 16),
str(redo_log_id).zfill(8), str(redo_log_seg).zfill(8))
result['latest_checkpoint_redo_location'] = \
latest_checkpoint_redo_location
result['time_line_id'] = time_line_id
result['latest_checkpoint_redo_xlog_file'] = \
latest_checkpoint_redo_xlog_file
g_logger.debug("%d(pg_xlog_info):%s." % (instance.instanceId, result))
return result
except Exception as e:
raise Exception(str(e))
def __backup_cbm_file(instance):
"""
"""
try:
g_logger.debug("Backup instance cbm files. "
"Instance data dir: %s" % instance.datadir)
cbm_back_dir = os.path.join(instance.datadir, "pg_cbm_back")
cmd = "rm -rf '%s' " % cbm_back_dir
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
"\nOutput:%s" % output)
cbm_dir = os.path.join(instance.datadir, "pg_cbm")
if not os.path.exists(cbm_dir):
g_logger.debug("There is no cbm dir to backup for %d."
% instance.instanceId)
return
cpDirectory(cbm_dir, cbm_back_dir)
g_logger.debug("Successfully backuped instance cbm files. "
"Instance data dir: %s" % instance.datadir)
except Exception as e:
raise Exception(str(e))
def restoreOldClusterCatalogPhysicalFiles():
"""
restore old cluster catalog physical files
get database list
connect to each cn and dn,
    connect to each database, and do restore
"""
g_logger.log("Restoring old cluster catalog physical files.")
try:
InstanceList = []
# find all instances need to do restore
if len(g_dbNode.datanodes) != 0:
for eachInstance in g_dbNode.datanodes:
InstanceList.append(eachInstance)
# do restore parallelly
if len(InstanceList) != 0:
pool = ThreadPool(len(InstanceList))
pool.map(
restoreOneInstanceOldClusterCatalogPhysicalFiles, InstanceList)
pool.close()
pool.join()
else:
g_logger.debug("No master instance found on this node, "
"nothing need to do.")
return
g_logger.log(
"Successfully restored old cluster catalog physical files.")
except Exception as e:
g_logger.logExit(str(e))
def restoreOneInstanceOldClusterCatalogPhysicalFiles(instance):
"""
restore catalog physical files for one old cluster instance
read database and catalog info from file
connect each database, do restore
"""
g_logger.debug("Restore instance catalog physical files. "
"Instance data dir: %s" % instance.datadir)
try:
# handle dummy standby dn instance first
if instance.instanceRole == INSTANCE_ROLE_DATANODE and \
instance.instanceType == DUMMY_STANDBY_INSTANCE:
# clean pg_xlog folder of dummy standby dn instance and return
pg_xlog_dir = "%s/pg_xlog" % instance.datadir
cmd = "find '%s' -type f | xargs -r -n 100 rm -f" % pg_xlog_dir
CmdExecutor.execCommandLocally(cmd)
# restore list folder
__restore_global_dir(instance)
return
if is_dcf_mode():
__restore_dcf_file(instance)
__restore_global_dir(instance)
__restore_xlog_file(instance)
__restore_cbm_file(instance)
__restore_base_folder(instance)
except Exception as e:
raise Exception(str(e))
g_logger.debug("Successfully restored instance catalog physical files. "
"Instance data dir: %s" % instance.datadir)
def __restore_global_dir(instance):
"""
"""
try:
g_logger.debug("Start to restore global_dir")
backup_dir_list = const.BACKUP_DIR_LIST_BASE + const.BACKUP_DIR_LIST_64BIT_XID
for name in backup_dir_list:
srcDir = "%s/%s" % (instance.datadir, name)
destDir = "%s/%s_bak" % (instance.datadir, name)
if os.path.isdir(destDir):
cpDirectory(destDir, srcDir)
g_logger.debug("Successfully restored global_dir")
except Exception as e:
raise Exception(str(e))
def __restore_dcf_file(instance):
"""
"""
try:
g_logger.debug("restore instance dcf files. Instance data dir: %s" % instance.datadir)
dcf_dir = os.path.join(instance.datadir, "dcf_data")
dcf_back_dir = os.path.join(instance.datadir, "dcf_data_bak")
if not os.path.exists(dcf_back_dir):
g_logger.debug("There is no dcf dir to restore for %d." % instance.instanceId)
return
cmd = "rm -rf '%s' " % dcf_dir
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output)
cpDirectory(dcf_back_dir, dcf_dir)
        # restore dcf apply index
dcf_index = "paxosindex"
dcf_index_back = dcf_index + ".backup"
src_dcf_index_file = os.path.join(instance.datadir, dcf_index + "_upgrade_backup")
src_dcf_index_back_file = os.path.join(instance.datadir, dcf_index_back + "_upgrade_backup")
if not os.path.exists(src_dcf_index_file) or not os.path.exists(src_dcf_index_back_file):
raise Exception("There is no paxos index backup files, " \
"but they should be exist at this step." \
"instance is %d, data dir is: %s" \
% (instance.instanceId, instance.datadir))
dcf_index_file = os.path.join(instance.datadir, dcf_index)
dcf_index_back_file = os.path.join(instance.datadir, dcf_index_back)
shutil.copy2(src_dcf_index_file, dcf_index_file)
shutil.copy2(src_dcf_index_back_file, dcf_index_back_file)
g_logger.debug("Successfully restored instance dcf files. "
"Instance data dir: %s" % instance.datadir)
except Exception as er:
raise Exception(str(er))
def __restore_xlog_file(instance):
"""
"""
try:
g_logger.debug("Restore instance xlog files. "
"Instance data dir: %s" % instance.datadir)
# read xlog_backup_info
xlog_backup_info_file = os.path.join(instance.datadir,
"pg_xlog", const.XLOG_BACKUP_INFO)
if not os.path.exists(xlog_backup_info_file):
raise Exception(
ErrorCode.GAUSS_502["GAUSS_50201"] % xlog_backup_info_file)
with open(xlog_backup_info_file, "r") as fp:
xlog_backup_info_str = fp.read()
xlog_backup_info = json.loads(xlog_backup_info_str)
# clean new xlog after latest_checkpoint_xlog_file
xlog_dir = os.path.join(instance.datadir, "pg_xlog")
xlog_list = os.listdir(xlog_dir)
xlog_list.sort()
for one_file in xlog_list:
xlog_path = os.path.join(xlog_dir, one_file)
if len(one_file) == 24 and one_file >= xlog_backup_info[
'latest_checkpoint_redo_xlog_file'] and \
os.path.isfile(xlog_path):
g_logger.debug("%s:Removing %s." % (
instance.instanceId, xlog_path))
os.remove(xlog_path)
# restore old xlog file
for one_file in xlog_backup_info['backup_xlog_list']:
src_file = os.path.join(xlog_dir, one_file + "_upgrade_backup")
dst_file = os.path.join(xlog_dir, one_file)
if os.path.exists(src_file):
g_logger.debug("%s:Restoring %s." % (
instance.instanceId, dst_file))
shutil.copy2(src_file, dst_file)
else:
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % src_file)
g_logger.debug("Successfully restore instance xlog files. "
"Instance data dir: {0}".format(instance.datadir))
except Exception as e:
raise Exception(str(e))
def __restore_cbm_file(instance):
"""
"""
try:
g_logger.debug("restore instance cbm files. "
"Instance data dir: %s" % instance.datadir)
cbm_dir = os.path.join(instance.datadir, "pg_cbm")
cmd = "rm -rf '%s' " % cbm_dir
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
"\nOutput:%s" % output)
cbm_back_dir = os.path.join(instance.datadir, "pg_cbm_back")
if not os.path.exists(cbm_back_dir):
g_logger.debug("There is no cbm dir to restore for %d." %
instance.instanceId)
return
cpDirectory(cbm_back_dir, cbm_dir)
g_logger.debug("Successfully restored instance cbm files. "
"Instance data dir: %s" % instance.datadir)
except Exception as e:
raise Exception(str(e))
def cleanOldClusterCatalogPhysicalFiles():
"""
clean old cluster catalog physical files
get database list
connect to each cn and dn,
    connect to each database, and do clean
"""
g_logger.log("Cleaning old cluster catalog physical files.")
try:
# kill any pending processes that are
# copying backup catalog physical files
killCmd = DefaultValue.killInstProcessCmd(
"backup_old_cluster_catalog_physical_files")
(status, output) = subprocess.getstatusoutput(killCmd)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % killCmd +
"\nOutput:%s" % output)
InstanceList = []
# find all instances need to do clean
if len(g_dbNode.datanodes) != 0:
for eachInstance in g_dbNode.datanodes:
InstanceList.append(eachInstance)
# do clean parallelly
if len(InstanceList) != 0:
pool = ThreadPool(len(InstanceList))
pool.map(
cleanOneInstanceOldClusterCatalogPhysicalFiles, InstanceList)
pool.close()
pool.join()
else:
g_logger.debug("No master instance found on this node, "
"nothing need to do.")
return
g_logger.log("Successfully cleaned old cluster catalog physical files.")
except Exception as e:
g_logger.logExit(str(e))
def cleanOneInstanceOldClusterCatalogPhysicalFiles(instance):
"""
clean catalog physical files for one old cluster instance
read database and catalog info from file
    connect each database, do clean
"""
g_logger.debug("clean up instance catalog backup. "
"Instance data dir: %s" % instance.datadir)
try:
__clean_global_dir(instance)
if g_opts.rollback:
pg_csnlog_dir = os.path.join(instance.datadir, "pg_csnlog")
            # when doing rollback, if the old cluster number is less than
            # UPGRADE_VERSION_64bit_xid, remove the pg_csnlog directory
if float(g_opts.oldclusternum) < float(
const.UPGRADE_VERSION_64bit_xid) and \
os.path.isdir(pg_csnlog_dir):
FileUtil.removeDirectory(pg_csnlog_dir)
else:
pg_subtrans_dir = os.path.join(instance.datadir, "pg_subtrans")
            # when doing commit, remove the pg_subtrans directory
if os.path.isdir(pg_subtrans_dir):
FileUtil.removeDirectory(pg_subtrans_dir)
if instance.instanceRole == INSTANCE_ROLE_DATANODE and \
instance.instanceType == DUMMY_STANDBY_INSTANCE:
g_logger.debug("There is no need to clean catalog. "
"Instance data dir: %s" % instance.datadir)
return
if is_dcf_mode():
__clean_dcf_file(instance)
__clean_xlog_file(instance)
__clean_cbm_file(instance)
__clean_base_folder(instance)
except Exception as e:
raise Exception(str(e))
g_logger.debug("Successfully cleaned up instance catalog backup. "
"Instance data dir: %s" % instance.datadir)
def __clean_global_dir(instance):
"""
"""
# clean pg_internal.init*
g_logger.debug("Start to clean global_dir")
cmd = "rm -f %s/global/pg_internal.init*" % instance.datadir
CmdExecutor.execCommandLocally(cmd)
backup_dir_list = const.BACKUP_DIR_LIST_BASE + const.BACKUP_DIR_LIST_64BIT_XID
for name in backup_dir_list:
backup_dir = "%s/%s" % (instance.datadir, name)
cleanBackUpDir(backup_dir)
g_logger.debug("Successfully cleaned global_dir")
def __clean_dcf_file(instance):
"""
"""
# clean dcf backup files
dcf_back_dir = os.path.join(instance.datadir, "dcf_data_bak")
cmd = "rm -rf '%s' && rm -rf '%s'/*_upgrade_backup" % (dcf_back_dir, instance.datadir)
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "\nOutput:%s" % output)
g_logger.debug("Successfully clean instance dcf files. " \
"Instance data dir: {0}".format(instance.datadir))
def __clean_xlog_file(instance):
"""
"""
# clean *.upgrade_backup files
cmd = "rm -f '%s'/pg_xlog/*_upgrade_backup && rm -f '%s'/pg_xlog/%s" % \
(instance.datadir, instance.datadir, const.XLOG_BACKUP_INFO)
CmdExecutor.execCommandLocally(cmd)
g_logger.debug("Successfully clean instance xlog files. "
"Instance data dir: {0}".format(instance.datadir))
def __clean_cbm_file(instance):
"""
"""
# clean pg_cbm_back files
cbm_back_dir = os.path.join(instance.datadir, "pg_cbm_back")
cmd = "rm -rf '%s' " % cbm_back_dir
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
"\nOutput:%s" % output)
g_logger.debug("Successfully clean instance cbm files. "
"Instance data dir: {0}".format(instance.datadir))
def __clean_base_folder(instance):
"""
"""
g_logger.debug("Clean instance base folders. "
"Instance data dir: {0}".format(instance.datadir))
backup_path = os.path.join(g_opts.upgrade_bak_path, "oldClusterDBAndRel")
# get instance name
instance_name = getInstanceName(instance)
# load db and catalog info from json file
if instance.instanceRole == INSTANCE_ROLE_COODINATOR:
db_and_catalog_info_file_name = \
"%s/cn_db_and_catalog_info_%s.json" % (backup_path, instance_name)
elif instance.instanceRole == INSTANCE_ROLE_DATANODE:
if instance.instanceType in [MASTER_INSTANCE, STANDBY_INSTANCE, CASCADE_STANDBY]:
db_and_catalog_info_file_name = \
"%s/dn_db_and_catalog_info_%s.json" % (
backup_path, instance_name)
else:
raise Exception("Invalid instance type:%s" % instance.instanceType)
else:
raise Exception("Invalid instance role:%s" % instance.instanceRole)
with open(db_and_catalog_info_file_name, 'r') as fp:
dbInfoStr = fp.read()
try:
dbInfoDict = json.loads(dbInfoStr)
except Exception as ee:
raise Exception(str(ee))
# clean base folder
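    # each database's files live either under its tablespace location
    # (absolute, or relative to <datadir>/pg_location) or under
    # <datadir>/base/<dboid>; only the *_bak copies and pg_internal.init*
    # caches are removed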
for each_db in dbInfoDict["dblist"]:
if each_db["spclocation"] != "":
if each_db["spclocation"].startswith('/'):
tbsBaseDir = each_db["spclocation"]
else:
tbsBaseDir = "%s/pg_location/%s" % (
instance.datadir, each_db["spclocation"])
pg_catalog_base_dir = "%s/%s_%s/%d" % (
tbsBaseDir,
DefaultValue.TABLESPACE_VERSION_DIRECTORY,
instance_name,
int(each_db["dboid"]))
else:
pg_catalog_base_dir = "%s/base/%d" % (
instance.datadir, int(each_db["dboid"]))
# for base folder, template0 need handle specially
if each_db["dbname"] == 'template0':
cmd = "rm -rf '%s_bak' && rm -f %s/pg_internal.init*" % \
(pg_catalog_base_dir, pg_catalog_base_dir)
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
"\nOutput:%s" % output)
g_logger.debug("{0} has been cleaned".format(pg_catalog_base_dir))
continue
        # main/vm/fsm -- main.1 ..
        # the paths cannot be quoted here because of the '*' wildcard
cmd = "rm -f %s/*_bak && rm -f %s/pg_internal.init*" % (
pg_catalog_base_dir, pg_catalog_base_dir)
g_logger.debug("{0} needs to be cleaned".format(pg_catalog_base_dir))
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
"\nOutput:%s" % output)
g_logger.debug("Successfully clean instance base folders. "
"Instance data dir: {0}".format(instance.datadir))
def replacePgprocFile():
"""
function: replace pg_proc data file by pg_proc_temp data file
input: NA
output: NA
"""
g_logger.log("Replace pg_proc file.")
try:
InstanceList = []
# find all DB instances need to replace pg_proc
if len(g_dbNode.datanodes) != 0:
for eachInstance in g_dbNode.datanodes:
if eachInstance.instanceType \
in [MASTER_INSTANCE, STANDBY_INSTANCE, CASCADE_STANDBY]:
InstanceList.append(eachInstance)
# replace each instance pg_proc
if len(InstanceList) != 0:
pool = ThreadPool(len(InstanceList))
pool.map(replaceOneInstancePgprocFile, InstanceList)
pool.close()
pool.join()
else:
            g_logger.debug(
                "No instance found on this node, nothing needs to be done.")
return
g_logger.log(
"Successfully replaced all instances pg_proc file on this node.")
except Exception as e:
g_logger.logExit(str(e))
def replaceOneInstancePgprocFile(instance):
"""
    function: replace the pg_proc data files of this instance with the
              pg_proc_temp data files
input: NA
output: NA
"""
g_logger.debug("Replace instance pg_proc file. "
"Instance data dir: %s" % instance.datadir)
pg_proc_mapping_file = os.path.join(g_opts.appPath,
'pg_proc_mapping.txt')
with open(pg_proc_mapping_file, 'r') as fp:
pg_proc_dict_str = fp.read()
proc_dict = eval(pg_proc_dict_str)
try:
# replace pg_proc data file with pg_proc_temp data file
for proc_file_path, pg_proc_temp_file_path in proc_dict.items():
pg_proc_data_file = \
os.path.join(instance.datadir, proc_file_path)
if not os.path.exists(pg_proc_data_file):
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
pg_proc_data_file)
pg_proc_temp_data_file = os.path.join(
instance.datadir, pg_proc_temp_file_path)
if not os.path.exists(pg_proc_temp_data_file):
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
pg_proc_temp_data_file)
FileUtil.removeFile(pg_proc_data_file)
FileUtil.cpFile(pg_proc_temp_data_file, pg_proc_data_file)
except Exception as e:
raise Exception(str(e))
g_logger.debug(
"Successfully replaced instance pg_proc file. Instance data dir: %s"
% instance.datadir)
def createPgprocPathMappingFile():
"""
create pg_proc and pg_proc_temp_oids data file path mapping
:return:
"""
g_logger.log("Create file to save mapping between pg_proc file path and"
" pg_proc_temp_oids file path.")
clusterNodes = g_clusterInfo.dbNodes
dnInst = None
for dbNode in clusterNodes:
if len(dbNode.datanodes) == 0:
continue
dnInst = dbNode.datanodes[0]
primaryDnNode, _ = DefaultValue.getPrimaryNode(g_opts.userProfile)
if dnInst.hostname not in primaryDnNode:
continue
break
database_list = get_database_list(dnInst)
pg_proc_list = ['pg_proc', 'pg_proc_oid_index',
'pg_proc_proname_args_nsp_index']
pg_proc_temp_list = ['pg_proc_temp_oids', 'pg_proc_oid_index_temp',
'pg_proc_proname_args_nsp_index_temp']
proc_file_path_list = []
pg_proc_temp_file_path_list = []
for eachdb in database_list:
for info in pg_proc_list:
pg_proc_file_path = getTableFilePath(info, dnInst, eachdb)
proc_file_path_list.append(pg_proc_file_path)
for temp_info in pg_proc_temp_list:
pg_proc_temp_file_path = getTableFilePath(temp_info, dnInst, eachdb)
pg_proc_temp_file_path_list.append(pg_proc_temp_file_path)
proc_dict = dict((proc_file_path, pg_proc_temp_file_path) for
proc_file_path, pg_proc_temp_file_path in
zip(proc_file_path_list, pg_proc_temp_file_path_list))
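    # The dict relies on zip() preserving order: for every database, each
    # entry of pg_proc_list is paired with the entry of pg_proc_temp_list at
    # the same position, so the two lists must stay aligned.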
pg_proc_mapping_file = os.path.join(g_opts.appPath, 'pg_proc_mapping.txt')
with open(pg_proc_mapping_file, 'w') as fp:
fp.write(str(proc_dict))
g_logger.log(
"Successfully created file to save mapping between pg_proc file path"
" and pg_proc_temp_oids file path.")
def getTableFilePath(tablename, dnInst, db_name):
"""
get table file path by oid
:return:
"""
sql = "select oid from pg_class where relname='%s';" % tablename
(status, output) = ClusterCommand.remoteSQLCommand(
sql, g_opts.user,
dnInst.hostname,
dnInst.port, False,
db_name,
IsInplaceUpgrade=True)
if status != 0:
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
" Error: \n%s" % str(output))
table_oid = output.strip('\n')
g_logger.debug("pg_proc oid is %s" % table_oid)
sql = "select pg_relation_filepath(%s);" % table_oid
(status, output) = ClusterCommand.remoteSQLCommand(
sql, g_opts.user,
dnInst.hostname,
dnInst.port, False,
db_name,
IsInplaceUpgrade=True)
if status != 0:
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
" Error: \n%s" % str(output))
table_file_path = output.strip('\n')
g_logger.debug("pg_proc file path is %s" % table_file_path)
return table_file_path
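# Illustrative example of getTableFilePath (values are examples only): for
# tablename 'pg_proc' the first query returns its relation oid (e.g. 1255),
# and pg_relation_filepath(1255) then yields a data-directory-relative path
# such as 'base/<database oid>/1255'.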
def createNewCsvFile():
"""
1. copy pg_proc info to csv file
2. modify csv file
3. create new table and get info by csv file
:return:
"""
g_logger.log("Create new csv file.")
clusterNodes = g_clusterInfo.dbNodes
dnInst = None
for dbNode in clusterNodes:
if len(dbNode.datanodes) == 0:
continue
dnInst = dbNode.datanodes[0]
primaryDnNode, _ = DefaultValue.getPrimaryNode(g_opts.userProfile)
if dnInst.hostname not in primaryDnNode:
continue
break
dndir = dnInst.datadir
pg_proc_csv_path = '%s/pg_copydir/tbl_pg_proc_oids.csv' % dndir
new_pg_proc_csv_path = '%s/pg_copydir/new_tbl_pg_proc_oids.csv' % dndir
sql = \
"""copy pg_proc( proname, pronamespace, proowner, prolang,
procost, prorows, provariadic, protransform, prosecdef,
proleakproof, proisstrict, proretset, provolatile, pronargs,
pronargdefaults, prorettype, proargtypes, proallargtypes,
proargmodes, proargnames, proargdefaults, prosrc, probin,
proconfig, proacl, prodefaultargpos, fencedmode, proshippable,
propackage,prokind) WITH OIDS to '%s' delimiter ','
csv header;""" % pg_proc_csv_path
(status, output) = ClusterCommand.remoteSQLCommand(
sql, g_opts.user,
dnInst.hostname, dnInst.port, False,
DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True)
if status != 0:
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
" Error: \n%s" % str(output))
pg_proc_csv_reader = csv.reader(open(pg_proc_csv_path, 'r'))
pg_proc_csv_data = list(pg_proc_csv_reader)
header = pg_proc_csv_data[0]
header.insert(header.index('protransform') + 1, 'proisagg')
header.insert(header.index('protransform') + 2, 'proiswindow')
new_pg_proc_csv_data = []
new_pg_proc_csv_data.append(header)
pg_proc_data_info = pg_proc_csv_data[1:]
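    # The header gained proisagg/proiswindow at protransform+1 and +2, while
    # the data rows get both 'True' values inserted at protransform+2. The
    # extra +1 offset appears to account for the leading oid column that
    # 'WITH OIDS' adds to each data row but not to the header.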
for i in range(2):
for info in pg_proc_data_info:
info.insert(header.index('protransform') + 2, 'True')
for info in pg_proc_data_info:
new_pg_proc_csv_data.append(info)
f = open(new_pg_proc_csv_path, 'w')
new_pg_proc_csv_writer = csv.writer(f)
for info in new_pg_proc_csv_data:
new_pg_proc_csv_writer.writerow(info)
f.close()
# scp csv file to other nodes
standbyInstLst = []
peerInsts = g_clusterInfo.getPeerInstance(dnInst)
for i in range(len(peerInsts)):
if peerInsts[i].instanceType in \
[MASTER_INSTANCE,STANDBY_INSTANCE, CASCADE_STANDBY]:
standbyInstLst.append(peerInsts[i])
for standbyInstance in standbyInstLst:
standbyCsvFilePath = \
'%s/pg_copydir/new_tbl_pg_proc_oids.csv' % standbyInstance.datadir
cmd = "pscp -H %s %s %s" % (
standbyInstance.hostname, new_pg_proc_csv_path,
standbyCsvFilePath)
g_logger.debug("exec cmd is: %s" % cmd)
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514[
"GAUSS_51400"] % cmd +
"\nOutput:%s" % output)
def greySyncGuc():
    """
    Delete the GUC parameters that were removed in the new version from the
    configuration files of all local DN instances, in parallel.
    """
    # delete old guc from configure file
global g_deleteGucDict
g_deleteGucDict = readDeleteGuc()
allInstances = g_dbNode.datanodes
pool = ThreadPool(DefaultValue.getCpuSet())
pool.map(greySyncInstanceGuc, allInstances)
pool.close()
pool.join()
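# A sketch of the structure readDeleteGuc() is expected to return (names are
# illustrative only): a dict keyed by instance type, mapping to the GUC names
# that no longer exist in the new version, e.g.
#   {"datanode": ["old_guc_a", "old_guc_b"], "cmagent": ["old_guc_c"]}
# greySyncInstanceGuc() looks up its instance type in this dict below.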
def greySyncInstanceGuc(dbInstance):
"""
    Delete the GUCs that were removed in the new version from this
    instance's .conf file. A .conf.bak.old copy is kept first, because the
    new version may add new GUCs to the config file and we need the original
    to restore from during rollback.
"""
if dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR:
oldConfig = "%s/postgresql.conf" % dbInstance.datadir
instanceName = "coordinator"
elif dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE:
oldConfig = "%s/postgresql.conf" % dbInstance.datadir
instanceName = "datanode"
elif dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_CMSERVER:
oldConfig = "%s/cm_server.conf" % dbInstance.datadir
instanceName = "cmserver"
elif dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_CMAGENT:
oldConfig = "%s/cm_agent.conf" % dbInstance.datadir
instanceName = "cmagent"
elif dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_GTM:
oldConfig = "%s/gtm.conf" % dbInstance.datadir
instanceName = "gtm"
else:
raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] % (
"specified", dbInstance.instanceRole))
if not os.path.exists(oldConfig):
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % oldConfig)
oldFileBak = oldConfig + ".bak.old"
oldTempFileBak = oldFileBak + ".temp"
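    # Backup-file protocol used by the grey GUC sync (descriptive note):
    #   <conf>.bak.old      -- pristine copy of the config kept for rollback;
    #                          its existence marks that the sync has finished
    #   <conf>.bak.old.temp -- intermediate copy; renamed to .bak.old only
    #                          after the deletions have been applied
    #   <conf>.bak.upgrade  -- scratch file holding the edited config before
    #                          it replaces <conf> itself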
    # if we re-enter the upgrade process, the sync may already be done
if os.path.exists(oldFileBak):
g_logger.log("File %s exists, No need to backup old configure again."
% oldFileBak)
return
    # if the bak.old.temp file exists but bak.old does not, a previous run may
    # have started deleting GUCs without finishing, so we must not copy again:
    # this oldConfig file may already have the old GUCs removed
if not os.path.exists(oldTempFileBak):
FileUtil.cpFile(oldConfig, oldTempFileBak)
    # if this instance type has no GUCs to delete, no need to modify the old .conf
if instanceName in g_deleteGucDict.keys():
gucNames = g_deleteGucDict[instanceName]
else:
# the rename must be the last, which is the finish flag
FileUtil.rename(oldTempFileBak, oldFileBak)
g_logger.debug("No need to sync %s guc with %s." % (
instanceName, oldConfig))
return
g_logger.debug("Sync %s guc with %s." % (instanceName, oldConfig))
bakFile = oldConfig + ".bak.upgrade"
pattern = re.compile("^\\s*.*=.*$")
lineno = -1
deleteLineNoList = []
f = None
try:
if dbInstance.instanceRole in [DefaultValue.INSTANCE_ROLE_COODINATOR,
DefaultValue.INSTANCE_ROLE_GTM,
DefaultValue.INSTANCE_ROLE_DATANODE]:
lockFile = oldConfig + '.lock'
if not os.path.exists(lockFile):
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % lockFile)
f = open(lockFile, 'r+')
fcntl.lockf(f.fileno(), fcntl.LOCK_EX)
g_logger.debug("Successfully locked file %s." % lockFile)
with open(oldConfig, 'r') as oldFile:
resList = oldFile.readlines()
for line in resList:
lineno += 1
# skip blank line
line = line.strip()
if not line:
continue
# search valid line
result = pattern.match(line)
if result is None:
continue
nameInFile = line.split('=')[0].strip()
if nameInFile.startswith('#'):
name = nameInFile.lstrip('#')
if name in gucNames:
deleteLineNoList.append(lineno)
else:
if nameInFile in gucNames:
deleteLineNoList.append(lineno)
if deleteLineNoList:
g_logger.debug("Deleting line number: %s." % deleteLineNoList)
FileUtil.createFile(bakFile, True, DefaultValue.KEY_FILE_MODE)
deleteContent = []
for lineno in deleteLineNoList:
deleteContent.append(resList[lineno])
resList[lineno] = ''
with open(bakFile, 'w') as bak:
bak.writelines(resList)
FileUtil.rename(bakFile, oldConfig)
g_logger.debug("Deleting guc content: %s" % deleteContent)
# the rename must be the last, which is the finish flag
FileUtil.rename(oldTempFileBak, oldFileBak)
if f:
f.close()
except Exception as e:
if f:
f.close()
if bakFile:
FileUtil.removeFile(bakFile)
raise Exception(str(e))
g_logger.debug("Successfully dealt with %s." % oldConfig)
def config_cm_agent_instance(cluster_info_file):
"""
Config cm_agent.conf
"""
g_logger.log("Start to config cm_agent.conf")
cluster_info_obj = dbClusterInfo()
cluster_info_obj.initFromStaticConfig(g_opts.user, cluster_info_file)
local_node = cluster_info_obj.get_local_node_info()
space_count = 17
cm_agent_conf = os.path.realpath(os.path.join(local_node.cmagents[0].datadir,
"cm_agent.conf"))
g_logger.log("Local cm_agent config file path [{0}]".format(cm_agent_conf))
replace_str = "upgrade_from = {0}{1}# the version number of the cluster " \
"before upgrade".format(g_opts.oldVersion,
" " * (space_count - len(str(g_opts.oldVersion))))
config_cmd = "sed -i 's/^upgrade_from =.*/{0}/g' {1} && " \
"grep 'upgrade_from' {1}".format(replace_str, cm_agent_conf)
_, output = subprocess.getstatusoutput(config_cmd)
if not "upgrade_from = {0}".format(g_opts.oldVersion) in output:
g_logger.debug("Config cm_agent.conf failed. Output: {0}".format(output))
raise Exception("Config cm_agent.conf failed. Output: {0}".format(output))
g_logger.log("Local cm_agent config file set seccessfully.")
def greyUpgradeSyncConfig():
"""
"""
# check if we have switched to new version, if we have switched to
# new version, no need to sync configure
srcDir = g_opts.oldClusterAppPath
destDir = g_opts.newClusterAppPath
if os.path.samefile(g_gausshome, destDir):
g_logger.debug("Current version is the new version, "
"no need to sync old configure to new install path.")
return
old_static_config_file = os.path.join(g_opts.oldClusterAppPath, "bin/cluster_static_config")
old_static_cluster_info = dbClusterInfo()
old_static_cluster_info.initFromStaticConfig(g_opts.user, old_static_config_file)
new_static_config_file = os.path.join(g_opts.newClusterAppPath, "bin/cluster_static_config")
if not os.path.isfile(new_static_config_file):
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
os.path.realpath(new_static_config_file))
if DefaultValue.check_add_cm(old_static_config_file, new_static_config_file, g_logger):
install_cm_instance(new_static_config_file)
config_cm_agent_instance(new_static_config_file)
cmd = ""
else:
g_logger.debug("No need to install CM component for grey upgrade sync config.")
# synchronize static and dynamic configuration files
static_config = "%s/bin/cluster_static_config" % srcDir
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
static_config, static_config, destDir)
dynamic_config = "%s/bin/cluster_dynamic_config" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi) && " % (
dynamic_config, dynamic_config, destDir)
# sync obsserver.key.cipher/obsserver.key.rand and
# server.key.cipher/server.key.rand and
# datasource.key.cipher/datasource.key.rand
# usermapping.key.cipher/usermapping.key.rand
# subscription.key.cipher/subscription.key.rand
OBS_cipher_key_bak_file = "%s/bin/obsserver.key.cipher" % srcDir
cmd += "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
OBS_cipher_key_bak_file, OBS_cipher_key_bak_file, destDir)
OBS_rand_key_bak_file = "%s/bin/obsserver.key.rand" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
OBS_rand_key_bak_file, OBS_rand_key_bak_file, destDir)
trans_encrypt_cipher_key_bak_file = "%s/bin/trans_encrypt.key.cipher" %\
srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
trans_encrypt_cipher_key_bak_file,
trans_encrypt_cipher_key_bak_file, destDir)
trans_encrypt_rand_key_bak_file = "%s/bin/trans_encrypt.key.rand" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
trans_encrypt_rand_key_bak_file, trans_encrypt_rand_key_bak_file,
destDir)
trans_encrypt_cipher_ak_sk_key_bak_file = "%s/bin/trans_encrypt_ak_sk.key"\
% srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
trans_encrypt_cipher_ak_sk_key_bak_file,
trans_encrypt_cipher_ak_sk_key_bak_file, destDir)
roach_cipher_key_bak_file = "%s/bin/roach.key.cipher" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
roach_cipher_key_bak_file, roach_cipher_key_bak_file, destDir)
roach_rand_key_bak_file = "%s/bin/roach.key.rand" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
roach_rand_key_bak_file, roach_rand_key_bak_file, destDir)
roach_cipher_ak_sk_key_bak_file = "%s/bin/roach_ak_sk.key" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
roach_cipher_ak_sk_key_bak_file, roach_cipher_ak_sk_key_bak_file,
destDir)
server_cipher_key_bak_file = "%s/bin/server.key.cipher" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
server_cipher_key_bak_file, server_cipher_key_bak_file, destDir)
server_rand_key_bak_file = "%s/bin/server.key.rand" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
server_rand_key_bak_file, server_rand_key_bak_file, destDir)
datasource_cipher = "%s/bin/datasource.key.cipher" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
datasource_cipher, datasource_cipher, destDir)
datasource_rand = "%s/bin/datasource.key.rand" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
datasource_rand, datasource_rand, destDir)
usermapping_cipher = "%s/bin/usermapping.key.cipher" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
usermapping_cipher, usermapping_cipher, destDir)
usermapping_rand = "%s/bin/usermapping.key.rand" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
usermapping_rand, usermapping_rand, destDir)
subscription_cipher = "%s/bin/subscription.key.cipher" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
subscription_cipher, subscription_cipher, destDir)
subscription_rand = "%s/bin/subscription.key.rand" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
subscription_rand, subscription_rand, destDir)
tde_key_cipher = "%s/bin/gs_tde_keys.cipher" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
tde_key_cipher, tde_key_cipher, destDir)
g_logger.debug("Grey upgrade sync command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# sync ca.key,etcdca.crt, client.key and client.crt
CA_key_file = "%s/share/sslcert/etcd/ca.key" % srcDir
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/share/sslcert/etcd/';fi)" % (
CA_key_file, CA_key_file, destDir)
CA_cert_file = "%s/share/sslcert/etcd/etcdca.crt" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/share/sslcert/etcd/';" \
"fi)" % (CA_cert_file, CA_cert_file, destDir)
client_key_file = "%s/share/sslcert/etcd/client.key" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/share/sslcert/etcd/';" \
"fi)" % (
client_key_file, client_key_file, destDir)
# copy cm_agent.lock file
cm_agent_lock_file = "%s/bin/cm_agent.lock" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
cm_agent_lock_file, cm_agent_lock_file, destDir)
client_cert_file = "%s/share/sslcert/etcd/client.crt" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/share/sslcert/etcd/';" \
"fi)" % (client_cert_file, client_cert_file, destDir)
if int(g_opts.oldVersion) >= 92019:
client_key_cipher_file = \
"%s/share/sslcert/etcd/client.key.cipher" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' " \
"'%s/share/sslcert/etcd/';fi)" % (
client_key_cipher_file, client_key_cipher_file, destDir)
client_key_rand_file = "%s/share/sslcert/etcd/client.key.rand" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' " \
"'%s/share/sslcert/etcd/';fi)" % (
client_key_rand_file, client_key_rand_file, destDir)
etcd_key_cipher_file = "%s/share/sslcert/etcd/etcd.key.cipher" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' " \
"'%s/share/sslcert/etcd/';fi)" % (
etcd_key_cipher_file, etcd_key_cipher_file, destDir)
etcd_key_rand_file = "%s/share/sslcert/etcd/etcd.key.rand" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' " \
"'%s/share/sslcert/etcd/';fi)" % (
etcd_key_rand_file, etcd_key_rand_file, destDir)
g_logger.debug("Grey upgrade sync command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# sync gsql certs
gsqlOldpath = "%s/share/sslcert/gsql/" % srcDir
gsqlNewDir = "%s/share/sslcert/" % destDir
cmd = "(if [ -d '%s' ];then cp -r '%s' '%s';fi)" % (
gsqlOldpath, gsqlOldpath, gsqlNewDir)
g_logger.debug("Inplace restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# sync gds certs
gdsOldpath = "%s/share/sslcert/gds/" % srcDir
gdsNewDir = "%s/share/sslcert/" % destDir
cmd = "(if [ -d '%s' ];then cp -r '%s' '%s';fi)" % (
gdsOldpath, gdsOldpath, gdsNewDir)
g_logger.debug("Inplace restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# sync grpc certs
grpcOldpath = "%s/share/sslcert/grpc/" % srcDir
grpcNewDir = "%s/share/sslcert/" % destDir
cmd = "(if [ -d '%s' ];then cp -r '%s' '%s';fi)" % (
grpcOldpath, grpcOldpath, grpcNewDir)
g_logger.debug("Inplace restore command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# sync java UDF
javadir = "%s/lib/postgresql/java" % srcDir
desPath = "%s/lib/postgresql/" % destDir
cmd = "(if [ -d '%s' ];then mv '%s/java/pljava.jar' " \
"'%s'&&cp -r '%s' '%s'&&mv '%s/pljava.jar' '%s/java/';fi)" % \
(javadir, desPath, desPath, javadir, desPath, desPath, desPath)
g_logger.debug("Grey upgrade sync command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# sync postGIS
cmdPostGis = ""
for sofile in g_opts.postgisSOFileList.keys():
desPath = os.path.join(destDir, g_opts.postgisSOFileList[sofile])
srcFile = "'%s'/%s" % (srcDir, sofile)
cmdPostGis += " && (if [ -f %s ];then cp -f -p %s '%s';fi)" % (
srcFile, srcFile, desPath)
# skip " &&"
cmd = cmdPostGis[3:]
g_logger.debug("Grey upgrade sync command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
# sync library file
# sync libsimsearch etc files
searchConfigFile = "%s/etc/searchletConfig.yaml" % srcDir
cmd = "(if [ -f '%s' ];then cp -f -p '%s' " \
"'%s/etc/searchletConfig.yaml';fi)" % (
searchConfigFile, searchConfigFile, destDir)
searchIniFile = "%s/etc/searchServer.ini" % srcDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' " \
"'%s/etc/searchServer.ini';fi)" % (
searchIniFile, searchIniFile, destDir)
# sync libsimsearch libs files
cmd += "&& (if [ -d '%s/lib/libsimsearch' ];" \
"then cp -r '%s/lib/libsimsearch' '%s/lib/';fi)" % (
srcDir, srcDir, destDir)
# sync initialized configuration parameters files
cmd += " && (if [-f '%s/bin/initdb_param'];" \
"then cp -f -p '%s/bin/initdb_param' '%s/bin/';fi)" % (
srcDir, srcDir, destDir)
CmdExecutor.execCommandLocally(cmd)
# sync kerberos conf files
krbConfigFile = "%s/kerberos" % srcDir
cmd = "(if [ -d '%s' ];then cp -r '%s' '%s/';fi)" % (
krbConfigFile, krbConfigFile, destDir)
cmd += "&& (if [ -d '%s/var/krb5kdc' ];then mkdir %s/var;" \
" cp -r '%s/var/krb5kdc' '%s/var/';fi)" % (
srcDir, destDir, srcDir, destDir)
g_logger.debug("Grey upgrade sync command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
    # pg_plugin must be the last directory to sync: users may create C
    # functions during the upgrade, which adds files under it. After switching
    # to the new binary we restore the original file mode so C functions can
    # still be written.
FileUtil.changeMode(DefaultValue.SPE_FILE_MODE,
'%s/lib/postgresql/pg_plugin' % srcDir, True)
cmd = "(cp -r '%s/lib/postgresql/pg_plugin' '%s/lib/postgresql')" % (
srcDir, destDir)
g_logger.debug("Grey upgrade sync command: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
cm_cert_old_dir = os.path.realpath(os.path.join(srcDir, "share", "sslcert", "cm"))
cm_cert_dest_dir = os.path.realpath(os.path.join(destDir, "share", "sslcert"))
restore_cm_cert_file_cmd = "(if [ -d '%s' ];" \
"then cp -r '%s' '%s/';fi)" % (cm_cert_old_dir,
cm_cert_old_dir,
cm_cert_dest_dir)
g_logger.debug("Restore CM cert files for grey upgrade cmd: " \
"{0}".format(restore_cm_cert_file_cmd))
CmdExecutor.execCommandLocally(restore_cm_cert_file_cmd)
g_logger.debug("Restore CM cert files for grey upgrade successfully.")
def setGucValue():
"""
set cn dn guc value
"""
if g_opts.setType == "reload":
dn_standby_instance_list = []
success_instance = []
        # find all DN instances whose GUC value needs to be set
if len(g_dbNode.datanodes) != 0:
dn_primary_instance_list = getLocalPrimaryDNInstance()
for each_instance in g_dbNode.datanodes:
if each_instance.instanceType in \
[MASTER_INSTANCE, STANDBY_INSTANCE, CASCADE_STANDBY]:
dn_standby_instance_list.append(each_instance)
dn_standby_instance_list = [instance for instance in dn_standby_instance_list if
instance not in dn_primary_instance_list]
try:
if len(dn_standby_instance_list) != 0:
pool = ThreadPool(len(dn_standby_instance_list))
pool.map(setOneInstanceGuc, dn_standby_instance_list)
pool.close()
pool.join()
        except Exception as er:
            g_logger.debug("Failed to set GUC parameter on standby instances."
                           " Error: %s" % str(er))
success_instance = dn_primary_instance_list
else:
success_instance = get_dn_instance()
if len(success_instance) != 0:
pool = ThreadPool(len(success_instance))
pool.map(setOneInstanceGuc, success_instance)
pool.close()
pool.join()
def setOneInstanceGuc(instance):
"""
set guc value for one instance
:return:
"""
if not instance:
return
cmd = "gs_guc %s -N %s -Z %s -D %s -c \"%s\"" % (g_opts.setType, instance.hostname,
const.INST_TYPE_MAP[instance.instanceRole],
instance.datadir, g_opts.gucStr)
g_logger.debug("Set guc cmd [%s]." % cmd)
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 3, 10)
if status != 0:
raise Exception(ErrorCode.GAUSS_500["GAUSS_50007"] % cmd + " Error: \n%s" % str(output))
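# Illustrative example of the command built by setOneInstanceGuc (host, data
# path and GUC string are examples only, and the -Z value assumes
# const.INST_TYPE_MAP maps a DN role to "datanode"):
#   gs_guc reload -N node1 -Z datanode -D /data/dn1 -c "upgrade_mode=2"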
def switchDnNodeProcess():
"""
    function: switch the DN (and gaussdb fenced UDF) processes on this node
:return:
"""
if g_opts.rolling:
# for rolling upgrade, gaussdb fenced udf will be
# switched after cm_agent has been switched
start_time = timeit.default_timer()
switchFencedUDFProcess()
elapsed = timeit.default_timer() - start_time
g_logger.log(
"Time to switch gaussdb fenced udf: %s" % getTimeFormat(elapsed))
start_time = timeit.default_timer()
switchDn()
elapsed = timeit.default_timer() - start_time
g_logger.log("Time to switch DN: %s" % getTimeFormat(elapsed))
def switchFencedUDFProcess():
"""
function: Kill gaussdb fenced UDF master process.
"""
if not isNeedSwitch("gaussdb fenced UDF master process"):
g_logger.log("No need to kill gaussdb fenced UDF master process.")
return
g_logger.log("Killing gaussdb fenced UDF master process.")
killCmd = DefaultValue.killInstProcessCmd(
"gaussdb fenced UDF master process")
g_logger.log(
"Command to kill gaussdb fenced UDF master process: %s" % killCmd)
(status, _) = CmdUtil.retryGetstatusoutput(killCmd, 3, 5)
if status == 0:
g_logger.log("Successfully killed gaussdb fenced UDF master process.")
else:
raise Exception("Failed to kill gaussdb fenced UDF master process.")
def isNeedSwitch(process, dataDir=""):
"""
    Get the pids from the ps ux command, then resolve each pid's real path
    from /proc/$pid/exe. If the process already runs from the install path we
    are switching to, it does not need to be killed; if it still runs from
    the install path being replaced, it must be killed.
    :param process: process name, can be "datanode"
    :param dataDir: optional data directory used to narrow the match
    :return: True means the process needs to be switched (killed)
"""
if not g_opts.rollback:
path = g_opts.oldClusterAppPath
else:
path = g_opts.newClusterAppPath
if process == "datanode":
process = "gaussdb"
path = os.path.join(path, 'bin', process)
path = os.path.normpath(path)
if dataDir:
cmd = r"pidList=`ps ux | grep '\<%s\>' | grep '%s' | grep '%s'| " \
r"grep -v 'grep' | awk '{print $2}' | xargs `; " \
r"for pid in $pidList; do dir=`readlink -f /proc/$pid/exe | " \
r"xargs `; if [ `echo $dir | grep %s` ];then echo 'True'; " \
r"else echo 'False'; fi; done"
cmd = cmd % (process, g_gausshome, dataDir, path)
else:
cmd = r"pidList=`ps ux | grep '\<%s\>' | grep '%s' | grep -v 'grep'" \
r" | awk '{print $2}' | xargs `; " \
r"for pid in $pidList; do dir=`readlink -f /proc/$pid/exe | " \
r"xargs `; if [ `echo $dir | grep %s` ];then echo 'True'; " \
r"else echo 'False'; fi; done"
cmd = cmd % (process, g_gausshome, path)
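    # For every matching pid the command echoes 'True' if /proc/<pid>/exe
    # still resolves under 'path' (the binary being replaced) and 'False'
    # otherwise; any 'False' in the output means a process already runs from
    # the other install path, so no switch is needed.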
g_logger.log("Command for finding if need switch: %s" % cmd)
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % str(cmd) +
" Error: \n%s" % str(output))
if output.find('False') >= 0:
g_logger.log("No need to switch.")
return False
g_logger.log("Need to switch.")
return True
def switchDn():
"""
function: switch DN after checkpoint
"""
g_logger.log("Killing DN processes.")
needKillDn = isKillDn()
cmd = "(ps ux | grep '\-D' | grep '%s' | grep -v grep | " \
"awk '{print $2}' | xargs -r kill -9 )"
killCmd = ""
if needKillDn:
killCmd += " && " + cmd % g_gausshome
if killCmd:
killCmd = killCmd.strip()
if killCmd.startswith("&&"):
killCmd = killCmd[2:]
g_logger.log("Command to kill other process: %s" % killCmd)
(status, output) = CmdUtil.retryGetstatusoutput(killCmd, 3, 5)
if status == 0:
g_logger.log("Successfully killed DN processes.")
else:
raise Exception("Failed to kill DN processes.")
else:
g_logger.log("No need to kill DN.")
def isKillDn():
    """
    Check whether the DN processes on this node need to be killed before
    switching binaries.
    """
    # if this node has no DN instances, there is nothing to kill
if not g_dbNode.datanodes:
return False
needKillDn = False
try:
cmd = "gaussdb -V"
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0 and output != "":
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
"\nError: " + str(output))
pattern = re.compile(r'[(](.*?)[)]')
versionInBrackets = re.findall(pattern, output)
curCommitid = versionInBrackets[0].split(" ")[-1]
# get the dn and cn name
dnInst = None
clusterNodes = g_clusterInfo.dbNodes
for dbNode in clusterNodes:
if len(dbNode.datanodes) == 0:
continue
dnInst = dbNode.datanodes[0]
primaryDnNode, _ = DefaultValue.getPrimaryNode(g_opts.userProfile)
if dnInst.hostname not in primaryDnNode:
continue
break
localHost = NetUtil.GetHostIpOrName()
if int(g_opts.oldVersion) >= 92069:
sql = "select node_name, node_type from pg_catalog.pgxc_node " \
"where node_host = '%s';" % localHost
else:
if g_dbNode.name != dnInst.hostname:
sql = "select node_name, node_type from pg_catalog.pgxc_node " \
"where node_host = '%s';" % localHost
else:
sql = "select node_name, node_type from pg_catalog.pgxc_node" \
" where node_host = 'localhost';"
g_logger.debug("Sql to query node name: %s" % sql)
(status, output) = ClusterCommand.remoteSQLCommand(
sql, g_opts.user,
dnInst.hostname, dnInst.port, False,
DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True)
if status != 0 or SqlResult.findErrorInSql(output):
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
" Error: \n%s" % str(output))
resList = output.split('\n')
dnNames = []
for record in resList:
record = record.split('|')
nodeName = record[0].strip()
dnNames.append(nodeName)
g_logger.debug("isKillDn dnName:{0} "
"commitid:{1}".format(dnNames, curCommitid))
# execute on the dn and cn to get the exists process version
needKillDn = checkExistsVersion(dnInst, curCommitid)
return needKillDn
except Exception as e:
g_logger.debug("Cannot query the exists dn process "
"version form select version(). Error: \n%s" % str(e))
for dbInstance in g_dbNode.datanodes:
dataDir = os.path.normpath(dbInstance.datadir)
if isNeedSwitch("datanode", dataDir):
needKillDn = True
break
g_logger.log("needKillDn: %s" % (needKillDn))
return needKillDn
def getLsnInfo():
"""
get lsn info
:return:
"""
g_logger.log("Get lsn info.")
try:
InstanceList = []
dnInst = None
        # find the primary DN instance whose LSN information will be queried
clusterNodes = g_clusterInfo.dbNodes
for dbNode in clusterNodes:
if len(dbNode.datanodes) == 0:
continue
dnInst = dbNode.datanodes[0]
primaryDnIntance, _ = DefaultValue.getPrimaryNode(
g_opts.userProfile)
if dnInst.hostname not in primaryDnIntance:
continue
break
if dnInst:
InstanceList.append(dnInst)
if InstanceList:
getLsnSqlPath = os.path.join(
g_opts.upgrade_bak_path, const.GET_LSN_SQL_FILE)
if not os.path.exists(getLsnSqlPath):
FileUtil.createFileInSafeMode(getLsnSqlPath)
lsnSql = "select pg_current_xlog_location(), " \
"pg_xlogfile_name(pg_current_xlog_location()), " \
"pg_xlogfile_name_offset(pg_current_xlog_location());"
with os.fdopen(
os.open(getLsnSqlPath, os.O_WRONLY, 0o755), 'w') as fp:
fp.writelines(lsnSql)
            # query the LSN info in parallel
if len(InstanceList) != 0:
pool = ThreadPool(len(InstanceList))
pool.map(getLsnInfoImpl, InstanceList)
pool.close()
pool.join()
else:
g_logger.debug("No master instance found on this node, "
"nothing need to do.")
return
g_logger.log("Successfully get lsn info.")
except Exception as e:
raise Exception(str(e))
def getLsnInfoImpl(instance):
    """
    Run the LSN SQL file against the given instance to obtain its current
    LSN information.
    """
    getLsnSqlPath = os.path.join(
        g_opts.upgrade_bak_path, const.GET_LSN_SQL_FILE)
    execSQLFile("postgres", getLsnSqlPath, instance.port)
def greyRestoreConfig():
    """
    During rollback of a grey upgrade, restore the static and dynamic
    configuration files from the new install path back to the old install
    path and merge the pg_plugin directory.
    """
oldDir = g_opts.oldClusterAppPath
newDir = g_opts.newClusterAppPath
if not os.path.exists(oldDir):
if g_opts.forceRollback:
g_logger.log(
ErrorCode.GAUSS_502["GAUSS_50201"] % oldDir +
" Under force rollback mode, no need to sync config.")
return
else:
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % oldDir)
    # if pg_plugin was synced and its mode changed, but we never switched to
    # the new binary, restore the original directory mode
oldPluginDir = "%s/lib/postgresql/pg_plugin" % g_opts.oldClusterAppPath
if os.path.exists(oldPluginDir):
FileUtil.changeMode(DefaultValue.KEY_DIRECTORY_MODE, oldPluginDir, True)
if not os.path.exists(newDir):
g_logger.log(ErrorCode.GAUSS_502["GAUSS_50201"] % newDir +
" No need to sync.")
return
if os.path.samefile(g_opts.oldClusterAppPath, g_gausshome):
g_logger.log("Current version is old version, nothing need to do.")
return
old_static_config_file = os.path.realpath(os.path.join(g_opts.oldClusterAppPath,
"bin", "cluster_static_config"))
new_static_config_file = os.path.realpath(os.path.join(g_opts.newClusterAppPath,
"bin", "cluster_static_config"))
if DefaultValue.check_add_cm(old_static_config_file, new_static_config_file, g_logger):
g_logger.log("There is no need to copy static and dynamic config file.")
return
g_logger.log("Start to copy static and dynamic config file.")
cmd = "(if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (new_static_config_file,
new_static_config_file,
oldDir)
dynamic_config = "%s/bin/cluster_dynamic_config" % newDir
cmd += " && (if [ -f '%s' ];then cp -f -p '%s' '%s/bin/';fi)" % (
dynamic_config, dynamic_config, oldDir)
CmdExecutor.execCommandLocally(cmd)
mergePlugin()
def mergePlugin():
"""
    Under rollback, use the new install dir's pg_plugin as the base: copy
    newly added files back to the old dir and remove files that were deleted.
:return: NA
"""
g_logger.log("Sync pg_plugin.")
oldDir = "%s/lib/postgresql/pg_plugin" % g_opts.oldClusterAppPath
newDir = "%s/lib/postgresql/pg_plugin" % g_opts.newClusterAppPath
if not os.path.exists(newDir):
g_logger.log(ErrorCode.GAUSS_502["GAUSS_50201"] % newDir +
" No need to sync pg_plugin.")
return
FileUtil.changeMode(DefaultValue.SPE_FILE_MODE, newDir, True)
oldLines = os.listdir(oldDir)
newLines = os.listdir(newDir)
newAdd = [i for i in newLines if i not in oldLines]
newDelete = [i for i in oldLines if i not in newLines]
cmd = ""
for add in newAdd:
newFile = "%s/%s" % (newDir, add)
cmd += "(if [ -f '%s' ];then cp -f -p '%s' '%s';fi) && " % (
newFile, newFile, oldDir)
for delete in newDelete:
deleteFile = "%s/%s" % (oldDir, delete)
cmd += "(if [ -f '%s' ];then rm '%s';fi) && " % (
deleteFile, deleteFile)
if cmd != "":
cmd = cmd[:-3]
g_logger.debug("Command to sync plugin: %s" % cmd)
CmdExecutor.execCommandLocally(cmd)
else:
g_logger.log("No need to sync pg_plugin.")
def greyRestoreGuc():
    """
    During rollback of a grey upgrade, restore the original GUC configuration
    files from their .bak.old copies for all local DN instances.
    """
oldDir = g_opts.oldClusterAppPath
if not os.path.exists(oldDir):
# the node is disable after rollback
if g_opts.forceRollback:
g_logger.log(ErrorCode.GAUSS_502["GAUSS_50201"] % oldDir +
" Under force rollback mode.")
else:
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % oldDir)
    # if the upgrade process was interrupted while recording deleted GUCs but
    # before switching to the new version, the record may be unreliable: the
    # user could have set a GUC during the failed upgrade. So check whether
    # the configure file already contains the setting; if the user has set
    # it, we must not sync that GUC again
allInstances = g_dbNode.datanodes
pool = ThreadPool(DefaultValue.getCpuSet())
pool.map(greyRestoreInstanceGuc, allInstances)
pool.close()
pool.join()
def greyRestoreInstanceGuc(dbInstance):
    """
    Restore this instance's original postgresql.conf from its .bak.old copy
    and remove the temporary backup files created while syncing GUCs.
    """
if dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE:
oldConfig = "%s/postgresql.conf" % dbInstance.datadir
else:
raise Exception(ErrorCode.GAUSS_512["GAUSS_51204"] % (
"specified", dbInstance.instanceRole))
# record the guc without delete guc
bakFile = oldConfig + ".bak.upgrade"
FileUtil.removeFile(bakFile)
oldBakFile = oldConfig + ".bak.old"
oldTempFileBak = oldBakFile + ".temp"
FileUtil.removeFile(oldTempFileBak)
if not os.path.exists(oldConfig):
if g_opts.forceRollback:
g_logger.warn(ErrorCode.GAUSS_502["GAUSS_50201"] % oldConfig)
else:
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % oldConfig)
if not os.path.exists(oldBakFile):
g_logger.debug(ErrorCode.GAUSS_502["GAUSS_50201"] % oldBakFile +
" No need to restore guc.")
return
f = None
try:
if dbInstance.instanceRole in [DefaultValue.INSTANCE_ROLE_COODINATOR,
DefaultValue.INSTANCE_ROLE_GTM,
DefaultValue.INSTANCE_ROLE_DATANODE]:
lockFile = oldConfig + '.lock'
if not os.path.exists(lockFile):
if not g_opts.forceRollback:
raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
lockFile)
else:
g_logger.warn(ErrorCode.GAUSS_502["GAUSS_50201"] %
lockFile + " Without lock to restore guc.")
else:
f = open(lockFile, 'r+')
fcntl.lockf(f.fileno(), fcntl.LOCK_EX)
# if user has set in the configure file, cannot sync, use the user set
FileUtil.rename(oldBakFile, oldConfig)
if f:
f.close()
except Exception as e:
if f:
f.close()
raise Exception(str(e))
g_logger.debug("Successfully restore guc to %s." % oldConfig)
def cleanConfBakOld():
"""
clean conf.bak.old files
"""
allInstances = g_dbNode.datanodes
pool = ThreadPool(DefaultValue.getCpuSet())
pool.map(cleanOneInstanceConfBakOld, allInstances)
pool.close()
pool.join()
def kill_cm_server_process(gauss_home):
"""
kill cm_server process
"""
cmd = "ps ux | grep '{0}' | grep -v grep | awk '{print $2}' | " \
"xargs kill -9".format(os.path.join(gauss_home, "bin", "cm_server"))
subprocess.getstatusoutput(cmd)
g_logger.debug("Kill cm_server finish.")
def clean_cm_instance():
"""
Clean all CM instance
"""
g_logger.debug("Local clean CM instance start.")
current_user = pwd.getpwuid(os.getuid()).pw_name
gauss_home = EnvUtil.getEnvironmentParameterValue("GAUSSHOME", current_user)
static_config_file = os.path.join(gauss_home, "bin", "cluster_static_config")
cluster_info = dbClusterInfo()
cluster_info.initFromStaticConfig(current_user, static_config_file)
local_node = [node for node in cluster_info.dbNodes
if node.name == NetUtil.GetHostIpOrName()][0]
clean_dir = os.path.dirname(local_node.cmagents[0].datadir)
if local_node.cmservers:
server_component = CM_OLAP()
server_component.instInfo = local_node.cmservers[0]
server_component.logger = g_logger
server_component.killProcess()
FileUtil.cleanDirectoryContent(local_node.cmservers[0].datadir)
g_logger.debug("Clean cm_server instance successfully.")
if os.path.isdir(local_node.cmagents[0].datadir):
FileUtil.cleanDirectoryContent(local_node.cmagents[0].datadir)
g_logger.debug("Local clean CM instance directory [{0}] successfully.".format(clean_dir))
def cleanOneInstanceConfBakOld(dbInstance):
"""
clean conf.bak.old files in one instance
"""
if dbInstance.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE:
oldConfig = "%s/%s" % (
dbInstance.datadir, const.POSTGRESQL_CONF_BAK_OLD)
if not os.path.exists(oldConfig):
g_logger.debug(
"WARNING: " + ErrorCode.GAUSS_502["GAUSS_50201"] % oldConfig)
else:
cmd = "(if [ -f '%s' ]; then rm -f '%s'; fi)" % (oldConfig, oldConfig)
CmdExecutor.execCommandLocally(cmd)
g_logger.debug("Successfully cleaned up %s." % oldConfig)
def doFuncForAllNode(func):
"""
    execute the given function on the data directory of every DN instance on
    the local node
"""
oldVersion = int(g_opts.oldVersion)
relmap4kVersion = int(float(const.RELMAP_4K_VERSION) * 1000)
if oldVersion >= relmap4kVersion:
g_logger.debug("no need to operate global relmap file, old version: %s" % g_opts.oldVersion)
return
if len(g_dbNode.datanodes) != 0:
for dn in g_dbNode.datanodes:
func(dn.datadir)
def doRestoreGlobalRelmapFile(datadir):
"""
    does the real work of restoring global/pg_filenode.map from global/pg_filenode.old.map
"""
oldRelmapFileName = os.path.join(datadir, "global/pg_filenode.old.map")
if not os.path.exists(oldRelmapFileName):
raise Exception("Failed to restore global relmap file. Error: \n%s doesn't exist" % oldRelmapFileName)
relmapFileName = os.path.join(datadir, "global/pg_filenode.map")
relmapBackFileName = os.path.join(datadir, "global/pg_filenode.map.backup")
cmd = "cp '%s' '%s' && cp '%s' '%s'" % (oldRelmapFileName, relmapFileName, \
oldRelmapFileName, relmapBackFileName)
g_logger.debug("restore global relmap file, cmd: %s" % cmd)
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 3, 5)
if status != 0:
raise Exception("Failed to restore global relmap file. Error: \n%s" % str(output))
def restoreGlobalRelmapFile():
"""
restore global relmap file when rollback
:return:
"""
doFuncForAllNode(doRestoreGlobalRelmapFile)
def doCleanTmpGlobalRelmapFile(datadir):
"""
    does the real work of cleaning the temporary global relmap file
"""
oldRelmapFileName = os.path.join(datadir, "global/pg_filenode.old.map")
cmd = "(if [ -f '%s' ];then rm '%s' -f;fi)" % (oldRelmapFileName, oldRelmapFileName)
g_logger.debug("remove tmp global relmap file, cmd: %s" % cmd)
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 3, 5)
if status != 0:
raise Exception("Failed to clean tmp global relmap file. Error: \n%s" % str(output))
def cleanTmpGlobalRelmapFile():
"""
clean temp global relmap file
"""
doFuncForAllNode(doCleanTmpGlobalRelmapFile)
def doBackupGlobalRelmapFile(datadir):
"""
    does the real work of backing up the global relmap file
"""
relmapFileName = os.path.join(datadir, "global/pg_filenode.map")
oldRelmapFileName = os.path.join(datadir, "global/pg_filenode.old.map")
cmd = "(if [ -f '%s' ];then cp '%s' '%s';fi)" \
% (relmapFileName, relmapFileName, oldRelmapFileName)
g_logger.debug("backup global relmap file, cmd: %s" % cmd)
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 2, 5)
if status != 0:
raise Exception("Failed to backup global relmap file. Error: \n%s" % str(output))
def backupGlobalRelmapFile():
"""
backup global relmap file before post upgrade
:return:
"""
doFuncForAllNode(doBackupGlobalRelmapFile)
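# A descriptive note on the relmap handling above: before post-upgrade,
# global/pg_filenode.map is copied to global/pg_filenode.old.map
# (backupGlobalRelmapFile); on rollback the old copy is written back over
# both pg_filenode.map and pg_filenode.map.backup (restoreGlobalRelmapFile);
# once the upgrade is committed the temporary old.map file is removed
# (cleanTmpGlobalRelmapFile).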
def checkAction():
"""
function: check action
input : NA
output : NA
"""
if g_opts.action not in \
[const.ACTION_TOUCH_INIT_FILE,
const.ACTION_UPDATE_CATALOG,
const.ACTION_BACKUP_OLD_CLUSTER_DB_AND_REL,
const.ACTION_SYNC_CONFIG,
const.ACTION_BACKUP_CONFIG,
const.ACTION_RESTORE_CONFIG,
const.ACTION_RELOAD_CMAGENT,
const.ACTION_RELOAD_CMSERVER,
const.ACTION_INPLACE_BACKUP,
const.ACTION_INPLACE_RESTORE,
const.ACTION_CHECK_GUC,
const.ACTION_BACKUP_HOTPATCH,
const.ACTION_ROLLBACK_HOTPATCH,
const.ACTION_SWITCH_PROCESS,
const.ACTION_SWITCH_BIN,
const.ACTION_CLEAN_INSTALL_PATH,
const.ACTION_COPY_CERTS,
const.ACTION_UPGRADE_SQL_FOLDER,
const.ACTION_BACKUP_OLD_CLUSTER_CATALOG_PHYSICAL_FILES,
const.ACTION_RESTORE_OLD_CLUSTER_CATALOG_PHYSICAL_FILES,
const.ACTION_CLEAN_OLD_CLUSTER_CATALOG_PHYSICAL_FILES,
const.ACTION_REPLACE_PG_PROC_FILES,
const.ACTION_CREATE_PG_PROC_MAPPING_FILE,
const.ACTION_CREATE_NEW_CSV_FILE,
const.ACTION_GREY_SYNC_GUC,
const.ACTION_GREY_UPGRADE_CONFIG_SYNC,
const.ACTION_SWITCH_DN,
const.ACTION_GET_LSN_INFO,
const.ACTION_GREY_RESTORE_CONFIG,
const.ACTION_GREY_RESTORE_GUC,
const.ACTION_CLEAN_CONF_BAK_OLD,
const.ACTION_SET_GUC_VALUE,
const.ACTION_CLEAN_CM,
const.ACTION_CLEAN_TMP_GLOBAL_RELMAP_FILE,
const.ACTION_BACKUP_GLOBAL_RELMAP_FILE,
const.ACTION_RESTORE_GLOBAL_RELMAP_FILE]:
GaussLog.exitWithError(
ErrorCode.GAUSS_500["GAUSS_50004"] % 't'
+ " Value: %s" % g_opts.action)
def main():
"""
function: main function
"""
try:
global g_opts
g_opts = CmdOptions()
parseCommandLine()
checkParameter()
initGlobals()
except Exception as e:
GaussLog.exitWithError(str(e) + traceback.format_exc())
try:
# select the object's function by type
funcs = {
const.ACTION_SWITCH_BIN: switchBin,
const.ACTION_CLEAN_INSTALL_PATH: cleanInstallPath,
const.ACTION_TOUCH_INIT_FILE: touchInstanceInitFile,
const.ACTION_SYNC_CONFIG: syncClusterConfig,
const.ACTION_BACKUP_CONFIG: backupConfig,
const.ACTION_RESTORE_CONFIG: restoreConfig,
const.ACTION_INPLACE_BACKUP: inplaceBackup,
const.ACTION_INPLACE_RESTORE: inplaceRestore,
const.ACTION_RELOAD_CMAGENT: reloadCmagent,
const.ACTION_RELOAD_CMSERVER: reload_cmserver,
const.ACTION_CHECK_GUC: checkGucValue,
const.ACTION_BACKUP_HOTPATCH: backupHotpatch,
const.ACTION_ROLLBACK_HOTPATCH: rollbackHotpatch,
const.ACTION_COPY_CERTS: copyCerts,
const.ACTION_UPGRADE_SQL_FOLDER: prepareUpgradeSqlFolder,
const.ACTION_BACKUP_OLD_CLUSTER_DB_AND_REL:
backupOldClusterDBAndRel,
const.ACTION_UPDATE_CATALOG: updateCatalog,
const.ACTION_BACKUP_OLD_CLUSTER_CATALOG_PHYSICAL_FILES:
backupOldClusterCatalogPhysicalFiles,
const.ACTION_RESTORE_OLD_CLUSTER_CATALOG_PHYSICAL_FILES:
restoreOldClusterCatalogPhysicalFiles,
const.ACTION_CLEAN_OLD_CLUSTER_CATALOG_PHYSICAL_FILES:
cleanOldClusterCatalogPhysicalFiles,
const.ACTION_REPLACE_PG_PROC_FILES: replacePgprocFile,
const.ACTION_CREATE_PG_PROC_MAPPING_FILE:
createPgprocPathMappingFile,
const.ACTION_CREATE_NEW_CSV_FILE: createNewCsvFile,
const.ACTION_RESTORE_DYNAMIC_CONFIG_FILE: restoreDynamicConfigFile,
const.ACTION_GREY_SYNC_GUC: greySyncGuc,
const.ACTION_GREY_UPGRADE_CONFIG_SYNC: greyUpgradeSyncConfig,
const.ACTION_SWITCH_DN: switchDnNodeProcess,
const.ACTION_GET_LSN_INFO: getLsnInfo,
const.ACTION_GREY_RESTORE_CONFIG: greyRestoreConfig,
const.ACTION_GREY_RESTORE_GUC: greyRestoreGuc,
const.ACTION_CLEAN_CONF_BAK_OLD: cleanConfBakOld,
const.ACTION_SET_GUC_VALUE: setGucValue,
const.ACTION_CLEAN_CM: clean_cm_instance,
const.ACTION_CLEAN_TMP_GLOBAL_RELMAP_FILE: cleanTmpGlobalRelmapFile,
const.ACTION_BACKUP_GLOBAL_RELMAP_FILE: backupGlobalRelmapFile,
const.ACTION_RESTORE_GLOBAL_RELMAP_FILE: restoreGlobalRelmapFile}
func = funcs[g_opts.action]
func()
except Exception as e:
checkAction()
g_logger.debug(traceback.format_exc())
g_logger.logExit(str(e))
if __name__ == '__main__':
main()