# -*- coding:utf-8 -*- # Copyright (c) 2020 Huawei Technologies Co.,Ltd. # # openGauss is licensed under Mulan PSL v2. # You can use this software according to the terms # and conditions of the Mulan PSL v2. # You may obtain a copy of Mulan PSL v2 at: # # http://license.coscl.org.cn/MulanPSL2 # # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, # WITHOUT WARRANTIES OF ANY KIND, # EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. # ---------------------------------------------------------------------------- import os import sys import subprocess import time import json import re import csv import traceback from datetime import datetime, timedelta from gspylib.common.Common import DefaultValue, ClusterCommand from gspylib.common.DbClusterInfo import instanceInfo, \ dbNodeInfo, dbClusterInfo, compareObject from gspylib.common.OMCommand import OMCommand from gspylib.common.ErrorCode import ErrorCode from gspylib.threads.SshTool import SshTool from gspylib.common.VersionInfo import VersionInfo from gspylib.os.gsplatform import g_Platform from gspylib.os.gsfile import g_file from impl.upgrade.UpgradeConst import GreyUpgradeStep import impl.upgrade.UpgradeConst as Const class OldVersionModules(): """ class: old version modules """ def __init__(self): """ function: constructor """ # old cluster information self.oldDbClusterInfoModule = None # old cluster status self.oldDbClusterStatusModule = None class UpgradeImpl: """ Class: The class is used to do perform upgrade """ def __init__(self, upgrade): """ function: constructor """ self.context = upgrade self.newCommitId = "" self.oldCommitId = "" def exitWithRetCode(self, action, succeed=True, msg=""): """ funtion: should be called after cmdline parameter check input : action, succeed, msg, strategy output: NA """ ######################################### # doUpgrade # # binary-upgrade success failure # 0 1 # # binary-rollback success failure # 2 3 # commit-upgrade success failure # 5 1 ######################################### ######################################### # choseStrategy # success failure # 4 1 ######################################### if not succeed: if action == Const.ACTION_AUTO_ROLLBACK: retCode = 3 else: retCode = 1 elif action in [Const.ACTION_SMALL_UPGRADE, Const.ACTION_LARGE_UPGRADE, Const.ACTION_INPLACE_UPGRADE]: retCode = 0 elif action == Const.ACTION_AUTO_ROLLBACK: retCode = 2 elif action == Const.ACTION_CHOSE_STRATEGY: retCode = 4 elif action == Const.ACTION_COMMIT_UPGRADE: retCode = 5 else: retCode = 1 if msg != "": if self.context.logger is not None: if succeed: self.context.logger.log(msg) else: self.context.logger.error(msg) else: print(msg) sys.exit(retCode) def initGlobalInfos(self): """ function: init global infos input : NA output: NA """ self.context.logger.debug("Init global infos", "addStep") self.context.sshTool = SshTool( self.context.clusterNodes, self.context.localLog, DefaultValue.TIMEOUT_PSSH_BINARY_UPGRADE) self.initClusterConfig() self.context.logger.debug("Successfully init global infos", "constant") def setClusterDetailInfo(self): """ function: set cluster detail info input : NA output : NA """ self.context.clusterInfo.setCnCount() for dbNode in self.context.clusterInfo.dbNodes: dbNode.setDnDetailNum() dbNode.cmsNum = len(dbNode.cmservers) dbNode.gtmNum = len(dbNode.gtms) self.context.clusterInfo.setClusterDnCount() def checkExistsProcess(self, greyNodeNames): """ function: check exists process input : greyNodeNames output : NA """ pass def removeOmRollbackProgressFile(self): """ function: remove om rollback process file input : NA output : NA """ self.context.logger.debug("Remove the om rollback" " record progress file.") fileName = os.path.join(self.context.tmpDir, ".upgrade_task_om_rollback_result") cmd = "(if [ -f '%s' ];then rm -f '%s';fi)" % (fileName, fileName) DefaultValue.execCommandWithMode(cmd, "remove om rollback " "record progress file", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) def initOmRollbackProgressFile(self): """ function: init om rollback process file input : NA output : NA """ filePath = os.path.join(self.context.tmpDir, ".upgrade_task_om_rollback_result") cmd = "echo \"OM:RUN\" > %s" % filePath (status, output) = subprocess.getstatusoutput(cmd) if status != 0: self.context.logger.debug("The cmd is %s " % cmd) raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % filePath + "Error: \n%s" % str(output)) if (not self.context.isSingle): # send file to remote nodes self.context.sshTool.scpFiles(filePath, self.context.tmpDir) self.context.logger.debug("Successfully write file %s." % filePath) def run(self): """ function: Do upgrade input : NA output: NA """ # the action may be changed in each step, # if failed in auto-rollback, # we will check if we need to rollback action = self.context.action # upgrade backup path self.context.tmpDir = DefaultValue.getTmpDirFromEnv(self.context.user) if self.context.tmpDir == "": raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$PGHOST") self.context.upgradeBackupPath = \ "%s/%s" % (self.context.tmpDir, "binary_upgrade") try: self.initGlobalInfos() self.removeOmRollbackProgressFile() # 4. get upgrade type # After choseStrategy, it will assign action to self.context.action # to do full-upgrade or binary-upgrade if self.context.action == Const.ACTION_AUTO_UPGRADE: self.context.action = self.choseStrategy() self.context.logger.debug( "%s execution takes %s steps in total" % ( Const.GS_UPGRADECTL, ClusterCommand.countTotalSteps( Const.GS_UPGRADECTL, self.context.action))) # If get upgrade strategy failed, # then try to get rollback strategy. # Set strategyFlag as True to check # upgrade parameter is correct or not self.doInplaceBinaryUpgrade() # After choseStrategy, it will assign action to self.context.action elif self.context.action == Const.ACTION_AUTO_ROLLBACK: # because if we rollback with auto rollback, # we will rollback all the nodes, # but if we rollback under upgrade, # we will only rollback specified nodes self.context.action = self.choseStrategy() self.exitWithRetCode(Const.ACTION_AUTO_ROLLBACK, self.doInplaceBinaryRollback()) elif self.context.action == Const.ACTION_COMMIT_UPGRADE: self.context.action = self.choseStrategy() self.doInplaceCommitUpgrade() else: self.doChoseStrategy() except Exception as e: self.context.logger.debug(traceback.format_exc() + str(e)) if not self.context.sshTool: self.context.sshTool = SshTool( self.context.clusterNodes, self.context.logger, DefaultValue.TIMEOUT_PSSH_BINARY_UPGRADE) if action == Const.ACTION_AUTO_ROLLBACK and \ self.checkBakPathNotExists(): self.context.logger.log("No need to rollback.") self.exitWithRetCode(action, True) else: self.context.logger.error(str(e)) self.exitWithRetCode(action, False, str(e)) def checkBakPathNotExists(self): """ check binary_upgrade exists on all nodes, :return: True if not exists on all nodes """ try: cmd = "if [ -d '%s' ]; then echo 'GetDir'; else echo 'NoDir'; fi" \ % self.context.upgradeBackupPath self.context.logger.debug("Command for checking if upgrade bak " "path exists: %s" % cmd) outputCollect = self.context.sshTool.getSshStatusOutput(cmd)[1] if outputCollect.find('GetDir') >= 0: self.context.logger.debug("Checking result: %s" % outputCollect) return False self.context.logger.debug("Path %s does not exists on all node." % self.context.upgradeBackupPath) return True except Exception: self.context.logger.debug("Failed to check upgrade bak path.") return False def doChoseStrategy(self): """ function: chose the strategy for upgrade input : NA output: NA """ self.context.logger.debug("Choosing strategy.") try: self.context.action = self.choseStrategy() # we only support binary-upgrade. if self.context.action in [Const.ACTION_SMALL_UPGRADE, Const.ACTION_LARGE_UPGRADE]: self.exitWithRetCode(Const.ACTION_CHOSE_STRATEGY, True, "Upgrade strategy: %s." % self.context.action) # Use inplace upgrade under special case else: self.exitWithRetCode(Const.ACTION_CHOSE_STRATEGY, True, "Upgrade strategy: %s." % self.context.action) except Exception as e: self.exitWithRetCode(Const.ACTION_CHOSE_STRATEGY, False, str(e)) self.context.logger.debug("Successfully got the upgrade strategy.") def choseStrategy(self): """ function: chose upgrade strategy input : NA output: NA """ upgradeAction = None try: # get new cluster info newVersionFile = VersionInfo.get_version_file() newClusterVersion, newClusterNumber, newCommitId = \ VersionInfo.get_version_info(newVersionFile) gaussHome = DefaultValue.getInstallDir(self.context.user) if gaussHome == "": raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME") if not os.path.islink(gaussHome): raise Exception(ErrorCode.GAUSS_529["GAUSS_52915"]) newPath = gaussHome + "_%s" % newCommitId # new app dir should exist after preinstall, # then we can use chose strategy if not os.path.exists(newPath): if self.context.action != Const.ACTION_AUTO_ROLLBACK: raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % newPath) self.context.logger.debug( "Successfully obtained version information" " of new clusters by %s." % newVersionFile) # get the old cluster info, if binary_upgrade does not exists, # try to copy from other nodes oldPath = self.getClusterAppPath(Const.OLD) if oldPath == "": self.context.logger.debug("Cannot get the old install " "path from table and file.") oldPath = os.path.realpath(gaussHome) self.context.logger.debug("Old cluster app path is %s" % oldPath) oldVersionFile = "%s/bin/upgrade_version" % oldPath try: (oldClusterVersion, oldClusterNumber, oldCommitId) = \ VersionInfo.get_version_info(oldVersionFile) self.context.logger.debug("Successfully obtained version" " information of old clusters by %s." % oldVersionFile) except Exception as e: if os.path.exists(self.context.upgradeBackupPath): # if upgradeBackupPath exist, # it means that we do rollback first. # and we get cluster version from the backup file possibOldVersionFile = "%s/old_upgrade_version" \ % self.context.upgradeBackupPath self.context.logger.debug(str(e)) self.context.logger.debug( "Try to get the version information from %s." % possibOldVersionFile) (oldClusterVersion, oldClusterNumber, oldCommitId) = \ VersionInfo.get_version_info(possibOldVersionFile) else: raise Exception(str(e)) if self.context.action == Const.ACTION_AUTO_ROLLBACK or \ self.context.action == Const.ACTION_COMMIT_UPGRADE: inplace_upgrade_flag_file = "%s/inplace_upgrade_flag" \ % self.context.upgradeBackupPath # we do rollback by the backup directory if os.path.isfile(inplace_upgrade_flag_file): self.context.logger.debug( "inplace upgrade flag exists, " "use inplace rollback or commit.") self.context.is_inplace_upgrade = True # if last success commit upgrade_type is grey upgrade, # the symbolic link should point to the # old app path with old commit id if oldCommitId == newCommitId: raise Exception(ErrorCode.GAUSS_529["GAUSS_52901"]) self.context.logger.debug( "Successfully obtained version information of new and old " "clusters.\n The old cluster number:%s, the new " "cluster number:%s." % (oldClusterNumber, newClusterNumber)) if oldClusterVersion > newClusterVersion: raise Exception(ErrorCode.GAUSS_529["GAUSS_52902"] % (oldClusterVersion, newClusterVersion)) self.checkLastUpgrade(newCommitId) if float(newClusterNumber) < float(oldClusterNumber): raise Exception(ErrorCode.GAUSS_516["GAUSS_51629"] % newClusterNumber) elif float(newClusterNumber) == float(oldClusterNumber): upgradeAction = Const.ACTION_INPLACE_UPGRADE else: if int(float(newClusterNumber)) > int(float(oldClusterNumber)): raise Exception(ErrorCode.GAUSS_529["GAUSS_52904"] + "This cluster version is " "not supported upgrade.") elif ((float(newClusterNumber) - int(float(newClusterNumber))) > (float(oldClusterNumber) - int(float(oldClusterNumber)))): raise Exception(ErrorCode.GAUSS_529["GAUSS_52904"] + "This cluster version is " "not supported upgrade.") else: raise Exception(ErrorCode.GAUSS_516["GAUSS_51629"] % newClusterNumber) self.context.logger.debug("The matched upgrade strategy is: %s." % upgradeAction) self.context.newClusterVersion = newClusterVersion self.context.newClusterNumber = newClusterNumber self.context.oldClusterVersion = oldClusterVersion self.context.oldClusterNumber = oldClusterNumber self.context.newClusterAppPath = newPath self.context.oldClusterAppPath = oldPath self.newCommitId = newCommitId self.oldCommitId = oldCommitId return upgradeAction except Exception as e: raise Exception(ErrorCode.GAUSS_529["GAUSS_52900"] % str(e) + " Do nothing this time.") def checkLastUpgrade(self, newCommitId): """ check the last fail upgrade type is same with this time check the last upgrade version is same with this time under grey upgrade, if under inplace upgrade, we will rollback first, under grey upgrade, we will upgrade again """ if self.context.action == Const.ACTION_AUTO_UPGRADE: stepFile = os.path.join(self.context.upgradeBackupPath, Const.GREY_UPGRADE_STEP_FILE) cmd = "if [ -f '%s' ]; then echo 'True';" \ " else echo 'False'; fi" % stepFile (resultMap, outputCollect) = \ self.context.sshTool.getSshStatusOutput(cmd) self.context.logger.debug( "The result of checking grey upgrade step flag" " file on all nodes is:\n%s" % outputCollect) if self.context.is_inplace_upgrade: # if the grey upgrade rollback failed, it should have file, # so cannot do grey upgrade now if outputCollect.find('True') >= 0: ermsg = ErrorCode.GAUSS_502["GAUSS_50200"] \ % Const.GREY_UPGRADE_STEP_FILE \ + "In grey upgrade process, " \ "cannot do inplace upgrade!" raise Exception(str(ermsg)) elif self.context.action == Const.ACTION_AUTO_ROLLBACK or \ self.context.action == Const.ACTION_COMMIT_UPGRADE: self.checkNewCommitid(newCommitId) def checkNewCommitid(self, newCommitId): """ the commitid is in version.cfg, it should be same with the record commitid in record app directory file :param newCommitId: version.cfg line 3 :return: NA """ newPath = self.getClusterAppPath(Const.NEW) if newPath != "": LastNewCommitId = newPath[-8:] # When repeatedly run gs_upgradectl script, # this time upgrade version should be same # with last record upgrade version if newCommitId != LastNewCommitId: raise Exception(ErrorCode.GAUSS_529["GAUSS_52935"]) def checkOldClusterVersion(self, gaussdbPath, oldClusterVersionFile): """ check old cluster version input : gaussdbPath, oldClusterVersionFile output: 1. (0,"V100R00XCXX") 2. (999,"NAC00Version") 3. (1, errorMsg) otherwise raise exception """ if os.path.isfile(oldClusterVersionFile): cmd = "cat %s" % oldClusterVersionFile else: gaussdbFile = "%s/gaussdb" % gaussdbPath if not os.path.exists(gaussdbFile): self.context.logger.debug("The %s does not exist." " Cannot obtain old cluster" " version." % gaussdbFile) return 1, " The %s does not exist. Cannot " \ "obtain old cluster version" % gaussdbFile if not os.path.isfile(gaussdbFile): self.context.logger.debug("The %s is not a file. " "Cannot obtain old cluster" " version." % gaussdbFile) return 1, " The %s is not a file. Cannot " \ "obtain old cluster version" % gaussdbFile # get old cluster version by gaussdb # the information of gaussdb like this: # gaussdb Gauss200 V100R00XCXX build xxxx # compiled at xxxx-xx-xx xx:xx:xx cmd = "export LD_LIBRARY_PATH=%s/lib:$LD_LIBRARY_PATH;%s " \ "--version" % (os.path.dirname(gaussdbPath), gaussdbFile) self.context.logger.debug("Command for getting old" " cluster version:%s" % cmd) (status, output) = subprocess.getstatusoutput(cmd) if status == 0 and re.compile(r'V[0-9]{3}R[0-9]{3}C[0-9]{2}' ).search(str(output)) is not None: return 0, re.compile( r'V[0-9]{3}R[0-9]{3}C[0-9]{2}').search(str(output)).group() else: self.context.logger.debug("Failed to obtain old cluster" " version. Error: \n%s" % str(output)) return 999, "NAC00Version" def setGUCValue(self, gucKey, gucValue, actionType="reload"): """ function: do gs_guc input : gucKey - parameter name gucValue - parameter value actionType - guc action type(set/reload). default is 'reload' """ userProfile = DefaultValue.getMpprcFile() if gucValue != "": gucStr = "%s='%s'" % (gucKey, gucValue) else: gucStr = "%s" % gucKey cmd = "source %s ;" % userProfile cmd += "gs_guc %s -N all -I all -c \"%s\"" % (actionType, gucStr) self.context.logger.debug("Command for setting " "GUC parameter %s: %s" % (gucKey, cmd)) (status, output) = DefaultValue.retryGetstatusoutput(cmd) return status, output def setClusterReadOnlyMode(self): """ function: set cluster read only mode input : NA output : int """ self.context.logger.debug("Setting up the cluster read-only mode.") (status, output) = self.setGUCValue("default_transaction_read_only", "true") if status == 0: self.context.logger.debug("successfully set the " "cluster read-only mode.") return 0 else: self.context.logger.debug( "Failed to set default_transaction_read_only parameter." + " Error: \n%s" % str(output)) return 1 def unSetClusterReadOnlyMode(self): """ function: Canceling the cluster read-only mode input : NA output: 0 successfully 1 failed """ self.context.logger.debug("Canceling the cluster read-only mode.") # un set cluster read only mode (status, output) = self.setGUCValue("default_transaction_read_only", "false") if status == 0: self.context.logger.debug("Successfully cancelled the" " cluster read-only mode.") return 0 else: self.context.logger.debug( "Failed to set default_transaction_read_only parameter." + " Error: \n%s" % str(output)) return 1 def stopCluster(self): """ function: Stopping the cluster input : NA output: NA """ self.context.logger.log("Stopping the cluster.", "addStep") # Stop cluster applications cmd = "%s -U %s -R %s -t %s" % ( OMCommand.getLocalScript("Local_StopInstance"), self.context.user, self.context.clusterInfo.appPath, Const.UPGRADE_TIMEOUT_CLUSTER_STOP) self.context.logger.debug("Command for stop cluster: %s" % cmd) DefaultValue.execCommandWithMode( cmd, "Stop cluster", self.context.sshTool, self.context.isSingle or self.context.localMode, self.context.mpprcFile) self.context.logger.log("Successfully stopped cluster.") def startCluster(self): """ function: start cluster input : NA output: NA """ cmd = "%s -U %s -R %s -t %s" % ( OMCommand.getLocalScript("Local_StartInstance"), self.context.user, self.context.clusterInfo.appPath, Const.UPGRADE_TIMEOUT_CLUSTER_START) DefaultValue.execCommandWithMode( cmd, "Start cluster", self.context.sshTool, self.context.isSingle or self.context.localMode, self.context.mpprcFile) self.context.logger.log("Successfully started cluster.") def createCommitFlagFile(self): """ function: create a flag file, if this file exists, means that user have called commit interface, but still not finished. if create failed, script should exit. input : NA output: NA """ commitFlagFile = "%s/commitFlagFile" % self.context.upgradeBackupPath self.context.logger.debug("Start to create the commit flag file.") try: cmd = "(if [ -d '%s' ]; then touch '%s'; fi) " % ( self.context.upgradeBackupPath, commitFlagFile) DefaultValue.execCommandWithMode(cmd, "create commit flag file", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: raise Exception(ErrorCode.GAUSS_502["GAUSS_50206"] % ("commit flag file: %s" % str(e))) self.context.logger.debug("Successfully created the commit flag file.") def checkCommitFlagFile(self): """ function: check if commit flag file exists. input : NA output: return 0, If there is the file commitFlagFile. else, return 1 """ commitFlagFile = "%s/commitFlagFile" % self.context.upgradeBackupPath if (os.path.isfile(commitFlagFile)): return 0 else: return 1 def createInplaceUpgradeFlagFile(self): """ function: create inplace upgrade flag file on all nodes if is doing inplace upgrade 1.check if is inplace upgrade 2.get new and old cluster version number 3.write file Input: NA output : NA """ self.context.logger.debug("Start to create inplace upgrade flag file.") try: newClusterNumber = self.context.newClusterNumber oldClusterNumber = self.context.oldClusterNumber inplace_upgrade_flag_file = "%s/inplace_upgrade_flag" % \ self.context.upgradeBackupPath g_file.createFile(inplace_upgrade_flag_file) g_file.writeFile(inplace_upgrade_flag_file, ["newClusterNumber:%s" % newClusterNumber], 'a') g_file.writeFile(inplace_upgrade_flag_file, ["oldClusterNumber:%s" % oldClusterNumber], 'a') if (not self.context.isSingle): self.context.sshTool.scpFiles(inplace_upgrade_flag_file, self.context.upgradeBackupPath) self.context.logger.debug("Successfully created inplace" " upgrade flag file.") except Exception as e: raise Exception(str(e)) def setUpgradeFromParam(self, ClusterVersionNumber, isCheck=True): """ function: set upgrade_from parameter Input : oldClusterNumber, isCheck output : NA """ self.context.logger.debug("Set upgrade_from guc parameter.") workingGrandVersion = int(float(ClusterVersionNumber) * 1000) cmd = "gs_guc set -Z cmagent -N all -I all -c " \ "'upgrade_from=%s'" % workingGrandVersion self.context.logger.debug("Command for setting cmagent" " parameter: %s." % cmd) try: (status, output) = subprocess.getstatusoutput(cmd) if status != 0: self.context.logger.debug("Set upgrade_from parameter" " failed. cmd:%s\nOutput:%s" % (cmd, str(output))) raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Error: \n%s" % str(output)) if isCheck: gucStr = "%s:%s" % ("upgrade_from", str(workingGrandVersion).strip()) self.checkParam(gucStr) self.context.logger.debug("Successfully set cmagent parameter " "upgrade_from=%s." % workingGrandVersion) except Exception as e: if self.context.action == Const.ACTION_INPLACE_UPGRADE or not \ self.context.forceRollback: raise Exception(str(e)) self.context.logger.log("Failed to set upgrade_from," " please set it manually with" " command: \n%s" % str(cmd)) def setUpgradeMode(self, mode): """ function: set upgrade_mode parameter Input : mode output : NA """ try: self.setUpgradeModeGuc(mode) except Exception as e: if self.context.action == Const.ACTION_INPLACE_UPGRADE or \ not self.context.forceRollback: raise Exception(str(e)) try: self.setUpgradeModeGuc(mode, "set") except Exception as e: self.context.logger.log("Failed to set upgrade_mode," " please set it manually.") def setUpgradeModeGuc(self, mode, setType="reload"): """ function: set upgrade mode guc input : mode, setType output : NA """ self.context.logger.debug("Set upgrade_mode guc parameter.") cmd = "gs_guc %s -Z coordinator -Z datanode -N all " \ "-I all -c 'upgrade_mode=%d'" % (setType, mode) self.context.logger.debug("Command for setting database" " node parameter: %s." % cmd) (status, output) = subprocess.getstatusoutput(cmd) if status != 0: self.context.logger.debug("Set upgrade_mode parameter " "failed. cmd:%s\nOutput:%s" % (cmd, str(output))) raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Error: \n%s" % str(output)) gucStr = "upgrade_mode:%d" % mode self.checkParam(gucStr) self.context.logger.debug("Successfully set " "upgrade_mode to %d." % mode) def checkParam(self, gucStr): """ function: check the cmagent guc value Input : gucStr the guc key:value string output : NA """ self.context.logger.debug("Start to check GUC value %s." % gucStr) try: # send cmd to that node and exec cmd = "%s -t %s -U %s --upgrade_bak_path=%s" \ " --guc_string=%s -l %s" % \ (OMCommand.getLocalScript("Local_Upgrade_Utility"), Const.ACTION_CHECK_GUC, self.context.user, self.context.upgradeBackupPath, gucStr, self.context.localLog) self.context.logger.debug("Command for checking" " parameter: %s." % cmd) DefaultValue.execCommandWithMode(cmd, "check GUC value", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) self.context.logger.debug("Successfully checked guc value.") except Exception as e: raise Exception(str(e)) def floatMoreThan(self, numOne, numTwo): """ function: float more than input : numOne, numTwo output : True/False """ if float(numOne) - float(numTwo) > float(Const.DELTA_NUM): return True return False def floatLessThan(self, numOne, numTwo): """ function: float less than input: numOne, numTwo output: True/False """ if float(numOne) - float(numTwo) < float(-Const.DELTA_NUM): return True return False def floatEqualTo(self, numOne, numTwo): """ function: float equal to input: numOne, numTwo output: True/False """ if float(-Const.DELTA_NUM) < (float(numOne) - float(numTwo)) \ < float(Const.DELTA_NUM): return True return False def floatGreaterOrEqualTo(self, numOne, numTwo): """ function: float greater or equal to input: numOne, numTwo output: True/False """ if self.floatMoreThan(numOne, numTwo) or \ self.floatEqualTo(numOne, numTwo): return True return False def doInplaceBinaryUpgrade(self): """ function: do binary upgrade, which essentially replace the binary files input : NA output: NA """ # 1. distribute new package to every nodes. self.distributeXml() # 2. check whether we should do rollback or not. if not self.doInplaceBinaryRollback(): self.exitWithRetCode(Const.ACTION_AUTO_ROLLBACK, False) try: self.checkUpgrade() # 3. before do binary upgrade, we must make sure the cluster is # Normal and the database could be connected # if not, exit. self.startCluster() if self.unSetClusterReadOnlyMode() != 0: raise Exception("NOTICE: " + ErrorCode.GAUSS_529["GAUSS_52907"]) self.recordNodeStepInplace(Const.ACTION_INPLACE_UPGRADE, Const.BINARY_UPGRADE_STEP_INIT_STATUS) (status, output) = self.doHealthCheck(Const.OPTION_PRECHECK) if status != 0: self.exitWithRetCode(Const.ACTION_INPLACE_UPGRADE, False, ErrorCode.GAUSS_516["GAUSS_51601"] % "cluster" + output) # 4.record the old and new app dir in file self.recordDirFile() if self.setClusterReadOnlyMode() != 0: raise Exception(ErrorCode.GAUSS_529["GAUSS_52908"]) # after checkUpgrade, the bak path is ready, we can use it now # create inplace upgrade flag file if is doing inplace upgrade self.createInplaceUpgradeFlagFile() # 7. backup current application and configuration. # The function only be used by binary upgrade. # to ensure the transaction atomicity, # it will be used with checkUpgrade(). self.backupNodeVersion() # 8. stop old cluster self.recordNodeStepInplace(Const.ACTION_INPLACE_UPGRADE, Const.BINARY_UPGRADE_STEP_STOP_NODE) self.context.logger.debug("Start to stop all instances" " on the node.", "addStep") self.stopCluster() self.context.logger.debug("Successfully stop all" " instances on the node.", "constant") # 9. back cluster config. including this: # cluster_static_config # cluster_dynamic_config # etc/gscgroup_xxx.cfg # lib/postgresql/pg_plugin # server.key.cipher # server.key.rand # Data Studio lib files # gds files # physical catalog files if performing inplace upgrade self.recordNodeStepInplace( Const.ACTION_INPLACE_UPGRADE, Const.BINARY_UPGRADE_STEP_BACKUP_VERSION) self.backupClusterConfig() # 10. Upgrade application on node # install new bin file self.recordNodeStepInplace(Const.ACTION_INPLACE_UPGRADE, Const.BINARY_UPGRADE_STEP_UPGRADE_APP) self.installNewBin() # 11. restore the cluster config. including this: # cluster_static_config # cluster_dynamic_config # etc/gscgroup_xxx.cfg # lib/postgresql/pg_plugin # server.key.cipher # server.key.rand # Data Studio lib files # gds files # cn cert files # At the same time, sync newly added guc for instances self.restoreClusterConfig() self.syncNewGUC() # 12. modify GUC parameter unix_socket_directory self.modifySocketDir() # 13. start new cluster self.recordNodeStepInplace(Const.ACTION_INPLACE_UPGRADE, Const.BINARY_UPGRADE_STEP_START_NODE) self.context.logger.debug("Start to start all instances" " on the node.", "addStep") # update catalog # start cluster in normal mode self.CopyCerts() self.context.createGrpcCa() self.context.logger.debug("Successfully createGrpcCa.") self.switchBin(Const.NEW) self.startCluster() self.context.logger.debug("Successfully start all " "instances on the node.", "constant") # 14. check the cluster status (status, output) = self.doHealthCheck(Const.OPTION_POSTCHECK) if status != 0: raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] % "cluster" + output) # 15. record precommit step status self.recordNodeStepInplace(Const.ACTION_INPLACE_UPGRADE, Const.BINARY_UPGRADE_STEP_PRE_COMMIT) self.printPrecommitBanner() except Exception as e: self.context.logger.error(str(e)) self.context.logger.log("Binary upgrade failed. Rollback" " to the original cluster.") # do rollback self.exitWithRetCode(Const.ACTION_AUTO_ROLLBACK, self.doInplaceBinaryRollback()) self.exitWithRetCode(Const.ACTION_INPLACE_UPGRADE, True) def doInplaceCommitUpgrade(self): """ function: commit binary upgrade and clean up backup files 1. unset read-only 2. drop old PMK schema 3. restore UDF 4. clean backup catalog physical files if doing inplace upgrade 5. clean up other upgrade tmp files input : NA output: NA """ if self.getNodeStepInplace() != Const.BINARY_UPGRADE_STEP_PRE_COMMIT: raise Exception(ErrorCode.GAUSS_529["GAUSS_52916"] + " Please check if previous upgrade" " operation was successful or if" " upgrade has already been committed.") # create commit flag file self.createCommitFlagFile() # variable to indicate whether we should keep step file # and cleanup list file for re-entry cleanUpSuccess = True # 1.unset read-only if self.unSetClusterReadOnlyMode() != 0: self.context.logger.log("NOTICE: " + ErrorCode.GAUSS_529["GAUSS_52907"]) cleanUpSuccess = False # 2. drop old PMK schema # we sleep 10 seconds first because DB might be updating # ha status after unsetting read-only time.sleep(10) if not cleanUpSuccess: self.context.logger.log("NOTICE: Cleanup is incomplete during" " commit. Please re-commit upgrade once" " again or cleanup manually") self.exitWithRetCode(Const.ACTION_INPLACE_UPGRADE, False) else: # 8. clean up other upgrade tmp files # and uninstall inplace upgrade support functions self.cleanInstallPath(Const.OLD) self.cleanBinaryUpgradeBakFiles() self.context.logger.log("Commit binary upgrade succeeded.") self.exitWithRetCode(Const.ACTION_INPLACE_UPGRADE, True) def setNewVersionGuc(self): """ function: set new Version guc input : NA output : NA """ pass def setActionFile(self): """ set the action from step file, if not find, set it to large upgrade, if the upgrade type is small upgrade, but we set it to large upgrade, just kill the cm agent as expense, take no effect to transaction But if the action should be large, we does not set the upgrade_mode, some new feature will not opened :return: NA """ stepFile = os.path.join(self.context.upgradeBackupPath, Const.GREY_UPGRADE_STEP_FILE) self.context.logger.debug("Get the action from file %s." % stepFile) if not os.path.exists(stepFile) or os.path.isfile(stepFile): self.context.logger.debug("Step file does not exists or not file," " cannot get action from it. " "Set it to large upgrade.") self.context.action = Const.ACTION_LARGE_UPGRADE return with open(stepFile, 'r') as csvfile: reader = csv.DictReader(csvfile) for row in reader: self.context.action = row['upgrade_action'] break self.context.logger.debug("Set the action to %s" % self.context.action) def getClusterAppPath(self, mode=Const.OLD): """ if cannot get path from table, try to get from the backup file :param mode: :return: """ self.context.logger.debug("Get the install path from table or file.") path = self.getClusterAppPathFromFile(mode) return path def getClusterAppPathFromFile(self, mode=Const.OLD): """ get the app path from backup dir, mode is new or old, :param mode: 'old', 'new' :return: the real path of appPath """ dirFile = "%s/%s" % (self.context.upgradeBackupPath, Const.RECORD_UPGRADE_DIR) self.context.logger.debug("Get the %s app path from file %s" % (mode, dirFile)) if mode not in [Const.OLD, Const.NEW]: raise Exception(traceback.format_exc()) if not os.path.exists(dirFile): self.context.logger.debug(ErrorCode.GAUSS_502["GAUSS_50201"] % dirFile) if self.checkBakPathNotExists(): return "" # copy the binary_upgrade dir from other node, # if one node is damaged while binary_upgrade may disappear, # user repair one node before commit, and send the commit # command to the repair node, we need to copy the # dir from remote node cmd = "if [ -f '%s' ]; then echo 'GetFile';" \ " else echo 'NoThisFile'; fi" % dirFile self.context.logger.debug("Command for checking file: %s" % cmd) (status, output) = self.context.sshTool.getSshStatusOutput( cmd, self.context.clusterNodes, self.context.mpprcFile) outputMap = self.context.sshTool.parseSshOutput( self.context.clusterNodes) self.context.logger.debug("Output: %s" % output) copyNode = "" for node in self.context.clusterNodes: if status[node] == DefaultValue.SUCCESS: if 'GetFile' in outputMap[node]: copyNode = node break if copyNode: if not os.path.exists(self.context.upgradeBackupPath): self.context.logger.debug("Create directory %s." % self.context.tmpDir) g_file.createDirectory( self.context.upgradeBackupPath, True, DefaultValue.KEY_DIRECTORY_MODE) self.context.logger.debug("Copy the directory %s from node %s." % (self.context.upgradeBackupPath, copyNode)) cmd = g_Platform.getRemoteCopyCmd( self.context.upgradeBackupPath, self.context.tmpDir, str(copyNode), False, 'directory') self.context.logger.debug("Command for copying " "directory: %s" % cmd) DefaultValue.execCommandLocally(cmd) else: # binary_upgrade exists, but no step file return "" if not os.path.isfile(dirFile): raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % dirFile) with open(dirFile, 'r') as fp: retLines = fp.readlines() if len(retLines) != 2: raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"] % dirFile) if mode == Const.OLD: path = retLines[0].strip() else: path = retLines[1].strip() # if can get the path from file, the path must be valid, # otherwise the file is damaged accidentally DefaultValue.checkPathVaild(path) if not os.path.exists(path): if mode == Const.NEW and \ self.context.action == Const.ACTION_AUTO_ROLLBACK: self.context.logger.debug("Under rollback, the new " "cluster app path does not exists.") elif mode == Const.OLD and \ self.context.action == Const.ACTION_COMMIT_UPGRADE: self.context.logger.debug("Under commit, no need to " "check the old path exists.") else: self.context.logger.debug(traceback.format_exc()) raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % path) self.context.logger.debug("Successfully Get the app" " path [%s] from file" % path) return path def printPrecommitBanner(self): """ funcation: if in pre-commit status, and do not execute the commit cmd, then can print this message input : NA output: NA """ self.context.logger.log("Upgrade main process has been finished," " user can do some check now.") self.context.logger.log("Once the check done, please execute " "following command to commit upgrade:") xmlFile = self.context.xmlFile \ if len(self.context.xmlFile) else "XMLFILE" self.context.logger.log("\n gs_upgradectl -t " "commit-upgrade -X %s \n" % xmlFile) def doInplaceBinaryRollback(self): """ function: rollback the upgrade of binary input : NA output: return True, if the operation is done successfully. return False, if the operation failed. """ self.context.logger.log("Performing inplace rollback.") # step flag # Const.BINARY_UPGRADE_NO_NEED_ROLLBACK value is -2 # Const.INVALID_UPRADE_STEP value is -1 # Const.BINARY_UPGRADE_STEP_INIT_STATUS value is 0 # Const.BINARY_UPGRADE_STEP_BACKUP_STATUS value is 1 # Const.BINARY_UPGRADE_STEP_STOP_NODE value is 2 # Const.BINARY_UPGRADE_STEP_BACKUP_VERSION value is 3 # Const.BINARY_UPGRADE_STEP_UPGRADE_APP value is 4 # Const.BINARY_UPGRADE_STEP_START_NODE value is 5 # Const.BINARY_UPGRADE_STEP_PRE_COMMIT value is 6 self.distributeXml() step = self.getNodeStepInplace() if step == Const.BINARY_UPGRADE_NO_NEED_ROLLBACK: self.context.logger.log("Rollback succeeded.") return True # if step <= -1, it means the step file is broken, exit. if step <= Const.INVALID_UPRADE_STEP: self.context.logger.debug("Invalid upgrade step: %s." % str(step)) return False # if step value is Const.BINARY_UPGRADE_STEP_PRE_COMMIT # and find commit flag file, # means user has commit upgrade, then can not do rollback if step == Const.BINARY_UPGRADE_STEP_PRE_COMMIT: if not self.checkCommitFlagFile(): self.context.logger.log( "Upgrade has already been committed, " "can not execute rollback command any more.") return False try: self.checkStaticConfig() # Mark that we leave pre commit status, # so that if we fail at the first few steps, # we won't be allowed to commit upgrade any more. if step == Const.BINARY_UPGRADE_STEP_PRE_COMMIT: self.recordNodeStepInplace( Const.ACTION_INPLACE_UPGRADE, Const.BINARY_UPGRADE_STEP_START_NODE) if step >= Const.BINARY_UPGRADE_STEP_START_NODE: self.restoreClusterConfig(True) self.switchBin(Const.OLD) self.stopCluster() self.recordNodeStepInplace( Const.ACTION_INPLACE_UPGRADE, Const.BINARY_UPGRADE_STEP_UPGRADE_APP) if step >= Const.BINARY_UPGRADE_STEP_UPGRADE_APP: self.restoreNodeVersion() self.restoreClusterConfig(True) self.recordNodeStepInplace( Const.ACTION_INPLACE_UPGRADE, Const.BINARY_UPGRADE_STEP_BACKUP_VERSION) if step >= Const.BINARY_UPGRADE_STEP_BACKUP_VERSION: self.recordNodeStepInplace( Const.ACTION_INPLACE_UPGRADE, Const.BINARY_UPGRADE_STEP_STOP_NODE) if step >= Const.BINARY_UPGRADE_STEP_STOP_NODE: self.startCluster() self.recordNodeStepInplace( Const.ACTION_INPLACE_UPGRADE, Const.BINARY_UPGRADE_STEP_INIT_STATUS) if step >= Const.BINARY_UPGRADE_STEP_INIT_STATUS: if self.unSetClusterReadOnlyMode() != 0: raise Exception("NOTICE: " + ErrorCode.GAUSS_529["GAUSS_52907"]) self.cleanBinaryUpgradeBakFiles(True) self.cleanInstallPath(Const.NEW) except Exception as e: self.context.logger.error(str(e)) self.context.logger.log("Rollback failed.") return False self.context.logger.log("Rollback succeeded.") return True def getSqlHeader(self): """ function: get sql header input : NA output : NA """ header = ["START TRANSACTION;"] header.append("SET %s = on;" % Const.ON_INPLACE_UPGRADE) header.append("SET search_path = 'pg_catalog';") header.append("SET local client_min_messages = NOTICE;") header.append("SET local log_min_messages = NOTICE;") return header def getFileNameList(self, filePathName): """ function: get file name list input : filePathName output : [] """ filePath = "%s/upgrade_sql/%s" % (self.context.upgradeBackupPath, filePathName) allFileList = os.listdir(filePath) upgradeFileList = [] if len(allFileList) == 0: return [] for each_sql_file in allFileList: if not os.path.isfile("%s/%s" % (filePath, each_sql_file)): continue prefix = each_sql_file.split('.')[0] resList = prefix.split('_') if len(resList) != 5: continue file_num = "%s.%s" % (resList[3], resList[4]) if self.floatMoreThan(float(file_num), self.context.oldClusterNumber) and \ self.floatGreaterOrEqualTo(self.context.newClusterNumber, float(file_num)): upgradeFileList.append(each_sql_file) return upgradeFileList def initClusterInfo(self, dbClusterInfoPath): """ function: init the cluster input : dbClusterInfoPath output: dbClusterInfo """ clusterInfoModules = OldVersionModules() fileDir = os.path.dirname(os.path.realpath(dbClusterInfoPath)) sys.path.insert(0, fileDir) # init cluster information clusterInfoModules.oldDbClusterInfoModule = __import__('DbClusterInfo') sys.path.remove(fileDir) return clusterInfoModules.oldDbClusterInfoModule.dbClusterInfo() def initOldClusterInfo(self, dbClusterInfoPath): """ function: init old cluster information input : dbClusterInfoPath output: clusterInfoModules.oldDbClusterInfoModule.dbClusterInfo() """ clusterInfoModules = OldVersionModules() fileDir = os.path.dirname(os.path.realpath(dbClusterInfoPath)) # script and OldDbClusterInfo.py are in the same PGHOST directory sys.path.insert(0, fileDir) # V1R8 DbClusterInfo.py is "from gspylib.common.ErrorCode import # ErrorCode" sys.path.insert(0, os.path.join(fileDir, "script")) # init old cluster information clusterInfoModules.oldDbClusterInfoModule = \ __import__('OldDbClusterInfo') return clusterInfoModules.oldDbClusterInfoModule.dbClusterInfo() def initClusterConfig(self): """ function: init cluster info input : NA output: NA """ gaussHome = \ DefaultValue.getEnvironmentParameterValue("GAUSSHOME", self.context.user) # $GAUSSHOME must has available value. if gaussHome == "": raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME") (appPath, appPathName) = os.path.split(gaussHome) commonDbClusterInfoModule = \ "%s/bin/script/gspylib/common/DbClusterInfo.py" % gaussHome commonStaticConfigFile = "%s/bin/cluster_static_config" % gaussHome try: if self.context.action == Const.ACTION_INPLACE_UPGRADE: # get DbClusterInfo.py and cluster_static_config both of backup # path and install path # get oldClusterInfo # if the backup file exists, we use them; # if the install file exists, we use them; # else, we can not get oldClusterInfo, exit. # backup path exists commonDbClusterInfoModuleBak = "%s/../OldDbClusterInfo.py" % \ self.context.upgradeBackupPath commonStaticConfigFileBak = "%s/../cluster_static_config" % \ self.context.upgradeBackupPath # if binary.tar exist, decompress it if os.path.isfile("%s/%s" % (self.context.upgradeBackupPath, self.context.binTarName)): cmd = "cd '%s'&&tar xfp '%s'" % \ (self.context.upgradeBackupPath, self.context.binTarName) (status, output) = subprocess.getstatusoutput(cmd) if (status != 0): raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Error: \n%s" % str(output)) if (os.path.isfile(commonDbClusterInfoModuleBak) and os.path.isfile(commonStaticConfigFileBak)): try: # import old module # init old cluster config self.context.oldClusterInfo = \ self.initOldClusterInfo( commonDbClusterInfoModuleBak) self.context.oldClusterInfo.initFromStaticConfig( self.context.user, commonStaticConfigFileBak) except Exception as e: # maybe the old cluster is V1R5C00 TR5 version, not # support specify static config file # path for initFromStaticConfig function, # so use new cluster format try again self.context.oldClusterInfo = dbClusterInfo() self.context.oldClusterInfo.initFromStaticConfig( self.context.user, commonStaticConfigFileBak) # if backup path not exist, then use install path elif (os.path.isfile(commonDbClusterInfoModule) and os.path.isfile(commonStaticConfigFile)): # import old module # init old cluster config self.context.oldClusterInfo = \ self.initClusterInfo(commonDbClusterInfoModule) self.context.oldClusterInfo.initFromStaticConfig( self.context.user, commonStaticConfigFile) else: raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % "static config file") # get the accurate logPath logPathWithUser = DefaultValue.getEnv("GAUSSLOG") DefaultValue.checkPathVaild(logPathWithUser) splitMark = "/%s" % self.context.user self.context.oldClusterInfo.logPath = \ logPathWithUser[0:(logPathWithUser.rfind(splitMark))] # init new cluster config # if xmlFile != "", init it by initFromXml(); # else, using oldClusterInfo if self.context.xmlFile != "": # get clusterInfo # if falied to do dbClusterInfo, it means the # DbClusterInfo.py is not correct # we will use the backup file to instead of it self.context.clusterInfo = dbClusterInfo() try: self.context.clusterInfo.initFromXml( self.context.xmlFile) except Exception as e: self.context.logger.error(str(e)) try: # init clusterinfo from backup dbclusterinfo self.context.clusterInfo = \ self.initOldClusterInfo( commonDbClusterInfoModuleBak) self.context.clusterInfo.initFromXml( self.context.xmlFile) except Exception as e: try: self.context.clusterInfo = \ self.initClusterInfo( commonDbClusterInfoModule) self.context.clusterInfo.initFromXml( self.context.xmlFile) except Exception as e: raise Exception(str(e)) # verify cluster config info between old and new cluster self.verifyClusterConfigInfo(self.context.clusterInfo, self.context.oldClusterInfo) # after doing verifyClusterConfigInfo(), # the clusterInfo and oldClusterInfo are be changed, # so we should do init it again self.context.clusterInfo = dbClusterInfo() try: self.context.clusterInfo.initFromXml( self.context.xmlFile) except Exception as e: self.context.logger.debug(str(e)) try: # init clusterinfo from backup dbclusterinfo self.context.clusterInfo = \ self.initOldClusterInfo( commonDbClusterInfoModuleBak) self.context.clusterInfo.initFromXml( self.context.xmlFile) except Exception as e: try: self.context.clusterInfo = \ self.initClusterInfo( commonDbClusterInfoModule) self.context.clusterInfo.initFromXml( self.context.xmlFile) except Exception as e: raise Exception(str(e)) else: self.context.clusterInfo = self.context.oldClusterInfo elif (self.context.action == Const.ACTION_CHOSE_STRATEGY or self.context.action == Const.ACTION_COMMIT_UPGRADE): # after switch to new bin, the gausshome points to newversion, # so the oldClusterNumber is same with # newClusterNumber, the oldClusterInfo is same with new try: self.context.oldClusterInfo = self.context.clusterInfo if os.path.isfile(commonDbClusterInfoModule) and \ os.path.isfile(commonStaticConfigFile): # import old module # init old cluster config self.context.oldClusterInfo = \ self.initClusterInfo(commonDbClusterInfoModule) self.context.oldClusterInfo.initFromStaticConfig( self.context.user, commonStaticConfigFile) else: raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % "static config file") except Exception as e: # upgrade backup path if (os.path.exists( "%s/%s/bin/script/util/DbClusterInfo.py" % ( self.context.upgradeBackupPath, appPathName))): binaryModuleBak = \ "%s/%s/bin/script/util/DbClusterInfo.py" % \ (self.context.upgradeBackupPath, appPathName) else: binaryModuleBak = \ "%s/%s/bin/script/gspylib/common/" \ "DbClusterInfo.py" % \ (self.context.upgradeBackupPath, appPathName) binaryStaticConfigFileBak = \ "%s/%s/bin/cluster_static_config" % \ (self.context.upgradeBackupPath, appPathName) if os.path.isfile(binaryModuleBak) and \ os.path.isfile(binaryStaticConfigFileBak): # import old module # init old cluster config commonDbClusterInfoModuleBak = \ "%s/../OldDbClusterInfo.py" % \ self.context.upgradeBackupPath self.context.oldClusterInfo = \ self.initOldClusterInfo( commonDbClusterInfoModuleBak) self.context.oldClusterInfo.initFromStaticConfig( self.context.user, binaryStaticConfigFileBak) else: raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % "static config file") elif (self.context.action in [Const.ACTION_SMALL_UPGRADE, Const.ACTION_AUTO_UPGRADE, Const.ACTION_LARGE_UPGRADE, Const.ACTION_AUTO_ROLLBACK]): # 1. get new cluster info self.context.clusterInfo = dbClusterInfo() self.context.clusterInfo.initFromXml(self.context.xmlFile) # 2. get oldClusterInfo # when under rollback # the gausshome may point to old or new clusterAppPath, # so we must choose from the record table # when upgrade abnormal nodes, the gausshome points to # newClusterAppPath oldPath = self.getClusterAppPath() if oldPath != "" and os.path.exists(oldPath): self.context.logger.debug("The old install path is %s" % oldPath) commonDbClusterInfoModule = \ "%s/bin/script/gspylib/common/DbClusterInfo.py" % \ oldPath commonStaticConfigFile = \ "%s/bin/cluster_static_config" % oldPath else: self.context.logger.debug("The old install path is %s" % os.path.realpath(gaussHome)) if (os.path.isfile(commonDbClusterInfoModule) and os.path.isfile(commonStaticConfigFile)): # import old module # init old cluster config self.context.oldClusterInfo = \ self.initClusterInfo(commonDbClusterInfoModule) self.context.oldClusterInfo.initFromStaticConfig( self.context.user, commonStaticConfigFile) else: raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % "static config file") staticClusterInfo = dbClusterInfo() config = os.path.join(gaussHome, "bin/cluster_static_config") if not os.path.isfile(config): raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % os.path.realpath(config)) staticClusterInfo.initFromStaticConfig(self.context.user, config) # verify cluster config info between old and new cluster self.verifyClusterConfigInfo(self.context.clusterInfo, staticClusterInfo) # after doing verifyClusterConfigInfo(), the clusterInfo and # oldClusterInfo are be changed, # so we should do init it again self.context.clusterInfo = dbClusterInfo() # we will get the self.context.newClusterAppPath in # choseStrategy self.context.clusterInfo.initFromXml(self.context.xmlFile) self.context.logger.debug("Successfully init cluster config.") else: raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % 't' + " Value: %s" % self.context.action) except Exception as e: self.context.logger.debug(traceback.format_exc()) self.exitWithRetCode(self.context.action, False, str(e)) def verifyClusterConfigInfo(self, clusterInfo, oldClusterInfo, ignoreFlag="upgradectl"): """ function: verify cluster config info between xml and static config input : clusterInfo, oldClusterInfo output: NA """ try: # should put self.context.clusterInfo before # self.context.oldClusterInfo, # because self.context.oldClusterInfo is not the istance of # dbCluster # covert new cluster information to compare cluster compnew = self.covertToCompCluster(clusterInfo) # covert old cluster information to compare cluster compold = self.covertToCompCluster(oldClusterInfo) # do compare # if it is not same, print it. theSame, tempbuffer = compareObject(compnew, compold, "clusterInfo", [], ignoreFlag) if (theSame): self.context.logger.log("Static configuration matched with " "old static configuration files.") else: msg = "Instance[%s] are not the same.\nXml cluster " \ "information: %s\nStatic cluster information: %s\n" % \ (tempbuffer[0], tempbuffer[1], tempbuffer[2]) self.context.logger.debug("The old cluster information is " "from the cluster_static_config.") raise Exception(ErrorCode.GAUSS_512["GAUSS_51217"] + "Error: \n%s" % msg.strip("\n")) except Exception as e: raise Exception(str(e)) def covertToCompCluster(self, dbclusterInfo): """ function: covert to comp cluster input : clusterInfo, oldClusterInfo output: compClusterInfo """ # init dbcluster class compClusterInfo = dbClusterInfo() # get name compClusterInfo.name = dbclusterInfo.name # get appPath compClusterInfo.appPath = dbclusterInfo.appPath # get logPath compClusterInfo.logPath = dbclusterInfo.logPath for dbnode in dbclusterInfo.dbNodes: compNodeInfo = dbNodeInfo() # get datanode instance information for datanode in dbnode.datanodes: compNodeInfo.datanodes.append( self.coverToCompInstance(datanode)) # get node information compClusterInfo.dbNodes.append(compNodeInfo) return compClusterInfo def coverToCompInstance(self, compinstance): """ function: cover to comp instance 1. get instanceId 2. get mirrorId 3. get port 4. get datadir 5. get instanceType 6. get listenIps 7. get haIps input : compinstance output: covertedInstanceInfo """ covertedInstanceInfo = instanceInfo() # get instanceId covertedInstanceInfo.instanceId = compinstance.instanceId # get mirrorId covertedInstanceInfo.mirrorId = compinstance.mirrorId # get port covertedInstanceInfo.port = compinstance.port # get datadir covertedInstanceInfo.datadir = compinstance.datadir # get instanceType covertedInstanceInfo.instanceType = compinstance.instanceType # get listenIps covertedInstanceInfo.listenIps = compinstance.listenIps # get haIps covertedInstanceInfo.haIps = compinstance.haIps return covertedInstanceInfo def distributeXml(self): """ function: distribute package to every host input : NA output: NA """ self.context.logger.debug("Distributing xml configure file.", "addStep") try: hosts = self.context.clusterInfo.getClusterNodeNames() hosts.remove(DefaultValue.GetHostIpOrName()) # Send xml file to every host DefaultValue.distributeXmlConfFile(self.context.sshTool, self.context.xmlFile, hosts, self.context.mpprcFile, self.context.isSingle) except Exception as e: raise Exception(str(e)) self.context.logger.debug("Successfully distributed xml " "configure file.", "constant") def recordNodeStepInplace(self, action, step): """ function: record step info on all nodes input : action, step output: NA """ try: # record step info on local node tempPath = self.context.upgradeBackupPath filePath = os.path.join(tempPath, Const.INPLACE_UPGRADE_STEP_FILE) cmd = "echo \"%s:%d\" > %s" % (action, step, filePath) (status, output) = subprocess.getstatusoutput(cmd) if status != 0: raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % filePath + "Error: \n%s" % str(output)) if not self.context.isSingle: # send file to remote nodes self.context.sshTool.scpFiles(filePath, tempPath) self.context.logger.debug("Successfully wrote step file[%s:%d]." % (action, step)) except Exception as e: raise Exception(str(e)) def distributeFile(self, step_file): """ function: distribute file input : step_file output : NA """ self.context.logger.debug("Distribute the file %s" % step_file) # send the file to each node hosts = self.context.clusterInfo.getClusterNodeNames() hosts.remove(DefaultValue.GetHostIpOrName()) if not self.context.isSingle: stepDir = os.path.normpath(os.path.dirname(step_file)) self.context.sshTool.scpFiles(step_file, stepDir, hosts) self.context.logger.debug("Successfully distribute the file %s" % step_file) def getNodeStepInplace(self): """ function: Get the upgrade step info for inplace upgrade input : action output: the upgrade step info """ try: tempPath = self.context.upgradeBackupPath # get file path and check file exists filePath = os.path.join(tempPath, Const.INPLACE_UPGRADE_STEP_FILE) if not os.path.exists(filePath): self.context.logger.debug("The cluster status is Normal. " "No need to rollback.") return Const.BINARY_UPGRADE_NO_NEED_ROLLBACK # read and check record format stepInfo = g_file.readFile(filePath)[0] stepList = stepInfo.split(":") if len(stepList) != 2: raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % filePath) recordType = stepList[0].strip() recordStep = stepList[1].strip() # check upgrade type # the record value must be consistent with the upgrade type if self.context.action != recordType: raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % "t" + "Input upgrade type: %s record upgrade type: " "%s\nMaybe you chose the wrong interface." % (self.context.action, recordType)) # if record value is not digit, exit. if not recordStep.isdigit() or int(recordStep) > \ Const.BINARY_UPGRADE_STEP_PRE_COMMIT or \ int(recordStep) < Const.INVALID_UPRADE_STEP: raise Exception(ErrorCode.GAUSS_516["GAUSS_51633"] % recordStep) except Exception as e: self.context.logger.error(str(e)) return Const.INVALID_UPRADE_STEP self.context.logger.debug("The rollback step is %s" % recordStep) return int(recordStep) def checkStep(self, step): """ function: check step input : step output : NA """ if not step.isdigit() or \ int(step) > GreyUpgradeStep.STEP_BEGIN_COMMIT or \ int(step) < Const.INVALID_UPRADE_STEP: raise Exception(ErrorCode.GAUSS_516["GAUSS_51633"] % str(step)) ########################################################################## # Offline upgrade functions ########################################################################## def checkUpgrade(self): """ function: Check the environment for upgrade input : action output: NA """ self.context.logger.log("Checking upgrade environment.", "addStep") try: # Check the environment for upgrade cmd = "%s -t %s -R '%s' -l '%s' -N '%s' -X '%s'" % \ (OMCommand.getLocalScript("Local_Check_Upgrade"), self.context.action, self.context.oldClusterAppPath, self.context.localLog, self.context.newClusterAppPath, self.context.xmlFile) self.context.logger.debug("Command for checking upgrade " "environment: %s." % cmd) DefaultValue.execCommandWithMode(cmd, "check upgrade environment", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) self.context.logger.log("Successfully checked upgrade " "environment.", "constant") except Exception as e: self.context.logger.log("Failed to check upgrade environment.", "constant") raise Exception(str(e)) def backupClusterConfig(self): """ function: Backup the cluster config input : NA output: NA """ # backup list: # cluster_static_config # cluster_dynamic_config # etc/gscgroup_xxx.cfg # lib/postgresql/pg_plugin # server.key.cipher # server.key.rand # datasource.key.cipher # datasource.key.rand # utilslib # /share/sslsert/ca.key # /share/sslsert/etcdca.crt # catalog physical files # Data Studio lib files # gds files # javaUDF # postGIS # hadoop_odbc_connector extension files # libsimsearch etc files and lib files self.context.logger.log("Backing up cluster configuration.", "addStep") try: # send cmd to all node and exec cmd = "%s -t %s -U %s -V %d --upgrade_bak_path=%s -l %s" % \ (OMCommand.getLocalScript("Local_Upgrade_Utility"), Const.ACTION_BACKUP_CONFIG, self.context.user, int(float(self.context.oldClusterNumber) * 1000), self.context.upgradeBackupPath, self.context.localLog) self.context.logger.debug("Command for backing up cluster " "configuration: %s" % cmd) DefaultValue.execCommandWithMode(cmd, "backup config files", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) # backup hotpatch info file self.backupHotpatch() # backup version file. self.backup_version_file() except Exception as e: raise Exception(str(e)) self.context.logger.log("Successfully backed up cluster " "configuration.", "constant") def syncNewGUC(self): """ function: sync newly added guc during inplace upgrade. For now, we only sync guc of cm_agent and cm_server input : NA output: NA """ self.context.logger.debug("Start to sync new guc.", "addStep") try: # send cmd to all node and exec cmd = "%s -t %s -U %s --upgrade_bak_path=%s " \ "--new_cluster_app_path=%s -l %s" % \ (OMCommand.getLocalScript("Local_Upgrade_Utility"), Const.ACTION_SYNC_CONFIG, self.context.user, self.context.upgradeBackupPath, self.context.newClusterAppPath, self.context.localLog,) self.context.logger.debug( "Command for synchronizing new guc: %s" % cmd) DefaultValue.execCommandWithMode(cmd, "sync new guc", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: self.context.logger.debug("Failed to synchronize new guc.", "constant") raise Exception(str(e)) self.context.logger.debug("Successfully synchronized new guc.", "constant") def cleanExtensionFiles(self): """ function: clean extension library and config files input: NA output: 0 / 1 """ try: # clean extension library and config files hadoop_odbc_connector = "%s/lib/postgresql/" \ "hadoop_odbc_connector.so" % \ self.context.oldClusterInfo.appPath extension_config01 = "%s/share/postgresql/extension/" \ "hadoop_odbc_connector--1.0.sql" % \ self.context.oldClusterInfo.appPath extension_config02 = "%s/share/postgresql/extension/" \ "hadoop_odbc_connector.control" % \ self.context.oldClusterInfo.appPath extension_config03 = "%s/share/postgresql/extension/hadoop_odbc_" \ "connector--unpackaged--1.0.sql" % \ self.context.oldClusterInfo.appPath cmd = "(if [ -f '%s' ];then rm -f '%s';fi)" % \ (hadoop_odbc_connector, hadoop_odbc_connector) cmd += " && (if [ -f '%s' ];then rm -f '%s';fi)" % \ (extension_config01, extension_config01) cmd += " && (if [ -f '%s' ];then rm -f '%s';fi)" % \ (extension_config02, extension_config02) cmd += " && (if [ -f '%s' ];then rm -f '%s';fi)" % \ (extension_config03, extension_config03) self.context.logger.debug("Command for cleaning extension " "library and config files: %s" % cmd) DefaultValue.execCommandWithMode( cmd, "clean extension library and config files", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) self.context.logger.debug("Command for cleaning extension " "library and config files: %s" % cmd) return 0 except Exception as e: self.context.logger.debug("Fail to clean extension library and " "config files.output:%s" % str(e)) return 1 def waitClusterForNormal(self, waitTimeOut=300): """ function: Wait the node become Normal input : waitTimeOut output: NA """ self.context.logger.log("Waiting for the cluster status to " "become normal.") dotCount = 0 # get the end time endTime = datetime.now() + timedelta(seconds=int(waitTimeOut)) while True: time.sleep(5) sys.stdout.write(".") dotCount += 1 if dotCount >= 12: dotCount = 0 sys.stdout.write("\n") (checkStatus, checkResult) = \ OMCommand.doCheckStaus(self.context.user, 0) if checkStatus == 0: if dotCount != 0: sys.stdout.write("\n") self.context.logger.log("The cluster status is normal.") break if datetime.now() >= endTime: if dotCount != 0: sys.stdout.write("\n") self.context.logger.debug(checkResult) raise Exception("Timeout." + "\n" + ErrorCode.GAUSS_516["GAUSS_51602"]) if checkStatus != 0: self.context.logger.debug(checkResult) raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "cluster") def getLcgroupnameList(self, jsonFile): """ function: get Lc group name list input: jsonFile output: [] """ para = {} lcgroupnamelist = [] try: with open(jsonFile, "r") as fp_json: para = json.load(fp_json) except Exception as e: raise Exception(str(e)) if (para): lcgroupnamelist = para['lcgroupnamelist'] while '' in lcgroupnamelist: lcgroupnamelist.remove('') return lcgroupnamelist def restoreClusterConfig(self, isRollBack=False): """ function: Restore the cluster config input : isRollBack output: NA """ # restore list: # cluster_dynamic_config # etc/gscgroup_xxx.cfg # lib/postgresql/pg_plugin # server.key.cipher # server.key.rand # datasource.key.cipher # datasource.key.rand # utilslib # /share/sslsert/ca.key # /share/sslsert/etcdca.crt # Data Studio lib files # gds files # javaUDF # postGIS # hadoop_odbc_connector extension files # libsimsearch etc files and lib files if isRollBack: self.context.logger.log("Restoring cluster configuration.") else: self.context.logger.log("Restoring cluster configuration.", "addStep") try: if isRollBack: self.rollbackHotpatch() else: # restore static configuration cmd = "%s -t %s -U %s -V %d --upgrade_bak_path=%s " \ "--new_cluster_app_path=%s -l %s" % \ (OMCommand.getLocalScript("Local_Upgrade_Utility"), Const.ACTION_RESTORE_CONFIG, self.context.user, int(float(self.context.oldClusterNumber) * 1000), self.context.upgradeBackupPath, self.context.newClusterAppPath, self.context.localLog) self.context.logger.debug("Command for restoring " "config files: %s" % cmd) DefaultValue.execCommandWithMode(cmd, "restore config files", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) # change the owner of application cmd = "chown -R %s:%s '%s'" % \ (self.context.user, self.context.group, self.context.newClusterAppPath) DefaultValue.execCommandWithMode( cmd, "change the owner of application", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: raise Exception(str(e)) if isRollBack: self.context.logger.log("Successfully restored " "cluster configuration.") else: self.context.logger.log("Successfully restored cluster " "configuration.", "constant") def checkStaticConfig(self): """ function: Check if static config file exists in bin dir, if not exists, restore it from backup dir input : NA output: NA """ self.context.logger.log("Checking static configuration files.") try: # check static configuration path staticConfigPath = "%s/bin" % self.context.oldClusterAppPath # restore static configuration cmd = "(if [ ! -f '%s/cluster_static_config' ];then cp " \ "%s/cluster_static_config %s/bin;fi)" % \ (staticConfigPath, self.context.upgradeBackupPath, self.context.oldClusterAppPath) DefaultValue.execCommandWithMode(cmd, "restore static configuration", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: raise Exception(str(e)) self.context.logger.log("Successfully checked static " "configuration files.") def backupNodeVersion(self): """ function: Backup current application and configuration. The function only be used by binary upgrade. To ensure the transaction atomicity, it will be used with checkUpgrade(). input : NA output: NA """ self.context.logger.log("Backing up current application " "and configurations.", "addStep") try: # back up environment variables cmd = "cp '%s' '%s'_gauss" % (self.context.userProfile, self.context.userProfile) self.context.logger.debug( "Command for backing up environment file: %s" % cmd) DefaultValue.execCommandWithMode(cmd, "back up environment variables", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) # back up application and configuration cmd = "%s -U %s -P %s -p -b -l %s" % \ (OMCommand.getLocalScript("Local_Backup"), self.context.user, self.context.upgradeBackupPath, self.context.localLog) self.context.logger.debug( "Command for backing up application: %s" % cmd) DefaultValue.execCommandWithMode( cmd, "back up application and configuration", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: # delete binary backup directory delCmd = g_file.SHELL_CMD_DICT["deleteDir"] % \ (self.context.tmpDir, os.path.join(self.context.tmpDir, 'backupTemp_*')) DefaultValue.execCommandWithMode(delCmd, "delete binary backup directory", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) raise Exception(str(e)) self.context.logger.log("Successfully backed up current " "application and configurations.", "constant") def restoreNodeVersion(self): """ function: Restore the application and configuration 1. restore old version 2. restore environment variables input : NA output: NA """ self.context.logger.log("Restoring application and configurations.") try: # restore old version cmd = "%s -U %s -P %s -p -b -l %s" % \ (OMCommand.getLocalScript("Local_Restore"), self.context.user, self.context.upgradeBackupPath, self.context.localLog) self.context.logger.debug("Command for restoring " "old version: %s" % cmd) DefaultValue.execCommandWithMode(cmd, "restore old version", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) # restore environment variables cmd = "(if [ -f '%s'_gauss ];then mv '%s'_gauss '%s';fi)" % \ (self.context.userProfile, self.context.userProfile, self.context.userProfile) self.context.logger.debug("Command for restoring environment file:" " %s" % cmd) DefaultValue.execCommandWithMode(cmd, "restore environment variables", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: raise Exception(str(e)) self.context.logger.log("Successfully restored application and " "configuration.") def modifySocketDir(self): """ function: modify unix socket directory input : NA output: NA """ self.context.logger.log("Modifying the socket path.", "addStep") try: # modifying the socket path for all CN/DN instance (status, output) = self.setGUCValue( "unix_socket_directory", DefaultValue.getTmpDirAppendMppdb(self.context.user), "set") if (status != 0): raise Exception(ErrorCode.GAUSS_500["GAUSS_50007"] % "GUC" + " Error: \n%s" % str(output)) userProfile = DefaultValue.getMpprcFile() except Exception as e: raise Exception(str(e)) self.context.logger.log("Successfully modified socket path.", "constant") ########################################################################### # Rollback upgrade functions ########################################################################### def cleanBackupFiles(self): """ function: Clean backup files. input : action output : NA """ try: # clean backup files cmd = "(if [ -f '%s/OldDbClusterInfo.py' ]; then rm -f " \ "'%s/OldDbClusterInfo.py'; fi) &&" % \ (self.context.tmpDir, self.context.tmpDir) cmd += "(if [ -f '%s/OldDbClusterInfo.pyc' ]; then rm -f " \ "'%s/OldDbClusterInfo.pyc'; fi) &&" % \ (self.context.tmpDir, self.context.tmpDir) cmd += "(if [ -d '%s/script' ]; then rm -rf '%s/script'; " \ "fi) &&" % (self.context.tmpDir, self.context.tmpDir) cmd += "(if [ -f '%s/oldclusterinfo' ]; then rm -f " \ "'%s/oldclusterinfo'; fi) &&" % \ (self.context.tmpDir, self.context.tmpDir) cmd += "(if [ -f '%s/oldclusterGUC' ]; then rm -f " \ "'%s/oldclusterGUC'; fi) &&" % \ (self.context.tmpDir, self.context.tmpDir) cmd += "(if [ -f '%s/cluster_static_config' ]; then rm -f " \ "'%s/cluster_static_config'; fi) &&" % \ (self.context.tmpDir, self.context.tmpDir) cmd += "(if [ -f '%s/c_functionfilelist.dat' ]; then rm -f " \ "'%s/c_functionfilelist.dat'; fi) &&" % \ (self.context.tmpDir, self.context.tmpDir) cmd += "(if [ -f '%s'_gauss ]; then rm -f '%s'_gauss ; fi) &&" % \ (self.context.userProfile, self.context.userProfile) cmd += "(if [ -f '%s/oldclusterinfo.json' ]; then rm -f " \ "'%s/oldclusterinfo.json'; fi) &&" % \ (self.context.tmpDir, self.context.tmpDir) cmd += "(if [ -f '%s/%s' ]; then rm -f '%s/%s'; fi) &&" % \ (self.context.tmpDir, Const.CLUSTER_CNSCONF_FILE, self.context.tmpDir, Const.CLUSTER_CNSCONF_FILE) cmd += "(rm -f '%s'/gauss_crontab_file_*) &&" % self.context.tmpDir cmd += "(if [ -d '%s' ]; then rm -rf '%s'; fi) " % \ (self.context.upgradeBackupPath, self.context.upgradeBackupPath) self.context.logger.debug("Command for clean " "backup files: %s" % cmd) DefaultValue.execCommandWithMode(cmd, "clean backup files", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: raise Exception(str(e)) def cleanBinaryUpgradeBakFiles(self, isRollBack=False): """ function: Clean back up files, include cluster_static_config, cluster_dynamic_config, binary.tar, parameter.tar. input : isRollBack output: NA """ if (isRollBack): self.context.logger.debug("Cleaning backup files.") else: self.context.logger.debug("Cleaning backup files.", "addStep") try: # clean backup files self.cleanBackupFiles() except Exception as e: raise Exception(str(e)) if (isRollBack): self.context.logger.debug("Successfully cleaned backup files.") else: self.context.logger.debug("Successfully cleaned backup files.", "constant") ########################################################################### # Rollback upgrade functions ########################################################################### def doHealthCheck(self, checkPosition): """ function: Do health check, if healthy, return 0, else return 1 input : checkPosition output: 0 successfully 1 failed """ ####################################################################### # When do binary-upgrade: # Const.OPTION_PRECHECK -> cluster Normal # -> database can connec # Const.OPTION_POSTCHECK -> cluster Normal # -> package version Normal # -> database can connec ####################################################################### self.context.logger.log("Start to do health check.", "addStep") status = 0 output = "" if (checkPosition == Const.OPTION_PRECHECK): if (self.checkClusterStatus(checkPosition, True) != 0): output += "\n Cluster status does not match condition." if (self.checkConnection() != 0): output += "\n Database could not be connected." elif (checkPosition == Const.OPTION_POSTCHECK): if (self.checkClusterStatus(checkPosition) != 0): output += "\n Cluster status is Abnormal." if not self.checkVersion( self.context.newClusterVersion, self.context.clusterInfo.getClusterNodeNames()): output += "\n The gaussdb version is inconsistent." if (self.checkConnection() != 0): output += "\n Database could not be connected." else: # Invalid check position output += "\n Invalid check position." if (output != ""): status = 1 # all check has been pass, return 0 self.context.logger.log("Successfully checked cluster status.", "constant") return (status, output) def checkVersion(self, checkinfo, checknodes): """ function: Check if the node have been upgraded, if gaussdb bin file verison is same on all host, return 0, else retrun 1 input : checkinfo, checknodes output: 0 successfully 1 failed """ self.context.logger.debug( "Start to check gaussdb version consistency.") if self.context.isSingle: self.context.logger.debug("There is single cluster," " no need to check it.") return True try: # checking gaussdb bin file version VxxxRxxxCxx or commitid cmd = "source %s;%s -t %s -v %s -U %s -l %s" % \ (self.context.userProfile, OMCommand.getLocalScript("Local_Check_Upgrade"), Const.ACTION_CHECK_VERSION, checkinfo, self.context.user, self.context.localLog) self.context.logger.debug("Command for checking gaussdb version " "consistency: %s." % cmd) (status, output) = \ self.context.sshTool.getSshStatusOutput(cmd, checknodes) for node in status.keys(): failFlag = "Failed to check version information" if status[node] != DefaultValue.SUCCESS or \ output.find(failFlag) >= 0: raise Exception(ErrorCode.GAUSS_529["GAUSS_52929"] + "Error: \n%s" % str(output)) # gaussdb bin file version is same on all host, return 0 self.context.logger.debug("Successfully checked gaussdb" " version consistency.") return True except Exception as e: self.context.logger.debug(str(e)) return False def checkClusterStatus(self, checkPosition=Const.OPTION_PRECHECK, doDetailCheck=False): """ function: Check cluster status, if NORMAL, return 0, else return 1 For grey upgrade, if have switched to new bin, we will remove abnormal nodes and then return 0, else return 1 input : checkPosition, doDetailCheck output: 0 successfully 1 failed """ self.context.logger.debug("Start to check cluster status.") # build query cmd # according to the implementation of the results to determine whether # the implementation of success cmd = "source %s;gs_om -t query" % self.context.userProfile (status, output) = subprocess.getstatusoutput(cmd) if status != 0: self.context.logger.debug( "Failed to execute command %s.\nStatus:%s\nOutput:%s" % (cmd, status, output)) return 1 self.context.logger.debug( "Successfully obtained cluster status information. " "Cluster status information:\n%s" % output) if output.find("Normal") < 0: self.context.logger.debug("The cluster_state is Abnormal.") if checkPosition == Const.OPTION_POSTCHECK: if output.find("Degraded") < 0: self.context.logger.debug("The cluster_state is not " "Degraded under postcheck.") return 1 else: return 1 # do more check if required if doDetailCheck: cluster_state_check = False redistributing_check = False for line in output.split('\n'): if len(line.split(":")) != 2: continue (key, value) = line.split(":") if key.strip() == "cluster_state" and \ value.strip() == "Normal": cluster_state_check = True elif key.strip() == "redistributing" and value.strip() == "No": redistributing_check = True if cluster_state_check and redistributing_check: self.context.logger.debug("Cluster_state must be Normal, " "redistributing must be No.") return 0 else: self.context.logger.debug( "Cluster status information does not meet the upgrade " "condition constraints. When upgrading, cluster_state must" " be Normal, redistributing must be No and balanced" " must be Yes.") return 1 # cluster is NORMAL, return 0 return 0 def waitClusterNormalDegrade(self, waitTimeOut=300): """ function: Check if cluster status is Normal for each main step of online upgrade input : waitTimeOut, default is 60. output : NA """ # get the end time self.context.logger.log("Wait for the cluster status normal " "or degrade.") endTime = datetime.now() + timedelta(seconds=int(waitTimeOut)) while True: cmd = "source %s;cm_ctl query" % self.context.userProfile (status, output) = subprocess.getstatusoutput(cmd) if status == 0 and (output.find("Normal") >= 0 or output.find("Degraded") >= 0): self.context.logger.debug( "The cluster status is normal or degrade now.") break if datetime.now() >= endTime: self.context.logger.debug("The cmd is %s " % cmd) raise Exception("Timeout." + "\n" + ErrorCode.GAUSS_516["GAUSS_51602"]) else: self.context.logger.debug( "Cluster status has not reach normal. Wait for another 3" " seconds.\n%s" % output) time.sleep(3) # sleep 3 seconds def checkConnection(self): """ function: Check if cluster accept connecitons, upder inplace upgrade, all DB should be connected under grey upgrade, makesure all CN in nodes that does not under upgrade process or extracted abnormal nodes can be connected if accpet connection, return 0, else return 1 1. find a cn instance 2. connect this cn and exec sql cmd input : NA output: 0 successfully 1 failed """ self.context.logger.debug("Start to check database connection.") for dbNode in self.context.clusterInfo.dbNodes: if len(dbNode.datanodes) == 0 or dbNode.name: continue for dnInst in dbNode.datanodes: # connect this DB and exec sql cmd sql = "SELECT 1;" (status, output) = \ ClusterCommand.remoteSQLCommand( sql, self.context.user, dnInst.hostname, dnInst.port, False, DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True) if status != 0 or not output.isdigit(): self.context.logger.debug( "Failed to execute SQL on [%s]: %s. Error: \n%s" % (dnInst.hostname, sql, str(output))) return 1 # all DB accept connection, return 0 self.context.logger.debug("Successfully checked database connection.") return 0 def createBakPath(self): """ function: create bak path input : NA output : NA """ cmd = "(if [ ! -d '%s' ]; then mkdir -p '%s'; fi)" % \ (self.context.upgradeBackupPath, self.context.upgradeBackupPath) cmd += " && (chmod %d -R %s)" % (DefaultValue.KEY_DIRECTORY_MODE, self.context.upgradeBackupPath) self.context.logger.debug("Command for creating directory: %s" % cmd) DefaultValue.execCommandWithMode(cmd, "create binary_upgrade path", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) def recordDirFile(self): """ function: record dir file input: NA output: NA """ self.context.logger.debug("Create the file to record " "old and new app directory.") # write the old cluster number and new cluster number into backup dir appDirRecord = os.path.join(self.context.upgradeBackupPath, Const.RECORD_UPGRADE_DIR) g_file.createFile(appDirRecord, True, DefaultValue.KEY_FILE_MODE) g_file.writeFile(appDirRecord, [self.context.oldClusterAppPath, self.context.newClusterAppPath], 'w') self.distributeFile(appDirRecord) self.context.logger.debug("Successfully created the file to " "record old and new app directory.") def copyBakVersion(self): """ under commit, if we have cleaned old install path, then node disabled, we cannot get old version, under choseStrategy, we will not pass the check :return:NA """ versionFile = os.path.join(self.context.oldClusterAppPath, "bin/upgrade_version") bakVersionFile = os.path.join(self.context.upgradeBackupPath, "old_upgrade_version") cmd = "(if [ -f '%s' ]; then cp -f -p '%s' '%s';fi)" % \ (versionFile, versionFile, bakVersionFile) cmd += " && (chmod %d %s)" % \ (DefaultValue.KEY_FILE_MODE, bakVersionFile) DefaultValue.execCommandWithMode(cmd, "copy upgrade_version file", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) def cleanInstallPath(self, cleanNew=Const.NEW): """ function: after grey upgrade succeed, clean old install path input : cleanNew output: NA """ self.context.logger.debug("Cleaning %s install path." % cleanNew, "addStep") # clean old install path if cleanNew == Const.NEW: installPath = self.context.newClusterAppPath elif cleanNew == Const.OLD: installPath = self.context.oldClusterAppPath else: raise Exception(ErrorCode.GAUSS_529["GAUSS_52937"]) cmd = "%s -t %s -U %s -R %s -l %s" % \ (OMCommand.getLocalScript("Local_Upgrade_Utility"), Const.ACTION_CLEAN_INSTALL_PATH, self.context.user, installPath, self.context.localLog) if self.context.forceRollback: cmd += " --force" self.context.logger.debug("Command for clean %s install path: %s" % (cleanNew, cmd)) DefaultValue.execCommandWithMode(cmd, "clean %s install path" % cleanNew, self.context.sshTool, self.context.isSingle, self.context.mpprcFile) self.context.logger.log("Successfully cleaned %s install path." % cleanNew, "constant") def installNewBin(self): """ function: install new binary in a new directory 1. get env GAUSSLOG 2. get env PGHOST 3. install new bin file 4. sync old config to new bin path 5. update env input: none output: none """ try: self.context.logger.log("Installing new binary.", "addStep") # install new bin file cmd = "%s -t 'install_cluster' -U %s:%s -R '%s' -P %s -c %s" \ " -l '%s' -X '%s' -T -u" % \ (OMCommand.getLocalScript("Local_Install"), self.context.user, self.context.group, self.context.newClusterAppPath, self.context.tmpDir, self.context.clusterInfo.name, self.context.localLog, self.context.xmlFile) self.context.logger.debug( "Command for installing new binary: %s." % cmd) DefaultValue.execCommandWithMode(cmd, "install new application", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) self.context.logger.debug( "Successfully installed new binary files.") except Exception as e: self.context.logger.debug("Failed to install new binary files.") raise Exception(str(e)) def backupHotpatch(self): """ function: backup hotpatch config file patch.info in xxx/data/hotpatch input : NA output: NA """ self.context.logger.debug("Start to backup hotpatch.") try: cmd = "%s -t %s -U %s --upgrade_bak_path=%s " \ "--new_cluster_app_path=%s -l %s" % \ (OMCommand.getLocalScript("Local_Upgrade_Utility"), Const.ACTION_BACKUP_HOTPATCH, self.context.user, self.context.upgradeBackupPath, self.context.newClusterAppPath, self.context.localLog) DefaultValue.execCommandWithMode(cmd, "backup hotpatch files", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: raise Exception(" Failed to backup hotpatch config file." + str(e)) self.context.logger.log("Successfully backup hotpatch config file.") def rollbackHotpatch(self): """ function: backup hotpatch config file patch.info in xxx/data/hotpatch input : NA output: NA """ self.context.logger.debug("Start to rollback hotpatch.") try: cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s -X '%s'" % \ (OMCommand.getLocalScript("Local_Upgrade_Utility"), Const.ACTION_ROLLBACK_HOTPATCH, self.context.user, self.context.upgradeBackupPath, self.context.localLog, self.context.xmlFile) if self.context.forceRollback: cmd += " --force" DefaultValue.execCommandWithMode(cmd, "rollback hotpatch", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: raise Exception(" Failed to rollback hotpatch config file." + str(e)) self.context.logger.log("Successfully rollback hotpatch config file.") def backup_version_file(self): """ Backup the old version file. """ oldVersionFile = "%s/bin/%s" % \ (self.context.oldClusterAppPath, DefaultValue.DEFAULT_DISABLED_FEATURE_FILE_NAME) oldLicenseFile = "%s/bin/%s" % (self.context.oldClusterAppPath, DefaultValue.DEFAULT_LICENSE_FILE_NAME) cmd = "(if [ -d %s ] && [ -f %s ]; then cp -f %s %s; fi) && " % \ (self.context.upgradeBackupPath, oldVersionFile, oldVersionFile, self.context.upgradeBackupPath) cmd += "(if [ -d %s ] && [ -f %s ]; then cp -f %s %s; fi)" % \ (self.context.upgradeBackupPath, oldLicenseFile, oldLicenseFile, self.context.upgradeBackupPath) self.context.logger.debug( "Execute command to backup the product version file and the " "license control file: %s" % cmd) DefaultValue.execCommandWithMode(cmd, "Backup old gaussdb.version file.", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) def getTimeFormat(self, seconds): """ format secends to h-m-s input:int output:int """ seconds = int(seconds) if seconds == 0: return 0 # Converts the seconds to standard time hour = seconds / 3600 minute = (seconds - hour * 3600) / 60 s = seconds % 60 resultstr = "" if hour != 0: resultstr += "%dh" % hour if minute != 0: resultstr += "%dm" % minute return "%s%ds" % (resultstr, s) def CopyCerts(self): """ function: copy certs input : NA output : NA """ self.context.logger.log("copy certs from %s to %s." % ( self.context.oldClusterAppPath, self.context.newClusterAppPath)) try: cmd = "%s -t %s -U %s --old_cluster_app_path=%s " \ "--new_cluster_app_path=%s -l %s" % \ (OMCommand.getLocalScript("Local_Upgrade_Utility"), Const.ACTION_COPY_CERTS, self.context.user, self.context.oldClusterAppPath, self.context.newClusterAppPath, self.context.localLog) self.context.logger.debug("Command for copy certs: '%s'." % cmd) DefaultValue.execCommandWithMode(cmd, "Command for copy certs", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: self.context.logger.log("Failed to copy certs from %s to %s." % (self.context.oldClusterAppPath, self.context.newClusterAppPath)) raise Exception(str(e)) time.sleep(10) self.context.logger.log("Successfully copy certs from %s to %s." % (self.context.oldClusterAppPath, self.context.newClusterAppPath), "constant") def switchBin(self, switchTo=Const.OLD): """ function: switch bin input : switchTo output : NA """ self.context.logger.log("Switch symbolic link to %s binary directory." % switchTo, "addStep") try: cmd = "%s -t %s -U %s -l %s" % \ (OMCommand.getLocalScript("Local_Upgrade_Utility"), Const.ACTION_SWITCH_BIN, self.context.user, self.context.localLog) if switchTo == Const.NEW: cmd += " -R '%s'" % self.context.newClusterAppPath else: cmd += " -R '%s'" % self.context.oldClusterAppPath if self.context.forceRollback: cmd += " --force" self.context.logger.debug("Command for switching binary directory:" " '%s'." % cmd) DefaultValue.execCommandWithMode(cmd, "Switch the binary directory", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: self.context.logger.log("Failed to switch symbolic link to %s " "binary directory." % switchTo) raise Exception(str(e)) time.sleep(10) self.context.logger.log("Successfully switch symbolic link to %s " "binary directory." % switchTo, "constant") def clearOtherToolPackage(self, action=""): """ function: clear other tool package input : action output : NA """ if action == Const.ACTION_AUTO_ROLLBACK: self.context.logger.debug("clean other tool package files.") else: self.context.logger.debug( "clean other tool package files.", "addStep") try: commonPart = DefaultValue.get_package_back_name().rsplit("_", 1)[0] gphomePath = os.listdir(DefaultValue.getClusterToolPath()) commitId = self.newCommitId if action == Const.ACTION_AUTO_ROLLBACK: commitId = self.oldCommitId for filePath in gphomePath: if commonPart in filePath and commitId not in filePath: toDeleteFilePath = os.path.join( DefaultValue.getClusterToolPath(), filePath) deleteCmd = "(if [ -f '%s' ]; then rm -rf '%s'; fi) " % \ (toDeleteFilePath, toDeleteFilePath) DefaultValue.execCommandWithMode( deleteCmd, "clean tool package files", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) except Exception as e: self.context.logger.log( "Failed to clean other tool package files.") raise Exception(str(e)) if action == Const.ACTION_AUTO_ROLLBACK: self.context.logger.debug( "Success to clean other tool package files.") else: self.context.logger.debug( "Success to clean other tool package files.", "constant") def createGphomePack(self): """ function: create Gphome pack input : NA output : NA """ try: cmd = "(if [ ! -d '%s' ]; then mkdir -p '%s'; fi)" % \ (DefaultValue.getClusterToolPath(), DefaultValue.getClusterToolPath()) cmd += " && (chmod %d -R %s)" % \ (DefaultValue.KEY_DIRECTORY_MODE, DefaultValue.getClusterToolPath()) self.context.logger.debug( "Command for creating directory: %s" % cmd) DefaultValue.execCommandWithMode(cmd, "create gphome path", self.context.sshTool, self.context.isSingle, self.context.mpprcFile) oldPackName = "%s-Package-bak_%s.tar.gz" % \ (VersionInfo.PRODUCT_NAME_PACKAGE, self.oldCommitId) packFilePath = "%s/%s" % (DefaultValue.getClusterToolPath(), oldPackName) copyNode = "" cmd = "if [ -f '%s' ]; then echo 'GetFile'; " \ "else echo 'NoThisFile'; fi" % packFilePath self.context.logger.debug("Command for checking file: %s" % cmd) (status, output) = self.context.sshTool.getSshStatusOutput( cmd, self.context.clusterNodes, self.context.mpprcFile) outputMap = self.context.sshTool.parseSshOutput( self.context.clusterNodes) self.context.logger.debug("Output: %s" % output) for node in self.context.clusterNodes: if status[node] == DefaultValue.SUCCESS: if 'GetFile' in outputMap[node]: copyNode = node break if copyNode: self.context.logger.debug("Copy the file %s from node %s." % (packFilePath, copyNode)) for node in self.context.clusterNodes: if status[node] == DefaultValue.SUCCESS: if 'NoThisFile' in outputMap[node]: cmd = g_Platform.getRemoteCopyCmd( packFilePath, DefaultValue.getClusterToolPath(), str(copyNode), False, 'directory', node) self.context.logger.debug( "Command for copying directory: %s" % cmd) DefaultValue.execCommandLocally(cmd) else: raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % packFilePath) except Exception as e: raise Exception(str(e))