# -*- coding:utf-8 -*-
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
import os
import sys
import subprocess
import time
import timeit
import json
import csv
import traceback
import copy

from datetime import datetime, timedelta

from gspylib.common.Common import DefaultValue, ClusterCommand, \
    ClusterInstanceConfig
from gspylib.common.DbClusterInfo import instanceInfo, \
    dbNodeInfo, dbClusterInfo, compareObject
from gspylib.common.OMCommand import OMCommand
from gspylib.common.ErrorCode import ErrorCode
from gspylib.threads.SshTool import SshTool
from gspylib.common.DbClusterStatus import DbClusterStatus
from gspylib.os.gsfile import g_file
from gspylib.inspection.common import SharedFuncs
from gspylib.component.CM.CM_OLAP.CM_OLAP import CM_OLAP
from impl.upgrade.UpgradeConst import GreyUpgradeStep
import impl.upgrade.UpgradeConst as const
from base_utils.executor.cmd_executor import CmdExecutor
from base_utils.executor.local_remote_cmd import LocalRemoteCmd
from base_utils.os.cmd_util import CmdUtil
from domain_utils.cluster_file.cluster_dir import ClusterDir
from base_utils.os.env_util import EnvUtil
from base_utils.os.file_util import FileUtil
from domain_utils.cluster_file.package_info import PackageInfo
from domain_utils.cluster_file.version_info import VersionInfo
from domain_utils.sql_handler.sql_result import SqlResult
from base_utils.os.net_util import NetUtil


class OldVersionModules():
    """
    class: old version modules
    """

    def __init__(self):
        """
        function: constructor
        """
        # old cluster information
        self.oldDbClusterInfoModule = None
        # old cluster status
        self.oldDbClusterStatusModule = None

class UpgradeImpl:
    """
    Class: The class is used to perform the upgrade.
    """
    def __init__(self, upgrade):
        """
        function: constructor
        """
        self.dnInst = None
        self.dnStandbyInsts = []
        self.context = upgrade
        self.newCommitId = ""
        self.oldCommitId = ""
        self.isLargeInplaceUpgrade = False
        self.__upgrade_across_64bit_xid = False
        self.action = upgrade.action

    def exitWithRetCode(self, action, succeed=True, msg=""):
        """
        function: should be called after cmdline parameter check
        input : action, succeed, msg, strategy
        output: NA
        """
        #########################################
        # doUpgrade
        #
        # binary-upgrade      success    failure
        #                        0          1
        #
        # binary-rollback     success    failure
        #                        2          3
        #
        # commit-upgrade      success    failure
        #                        5          1
        #########################################

        #########################################
        # choseStrategy
        #                     success    failure
        #                        4          1
        #########################################
        if not succeed:
            if action == const.ACTION_AUTO_ROLLBACK:
                retCode = 3
            else:
                retCode = 1
        elif action in [const.ACTION_SMALL_UPGRADE,
                        const.ACTION_LARGE_UPGRADE,
                        const.ACTION_INPLACE_UPGRADE]:
            retCode = 0
        elif action == const.ACTION_AUTO_ROLLBACK:
            retCode = 2
        elif action == const.ACTION_CHOSE_STRATEGY:
            retCode = 4
        elif action == const.ACTION_COMMIT_UPGRADE:
            retCode = 5
        else:
            retCode = 1

        if msg != "":
            if self.context.logger is not None:
                if succeed:
                    self.context.logger.log(msg)
                else:
                    self.context.logger.error(msg)
            else:
                print(msg)
        sys.exit(retCode)
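
    # Illustrative usage sketch (not called anywhere in this module; it only
    # documents the exit-code table above, assuming an initialized
    # UpgradeImpl instance named `impl`):
    #
    #   impl.exitWithRetCode(const.ACTION_CHOSE_STRATEGY, True,
    #                        "Upgrade strategy: small-binary-upgrade.")
    #   # process exits with code 4 (choseStrategy succeeded)
    #   impl.exitWithRetCode(const.ACTION_AUTO_ROLLBACK, False, "rollback failed")
    #   # process exits with code 3 (rollback failed)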

    def initGlobalInfos(self):
        """
        function: init global infos
        input : NA
        output: NA
        """
        self.context.logger.debug("Init global infos", "addStep")
        self.context.sshTool = SshTool(
            self.context.clusterNodes, self.context.localLog,
            DefaultValue.TIMEOUT_PSSH_BINARY_UPGRADE)
        self.initClusterConfig()
        self.context.logger.debug("Successfully init global infos", "constant")

    def setClusterDetailInfo(self):
        """
        function: set cluster detail info
        input : NA
        output : NA
        """
        for dbNode in self.context.clusterInfo.dbNodes:
            dbNode.setDnDetailNum()
        #self.context.clusterInfo.setClusterDnCount()

    def removeOmRollbackProgressFile(self):
        """
        function: remove the om rollback progress file
        input : NA
        output : NA
        """
        self.context.logger.debug("Remove the om rollback"
                                  " record progress file.")
        fileName = os.path.join(self.context.tmpDir,
                                ".upgrade_task_om_rollback_result")
        cmd = "(if [ -f '%s' ];then rm -f '%s';fi)" % (fileName, fileName)
        CmdExecutor.execCommandWithMode(cmd,
                                        self.context.sshTool,
                                        self.context.isSingle,
                                        self.context.mpprcFile)

    def initOmRollbackProgressFile(self):
        """
        function: init the om rollback progress file
        input : NA
        output : NA
        """
        filePath = os.path.join(self.context.tmpDir,
                                ".upgrade_task_om_rollback_result")
        cmd = "echo \"OM:RUN\" > %s" % filePath
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            self.context.logger.debug("The cmd is %s " % cmd)
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % filePath
                            + "Error: \n%s" % str(output))

        if (not self.context.isSingle):
            # send file to remote nodes
            self.context.sshTool.scpFiles(filePath, self.context.tmpDir)
        self.context.logger.debug("Successfully wrote file %s." % filePath)

    def run(self):
        """
        function: Do upgrade
        input : NA
        output: NA
        """
        # the action may be changed in each step;
        # if we failed in auto-rollback,
        # we will check whether we need to roll back
        action = self.context.action
        # upgrade backup path
        self.context.tmpDir = EnvUtil.getTmpDirFromEnv(self.context.user)
        if self.context.tmpDir == "":
            raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$PGHOST")
        self.context.upgradeBackupPath = \
            "%s/%s" % (self.context.tmpDir, "binary_upgrade")
        try:
            self.initGlobalInfos()
            self.removeOmRollbackProgressFile()
            self.commonCheck()

            # 4. get upgrade type
            # After choseStrategy, it will assign action to self.context.action
            # to do full-upgrade or binary-upgrade
            if self.context.action == const.ACTION_AUTO_UPGRADE:
                self.context.action = self.choseStrategy()
                self.context.logger.debug(
                    "%s execution takes %s steps in total" % (
                        const.GS_UPGRADECTL, ClusterCommand.countTotalSteps(
                            const.GS_UPGRADECTL, self.context.action)))
                # If getting the upgrade strategy failed,
                # then try to get the rollback strategy.
                # Set strategyFlag as True to check
                # whether the upgrade parameter is correct or not
                if self.context.action in [const.ACTION_LARGE_UPGRADE,
                                           const.ACTION_SMALL_UPGRADE]:
                    self.doGreyBinaryUpgrade()
                else:
                    self.doInplaceBinaryUpgrade()
            # After choseStrategy, it will assign action to self.context.action
            elif self.context.action == const.ACTION_AUTO_ROLLBACK:
                # because if we roll back with auto rollback,
                # we will roll back all the nodes,
                # but if we roll back under upgrade,
                # we will only roll back specified nodes
                self.context.action = self.choseStrategy()
                self.context.rollback = True
                if self.context.oldClusterNumber < const.RELMAP_4K_VERSION and self.context.forceRollback:
                    errMsg = "could not do force rollback in this version: %s" % self.context.oldClusterNumber
                    self.context.logger.log(errMsg)
                    self.exitWithRetCode(action, False, errMsg)
                if self.context.action == const.ACTION_INPLACE_UPGRADE:
                    self.exitWithRetCode(const.ACTION_AUTO_ROLLBACK,
                                         self.doInplaceBinaryRollback())
                else:
                    self.exitWithRetCode(const.ACTION_AUTO_ROLLBACK,
                                         self.doGreyBinaryRollback(
                                             const.ACTION_AUTO_ROLLBACK))
            elif self.context.action == const.ACTION_COMMIT_UPGRADE:
                self.context.action = self.choseStrategy()
                if self.context.action == const.ACTION_INPLACE_UPGRADE:
                    self.doInplaceCommitUpgrade()
                else:
                    self.doGreyCommitUpgrade()
            else:
                self.doChoseStrategy()
        except Exception as e:
            self.context.logger.debug(traceback.format_exc() + str(e))
            if not self.context.sshTool:
                self.context.sshTool = SshTool(
                    self.context.clusterNodes, self.context.logger,
                    DefaultValue.TIMEOUT_PSSH_BINARY_UPGRADE)
            if action == const.ACTION_AUTO_ROLLBACK and \
                    self.checkBakPathNotExists():
                self.context.logger.log("No need to rollback.")
                self.exitWithRetCode(action, True)
            else:
                self.context.logger.error(str(e))
                self.exitWithRetCode(action, False, str(e))
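
    # Illustrative dispatch sketch (documentation only, assuming the
    # gs_upgradectl command line sets context.action before run() is called):
    #
    #   auto-upgrade    -> choseStrategy() -> doGreyBinaryUpgrade()
    #                                      or doInplaceBinaryUpgrade()
    #   auto-rollback   -> choseStrategy() -> doInplaceBinaryRollback()
    #                                      or doGreyBinaryRollback()
    #   commit-upgrade  -> choseStrategy() -> doInplaceCommitUpgrade()
    #                                      or doGreyCommitUpgrade()
    #   anything else   -> doChoseStrategy()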

    def commonCheck(self):
        """
        Check in the common process.
        :return:
        """
        self.checkReadOnly()
        if self.context.is_grey_upgrade:
            self.getOneDNInst(checkNormal=True)
        self.checkUpgradeMode()

    def checkReadOnly(self):
        """
        check whether the cluster is in read-only mode under grey upgrade,
        grey upgrade commit or grey upgrade rollback; if it is, record the
        value of default_transaction_read_only and set it to off
        """
        try:
            self.context.logger.debug("Check if in read only mode.")
            greyUpgradeFlagFile = os.path.join(self.context.upgradeBackupPath,
                                               const.GREY_UPGRADE_STEP_FILE)
            # only used under grey upgrade, grey upgrade commit or grey
            # upgrade rollback; if under grey upgrade, the flag file
            # greyUpgradeFlagFile has not been created yet,
            # so we use is_inplace_upgrade to judge the mode
            if (self.context.action == const.ACTION_AUTO_UPGRADE and
                    not self.context.is_inplace_upgrade or
                    (os.path.isfile(greyUpgradeFlagFile) and
                     self.context.action in [const.ACTION_AUTO_ROLLBACK,
                                             const.ACTION_COMMIT_UPGRADE])):
                if self.unSetClusterReadOnlyMode() != 0:
                    raise Exception("NOTICE: "
                                    + ErrorCode.GAUSS_529["GAUSS_52907"])
        except Exception as e:
            raise Exception(str(e))

    def checkUpgradeMode(self):
        """
        used to check if upgrade_mode is 0 before upgrade;
        if not, we set it to 0
        """
        tempPath = self.context.upgradeBackupPath
        filePath = os.path.join(tempPath, const.INPLACE_UPGRADE_STEP_FILE)
        if self.context.action == const.ACTION_AUTO_UPGRADE \
                and not os.path.exists(filePath):
            try:
                self.setUpgradeMode(0)
                self.context.logger.log(
                    "Successfully set upgrade_mode to 0.")
            except Exception as e:
                self.context.logger.log("Failed to set upgrade_mode to 0, "
                                        "please set it manually, "
                                        "or rollback first.")
                raise Exception(str(e))

    def checkBakPathNotExists(self):
        """
        check whether binary_upgrade exists on all nodes
        :return: True if it does not exist on any node
        """
        try:
            cmd = "if [ -d '%s' ]; then echo 'GetDir'; else echo 'NoDir'; fi" \
                  % self.context.upgradeBackupPath
            self.context.logger.debug("Command for checking if upgrade bak "
                                      "path exists: %s" % cmd)
            outputCollect = self.context.sshTool.getSshStatusOutput(cmd)[1]
            if outputCollect.find('GetDir') >= 0:
                self.context.logger.debug("Checking result: %s"
                                          % outputCollect)
                return False
            self.context.logger.debug("Path %s does not exist on any node."
                                      % self.context.upgradeBackupPath)
            return True
        except Exception:
            self.context.logger.debug("Failed to check upgrade bak path.")
            return False
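
    # Minimal sketch of the remote-check pattern used above (illustrative
    # only; `sshTool` is assumed to be an initialized SshTool and the path
    # is a hypothetical example):
    #
    #   cmd = "if [ -d '/tmp/binary_upgrade' ]; then echo 'GetDir'; " \
    #         "else echo 'NoDir'; fi"
    #   output = sshTool.getSshStatusOutput(cmd)[1]
    #   # 'GetDir' anywhere in the collected output means at least one
    #   # node still has the backup directory.
    #   exists_somewhere = output.find('GetDir') >= 0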

    def doChoseStrategy(self):
        """
        function: chose the strategy for upgrade
        input : NA
        output: NA
        """
        self.context.logger.debug("Choosing strategy.")
        try:
            self.context.action = self.choseStrategy()
            # we only support binary-upgrade.
            if self.context.action in [const.ACTION_SMALL_UPGRADE,
                                       const.ACTION_LARGE_UPGRADE]:
                self.exitWithRetCode(const.ACTION_CHOSE_STRATEGY,
                                     True,
                                     "Upgrade strategy: %s."
                                     % self.context.action)
            # Use inplace upgrade in special cases
            else:
                self.exitWithRetCode(const.ACTION_CHOSE_STRATEGY,
                                     True,
                                     "Upgrade strategy: %s."
                                     % self.context.action)
        except Exception as e:
            self.exitWithRetCode(const.ACTION_CHOSE_STRATEGY, False, str(e))
        self.context.logger.debug("Successfully got the upgrade strategy.")

    def choseStrategy(self):
        """
        function: chose upgrade strategy
        input : NA
        output: NA
        """
        upgradeAction = None
        try:
            # get new cluster info
            newVersionFile = VersionInfo.get_version_file()
            newClusterVersion, newClusterNumber, newCommitId = \
                VersionInfo.get_version_info(newVersionFile)
            gaussHome = ClusterDir.getInstallDir(self.context.user)
            if gaussHome == "":
                raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"]
                                % "$GAUSSHOME")
            if not os.path.islink(gaussHome):
                raise Exception(ErrorCode.GAUSS_529["GAUSS_52915"])
            newPath = gaussHome + "_%s" % newCommitId
            # the new app dir should exist after preinstall,
            # then we can use chose strategy
            if not os.path.exists(newPath):
                if self.context.action != const.ACTION_AUTO_ROLLBACK:
                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"]
                                    % newPath)
            self.context.logger.debug(
                "Successfully obtained version information"
                " of new clusters by %s." % newVersionFile)

            # get the old cluster info; if binary_upgrade does not exist,
            # try to copy it from other nodes
            oldPath = self.getClusterAppPath(const.OLD)
            if oldPath == "":
                self.context.logger.debug("Cannot get the old install "
                                          "path from table and file.")
                oldPath = os.path.realpath(gaussHome)
            self.context.logger.debug("Old cluster app path is %s" % oldPath)

            oldVersionFile = "%s/bin/upgrade_version" % oldPath
            try:
                (oldClusterVersion, oldClusterNumber, oldCommitId) = \
                    VersionInfo.get_version_info(oldVersionFile)
                self.context.logger.debug("Successfully obtained version"
                                          " information of old clusters by %s."
                                          % oldVersionFile)
            except Exception as e:
                if os.path.exists(self.context.upgradeBackupPath):
                    # if upgradeBackupPath exists,
                    # it means that we do rollback first,
                    # and we get the cluster version from the backup file
                    possibOldVersionFile = "%s/old_upgrade_version" \
                                           % self.context.upgradeBackupPath
                    self.context.logger.debug(str(e))
                    self.context.logger.debug(
                        "Try to get the version information from %s."
                        % possibOldVersionFile)
                    (oldClusterVersion, oldClusterNumber, oldCommitId) = \
                        VersionInfo.get_version_info(possibOldVersionFile)
                else:
                    raise Exception(str(e))

            # if the last successful commit upgrade_type is grey upgrade,
            # the symbolic link should point to the
            # old app path with the old commit id
            if oldCommitId == newCommitId:
                raise Exception(ErrorCode.GAUSS_529["GAUSS_52901"])
            self.context.logger.debug(
                "Successfully obtained version information of new and old "
                "clusters.\n The old cluster number:%s, the new "
                "cluster number:%s." % (oldClusterNumber, newClusterNumber))

            self.canDoRollbackOrCommit()

            if oldClusterVersion > newClusterVersion:
                raise Exception(ErrorCode.GAUSS_529["GAUSS_52902"]
                                % (oldClusterVersion, newClusterVersion))

            self.checkLastUpgrade(newCommitId)

            if float(newClusterNumber) < float(oldClusterNumber):
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51629"]
                                % newClusterNumber)
            elif float(newClusterNumber) == float(oldClusterNumber):
                if self.context.is_inplace_upgrade:
                    upgradeAction = const.ACTION_INPLACE_UPGRADE
                else:
                    upgradeAction = const.ACTION_SMALL_UPGRADE
            else:
                if int(float(newClusterNumber)) > int(float(oldClusterNumber)):
                    raise Exception(ErrorCode.GAUSS_529["GAUSS_52904"]
                                    + "Upgrading from this cluster "
                                    "version is not supported.")
                elif ((float(newClusterNumber) - int(float(newClusterNumber)))
                        > (float(oldClusterNumber) -
                           int(float(oldClusterNumber)))):
                    if self.context.is_inplace_upgrade:
                        upgradeAction = const.ACTION_INPLACE_UPGRADE
                        self.isLargeInplaceUpgrade = True
                    else:
                        upgradeAction = const.ACTION_LARGE_UPGRADE
                else:
                    raise Exception(ErrorCode.GAUSS_516["GAUSS_51629"]
                                    % newClusterNumber)
            self.context.logger.debug("The matched upgrade strategy is: %s."
                                      % upgradeAction)
            self.context.newClusterVersion = newClusterVersion
            self.context.newClusterNumber = newClusterNumber
            self.context.oldClusterVersion = oldClusterVersion
            self.context.oldClusterNumber = oldClusterNumber
            self.context.newClusterAppPath = newPath
            self.context.oldClusterAppPath = oldPath
            self.newCommitId = newCommitId
            self.oldCommitId = oldCommitId
            return upgradeAction
        except Exception as e:
            raise Exception(ErrorCode.GAUSS_529["GAUSS_52900"] % str(e)
                            + " Do nothing this time.")
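
    # Worked example of the strategy decision above (illustrative numbers):
    # with oldClusterNumber = "92.298" and newClusterNumber = "92.501",
    # the integer parts are equal (92 == 92) and the fractional part grows
    # (0.501 > 0.298), so the strategy is a large upgrade (or inplace if
    # is_inplace_upgrade is set). With "92.298" -> "92.298" it is a small
    # upgrade; a bigger integer part (e.g. "93.001") raises GAUSS_52904.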

    def canDoRollbackOrCommit(self):
        """
        Check whether rollback or commit is required.
        :return:
        """
        try:
            if self.context.action == const.ACTION_AUTO_ROLLBACK or \
                    self.context.action == const.ACTION_COMMIT_UPGRADE:
                inplaceUpgradeFlagFile = os.path.join(
                    self.context.upgradeBackupPath,
                    const.INPLACE_UPGRADE_FLAG_FILE)
                grayUpgradeFlagFile = os.path.join(
                    self.context.upgradeBackupPath,
                    const.GREY_UPGRADE_STEP_FILE)
                self.context.is_inplace_upgrade = False
                # we do rollback by the backup directory
                if os.path.isfile(inplaceUpgradeFlagFile):
                    self.context.logger.debug("inplace upgrade flag exists, "
                                              "use inplace rollback or commit.")
                    self.context.is_inplace_upgrade = True
                if os.path.isfile(grayUpgradeFlagFile):
                    self.context.logger.debug("grey upgrade flag exists, "
                                              "use grey rollback or commit.")
                    self.context.is_grey_upgrade = True
                if not (self.context.is_inplace_upgrade or
                        self.context.is_grey_upgrade):
                    if self.context.action == const.ACTION_AUTO_ROLLBACK \
                            and not self.checkBakPathNotExists():
                        self.cleanBinaryUpgradeBakFiles(True)
                    exitMsg = "No need to {0}".format(self.context.action)
                    self.exitWithRetCode(self.context.action, True, exitMsg)
        except Exception as e:
            raise Exception("Failed to check whether rollback or commit "
                            "is required. Error {0}".format(str(e)))

    def checkLastUpgrade(self, newCommitId):
        """
        check that the last failed upgrade type is the same as this time,
        and that the last upgrade version is the same as this time.
        If the last attempt was an inplace upgrade, we will roll back
        first; if it was a grey upgrade, we will upgrade again.
        """
        if self.context.action == const.ACTION_AUTO_UPGRADE:
            stepFile = os.path.join(self.context.upgradeBackupPath,
                                    const.GREY_UPGRADE_STEP_FILE)
            cmd = "if [ -f '%s' ]; then echo 'True';" \
                  " else echo 'False'; fi" % stepFile
            (resultMap, outputCollect) = \
                self.context.sshTool.getSshStatusOutput(cmd)
            self.context.logger.debug(
                "The result of checking grey upgrade step flag"
                " file on all nodes is:\n%s" % outputCollect)
            if self.context.is_inplace_upgrade:
                # if the grey upgrade rollback failed, the file should still
                # exist, so we cannot do grey upgrade now
                if outputCollect.find('True') >= 0:
                    ermsg = ErrorCode.GAUSS_502["GAUSS_50200"] \
                            % const.GREY_UPGRADE_STEP_FILE \
                            + "In grey upgrade process, " \
                              "cannot do inplace upgrade!"
                    raise Exception(str(ermsg))
            else:
                inplace_upgrade_flag_file =\
                    "%s/inplace_upgrade_flag" % self.context.upgradeBackupPath
                if os.path.isfile(inplace_upgrade_flag_file):
                    ermsg = ErrorCode.GAUSS_502["GAUSS_50200"] % \
                            inplace_upgrade_flag_file + \
                            "In inplace upgrade process, " \
                            "cannot do grey upgrade!"
                    raise Exception(ermsg)
                # remainders may be left when the last upgrade used
                # --force to forceRollback
                self.checkBakPathAndTable(outputCollect)
            self.checkNewCommitid(newCommitId)
        elif self.context.action == const.ACTION_AUTO_ROLLBACK or \
                self.context.action == const.ACTION_COMMIT_UPGRADE:
            self.checkNewCommitid(newCommitId)

    def checkBakPathAndTable(self, outputCollect):
        """
        if the step record file does not exist on any node but the table
        exists, the table is left over from the last upgrade;
        if both the table and the step file exist, check whether the
        content is correct
        :param outputCollect:
        :return:
        """
        # no need to check and drop the schema under force upgrade
        if not self.existTable(const.RECORD_NODE_STEP):
            return
        output = outputCollect.split('\n')
        output = output[:-1]
        findBakPath = False
        for record in output:
            # if the step file is found on any node, the backup path remains
            if record.find('True') >= 0:
                findBakPath = True
                break
        if not findBakPath:
            self.dropSupportSchema()
            return

    def checkNewCommitid(self, newCommitId):
        """
        the commitid comes from version.cfg; it should be the same as the
        commitid recorded in the app directory record file
        :param newCommitId: version.cfg line 3
        :return: NA
        """
        newPath = self.getClusterAppPath(const.NEW)
        if newPath != "":
            LastNewCommitId = newPath[-8:]
            # When the gs_upgradectl script is run repeatedly,
            # this upgrade version should be the same
            # as the last recorded upgrade version
            if newCommitId != LastNewCommitId:
                raise Exception(ErrorCode.GAUSS_529["GAUSS_52935"])

    def setGUCValue(self, guc_key, guc_value, action_type="reload"):
        """
        function: do gs_guc
        input : gucKey - parameter name
                gucValue - parameter value
                actionType - guc action type(set/reload). default is 'reload'
                onlySetCn - whether only set CN instance. default is False
        """
        tmp_file = ""
        if guc_value != "":
            guc_str = "%s='%s'" % (guc_key, guc_value)
        else:
            guc_str = "%s" % guc_key
        try:
            self.context.logger.debug("Start to set GUC value %s." % guc_str)
            cmd = "%s -t %s -U %s --upgrade_bak_path=%s --guc_string=\"%s\" -l %s --setType=%s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_SET_GUC_VALUE,
                   self.context.user,
                   self.context.upgradeBackupPath,
                   guc_str,
                   self.context.localLog,
                   action_type)
            if action_type == "reload":
                tmp_file = os.path.join(EnvUtil.getTmpDirFromEnv(self.context.user),
                                        const.TMP_DYNAMIC_DN_INFO)
                self.generateDynamicInfoFile(tmp_file)
            self.context.logger.debug("Cmd for setting parameter: %s." % cmd)
            host_list = copy.deepcopy(self.context.clusterNodes)
            self.context.execCommandInSpecialNode(cmd, host_list)
            self.context.logger.debug("Successfully set guc value.")
        except Exception as er:
            if self.context.forceRollback:
                self.context.logger.debug("WARNING: failed to set value %s." % guc_str)
            else:
                raise Exception(str(er))
        finally:
            if os.path.exists(tmp_file):
                delete_cmd = "(if [ -f '%s' ]; then rm -f '%s'; fi) " % \
                             (tmp_file, tmp_file)
                host_list = copy.deepcopy(self.context.clusterNodes)
                self.context.execCommandInSpecialNode(delete_cmd, host_list)
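
    # Illustrative call sketch (documentation only): with the default
    # "reload" action type, setGUCValue additionally generates the dynamic
    # DN info file before the run and removes it afterwards, which "set"
    # skips.
    #
    #   self.setGUCValue("default_transaction_read_only", "false")  # reload
    #   self.setGUCValue("vacuum_defer_cleanup_age", "100000", "set")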

    def setClusterReadOnlyMode(self):
        """
        function: Set the cluster read-only mode
        input : NA
        output: 0 successfully
                1 failed
        """
        try:
            self.context.logger.debug("Setting up the cluster read-only mode.")
            self.setGUCValue("default_transaction_read_only", "true")
            self.context.logger.debug("Successfully set the cluster read-only mode.")
            return 0
        except Exception as e:
            self.context.logger.debug("WARNING: Failed to set default_transaction_read_only "
                                      "parameter. %s" % str(e))
            return 1

    def unSetClusterReadOnlyMode(self):
        """
        function: Cancel the cluster read-only mode
        input : NA
        output: 0 successfully
                1 failed
        """
        try:
            self.context.logger.debug("Canceling the cluster read-only mode.")
            self.setGUCValue("default_transaction_read_only", "false")
            self.context.logger.debug("Successfully cancelled the cluster read-only mode.")
            return 0
        except Exception as e:
            self.context.logger.debug("WARNING: Failed to set default_transaction_read_only "
                                      "parameter. %s" % str(e))
            return 1

    def stopCluster(self):
        """
        function: Stop the cluster
        input : NA
        output: NA
        """
        self.context.logger.debug("Stopping the cluster.", "addStep")
        # Stop cluster applications
        cmd = "%s -U %s -R %s -t %s" % (
            OMCommand.getLocalScript("Local_StopInstance"),
            self.context.user, self.context.clusterInfo.appPath,
            const.UPGRADE_TIMEOUT_CLUSTER_STOP)
        self.context.logger.debug("Command for stop cluster: %s" % cmd)
        CmdExecutor.execCommandWithMode(
            cmd, self.context.sshTool,
            self.context.isSingle or self.context.localMode,
            self.context.mpprcFile)
        self.context.logger.debug("Successfully stopped cluster.")

    def startCluster(self):
        """
        function: start cluster
        input : NA
        output: NA
        """
        versionFile = os.path.join(
            self.context.oldClusterAppPath, "bin/upgrade_version")
        if os.path.exists(versionFile):
            _, number, _ = VersionInfo.get_version_info(versionFile)
            cmd = "%s -U %s -R %s -t %s --cluster_number=%s" % (
                OMCommand.getLocalScript("Local_StartInstance"),
                self.context.user, self.context.clusterInfo.appPath,
                const.UPGRADE_TIMEOUT_CLUSTER_START, number)
        else:
            cmd = "%s -U %s -R %s -t %s" % (
                OMCommand.getLocalScript("Local_StartInstance"),
                self.context.user, self.context.clusterInfo.appPath,
                const.UPGRADE_TIMEOUT_CLUSTER_START)
        CmdExecutor.execCommandWithMode(
            cmd, self.context.sshTool,
            self.context.isSingle or self.context.localMode,
            self.context.mpprcFile)
        self.context.logger.log("Successfully started cluster.")

    def createCommitFlagFile(self):
        """
        function: create a flag file; if this file exists, it means that
                  the user has called the commit interface but it has not
                  finished yet. If creation fails, the script should exit.
        input : NA
        output: NA
        """
        commitFlagFile = "%s/commitFlagFile" % self.context.upgradeBackupPath
        self.context.logger.debug("Start to create the commit flag file.")
        try:
            cmd = "(if [ -d '%s' ]; then touch '%s'; fi) " % (
                self.context.upgradeBackupPath, commitFlagFile)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)
        except Exception as e:
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50206"]
                            % ("commit flag file: %s" % str(e)))
        self.context.logger.debug("Successfully created the commit flag file.")

    def checkCommitFlagFile(self):
        """
        function: check if the commit flag file exists.
        input : NA
        output: return 0 if the file commitFlagFile exists,
                else return 1
        """
        commitFlagFile = "%s/commitFlagFile" % self.context.upgradeBackupPath
        if (os.path.isfile(commitFlagFile)):
            return 0
        else:
            return 1
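
    # Illustrative usage sketch (documentation only): commit is resumable
    # because the flag outlives a crashed commit attempt.
    #
    #   self.createCommitFlagFile()
    #   # ... commit work; if it crashes, the next run can detect it with:
    #   if self.checkCommitFlagFile() == 0:
    #       pass  # a previous commit was started but did not finish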

    def createInplaceUpgradeFlagFile(self):
        """
        function: create the inplace upgrade flag file on all nodes
                  if doing inplace upgrade
                  1.check if this is an inplace upgrade
                  2.get the new and old cluster version numbers
                  3.write the file
        Input: NA
        output : NA
        """
        self.context.logger.debug("Start to create inplace upgrade flag file.")
        try:
            newClusterNumber = self.context.newClusterNumber
            oldClusterNumber = self.context.oldClusterNumber

            inplace_upgrade_flag_file = "%s/inplace_upgrade_flag" % \
                                        self.context.upgradeBackupPath
            FileUtil.createFile(inplace_upgrade_flag_file)
            FileUtil.writeFile(inplace_upgrade_flag_file,
                               ["newClusterNumber:%s" % newClusterNumber], 'a')
            FileUtil.writeFile(inplace_upgrade_flag_file,
                               ["oldClusterNumber:%s" % oldClusterNumber], 'a')
            if (not self.context.isSingle):
                self.context.sshTool.scpFiles(inplace_upgrade_flag_file,
                                              self.context.upgradeBackupPath)
            if float(self.context.oldClusterNumber) <= float(
                    const.UPGRADE_VERSION_64bit_xid) < \
                    float(self.context.newClusterNumber):
                self.__upgrade_across_64bit_xid = True

            self.context.logger.debug("Successfully created inplace"
                                      " upgrade flag file.")
        except Exception as e:
            raise Exception(str(e))

    def setUpgradeFromParam(self, cluster_version_number, is_check=True):
        """
        function: set the upgrade_from parameter
        Input : oldClusterNumber
        output : NA
        """
        if not DefaultValue.get_cm_server_num_from_static(self.context.oldClusterInfo) > 0:
            self.context.logger.debug("No need to set cm parameter.")
            return
        self.context.logger.debug("Set upgrade_from guc parameter.")
        working_grand_version = int(float(cluster_version_number) * 1000)
        cmd = "gs_guc set -Z cmagent -N all -I all -c 'upgrade_from=%s'" % working_grand_version
        self.context.logger.debug("setting cmagent parameter: %s." % cmd)
        try:
            (status, output) = CmdUtil.retryGetstatusoutput(cmd)
            if status != 0:
                self.context.logger.debug("Set upgrade_from failed. "
                                          "cmd:%s\nOutput:%s" % (cmd, str(output)))
                raise Exception(
                    ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Error: \n%s" % str(output))
            if is_check:
                gucStr = "%s:%s" % ("upgrade_from", str(working_grand_version).strip())
                self.checkParam(gucStr, True)
            self.context.logger.debug("Successfully set cmagent parameter "
                                      "upgrade_from=%s." % working_grand_version)
        except Exception as er:
            if self.context.action == const.ACTION_INPLACE_UPGRADE or \
                    not self.context.forceRollback:
                raise Exception(str(er))
            self.context.logger.log("NOTICE: Failed to set upgrade_from, "
                                    "please set it manually with command: \n%s" % str(cmd))

    def setUpgradeMode(self, mode, set_type="reload"):
        """
        function: set the upgrade_mode parameter
        Input : upgrade_mode
        output : NA
        """
        try:
            self.setUpgradeModeGuc(mode, set_type)
        except Exception as er:
            if self.context.action != const.ACTION_INPLACE_UPGRADE and \
                    not self.context.forceRollback:
                raise Exception(str(er))
            try:
                self.setUpgradeModeGuc(mode, "set")
            except Exception as _:
                self.context.logger.log("NOTICE: Failed to set upgrade_mode to {0}, "
                                        "please set it manually.".format(mode))

    def setUpgradeModeGuc(self, mode, set_type="reload"):
        """
        function: set the upgrade_mode guc
        input : mode, setType
        output : NA
        """
        self.context.logger.debug("Set upgrade_mode guc parameter.")
        cmd = "gs_guc %s -Z datanode -I all -c 'upgrade_mode=%d'" % (set_type, mode)
        self.context.logger.debug("Command for setting database"
                                  " node parameter: %s." % cmd)
        retry_count = 0
        while retry_count < 5:
            try:
                CmdExecutor.execCommandWithMode(cmd,
                                                self.context.sshTool)
                break
            except Exception as _:
                retry_count += 1
                if retry_count < 5:
                    time.sleep(5)
                    continue

        guc_str = "upgrade_mode:%d" % mode
        self.checkParam(guc_str)
        self.context.logger.debug("Successfully set "
                                  "upgrade_mode to %d." % mode)
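
    # Format sketch for the guc_string contract shared with
    # Local_Upgrade_Utility (documentation only): checkParam() passes
    # "key:value" strings, for example
    #
    #   guc_str = "upgrade_mode:2"       # checked on the datanodes
    #   gucStr = "upgrade_from:92298"    # cmagent value, checked fromFile;
    #                                    # 92298 == int(float("92.298") * 1000)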

    def checkParam(self, gucStr, fromFile=False):
        """
        function: check the cmagent guc value
        Input : gucStr the guc key:value string
        output : NA
        """
        self.context.logger.debug("Start to check GUC value %s." % gucStr)
        try:
            # send cmd to that node and exec
            cmd = "%s -t %s -U %s --upgrade_bak_path=%s" \
                  " --guc_string=\"%s\" -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_CHECK_GUC,
                   self.context.user,
                   self.context.upgradeBackupPath,
                   gucStr,
                   self.context.localLog)
            if fromFile:
                cmd += " --fromFile"
            self.context.logger.debug("Command for checking"
                                      " parameter: %s." % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)
            self.context.logger.debug("Successfully checked guc value.")
        except Exception as e:
            raise Exception(str(e))

    def floatMoreThan(self, numOne, numTwo):
        """
        function: float more than
        input : numOne, numTwo
        output : True/False
        """
        if float(numOne) - float(numTwo) > float(const.DELTA_NUM):
            return True
        return False

    def floatEqualTo(self, numOne, numTwo):
        """
        function: float equal to
        input: numOne, numTwo
        output: True/False
        """
        if float(-const.DELTA_NUM) < (float(numOne) - float(numTwo)) \
                < float(const.DELTA_NUM):
            return True
        return False

    def floatGreaterOrEqualTo(self, numOne, numTwo):
        """
        function: float greater or equal to
        input: numOne, numTwo
        output: True/False
        """
        if self.floatMoreThan(numOne, numTwo) or \
                self.floatEqualTo(numOne, numTwo):
            return True
        return False
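
    # Worked example of the epsilon comparison above (illustrative; assumes
    # const.DELTA_NUM is a small tolerance such as 0.000001):
    #
    #   self.floatEqualTo("92.298", 92.298)     # True, |diff| < DELTA_NUM
    #   self.floatMoreThan("92.501", "92.298")  # True, diff > DELTA_NUM
    #
    # Comparing through a tolerance avoids binary floating-point noise when
    # the cluster version numbers arrive as strings.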

    def reloadVacuumDeferCleanupAge(self):
        """
        function: reload the guc parameter vacuum_defer_cleanup_age value on
                  inplace upgrade or grey large upgrade
        input : NA
        """
        self.setGUCValue("vacuum_defer_cleanup_age", "100000", "reload")

    def doGreyBinaryUpgrade(self):
        """
        function: do grey binary upgrade, which essentially replaces the
        binary files. For versions newer than 91.255 this strategy replaces
        binary (inplace) upgrade: symbolic links are used to switch the
        binary file directory instead of installing the new bin in the same
        directory. Choose a minority of nodes to upgrade first, then observe
        to decide whether to upgrade the remaining nodes or roll back the
        grey nodes.
        input : NA
        output: NA
        """
        upgradeAgain = False
        try:
            # 1. distribute the xml configuration file to every node.
            self.distributeXml()
            # 2. check if the app path is ready and sha256 is right and others
            self.checkUpgrade()
            # 4. check the cluster pressure
            self.HASyncReplayCheck()
            # 5. before doing grey binary upgrade, we must make sure the
            # cluster is Normal and the database can be
            # connected; if not, exit.
            (status, output) = self.doHealthCheck(const.OPTION_PRECHECK)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] %
                                "cluster" + "Detail: " + output)
            # 6. choose the node name list that satisfies the condition as
            # upgrade nodes
            self.chooseUpgradeNodes()
            # check if upgrading again is possible; if this is the second
            # loop of the upgrade, it can take the upgrade-again branch
            upgradeAgain = self.canUpgradeAgain()
        except Exception as e:
            # before this step, the upgrade process has done nothing to the
            # cluster, so this time there are no remainders
            self.context.logger.debug(traceback.format_exc())
            self.context.logger.log(ErrorCode.GAUSS_529["GAUSS_52934"] +
                                    "Nodes are the old version.\n" +
                                    "Error: %s." % str(e) +
                                    " Do nothing this time.")
            self.exitWithRetCode(self.action, False, str(e))

        if not upgradeAgain:
            try:
                if not self.doGreyBinaryRollback():
                    self.exitWithRetCode(const.ACTION_AUTO_ROLLBACK, False)
                self.removeOmRollbackProgressFile()
                self.context.logger.log(
                    "The directory %s will be deleted after commit-upgrade, "
                    "please make sure there is no personal data." %
                    self.context.oldClusterAppPath)
                # 7. prepare the upgrade function for sync and the table
                # RECORD_NODE_STEP, init the step of all nodes as 0
                self.prepareGreyUpgrade()

                # 8. install the new bin in the appPath which has been
                # prepared in the preinstall
                self.installNewBin()

                # decompress the catalog upgrade_sql.tar.gz to a temp dir,
                # including the upgrade sql files and guc settings
                self.prepareUpgradeSqlFolder()

                self.recordNodeStep(GreyUpgradeStep.STEP_UPDATE_CATALOG)
                # 9. if we update the catalog after switching to the new bin,
                # the system will raise an error that it cannot find the
                # catalog or column until the updateCatalog function finishes;
                # we cannot recognize whether it really cannot
                # find the column, or it is just the old version. So we
                # update the catalog on the old version
                if self.context.action == const.ACTION_LARGE_UPGRADE:
                    self.updateCatalog()
                self.recordNodeStep(GreyUpgradeStep.STEP_SWITCH_NEW_BIN)
                self.CopyCerts()
                self.upgradeAgain()
            except Exception as e:
                errmsg = ErrorCode.GAUSS_529["GAUSS_52934"] + \
                         "You can use --grey to upgrade or manually rollback."
                self.context.logger.log(errmsg + str(e))
                self.exitWithRetCode(self.context.action, False)
        else:
            self.upgradeAgain()
        self.exitWithRetCode(self.context.action, True)

    def upgradeAgain(self):
        try:
            self.context.logger.debug(
                "From this step, you can use -h to upgrade again if failed.")
            # we have guaranteed that the specified nodes have the same step,
            # so we only need to get one node's step
            currentStep = self.getOneNodeStep(self.context.nodeNames[0])
            self.context.logger.debug("Current node step is %d" % currentStep)
            # the first time we execute grey upgrade, we record the step for
            # all the nodes; if we upgrade the remaining nodes and
            # reenter the upgrade process, we will not roll back autonomously,
            # just upgrade again
            if currentStep < GreyUpgradeStep.STEP_UPGRADE_PROCESS:
                self.backupHotpatch()
                # 10. sync the Cgroup configuration and etc.
                # use the symbolic link to change the bin dir.
                # sync the old config to the new bin path; pg_plugin saves
                # the C function .so files (but not ending with .so),
                # so if one is created in the old appPath after the copy to
                # the newAppPath but before the switch to the new bin,
                # the new version may not recognize the C function
                self.greySyncGuc()
                self.greyUpgradeSyncOldConfigToNew()
                # 11. switch the cluster version to the new version
                self.getOneDNInst(checkNormal=True)
                self.switchBin(const.NEW)
                # create CA for CM
                self.create_ca_for_cm()
                self.setNewVersionGuc()
                self.recordNodeStep(GreyUpgradeStep.STEP_UPGRADE_PROCESS)
            if currentStep < GreyUpgradeStep.STEP_UPDATE_POST_CATALOG:
                # 12. kill the old existing processes, judging whether
                # each process is the required version
                self.switchExistsProcess()
                self.recordNodeStep(GreyUpgradeStep.STEP_UPDATE_POST_CATALOG)

        except Exception as e:
            self.context.logger.log("Failed to upgrade, can use --grey to "
                                    "upgrade again after rollback. Error: "
                                    "%s" % str(e))
            self.context.logger.debug(traceback.format_exc())
            self.exitWithRetCode(self.context.action, False, str(e))
        self.context.logger.log(
            "The nodes %s have been successfully upgraded to new version. "
            "Then do health check." % self.context.nodeNames)

        try:
            # 13. check the cluster status; the cluster status can be degraded
            (status, output) = self.doHealthCheck(const.OPTION_POSTCHECK)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] %
                                "cluster" + output)
            if self.isNodeSpecifyStep(GreyUpgradeStep.STEP_UPDATE_POST_CATALOG):
                # 14. exec the post upgrade script
                if self.context.action == const.ACTION_LARGE_UPGRADE:
                    self.waitClusterForNormal()
                    # backup the global relmap file before doing upgrade-post
                    self.backupGlobalRelmapFile()
                    self.prepareSql("rollback-post")
                    self.execRollbackUpgradedCatalog(scriptType="rollback-post")
                    self.prepareSql("upgrade-post")
                    self.execRollbackUpgradedCatalog(scriptType="upgrade-post")
                    self.getLsnInfo()
                hosts = copy.deepcopy(self.context.clusterNodes)
                self.recordNodeStep(
                    GreyUpgradeStep.STEP_PRE_COMMIT, nodes=hosts)
                self.printPrecommitBanner()
        except Exception as e:
            hintInfo = "Nodes are new version. " \
                       "Please check the cluster status. ERROR: \n"
            self.context.logger.log(hintInfo + str(e))
            self.context.logger.debug(traceback.format_exc())
            self.exitWithRetCode(self.context.action, False, hintInfo + str(e))
        self.context.logger.log("Successfully upgrade nodes.")
        self.exitWithRetCode(self.context.action, True)

    def getOneNodeStep(self, nodeName):
        """
        get the node's step
        """
        currentStep = self.getOneNodeStepInFile(nodeName)
        return currentStep

    def getOneNodeStepInFile(self, nodeName):
        """
        get the node's step from the step file
        """
        try:
            stepFile = os.path.join(self.context.upgradeBackupPath,
                                    const.GREY_UPGRADE_STEP_FILE)
            self.context.logger.debug(
                "trying to get one node step in file %s" % stepFile)
            with open(stepFile, 'r') as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    if row['node_host'] == nodeName:
                        step = int(row['step'])
                        break
            self.context.logger.debug("successfully got one node step {0} "
                                      "in file {1}".format(step, stepFile))
            return step
        except Exception as e:
            exitMsg = "Failed to get node step in step file. ERROR {0}".format(
                str(e))
            self.exitWithRetCode(self.action, False, exitMsg)

    def greySyncGuc(self):
        """
        delete the old version guc
        """
        cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \
              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
               const.ACTION_GREY_SYNC_GUC,
               self.context.user,
               self.context.upgradeBackupPath,
               self.context.localLog)
        self.context.logger.debug("Command for sync GUC in upgrade: %s" % cmd)
        hostList = copy.deepcopy(self.context.nodeNames)
        self.context.sshTool.executeCommand(cmd, hostList=hostList)
        self.context.logger.debug("Successfully sync guc.")

    def greyUpgradeSyncOldConfigToNew(self):
        """
        function: sync the old cluster config to the new cluster install path
        input : NA
        output: NA
        """
        # restore list:
        # etc/gscgroup_xxx.cfg
        # lib/postgresql/pg_plugin
        # initdb_param
        # server.key.cipher
        # server.key.rand
        # /share/sslsert/ca.key
        # /share/sslsert/etcdca.crt
        self.context.logger.log("Sync cluster configuration.")
        try:
            # backup DS libs and gds file
            cmd = "%s -t %s -U %s -V %d --old_cluster_app_path=%s " \
                  "--new_cluster_app_path=%s -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_GREY_UPGRADE_CONFIG_SYNC,
                   self.context.user,
                   int(float(self.context.oldClusterNumber) * 1000),
                   self.context.oldClusterAppPath,
                   self.context.newClusterAppPath,
                   self.context.localLog)
            self.context.logger.debug("Command for syncing config files: %s"
                                      % cmd)
            hostList = copy.deepcopy(self.context.nodeNames)
            self.context.sshTool.executeCommand(cmd, hostList=hostList)

            # change the owner of the application
            cmd = "chown -R %s:%s '%s'" % \
                  (self.context.user, self.context.group,
                   self.context.newClusterAppPath)
            hostList = copy.deepcopy(self.context.nodeNames)
            self.context.sshTool.executeCommand(cmd, hostList=hostList)
        except Exception as e:
            raise Exception(str(e) + " Failed to sync configuration.")
        self.context.logger.log("Successfully synced cluster configuration.")

    def _check_and_start_cluster(self):
        """
        Check the cluster state and start the cluster if needed
        """
        self.context.logger.log("Check cluster state.")
        cmd = "source {0};gs_om -t query".format(self.context.userProfile)
        status, output = subprocess.getstatusoutput(cmd)
        if status != 0:
            self.context.logger.debug("Check cluster state failed. Output: {0}".format(output))
        if "cluster_state : Degraded" in output or "cluster_state : Normal" in output:
            self.context.logger.log("Cluster state: {0}".format(output))
            return
        self.context.logger.log("Cluster needs to start now.")
        cmd = "source {0};gs_om -t start".format(self.context.userProfile)
        status, output = subprocess.getstatusoutput(cmd)
        if status != 0:
            self.context.logger.debug("Start cluster failed. Output: {0}".format(output))
            return
        self.context.logger.log("Cluster is started now.")

    def switchExistsProcess(self, isRollback=False):
        """
        switch all the processes
        :param isRollback:
        :return:
        """
        self.context.logger.log("Switching all db processes.", "addStep")
        self._check_and_start_cluster()
        if DefaultValue.get_cm_server_num_from_static(self.context.oldClusterInfo) > 0:
            self.setUpgradeFromParam(self.context.oldClusterNumber)
            self.reloadCmAgent()
            self.reload_cmserver()
        self.createCheckpoint()
        self.switchDn(isRollback)
        try:
            self.waitClusterNormalDegrade()
        except Exception as e:
            # can't promise normal status in force upgrade or forceRollback
            if self.context.forceRollback:
                self.context.logger.log("WARNING: Failed to wait "
                                        "cluster normal or degrade.")
            else:
                raise Exception(str(e))
        self.context.logger.log("Successfully switched all process versions",
                                "constant")

    def createCheckpoint(self):
        try:
            self.context.logger.log("Create checkpoint before switching.")
            start_time = timeit.default_timer()
            # create checkpoint
            sql = "CHECKPOINT;"
            for i in range(10):
                (status, output) = self.execSqlCommandInPrimaryDN(sql)
                # no need to retry under force upgrade
                if status == 0:
                    break
                self.context.logger.debug("Warning: checkpoint creation fails "
                                          "for the %s time. Fail message:%s."
                                          "Try again at one-second intervals." %
                                          (str(i), str(output)))
                time.sleep(1)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
                                " Error: \n%s" % str(output))

            elapsed = timeit.default_timer() - start_time
            self.context.logger.debug("Time to create checkpoint: %s" %
                                      self.getTimeFormat(elapsed))
        except Exception as e:
            if self.context.forceRollback:
                self.context.logger.log(
                    "WARNING: Failed to create checkpoint, "
                    "the switch process may use more time.")
            else:
                raise Exception(str(e))

    def need_rolling(self, is_roll_back):
        """
        Get whether the UDF subprocess needs to be switched for this
        upgrade mode
        """
        self.context.logger.debug("Start check need rolling.")
        new_static_config = os.path.realpath(os.path.join(self.context.newClusterAppPath,
                                                          "bin", "cluster_static_config"))
        old_static_config = os.path.realpath(os.path.join(self.context.oldClusterAppPath,
                                                          "bin", "cluster_static_config"))
        cluster_info = dbClusterInfo()
        if is_roll_back:
            self.context.logger.debug("This check need rolling for rollback.")
            if not os.path.isfile(new_static_config):
                self.context.logger.debug("Rollback not found new static config file [{0}]. "
                                          "No need to switch UDF.".format(new_static_config))
                return False
            cluster_info.initFromStaticConfig(self.context.user, new_static_config)
            if cluster_info.cmscount > 0:
                self.context.logger.debug("Rollback cluster info include CMS instance. "
                                          "So need to switch UDF.")
                return True
            self.context.logger.debug("Rollback new version cluster not include CMS instance. "
                                      "So no need to switch UDF.")
            return False
        self.context.logger.debug("This check need rolling for upgrade.")
        cluster_info.initFromStaticConfig(self.context.user, old_static_config)
        if cluster_info.cmscount > 0:
            self.context.logger.debug("Old cluster include CMS instance. So need to switch UDF.")
            return True
        self.context.logger.debug("Old cluster exclude CMS instance. So no need to switch UDF.")
        return False

    def switchDn(self, isRollback):
        self.context.logger.log("Switching DN processes.")
        start_time = timeit.default_timer()
        # under upgrade, kill the processes from the old cluster app path;
        # under rollback, kill them from the new cluster app path
        cmd = "%s -t %s -U %s -V %d --old_cluster_app_path=%s " \
              "--new_cluster_app_path=%s -X '%s' -l %s" % \
              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
               const.ACTION_SWITCH_DN,
               self.context.user,
               int(float(self.context.oldClusterNumber) * 1000),
               self.context.oldClusterAppPath,
               self.context.newClusterAppPath,
               self.context.xmlFile,
               self.context.localLog)

        if isRollback:
            cmd += " --rollback"
        if self.context.forceRollback:
            cmd += " --force"
        if self.need_rolling(isRollback):
            cmd += " --rolling"
        self.context.logger.debug(
            "Command for switching DN processes: %s" % cmd)
        hostList = copy.deepcopy(self.context.nodeNames)
        self.context.sshTool.executeCommand(cmd, hostList=hostList)
        start_cluster_time = timeit.default_timer()
        self.greyStartCluster()
        end_cluster_time = timeit.default_timer() - start_cluster_time
        self.context.logger.debug("Time to start cluster is %s" %
                                  self.getTimeFormat(end_cluster_time))
        elapsed = timeit.default_timer() - start_time
        self.context.logger.debug("Time to switch DN process version: %s"
                                  % self.getTimeFormat(elapsed))

    def greyStartCluster(self):
        """
        start the cluster in grey upgrade
        :return:
        """
        self.context.logger.log("Ready to grey start cluster.")
        versionFile = os.path.join(
            self.context.oldClusterAppPath, "bin/upgrade_version")
        if os.path.exists(versionFile):
            _, number, _ = VersionInfo.get_version_info(versionFile)
            cmd = "gs_om -t start --cluster-number='%s'" % (number)
        else:
            cmd = "gs_om -t start"
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                            "Command:%s. Error:\n%s" % (cmd, output))
        self.context.logger.log("Grey start cluster successfully.")

    def isNodeSpecifyStep(self, step, nodes=None):
        """
        check if all the specified nodes are at the given step
        """
        return self.isNodeSpecifyStepInFile(step, nodes)

    def isNodeSpecifyStepInFile(self, step=-1, nodes=None):
        """
        step = -1 means we just check whether the step on all the specified
        nodes is the same; otherwise, we check whether all the specified
        nodes are at the given step
        """
        try:
            if nodes:
                self.context.logger.debug(
                    "check if the nodes %s step is %s" % (nodes, step))
            else:
                self.context.logger.debug(
                    "check if all the nodes step is %s" % step)
                nodes = copy.deepcopy(self.context.clusterNodes)
            stepFile = os.path.join(self.context.upgradeBackupPath,
                                    const.GREY_UPGRADE_STEP_FILE)
            if not os.path.isfile(stepFile):
                self.context.logger.debug(
                    "no step file, which means nodes %s step is same" % nodes)
                return True

            with open(stepFile, 'r') as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    if row['node_host'] in nodes:
                        if step == -1:
                            step = int(row['step'])
                        else:
                            if step == int(row['step']):
                                continue
                            else:
                                self.context.logger.debug(
                                    "the nodes %s step is not all %s" % (
                                        nodes, step))
                                return False
            self.context.logger.debug(
                "the nodes %s step is all %s" % (nodes, step))
            return True
        except Exception as e:
            exitMsg = \
                "Failed to check node step in file. ERROR {0}".format(str(e))
            self.exitWithRetCode(self.action, False, exitMsg)

    def getLsnInfo(self):
        """
        Obtain the maximum LSN of each DN instance.
        """
        self.context.logger.debug("Start to get lsn info.")
        try:
            # prepare the dynamic cluster info file on every node
            self.getOneDNInst(checkNormal=True)
            execHosts = [self.dnInst.hostname]
            cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_GET_LSN_INFO,
                   self.context.user,
                   self.context.upgradeBackupPath,
                   self.context.localLog)
            self.context.logger.debug("Command for getting lsn info: %s." % cmd)
            self.context.sshTool.executeCommand(cmd, hostList=execHosts)
            self.context.logger.debug(
                "Successfully got lsn info on the instance node.")
        except Exception as e:
            if self.context.forceRollback:
                self.context.logger.debug(
                    "Failed to get lsn info in the force scenario.")
                return
            raise Exception(
                "Failed to get lsn info on the instance node. "
                "Error:{0}".format(str(e)))

    def chooseUpgradeNodes(self):
        # self.context.nodesNum has already been set to 1
        # when the number and node names are empty
        self.context.logger.debug("Choose the nodes to be upgraded.")
        self.setClusterDetailInfo()
        self.context.nodeNames = self.context.clusterNodes
        self.context.logger.log("Upgrade all nodes.")

    def getUpgradedNodeNames(self, step=GreyUpgradeStep.STEP_INIT_STATUS):
        """
        by default, return the upgraded nodes;
        otherwise, return the nodes whose step is more than the given step.
        Under force upgrade, we only get the step from the file.
        """
        return self.getUpgradedNodeNamesInFile(step)

    def getUpgradedNodeNamesInFile(self, step=GreyUpgradeStep.STEP_INIT_STATUS):
        """
        get the upgraded nodes from the step file;
        by default, return the upgraded nodes,
        otherwise, return the nodes whose step is more than the given step
        """
        try:
            stepFile = os.path.join(self.context.upgradeBackupPath,
                                    const.GREY_UPGRADE_STEP_FILE)
            self.context.logger.debug(
                "trying to get upgraded nodes from %s" % (stepFile))
            if not os.path.isfile(stepFile):
                return []
            greyNodeNames = []
            with open(stepFile, 'r') as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    if int(row['step']) > step:
                        greyNodeNames.append(row['node_host'])
            self.context.logger.debug("upgraded nodes are {0}".format(
                greyNodeNames))
            return greyNodeNames
        except Exception as e:
            exitMsg = "Failed to get upgraded nodes from step file. " \
                      "ERROR {0}".format(str(e))
            self.exitWithRetCode(self.action, False, exitMsg)

    def existTable(self, relname):
        """
        function: check if the table exists in pg_class
        input : NA
        output: NA
        """
        try:
            sql = "select count(*) from pg_catalog.pg_class c, " \
                  "pg_catalog.pg_namespace n " \
                  "where n.nspname = '%s' AND relname = '%s' " \
                  "AND c.relnamespace = n.oid;" % (
                      const.UPGRADE_SCHEMA, relname)
            self.context.logger.debug("Sql to query if has the table: %s" % sql)
            (status, output) = self.execSqlCommandInPrimaryDN(sql)
            if status != 0 or SqlResult.findErrorInSql(output):
                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] %
                                sql + " Error: \n%s" % str(output))
            if output == '0':
                self.context.logger.debug("Table does not exist.")
                return False
            self.context.logger.debug("Table exists.")
            return True
        except Exception as e:
            raise Exception(str(e))
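
    # Rendered-SQL sketch for existTable (illustrative; the schema and table
    # names below are hypothetical stand-ins for const.UPGRADE_SCHEMA and
    # const.RECORD_NODE_STEP):
    #
    #   select count(*) from pg_catalog.pg_class c, pg_catalog.pg_namespace n
    #   where n.nspname = 'on_upgrade' AND relname = 'record_node_step'
    #   AND c.relnamespace = n.oid;
    #
    # The primary DN returns '0' when the step-record table is absent.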
def canUpgradeAgain(self):
|
|
"""
|
|
judge if we should rollback or can upgrade again,
|
|
if has the nodes whose step is more than switch bin
|
|
"""
|
|
self.context.logger.debug("Check if we can upgrade again.")
|
|
greyNodeNames = self.getUpgradedNodeNames(
|
|
GreyUpgradeStep.STEP_SWITCH_NEW_BIN)
|
|
if len(greyNodeNames) > 0:
|
|
self.context.logger.debug(
|
|
"Has nodes step greater or equal than %d. Can upgrade again."
|
|
% GreyUpgradeStep.STEP_SWITCH_NEW_BIN)
|
|
return True
|
|
self.context.logger.debug(
|
|
"There is no node step greater or equal than %d. "
|
|
"Can not do upgrade again." % GreyUpgradeStep.STEP_SWITCH_NEW_BIN)
|
|
return False
|
|
|
|
def prepareGreyUpgrade(self):
|
|
"""
|
|
function: do pre-upgrade stuffs for primary and standby HA
|
|
sync check, and create table to record step
|
|
input : NA
|
|
output: NA
|
|
"""
|
|
if self.context.upgrade_remain:
|
|
self.context.logger.debug("No need to create pre-upgrade stuffs")
|
|
return
|
|
self.context.logger.debug("Start to create pre-upgrade stuffs")
|
|
# under force upgrade, we only prepare the files
|
|
self.prepareGreyUpgradeFiles()
|
|
# all stuffs done successfully, return 0
|
|
self.context.logger.debug("Successfully created pre-upgrade stuffs.")
|
|
|
|
def prepareGreyUpgradeFiles(self):
|
|
# the bakpath is created in checkUpgrade,
|
|
# but may deleted when rollback, so need to check
|
|
try:
|
|
self.context.logger.debug("start to prepare grey upgrade files")
|
|
self.createBakPath()
|
|
self.initNodeStepInCsv()
|
|
self.initUpgradeProcessStatus()
|
|
self.recordDirFile()
|
|
self.copyBakVersion()
|
|
self.context.logger.debug(
|
|
"successfully prepared grey upgrade files")
|
|
except Exception as e:
|
|
self.context.logger.debug("failed to prepare grey upgrade files")
|
|
raise Exception(str(e))
|
|
|
|
    def initNodeStepInCsv(self):
        bakStepFile = os.path.join(self.context.upgradeBackupPath,
                                   const.GREY_UPGRADE_STEP_FILE + "_bak")
        self.context.logger.debug("Create and init the file %s." % bakStepFile)
        FileUtil.createFile(bakStepFile, True, DefaultValue.KEY_FILE_MODE)
        header = ["node_host", "upgrade_action", "step"]
        FileUtil.createFileInSafeMode(bakStepFile)
        writeInfo = []
        for dbNode in self.context.clusterInfo.dbNodes:
            writeInfo.append([('%s' % dbNode.name),
                              ('%s' % self.context.action),
                              ('%s' % GreyUpgradeStep.STEP_INIT_STATUS)])
        with open(bakStepFile, "w") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(header)
            writer.writerows(writeInfo)
        finalStepFile = os.path.join(self.context.upgradeBackupPath,
                                     const.GREY_UPGRADE_STEP_FILE)
        FileUtil.rename(bakStepFile, finalStepFile)
        # so if we can get the step file, we can get the step information
        self.context.logger.debug("Rename the file %s to %s." % (
            bakStepFile, finalStepFile))
        self.distributeFile(finalStepFile)
        self.context.logger.debug("Successfully inited the file %s and "
                                  "sent it to each node." % finalStepFile)

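    # The step file written above is a small CSV with one row per cluster
    # node. With two hypothetical hosts it would look like:
    #   node_host,upgrade_action,step
    #   node1,<action>,<init step>
    #   node2,<action>,<init step>
    # where <action> is self.context.action and <init step> is the numeric
    # value of GreyUpgradeStep.STEP_INIT_STATUS.
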
    def initUpgradeProcessStatus(self):
        stepFile = os.path.join(self.context.upgradeBackupPath,
                                const.INPLACE_UPGRADE_STEP_FILE)
        self.context.logger.debug("Create and init the file %s" % stepFile)
        FileUtil.removeFile(stepFile, "python")
        FileUtil.createFile(stepFile, True, DefaultValue.KEY_FILE_MODE)
        self.recordNodeStepInplace(self.context.action,
                                   GreyUpgradeStep.STEP_INIT_STATUS)
        self.context.logger.debug("Successfully inited the file %s "
                                  "and sent it to each node" % stepFile)

    def recordNodeStep(self, step, nodes=None):
        """
        Under normal rollback, if the binary_upgrade dir does not exist,
        recordNodeStepInplace would create a file named binary_upgrade,
        so we should raise an error and require the force rollback mode.
        For commit upgrade, we should create the dir to record the
        cannot-rollback flag to avoid node inconsistency.
        :param step: upgrade or rollback step
        :param nodes: the nodes to be set to the given step
        :return: NA
        """
        cmd = "if [ -d '%s' ]; then echo 'True'; else echo 'False'; fi" %\
              self.context.upgradeBackupPath
        hostList = copy.deepcopy(self.context.clusterNodes)
        (resultMap, outputCollect) = self.context.sshTool.getSshStatusOutput(
            cmd, hostList)
        self.context.logger.debug(
            "The result of checking distribute directory is:\n%s" %
            outputCollect)
        if outputCollect.find('False') >= 0:
            if step != GreyUpgradeStep.STEP_BEGIN_COMMIT:
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
                                self.context.upgradeBackupPath)
            self.createBakPath()
        self.recordNodeStepInplace(self.context.action, step)
        # under force upgrade, we only record the step to file
        self.recordNodeStepInCsv(step, nodes)
        self.context.logger.debug(
            "Successfully recorded node step %s." % str(step))

    def recordNodeStepInCsv(self, step, nodes=None):
        if nodes is None:
            nodes = []
        self.context.logger.debug("Record node step %s in file" % str(step))
        stepFile = os.path.join(self.context.upgradeBackupPath,
                                const.GREY_UPGRADE_STEP_FILE)
        stepTempFile = os.path.join(self.context.upgradeBackupPath,
                                    "upgrade_step_temp.csv")
        FileUtil.createFileInSafeMode(stepTempFile)
        with open(stepFile, 'r') as csvfile, \
                open(stepTempFile, 'w') as tempfile:
            header = ["node_host", "upgrade_action", "step"]
            reader = csv.DictReader(csvfile)
            writer = csv.writer(tempfile)
            writer.writerow(header)
            writeInfo = []
            if not nodes:
                nodes = self.context.nodeNames
            if nodes:
                for row in reader:
                    if row['node_host'] in nodes:
                        writeInfo.append([row['node_host'], row[
                            'upgrade_action'], str(step)])
                    else:
                        writeInfo.append([row['node_host'], row[
                            'upgrade_action'], row['step']])
            else:
                for row in reader:
                    writeInfo.append([row['node_host'],
                                      row['upgrade_action'], str(step)])
            writer.writerows(writeInfo)

        FileUtil.removeFile(stepFile)
        FileUtil.rename(stepTempFile, stepFile)
        FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, stepFile)
        # distribute the node step file to each node
        self.distributeFile(stepFile)

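    # recordNodeStepInCsv() deliberately writes to upgrade_step_temp.csv and
    # then renames it over the real step file: the rename keeps readers from
    # ever seeing a half-written CSV, and the file is re-distributed so every
    # node observes the same step. A minimal sketch of the same pattern:
    #   with open(tmp, 'w') as f:
    #       f.write(new_content)
    #   os.replace(tmp, final)   # atomic on POSIX filesystems
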
    def doInplaceBinaryUpgrade(self):
        """
        function: do binary upgrade, which essentially replaces the binary
                  files
        input : NA
        output: NA
        """
        # 1. distribute new package to every node.
        self.distributeXml()
        # 2. check whether we should do rollback or not.
        if not self.doInplaceBinaryRollback():
            self.exitWithRetCode(const.ACTION_AUTO_ROLLBACK, False)
        try:
            self.checkUpgrade()

            # 3. before doing binary upgrade, we must make sure the cluster
            # is Normal and the database can be connected; if not, exit.
            self.start_strategy(is_final=False)

            # uninstall kerberos if it has already been installed
            pghost_path = EnvUtil.getEnvironmentParameterValue(
                'PGHOST', self.context.user)
            kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path
            if os.path.exists(kerberosflagfile):
                self.stop_strategy(is_final=False)
                self.context.logger.log("Starting to uninstall Kerberos.",
                                        "addStep")
                cmd = "source %s && " % self.context.userProfile
                cmd += "%s -m uninstall -U %s" % (OMCommand.getLocalScript(
                    "Local_Kerberos"), self.context.user)
                self.context.sshTool.executeCommand(cmd)
                self.context.logger.log("Successfully uninstalled Kerberos.")
                self.start_strategy(is_final=False)
            if self.unSetClusterReadOnlyMode() != 0:
                raise Exception("NOTICE: "
                                + ErrorCode.GAUSS_529["GAUSS_52907"])
            self.recordNodeStepInplace(const.ACTION_INPLACE_UPGRADE,
                                       const.BINARY_UPGRADE_STEP_INIT_STATUS)

            (status, output) = self.doHealthCheck(const.OPTION_PRECHECK)
            if status != 0:
                self.exitWithRetCode(const.ACTION_INPLACE_UPGRADE, False,
                                     ErrorCode.GAUSS_516["GAUSS_51601"]
                                     % "cluster" + output)
            self.getOneDNInst()
            # 4. record the old and new app dir in file
            self.recordDirFile()
            if self.isLargeInplaceUpgrade:
                self.recordLogicalClusterName()
            # 5. reload vacuum_defer_cleanup_age to the new value
            if self.isLargeInplaceUpgrade:
                if self.__upgrade_across_64bit_xid:
                    self.reloadVacuumDeferCleanupAge()

            if self.setClusterReadOnlyMode() != 0:
                raise Exception(ErrorCode.GAUSS_529["GAUSS_52908"])

            # after checkUpgrade, the bak path is ready, we can use it now
            # create inplace upgrade flag file if doing inplace upgrade
            self.createInplaceUpgradeFlagFile()
            # 6. backup current application and configuration.
            # The function is only used by binary upgrade; to ensure
            # transaction atomicity it is used together with checkUpgrade().
            self.backupNodeVersion()
            # For inplace upgrade, we have to perform additional checks
            # and then backup catalog files.
            if self.isLargeInplaceUpgrade:
                self.prepareUpgradeSqlFolder()
                self.HASyncReplayCheck()
                self.backupOldClusterDBAndRelInfo()
            # 7. stop old cluster
            self.recordNodeStepInplace(const.ACTION_INPLACE_UPGRADE,
                                       const.BINARY_UPGRADE_STEP_STOP_NODE)
            self.context.logger.debug("Start to stop all instances"
                                      " on the node.", "addStep")
            self.stop_strategy(is_final=False)
            self.context.logger.debug("Successfully stopped all"
                                      " instances on the node.", "constant")
            # 8. back up cluster config, including:
            # cluster_static_config
            # cluster_dynamic_config
            # etc/gscgroup_xxx.cfg
            # lib/postgresql/pg_plugin
            # server.key.cipher
            # server.key.rand
            # Data Studio lib files
            # gds files
            # physical catalog files if performing inplace upgrade
            self.recordNodeStepInplace(
                const.ACTION_INPLACE_UPGRADE,
                const.BINARY_UPGRADE_STEP_BACKUP_VERSION)
            self.backupClusterConfig()
            # 9. Upgrade application on node:
            # install new bin file
            self.recordNodeStepInplace(const.ACTION_INPLACE_UPGRADE,
                                       const.BINARY_UPGRADE_STEP_UPGRADE_APP)
            self.installNewBin()

            # 10. restore the cluster config, including:
            # cluster_static_config
            # cluster_dynamic_config
            # etc/gscgroup_xxx.cfg
            # lib/postgresql/pg_plugin
            # server.key.cipher
            # server.key.rand
            # Data Studio lib files
            # gds files
            # cn cert files
            # At the same time, sync newly added guc for instances
            self.restoreClusterConfig()
            self.syncNewGUC()
            # unset cluster readonly
            self.start_strategy(is_final=False)
            if self.unSetClusterReadOnlyMode() != 0:
                raise Exception("NOTICE: "
                                + ErrorCode.GAUSS_529["GAUSS_52907"])
            # flush new app dynamic configuration
            dynamicConfigFile = "%s/bin/cluster_dynamic_config" % \
                                self.context.newClusterAppPath
            if os.path.exists(dynamicConfigFile) \
                    and self.isLargeInplaceUpgrade:
                self.refresh_dynamic_config_file()
                self.context.logger.debug(
                    "Successfully refreshed dynamic config file")
            self.stop_strategy(is_final=False)
            if os.path.exists(dynamicConfigFile) \
                    and self.isLargeInplaceUpgrade:
                self.restore_dynamic_config_file()
            # 11. modify GUC parameter unix_socket_directory
            self.modifySocketDir()
            # 12. start new cluster
            self.recordNodeStepInplace(const.ACTION_INPLACE_UPGRADE,
                                       const.BINARY_UPGRADE_STEP_START_NODE)
            self.context.logger.debug("Start to start all instances"
                                      " on the node.", "addStep")

            # update catalog
            # start cluster in normal mode
            if self.isLargeInplaceUpgrade:
                self.touchRollbackCatalogFlag()
                self.updateCatalog()
            self.CopyCerts()
            if DefaultValue.is_create_grpc(self.context.logger,
                                           self.context.oldClusterAppPath):
                self.context.createGrpcCa()
                self.context.logger.debug("Successfully createGrpcCa.")

            # stop cluster to switch to the new bin
            self.stop_strategy(is_final=False)
            self.switchBin(const.NEW)
            # create CA for CM
            self.create_ca_for_cm()
            self.start_strategy(is_final=False)
            if self.isLargeInplaceUpgrade:
                self.modifyPgProcIndex()
                self.context.logger.debug("Start to exec post upgrade script")
                self.doUpgradeCatalog(postUpgrade=True)
                self.context.logger.debug(
                    "Successfully exec post upgrade script")
            self.context.logger.debug("Successfully started all "
                                      "instances on the node.", "constant")
            if self.setClusterReadOnlyMode() != 0:
                raise Exception(ErrorCode.GAUSS_529["GAUSS_52908"])
            # 13. check the cluster status
            (status, output) = self.doHealthCheck(const.OPTION_POSTCHECK)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"]
                                % "cluster" + output)

            # 14. record precommit step status
            self.recordNodeStepInplace(const.ACTION_INPLACE_UPGRADE,
                                       const.BINARY_UPGRADE_STEP_PRE_COMMIT)
            self.printPrecommitBanner()
        except Exception as e:
            self.context.logger.error(str(e))
            self.context.logger.log("Binary upgrade failed. Rollback"
                                    " to the original cluster.")
            # do rollback
            self.exitWithRetCode(const.ACTION_AUTO_ROLLBACK,
                                 self.doInplaceBinaryRollback())
        self.exitWithRetCode(const.ACTION_INPLACE_UPGRADE, True)

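    # The inplace upgrade above walks through a small step machine that is
    # persisted via recordNodeStepInplace(), so a failed run can resume or
    # roll back from the last recorded point:
    #   INIT_STATUS -> STOP_NODE -> BACKUP_VERSION -> UPGRADE_APP
    #     -> START_NODE -> PRE_COMMIT
    # (names abbreviated from the const.BINARY_UPGRADE_STEP_* constants).
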
    def backupGlobalRelmapFile(self):
        """
        Wait and check if all standbys have replayed up to the flushed xlog
        positions of primaries, then backup global/pg_filenode.map.
        If the old cluster version num >= RELMAP_4K_VERSION, there is no
        need to backup.
        """
        if self.context.oldClusterNumber >= const.RELMAP_4K_VERSION:
            self.context.logger.debug("no need to backup global relmap file")
            return

        # perform a checkpoint and wait for the standbys to catch up
        self.createCheckpoint()
        self.getAllStandbyDnInsts()
        # wait standby catchup first
        self.HASyncReplayCheck(False)
        # then wait for all cascade standbys (if any)
        for standby in self.dnStandbyInsts:
            self.HASyncReplayCheck(False, standby)
        # send cmd to all nodes and exec
        cmd = "%s -t %s -U %s -l %s -V %d" % \
              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
               const.ACTION_BACKUP_GLOBAL_RELMAP_FILE,
               self.context.user,
               self.context.localLog,
               int(float(self.context.oldClusterNumber) * 1000))
        self.context.logger.debug("backup global relmap file: %s." % cmd)
        hostList = copy.deepcopy(self.context.clusterNodes)
        CmdExecutor.execCommandWithMode(cmd,
                                        self.context.sshTool,
                                        self.context.isSingle,
                                        self.context.mpprcFile)
        self.context.logger.debug("Successfully backed up global relmap file.")

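    # Note on the -V argument built above: cluster version numbers are kept
    # in a decimal form such as 92.298, and int(float(number) * 1000) turns
    # them into an integer (92298 here) that the local upgrade utility can
    # compare numerically. The clean and restore helpers below reuse the
    # same conversion.
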
    def cleanTmpGlobalRelampFile(self):
        """
        remove the global/pg_filenode.map backups when committing; if the old
        cluster version num >= RELMAP_4K_VERSION, there is no need to remove.
        """
        if self.context.oldClusterNumber >= const.RELMAP_4K_VERSION:
            self.context.logger.debug("no need to clean tmp global relmap file")
            return
        # send cmd to all nodes and exec
        cmd = "%s -t %s -U %s -l %s -V %d" % \
              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
               const.ACTION_CLEAN_TMP_GLOBAL_RELMAP_FILE,
               self.context.user,
               self.context.localLog,
               int(float(self.context.oldClusterNumber) * 1000))

        self.context.logger.debug("clean tmp global relmap file when commit "
                                  "or rollback: %s." % cmd)
        hostList = copy.deepcopy(self.context.clusterNodes)
        CmdExecutor.execCommandWithMode(cmd,
                                        self.context.sshTool,
                                        self.context.isSingle,
                                        self.context.mpprcFile)
        self.context.logger.debug("Successfully cleaned tmp global relmap file.")

    def restoreGlobalRelampFile(self):
        """
        restore global/pg_filenode.map when rolling back; if the old cluster
        version num >= RELMAP_4K_VERSION, there is no need to restore.
        Use pg_filenode.old.map to recover pg_filenode.map and
        pg_filenode.map.backup.
        """
        if self.context.oldClusterNumber >= const.RELMAP_4K_VERSION:
            self.context.logger.debug("no need to restore global relmap file")
            return

        # perform a checkpoint and wait for standby sync before rollback
        self.createCheckpoint()
        self.getAllStandbyDnInsts()
        # wait standby catchup first
        self.HASyncReplayCheck(False)
        # then wait for all cascade standbys (if any)
        for standby in self.dnStandbyInsts:
            self.HASyncReplayCheck(False, standby)

        # send cmd to all nodes and exec
        cmd = "%s -t %s -U %s -l %s -V %d" % \
              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
               const.ACTION_RESTORE_GLOBAL_RELMAP_FILE,
               self.context.user,
               self.context.localLog,
               int(float(self.context.oldClusterNumber) * 1000))

        self.context.logger.debug("restore global relmap file when rollback:"
                                  " %s." % cmd)
        hostList = copy.deepcopy(self.context.clusterNodes)
        CmdExecutor.execCommandWithMode(cmd,
                                        self.context.sshTool,
                                        self.context.isSingle,
                                        self.context.mpprcFile)
        self.context.logger.debug("Successfully restored global relmap file.")

    def doInplaceCommitUpgrade(self):
        """
        function: commit binary upgrade and clean up backup files
                  1. unset read-only
                  2. drop old PMK schema
                  3. restore UDF
                  4. clean backup catalog physical files
                     if doing inplace upgrade
                  5. clean up other upgrade tmp files
        input : NA
        output: NA
        """
        self.context.logger.log("NOTICE: Start to commit binary upgrade.")
        self.context.logger.log("Start to check whether can be committed.",
                                "addStep")
        if self.getNodeStepInplace() != const.BINARY_UPGRADE_STEP_PRE_COMMIT:
            raise Exception(ErrorCode.GAUSS_529["GAUSS_52916"]
                            + " Please check if previous upgrade"
                            " operation was successful or if"
                            " upgrade has already been committed.")
        self.context.logger.log("Can be committed.", "constant")
        self.context.logger.log("Start to set commit flag.", "addStep")
        # create commit flag file
        self.createCommitFlagFile()
        self.context.logger.log("Set commit flag succeeded.", "constant")
        self.context.logger.log("Start to do operations that cannot be"
                                " rolled back.", "addStep")

        # variable to indicate whether we should keep the step file
        # and cleanup list file for re-entry
        cleanUpSuccess = True

        # drop table and index after large upgrade
        if self.isLargeInplaceUpgrade and self.check_upgrade_mode():
            self.drop_table_or_index()
        # 1. unset read-only
        if self.isLargeInplaceUpgrade:
            self.setUpgradeFromParam(const.UPGRADE_UNSET_NUM)
            self.reloadCmAgent()
            self.setUpgradeMode(0)

        if self.unSetClusterReadOnlyMode() != 0:
            self.context.logger.log("NOTICE: "
                                    + ErrorCode.GAUSS_529["GAUSS_52907"])
            cleanUpSuccess = False
        if self.isLargeInplaceUpgrade:
            self.cleanCsvFile()
        # 2. drop old PMK schema
        # we sleep 10 seconds first because the DB might be updating
        # its ha status after unsetting read-only
        self.context.logger.log("Cancel the upgrade status succeeded.",
                                "constant")
        self.context.logger.log("Start to clean temp files for upgrade.",
                                "addStep")
        time.sleep(10)
        # 4. clean backup catalog physical files if doing inplace upgrade
        if self.cleanBackupedCatalogPhysicalFiles() != 0:
            self.context.logger.debug(
                "Failed to clean backup files in directory %s. "
                % self.context.upgradeBackupPath)

        if not cleanUpSuccess:
            self.context.logger.log(
                "NOTICE: Cleanup is incomplete during commit. "
                "Please re-commit upgrade once again or cleanup manually")
        else:
            # 5. clean up other upgrade tmp files
            # and uninstall inplace upgrade support functions
            self.cleanInstallPath(const.OLD)
            self.cleanBinaryUpgradeBakFiles()
            if self.isLargeInplaceUpgrade:
                self.stop_strategy(is_final=False)
                self.start_strategy(is_final=False)

            # install Kerberos
            self.install_kerberos()
        self.context.logger.log("Clean temp files for upgrade succeeded.",
                                "constant")
        self.context.logger.log("NOTICE: Commit binary upgrade succeeded.")
        # remove global relmap file
        self.cleanTmpGlobalRelampFile()
        self.exitWithRetCode(const.ACTION_INPLACE_UPGRADE, cleanUpSuccess)

    def install_kerberos(self):
        """
        install kerberos after upgrade
        :return: NA
        """
        pghost_path = EnvUtil.getEnvironmentParameterValue(
            'PGHOST', self.context.user)
        kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path
        if os.path.exists(kerberosflagfile):
            # install kerberos
            cmd = "source %s &&" % self.context.userProfile
            cmd += "gs_om -t stop && "
            cmd += "%s -m install -U %s --krb-server" % (
                OMCommand.getLocalScript("Local_Kerberos"),
                self.context.user)
            (status, output) = CmdUtil.retryGetstatusoutput(cmd, 3, 5)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                                "Command:%s. Error:\n%s" % (cmd, output))
            cmd = "source %s && " % self.context.userProfile
            cmd += "%s -m install -U %s --krb-client " % (
                OMCommand.getLocalScript("Local_Kerberos"), self.context.user)
            self.context.sshTool.executeCommand(
                cmd, hostList=self.context.clusterNodes)
            self.context.logger.log("Successfully installed Kerberos.")
            cmd = "source %s && gs_om -t start" % self.context.userProfile
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                                "Command:%s. Error:\n%s" % (cmd, output))
            os.remove(kerberosflagfile)

    def refresh_dynamic_config_file(self):
        """
        refresh the dynamic config file
        :return: NA
        """
        cmd = "source %s ;gs_om -t refreshconf" % self.context.userProfile
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                            "Command:%s. Error:\n%s" % (cmd, output))

    def restore_dynamic_config_file(self):
        """
        restore the dynamic config file
        :return: NA
        """
        cmd = "%s -t %s -U %s -V %d --upgrade_bak_path=%s " \
              "--old_cluster_app_path=%s --new_cluster_app_path=%s " \
              "-l %s" % (
                  OMCommand.getLocalScript("Local_Upgrade_Utility"),
                  const.ACTION_RESTORE_DYNAMIC_CONFIG_FILE,
                  self.context.user,
                  int(float(self.context.oldClusterNumber) * 1000),
                  self.context.upgradeBackupPath,
                  self.context.oldClusterAppPath,
                  self.context.newClusterAppPath,
                  self.context.localLog)

        self.context.logger.debug("Command for restoring "
                                  "config files: %s" % cmd)
        CmdExecutor.execCommandWithMode(cmd,
                                        self.context.sshTool,
                                        self.context.isSingle,
                                        self.context.mpprcFile)

    def cleanCsvFile(self):
        """
        clean the csv files generated for the pg_proc upgrade
        :return: NA
        """
        clusterNodes = self.context.clusterInfo.dbNodes
        for dbNode in clusterNodes:
            if len(dbNode.datanodes) == 0:
                continue
            dnInst = dbNode.datanodes[0]
            dndir = dnInst.datadir
            pg_proc_csv_path = \
                '%s/pg_copydir/tbl_pg_proc_oids.csv' % dndir
            new_pg_proc_csv_path = \
                '%s/pg_copydir/new_tbl_pg_proc_oids.csv' % dndir
            if os.path.exists(pg_proc_csv_path):
                FileUtil.removeFile(pg_proc_csv_path)
            if os.path.exists(new_pg_proc_csv_path):
                FileUtil.removeFile(new_pg_proc_csv_path)

    def check_upgrade_mode(self):
        """
        check the upgrade_mode GUC value on all instances
        :return: True if upgrade_mode is not 0, otherwise False
        """
        cmd = "source %s ; gs_guc check -N all -I all -c 'upgrade_mode'" % \
              self.context.userProfile
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_500[
                                "GAUSS_50010"] % 'upgrade_mode' +
                            "Error: \n%s" % str(output))
        if output.find("upgrade_mode=0") >= 0:
            return False
        else:
            return True

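    # check_upgrade_mode() relies on gs_guc echoing the parameter value for
    # every instance; the output typically contains lines with the substring
    # "upgrade_mode=0" when an instance is not in upgrade mode (the exact
    # surrounding format depends on the gs_guc version, so only the
    # substring is matched). Note that this means the method returns False
    # as soon as any single instance reports 0.
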
    def cleanBackupedCatalogPhysicalFiles(self, isRollBack=False):
        """
        function : clean backed-up catalog physical files
        input : isRollBack, default is False
        output: return 0, if the operation is done successfully.
                return 1, if the operation failed.
        """
        try:
            if self.isLargeInplaceUpgrade:
                self.context.logger.log("Clean up backup catalog files.")
                # send cmd to all nodes and exec
                cmd = "%s -t %s -U %s --upgrade_bak_path=%s -X '%s' -l %s" % \
                      (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                       const.ACTION_CLEAN_OLD_CLUSTER_CATALOG_PHYSICAL_FILES,
                       self.context.user,
                       self.context.upgradeBackupPath,
                       self.context.xmlFile,
                       self.context.localLog)
                if isRollBack:
                    cmd += " --rollback --oldcluster_num='%s'" % \
                           self.context.oldClusterNumber
                self.context.logger.debug(
                    "Command for cleaning up physical catalog files: %s." % cmd)
                CmdExecutor.execCommandWithMode(
                    cmd,
                    self.context.sshTool,
                    self.context.isSingle,
                    self.context.userProfile)
                self.context.logger.debug(
                    "Successfully cleaned up backup catalog files.")
            return 0
        except Exception as e:
            if isRollBack:
                raise Exception(
                    "Fail to clean up backup catalog files: %s" % str(e))
            else:
                self.context.logger.debug(
                    "Fail to clean up backup catalog files. " +
                    "Please re-commit upgrade once again or clean up manually.")
                return 1

    def recordLogicalClusterName(self):
        """
        function: record the logical node group name in bakpath,
                  so that we can restore the specific name in bakpath,
                  used in restoreCgroup, and refresh the CgroupConfigure
        input : NA
        output: NA
        """
        lcgroupfile = "%s/oldclusterinfo.json" % self.context.tmpDir
        try:
            self.context.logger.debug(
                "Write and send logical cluster info file.")
            # check whether the file exists
            if os.path.isfile(lcgroupfile):
                return 0
            # check whether it is a logical cluster
            sql = """SELECT true AS group_kind
                     FROM pg_class c, pg_namespace n, pg_attribute attr
                     WHERE c.relname = 'pgxc_group' AND n.nspname = 'pg_catalog'
                     AND attr.attname = 'group_kind' AND c.relnamespace =
                     n.oid AND attr.attrelid = c.oid; """
            self.context.logger.debug(
                "Check if the cluster type is a logical cluster.")
            (status, output) = ClusterCommand.remoteSQLCommand(
                sql,
                self.context.user,
                self.dnInst.hostname,
                self.dnInst.port,
                False,
                DefaultValue.DEFAULT_DB_NAME,
                IsInplaceUpgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_513[
                                    "GAUSS_51300"] % sql + " Error: \n%s" % str(
                    output))
            if not output or output.strip() != 't':
                self.context.logger.debug(
                    "The old cluster is not a logical cluster.")
                return 0
            self.context.logger.debug("The old cluster is a logical cluster.")
            # get the logical cluster group name list
            sql = "SELECT group_name FROM pgxc_group WHERE group_kind = 'v';"
            self.context.logger.debug(
                "Getting the list of logical cluster names.")
            (status, output) = ClusterCommand.remoteSQLCommand(
                sql,
                self.context.user,
                self.dnInst.hostname,
                self.dnInst.port,
                False,
                DefaultValue.DEFAULT_DB_NAME,
                IsInplaceUpgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_513[
                                    "GAUSS_51300"] % sql + " Error: \n%s" % str(
                    output))
            lcgroupnames = output.split("\n")
            self.context.logger.debug(
                "The list of logical cluster names: %s." % lcgroupnames)
            # create the file
            FileUtil.createFile(lcgroupfile)
            FileUtil.changeOwner(self.context.user, lcgroupfile)
            FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, lcgroupfile)
            # write the result to the file
            with open(lcgroupfile, "w") as fp_json:
                json.dump({"lcgroupnamelist": lcgroupnames}, fp_json)
            # send the file to remote nodes
            if not self.context.isSingle:
                self.context.sshTool.scpFiles(lcgroupfile, self.context.tmpDir)
            self.context.logger.debug(
                "Successfully wrote and sent the logical cluster info file.")
            return 0
        except Exception as e:
            cmd = "(if [ -f '%s' ]; then rm -f '%s'; fi)" % (
                lcgroupfile, lcgroupfile)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.userProfile)
            raise Exception(str(e))

    def prepareUpgradeSqlFolder(self):
        """
        function: verify upgrade_sql.tar.gz and extract it to the binary
                  backup path; because every node needs to set GUC values,
                  we decompress it on all nodes
        input : NA
        output: NA
        """
        self.context.logger.debug("Preparing upgrade sql folder.")
        if self.context.action == const.ACTION_INPLACE_UPGRADE:
            hostName = NetUtil.GetHostIpOrName()
            hosts = [hostName]
        else:
            hosts = self.context.clusterNodes
        cmd = "%s -t %s -U %s --upgrade_bak_path=%s -X %s -l %s" % \
              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
               const.ACTION_UPGRADE_SQL_FOLDER,
               self.context.user,
               self.context.upgradeBackupPath,
               self.context.xmlFile,
               self.context.localLog)
        CmdExecutor.execCommandWithMode(cmd,
                                        self.context.sshTool,
                                        self.context.isSingle,
                                        self.context.userProfile,
                                        hosts)

    def HASyncReplayCheck(self, catchupFailedOk=True, host=None):
        """
        function: Wait and check if all standbys have replayed up to the
                  flushed xlog positions of primaries. We record the primary
                  xlog flush position at the start of the check and wait
                  until the standby replays up to that point.
        Attention: If autovacuum is turned on, the primary xlog flush
                  position may increase during the check. We do not check
                  such newly added xlog because it will not change catalog
                  physical file positions.
        Input: catchupFailedOk, whether it is acceptable for a standby to
               fail to catch up with the primary
        output : NA
        """
        host = self.dnInst if host is None else host
        self.context.logger.debug("Start to wait and check if all the standby"
                                  " instances have replayed all xlogs,"
                                  " host: %s" % host.hostname)
        self.doReplay(catchupFailedOk, host)
        self.context.logger.debug("Successfully performed the replay check "
                                  "of the standby instance.")

    def doReplay(self, catchupFailedOk, host):
        refreshTimeout = 180
        waitTimeout = 300
        RefreshTime = datetime.now() + timedelta(seconds=refreshTimeout)
        EndTime = datetime.now() + timedelta(seconds=waitTimeout)
        # wait and check the sync status between primary and standby

        NeedReplay = True
        PosList = []
        while NeedReplay:
            sql = "SELECT sender_flush_location,receiver_replay_location " \
                  "from pg_catalog.pg_stat_get_wal_senders() " \
                  "where peer_role != 'Secondary';"
            (status, output) = ClusterCommand.remoteSQLCommand(
                sql,
                self.context.user,
                host.hostname,
                host.port,
                False,
                DefaultValue.DEFAULT_DB_NAME,
                IsInplaceUpgrade=True)
            if status != 0:
                self.context.logger.debug(
                    "Primary and Standby may be not in sync.")
                self.context.logger.debug(
                    "Sync status: %s. Output: %s" % (str(status), output))
            elif output != "":
                self.context.logger.debug(
                    "Sync status: %s. Output: %s" % (str(status), output))
                tmpPosList = self.getXlogPosition(output)
                if len(PosList) == 0:
                    PosList = copy.deepcopy(tmpPosList)
                    self.context.logger.debug(
                        "Primary and Standby may be not in sync.")
                else:
                    NeedReplay = False
                    for eachRec in PosList:
                        for eachTmpRec in tmpPosList:
                            if self.needReplay(eachRec, eachTmpRec):
                                NeedReplay = True
                                self.context.logger.debug(
                                    "Primary and Standby may be not in sync.")
                                break
                        if NeedReplay:
                            break
            else:
                NeedReplay = False

            # The standby replay position may keep falling behind the
            # primary flush position if it is at the end of one xlog page
            # and the free space is less than the xlog record header size.
            # We do a checkpoint to avoid such a situation.
            if datetime.now() > RefreshTime and NeedReplay:
                self.context.logger.debug(
                    "Execute CHECKPOINT to refresh xlog position.")
                refreshsql = "set statement_timeout=300000;CHECKPOINT;"
                (status, output) = ClusterCommand.remoteSQLCommand(
                    refreshsql,
                    self.context.user,
                    host.hostname,
                    host.port,
                    False,
                    DefaultValue.DEFAULT_DB_NAME,
                    IsInplaceUpgrade=True)
                if status != 0:
                    raise Exception(
                        ErrorCode.GAUSS_513["GAUSS_51300"] % refreshsql +
                        "Error: \n%s" % str(output))

            if datetime.now() > EndTime and NeedReplay:
                logStr = "WARNING: " + ErrorCode.GAUSS_513["GAUSS_51300"] % sql +\
                         " Timeout while waiting for standby replay."
                if catchupFailedOk:
                    self.context.logger.log(logStr)
                    return
                raise Exception(logStr)
            time.sleep(5)

    def getXlogPosition(self, output):
        """
        get xlog positions from the query output
        """
        tmpPosList = []
        resList = output.split('\n')
        for eachLine in resList:
            tmpRec = {}
            (flushPos, replayPos) = eachLine.split('|')
            (flushPosId, flushPosOff) = (flushPos.strip()).split('/')
            (replayPosId, replayPosOff) = (replayPos.strip()).split('/')
            tmpRec['nodeName'] = self.getHAShardingName()
            tmpRec['flushPosId'] = flushPosId.strip()
            tmpRec['flushPosOff'] = flushPosOff.strip()
            tmpRec['replayPosId'] = replayPosId.strip()
            tmpRec['replayPosOff'] = replayPosOff.strip()
            tmpPosList.append(tmpRec)
        return tmpPosList

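    # Each line handled by getXlogPosition() pairs two LSNs of the form
    # "<id>/<offset>" (both hex) for flush and replay; e.g. a line like
    #   0/3000168 | 0/3000100
    # becomes {'nodeName': ..., 'flushPosId': '0', 'flushPosOff': '3000168',
    # 'replayPosId': '0', 'replayPosOff': '3000100'}.
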
    def getHAShardingName(self):
        """
        in a centralized cluster, used to get the only sharding name
        """
        peerInsts = self.context.clusterInfo.getPeerInstance(self.dnInst)
        (instance_name, _, _) = ClusterInstanceConfig.\
            getInstanceInfoForSinglePrimaryMultiStandbyCluster(
                self.dnInst, peerInsts)
        return instance_name

    def needReplay(self, eachRec, eachTmpRec):
        """
        judge whether replay is still needed by comparing xlog positions
        """
        if eachRec['nodeName'] == eachTmpRec['nodeName'] \
                and (int(eachRec['flushPosId'], 16) > int(
                    eachTmpRec['replayPosId'], 16) or (
                    int(eachRec['flushPosId'], 16) == int(
                        eachTmpRec['replayPosId'], 16) and int(
                        eachRec['flushPosOff'], 16) > int(
                        eachTmpRec['replayPosOff'], 16))):
            return True
        else:
            return False

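    # Worked example for needReplay(): with a recorded flush position of
    # 0/3000168 and a current replay position of 0/3000100, the ids are
    # equal (0x0 == 0x0) and int('3000168', 16) > int('3000100', 16), so
    # the standby still has xlog to replay and the method returns True.
    # Once replay reaches or passes 0/3000168 it returns False.
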
    def backupOldClusterDBAndRelInfo(self):
        """
        function: backup old cluster db and rel info;
                  send cmd to the primary DN nodes
        input : NA
        output: NA
        """
        tmpFile = os.path.join(EnvUtil.getTmpDirFromEnv(
            self.context.user), const.TMP_DYNAMIC_DN_INFO)
        try:
            self.context.logger.debug("Start to backup old cluster database"
                                      " and relation information.")
            # prepare backup path
            backup_path = os.path.join(
                self.context.upgradeBackupPath, "oldClusterDBAndRel")
            cmd = "rm -rf '%s' && mkdir '%s' -m '%s' " % \
                  (backup_path, backup_path, DefaultValue.KEY_DIRECTORY_MODE)
            hostList = copy.deepcopy(self.context.clusterNodes)
            self.context.sshTool.executeCommand(cmd, hostList=hostList)
            # prepare the dynamic cluster info file on every node
            self.generateDynamicInfoFile(tmpFile)
            # get dn primary hosts
            dnPrimaryNodes = self.getPrimaryDnListFromDynamicFile()
            execHosts = list(set(dnPrimaryNodes))

            # send cmd to the primary dn nodes and exec
            cmd = "%s -t %s -U %s --upgrade_bak_path=%s -X '%s' -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_BACKUP_OLD_CLUSTER_DB_AND_REL,
                   self.context.user,
                   self.context.upgradeBackupPath,
                   self.context.xmlFile,
                   self.context.localLog)
            self.context.logger.debug(
                "Command for backing up old cluster database and "
                "relation information: %s." % cmd)
            self.context.sshTool.executeCommand(cmd, hostList=execHosts)
            self.context.logger.debug("Backing up information of all nodes.")
            self.context.logger.debug("Successfully backed up old cluster "
                                      "database and relation information")
        except Exception as e:
            raise Exception(str(e))
        finally:
            if os.path.exists(tmpFile):
                deleteCmd = "(if [ -f '%s' ]; then rm -f '%s'; fi) " % \
                            (tmpFile, tmpFile)
                hostList = copy.deepcopy(self.context.clusterNodes)
                self.context.sshTool.executeCommand(
                    deleteCmd, hostList=hostList)

    def generateDynamicInfoFile(self, tmpFile):
        """
        generate the dynamic info file and send it to every node
        :return: NA
        """
        self.context.logger.debug(
            "Start to generate dynamic info file and send to every node.")
        try:
            cmd = ClusterCommand.getQueryStatusCmd("", outFile=tmpFile)
            SharedFuncs.runShellCmd(cmd, self.context.user,
                                    self.context.userProfile)
            if not os.path.exists(tmpFile):
                raise Exception("Cannot generate the dynamic info file")
            self.context.distributeFileToSpecialNode(tmpFile,
                                                     os.path.dirname(tmpFile),
                                                     self.context.clusterNodes)
            self.context.logger.debug(
                "Successfully generated dynamic info file and sent it to "
                "every node.")
        except Exception as er:
            raise Exception("Failed to generate dynamic info file in "
                            "these nodes: {0}, error: {1}".format(
                                self.context.clusterNodes, str(er)))

    def getPrimaryDnListFromDynamicFile(self):
        """
        get the primary dn list from the dynamic file
        :return: primary dn list
        """
        try:
            self.context.logger.debug(
                "Start to get primary dn list from dynamic file.")
            tmpFile = os.path.join(EnvUtil.getTmpDirFromEnv(
                self.context.user), const.TMP_DYNAMIC_DN_INFO)
            if not os.path.exists(tmpFile):
                raise Exception(ErrorCode.GAUSS_529["GAUSS_50201"] % tmpFile)
            dynamicClusterStatus = DbClusterStatus()
            dynamicClusterStatus.initFromFile(tmpFile)
            cnAndPrimaryDnNodes = []
            # Find the primary DN instances
            for dbNode in dynamicClusterStatus.dbNodes:
                for instance in dbNode.datanodes:
                    if instance.status == 'Primary':
                        for staticDBNode in self.context.clusterInfo.dbNodes:
                            if staticDBNode.id == instance.nodeId:
                                cnAndPrimaryDnNodes.append(staticDBNode.name)
            result = list(set(cnAndPrimaryDnNodes))
            self.context.logger.debug("Successfully got primary dn list from "
                                      "dynamic file: {0}.".format(result))
            return result
        except Exception as er:
            raise Exception("Failed to get primary dn list from dynamic file. "
                            "Error:{0}".format(str(er)))

    def touchRollbackCatalogFlag(self):
        """
        before updating the system catalog, touch a flag file.
        """
        # touch the init flag file
        # during rollback, if the init flag file has not been touched,
        # we do not need to do catalog rollback.
        cmd = "touch '%s/touch_init_flag'" % self.context.upgradeBackupPath
        CmdExecutor.execCommandWithMode(cmd,
                                        self.context.sshTool,
                                        self.context.isSingle,
                                        self.context.userProfile)

    def updateCatalog(self):
        """
        function: update the catalog to the new version
        steps:
        1.prepare the update sql files and the check sql file
        2.do update catalog
        Input: NA
        output : NA
        """
        try:
            self.prepareSql("upgrade-post")
            self.prepareSql("upgrade")
            self.prepareSql("rollback-post")
            self.prepareSql("rollback")
            self.doUpgradeCatalog()
        except Exception as e:
            raise Exception(
                "Failed to execute update sql file. Error: %s" % str(e))

    def doUpgradeCatalog(self, postUpgrade=False):
        """
        function: update the catalog to the new version
        1.set the upgrade_from param
        2.start cluster
        3.touch init files and do pre-upgrade work
        4.connect to each database and update the catalog one by one
        5.stop cluster
        6.unset the upgrade_from param
        7.start cluster
        Input: postUpgrade, whether to run the *-post scripts
        output : NA
        """
        self.context.logger.debug("Start upgrade catalog.")
        try:
            if not postUpgrade:
                self.context.logger.debug("Not post upgrade.")
                self.setUpgradeFromParam(self.context.oldClusterNumber)
                if self.context.action == const.ACTION_INPLACE_UPGRADE:
                    self.setUpgradeMode(1, "set")
                    self.start_strategy(is_final=False)
                    self.touchInitFile()
                else:
                    # the guc parameter upgrade_from needs a cm_agent
                    # restart to take effect
                    self.setUpgradeMode(2)
                    self.reloadCmAgent()
                    # kill the snapshot thread in the kernel
                    self.context.killKernalSnapshotThread(self.dnInst)

                self.execRollbackUpgradedCatalog(scriptType="rollback")
                self.execRollbackUpgradedCatalog(scriptType="upgrade")
                self.pgxcNodeUpdateLocalhost("upgrade")
            else:
                self.context.logger.debug("Post upgrade.")
                self.waitClusterForNormal()
                # backup the global relmap file before doing upgrade-post
                self.backupGlobalRelmapFile()
                self.execRollbackUpgradedCatalog(scriptType="rollback-post")
                self.execRollbackUpgradedCatalog(scriptType="upgrade-post")

            self.getLsnInfo()
            if self.context.action == \
                    const.ACTION_INPLACE_UPGRADE and not postUpgrade and not \
                    int(float(self.context.newClusterNumber) * 1000) > 92298:
                self.updatePgproc()
        except Exception as e:
            raise Exception("update catalog failed. ERROR: %s" % str(e))

    def updatePgproc(self):
        """
        function: update pg_proc during large upgrade
        :return: NA
        """
        self.context.logger.debug(
            "Start to update pg_proc in inplace large upgrade.")
        # generate the new csv file
        execHosts = [self.dnInst.hostname]
        # send cmd to the node and exec
        cmd = "%s -t %s -U %s -R '%s' -l %s" % (
            OMCommand.getLocalScript("Local_Upgrade_Utility"),
            const.ACTION_CREATE_NEW_CSV_FILE,
            self.context.user,
            self.context.tmpDir,
            self.context.localLog)
        self.context.logger.debug(
            "Command for create new csv file: %s." % cmd)
        self.context.sshTool.executeCommand(cmd, hostList=execHosts)
        self.context.logger.debug(
            "Successfully created new csv file.")
        # select all databases
        database_list = self.getDatabaseList()
        # create pg_proc_temp_oids
        new_pg_proc_csv_path = '%s/pg_copydir/new_tbl_pg_proc_oids.csv' % \
                               self.dnInst.datadir
        self.createPgprocTempOids(new_pg_proc_csv_path, database_list)
        # create the pg_proc_temp_oids indexes
        self.createPgprocTempOidsIndex(database_list)
        # make a checkpoint
        self.replyXlog(database_list)
        # create pg_proc_mapping.txt to save the mapping between the pg_proc
        # file path and the pg_proc_temp_oids file path
        cmd = "%s -t %s -U %s -R '%s' -l %s" % (
            OMCommand.getLocalScript("Local_Upgrade_Utility"),
            const.ACTION_CREATE_PG_PROC_MAPPING_FILE,
            self.context.user,
            self.context.tmpDir,
            self.context.localLog)
        CmdExecutor.execCommandWithMode(
            cmd,
            self.context.sshTool,
            self.context.isSingle,
            self.context.userProfile)
        self.context.logger.debug(
            "Successfully created file to save mapping between pg_proc file "
            "path and pg_proc_temp_oids file path.")
        # stop cluster
        self.stop_strategy()
        # replace the pg_proc data file with the pg_proc_temp data file
        # send cmd to all nodes and exec
        cmd = "%s -t %s -U %s -R '%s' -l %s" % (
            OMCommand.getLocalScript("Local_Upgrade_Utility"),
            const.ACTION_REPLACE_PG_PROC_FILES,
            self.context.user,
            self.context.tmpDir,
            self.context.localLog)
        CmdExecutor.execCommandWithMode(
            cmd,
            self.context.sshTool,
            self.context.isSingle,
            self.context.userProfile)
        self.context.logger.debug(
            "Successfully replaced pg_proc data files.")

    def createPgprocTempOids(self, new_pg_proc_csv_path, database_list):
        """
        create the pg_proc_temp_oids table and load it from the csv file
        :return: NA
        """
        sql = \
            """START TRANSACTION; SET IsInplaceUpgrade = on;
            CREATE TABLE pg_proc_temp_oids (proname name NOT NULL,
            pronamespace oid NOT NULL, proowner oid NOT NULL, prolang oid
            NOT NULL, procost real NOT NULL, prorows real NOT NULL,
            provariadic oid NOT NULL, protransform regproc NOT NULL,
            proisagg boolean NOT NULL, proiswindow boolean NOT NULL,
            prosecdef boolean NOT NULL, proleakproof boolean NOT NULL,
            proisstrict boolean NOT NULL, proretset boolean NOT NULL,
            provolatile "char" NOT NULL, pronargs smallint NOT NULL,
            pronargdefaults smallint NOT NULL, prorettype oid NOT NULL,
            proargtypes oidvector NOT NULL, proallargtypes oid[],
            proargmodes "char"[], proargnames text[], proargdefaults
            pg_node_tree, prosrc text, probin text, proconfig text[],
            proacl aclitem[], prodefaultargpos int2vector,fencedmode boolean,
            proshippable boolean, propackage boolean, prokind "char" NOT
            NULL) with oids;"""
        sql += "copy pg_proc_temp_oids WITH OIDS from '%s' with " \
               "delimiter ',' csv header FORCE NOT NULL proargtypes;" % \
               new_pg_proc_csv_path
        sql += "COMMIT;"
        # sql to backfill the proisagg and proiswindow columns
        sql += \
            "update pg_proc_temp_oids set proisagg = CASE WHEN prokind = 'a' " \
            "THEN True ELSE False END, proiswindow = CASE WHEN prokind = 'w' " \
            "THEN True ELSE False END;"
        self.context.logger.debug("pg_proc_temp_oids sql is %s" % sql)
        # create the table in every database
        for eachdb in database_list:
            (status, output) = ClusterCommand.remoteSQLCommand(
                sql, self.context.user,
                self.dnInst.hostname, self.dnInst.port, False,
                eachdb, IsInplaceUpgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
                                " Error: \n%s" % str(output))

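    # The table above is a staging copy of pg_proc: it is created WITH OIDS
    # inside an IsInplaceUpgrade transaction, bulk-loaded from the csv dump
    # (COPY ... WITH OIDS preserves the original row OIDs), and then the
    # legacy proisagg/proiswindow flags are derived from the newer prokind
    # column ('a' = aggregate, 'w' = window function). Its data files later
    # replace the real pg_proc files while the cluster is stopped.
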
    def createPgprocTempOidsIndex(self, database_list):
        """
        create the pg_proc_oid_index_temp and
        pg_proc_proname_args_nsp_index_temp indexes
        :return: NA
        """
        sql = "CREATE UNIQUE INDEX pg_proc_oid_index_temp ON " \
              "pg_proc_temp_oids USING btree (oid) TABLESPACE pg_default;"
        sql += "CREATE UNIQUE INDEX pg_proc_proname_args_nsp_index_temp ON" \
               " pg_proc_temp_oids USING btree (proname, proargtypes," \
               " pronamespace) TABLESPACE pg_default;"
        # create the indexes in every database
        for eachdb in database_list:
            (status, output) = ClusterCommand.remoteSQLCommand(
                sql, self.context.user,
                self.dnInst.hostname, self.dnInst.port, False,
                eachdb, IsInplaceUpgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
                                " Error: \n%s" % str(output))

    def getDatabaseList(self):
        """
        get the database list of the cluster
        :return: database name list
        """
        self.context.logger.debug("Get database list in cluster.")
        sql = "select datname from pg_database;"
        (status, output) = ClusterCommand.remoteSQLCommand(
            sql, self.context.user,
            self.dnInst.hostname, self.dnInst.port, False,
            DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
                            " Error: \n%s" % str(output))
        if "" == output:
            raise Exception("No database objects were found in the cluster!")
        reslines = (output.strip()).split('\n')
        if (len(reslines) < 3
                or "template1" not in reslines
                or "template0" not in reslines
                or "postgres" not in reslines):
            raise Exception("The database list is invalid:%s." % str(reslines))
        self.context.logger.debug("Database list in cluster is %s." % reslines)
        return reslines

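    # getDatabaseList() sanity-checks the result: every healthy cluster must
    # contain at least the three built-in databases, so a valid return value
    # always looks like
    #   ['template1', 'template0', 'postgres', ...user databases...]
    # (row order depends on the query; only membership is checked).
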
    def replyXlog(self, database_list):
        """
        make a checkpoint in every database
        :return: NA
        """
        sql = 'CHECKPOINT;'
        for eachdb in database_list:
            (status, output) = ClusterCommand.remoteSQLCommand(
                sql, self.context.user,
                self.dnInst.hostname, self.dnInst.port, False,
                eachdb, IsInplaceUpgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
                                " Error: \n%s" % str(output))

    def execRollbackUpgradedCatalog(self, scriptType="rollback"):
        """
        function : connect to the database and rollback/upgrade the catalog
                   one by one
                   1.find a node that has a dn instance
                   2.scp sql files to that node
                   3.send cmd to that node and exec
        input : scriptType
        output: NA
        """
        self.context.logger.debug("Start to {0} catalog.".format(scriptType))
        try:
            dnNodeName = self.dnInst.hostname
            if dnNodeName == "":
                raise Exception(ErrorCode.GAUSS_526["GAUSS_52602"])
            self.context.logger.debug("dn node is {0}".format(dnNodeName))
            # scp sql files to that node
            maindb_sql = "%s/%s_catalog_maindb_tmp.sql" \
                         % (self.context.upgradeBackupPath, scriptType)
            otherdb_sql = "%s/%s_catalog_otherdb_tmp.sql" \
                          % (self.context.upgradeBackupPath, scriptType)
            if "upgrade" == scriptType:
                check_upgrade_sql = \
                    "%s/check_upgrade_tmp.sql" % self.context.upgradeBackupPath
                if not os.path.isfile(check_upgrade_sql):
                    raise Exception(
                        ErrorCode.GAUSS_502["GAUSS_50210"] % check_upgrade_sql)
                self.context.logger.debug("Scp {0} file to nodes {1}".format(
                    check_upgrade_sql, dnNodeName))
                if not self.context.isSingle:
                    LocalRemoteCmd.scpFile(dnNodeName, check_upgrade_sql,
                                           self.context.upgradeBackupPath)
            if not os.path.isfile(maindb_sql):
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % maindb_sql)
            if not os.path.isfile(otherdb_sql):
                raise Exception(
                    ErrorCode.GAUSS_502["GAUSS_50210"] % otherdb_sql)
            if not self.context.isSingle:
                LocalRemoteCmd.scpFile(dnNodeName, maindb_sql,
                                       self.context.upgradeBackupPath)
                LocalRemoteCmd.scpFile(dnNodeName, otherdb_sql,
                                       self.context.upgradeBackupPath)
            self.context.logger.debug(
                "Scp {0} file and {1} file to nodes {2}".format(
                    maindb_sql, otherdb_sql, dnNodeName))
            # send cmd to that node and exec
            cmd = "%s -t %s -U %s --upgrade_bak_path=%s --script_type=%s -l " \
                  "%s" % (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                          const.ACTION_UPDATE_CATALOG,
                          self.context.user,
                          self.context.upgradeBackupPath,
                          scriptType,
                          self.context.localLog)
            self.context.logger.debug(
                "Command for executing {0} catalog.".format(scriptType))
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.userProfile,
                                            [dnNodeName])
            self.context.logger.debug(
                "Successfully {0} catalog.".format(scriptType))
        except Exception as e:
            self.context.logger.log("Failed to {0} catalog.".format(scriptType))
            if not self.context.forceRollback:
                raise Exception(str(e))

    def pgxcNodeUpdateLocalhost(self, mode):
        """
        This function is used to modify the localhost entries of the
        pgxc_node system table
        :param mode: "upgrade" or rollback
        :return: NA
        """
        try:
            if int(float(self.context.newClusterNumber) * 1000) < 92069 or \
                    int(float(self.context.oldClusterNumber) * 1000) >= 92069:
                return
            if mode == "upgrade":
                self.context.logger.debug("Update localhost in pgxc_node.")
            else:
                self.context.logger.debug("Rollback localhost in pgxc_node.")
            for dbNode in self.context.clusterInfo.dbNodes:
                for dn in dbNode.datanodes:
                    sql = "START TRANSACTION;"
                    sql += "SET %s = on;" % const.ON_INPLACE_UPGRADE
                    if mode == "upgrade":
                        sql += "UPDATE PGXC_NODE SET node_host = '%s', " \
                               "node_host1 = '%s' WHERE node_host = " \
                               "'localhost'; " % (dn.listenIps[0],
                                                  dn.listenIps[0])
                    else:
                        sql += "UPDATE PGXC_NODE SET node_host = " \
                               "'localhost', node_host1 = 'localhost' WHERE" \
                               " node_type = 'C' and node_host = '%s';" %\
                               (dn.listenIps[0])
                    sql += "COMMIT;"
                    self.context.logger.debug("Current sql %s." % sql)
                    (status, output) = ClusterCommand.remoteSQLCommand(
                        sql, self.context.user, dn.hostname, dn.port,
                        False, DefaultValue.DEFAULT_DB_NAME,
                        IsInplaceUpgrade=True)
                    if status != 0:
                        if self.context.forceRollback:
                            self.context.logger.debug("In forceRollback, "
                                                      "roll back pgxc_node. "
                                                      "%s " % str(output))
                        else:
                            raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"]
                                            % sql + " Error: \n%s" %
                                            str(output))
            if mode == "upgrade":
                self.context.logger.debug(
                    "Successfully updated localhost in pgxc_node.")
            else:
                self.context.logger.debug(
                    "Successfully rolled back localhost in pgxc_node.")
        except Exception as e:
            raise Exception(str(e))

    def touchInitFile(self):
        """
        function: touch the upgrade init file for every primary/standby and
                  do pre-upgrade work
        input : NA
        output: NA
        """
        try:
            if self.isLargeInplaceUpgrade:
                self.context.logger.debug("Start to create upgrade init file.")
                # send cmd to all nodes and exec
                cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \
                      (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                       const.ACTION_TOUCH_INIT_FILE,
                       self.context.user,
                       self.context.upgradeBackupPath,
                       self.context.localLog)
                CmdExecutor.execCommandWithMode(cmd,
                                                self.context.sshTool,
                                                self.context.isSingle,
                                                self.context.userProfile)
                self.context.logger.debug(
                    "Successfully created upgrade init file.")
        except Exception as e:
            raise Exception(str(e))

    def prepareSql(self, mode="rollback"):
        """
        function : prepare the temporary sql files, e.g. for rollback:
                   rollback_catalog_maindb_tmp.sql and
                   rollback_catalog_otherdb_tmp.sql;
                   for each result file, filter all matching files and merge
                   them into the *_tmp.sql file
        :param mode: can be rollback, rollback-post, upgrade or upgrade-post
        """
        try:
            self.prepareSqlForDb(mode)
            self.prepareSqlForDb(mode, "otherdb")
            if mode == "upgrade":
                self.prepareCheckSql()
        except Exception as e:
            raise Exception("Failed to prepare %s sql file. ERROR: %s"
                            % (mode, str(e)))

    def prepareSqlForDb(self, mode, dbType="maindb"):
        self.context.logger.debug(
            "Start to prepare {0} sql files for {1}.".format(mode, dbType))
        header = self.getSqlHeader()
        if "upgrade" in mode:
            listName = "upgrade"
        else:
            listName = "rollback"
        fileNameList = self.getFileNameList("{0}_catalog_{1}".format(
            listName, dbType), mode)
        if "rollback" in mode:
            fileNameList.sort(reverse=True)
        else:
            fileNameList.sort()
        if 'rollback_catalog_maindb_92_506.sql' in fileNameList:
            fileNameList.remove('rollback_catalog_maindb_92_506.sql')
            fileNameList.append('rollback_catalog_maindb_92_506.sql')
        if 'rollback_catalog_otherdb_92_506.sql' in fileNameList:
            fileNameList.remove('rollback_catalog_otherdb_92_506.sql')
            fileNameList.append('rollback_catalog_otherdb_92_506.sql')
        fileName = "{0}_catalog_{1}_tmp.sql".format(mode, dbType)
        self.context.logger.debug("The real file list for %s: %s" % (
            dbType, fileNameList))
        self.togetherFile(header, "{0}_catalog_{1}".format(listName, dbType),
                          fileNameList, fileName)
        self.context.logger.debug("Successfully prepared sql files for %s."
                                  % dbType)

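    # Ordering matters when the per-version sql files are merged: upgrade
    # scripts run oldest-first (plain sort) while rollback scripts run
    # newest-first (reverse sort), so each step undoes the matching upgrade.
    # With hypothetical files rollback_catalog_maindb_92_601.sql and
    # rollback_catalog_maindb_92_298.sql, the rollback order is 92_601 then
    # 92_298; the 92_506 scripts are special-cased to always run last.
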
    def prepareCheckSql(self):
        header = ["START TRANSACTION;"]
        fileNameList = self.getFileNameList("check_upgrade")
        fileNameList.sort()
        if 'rollback_catalog_maindb_92_506.sql' in fileNameList:
            fileNameList.remove('rollback_catalog_maindb_92_506.sql')
            fileNameList.append('rollback_catalog_maindb_92_506.sql')
        if 'rollback_catalog_otherdb_92_506.sql' in fileNameList:
            fileNameList.remove('rollback_catalog_otherdb_92_506.sql')
            fileNameList.append('rollback_catalog_otherdb_92_506.sql')
        self.context.logger.debug("The real file list for checking upgrade: "
                                  "%s" % fileNameList)
        self.togetherFile(header, "check_upgrade", fileNameList,
                          "check_upgrade_tmp.sql")

    def togetherFile(self, header, filePathName, fileNameList, executeFileName):
        writeFile = ""
        try:
            filePath = "%s/upgrade_sql/%s" % (self.context.upgradeBackupPath,
                                              filePathName)
            self.context.logger.debug("Preparing [%s]." % filePath)
            writeFile = "%s/%s" % (self.context.upgradeBackupPath,
                                   executeFileName)
            FileUtil.createFile(writeFile)
            FileUtil.writeFile(writeFile, header, 'w')

            with open(writeFile, 'a') as sqlFile:
                for each_file in fileNameList:
                    each_file_with_path = "%s/%s" % (filePath, each_file)
                    self.context.logger.debug("Handling file: %s" %
                                              each_file_with_path)
                    with open(each_file_with_path, 'r') as fp:
                        for line in fp:
                            sqlFile.write(line)
                    sqlFile.write(os.linesep)
            FileUtil.writeFile(writeFile, ["COMMIT;"], 'a')
            self.context.logger.debug(
                "Successfully merged the {0} file".format(writeFile))
            if not os.path.isfile(writeFile):
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % writeFile)
        except Exception as e:
            raise Exception("Failed to write {0} sql file. ERROR: {1}".format(
                writeFile, str(e)))

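    # The merged file produced by togetherFile() therefore has the shape
    #   <header lines, e.g. START TRANSACTION;>
    #   <contents of file 1>
    #   <contents of file 2>
    #   ...
    #   COMMIT;
    # so the whole per-version script sequence executes as one transaction.
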
    def om_stop_cluster(self):
        """
        Stop the cluster with gs_om
        """
        cmd = "source %s ;gs_om -t stop" % self.context.userProfile
        status, output = subprocess.getstatusoutput(cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] % "cluster" +
                            "Output: %s" % output)
        self.context.logger.log("Stopped cluster with gs_om successfully.")

    def om_start_cluster(self):
        """
        Start the cluster with gs_om
        """
        cmd = "source %s ;gs_om -t start" % self.context.userProfile
        status, output = subprocess.getstatusoutput(cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "cluster" +
                            "Output: %s" % output)
        self.context.logger.debug("Started cluster with gs_om successfully.")

    def get_cms_num(self, cluster_config_file):
        """
        Get the cm_server count from a static config file
        """
        cluster_info = dbClusterInfo()
        cluster_info.initFromStaticConfig(self.context.user,
                                          cluster_config_file)
        return DefaultValue.get_cm_server_num_from_static(cluster_info)

    def _get_strategy_with_cm_num(self, old_cm_num, new_cm_num):
        """
        Get the strategy code from the CM server instance numbers
        """
        if new_cm_num == 0 and old_cm_num == 0:
            self.context.logger.debug("No CM instance exists in the new and "
                                      "old clusters.")
            return 0
        if new_cm_num > 0 and old_cm_num == 0:
            self.context.logger.debug("The new cluster has CM components "
                                      "but the old cluster does not.")
            return 1
        if new_cm_num > 0 and old_cm_num > 0:
            self.context.logger.debug("Both the old and new clusters have "
                                      "CM components.")
            return 2
        else:
            return -1

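    # Strategy codes returned above, for reference:
    #   (old_cm_num, new_cm_num) == (0, 0)  -> 0  (no CM at all)
    #   (0, >0)                             -> 1  (CM is newly introduced)
    #   (>0, >0)                            -> 2  (CM on both sides)
    #   (>0, 0)                             -> -1 (fallthrough: CM only in
    #                                              the old cluster)
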
    def get_upgrade_cm_strategy(self):
        """
        Get the strategy code for starting and stopping the cluster
        """
        old_cluster_config_file = \
            os.path.realpath(os.path.join(self.context.oldClusterAppPath,
                                          "bin", "cluster_static_config"))
        new_cluster_config_file = \
            os.path.realpath(os.path.join(self.context.newClusterAppPath,
                                          "bin", "cluster_static_config"))

        if not os.path.isfile(new_cluster_config_file):
            self.context.logger.debug("Start cluster with om tool, "
                                      "[{0}]".format(new_cluster_config_file))
            if os.path.isfile(old_cluster_config_file):
                if self.get_cms_num(old_cluster_config_file) == 0:
                    return 0
                else:
                    return 2
            return -1

        new_cm_num = self.get_cms_num(new_cluster_config_file)

        if not os.path.isfile(old_cluster_config_file):
            self.context.logger.debug("Old static_config_file does not exist "
                                      "[{0}]".format(old_cluster_config_file))
            if new_cm_num == 0:
                return 0
            else:
                return 2

        old_cm_num = self.get_cms_num(old_cluster_config_file)
        return self._get_strategy_with_cm_num(old_cm_num, new_cm_num)

    def start_strategy(self, is_final=True):
        """
        Start the cluster according to the CM strategy
        """
        cm_strategy = self.get_upgrade_cm_strategy()
        if cm_strategy == 0:
            self.startCluster()
        elif cm_strategy == 1:
            if is_final:
                self.om_start_cluster()
            else:
                self.startCluster()
        else:
            self.om_start_cluster()

    def stop_strategy(self, is_final=True):
        """
        Stop the cluster according to the CM strategy
        """
        cm_strategy = self.get_upgrade_cm_strategy()
        if cm_strategy == 0:
            self.stopCluster()
        elif cm_strategy == 1:
            if is_final:
                self.om_stop_cluster()
            else:
                self.stopCluster()
        else:
            self.om_stop_cluster()

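    # Putting the pieces together: strategy 0 (no CM) always uses the
    # internal startCluster()/stopCluster() path; strategy 1 (CM added by
    # this upgrade) uses gs_om only for the final start/stop, presumably
    # because the new CM components are not usable until the new binaries
    # take effect; strategies 2 and -1 fall through to gs_om.
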
    def modifyPgProcIndex(self):
        """
        1. execute the SQL that rebuilds the pg_proc indexes
        2. make a checkpoint
        3. stop the cluster
        4. start the cluster
        :return:
        """
        self.context.logger.debug("Begin to modify pg_proc index.")
        time.sleep(3)
        database_list = self.getDatabaseList()
        # execute the SQL that rebuilds pg_proc_oid_index
        sql = """START TRANSACTION;SET IsInplaceUpgrade = on;
        drop index pg_proc_oid_index;SET LOCAL
        inplace_upgrade_next_system_object_oids=IUO_CATALOG,false,
        true,0,0,0,2690;CREATE UNIQUE INDEX pg_proc_oid_index ON pg_proc
        USING btree (oid);SET LOCAL
        inplace_upgrade_next_system_object_oids=IUO_CATALOG,false,
        true,0,0,0,0;commit;CHECKPOINT;"""
        for eachdb in database_list:
            (status, output) = ClusterCommand.remoteSQLCommand(
                sql, self.context.user,
                self.dnInst.hostname, self.dnInst.port, False,
                eachdb, IsInplaceUpgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
                                " Error: \n%s" % str(output))
        # execute the SQL that rebuilds pg_proc_proname_args_nsp_index
        sql = """START TRANSACTION;SET IsInplaceUpgrade = on;
        drop index pg_proc_proname_args_nsp_index;SET LOCAL
        inplace_upgrade_next_system_object_oids=IUO_CATALOG,false,
        true,0,0,0,2691;create UNIQUE INDEX pg_proc_proname_args_nsp_index
        ON pg_proc USING btree (proname, proargtypes, pronamespace);SET
        LOCAL inplace_upgrade_next_system_object_oids=IUO_CATALOG,false,
        true,0,0,0,0;commit;CHECKPOINT;"""
        for eachdb in database_list:
            (status, output) = ClusterCommand.remoteSQLCommand(
                sql, self.context.user,
                self.dnInst.hostname, self.dnInst.port, False,
                eachdb, IsInplaceUpgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
                                " Error: \n%s" % str(output))
        # stop cluster
        self.stop_strategy()
        # start cluster
        self.start_strategy()
        self.context.logger.debug("Successfully modified pg_proc index.")

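    # Note on the magic numbers in the SQL above: 2690 and 2691 are the
    # catalog OIDs of pg_proc_oid_index and pg_proc_proname_args_nsp_index.
    # Setting inplace_upgrade_next_system_object_oids before CREATE INDEX
    # appears to pin the OID of the rebuilt index so it keeps its original
    # catalog identity, and the second SET LOCAL (all zeros) clears the
    # pin; this reading is inferred from the SQL itself.
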
    def setNewVersionGuc(self):
        """
        function: set new version guc
        input : NA
        output : NA
        """
        pass

    def setActionFile(self):
        """
        Set the action from the step file; if it is not found, set it to
        large upgrade. If the upgrade type is small upgrade but we set it
        to large upgrade, we just kill the cm_agent as an extra expense,
        with no effect on transactions. But if the action should be large
        and we do not set the upgrade_mode, some new features will not be
        enabled.
        :return: NA
        """
        stepFile = os.path.join(self.context.upgradeBackupPath,
                                const.GREY_UPGRADE_STEP_FILE)
        self.context.logger.debug("Get the action from file %s." % stepFile)
        if not os.path.isfile(stepFile):
            self.context.logger.debug("Step file does not exist or is not a"
                                      " regular file, cannot get action"
                                      " from it. Set it to large upgrade.")
            self.context.action = const.ACTION_LARGE_UPGRADE
            return
        with open(stepFile, 'r') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                self.context.action = row['upgrade_action']
                break
        self.context.logger.debug("Set the action to %s"
                                  % self.context.action)

    def getClusterAppPath(self, mode=const.OLD):
        """
        If the path cannot be got from the table, try to get it from the
        backup file
        :param mode:
        :return:
        """
        self.context.logger.debug("Get the install path from table or file.")
        path = self.getClusterAppPathFromFile(mode)
        return path

    def getClusterAppPathFromFile(self, mode=const.OLD):
        """
        get the app path from the backup dir, mode is 'new' or 'old'
        :param mode: 'old', 'new'
        :return: the real path of appPath
        """
        dirFile = "%s/%s" % (self.context.upgradeBackupPath,
                             const.RECORD_UPGRADE_DIR)
        self.context.logger.debug("Get the %s app path from file %s"
                                  % (mode, dirFile))
        if mode not in [const.OLD, const.NEW]:
            raise Exception(traceback.format_exc())
        if not os.path.exists(dirFile):
            self.context.logger.debug(ErrorCode.GAUSS_502["GAUSS_50201"]
                                      % dirFile)
            if self.checkBakPathNotExists():
                return ""
            # Copy the binary_upgrade dir from another node. If one node
            # is damaged, binary_upgrade may disappear; the user repairs
            # the node before commit and sends the commit command to the
            # repaired node, so we need to copy the dir from a remote node.
            cmd = "if [ -f '%s' ]; then echo 'GetFile';" \
                  " else echo 'NoThisFile'; fi" % dirFile
            self.context.logger.debug("Command for checking file: %s" % cmd)
            (status, output) = self.context.sshTool.getSshStatusOutput(
                cmd, self.context.clusterNodes, self.context.mpprcFile)
            outputMap = self.context.sshTool.parseSshOutput(
                self.context.clusterNodes)
            self.context.logger.debug("Output: %s" % output)
            copyNode = ""
            for node in self.context.clusterNodes:
                if status[node] == DefaultValue.SUCCESS:
                    if 'GetFile' in outputMap[node]:
                        copyNode = node
                        break
            if copyNode:
                if not os.path.exists(self.context.upgradeBackupPath):
                    self.context.logger.debug("Create directory %s."
                                              % self.context.tmpDir)
                    FileUtil.createDirectory(
                        self.context.upgradeBackupPath, True,
                        DefaultValue.KEY_DIRECTORY_MODE)
                self.context.logger.debug("Copy the directory %s from node %s."
                                          % (self.context.upgradeBackupPath,
                                             copyNode))
                cmd = LocalRemoteCmd.getRemoteCopyCmd(
                    self.context.upgradeBackupPath, self.context.tmpDir,
                    str(copyNode), False, 'directory')
                self.context.logger.debug("Command for copying "
                                          "directory: %s" % cmd)
                CmdExecutor.execCommandLocally(cmd)
            else:
                # binary_upgrade exists, but there is no step file
                return ""
        if not os.path.isfile(dirFile):
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] % dirFile)
        with open(dirFile, 'r') as fp:
            retLines = fp.readlines()
        if len(retLines) != 2:
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"] % dirFile)
        if mode == const.OLD:
            path = retLines[0].strip()
        else:
            path = retLines[1].strip()
        # If the path can be read from the file it must be valid;
        # otherwise the file was damaged accidentally
        DefaultValue.checkPathVaild(path)
        if not os.path.exists(path):
            if mode == const.NEW and \
                    self.context.action == const.ACTION_AUTO_ROLLBACK:
                self.context.logger.debug("Under rollback, the new "
                                          "cluster app path does not exist.")
            elif mode == const.OLD and \
                    self.context.action == const.ACTION_COMMIT_UPGRADE:
                self.context.logger.debug("Under commit, no need to "
                                          "check that the old path exists.")
            else:
                self.context.logger.debug(traceback.format_exc())
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % path)
        self.context.logger.debug("Successfully got the app"
                                  " path [%s] from file" % path)
        return path

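    # The record file (const.RECORD_UPGRADE_DIR) is a two-line text file:
    # line 1 holds the old cluster app path and line 2 the new one, which
    # is why exactly two lines are required above and mode selects the
    # line index.
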
    def printPrecommitBanner(self):
        """
        function: if in pre-commit status and the commit command has not
                  been executed yet, print this message
        input : NA
        output: NA
        """
        self.context.logger.log("Upgrade main process has been finished,"
                                " user can do some check now.")
        self.context.logger.log("Once the check done, please execute "
                                "following command to commit upgrade:")
        xmlFile = self.context.xmlFile \
            if len(self.context.xmlFile) else "XMLFILE"
        self.context.logger.log("\n gs_upgradectl -t "
                                "commit-upgrade -X %s \n" % xmlFile)

    def doGreyCommitUpgrade(self):
        """
        function: commit the grey upgrade and clean up backup files
        1. unset read-only
        2. drop the old PMK schema
        3. clean up other upgrade tmp files
        input : NA
        output: NA
        """
        try:
            (status, output) = self.doHealthCheck(const.OPTION_POSTCHECK)
            if status != 0:
                raise Exception(
                    "NOTICE: " + ErrorCode.GAUSS_516[
                        "GAUSS_51601"] % "cluster" + output)
            if self.unSetClusterReadOnlyMode() != 0:
                raise Exception("NOTICE: " + ErrorCode.GAUSS_529["GAUSS_52907"])

            if not (self.isNodeSpecifyStep(GreyUpgradeStep.STEP_PRE_COMMIT)
                    or self.isNodeSpecifyStep(
                        GreyUpgradeStep.STEP_BEGIN_COMMIT)):
                raise Exception(ErrorCode.GAUSS_529["GAUSS_52916"])
            # if commit is re-entered, the schema may have been deleted
            if self.existTable(const.RECORD_NODE_STEP):
                self.recordNodeStep(GreyUpgradeStep.STEP_BEGIN_COMMIT)
            self.setActionFile()
            if self.context.action == const.ACTION_LARGE_UPGRADE:
                if DefaultValue.get_cm_server_num_from_static(
                        self.context.clusterInfo) > 0:
                    self.setUpgradeFromParam(const.UPGRADE_UNSET_NUM)
                    self.reloadCmAgent()
                    self.reload_cmserver(is_final=True)
                self.setUpgradeMode(0)
                time.sleep(10)
            if self.dropPMKSchema() != 0:
                raise Exception(ErrorCode.GAUSS_529["GAUSS_52917"])

            self.clearOtherToolPackage()
            self.cleanInstallPath(const.OLD)
            self.dropSupportSchema()
            self.cleanBinaryUpgradeBakFiles()
            self.cleanConfBakOld()
            # remove the tmp global relmap file
            self.cleanTmpGlobalRelampFile()
            self.context.logger.log("Commit upgrade succeeded.")
        except Exception as e:
            self.exitWithRetCode(const.ACTION_COMMIT_UPGRADE, False, str(e))
        self.exitWithRetCode(const.ACTION_COMMIT_UPGRADE, True)

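    # Commit is the point of no return: once STEP_BEGIN_COMMIT has been
    # recorded, doGreyBinaryRollback() below refuses to roll back and asks
    # the user to commit again instead.
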
    def dropPMKSchema(self):
        """
        function: drop the pmk schema on database postgres
        input : NA
        output: return 0, if the operation is done successfully.
                return 1, if the operation failed.
        """
        try:
            self.context.logger.debug("Start to drop schema PMK.")
            # execute the drop command on the CN instance
            sql = "DROP SCHEMA IF EXISTS pmk CASCADE; "
            retry_times = 0
            while True:
                (status, output) = self.execSqlCommandInPrimaryDN(sql)
                if status != 0 or SqlResult.findErrorInSql(output):
                    if retry_times < 12:
                        self.context.logger.debug(
                            "ERROR: Failed to DROP SCHEMA pmk for the %d time."
                            " Error: \n%s" % (retry_times + 1, str(output)))
                    else:
                        raise Exception(
                            ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
                            " Error: \n%s" % str(output))
                else:
                    break

                time.sleep(5)
                retry_times += 1
            self.context.logger.debug("Successfully deleted schema PMK.")
            return 0
        except Exception as e:
            self.context.logger.log(
                "NOTICE: Failed to execute SQL command on CN instance, "
                + "please re-commit upgrade once again or " +
                "re-execute SQL command 'DROP SCHEMA "
                "IF EXISTS pmk CASCADE' manually.")
            self.context.logger.debug(str(e))
            return 1

    def cleanConfBakOld(self):
        """
        clean conf.bak.old files in all instances
        input : NA
        output : NA
        """
        try:
            cmd = "%s -t %s -U %s -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_CLEAN_CONF_BAK_OLD,
                   self.context.user,
                   self.context.localLog)
            hostList = copy.deepcopy(self.context.nodeNames)
            self.context.sshTool.executeCommand(cmd, hostList=hostList)
        except Exception as e:
            raise Exception(str(e))
        self.context.logger.debug(
            "Successfully cleaned conf.bak.old in all instances.")

    def doGreyBinaryRollback(self, action=""):
        """
        function: roll back the binary upgrade
        input : NA
        output: return True, if the operation is done successfully.
                return False, if the operation failed.
        """
        self.context.logger.log("Performing grey rollback.")
        # Before the upgrade functions and table have been prepared, or
        # after commit, a missing table means there is nothing to roll
        # back. If we read the step from the file, a forced rollback has
        # happened and the record in the table may differ from the file,
        # so we can only read the step from the file.
        try:
            self.distributeXml()
            if action == const.ACTION_AUTO_ROLLBACK:
                self.clearOtherToolPackage(action)
            try:
                self.getOneDNInst(True)
            except Exception as e:
                # an available DN is not guaranteed under force rollback
                if self.context.forceRollback:
                    self.context.logger.debug("Error: %s" % str(e))
                else:
                    raise Exception(str(e))
            # if the cluster is degraded and the CN is down, the set
            # command will fail; ignore the error
            if self.unSetClusterReadOnlyMode() != 0:
                self.context.logger.log(
                    "WARNING: Failed to unset cluster read only mode.")

            if self.context.forceRollback:
                # if one node has been uninstalled,
                # there will be no binary_upgrade dir
                self.createBakPath()
                self.setReadStepFromFile()
                self.createGphomePack()
            # The first time the user may force the rollback and the next
            # time not, so the step file and the step table may disagree;
            # therefore we can only read the step from the file (whether
            # they should be synchronized is an open, but unimportant,
            # question). Under force upgrade, only read the step from the
            # file.
            maxStep = self.getNodeStep()
            # -2 means there is no need to execute rollback. Under
            # upgrade-continue mode it will do upgrade, not rollback, and
            # it can enter the upgrade process when the binary_upgrade bak
            # dir contains some files.
            if maxStep == const.BINARY_UPGRADE_NO_NEED_ROLLBACK:
                self.cleanBinaryUpgradeBakFiles(True)
                self.context.logger.log("No need to rollback.")
                return True

            elif maxStep == GreyUpgradeStep.STEP_BEGIN_COMMIT:
                self.context.logger.log(
                    ErrorCode.GAUSS_529["GAUSS_52919"] +
                    " Please commit again! Can not rollback any more.")
                return False

            # Mark that we leave pre-commit status, so that if we fail at
            # the first few steps, we won't be allowed to commit upgrade
            # any more.
            elif maxStep == GreyUpgradeStep.STEP_PRE_COMMIT:
                nodes = self.getNodesWithStep(maxStep)
                self.recordNodeStep(
                    GreyUpgradeStep.STEP_UPDATE_POST_CATALOG, nodes)
                maxStep = self.getNodeStep()
            if maxStep == GreyUpgradeStep.STEP_UPDATE_POST_CATALOG:
                self.context.logger.debug(
                    "Record the step %d to mark that it has left pre-commit"
                    " status." % GreyUpgradeStep.STEP_UPDATE_POST_CATALOG)
                try:
                    if self.context.action == const.ACTION_LARGE_UPGRADE \
                            and self.isNodeSpecifyStep(
                                GreyUpgradeStep.STEP_UPDATE_POST_CATALOG):
                        self.prepareUpgradeSqlFolder()
                        self.prepareSql("rollback-post")
                        self.setUpgradeMode(2)
                        self.execRollbackUpgradedCatalog(
                            scriptType="rollback-post")
                        # restore the old relmap file after rollback-post
                        self.restoreGlobalRelampFile()
                except Exception as e:
                    if self.context.forceRollback:
                        self.context.logger.debug("Error: %s" % str(e))
                    else:
                        raise Exception(str(e))
                nodes = self.getNodesWithStep(maxStep)
                self.recordNodeStep(
                    GreyUpgradeStep.STEP_UPGRADE_PROCESS, nodes)
            # roll back the nodes from maxStep; each node does its own
            # rollback
            needSwitchProcess = False
            if maxStep >= GreyUpgradeStep.STEP_UPGRADE_PROCESS:
                needSwitchProcess = True

            if maxStep >= GreyUpgradeStep.STEP_SWITCH_NEW_BIN:
                self.greyRestoreConfig()
                self.clean_cm_instance()
                self.switchBin(const.OLD)
                self.greyRestoreGuc()
                if needSwitchProcess:
                    self.rollbackHotpatch()
                    self.getOneDNInst(checkNormal=True)
                    self.switchExistsProcess(True)
                self.recordNodeStep(GreyUpgradeStep.STEP_UPDATE_CATALOG)
            if maxStep >= GreyUpgradeStep.STEP_UPDATE_CATALOG and \
                    self.context.action == const.ACTION_LARGE_UPGRADE:
                self.rollbackCatalog()
                self.recordNodeStep(GreyUpgradeStep.STEP_INIT_STATUS)

            if maxStep >= GreyUpgradeStep.STEP_INIT_STATUS:
                # clean on all nodes, because the binary_upgrade temp dir
                # is created on every node
                self.cleanInstallPath(const.NEW)
                self.getOneDNInst()
                self.dropSupportSchema()
                self.initOmRollbackProgressFile()
                self.cleanBinaryUpgradeBakFiles(True)
                self.cleanTmpGlobalRelampFile()
        except Exception as e:
            self.context.logger.debug(str(e))
            self.context.logger.debug(traceback.format_exc())
            self.context.logger.log("Rollback failed. Error: %s" % str(e))
            return False
        self.context.logger.log("Rollback succeeded.")
        return True

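    # The rollback above walks the grey upgrade steps downward (pre-commit
    # -> post-catalog rollback -> process switch -> binary switch ->
    # catalog rollback -> init status), recording the step after each
    # stage, so an interrupted rollback can be re-run and resume from the
    # recorded step.
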
    def setReadStepFromFile(self):
        readFromFileFlag = os.path.join(self.context.upgradeBackupPath,
                                        const.READ_STEP_FROM_FILE_FLAG)
        self.context.logger.debug("Under force rollback mode.")
        FileUtil.createFile(readFromFileFlag, True, DefaultValue.KEY_FILE_MODE)
        self.distributeFile(readFromFileFlag)
        self.context.logger.debug("Create file %s. " % readFromFileFlag +
                                  "Only read step from file.")

    def getNodeStep(self):
        """
        get the node step from file or table
        """
        maxStep = self.getNodeStepFile()
        return maxStep

    def getNodeStepFile(self):
        if not os.path.exists(self.context.upgradeBackupPath):
            self.context.logger.debug("Directory %s does not exist. "
                                      "Only clean remaining files and schema."
                                      % self.context.upgradeBackupPath)
            return const.BINARY_UPGRADE_NO_NEED_ROLLBACK
        if not os.path.isdir(self.context.upgradeBackupPath):
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50211"] %
                            self.context.upgradeBackupPath)
        # because the binary_upgrade dir is used to block expansion,
        # we should clean the dir when rolling back
        fileList = os.listdir(self.context.upgradeBackupPath)
        if not fileList:
            return GreyUpgradeStep.STEP_INIT_STATUS
        stepFile = os.path.join(self.context.upgradeBackupPath,
                                const.GREY_UPGRADE_STEP_FILE)
        if not os.path.exists(stepFile):
            self.context.logger.debug(
                "No need to rollback. File %s does not exist." % stepFile)
            return const.BINARY_UPGRADE_NO_NEED_ROLLBACK

        self.context.logger.debug("Get the node step from file %s." % stepFile)
        with open(stepFile, 'r') as csvfile:
            reader = csv.DictReader(csvfile)
            maxStep = const.INVALID_UPRADE_STEP
            for row in reader:
                self.checkStep(row['step'])
                maxStep = max(int(row['step']), maxStep)
                if row['upgrade_action'] != self.context.action:
                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50222"] %
                                    stepFile +
                                    "\nIncorrect upgrade strategy, input "
                                    "upgrade type: %s; record upgrade type: %s"
                                    % (self.context.action,
                                       row['upgrade_action']))
        self.context.logger.debug("Get the max step [%d] from file." % maxStep)
        self.context.logger.debug(
            "Successfully got the node step from file %s." % stepFile)
        return maxStep

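    # The step file is a CSV with (at least) the columns 'node_host',
    # 'upgrade_action' and 'step'; getNodeStepFile() takes the maximum
    # step over all rows so that rollback starts from the
    # furthest-progressed node.
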
    def checkActionInTableOrFile(self):
        """
        Under force upgrade the step file and the step table may not
        coincide, so we only use the step file
        """
        self.checkActionInFile()

    def execSqlCommandInPrimaryDN(self, sql, retryTime=3):
        self.context.logger.debug("Start to exec sql {0}.".format(sql))
        count = 0
        status, output = 1, ""
        while count < retryTime:
            self.context.logger.debug(
                "Exec sql in dn node {0}".format(self.dnInst.hostname))
            (status, output) = ClusterCommand.remoteSQLCommand(
                sql, self.context.user,
                self.dnInst.hostname, self.dnInst.port, False,
                DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True)
            self.context.logger.debug(
                "Exec sql result is, status:{0}, output is {1}".format(
                    status, output))
            if status != 0 or SqlResult.findErrorInSql(output):
                count += 1
                continue
            else:
                break
        return status, output

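    # Illustrative call (the SQL here is hypothetical): callers treat a
    # non-zero status or an error marker in the output as a retryable
    # failure, e.g.
    #   status, output = self.execSqlCommandInPrimaryDN("SELECT 1;",
    #                                                   retryTime=5)
    #   if status != 0 or SqlResult.findErrorInSql(output):
    #       ...  # give up or escalate, as dropSupportSchema() does
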
    def checkActionInFile(self):
        """
        function: check whether the current action is the same as the
                  action recorded in the file
        input : NA
        output: NA
        """
        try:
            self.context.logger.debug("Check the action in file.")
            stepFile = os.path.join(self.context.upgradeBackupPath,
                                    const.GREY_UPGRADE_STEP_FILE)
            if not os.path.isfile(stepFile):
                self.context.logger.debug(
                    ErrorCode.GAUSS_502["GAUSS_50201"] % (stepFile))
                return

            with open(stepFile, 'r') as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    upgrade_action = row['upgrade_action']
                    if self.context.action != upgrade_action:
                        raise Exception(ErrorCode.GAUSS_529["GAUSS_52925"] % (
                            self.context.action, upgrade_action))
            self.context.logger.debug(
                "Successfully checked the action in file.")
            return
        except Exception as e:
            self.context.logger.debug("Failed to check the action in file.")
            raise Exception(str(e))

    def getNodesWithStep(self, step):
        """
        get nodes with the given step from the step file or table
        """
        nodes = self.getNodesWithStepFile(step)
        return nodes

    def getNodesWithStepFile(self, step):
        """
        get nodes with the given step from the file upgrade_step.csv
        """
        stepFile = os.path.join(self.context.upgradeBackupPath,
                                const.GREY_UPGRADE_STEP_FILE)
        self.context.logger.debug("Get the node step from file %s." % stepFile)
        nodes = []
        with open(stepFile, 'r') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                if not row['step'].isdigit():
                    raise Exception(ErrorCode.GAUSS_529["GAUSS_52926"])
                if int(row['step']) == step:
                    nodes.append(row['node_host'])
        self.context.logger.debug("Nodes %s are at step %d" % (nodes, step))
        return nodes

    def greyRestoreConfig(self):
        """
        Deal with lib/postgresql/pg_plugin. Under rollback we use the new
        pg_plugin dir as the base: files in the new dir but not in the old
        dir are moved to the old dir (C functions that were added) or
        removed from the old dir (C functions that were dropped). Copy the
        config from the new dir to the old dir if the config may have been
        changed by user action.
        """
        cmd = "%s -t %s -U %s --old_cluster_app_path=%s " \
              "--new_cluster_app_path=%s -l %s" % (
                  OMCommand.getLocalScript("Local_Upgrade_Utility"),
                  const.ACTION_GREY_RESTORE_CONFIG,
                  self.context.user,
                  self.context.oldClusterAppPath,
                  self.context.newClusterAppPath,
                  self.context.localLog)
        if self.context.forceRollback:
            cmd += " --force"
        self.context.logger.debug("Command for restoring config: %s" % cmd)
        rollbackList = copy.deepcopy(self.context.clusterNodes)
        self.context.sshTool.executeCommand(cmd, hostList=rollbackList)
        self.context.logger.debug("Successfully restored config.")

    def greyRestoreGuc(self):
        """
        restore the old GUC values during rollback
        :return: NA
        """
        cmd = "%s -t %s -U %s --old_cluster_app_path=%s -X %s -l %s" % \
              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
               const.ACTION_GREY_RESTORE_GUC,
               self.context.user,
               self.context.oldClusterAppPath,
               self.context.xmlFile,
               self.context.localLog)
        if self.context.forceRollback:
            cmd += " --force"
        self.context.logger.debug("Command for restoring GUC: %s" % cmd)
        rollbackList = copy.deepcopy(self.context.clusterNodes)
        self.context.sshTool.executeCommand(cmd, hostList=rollbackList)
        self.context.logger.debug("Successfully restored GUC.")

    def dropSupportSchema(self):
        self.context.logger.debug("Drop schema.")
        sql = "DROP SCHEMA IF EXISTS %s CASCADE;" % const.UPGRADE_SCHEMA
        retryTime = 0
        try:
            while retryTime < 5:
                (status, output) = self.execSqlCommandInPrimaryDN(sql)
                if status != 0 or SqlResult.findErrorInSql(output):
                    retryTime += 1
                    self.context.logger.debug(
                        "Failed to execute SQL: %s. Error: \n%s. retry" % (
                            sql, str(output)))
                else:
                    break
            if status != 0 or SqlResult.findErrorInSql(output):
                self.context.logger.debug(
                    "Failed to execute SQL: %s. Error: \n%s" % (
                        sql, str(output)))
                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
                                " Please drop manually with this command.")
            self.context.logger.debug("Successfully dropped schema %s cascade."
                                      % const.UPGRADE_SCHEMA)
        except Exception as e:
            if self.context.forceRollback:
                self.context.logger.log(
                    "Failed to drop schema. Please drop manually "
                    "with this command: \n %s" % sql)
            else:
                raise Exception(str(e))

    def doInplaceBinaryRollback(self):
        """
        function: roll back the inplace binary upgrade
        input : NA
        output: return True, if the operation is done successfully.
                return False, if the operation failed.
        """
        self.context.logger.log("Performing inplace rollback.")
        # step flags:
        # const.BINARY_UPGRADE_NO_NEED_ROLLBACK value is -2
        # const.INVALID_UPRADE_STEP value is -1
        # const.BINARY_UPGRADE_STEP_INIT_STATUS value is 0
        # const.BINARY_UPGRADE_STEP_STOP_NODE value is 2
        # const.BINARY_UPGRADE_STEP_BACKUP_VERSION value is 3
        # const.BINARY_UPGRADE_STEP_UPGRADE_APP value is 4
        # const.BINARY_UPGRADE_STEP_START_NODE value is 5
        # const.BINARY_UPGRADE_STEP_PRE_COMMIT value is 6
        self.distributeXml()
        step = self.getNodeStepInplace()
        if step == const.BINARY_UPGRADE_NO_NEED_ROLLBACK:
            self.context.logger.log("Rollback succeeded.")
            return True

        # if step <= -1, the step file is broken; exit.
        if step <= const.INVALID_UPRADE_STEP:
            self.context.logger.debug("Invalid upgrade step: %s." % str(step))
            return False

        # If the step value is const.BINARY_UPGRADE_STEP_PRE_COMMIT and
        # the commit flag file is found, the user has committed the
        # upgrade, so rollback is no longer possible.
        if step == const.BINARY_UPGRADE_STEP_PRE_COMMIT:
            if not self.checkCommitFlagFile():
                self.context.logger.log(
                    "Upgrade has already been committed, "
                    "can not execute rollback command any more.")
                return False

        try:
            self.checkStaticConfig()
            self.start_strategy()
            # Mark that we leave pre-commit status, so that if we fail at
            # the first few steps, we won't be allowed to commit upgrade
            # any more.
            if step == const.BINARY_UPGRADE_STEP_PRE_COMMIT:
                self.recordNodeStepInplace(
                    const.ACTION_INPLACE_UPGRADE,
                    const.BINARY_UPGRADE_STEP_START_NODE)

            if step >= const.BINARY_UPGRADE_STEP_START_NODE:
                # drop the table and index after large upgrade
                if self.isLargeInplaceUpgrade:
                    if self.check_upgrade_mode():
                        self.drop_table_or_index()
                self.restoreClusterConfig(True)
                self.clean_cm_instance()
                self.switchBin(const.OLD)
                if self.isLargeInplaceUpgrade:
                    touchInitFlagFile = os.path.join(
                        self.context.upgradeBackupPath, "touch_init_flag")
                    if os.path.exists(touchInitFlagFile):
                        self.rollbackCatalog()
                        self.cleanCsvFile()
                    else:
                        self.setUpgradeMode(0)
                else:
                    self.setUpgradeFromParam(const.UPGRADE_UNSET_NUM)
                self.stop_strategy()
                self.recordNodeStepInplace(
                    const.ACTION_INPLACE_UPGRADE,
                    const.BINARY_UPGRADE_STEP_UPGRADE_APP)

            if step >= const.BINARY_UPGRADE_STEP_UPGRADE_APP:
                self.restoreNodeVersion()
                self.restoreClusterConfig(True)
                self.recordNodeStepInplace(
                    const.ACTION_INPLACE_UPGRADE,
                    const.BINARY_UPGRADE_STEP_BACKUP_VERSION)

            if step >= const.BINARY_UPGRADE_STEP_BACKUP_VERSION:
                self.cleanBackupedCatalogPhysicalFiles(True)
                self.recordNodeStepInplace(
                    const.ACTION_INPLACE_UPGRADE,
                    const.BINARY_UPGRADE_STEP_STOP_NODE)

            if step >= const.BINARY_UPGRADE_STEP_STOP_NODE:
                self.start_strategy()
                self.recordNodeStepInplace(
                    const.ACTION_INPLACE_UPGRADE,
                    const.BINARY_UPGRADE_STEP_INIT_STATUS)

            if step >= const.BINARY_UPGRADE_STEP_INIT_STATUS:
                if self.unSetClusterReadOnlyMode() != 0:
                    raise Exception("NOTICE: " +
                                    ErrorCode.GAUSS_529["GAUSS_52907"])
                self.cleanBinaryUpgradeBakFiles(True)
                self.cleanInstallPath(const.NEW)
                self.cleanTmpGlobalRelampFile()
                # install kerberos
                self.install_kerberos()
        except Exception as e:
            self.context.logger.error(str(e))
            self.context.logger.log("Rollback failed.")
            return False

        self.context.logger.log("Rollback succeeded.")
        return True

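    # Unlike the grey flow, inplace rollback is gated purely by the
    # numeric step in the per-node step file: each "step >= X" block above
    # undoes one forward phase and immediately records the next lower
    # step, so a rollback interrupted by a crash resumes where it stopped.
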
    def check_table_or_index_exist(self, name, eachdb):
        """
        check whether a table or index exists
        :return:
        """
        sql = "select count(*) from pg_class where relname = '%s';" % name
        (status, output) = ClusterCommand.remoteSQLCommand(
            sql, self.context.user,
            self.dnInst.hostname, self.dnInst.port, False,
            eachdb, IsInplaceUpgrade=True)
        if status != 0 or SqlResult.findErrorInSql(output):
            raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
                            " Error: \n%s" % str(output))
        if output == '0':
            self.context.logger.debug("Table does not exist.")
            return False
        self.context.logger.debug("Table exists.")
        return True

    def drop_table_or_index(self):
        """
        drop the temporary pg_proc table and indexes
        :return:
        """
        self.context.logger.debug("Start to drop table or index")
        database_list = self.getDatabaseList()
        # drop table and index
        maindb = "postgres"
        otherdbs = database_list
        otherdbs.remove("postgres")
        # check whether the table exists in postgres
        table_name = 'pg_proc_temp_oids'
        if self.check_table_or_index_exist(table_name, maindb):
            self.drop_one_database_table_or_index([maindb])
        else:
            return
        # drop the table and index in the other databases
        self.drop_one_database_table_or_index(otherdbs)
        self.context.logger.debug(
            "Successfully dropped table or index.")

    def drop_one_database_table_or_index(self, database_list):
        """
        drop a table in one database
        :return:
        """
        table_name = 'pg_proc_temp_oids'
        delete_table_sql = "START TRANSACTION;SET IsInplaceUpgrade = on;" \
                           "drop table %s;commit;" % table_name
        index_name_list = ['pg_proc_oid_index_temp',
                           'pg_proc_proname_args_nsp_index_temp']
        for eachdb in database_list:
            if self.check_table_or_index_exist(table_name, eachdb):
                (status, output) = ClusterCommand.remoteSQLCommand(
                    delete_table_sql, self.context.user,
                    self.dnInst.hostname, self.dnInst.port, False,
                    eachdb, IsInplaceUpgrade=True)
                if status != 0:
                    raise Exception(
                        ErrorCode.GAUSS_513["GAUSS_51300"] % delete_table_sql
                        + " Error: \n%s" % str(output))
            for index in index_name_list:
                if self.check_table_or_index_exist(index, eachdb):
                    sql = "START TRANSACTION;SET IsInplaceUpgrade = on;" \
                          "drop index %s;commit;" % index
                    (status, output) = ClusterCommand.remoteSQLCommand(
                        sql, self.context.user,
                        self.dnInst.hostname, self.dnInst.port, False,
                        eachdb, IsInplaceUpgrade=True)
                    if status != 0:
                        raise Exception(
                            ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
                            " Error: \n%s" % str(output))

    def rollbackCatalog(self):
        """
        function: roll back the catalog changes
        steps:
        1. prepare the update sql file and check the sql file
        2. do rollback catalog
        input : NA
        output: NA
        """
        try:
            if self.context.action == const.ACTION_INPLACE_UPGRADE and int(
                    float(self.context.oldClusterNumber) * 1000) <= 93000:
                raise Exception("For this old version %s, we only support "
                                "physical rollback." % str(
                                    self.context.oldClusterNumber))
            self.context.logger.log("Rolling back catalog.")
            self.prepareUpgradeSqlFolder()
            self.prepareSql()
            self.doRollbackCatalog()
            self.context.logger.log("Successfully rolled back catalog.")
        except Exception as e:
            if self.context.action == const.ACTION_INPLACE_UPGRADE:
                self.context.logger.debug(
                    "Failed to perform rollback operation by rolling "
                    "back SQL files:\n%s" % str(e))
                try:
                    self.context.logger.debug("Try to recover again using "
                                              "catalog physical files")
                    self.doPhysicalRollbackCatalog()
                except Exception as e:
                    raise Exception(
                        "Failed to rollback catalog. ERROR: %s" % str(e))
            else:
                raise Exception(
                    "Failed to rollback catalog. ERROR: %s" % str(e))

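    # Two rollback paths exist for inplace upgrades: the logical one
    # (replay rollback SQL scripts via doRollbackCatalog) and, if that
    # fails, the physical one (restore backed-up catalog files via
    # doPhysicalRollbackCatalog). Old clusters at or below number 93.000
    # only support the physical path, hence the version gate above.
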
    def doRollbackCatalog(self):
        """
        function : roll back the catalog changes
        steps:
                  stop cluster
                  set upgrade_from param
                  start cluster
                  connect to each database and roll back catalog changes
                  one by one
                  stop cluster
                  unset upgrade_from param
        input : NA
        output: NA
        """
        if self.context.action == const.ACTION_INPLACE_UPGRADE:
            self.start_strategy(is_final=False)
            self.setUpgradeFromParam(self.context.oldClusterNumber)
            self.setUpgradeMode(1)
        else:
            self.setUpgradeFromParam(self.context.oldClusterNumber)
            self.setUpgradeMode(2)
            self.reloadCmAgent()
        self.execRollbackUpgradedCatalog()
        self.pgxcNodeUpdateLocalhost("rollback")
        self.setUpgradeFromParam(const.UPGRADE_UNSET_NUM)
        self.setUpgradeMode(0)
        if self.context.action == const.ACTION_INPLACE_UPGRADE:
            self.stop_strategy(is_final=False)
        else:
            self.reloadCmAgent()

    def doPhysicalRollbackCatalog(self):
        """
        function : roll back the catalog by restoring physical files
        steps:
                  start cluster
                  unset upgrade_from param
                  stop cluster
                  restore physical files
        input : NA
        output: NA
        """
        try:
            self.start_strategy(is_final=False)
            self.setUpgradeFromParam(const.UPGRADE_UNSET_NUM)
            self.setUpgradeMode(0)
            self.stop_strategy(is_final=False)
            self.execPhysicalRollbackUpgradedCatalog()
        except Exception as e:
            raise Exception(str(e))

    def execPhysicalRollbackUpgradedCatalog(self):
        """
        function : roll back the catalog by restoring physical files;
                   send the command to all nodes
        input : NA
        output: NA
        """
        try:
            if self.isLargeInplaceUpgrade:
                self.context.logger.debug(
                    "Start to restore physical catalog files.")
                # send the command to all nodes and execute
                cmd = "%s -t %s -U %s --upgrade_bak_path=%s " \
                      "--oldcluster_num='%s' -X '%s' -l %s" % \
                      (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                       const.ACTION_RESTORE_OLD_CLUSTER_CATALOG_PHYSICAL_FILES,
                       self.context.user,
                       self.context.upgradeBackupPath,
                       self.context.oldClusterNumber,
                       self.context.xmlFile,
                       self.context.localLog)
                self.context.logger.debug(
                    "Command for restoring physical catalog files: %s." % cmd)
                CmdExecutor.execCommandWithMode(
                    cmd,
                    self.context.sshTool,
                    self.context.isSingle,
                    self.context.userProfile)
                self.context.logger.debug(
                    "Successfully restored physical catalog files.")
        except Exception as e:
            raise Exception(str(e))

    def getSqlHeader(self):
        """
        function: get sql header
        input : NA
        output : NA
        """
        header = ["START TRANSACTION;"]
        header.append("SET %s = on;" % const.ON_INPLACE_UPGRADE)
        header.append("SET search_path = 'pg_catalog';")
        header.append("SET local client_min_messages = NOTICE;")
        header.append("SET local log_min_messages = NOTICE;")
        return header

    def getFileNameList(self, filePathName, scriptType="_"):
        """
        function: get file name list
        input : filePathName
        output : []
        """
        filePath = "%s/upgrade_sql/%s" % (self.context.upgradeBackupPath,
                                          filePathName)
        allFileList = os.listdir(filePath)
        upgradeFileList = []
        if len(allFileList) == 0:
            return []
        for each_sql_file in allFileList:
            if not os.path.isfile("%s/%s" % (filePath, each_sql_file)):
                continue
            prefix = each_sql_file.split('.')[0]
            resList = prefix.split('_')
            if len(resList) != 5 or scriptType not in resList:
                continue
            file_num = "%s.%s" % (resList[3], resList[4])
            if self.floatMoreThan(float(file_num),
                                  self.context.oldClusterNumber) and \
                    self.floatGreaterOrEqualTo(self.context.newClusterNumber,
                                               float(file_num)):
                upgradeFileList.append(each_sql_file)
        return upgradeFileList

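    # The filename filter above expects five underscore-separated fields
    # before the extension, the last two forming a version number: a
    # (hypothetical) "upgrade_catalog_maindb_92_298.sql" yields file_num
    # "92.298", which is kept only when it lies in
    # (oldClusterNumber, newClusterNumber].
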
    def initClusterInfo(self, dbClusterInfoPath):
        """
        function: init the cluster
        input : dbClusterInfoPath
        output: dbClusterInfo
        """
        clusterInfoModules = OldVersionModules()
        fileDir = os.path.dirname(os.path.realpath(dbClusterInfoPath))
        sys.path.insert(0, fileDir)
        # init cluster information
        gp_home = ClusterDir.getClusterToolPath(self.context.user)
        gauss_home = ClusterDir.getInstallDir(self.context.user)
        gp_home_version = os.path.join(gp_home, "script", "gspylib",
                                       "common", "VersionInfo.py")
        gauss_home_version = os.path.join(gauss_home, "bin", "script",
                                          "gspylib", "common", "VersionInfo.py")
        if not os.path.isfile(gp_home_version) and \
                os.path.isfile(gauss_home_version):
            FileUtil.cpFile(gauss_home_version, gp_home_version)

        clusterInfoModules.oldDbClusterInfoModule = __import__('DbClusterInfo')
        sys.path.remove(fileDir)
        return clusterInfoModules.oldDbClusterInfoModule.dbClusterInfo()

    def initOldClusterInfo(self, dbClusterInfoPath):
        """
        function: init old cluster information
        input : dbClusterInfoPath
        output: clusterInfoModules.oldDbClusterInfoModule.dbClusterInfo()
        """
        clusterInfoModules = OldVersionModules()
        fileDir = os.path.dirname(os.path.realpath(dbClusterInfoPath))
        # script and OldDbClusterInfo.py are in the same PGHOST directory
        sys.path.insert(0, fileDir)
        # V1R8 DbClusterInfo.py uses "from gspylib.common.ErrorCode import
        # ErrorCode"
        sys.path.insert(0, os.path.join(fileDir, "script"))
        # init old cluster information
        clusterInfoModules.oldDbClusterInfoModule = \
            __import__('OldDbClusterInfo')
        return clusterInfoModules.oldDbClusterInfoModule.dbClusterInfo()

    def initClusterConfig(self):
        """
        function: init cluster info
        input : NA
        output: NA
        """
        gaussHome = \
            EnvUtil.getEnvironmentParameterValue("GAUSSHOME",
                                                 self.context.user)
        # $GAUSSHOME must have an available value.
        if gaussHome == "":
            raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME")
        (appPath, appPathName) = os.path.split(gaussHome)
        commonDbClusterInfoModule = \
            "%s/bin/script/gspylib/common/DbClusterInfo.py" % gaussHome
        commonStaticConfigFile = "%s/bin/cluster_static_config" % gaussHome
        try:
            if self.context.action == const.ACTION_INPLACE_UPGRADE:
                # Get DbClusterInfo.py and cluster_static_config from both
                # the backup path and the install path to build
                # oldClusterInfo:
                # if the backup files exist, use them;
                # else if the install files exist, use them;
                # else we cannot get oldClusterInfo, exit.
                # backup path exists
                commonDbClusterInfoModuleBak = "%s/../OldDbClusterInfo.py" % \
                                               self.context.upgradeBackupPath
                commonStaticConfigFileBak = "%s/../cluster_static_config" % \
                                            self.context.upgradeBackupPath

                # if binary.tar exists, decompress it
                if os.path.isfile("%s/%s" % (self.context.upgradeBackupPath,
                                             self.context.binTarName)):
                    cmd = "cd '%s'&&tar xfp '%s'" % \
                          (self.context.upgradeBackupPath,
                           self.context.binTarName)
                    (status, output) = subprocess.getstatusoutput(cmd)
                    if status != 0:
                        raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                                        cmd + "Error: \n%s" % str(output))

                if (os.path.isfile(commonDbClusterInfoModuleBak)
                        and os.path.isfile(commonStaticConfigFileBak)):
                    try:
                        # import the old module
                        # init old cluster config
                        self.context.oldClusterInfo = \
                            self.initOldClusterInfo(
                                commonDbClusterInfoModuleBak)
                        self.context.oldClusterInfo.initFromStaticConfig(
                            self.context.user, commonStaticConfigFileBak)
                    except Exception as e:
                        # Maybe the old cluster is a V1R5C00 TR5 version
                        # that does not support specifying the static
                        # config file path for initFromStaticConfig,
                        # so try again with the new cluster format
                        self.context.oldClusterInfo = dbClusterInfo()
                        self.context.oldClusterInfo.initFromStaticConfig(
                            self.context.user, commonStaticConfigFileBak)
                # if the backup path does not exist, use the install path
                elif (os.path.isfile(commonDbClusterInfoModule)
                      and os.path.isfile(commonStaticConfigFile)):
                    # import the old module
                    # init old cluster config
                    self.context.oldClusterInfo = \
                        self.initClusterInfo(commonDbClusterInfoModule)
                    self.context.oldClusterInfo.initFromStaticConfig(
                        self.context.user, commonStaticConfigFile)
                else:
                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
                                    "static config file")

                # get the accurate logPath
                logPathWithUser = EnvUtil.getEnv("GAUSSLOG")
                DefaultValue.checkPathVaild(logPathWithUser)
                splitMark = "/%s" % self.context.user
                self.context.oldClusterInfo.logPath = \
                    logPathWithUser[0:(logPathWithUser.rfind(splitMark))]

                # Init new cluster config:
                # if xmlFile != "", init it by initFromXml();
                # else, use oldClusterInfo
                if self.context.xmlFile != "":
                    # Get clusterInfo. If dbClusterInfo fails, the
                    # DbClusterInfo.py is not correct and we will use the
                    # backup file instead of it
                    self.context.clusterInfo = dbClusterInfo()
                    try:
                        self.context.clusterInfo.initFromXml(
                            self.context.xmlFile)
                    except Exception as e:
                        self.context.logger.error(str(e))
                        try:
                            # init clusterInfo from the backup dbClusterInfo
                            self.context.clusterInfo = \
                                self.initOldClusterInfo(
                                    commonDbClusterInfoModuleBak)
                            self.context.clusterInfo.initFromXml(
                                self.context.xmlFile)
                        except Exception as e:
                            try:
                                self.context.clusterInfo = \
                                    self.initClusterInfo(
                                        commonDbClusterInfoModule)
                                self.context.clusterInfo.initFromXml(
                                    self.context.xmlFile)
                            except Exception as e:
                                raise Exception(str(e))
                    # verify cluster config info between old and new cluster
                    self.verifyClusterConfigInfo(self.context.clusterInfo,
                                                 self.context.oldClusterInfo)
                    # After doing verifyClusterConfigInfo() the clusterInfo
                    # and oldClusterInfo have been changed,
                    # so we should init them again
                    self.context.clusterInfo = dbClusterInfo()
                    try:
                        self.context.clusterInfo.initFromXml(
                            self.context.xmlFile)
                    except Exception as e:
                        self.context.logger.debug(str(e))
                        try:
                            # init clusterInfo from the backup dbClusterInfo
                            self.context.clusterInfo = \
                                self.initOldClusterInfo(
                                    commonDbClusterInfoModuleBak)
                            self.context.clusterInfo.initFromXml(
                                self.context.xmlFile)
                        except Exception as e:
                            try:
                                self.context.clusterInfo = \
                                    self.initClusterInfo(
                                        commonDbClusterInfoModule)
                                self.context.clusterInfo.initFromXml(
                                    self.context.xmlFile)
                            except Exception as e:
                                raise Exception(str(e))
                else:
                    self.context.clusterInfo = self.context.oldClusterInfo
            elif (self.context.action == const.ACTION_CHOSE_STRATEGY
                  or self.context.action == const.ACTION_COMMIT_UPGRADE):
                # After switching to the new bin, gausshome points to the
                # new version, so the oldClusterNumber is the same as the
                # newClusterNumber and the oldClusterInfo is the same as
                # the new one
                try:
                    self.context.oldClusterInfo = self.context.clusterInfo
                    self.getOneDNInst(True)
                    if os.path.isfile(commonDbClusterInfoModule) and \
                            os.path.isfile(commonStaticConfigFile):
                        # import the old module
                        # init old cluster config
                        self.context.oldClusterInfo = \
                            self.initClusterInfo(commonDbClusterInfoModule)
                        self.context.oldClusterInfo.initFromStaticConfig(
                            self.context.user, commonStaticConfigFile)
                    else:
                        raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
                                        "static config file")
                except Exception as e:
                    # upgrade backup path
                    if (os.path.exists(
                            "%s/%s/bin/script/util/DbClusterInfo.py" % (
                                self.context.upgradeBackupPath, appPathName))):
                        binaryModuleBak = \
                            "%s/%s/bin/script/util/DbClusterInfo.py" % \
                            (self.context.upgradeBackupPath, appPathName)
                    else:
                        binaryModuleBak = \
                            "%s/%s/bin/script/gspylib/common/" \
                            "DbClusterInfo.py" % \
                            (self.context.upgradeBackupPath, appPathName)
                    binaryStaticConfigFileBak = \
                        "%s/%s/bin/cluster_static_config" % \
                        (self.context.upgradeBackupPath, appPathName)

                    if os.path.isfile(binaryModuleBak) and \
                            os.path.isfile(binaryStaticConfigFileBak):
                        # import the old module
                        # init old cluster config
                        commonDbClusterInfoModuleBak = \
                            "%s/../OldDbClusterInfo.py" % \
                            self.context.upgradeBackupPath
                        self.context.oldClusterInfo = \
                            self.initOldClusterInfo(
                                commonDbClusterInfoModuleBak)
                        self.context.oldClusterInfo.initFromStaticConfig(
                            self.context.user, binaryStaticConfigFileBak)
                    else:
                        raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
                                        "static config file")
            elif (self.context.action in
                  [const.ACTION_SMALL_UPGRADE, const.ACTION_AUTO_UPGRADE,
                   const.ACTION_LARGE_UPGRADE, const.ACTION_AUTO_ROLLBACK]):
                # 1. get new cluster info
                self.context.clusterInfo = dbClusterInfo()
                self.context.clusterInfo.initFromXml(self.context.xmlFile)
                # 2. get oldClusterInfo
                # When under rollback, gausshome may point to the old or
                # the new clusterAppPath, so we must choose from the record
                # table; when upgrading abnormal nodes, gausshome points to
                # newClusterAppPath

                oldPath = self.getClusterAppPath()
                if oldPath != "" and os.path.exists(oldPath):
                    self.context.logger.debug("The old install path is %s" %
                                              oldPath)
                    commonDbClusterInfoModule = \
                        "%s/bin/script/gspylib/common/DbClusterInfo.py" % \
                        oldPath
                    commonStaticConfigFile = \
                        "%s/bin/cluster_static_config" % oldPath
                else:
                    self.context.logger.debug("The old install path is %s"
                                              % os.path.realpath(gaussHome))
                if (os.path.isfile(commonDbClusterInfoModule)
                        and os.path.isfile(commonStaticConfigFile)):
                    # import the old module
                    # init old cluster config
                    self.context.oldClusterInfo = \
                        self.initClusterInfo(commonDbClusterInfoModule)
                    self.context.oldClusterInfo.initFromStaticConfig(
                        self.context.user, commonStaticConfigFile)
                else:
                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
                                    "static config file")

                staticClusterInfo = dbClusterInfo()
                config = os.path.join(gaussHome, "bin/cluster_static_config")
                if not os.path.isfile(config):
                    raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
                                    os.path.realpath(config))
                staticClusterInfo.initFromStaticConfig(self.context.user,
                                                       config)

                # verify cluster config info between old and new cluster
                self.verifyClusterConfigInfo(self.context.clusterInfo,
                                             staticClusterInfo)
                # After doing verifyClusterConfigInfo() the clusterInfo and
                # oldClusterInfo have been changed,
                # so we should init them again
                self.context.clusterInfo = dbClusterInfo()
                # we will get the self.context.newClusterAppPath in
                # choseStrategy
                self.context.clusterInfo.initFromXml(self.context.xmlFile)
                if self.context.is_inplace_upgrade or \
                        self.context.action == const.ACTION_AUTO_ROLLBACK:
                    self.getOneDNInst()
                self.context.logger.debug("Successfully init cluster config.")
            else:
                raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % 't' +
                                " Value: %s" % self.context.action)

            # check whether kerberos has been installed before the
            # inplace upgrade
            self.context.logger.debug(
                "Check whether kerberos has been installed before upgrade.")
            xmlfile = os.path.join(os.path.dirname(self.context.userProfile),
                                   DefaultValue.FI_KRB_XML)
            if os.path.exists(xmlfile) and \
                    self.context.action == const.ACTION_AUTO_UPGRADE \
                    and self.context.is_grey_upgrade:
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50200"] % "kerberos")
            if os.path.exists(xmlfile) and self.context.is_inplace_upgrade:
                pghost_path = EnvUtil.getEnvironmentParameterValue(
                    'PGHOST', self.context.user)
                destfile = "%s/krb5.conf" % os.path.dirname(
                    self.context.userProfile)
                kerberosflagfile = "%s/kerberos_upgrade_flag" % pghost_path
                cmd = "cp -rf %s %s " % (destfile, kerberosflagfile)
                (status, output) = CmdUtil.retryGetstatusoutput(cmd, 3, 5)
                if status != 0:
                    raise Exception(
                        ErrorCode.GAUSS_502["GAUSS_50206"] % kerberosflagfile
                        + " Error: \n%s" % output)
                self.context.logger.debug(
                    "Successfully backed up kerberos config file.")
        except Exception as e:
            self.context.logger.debug(traceback.format_exc())
            self.exitWithRetCode(self.context.action, False, str(e))

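    # initClusterConfig() is effectively a three-way dispatch on the
    # action: inplace upgrade rebuilds old/new cluster info from backup
    # files, chose-strategy/commit reuse the current static config (since
    # gausshome has already been switched), and the upgrade/rollback
    # actions compare the XML against the recorded old install path.
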
    def getAllStandbyDnInsts(self):
        """
        function: find all normal standby DN instances in dbNodes.
        input : NA
        output: DN instances
        """
        try:
            self.context.logger.debug("Get all standby DN.")
            dnList = []
            dnInst = None
            clusterNodes = self.context.oldClusterInfo.dbNodes
            standbyDn, output = DefaultValue.getStandbyNode(
                self.context.userProfile, self.context.logger)
            self.context.logger.debug(
                "Cluster status information is %s; the standby DN is %s" % (
                    output, standbyDn))
            if not standbyDn:
                self.context.logger.debug("There is no standby DN")
                return []
            for dbNode in clusterNodes:
                if len(dbNode.datanodes) == 0:
                    continue
                dnInst = dbNode.datanodes[0]
                if dnInst.hostname not in standbyDn:
                    continue
                dnList.append(dnInst)

            (checkStatus, checkResult) = OMCommand.doCheckStaus(
                self.context.user, 0)
            if checkStatus == 0:
                self.context.logger.debug("The cluster status is normal,"
                                          " no need to check standby DN"
                                          " status.")
            else:
                dnList = []
                clusterStatus = OMCommand.getClusterStatus()
                if clusterStatus is None:
                    raise Exception(ErrorCode.GAUSS_516["GAUSS_51600"])
                clusterInfo = dbClusterInfo()
                clusterInfo.initFromXml(self.context.xmlFile)
                clusterInfo.dbNodes.extend(clusterNodes)
                for dbNode in clusterInfo.dbNodes:
                    if len(dbNode.datanodes) == 0:
                        continue
                    dn = dbNode.datanodes[0]
                    if dn.hostname not in standbyDn:
                        continue
                    dbInst = clusterStatus.getInstanceStatusById(
                        dn.instanceId)
                    if dbInst is None:
                        continue
                    if dbInst.status == "Normal":
                        self.context.logger.debug(
                            "DN from %s is healthy." % dn.hostname)
                        dnList.append(dn)
                    else:
                        self.context.logger.debug(
                            "DN from %s is unhealthy." % dn.hostname)

            if not dnList:
                self.context.logger.debug("There is no normal standby DN")
            else:
                self.context.logger.debug(
                    "Successfully get all standby DN: %s" %
                    ','.join(d.hostname for d in dnList))
            self.dnStandbyInsts = dnList

        except Exception as e:
            self.context.logger.log(
                "Failed to get all standby DN. Error: %s" % str(e))
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51624"])

    def getOneDNInst(self, checkNormal=False):
        """
        function: find a DN instance in dbNodes
                  on which we can execute SQL commands
        input : NA
        output: DN instance
        """
        try:
            self.context.logger.debug(
                "Get one DN. CheckNormal is %s" % checkNormal)
            dnInst = None
            clusterNodes = self.context.oldClusterInfo.dbNodes
            primaryDnNode, output = DefaultValue.getPrimaryNode(
                self.context.userProfile, self.context.logger)
            self.context.logger.debug(
                "Cluster status information is %s; the primaryDnNode is %s" % (
                    output, primaryDnNode))
            if not primaryDnNode:
                self.context.logger.error(
                    "Get primary DN failed. Please check cluster.")
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51652"] %
                                "Get primary DN failed.")
            for dbNode in clusterNodes:
                if len(dbNode.datanodes) == 0:
                    continue
                dnInst = dbNode.datanodes[0]
                if dnInst.hostname not in primaryDnNode:
                    continue
                break

            if checkNormal:
                (checkStatus, checkResult) = OMCommand.doCheckStaus(
                    self.context.user, 0)
                if checkStatus == 0:
                    self.context.logger.debug("The cluster status is normal,"
                                              " no need to check DN status.")
                else:
                    clusterStatus = OMCommand.getClusterStatus()
                    if clusterStatus is None:
                        raise Exception(ErrorCode.GAUSS_516["GAUSS_51600"])
                    clusterInfo = dbClusterInfo()
                    clusterInfo.initFromXml(self.context.xmlFile)
                    clusterInfo.dbNodes.extend(clusterNodes)
                    for dbNode in clusterInfo.dbNodes:
                        if len(dbNode.datanodes) == 0:
                            continue
                        dn = dbNode.datanodes[0]
                        if dn.hostname not in primaryDnNode:
                            continue
                        dbInst = clusterStatus.getInstanceStatusById(
                            dn.instanceId)
                        if dbInst is None:
                            continue
                        if dbInst.status == "Normal":
                            self.context.logger.debug(
                                "DN from %s is healthy." % dn.hostname)
                            dnInst = dn
                            break
                        self.context.logger.debug(
                            "DN from %s is unhealthy." % dn.hostname)

            # check whether the nodes contain a DN
            if not dnInst:
                raise Exception(ErrorCode.GAUSS_526["GAUSS_52602"])
            self.context.logger.debug("Successfully get one DN from %s."
                                      % dnInst.hostname)
            self.dnInst = dnInst

        except Exception as e:
            self.context.logger.log("Failed to get one DN. Error: %s" % str(e))
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51624"])

    def verifyClusterConfigInfo(self, clusterInfo, oldClusterInfo,
                                ignoreFlag="upgradectl"):
        """
        function: verify cluster config info between xml and static config
        input : clusterInfo, oldClusterInfo
        output: NA
        """
        try:
            # self.context.clusterInfo should be placed before
            # self.context.oldClusterInfo,
            # because self.context.oldClusterInfo is not an instance of
            # dbCluster
            # convert the new cluster information to a comparable cluster
            compnew = self.covertToCompCluster(clusterInfo)
            # convert the old cluster information to a comparable cluster
            compold = self.covertToCompCluster(oldClusterInfo)
            # Do the comparison; if they are not the same, report it.
            theSame, tempbuffer = compareObject(compnew, compold,
                                                "clusterInfo", [], ignoreFlag)
            if theSame:
                self.context.logger.log("Static configuration matched with "
                                        "old static configuration files.")
            else:
                msg = "Instance[%s] are not the same.\nXml cluster " \
                      "information: %s\nStatic cluster information: %s\n" % \
                      (tempbuffer[0], tempbuffer[1], tempbuffer[2])
                self.context.logger.debug("The old cluster information is "
                                          "from the cluster_static_config.")
                raise Exception(ErrorCode.GAUSS_512["GAUSS_51217"] +
                                "Error: \n%s" % msg.strip("\n"))
        except Exception as e:
            raise Exception(str(e))

    def covertToCompCluster(self, dbclusterInfo):
        """
        function: convert cluster info to a comparable cluster
        input : clusterInfo, oldClusterInfo
        output: compClusterInfo
        """
        # init the dbcluster class
        compClusterInfo = dbClusterInfo()
        # get name
        compClusterInfo.name = dbclusterInfo.name
        # get appPath
        compClusterInfo.appPath = dbclusterInfo.appPath
        # get logPath
        compClusterInfo.logPath = dbclusterInfo.logPath

        for dbnode in dbclusterInfo.dbNodes:
            compNodeInfo = dbNodeInfo()
            # get datanode instance information
            for datanode in dbnode.datanodes:
                compNodeInfo.datanodes.append(
                    self.coverToCompInstance(datanode))
            # get node information
            compClusterInfo.dbNodes.append(compNodeInfo)
        return compClusterInfo

    def coverToCompInstance(self, compinstance):
        """
        function: convert an instance to a comparable instance
        1. get instanceId
        2. get mirrorId
        3. get port
        4. get datadir
        5. get instanceType
        6. get listenIps
        7. get haIps
        input : compinstance
        output: covertedInstanceInfo
        """
        covertedInstanceInfo = instanceInfo()
        # get instanceId
        covertedInstanceInfo.instanceId = compinstance.instanceId
        # get mirrorId
        covertedInstanceInfo.mirrorId = compinstance.mirrorId
        # get port
        covertedInstanceInfo.port = compinstance.port
        # get datadir
        covertedInstanceInfo.datadir = compinstance.datadir
        # get instanceType
        covertedInstanceInfo.instanceType = compinstance.instanceType
        # get listenIps
        covertedInstanceInfo.listenIps = compinstance.listenIps
        # get haIps
        covertedInstanceInfo.haIps = compinstance.haIps
        return covertedInstanceInfo

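    # Only the fields copied above take part in the comparison performed
    # by compareObject() in verifyClusterConfigInfo(); other instance
    # attributes, which may legitimately differ between versions, are left
    # out of the comparable copy.
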
    def distributeXml(self):
        """
        function: distribute the XML configuration file to every host
        input : NA
        output: NA
        """
        self.context.logger.debug("Distributing xml configure file.",
                                  "addStep")
        try:
            hosts = self.context.clusterInfo.getClusterNodeNames()
            hosts.remove(NetUtil.GetHostIpOrName())

            # send the xml file to every host
            DefaultValue.distributeXmlConfFile(self.context.sshTool,
                                               self.context.xmlFile,
                                               hosts,
                                               self.context.mpprcFile,
                                               self.context.isSingle)
        except Exception as e:
            raise Exception(str(e))

        self.context.logger.debug("Successfully distributed xml "
                                  "configure file.", "constant")

    def recordNodeStepInplace(self, action, step):
        """
        function: record step info on all nodes
        input : action, step
        output: NA
        """
        try:
            # record step info on the local node
            tempPath = self.context.upgradeBackupPath
            filePath = os.path.join(tempPath, const.INPLACE_UPGRADE_STEP_FILE)
            cmd = "echo \"%s:%d\" > %s" % (action, step, filePath)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] %
                                filePath + "Error: \n%s" % str(output))

            if not self.context.isSingle:
                # send the file to the remote nodes
                self.context.sshTool.scpFiles(filePath, tempPath)
            self.context.logger.debug("Successfully wrote step file[%s:%d]."
                                      % (action, step))
        except Exception as e:
            raise Exception(str(e))

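    # The inplace step record is a single "action:step" line (see
    # getNodeStepInplace() below for the matching parser); replicating it
    # to every node lets any node answer where a rollback must resume.
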
    def distributeFile(self, step_file):
        """
        function: distribute a file to all other nodes
        input : step_file
        output : NA
        """
        self.context.logger.debug("Distribute the file %s" % step_file)
        # send the file to each node
        hosts = self.context.clusterInfo.getClusterNodeNames()
        hosts.remove(NetUtil.GetHostIpOrName())
        if not self.context.isSingle:
            stepDir = os.path.normpath(os.path.dirname(step_file))
            self.context.sshTool.scpFiles(step_file, stepDir, hosts)
        self.context.logger.debug("Successfully distributed the file %s"
                                  % step_file)

def getNodeStepInplace(self):
|
|
"""
|
|
function: Get the upgrade step info for inplace upgrade
|
|
input : action
|
|
output: the upgrade step info
|
|
"""
|
|
try:
|
|
tempPath = self.context.upgradeBackupPath
|
|
# get file path and check file exists
|
|
filePath = os.path.join(tempPath, const.INPLACE_UPGRADE_STEP_FILE)
|
|
if not os.path.exists(filePath):
|
|
self.context.logger.debug("The cluster status is Normal. "
|
|
"No need to rollback.")
|
|
return const.BINARY_UPGRADE_NO_NEED_ROLLBACK
|
|
|
|
# read and check record format
|
|
stepInfo = FileUtil.readFile(filePath)[0]
|
|
stepList = stepInfo.split(":")
|
|
if len(stepList) != 2:
|
|
raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % filePath)
|
|
|
|
recordType = stepList[0].strip()
|
|
recordStep = stepList[1].strip()
|
|
# check upgrade type
|
|
# the record value must be consistent with the upgrade type
|
|
if self.context.action != recordType:
|
|
raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % "t" +
|
|
"Input upgrade type: %s record upgrade type: "
|
|
"%s\nMaybe you chose the wrong interface." %
|
|
(self.context.action, recordType))
|
|
# if record value is not digit, exit.
|
|
if not recordStep.isdigit() or int(recordStep) > \
|
|
const.BINARY_UPGRADE_STEP_PRE_COMMIT or \
|
|
int(recordStep) < const.INVALID_UPRADE_STEP:
|
|
raise Exception(ErrorCode.GAUSS_516["GAUSS_51633"] %
|
|
recordStep)
|
|
except Exception as e:
|
|
self.context.logger.error(str(e))
|
|
return const.INVALID_UPRADE_STEP
|
|
self.context.logger.debug("The rollback step is %s" % recordStep)
|
|
return int(recordStep)
|
|
|

    def checkStep(self, step):
        """
        function: check step
        input : step
        output : NA
        """
        if not step.isdigit() or \
                int(step) > GreyUpgradeStep.STEP_BEGIN_COMMIT or \
                int(step) < const.INVALID_UPRADE_STEP:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51633"] % str(step))

    ##########################################################################
    # Offline upgrade functions
    ##########################################################################
    def checkUpgrade(self):
        """
        function: Check the environment for upgrade
        input : action
        output: NA
        """
        self.context.logger.log("Checking upgrade environment.", "addStep")
        try:
            # Check the environment for upgrade
            cmd = "%s -t %s -R '%s' -l '%s' -N '%s' -X '%s'" % \
                  (OMCommand.getLocalScript("Local_Check_Upgrade"),
                   self.context.action,
                   self.context.oldClusterAppPath,
                   self.context.localLog,
                   self.context.newClusterAppPath,
                   self.context.xmlFile)
            self.context.logger.debug("Command for checking upgrade "
                                      "environment: %s." % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)
        except Exception as e:
            self.context.logger.log("Failed to check upgrade environment.",
                                    "constant")
            raise Exception(str(e))
        if not self.context.forceRollback:
            if self.context.oldClusterNumber >= \
                    const.ENABLE_STREAM_REPLICATION_VERSION:
                self.check_gucval_is_inval_given(
                    const.ENABLE_STREAM_REPLICATION_NAME, const.VALUE_ON)
        try:
            if self.context.action == const.ACTION_INPLACE_UPGRADE:
                self.context.logger.log(
                    "Successfully checked upgrade environment.", "constant")
                return
            self.checkActionInTableOrFile()
            self.checkDifferentVersion()
            self.checkOption()
        except Exception as e:
            self.context.logger.log(
                "Failed to check upgrade environment.", "constant")
            raise Exception(str(e))

        self.context.logger.log(
            "Successfully checked upgrade environment.", "constant")

    def check_gucval_is_inval_given(self, guc_name, val_list):
        """
        Checks whether a given parameter is in a given value list on a
        given instance list.
        """
        self.context.logger.debug("Checks whether the parameter:{0} is "
                                  "the value:{1}.".format(guc_name, val_list))
        guc_str = "{0}:{1}".format(guc_name, ",".join(val_list))
        self.checkParam(guc_str)
        self.context.logger.debug("Successfully checked that the "
                                  "parameter:{0} value is in the "
                                  "value:{1}.".format(guc_name, val_list))

    def checkDifferentVersion(self):
        """
        if the cluster has only one version, no need to check;
        if the cluster has two versions, each must be either the new
        version or the old version
        :return:
        """
        self.context.logger.debug("Check the amount of cluster versions.")
        failedHost = []
        failMsg = ""
        gaussHome = ClusterDir.getInstallDir(self.context.user)
        # $GAUSSHOME must have a valid value.
        if gaussHome == "":
            raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$GAUSSHOME")
        versionFile = os.path.join(gaussHome, "bin/upgrade_version")
        cmd = "sed -n '3,1p' %s" % versionFile
        hostList = copy.deepcopy(self.context.clusterNodes)
        (resultMap, outputCollect) = \
            self.context.sshTool.getSshStatusOutput(cmd, hostList)
        for key, val in resultMap.items():
            if DefaultValue.FAILURE in val:
                failedHost.append(key)
                failMsg += val
        if failedHost:
            self.context.recordIgnoreOrFailedNodeInEveryNode(
                self.context.failedNodeRecordFile, failedHost)
            raise Exception(ErrorCode.GAUSS_529["GAUSS_52929"] + failMsg)
        for result in outputCollect:
            # str.find returns -1 when the substring is missing,
            # so compare against 0 explicitly
            if result.find(self.newCommitId) >= 0 or \
                    result.find(self.oldCommitId) >= 0:
                continue
            self.context.logger.debug(
                "Find the gaussdb version %s is not same with"
                " current upgrade version" % str(result))
            raise Exception(ErrorCode.GAUSS_529["GAUSS_52935"])
        self.context.logger.debug(
            "Successfully checked the amount of cluster versions.")

    def checkOption(self):
        """
        if the user used -g first, and then uses -h <the nodes chosen by
        the last -g>, we can upgrade again
        :return:
        """
        if self.context.is_grey_upgrade:
            self.check_option_grey()
        if len(self.context.nodeNames) != 0:
            self.checkOptionH()
        elif self.context.upgrade_remain:
            self.checkOptionContinue()
        else:
            self.checkOptionG()

    def check_option_grey(self):
        """
        if nodes have been upgraded, no need to use --grey to upgrade again
        :return:
        """
        stepFile = os.path.join(
            self.context.upgradeBackupPath, const.GREY_UPGRADE_STEP_FILE)
        if not os.path.isfile(stepFile):
            self.context.logger.debug(
                "File %s does not exist. No need to check." %
                const.GREY_UPGRADE_STEP_FILE)
            return
        grey_node_names = self.getUpgradedNodeNames()
        if grey_node_names:
            self.context.logger.log(
                "All nodes have been upgraded, no need to upgrade again.")
            self.exitWithRetCode(self.action, True)

    def checkOptionH(self):
        self.checkNodeNames()
        stepFile = os.path.join(
            self.context.upgradeBackupPath, const.GREY_UPGRADE_STEP_FILE)
        if not os.path.isfile(stepFile):
            self.context.logger.debug(
                "File %s does not exist. No need to check." %
                const.GREY_UPGRADE_STEP_FILE)
            return
        if not self.isNodesSameStep(self.context.nodeNames):
            raise Exception(ErrorCode.GAUSS_529["GAUSS_52909"])
        if self.isNodeSpecifyStep(
                GreyUpgradeStep.STEP_UPDATE_POST_CATALOG,
                self.context.nodeNames):
            raise Exception(
                ErrorCode.GAUSS_529["GAUSS_52910"] % self.context.nodeNames)
        nodes = self.getNodeLessThan(GreyUpgradeStep.STEP_UPDATE_POST_CATALOG)
        # compare whether the current upgrade nodes are the same as the
        # unfinished node names from the last run
        if nodes:
            a = [i for i in self.context.nodeNames if i not in nodes]
            b = [i for i in nodes if i not in self.context.nodeNames]
            if len(a) != 0 or len(b) != 0:
                raise Exception(
                    ErrorCode.GAUSS_529["GAUSS_52911"] % nodes +
                    " Please upgrade them first.")

    def checkNodeNames(self):
        self.context.logger.debug(
            "Check if the node name is invalid or duplicated.")
        clusterNodes = self.context.clusterInfo.getClusterNodeNames()
        for nodeName in self.context.nodeNames:
            if nodeName not in clusterNodes:
                raise Exception(
                    ErrorCode.GAUSS_500["GAUSS_50011"] % ("-h", nodeName))

        undupNodes = set(self.context.nodeNames)
        if len(self.context.nodeNames) != len(undupNodes):
            self.context.logger.log(
                ErrorCode.GAUSS_500["GAUSS_50004"] % (
                    "h" + "Duplicate node names"))
            nodeDict = {}.fromkeys(self.context.nodeNames, 0)
            for name in self.context.nodeNames:
                nodeDict[name] = nodeDict[name] + 1
            for key, value in nodeDict.items():
                if value > 1:
                    self.context.logger.log(
                        "Duplicate node name %s, "
                        "only keep one in grey upgrade!" % key)
            self.context.nodeNames = list(undupNodes)
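
    # Illustrative sketch (an addition, not part of the upgrade flow): the
    # duplicate detection above can also be expressed with
    # collections.Counter; names is assumed to be a list of node names.
    @staticmethod
    def _find_duplicate_names(names):
        from collections import Counter
        return [name for name, count in Counter(names).items() if count > 1]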

    def isNodesSameStep(self, nodes):
        """
        judge if the given nodes are at the same step
        """
        return self.isNodeSpecifyStepInFile(nodes=nodes)

    def getNodeLessThan(self, step):
        """
        get the nodes whose step is less than the specified step and not 0
        """
        nodes = self.getNodeLessThanInFile(step)
        return nodes

    def getNodeLessThanInFile(self, step):
        """
        get the nodes whose step is less than the specified step and not 0
        """
        try:
            stepFile = os.path.join(
                self.context.upgradeBackupPath, const.GREY_UPGRADE_STEP_FILE)
            self.context.logger.debug(
                "Trying to get nodes whose step is "
                "less than %s from %s" % (step, stepFile))
            if not os.path.isfile(stepFile):
                return []
            nodes = []
            with open(stepFile, 'r') as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    if int(row['step']) != 0 and int(row['step']) < step:
                        nodes.append(row['node_host'])
            self.context.logger.debug(
                "Successfully got nodes whose step is "
                "less than %s from %s" % (step, stepFile))
            return nodes
        except Exception as e:
            exitMsg = "Failed to get nodes whose step is less than {0} " \
                      "from {1}. ERROR {2}".format(step, stepFile, str(e))
            self.exitWithRetCode(self.action, False, exitMsg)
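
    # Illustrative sketch (an addition, not part of the upgrade flow):
    # shows the shape of the grey-upgrade step file consumed above. The
    # 'node_host' and 'step' column names come from the DictReader usage;
    # everything else here is for illustration only.
    @staticmethod
    def _write_step_file_example(step_file, rows):
        # rows: iterable of (node_host, step) pairs
        with open(step_file, 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['node_host', 'step'])
            for node_host, step in rows:
                writer.writerow([node_host, step])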

    def checkOptionContinue(self):
        stepFile = os.path.join(
            self.context.upgradeBackupPath, const.GREY_UPGRADE_STEP_FILE)
        if not os.path.isfile(stepFile):
            raise Exception(ErrorCode.GAUSS_529["GAUSS_52920"] +
                            "Need to upgrade some nodes first.")
        greyNodeNames = self.getUpgradedNodeNames()
        # the nodes that have been upgraded should have reached precommit
        if not self.isNodeSpecifyStep(GreyUpgradeStep.STEP_UPDATE_POST_CATALOG,
                                      greyNodeNames):
            raise Exception(ErrorCode.GAUSS_529["GAUSS_52912"])
        if len(greyNodeNames) == len(self.context.clusterInfo.dbNodes):
            self.printPrecommitBanner()
            self.context.logger.debug(
                "The node host in table %s.%s is equal to cluster nodes."
                % (const.UPGRADE_SCHEMA, const.RECORD_NODE_STEP))
            raise Exception(ErrorCode.GAUSS_529["GAUSS_52913"])
        if not self.checkVersion(self.newCommitId, greyNodeNames):
            raise Exception(
                ErrorCode.GAUSS_529["GAUSS_52914"] +
                "Please use the same version to upgrade remaining nodes.")

    def checkOptionG(self):
        stepFile = os.path.join(
            self.context.upgradeBackupPath, const.GREY_UPGRADE_STEP_FILE)
        if not os.path.isfile(stepFile):
            self.context.logger.debug(
                "File %s does not exist. No need to check." %
                const.GREY_UPGRADE_STEP_FILE)
            return
        # -g only supports 2 loops to upgrade; if any node has been
        # upgraded, -g cannot be used to upgrade other nodes
        greyNodeNames = self.getUpgradedNodeNames()
        if not greyNodeNames:
            self.context.logger.debug("No node has ever been upgraded.")
            return
        else:
            raise Exception("-g is only supported if no node has ever been"
                            " upgraded. Nodes %s have been upgraded, so use"
                            " --continue instead of -g to upgrade the other"
                            " nodes." % greyNodeNames)

    def backupClusterConfig(self):
        """
        function: Backup the cluster config
        input : NA
        output: NA
        """
        # backup list:
        # cluster_static_config
        # cluster_dynamic_config
        # etc/gscgroup_xxx.cfg
        # lib/postgresql/pg_plugin
        # server.key.cipher
        # server.key.rand
        # datasource.key.cipher
        # datasource.key.rand
        # usermapping.key.cipher
        # usermapping.key.rand
        # subscription.key.cipher
        # subscription.key.rand
        # utilslib
        # /share/sslsert/ca.key
        # /share/sslsert/etcdca.crt
        # catalog physical files
        # Data Studio lib files
        # gds files
        # javaUDF
        # postGIS
        # hadoop_odbc_connector extension files
        # libsimsearch etc files and lib files
        self.context.logger.log("Backing up cluster configuration.", "addStep")
        try:
            # send cmd to all nodes and execute
            cmd = "%s -t %s -U %s -V %d --upgrade_bak_path=%s -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_BACKUP_CONFIG,
                   self.context.user,
                   int(float(self.context.oldClusterNumber) * 1000),
                   self.context.upgradeBackupPath,
                   self.context.localLog)
            self.context.logger.debug("Command for backing up cluster "
                                      "configuration: %s" % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)

            # backup hotpatch info file
            self.backupHotpatch()
            # backup version file.
            self.backup_version_file()

            if not self.isLargeInplaceUpgrade:
                return
            # backup catalog data files if needed
            self.backupCatalogFiles()

            # backup DS libs and gds file
            cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_INPLACE_BACKUP,
                   self.context.user,
                   self.context.upgradeBackupPath,
                   self.context.localLog)
            self.context.logger.debug(
                "Command for backing up gds file: %s" % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.userProfile)
        except Exception as e:
            raise Exception(str(e))

        self.context.logger.log("Successfully backed up cluster "
                                "configuration.", "constant")

    def backupCatalogFiles(self):
        """
        function: backup physical files of catalog objects
        1. check if this is an inplace upgrade
        2. get database list
        3. get catalog objects list
        4. backup physical files for each database
        5. backup global folder
        input : NA
        output: NA
        """
        try:
            # send cmd to all nodes and execute
            cmd = "%s -t %s -U %s --upgrade_bak_path=%s " \
                  "--oldcluster_num='%s' -X '%s' -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_BACKUP_OLD_CLUSTER_CATALOG_PHYSICAL_FILES,
                   self.context.user,
                   self.context.upgradeBackupPath,
                   self.context.oldClusterNumber,
                   self.context.xmlFile,
                   self.context.localLog)
            self.context.logger.debug("Command for backing up physical files "
                                      "of catalog objects: %s" % cmd)
            CmdExecutor.execCommandWithMode(
                cmd,
                self.context.sshTool,
                self.context.isSingle,
                self.context.userProfile)
            self.context.logger.debug("Successfully backed up catalog "
                                      "physical files for old cluster.")
        except Exception as e:
            raise Exception(str(e))

    def syncNewGUC(self):
        """
        function: sync newly added guc during inplace upgrade.
                  For now, we only sync guc of cm_agent and cm_server
        input : NA
        output: NA
        """
        self.context.logger.debug("Start to sync new guc.", "addStep")
        try:
            # send cmd to all nodes and execute
            cmd = "%s -t %s -U %s --upgrade_bak_path=%s " \
                  "--new_cluster_app_path=%s -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_SYNC_CONFIG,
                   self.context.user,
                   self.context.upgradeBackupPath,
                   self.context.newClusterAppPath,
                   self.context.localLog)
            self.context.logger.debug(
                "Command for synchronizing new guc: %s" % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)
        except Exception as e:
            self.context.logger.debug("Failed to synchronize new guc.",
                                      "constant")
            raise Exception(str(e))
        self.context.logger.debug("Successfully synchronized new guc.",
                                  "constant")

    def waitClusterForNormal(self, waitTimeOut=300):
        """
        function: Wait for the cluster status to become Normal
        input : waitTimeOut
        output: NA
        """
        self.context.logger.log("Waiting for the cluster status to "
                                "become normal.")
        dotCount = 0
        # get the end time
        endTime = datetime.now() + timedelta(seconds=int(waitTimeOut))
        while True:
            time.sleep(5)
            sys.stdout.write(".")
            dotCount += 1
            if dotCount >= 12:
                dotCount = 0
                sys.stdout.write("\n")

            (checkStatus, checkResult) = \
                OMCommand.doCheckStaus(self.context.user, 0)
            if checkStatus == 0:
                if dotCount != 0:
                    sys.stdout.write("\n")
                self.context.logger.log("The cluster status is normal.")
                break

            if datetime.now() >= endTime:
                if dotCount != 0:
                    sys.stdout.write("\n")
                self.context.logger.debug(checkResult)
                raise Exception("Timeout." + "\n" +
                                ErrorCode.GAUSS_516["GAUSS_51602"])

        if checkStatus != 0:
            self.context.logger.debug(checkResult)
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "cluster")
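
    # Illustrative sketch (an addition, not part of the upgrade flow): the
    # wait loop above follows a poll-until-deadline pattern that can be
    # factored out. check_func is assumed to return True on success.
    @staticmethod
    def _poll_until(check_func, timeout_seconds, interval=5):
        deadline = datetime.now() + timedelta(seconds=int(timeout_seconds))
        while datetime.now() < deadline:
            if check_func():
                return True
            time.sleep(interval)
        return False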

    def create_ca_for_cm(self):
        """
        Create CM CA file
        """
        if self.get_upgrade_cm_strategy() != 1:
            self.context.logger.debug("No need to create CA for CM.")
            return

        new_cluster_config_file = \
            os.path.realpath(os.path.join(self.context.newClusterAppPath,
                                          "bin", "cluster_static_config"))
        self.context.logger.debug("Start to create CA for CM.")
        new_cluster_info = dbClusterInfo()
        new_cluster_info.initFromStaticConfig(self.context.user,
                                              new_cluster_config_file)
        local_node = [node for node in new_cluster_info.dbNodes
                      if node.name == NetUtil.GetHostIpOrName()][0]
        agent_component = CM_OLAP()
        agent_component.instInfo = local_node.cmagents[0]
        agent_component.logger = self.context.logger
        agent_component.binPath = os.path.realpath(
            os.path.join(self.context.newClusterAppPath, "bin"))
        agent_component.create_cm_ca(self.context.sshTool)
        self.context.logger.debug("Successfully created CA for CM.")

    def reloadCmAgent(self, is_final=False):
        """
        Run the 'kill -1' command to make the parameters of all cmagent
        instances take effect.
        :return:
        """
        if not DefaultValue.get_cm_server_num_from_static(
                self.context.oldClusterInfo) > 0 and not is_final:
            self.context.logger.debug("No need to reload cm configuration.")
            return
        self.context.logger.debug("Start to reload cmagent")
        cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \
              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
               const.ACTION_RELOAD_CMAGENT,
               self.context.user,
               self.context.upgradeBackupPath,
               self.context.localLog)
        self.context.logger.debug("Reloading all cmagent processes: %s" % cmd)
        try:
            hostList = copy.deepcopy(self.context.clusterNodes)
            self.context.execCommandInSpecialNode(cmd, hostList)
            # wait for the cluster to be normal
            self.waitClusterNormalDegrade()
            self.context.logger.debug("Successfully reloaded cmagent")
        except Exception as er:
            if self.context.action == const.ACTION_INPLACE_UPGRADE or not \
                    self.context.forceRollback:
                raise Exception(str(er))
            self.context.logger.debug(
                "Failed to reload cm agent. Warning:{0}".format(str(er)))
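
    # Illustrative sketch (an addition, not part of the upgrade flow):
    # "kill -1" as used above is a SIGHUP; the same reload can be
    # expressed in Python for a known local pid.
    @staticmethod
    def _sighup_example(pid):
        import signal
        os.kill(pid, signal.SIGHUP)  # ask the process to re-read its config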

    def reload_cmserver(self, is_final=False):
        """
        Run the 'kill -1' command to make the parameters of all cmserver
        instances take effect.
        :return:
        """
        if DefaultValue.get_cm_server_num_from_static(
                self.context.oldClusterInfo) == 0 and not is_final:
            self.context.logger.debug(
                "No need to reload cm server configuration.")
            return
        self.context.logger.debug("Start to reload cmserver")
        cm_nodes = []
        # Get all the nodes that contain a CMSERVER instance
        for dbNode in self.context.clusterInfo.dbNodes:
            if len(dbNode.cmservers) > 0:
                cm_nodes.append(dbNode.name)
        cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \
              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
               const.ACTION_RELOAD_CMSERVER,
               self.context.user,
               self.context.upgradeBackupPath,
               self.context.localLog)
        self.context.logger.debug("Reloading all cmserver processes: %s" % cmd)
        try:
            self.context.execCommandInSpecialNode(cmd, cm_nodes)
            # wait for the cluster to be normal
            self.waitClusterNormalDegrade()
            self.context.logger.debug("Successfully reloaded cmserver")
        except Exception as er:
            if self.context.action == const.ACTION_INPLACE_UPGRADE or \
                    not self.context.forceRollback:
                raise Exception(str(er))
            self.context.logger.debug(
                "Failed to reload cm server. Warning:{0}".format(str(er)))

    def restoreClusterConfig(self, isRollBack=False):
        """
        function: Restore the cluster config
        input : isRollBack
        output: NA
        """
        # restore list:
        # cluster_dynamic_config
        # etc/gscgroup_xxx.cfg
        # lib/postgresql/pg_plugin
        # server.key.cipher
        # server.key.rand
        # datasource.key.cipher
        # datasource.key.rand
        # utilslib
        # /share/sslsert/ca.key
        # /share/sslsert/etcdca.crt
        # Data Studio lib files
        # gds files
        # javaUDF
        # postGIS
        # hadoop_odbc_connector extension files
        # libsimsearch etc files and lib files
        if isRollBack:
            self.context.logger.log("Restoring cluster configuration.")
        else:
            self.context.logger.log("Restoring cluster configuration.",
                                    "addStep")
        try:
            if isRollBack:
                self.rollbackHotpatch()
            else:
                # restore static configuration
                cmd = "%s -t %s -U %s -V %d --upgrade_bak_path=%s " \
                      "--old_cluster_app_path=%s --new_cluster_app_path=%s " \
                      "-l %s" % (
                          OMCommand.getLocalScript("Local_Upgrade_Utility"),
                          const.ACTION_RESTORE_CONFIG,
                          self.context.user,
                          int(float(self.context.oldClusterNumber) * 1000),
                          self.context.upgradeBackupPath,
                          self.context.oldClusterAppPath,
                          self.context.newClusterAppPath,
                          self.context.localLog)

                self.context.logger.debug("Command for restoring "
                                          "config files: %s" % cmd)
                CmdExecutor.execCommandWithMode(cmd,
                                                self.context.sshTool,
                                                self.context.isSingle,
                                                self.context.mpprcFile)
                if self.isLargeInplaceUpgrade:
                    # restore DS libs and gds file
                    cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s" % \
                          (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                           const.ACTION_INPLACE_BACKUP,
                           self.context.user,
                           self.context.upgradeBackupPath,
                           self.context.localLog)
                    self.context.logger.debug(
                        "Command for restoring DS libs and gds file: %s" % cmd)
                    CmdExecutor.execCommandWithMode(
                        cmd,
                        self.context.sshTool,
                        self.context.isSingle,
                        self.context.userProfile)
                # change the owner of application
                cmd = "chown -R %s:%s '%s'" % \
                      (self.context.user, self.context.group,
                       self.context.newClusterAppPath)
                CmdExecutor.execCommandWithMode(
                    cmd,
                    self.context.sshTool, self.context.isSingle,
                    self.context.mpprcFile)
        except Exception as e:
            raise Exception(str(e))
        if isRollBack:
            self.context.logger.log("Successfully restored "
                                    "cluster configuration.")
        else:
            self.context.logger.log("Successfully restored cluster "
                                    "configuration.", "constant")

    def checkStaticConfig(self):
        """
        function: Check if the static config file exists in the bin dir;
                  if it does not exist, restore it from the backup dir
        input : NA
        output: NA
        """
        self.context.logger.log("Checking static configuration files.")
        try:
            # check static configuration path
            staticConfigPath = "%s/bin" % self.context.oldClusterAppPath
            # restore static configuration
            cmd = "(if [ ! -f '%s/cluster_static_config' ];then cp " \
                  "%s/cluster_static_config %s/bin;fi)" % \
                  (staticConfigPath, self.context.upgradeBackupPath,
                   self.context.oldClusterAppPath)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)
        except Exception as e:
            raise Exception(str(e))
        self.context.logger.log("Successfully checked static "
                                "configuration files.")

    def backupNodeVersion(self):
        """
        function: Backup the current application and configuration.
                  The function is only used by binary upgrade.
                  To ensure transaction atomicity,
                  it is used together with checkUpgrade().
        input : NA
        output: NA
        """
        self.context.logger.log("Backing up current application "
                                "and configurations.", "addStep")
        try:
            # back up environment variables
            cmd = "cp '%s' '%s'_gauss" % (self.context.userProfile,
                                          self.context.userProfile)
            self.context.logger.debug(
                "Command for backing up environment file: %s" % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)

            # back up application and configuration
            cmd = "%s -U %s -P %s -p -b -l %s" % \
                  (OMCommand.getLocalScript("Local_Backup"), self.context.user,
                   self.context.upgradeBackupPath, self.context.localLog)
            self.context.logger.debug(
                "Command for backing up application: %s" % cmd)
            CmdExecutor.execCommandWithMode(
                cmd,
                self.context.sshTool, self.context.isSingle,
                self.context.mpprcFile)

        except Exception as e:
            # delete binary backup directory
            delCmd = g_file.SHELL_CMD_DICT["deleteDir"] % \
                     (self.context.tmpDir, os.path.join(self.context.tmpDir,
                                                        'backupTemp_*'))
            CmdExecutor.execCommandWithMode(delCmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)
            raise Exception(str(e))

        self.context.logger.log("Successfully backed up current "
                                "application and configurations.", "constant")

    def restoreNodeVersion(self):
        """
        function: Restore the application and configuration
        1. restore old version
        2. restore environment variables
        input : NA
        output: NA
        """
        self.context.logger.log("Restoring application and configurations.")

        try:
            # restore old version
            cmd = "%s -U %s -P %s -p -b -l %s" % \
                  (OMCommand.getLocalScript("Local_Restore"),
                   self.context.user, self.context.upgradeBackupPath,
                   self.context.localLog)
            self.context.logger.debug("Command for restoring "
                                      "old version: %s" % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)

            # restore environment variables
            cmd = "(if [ -f '%s'_gauss ];then mv '%s'_gauss '%s';fi)" % \
                  (self.context.userProfile, self.context.userProfile,
                   self.context.userProfile)
            self.context.logger.debug("Command for restoring environment file:"
                                      " %s" % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)
        except Exception as e:
            raise Exception(str(e))

        self.context.logger.log("Successfully restored application and "
                                "configuration.")

    def modifySocketDir(self):
        """
        function: modify the unix socket directory
        input : NA
        output: NA
        """
        self.context.logger.log("Modifying the socket path.", "addStep")
        try:
            # modify the socket path for all CN/DN instances
            self.setGUCValue(
                "unix_socket_directory",
                DefaultValue.getTmpDirAppendMppdb(self.context.user), "set")
        except Exception as e:
            raise Exception(str(e))

        self.context.logger.log("Successfully modified socket path.",
                                "constant")

    ###########################################################################
    # Rollback upgrade functions
    ###########################################################################
    def cleanBackupFiles(self):
        """
        function: Clean backup files.
        input : action
        output : NA
        """
        try:
            # clean backup files
            cmd = "(if [ -f '%s/OldDbClusterInfo.py' ]; then rm -f " \
                  "'%s/OldDbClusterInfo.py'; fi) &&" % \
                  (self.context.tmpDir, self.context.tmpDir)
            cmd += "(if [ -f '%s/OldDbClusterInfo.pyc' ]; then rm -f " \
                   "'%s/OldDbClusterInfo.pyc'; fi) &&" % \
                   (self.context.tmpDir, self.context.tmpDir)
            cmd += "(if [ -d '%s/script' ]; then rm -rf '%s/script'; " \
                   "fi) &&" % (self.context.tmpDir, self.context.tmpDir)
            cmd += "(if [ -f '%s/oldclusterinfo' ]; then rm -f " \
                   "'%s/oldclusterinfo'; fi) &&" % \
                   (self.context.tmpDir, self.context.tmpDir)
            cmd += "(if [ -f '%s/oldclusterGUC' ]; then rm -f " \
                   "'%s/oldclusterGUC'; fi) &&" % \
                   (self.context.tmpDir, self.context.tmpDir)
            cmd += "(if [ -f '%s/cluster_static_config' ]; then rm -f " \
                   "'%s/cluster_static_config'; fi) &&" % \
                   (self.context.tmpDir, self.context.tmpDir)
            cmd += "(if [ -f '%s/c_functionfilelist.dat' ]; then rm -f " \
                   "'%s/c_functionfilelist.dat'; fi) &&" % \
                   (self.context.tmpDir, self.context.tmpDir)
            cmd += "(if [ -f '%s'_gauss ]; then rm -f '%s'_gauss ; fi) &&" % \
                   (self.context.userProfile, self.context.userProfile)
            cmd += "(if [ -f '%s/oldclusterinfo.json' ]; then rm -f " \
                   "'%s/oldclusterinfo.json'; fi) &&" % \
                   (self.context.tmpDir, self.context.tmpDir)
            cmd += "(if [ -f '%s/%s' ]; then rm -f '%s/%s'; fi) &&" % \
                   (self.context.tmpDir, const.CLUSTER_CNSCONF_FILE,
                    self.context.tmpDir, const.CLUSTER_CNSCONF_FILE)
            cmd += "(rm -f '%s'/gauss_crontab_file_*) &&" % self.context.tmpDir
            cmd += "(if [ -d '%s' ]; then rm -rf '%s'; fi) &&" % \
                   (self.context.upgradeBackupPath,
                    self.context.upgradeBackupPath)
            cmd += "(if [ -f '%s/pg_proc_mapping.txt' ]; then rm -f" \
                   " '%s/pg_proc_mapping.txt'; fi)" % \
                   (self.context.tmpDir, self.context.tmpDir)
            self.context.logger.debug("Command for cleaning "
                                      "backup files: %s" % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)

        except Exception as e:
            raise Exception(str(e))
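
    # Illustrative sketch (an addition, not part of the upgrade flow):
    # builds one guarded shell delete of the form used above, so each
    # target is only removed if it still exists.
    @staticmethod
    def _guarded_remove_cmd(path, is_dir=False):
        flag, rm = ("-d", "rm -rf") if is_dir else ("-f", "rm -f")
        return "(if [ %s '%s' ]; then %s '%s'; fi)" % (flag, path, rm, path)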

    def cleanBinaryUpgradeBakFiles(self, isRollBack=False):
        """
        function: Clean backup files, including cluster_static_config,
                  cluster_dynamic_config, binary.tar, parameter.tar.
        input : isRollBack
        output: NA
        """
        if isRollBack:
            self.context.logger.debug("Cleaning backup files.")
        else:
            self.context.logger.debug("Cleaning backup files.", "addStep")

        try:
            # clean backup files
            self.cleanBackupFiles()
        except Exception as e:
            raise Exception(str(e))
        if isRollBack:
            self.context.logger.debug("Successfully cleaned backup files.")
        else:
            self.context.logger.debug("Successfully cleaned backup files.",
                                      "constant")
    def doHealthCheck(self, checkPosition):
        """
        function: Do health check; if healthy, return 0, else return 1
        input : checkPosition
        output: 0 successfully
                1 failed
        """
        #######################################################################
        # When doing binary-upgrade:
        # const.OPTION_PRECHECK   -> cluster Normal
        #                         -> database can connect
        # const.OPTION_POSTCHECK  -> cluster Normal
        #                         -> package version Normal
        #                         -> database can connect
        #######################################################################
        self.context.logger.log("Start to do health check.", "addStep")

        status = 0
        output = ""

        if checkPosition == const.OPTION_PRECHECK:
            if self.checkClusterStatus(checkPosition, True) != 0:
                output += "\n Cluster status does not match condition."
            if self.checkConnection() != 0:
                output += "\n Database could not be connected."
        elif checkPosition == const.OPTION_POSTCHECK:
            if self.checkClusterStatus(checkPosition) != 0:
                output += "\n Cluster status is Abnormal."
            if not self.checkVersion(
                    self.context.newClusterVersion,
                    self.context.clusterInfo.getClusterNodeNames()):
                output += "\n The gaussdb version is inconsistent."
            if self.checkConnection() != 0:
                output += "\n Database could not be connected."
        else:
            # Invalid check position
            output += "\n Invalid check position."
        if output != "":
            status = 1
        # all checks have passed, return 0
        self.context.logger.log("Successfully checked cluster status.",
                                "constant")
        return (status, output)

    def checkVersion(self, checkinfo, checknodes):
        """
        function: Check if the nodes have been upgraded; if the gaussdb bin
                  file version is the same on all hosts, return True,
                  else return False
        input : checkinfo, checknodes
        output: True  successfully
                False failed
        """
        self.context.logger.debug(
            "Start to check gaussdb version consistency.")
        if self.context.isSingle:
            self.context.logger.debug("This is a single cluster,"
                                      " no need to check it.")
            return True

        try:
            # checking gaussdb bin file version VxxxRxxxCxx or commitid
            cmd = "source %s;%s -t %s -v %s -U %s -l %s" % \
                  (self.context.userProfile,
                   OMCommand.getLocalScript("Local_Check_Upgrade"),
                   const.ACTION_CHECK_VERSION,
                   checkinfo,
                   self.context.user,
                   self.context.localLog)
            self.context.logger.debug("Command for checking gaussdb version "
                                      "consistency: %s." % cmd)
            (status, output) = \
                self.context.sshTool.getSshStatusOutput(cmd, checknodes)
            for node in status.keys():
                failFlag = "Failed to check version information"
                if status[node] != DefaultValue.SUCCESS or \
                        output.find(failFlag) >= 0:
                    raise Exception(ErrorCode.GAUSS_529["GAUSS_52929"] +
                                    "Error: \n%s" % str(output))
            # gaussdb bin file version is the same on all hosts, return True
            self.context.logger.debug("Successfully checked gaussdb"
                                      " version consistency.")
            return True
        except Exception as e:
            self.context.logger.debug(str(e))
            return False

    def _query_cluster_status(self):
        """
        Query cluster status
        """
        cmd = "source %s;gs_om -t query" % self.context.userProfile
        (status, output) = subprocess.getstatusoutput(cmd)
        if "Cascade Need repair" in output:
            self.context.logger.debug(
                "Cascade node disconnected, "
                "check again after 5 seconds.\n{0}".format(output))
            time.sleep(5)
            (status, output) = subprocess.getstatusoutput(cmd)
            self.context.logger.debug("Retry query cluster status finished. "
                                      "Output:\n{0}".format(output))
        return cmd, status, output

    def checkClusterStatus(self, checkPosition=const.OPTION_PRECHECK,
                           doDetailCheck=False):
        """
        function: Check cluster status; if NORMAL, return 0, else return 1.
                  For grey upgrade, if we have switched to the new bin, we
                  will remove abnormal nodes and then return 0, else return 1
        input : checkPosition, doDetailCheck
        output: 0 successfully
                1 failed
        """
        self.context.logger.debug("Start to check cluster status.")
        # build the query cmd
        # judge whether the execution succeeded from its result
        cmd, status, output = self._query_cluster_status()

        if status != 0:
            self.context.logger.debug(
                "Failed to execute command %s.\nStatus:%s\nOutput:%s" %
                (cmd, status, output))
            return 1
        self.context.logger.debug(
            "Successfully obtained cluster status information. "
            "Cluster status information:\n%s" % output)
        if output.find("Normal") < 0:
            self.context.logger.debug("The cluster_state is Abnormal.")
            if checkPosition == const.OPTION_POSTCHECK:
                if output.find("Degraded") < 0:
                    self.context.logger.debug("The cluster_state is not "
                                              "Degraded under postcheck.")
                    return 1
            else:
                return 1

        # do more checks if required
        if doDetailCheck:
            cluster_state_check = False
            redistributing_check = False
            for line in output.split('\n'):
                if len(line.split(":")) != 2:
                    continue
                (key, value) = line.split(":")
                if key.strip() == "cluster_state" and \
                        value.strip() == "Normal":
                    cluster_state_check = True
                elif key.strip() == "redistributing" and value.strip() == "No":
                    redistributing_check = True
            if cluster_state_check and redistributing_check:
                self.context.logger.debug("Cluster_state is Normal and "
                                          "redistributing is No.")
                return 0
            else:
                self.context.logger.debug(
                    "Cluster status information does not meet the upgrade "
                    "condition constraints. When upgrading, cluster_state "
                    "must be Normal and redistributing must be No.")
                return 1

        # cluster is NORMAL, return 0
        return 0
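
    # Illustrative sketch (an addition, not part of the upgrade flow): the
    # detail check above scans "key : value" lines from gs_om output; the
    # same scan can be written as a small parser. status_output is assumed
    # to be the raw text returned by 'gs_om -t query'.
    @staticmethod
    def _parse_status_fields(status_output):
        fields = {}
        for line in status_output.split('\n'):
            parts = line.split(":")
            if len(parts) == 2:
                fields[parts[0].strip()] = parts[1].strip()
        return fields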

    def waitClusterNormalDegrade(self, waitTimeOut=300):
        """
        function: Check if the cluster status is Normal for each main step
                  of online upgrade
        input : waitTimeOut, default is 300.
        output : NA
        """
        # get the end time
        self.context.logger.log("Wait for the cluster status to become "
                                "normal or degraded.")
        endTime = datetime.now() + timedelta(seconds=int(waitTimeOut))
        while True:
            cmd = "source %s;gs_om -t status --detail" % \
                  self.context.userProfile
            (status, output) = subprocess.getstatusoutput(cmd)
            if status == 0 and (output.find("Normal") >= 0 or
                                output.find("Degraded") >= 0):
                self.context.logger.debug(
                    "The cluster status is normal or degraded now.")
                break

            if datetime.now() >= endTime:
                self.context.logger.debug("The cmd is %s " % cmd)
                raise Exception("Timeout." + "\n" +
                                ErrorCode.GAUSS_516["GAUSS_51602"])
            else:
                self.context.logger.debug(
                    "Cluster status has not reached normal. Wait for another"
                    " 3 seconds.\n%s" % output)
                time.sleep(3)  # sleep 3 seconds

    def checkConnection(self):
        """
        function: Check if the cluster accepts connections.
                  Under inplace upgrade, all DBs should be connectable;
                  under grey upgrade, make sure all CNs on nodes that are
                  not being upgraded, or on extracted abnormal nodes, can
                  be connected. If connections are accepted, return 0,
                  else return 1.
        1. find a cn instance
        2. connect this cn and exec sql cmd
        input : NA
        output: 0 successfully
                1 failed
        """
        self.context.logger.debug("Start to check database connection.")
        for dbNode in self.context.clusterInfo.dbNodes:
            if len(dbNode.datanodes) == 0:
                continue
            for dnInst in dbNode.datanodes:
                # connect this DB and exec sql cmd
                sql = "SELECT 1;"
                (status, output) = \
                    ClusterCommand.remoteSQLCommand(
                        sql, self.context.user, dnInst.hostname, dnInst.port,
                        False, DefaultValue.DEFAULT_DB_NAME,
                        IsInplaceUpgrade=True)
                if status != 0 or not output.isdigit():
                    self.context.logger.debug(
                        "Failed to execute SQL on [%s]: %s. Error: \n%s" %
                        (dnInst.hostname, sql, str(output)))
                    return 1

        # all DBs accept connections, return 0
        self.context.logger.debug("Successfully checked database connection.")
        return 0

    def createBakPath(self):
        """
        function: create bak path
        input : NA
        output : NA
        """
        cmd = "(if [ ! -d '%s' ]; then mkdir -p '%s'; fi)" % \
              (self.context.upgradeBackupPath, self.context.upgradeBackupPath)
        cmd += " && (chmod %d -R %s)" % (DefaultValue.KEY_DIRECTORY_MODE,
                                         self.context.upgradeBackupPath)
        self.context.logger.debug("Command for creating directory: %s" % cmd)
        CmdExecutor.execCommandWithMode(cmd,
                                        self.context.sshTool,
                                        self.context.isSingle,
                                        self.context.mpprcFile)
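
    # Illustrative sketch (an addition, not part of the upgrade flow): the
    # local-only equivalent of the "mkdir -p && chmod" idiom above, using
    # the standard library; mode 0o700 mirrors a typical directory
    # permission such as KEY_DIRECTORY_MODE (an assumption here).
    @staticmethod
    def _make_private_dir(path):
        os.makedirs(path, exist_ok=True)
        os.chmod(path, 0o700)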

    def recordDirFile(self):
        """
        function: record dir file
        input: NA
        output: NA
        """
        self.context.logger.debug("Create the file to record "
                                  "old and new app directory.")
        # write the old and new cluster app paths into the backup dir
        appDirRecord = os.path.join(self.context.upgradeBackupPath,
                                    const.RECORD_UPGRADE_DIR)
        FileUtil.createFile(appDirRecord, True, DefaultValue.KEY_FILE_MODE)
        FileUtil.writeFile(appDirRecord, [self.context.oldClusterAppPath,
                                          self.context.newClusterAppPath], 'w')
        self.distributeFile(appDirRecord)
        self.context.logger.debug("Successfully created the file to "
                                  "record old and new app directory.")

    def copyBakVersion(self):
        """
        Under commit, if we have cleaned the old install path and a node is
        then disabled, we cannot get the old version;
        under choseStrategy, we will not pass the check
        :return: NA
        """
        versionFile = os.path.join(self.context.oldClusterAppPath,
                                   "bin/upgrade_version")
        bakVersionFile = os.path.join(self.context.upgradeBackupPath,
                                      "old_upgrade_version")
        cmd = "(if [ -f '%s' ]; then cp -f -p '%s' '%s';fi)" % \
              (versionFile, versionFile, bakVersionFile)
        cmd += " && (chmod %d %s)" % \
               (DefaultValue.KEY_FILE_MODE, bakVersionFile)
        CmdExecutor.execCommandWithMode(cmd,
                                        self.context.sshTool,
                                        self.context.isSingle,
                                        self.context.mpprcFile)

    def cleanInstallPath(self, cleanNew=const.NEW):
        """
        function: after grey upgrade succeeds, clean the old install path
        input : cleanNew
        output: NA
        """
        self.context.logger.debug("Cleaning %s install path." % cleanNew,
                                  "addStep")
        # clean old install path
        if cleanNew == const.NEW:
            installPath = self.context.newClusterAppPath
        elif cleanNew == const.OLD:
            installPath = self.context.oldClusterAppPath
        else:
            raise Exception(ErrorCode.GAUSS_529["GAUSS_52937"])

        cmd = "%s -t %s -U %s -R %s -l %s" % \
              (OMCommand.getLocalScript("Local_Upgrade_Utility"),
               const.ACTION_CLEAN_INSTALL_PATH,
               self.context.user,
               installPath,
               self.context.localLog)
        if self.context.forceRollback:
            cmd += " --force"
        self.context.logger.debug("Command for cleaning %s install path: %s" %
                                  (cleanNew, cmd))
        CmdExecutor.execCommandWithMode(cmd,
                                        self.context.sshTool,
                                        self.context.isSingle,
                                        self.context.mpprcFile)
        self.context.logger.log("Successfully cleaned %s install path." %
                                cleanNew, "constant")

    def installNewBin(self):
        """
        function: install new binary in a new directory
        1. get env GAUSSLOG
        2. get env PGHOST
        3. install new bin file
        4. sync old config to new bin path
        5. update env
        input: none
        output: none
        """
        try:
            self.context.logger.log("Installing new binary.", "addStep")

            # install new bin file
            cmd = "%s -t 'install_cluster' -U %s:%s -R '%s' -P %s -c %s" \
                  " -l '%s' -X '%s' -T -u" % \
                  (OMCommand.getLocalScript("Local_Install"),
                   self.context.user,
                   self.context.group,
                   self.context.newClusterAppPath,
                   self.context.tmpDir,
                   self.context.clusterInfo.name,
                   self.context.localLog,
                   self.context.xmlFile)
            self.context.logger.debug(
                "Command for installing new binary: %s." % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)
            self.context.logger.debug(
                "Successfully installed new binary files.")
        except Exception as e:
            self.context.logger.debug("Failed to install new binary files.")
            raise Exception(str(e))

    def backupHotpatch(self):
        """
        function: backup the hotpatch config file patch.info in
                  xxx/data/hotpatch
        input : NA
        output: NA
        """
        self.context.logger.debug("Start to backup hotpatch.")
        try:
            cmd = "%s -t %s -U %s --upgrade_bak_path=%s " \
                  "--new_cluster_app_path=%s -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_BACKUP_HOTPATCH,
                   self.context.user,
                   self.context.upgradeBackupPath,
                   self.context.newClusterAppPath,
                   self.context.localLog)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)
        except Exception as e:
            raise Exception("Failed to backup hotpatch config file." + str(e))
        self.context.logger.log("Successfully backed up hotpatch config file.")

    def rollbackHotpatch(self):
        """
        function: rollback the hotpatch config file patch.info in
                  xxx/data/hotpatch
        input : NA
        output: NA
        """
        self.context.logger.debug("Start to rollback hotpatch.")
        try:
            cmd = "%s -t %s -U %s --upgrade_bak_path=%s -l %s -X '%s'" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_ROLLBACK_HOTPATCH,
                   self.context.user,
                   self.context.upgradeBackupPath,
                   self.context.localLog,
                   self.context.xmlFile)
            if self.context.forceRollback:
                cmd += " --force"
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)
        except Exception as e:
            raise Exception("Failed to rollback hotpatch config file."
                            + str(e))
        self.context.logger.log(
            "Successfully rolled back hotpatch config file.")

    def backup_version_file(self):
        """
        Backup the old version file.
        """
        oldVersionFile = "%s/bin/%s" % \
                         (self.context.oldClusterAppPath,
                          DefaultValue.DEFAULT_DISABLED_FEATURE_FILE_NAME)
        oldLicenseFile = "%s/bin/%s" % (self.context.oldClusterAppPath,
                                        DefaultValue.DEFAULT_LICENSE_FILE_NAME)

        cmd = "(if [ -d %s ] && [ -f %s ]; then cp -f %s %s; fi) && " % \
              (self.context.upgradeBackupPath, oldVersionFile, oldVersionFile,
               self.context.upgradeBackupPath)
        cmd += "(if [ -d %s ] && [ -f %s ]; then cp -f %s %s; fi)" % \
               (self.context.upgradeBackupPath, oldLicenseFile, oldLicenseFile,
                self.context.upgradeBackupPath)

        self.context.logger.debug(
            "Execute command to backup the product version file and the "
            "license control file: %s" % cmd)
        CmdExecutor.execCommandWithMode(cmd,
                                        self.context.sshTool,
                                        self.context.isSingle,
                                        self.context.mpprcFile)

    def getTimeFormat(self, seconds):
        """
        format seconds to h-m-s
        input: int
        output: str
        """
        seconds = int(seconds)
        if seconds == 0:
            return 0
        # Convert the seconds to standard time; use integer division so the
        # components stay whole numbers under Python 3
        hour = seconds // 3600
        minute = (seconds - hour * 3600) // 60
        s = seconds % 60
        resultstr = ""
        if hour != 0:
            resultstr += "%dh" % hour
        if minute != 0:
            resultstr += "%dm" % minute
        return "%s%ds" % (resultstr, s)
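
    # Illustrative usage (an addition, not part of the upgrade flow), for
    # an UpgradeImpl instance `impl` (the name is hypothetical):
    #
    #     impl.getTimeFormat(3725)  # -> "1h2m5s"
    #     impl.getTimeFormat(59)    # -> "59s"
    #     impl.getTimeFormat(0)     # -> 0 (zero is special-cased)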

    def CopyCerts(self):
        """
        function: copy certs
        input : NA
        output : NA
        """
        self.context.logger.log("Copy certs from %s to %s." % (
            self.context.oldClusterAppPath, self.context.newClusterAppPath))
        try:
            cmd = "%s -t %s -U %s --old_cluster_app_path=%s " \
                  "--new_cluster_app_path=%s -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_COPY_CERTS,
                   self.context.user,
                   self.context.oldClusterAppPath,
                   self.context.newClusterAppPath,
                   self.context.localLog)
            self.context.logger.debug("Command for copying certs: '%s'." % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)

        except Exception as e:
            self.context.logger.log("Failed to copy certs from %s to %s." %
                                    (self.context.oldClusterAppPath,
                                     self.context.newClusterAppPath))
            raise Exception(str(e))
        time.sleep(10)
        self.context.logger.log("Successfully copied certs from %s to %s." %
                                (self.context.oldClusterAppPath,
                                 self.context.newClusterAppPath),
                                "constant")

    def clean_cm_instance(self):
        """
        Clean the CM instance directory
        """
        self.context.logger.log("Start to roll back CM instance.")
        cm_strategy = self.get_upgrade_cm_strategy()
        if cm_strategy == 1:
            self.context.logger.debug("Rollback needs to clean cm directory")
            cmd = "%s -t %s -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_CLEAN_CM,
                   self.context.localLog)
            self.context.logger.debug(
                "Roll back CM install command: {0}".format(cmd))
            self.context.sshTool.executeCommand(
                cmd, hostList=self.context.nodeNames)
            self.context.logger.debug("Cleaned cm directory successfully.")
        else:
            self.context.logger.debug(
                "No need to clean CM instance directory.")

    def switchBin(self, switchTo=const.OLD):
        """
        function: switch bin
        input : switchTo
        output : NA
        """
        self.context.logger.log("Switch symbolic link to %s binary directory."
                                % switchTo, "addStep")
        try:
            cmd = "%s -t %s -U %s -l %s" % \
                  (OMCommand.getLocalScript("Local_Upgrade_Utility"),
                   const.ACTION_SWITCH_BIN,
                   self.context.user,
                   self.context.localLog)
            if switchTo == const.NEW:
                cmd += " -R '%s'" % self.context.newClusterAppPath
            else:
                cmd += " -R '%s'" % self.context.oldClusterAppPath
            if self.context.forceRollback:
                cmd += " --force"
            self.context.logger.debug("Command for switching binary directory:"
                                      " '%s'." % cmd)
            if self.context.is_grey_upgrade:
                CmdExecutor.execCommandWithMode(cmd,
                                                self.context.sshTool,
                                                self.context.isSingle,
                                                self.context.mpprcFile,
                                                self.context.nodeNames)
            else:
                CmdExecutor.execCommandWithMode(cmd,
                                                self.context.sshTool,
                                                self.context.isSingle,
                                                self.context.mpprcFile)

        except Exception as e:
            self.context.logger.log("Failed to switch symbolic link to %s "
                                    "binary directory." % switchTo)
            raise Exception(str(e))
        time.sleep(10)
        self.context.logger.log("Successfully switched symbolic link to %s "
                                "binary directory." % switchTo, "constant")
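
    # Illustrative sketch (an addition, not part of the upgrade flow): an
    # atomic flavor of the symlink switch that is delegated to
    # Local_Upgrade_Utility above; create the new link under a temporary
    # name, then rename it over the old one.
    @staticmethod
    def _switch_symlink_example(link_path, target_dir):
        tmp_link = link_path + ".new"
        if os.path.lexists(tmp_link):
            os.remove(tmp_link)
        os.symlink(target_dir, tmp_link)
        os.rename(tmp_link, link_path)  # rename is atomic on POSIX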

    def clearOtherToolPackage(self, action=""):
        """
        function: clear other tool packages
        input : action
        output : NA
        """
        if action == const.ACTION_AUTO_ROLLBACK:
            self.context.logger.debug("Clean other tool package files.")
        else:
            self.context.logger.debug(
                "Clean other tool package files.", "addStep")
        try:
            commonPart = PackageInfo.get_package_back_name().rsplit("_", 1)[0]
            gphomePath = \
                os.listdir(ClusterDir.getClusterToolPath(self.context.user))
            commitId = self.newCommitId
            if action == const.ACTION_AUTO_ROLLBACK:
                commitId = self.oldCommitId
            for filePath in gphomePath:
                if commonPart in filePath and commitId not in filePath:
                    toDeleteFilePath = os.path.join(
                        ClusterDir.getClusterToolPath(self.context.user),
                        filePath)
                    deleteCmd = "(if [ -f '%s' ]; then rm -rf '%s'; fi) " % \
                                (toDeleteFilePath, toDeleteFilePath)
                    CmdExecutor.execCommandWithMode(
                        deleteCmd,
                        self.context.sshTool,
                        self.context.isSingle,
                        self.context.mpprcFile)
        except Exception as e:
            self.context.logger.log(
                "Failed to clean other tool package files.")
            raise Exception(str(e))
        if action == const.ACTION_AUTO_ROLLBACK:
            self.context.logger.debug(
                "Successfully cleaned other tool package files.")
        else:
            self.context.logger.debug(
                "Successfully cleaned other tool package files.", "constant")

    def createGphomePack(self):
        """
        function: create Gphome pack
        input : NA
        output : NA
        """
        try:
            cmd = "(if [ ! -d '%s' ]; then mkdir -p '%s'; fi)" % \
                  (ClusterDir.getClusterToolPath(self.context.user),
                   ClusterDir.getClusterToolPath(self.context.user))
            cmd += " && (chmod %d -R %s)" % \
                   (DefaultValue.KEY_DIRECTORY_MODE,
                    ClusterDir.getClusterToolPath(self.context.user))
            self.context.logger.debug(
                "Command for creating directory: %s" % cmd)
            CmdExecutor.execCommandWithMode(cmd,
                                            self.context.sshTool,
                                            self.context.isSingle,
                                            self.context.mpprcFile)
            oldPackName = "%s-Package-bak_%s.tar.gz" % \
                          (VersionInfo.PRODUCT_NAME_PACKAGE, self.oldCommitId)
            packFilePath = "%s/%s" % (ClusterDir.getClusterToolPath(
                self.context.user), oldPackName)
            copyNode = ""
            cmd = "if [ -f '%s' ]; then echo 'GetFile'; " \
                  "else echo 'NoThisFile'; fi" % packFilePath
            self.context.logger.debug("Command for checking file: %s" % cmd)
            (status, output) = self.context.sshTool.getSshStatusOutput(
                cmd, self.context.clusterNodes, self.context.mpprcFile)
            outputMap = self.context.sshTool.parseSshOutput(
                self.context.clusterNodes)
            self.context.logger.debug("Output: %s" % output)
            for node in self.context.clusterNodes:
                if status[node] == DefaultValue.SUCCESS:
                    if 'GetFile' in outputMap[node]:
                        copyNode = node
                        break
            if copyNode:
                self.context.logger.debug("Copy the file %s from node %s." %
                                          (packFilePath, copyNode))
                for node in self.context.clusterNodes:
                    if status[node] == DefaultValue.SUCCESS:
                        if 'NoThisFile' in outputMap[node]:
                            cmd = LocalRemoteCmd.getRemoteCopyCmd(
                                packFilePath,
                                ClusterDir.getClusterToolPath(
                                    self.context.user),
                                str(copyNode), False, 'directory', node)
                            self.context.logger.debug(
                                "Command for copying directory: %s" % cmd)
                            CmdExecutor.execCommandLocally(cmd)
            else:
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50210"] %
                                packFilePath)
        except Exception as e:
            raise Exception(str(e))
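
    # Illustrative sketch (an addition, not part of the upgrade flow): the
    # node-selection loop above reduces to "pick the first node that
    # reports GetFile". statuses and outputs are assumed to be the dicts
    # returned by the ssh tool, keyed by node name.
    @staticmethod
    def _first_node_with_file(nodes, statuses, outputs):
        for node in nodes:
            if statuses.get(node) == DefaultValue.SUCCESS and \
                    'GetFile' in outputs.get(node, ''):
                return node
        return None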