From a1cf7f333279b85232111c962935e143c8e18b50 Mon Sep 17 00:00:00 2001 From: chentingting <2393940156@qq.com> Date: Tue, 21 Feb 2023 15:43:33 +0800 Subject: [PATCH] add support upgrade for dss --- script/base_utils/os/env_util.py | 35 +++++ script/base_utils/os/file_util.py | 27 +++- script/gs_preinstall | 30 ++-- script/gspylib/common/ErrorCode.py | 3 +- script/gspylib/common/LocalBaseOM.py | 8 +- script/gspylib/component/DSS/dss_checker.py | 104 ++++++++++++- script/gspylib/component/DSS/dss_comp.py | 52 +++---- .../component/Kernel/DN_OLAP/DN_OLAP.py | 2 +- script/gspylib/component/Kernel/Kernel.py | 8 +- script/impl/om/OLAP/OmImplOLAP.py | 14 ++ .../preinstall/OLAP/PreinstallImplOLAP.py | 50 +++--- script/impl/preinstall/PreinstallImpl.py | 11 +- script/impl/upgrade/UpgradeConst.py | 1 + script/impl/upgrade/UpgradeImpl.py | 58 ++++++- script/local/CheckUninstall.py | 1 - script/local/Install.py | 55 +++++-- script/local/PreInstallUtility.py | 68 +++++--- script/local/StartInstance.py | 33 +++- script/local/Uninstall.py | 9 +- script/local/UpgradeUtility.py | 147 ++++++++++++++++-- 20 files changed, 585 insertions(+), 131 deletions(-) diff --git a/script/base_utils/os/env_util.py b/script/base_utils/os/env_util.py index 6ab3cfa..e54ed65 100644 --- a/script/base_utils/os/env_util.py +++ b/script/base_utils/os/env_util.py @@ -230,3 +230,38 @@ class EnvUtil(object): if os.getuid() == 0 and user == "": return "" return EnvUtil.getEnvironmentParameterValue("DSS_HOME", user) + + @staticmethod + def is_fuzzy_upgrade(user, logger=None, env_file=None): + ''' + If gauss_env is 2 or the $GAUSSHOME/bin is exist, is upgrade. + ''' + app_bin = os.path.realpath( + os.path.join( + EnvUtil.getEnvironmentParameterValue('GAUSSHOME', + user, + env_file=env_file), + 'bin')) + gauss_env = EnvUtil.getEnvironmentParameterValue('GAUSS_ENV', + user, + env_file=env_file) + if os.path.isdir(app_bin): + if logger: + logger.debug("The $GAUSSHOME/bin is exist.") + if gauss_env in ["1", "2"]: + if logger: + logger.debug(f"The $GAUSS_ENV is {gauss_env}.") + if os.path.isdir(app_bin) or gauss_env in ["2"]: + if logger: + logger.debug("There is the upgrade is in progress.") + return True + return False + + @staticmethod + def is_dss_mode(user): + dss_home = EnvUtil.get_dss_home(user) + vgname = EnvUtil.getEnv('VGNAME') + if os.path.isdir(dss_home) and vgname: + return True + else: + return False diff --git a/script/base_utils/os/file_util.py b/script/base_utils/os/file_util.py index 68e58f2..40fbce0 100644 --- a/script/base_utils/os/file_util.py +++ b/script/base_utils/os/file_util.py @@ -211,6 +211,19 @@ class FileUtil(object): return True + @staticmethod + def is_in_file_with_context(file_path, + call_back_name=lambda _: True, + call_back_context=lambda _: True): + ''' + Easy to match strings in files + ''' + if call_back_name(file_path): + with open(file_path, 'r') as fr_any: + if call_back_context(fr_any.read()): + return True + return False + @staticmethod def readFile(filename, keyword="", rows=0): """ @@ -398,6 +411,18 @@ class FileUtil(object): raise Exception(ErrorCode.GAUSS_501["GAUSS_50107"] % path + " Error:\n%s." % output + "The cmd is %s" % cmd) + @staticmethod + def get_caps(path): + ''' + Get the permissions of some root users. + ''' + cmd = f'getcap {path}' + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_501["GAUSS_50107"] % path + + " Error:\n%s." % output + "The cmd is %s" % cmd) + return output.strip() + @staticmethod def changeOwner(user, path, recursive=False, cmd_type="shell", @@ -920,7 +945,7 @@ class FileUtil(object): if status != 0: raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % "log file" + " Directory:%s." % user_dir + " Error: \n%s" % file_output) - + @staticmethod def checkFileExists(file): """ diff --git a/script/gs_preinstall b/script/gs_preinstall index 04e4d8b..7c6c8f6 100644 --- a/script/gs_preinstall +++ b/script/gs_preinstall @@ -36,13 +36,13 @@ if "--unused-third-party" in sys.argv: clib_files = os.path.join(package_path, "gspylib/clib/*.so*") FileUtil.cleanDirectoryContent(lib_path) FileUtil.removeFile(clib_files) - + # use system pip dependecies import psutil import netifaces import cryptography import paramiko - + from gspylib.common.GaussLog import GaussLog from gspylib.common.Common import DefaultValue from gspylib.common.ErrorCode import ErrorCode @@ -53,8 +53,10 @@ from gspylib.threads.SshTool import SshTool from domain_utils.cluster_file.cluster_config_file import ClusterConfigFile from domain_utils.cluster_file.cluster_dir import ClusterDir from domain_utils.cluster_file.profile_file import ProfileFile +from domain_utils.cluster_file.version_info import VersionInfo from domain_utils.cluster_os.cluster_user import ClusterUser from base_utils.os.net_util import NetUtil +from base_utils.os.file_util import FileUtil from domain_utils.domain_common.cluster_constants import ClusterConstants from base_utils.os.user_util import UserUtil @@ -375,28 +377,36 @@ General options: cm_cmd = 'tar -xpf `ls openGauss*cm.tar.gz|tail -1`' cmd = self.get_dec_package_cmd(bin_cmd, cm_cmd) status, output = subprocess.getstatusoutput(cmd) - GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50217"] % - "version.cfg" + "The cmd is %s. " % cmd + - "The output is %s." % output) + if status != 0: + GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50217"] % + "version.cfg" + "The cmd is %s. " % cmd + + "The output is %s." % output) def get_dec_package_cmd(self, bin_cmd, cm_cmd): root = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..') clib = os.path.join(root, "script/gspylib/clib") bin_files = ['./bin/encrypt'] + clib_app = os.path.realpath( + os.path.join(f'{clib}', + f"dss_app_$(cat {root}/version.cfg | tail -n 1)")) + dss_files = [] + cm_files = [] if self.clusterInfo.enable_dss == 'on': cm_files = ['bin/cm_persist'] - bin_files.extend([ + dss_files = [ './bin/perctrl', './bin/dsscmd', './lib/libdssapi.so', './bin/dss_clear.sh' - ]) + ] else: cm_files = [] cmd = 'cd {} && '.format(root) - cmd += '{} {} && '.format(bin_cmd, ''.join(' '.join(bin_files))) + cmd += '{} {} && '.format(bin_cmd, ' '.join(bin_files + dss_files)) if cm_files: - cmd += '{} {} && '.format(cm_cmd, ''.join(' '.join(cm_files))) - cmd += '\mv {} {} && '.format(' '.join(bin_files + cm_files), clib) + cmd += '{} {} && '.format(cm_cmd, ' '.join(cm_files)) + cmd += 'mkdir -p {0} -m u=rwx && '.format(clib_app) + cmd += 'mv {} {} && '.format(' '.join(cm_files + dss_files), clib_app) + cmd += '\mv {} {} && '.format(' '.join(bin_files), clib) cmd += 'cd {} && rm -rf bin'.format(root) return cmd diff --git a/script/gspylib/common/ErrorCode.py b/script/gspylib/common/ErrorCode.py index 902a52d..badcb8c 100644 --- a/script/gspylib/common/ErrorCode.py +++ b/script/gspylib/common/ErrorCode.py @@ -628,7 +628,8 @@ class ErrorCode(): " version from function.", 'GAUSS_51655': "[GAUSS-51655] : There is %s on the cluster when operating on a cluster" "the %s parameter is not needed.", - 'GAUSS_51656': "[GAUSS-51656] : Waiting for udev trigger to end timeout" + 'GAUSS_51656': "[GAUSS-51656] : Waiting for udev trigger to end timeout", + 'GAUSS_51657': "[GAUSS-51657] : Waiting for start %s to end timeout" } ########################################################################### diff --git a/script/gspylib/common/LocalBaseOM.py b/script/gspylib/common/LocalBaseOM.py index 427e63b..578d25b 100644 --- a/script/gspylib/common/LocalBaseOM.py +++ b/script/gspylib/common/LocalBaseOM.py @@ -21,6 +21,7 @@ import os sys.path.append(sys.path[0] + "/../../") from gspylib.common.GaussLog import GaussLog from gspylib.common.DbClusterInfo import dbClusterInfo +from gspylib.common.DbClusterStatus import DbClusterStatus from gspylib.common.ErrorCode import ErrorCode from gspylib.component.CM.CM_OLAP.CM_OLAP import CM_OLAP from gspylib.component.DSS.dss_comp import Dss @@ -28,6 +29,10 @@ from gspylib.component.Kernel.DN_OLAP.DN_OLAP import DN_OLAP from domain_utils.cluster_file.version_info import VersionInfo from base_utils.os.net_util import NetUtil from base_utils.os.user_util import UserUtil +from base_utils.os.env_util import EnvUtil +from gspylib.component.DSS.dss_checker import DssConfig +import impl.upgrade.UpgradeConst as const + class LocalBaseOM(object): @@ -157,13 +162,14 @@ class LocalBaseOM(object): output: NA """ try: + is_dss_mode = EnvUtil.is_dss_mode(self.user) self.clusterInfo = dbClusterInfo() hostName = NetUtil.GetHostIpOrName() dynamicFileExist = False if self.__class__.__name__ == "Start": dynamicFileExist = \ self.clusterInfo.dynamicConfigExists(self.user) - if dynamicFileExist: + if dynamicFileExist and not is_dss_mode: self.clusterInfo.readDynamicConfig(self.user) self.dbNodeInfo = self.clusterInfo.getDbNodeByName(hostName) else: diff --git a/script/gspylib/component/DSS/dss_checker.py b/script/gspylib/component/DSS/dss_checker.py index dfba225..812ad68 100644 --- a/script/gspylib/component/DSS/dss_checker.py +++ b/script/gspylib/component/DSS/dss_checker.py @@ -23,16 +23,24 @@ import re import sys import base64 import json +import getpass +import time try: sys.path.append(sys.path[0] + "/../../../") from gspylib.common.ErrorCode import ErrorCode from base_utils.security.security_checker import SecurityChecker + from domain_utils.cluster_file.cluster_dir import ClusterDir + from base_utils.os.file_util import FileUtil + from base_utils.os.cmd_util import CmdUtil + except ImportError as e: sys.exit("[GAUSS-52200] : Unable to import module: %s." % str(e)) class DssConfig(): + DMS_DEFAULT_RESTART_DELAY = 1 + DMS_TMP_RESTART_DELAY = 300 def __init__(self, attr='', unzip_str='', offset=0): self.ids = '' @@ -128,11 +136,103 @@ class DssConfig(): key: value }).encode()).decode() else: + if not value.strip(): + return '' b64_ans = json.loads( - base64.urlsafe_b64decode(value.encode()).decode()).get( - key, '') + base64.urlsafe_b64decode(value.encode()).decode()).get(key, '') return b64_ans + @staticmethod + def get_cm_inst_path(cur_db_info, inst_type='cm_agent'): + if inst_type == 'cm_agent': + return DssConfig.get_simple_value( + DssConfig.get_simple_value(cur_db_info, ['cmagents']), + ['datadir']) + elif inst_type == 'cm_server': + return DssConfig.get_simple_value( + DssConfig.get_simple_value(cur_db_info, ['cmservers']), + ['datadir']) + return [] + + @staticmethod + def check_process_exist(check_flag, user=''): + if not user: + user = getpass.getuser() + check_cmd = 'ps -u {} v'.format(user) + sts, out = CmdUtil.getstatusoutput_by_fast_popen(check_cmd) + if sts not in [0]: + raise Exception(ErrorCode.GAUSS_512["GAUSS_51252"] + + ' Error: {}.'.format(str(out).strip())) + if str(out).find(check_flag) > -1: + return True + else: + return False + + @staticmethod + def set_cm_manual_flag(inst_id, flag, logger): + gauss_home = ClusterDir.get_gauss_home() + file_ = os.path.realpath( + os.path.join(gauss_home, + f'bin/instance_manual_start_{str(inst_id)}')) + logger.debug( + "Start to delete or add manual flag file: {}.".format(file_)) + if flag == 'start' and os.path.isfile(file_): + os.remove(file_) + logger.debug("End to delete manual flag file: {}.".format(file_)) + elif flag == 'stop' and not os.path.isfile(file_): + FileUtil.createFileInSafeMode(file_) + logger.debug("End to add manual flag file: {}.".format(file_)) + + @staticmethod + def get_cma_res_value(cma_path, key, res_name='dms_res'): + cma_res = os.path.join(cma_path, 'cm_resource.json') + if os.path.isfile(cma_res): + with open(cma_res, 'r') as fr: + res_dict = json.loads(fr.read()) + for dict_ in res_dict.get('resources', {}): + if dict_.get('name') == res_name: + return str(dict_.get(key, '')) + return '' + + @staticmethod + def wait_for_process_start(logger, flag, check_flag='', timeout=300): + if not check_flag: + check_flag = flag + logger.log(f"Start to wait for {flag} to be started.") + while timeout > 0: + if not DssConfig.check_process_exist(check_flag=check_flag): + if timeout % 5 == 0: + logger.debug(f'The process {flag} if not running.') + timeout -= 1 + time.sleep(1) + continue + else: + break + if timeout == 0: + raise Exception(ErrorCode.GAUSS_516['GAUSS_51657'] % flag) + logger.log(f'The process {flag} is running.') + + @staticmethod + def reload_cm_resource(logger, timeout=300, wait_for_start=True): + logger.debug('Start to reload the cm resource file.') + edit_cmd = f'cm_ctl res --edit --res_name="dms_res" ' \ + f'--res_attr="restart_delay={timeout}"' + logger.debug(f'The cmd of the reload: {edit_cmd}.') + sts, out = CmdUtil.getstatusoutput_by_fast_popen(edit_cmd) + if sts not in [0]: + raise Exception(ErrorCode.GAUSS_535["GAUSS_53507"] % edit_cmd + + "Error:%s." + out) + kill_cmd = "ps ux | grep 'bin/cm_agent' | grep -v grep " \ + "| awk '{print $2}' | xargs -r -n 100 kill -9" + logger.debug(f'The cmd of the kill cm agent is: %s.' % kill_cmd) + status, _ = CmdUtil.retryGetstatusoutput(kill_cmd, 3, 5) + if status == 0: + logger.log("Successfully kill the cm agent.") + else: + raise Exception("Failed to kill the cm agent.") + if wait_for_start: + DssConfig.wait_for_process_start(logger, 'cm_agent', 'bin/cm_agent') + logger.debug("End to kill the cm agent.") def __str__(self): ''' diff --git a/script/gspylib/component/DSS/dss_comp.py b/script/gspylib/component/DSS/dss_comp.py index f4134f3..75102a7 100644 --- a/script/gspylib/component/DSS/dss_comp.py +++ b/script/gspylib/component/DSS/dss_comp.py @@ -122,11 +122,14 @@ class DssInst(): 'Error: {}'.format(e)) @staticmethod - def get_dss_id_from_key(dss_home): + def get_dss_id_from_key(dss_home=''): ''' Obtaining INST_ID Through Configuration Items ''' try: + if not dss_home: + dss_home = EnvUtil.get_dss_home(getpass.getuser()) + cfg = os.path.join(dss_home, 'cfg', 'dss_inst.ini') inst_id = DssInst(cfg_path=cfg).parser.get('INST_ID', '') if inst_id.isdigit(): @@ -179,7 +182,7 @@ class Dss(BaseComponent): time.sleep(0.5) @staticmethod - def unreg_disk(dss_home, user='', clib='', logger=None): + def unreg_disk(dss_home, user='', clib_app='', logger=None): ''' The minimum ID is 0 and the maximum ID is 8. There are nine instances in total. @@ -192,12 +195,12 @@ class Dss(BaseComponent): un_reg_cmd_str = f'sh {dsscmd_path} {dss_home}; ' - if clib: + if clib_app: dsscmd_path = os.path.realpath( - os.path.join(clib, Dss.DSS_IOFENCE_FILENAME)) + os.path.join(clib_app, Dss.DSS_IOFENCE_FILENAME)) cmd_str = f'su - {user} -c "export DSS_HOME={dss_home}; ' - cmd_str += f'export LD_LIBRARY_PATH={clib}; ' - cmd_str += f'export PATH={clib}:$PATH; ' + cmd_str += f'export LD_LIBRARY_PATH={clib_app}; ' + cmd_str += f'export PATH={clib_app}:$PATH; ' un_reg_cmd_str = cmd_str + f'sh {dsscmd_path} {dss_home}; "' if logger: @@ -210,7 +213,12 @@ class Dss(BaseComponent): if logger: logger.debug(f'The result of the unreg: {out}') - def start_dss_server(self, kill_server=True, unrej=False, exist_so=False): + @staticmethod + def start_dss_server(logger=None, + bin_path='', + kill_server=True, + unrej=False, + exist_so=False): ''' The OM manually starts the DSS server to obtain the socket file. ''' @@ -222,42 +230,32 @@ class Dss(BaseComponent): dss_home = EnvUtil.get_dss_home(getpass.getuser()) if unrej: - Dss.unreg_disk(dss_home, logger=self.logger) + Dss.unreg_disk(dss_home, logger=logger) + if bin_path: + dss_cmd = os.path.realpath(os.path.join(bin_path, 'dssserver')) + else: + dss_cmd = 'dssserver' cmd = 'sh -c "source {} && nohup {} -D {} >/dev/null 2>&1 & "'.format( - EnvUtil.getMpprcFile(), os.path.join(self.binPath, 'dssserver'), - dss_home) + EnvUtil.getMpprcFile(), dss_cmd, dss_home) proc = FastPopen(cmd) out, err = proc.communicate() if proc.returncode != 0: raise Exception(ErrorCode.GAUSS_512["GAUSS_51252"] + ' Error: {}'.format(str(err + out).strip())) - self.logger.debug('Successfully start dss server') - - @staticmethod - def check_dss_exist(): - user = getpass.getuser() - check_cmd = 'ps -u {} v | grep dssserver | grep -v grep'.format(user) - sts, out = CmdUtil.getstatusoutput_by_fast_popen(check_cmd) - if sts != 0: - raise Exception(ErrorCode.GAUSS_512["GAUSS_51252"] + - ' Error: {}'.format(str(out).strip())) - - if str(out).find('dssserver'): - return True - else: - return False + if logger: + logger.debug('Successfully start dss server.') def initInstance(self): ''' om init dss server ''' - self.start_dss_server() + Dss.start_dss_server(self.logger, self.binPath) @staticmethod def catch_err(exist_so=True): ''' - This command is used to kill the dsserver after + This command is used to kill the dssserver after the dn initialization is complete to prevent the initialization process from exiting. ''' diff --git a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py index a0965d8..3601835 100644 --- a/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py +++ b/script/gspylib/component/Kernel/DN_OLAP/DN_OLAP.py @@ -144,7 +144,7 @@ class DN_OLAP(Kernel): if self.paxos_mode: cmd += " -c" elif self.dss_mode: - if not Dss.check_dss_exist(): + if not DssConfig.check_process_exist('dssserver'): raise Exception('The dssserver process does not exist.') vgname = EnvUtil.getEnv('VGNAME') dss_home = EnvUtil.getEnv('DSS_HOME') diff --git a/script/gspylib/component/Kernel/Kernel.py b/script/gspylib/component/Kernel/Kernel.py index 0ccbc21..93618d9 100644 --- a/script/gspylib/component/Kernel/Kernel.py +++ b/script/gspylib/component/Kernel/Kernel.py @@ -67,7 +67,7 @@ class Kernel(BaseComponent): """ def start(self, time_out=DefaultValue.TIMEOUT_CLUSTER_START, - security_mode="off", cluster_number=None): + security_mode="off", cluster_number=None, is_dss_mode=False): """ """ if cluster_number: @@ -77,12 +77,12 @@ class Kernel(BaseComponent): else: cmd = "%s/gs_ctl start -D %s " % ( self.binPath, self.instInfo.datadir) - if self.instInfo.instanceType == DefaultValue.MASTER_INSTANCE: + if not is_dss_mode and self.instInfo.instanceType == DefaultValue.MASTER_INSTANCE: if len(self.instInfo.peerInstanceInfos) > 0: cmd += "-M primary" - elif self.instInfo.instanceType == DefaultValue.CASCADE_STANDBY: + elif not is_dss_mode and self.instInfo.instanceType == DefaultValue.CASCADE_STANDBY: cmd += "-M cascade_standby" - elif self.instInfo.instanceType == DefaultValue.STANDBY_INSTANCE: + elif not is_dss_mode and self.instInfo.instanceType == DefaultValue.STANDBY_INSTANCE: cmd += "-M standby" if time_out is not None: cmd += " -t %s" % time_out diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 800ce53..2eebf5c 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -21,6 +21,7 @@ import subprocess import sys import re import time +import getpass sys.path.append(sys.path[0] + "/../../../../") from gspylib.common.DbClusterInfo import queryCmd @@ -32,6 +33,10 @@ from gspylib.common.OMCommand import OMCommand from impl.om.OmImpl import OmImpl from gspylib.os.gsfile import g_file from base_utils.os.net_util import NetUtil +from base_utils.os.env_util import EnvUtil +from gspylib.component.DSS.dss_checker import DssConfig + + ########################################### @@ -154,6 +159,15 @@ class OmImplOLAP(OmImpl): cluster_normal_status = [DbClusterStatus.CLUSTER_STATUS_NORMAL, DbClusterStatus.CLUSTER_STATUS_DEGRADED] + + if EnvUtil.is_dss_mode(self.context.g_opts.user): + cma_paths = DssConfig.get_cm_inst_path( + self.clusterInfo.dbNodes[nodeId]) + if cma_paths and DssConfig.get_cma_res_value( + cma_paths[0], key='restart_delay') != str( + DssConfig.DMS_DEFAULT_RESTART_DELAY): + DssConfig.reload_cm_resource( + self.logger, timeout=DssConfig.DMS_DEFAULT_RESTART_DELAY) if nodeId == 0 and self.dataDir: raise Exception(ErrorCode.GAUSS_516["GAUSS_51655"] % ("cm", "-D")) # start cluster diff --git a/script/impl/preinstall/OLAP/PreinstallImplOLAP.py b/script/impl/preinstall/OLAP/PreinstallImplOLAP.py index d336955..f669c0c 100644 --- a/script/impl/preinstall/OLAP/PreinstallImplOLAP.py +++ b/script/impl/preinstall/OLAP/PreinstallImplOLAP.py @@ -354,11 +354,17 @@ class PreinstallImplOLAP(PreinstallImpl): Create a VG on the first node. ''' if not self.context.clusterInfo.enable_dss == 'on': - self.context.logger.debug('The mode is non-dss') + self.context.logger.debug('The mode is non-dss.') return - clib_path = os.path.join(self.context.clusterToolPath, - 'script/gspylib/clib') + self.context.logger.debug('Start to create vg.') + if EnvUtil.is_fuzzy_upgrade(self.context.user, self.context.logger, + self.context.mpprcFile): + return + + clib_app = os.path.join(self.context.clusterToolPath, + 'script/gspylib/clib', + f'dss_app_{VersionInfo.getCommitid()}') dss_home = self.context.clusterInfo.dss_home for vgname, dss_disk in UdevContext.get_all_vgname_disk_pair( self.context.clusterInfo.dss_shared_disks, @@ -371,13 +377,13 @@ class PreinstallImplOLAP(PreinstallImpl): '{2}/dsscmd showdisk -g {3} -s vg_header -D {1}"' cv_cmd = 'su - {0} -c "export DSS_HOME={1}; export LD_LIBRARY_PATH={2};' \ '{2}/dsscmd cv -g {3} -v {4} -s {5} -D {1}"' - show_cmd = show_cmd.format(self.context.user, dss_home, clib_path, + show_cmd = show_cmd.format(self.context.user, dss_home, clib_app, vgname, dss_disk) - cv_cmd = cv_cmd.format(self.context.user, dss_home, clib_path, + cv_cmd = cv_cmd.format(self.context.user, dss_home, clib_app, vgname, dss_disk, au_size) self.context.logger.debug( - 'The cmd of the showdisk: {}'.format(show_cmd)) - self.context.logger.debug('The cmd of the cv: {}'.format(cv_cmd)) + 'The cmd of the showdisk: {}.'.format(show_cmd)) + self.context.logger.debug('The cmd of the cv: {}.'.format(cv_cmd)) sts, out = subprocess.getstatusoutput(show_cmd) if sts == 0: if out.find('vg_name = {}'.format(vgname)) > -1: @@ -396,6 +402,7 @@ class PreinstallImplOLAP(PreinstallImpl): raise Exception( "Failed to query the volume using dsscmd, cmd: {}, Error: {}" .format(show_cmd, out.strip())) + self.context.logger.debug("End to create vg.") def reset_lun_device(self): @@ -403,17 +410,16 @@ class PreinstallImplOLAP(PreinstallImpl): Low-level user disk with dd ''' if not self.context.clusterInfo.enable_dss == 'on': - self.context.logger.debug('The mode is non-dss') + self.context.logger.debug('The mode is non-dss.') return - if EnvUtil.getEnvironmentParameterValue( - 'GAUSS_ENV', self.context.user) not in ["", "1"]: + if EnvUtil.is_fuzzy_upgrade(self.context.user, self.context.logger, + self.context.mpprcFile): self.context.logger.debug( - "If the value of GAUSS_ENV is not empty or 1, the LUN does not need to be reset." - ) + "The luns doesn't need to be reset in the upgrade.") return - + self.context.logger.log("Cleaning up the dss luns.", "addStep") infos = list( filter(None, re.split(r':|,', self.context.clusterInfo.dss_vg_info))) @@ -424,17 +430,8 @@ class PreinstallImplOLAP(PreinstallImpl): self.context.clusterInfo.cm_share_disk ])) - app_bin = os.path.realpath( - os.path.join( - EnvUtil.getEnvironmentParameterValue('GAUSSHOME', - self.context.user), 'bin')) - if os.path.isdir(app_bin): - self.context.logger.debug( - 'The $GAUSSHOME/bin directory exists. LUNs are not cleared.') - return - self.context.logger.debug( - "LUNs are about to be cleared, contains: {}.".format( + "The luns are about to be cleared, contains: {}.".format( ', '.join(cm_devs + dss_devs))) cmd = [] @@ -445,7 +442,7 @@ class PreinstallImplOLAP(PreinstallImpl): self.context.logger.debug("Clear lun cmd: {}.".format(' && '.join(cmd))) CmdExecutor.execCommandLocally(' && '.join(cmd)) - self.context.logger.log("Successfully Cleaning Up Lun.") + self.context.logger.log("Successfully cleaned up the dss lun.") def setPssh(self): """ @@ -726,9 +723,9 @@ class PreinstallImplOLAP(PreinstallImpl): DSS initialization ''' if not self.context.clusterInfo.enable_dss == 'on': - self.context.logger.debug('The mode is Non-dss') + self.context.logger.debug('The mode is non-dss.') return - self.context.logger.log("Unreg the dss lun.", "addStep") + self.context.logger.log("Unreging the dss lun.", "addStep") try: cmd = ( "%s -t %s -u %s -g %s -X %s -Q %s -l %s" % @@ -746,6 +743,7 @@ class PreinstallImplOLAP(PreinstallImpl): self.context.localMode, self.context.mpprcFile, parallelism=False) + self.context.logger.log("Successfully unreg the dss lun.") except Exception as e: raise Exception(str(e)) diff --git a/script/impl/preinstall/PreinstallImpl.py b/script/impl/preinstall/PreinstallImpl.py index 19c9e18..c7a7c99 100644 --- a/script/impl/preinstall/PreinstallImpl.py +++ b/script/impl/preinstall/PreinstallImpl.py @@ -18,6 +18,7 @@ import subprocess import os import pwd import sys +import re import getpass sys.path.append(sys.path[0] + "/../") @@ -976,6 +977,8 @@ class PreinstallImpl: self.context.sshTool, self.context.localMode or self.context.isSingle, self.context.mpprcFile) + self.context.logger.debug( + f"The cmd of the create cluster path: {cmd}.") except Exception as e: raise Exception(str(e)) self.context.logger.log("Successfully created cluster's path.", @@ -1625,9 +1628,15 @@ class PreinstallImpl: # close log file self.context.logger.closeLog() except Exception as e: + is_upgrade_func = lambda x: re.findall(r'GAUSS_ENV[ ]*=[ ]*2', x) for rmPath in self.context.needFixOwnerPaths: if os.path.isfile(rmPath): - FileUtil.removeFile(rmPath) + if FileUtil.is_in_file_with_context( + rmPath, call_back_context=is_upgrade_func): + self.context.logger.debug( + f'In upgrade process, no need to delete {rmPath}.') + else: + FileUtil.removeFile(rmPath) elif os.path.isdir(rmPath): FileUtil.removeDirectory(rmPath) self.context.logger.logExit(str(e)) diff --git a/script/impl/upgrade/UpgradeConst.py b/script/impl/upgrade/UpgradeConst.py index 941fbeb..4c9051d 100644 --- a/script/impl/upgrade/UpgradeConst.py +++ b/script/impl/upgrade/UpgradeConst.py @@ -77,6 +77,7 @@ ACTION_GREY_SYNC_GUC = "grey_sync_guc" ACTION_GREY_UPGRADE_CONFIG_SYNC = "grey_upgrade_config_sync" ACTION_CREATE_CM_CA_FOR_ROLLING_UPGRADE = "create_cm_ca_for_rolling_upgrade" ACTION_SWITCH_DN = "switch_dn" +ACTION_WAIT_OM_MONITOR = "wait_om_monitor" ACTION_GET_LSN_INFO = "get_lsn_info" ACTION_GREY_RESTORE_CONFIG = "grey_restore_config" ACTION_GREY_RESTORE_GUC = "grey_restore_guc" diff --git a/script/impl/upgrade/UpgradeImpl.py b/script/impl/upgrade/UpgradeImpl.py index 0a02dfd..eda6c87 100644 --- a/script/impl/upgrade/UpgradeImpl.py +++ b/script/impl/upgrade/UpgradeImpl.py @@ -24,6 +24,7 @@ import csv import traceback import copy import re +import getpass from datetime import datetime, timedelta @@ -51,6 +52,8 @@ from domain_utils.cluster_file.package_info import PackageInfo from domain_utils.cluster_file.version_info import VersionInfo from domain_utils.sql_handler.sql_result import SqlResult from base_utils.os.net_util import NetUtil +from gspylib.component.DSS.dss_checker import DssConfig + class OldVersionModules(): @@ -240,7 +243,7 @@ class UpgradeImpl: + "Error: \n%s" % str(output)) if (not self.context.isSingle): - # send file to remote nodes + # send file to remote nodes self.context.sshTool.scpFiles(filePath, self.context.tmpDir) self.context.logger.debug("Successfully write file %s." % filePath) @@ -1420,7 +1423,43 @@ class UpgradeImpl: self.context.logger.debug("Old cluster exclude CMS instance. So no need to switch UDF.") return False + def waif_for_om_monitor_start(self, is_rollback): + if not EnvUtil.is_dss_mode(self.context.user): + self.context.logger.debug( + "In non-dss-enabled, no need to wait for om_monitor to start.") + self.context.logger.log("Start to wait for om_monitor.") + is_rolling = False + start_time = timeit.default_timer() + cmd = "%s -t %s -U %s -V %d --old_cluster_app_path=%s " \ + "--new_cluster_app_path=%s -X '%s' -l %s" % \ + (OMCommand.getLocalScript("Local_Upgrade_Utility"), + const.ACTION_WAIT_OM_MONITOR, + self.context.user, + int(float(self.context.oldClusterNumber) * 1000), + self.context.oldClusterAppPath, + self.context.newClusterAppPath, + self.context.xmlFile, + self.context.localLog) + + if is_rollback: + cmd += " --rollback" + if self.context.forceRollback: + cmd += " --force" + if len(self.context.nodeNames) != len(self.context.clusterNodes): + is_rolling = True + if self.need_rolling(is_rollback) or is_rolling: + cmd += " --rolling" + self.context.logger.debug("Command for waiting for om_monitor: %s." % cmd) + hostList = copy.deepcopy(self.context.nodeNames) + self.context.sshTool.executeCommand(cmd, hostList=hostList) + elapsed = timeit.default_timer() - start_time + self.context.logger.debug("Time to wait for om_monitor: %s." % + elapsed) + def switchDn(self, isRollback): + + self.waif_for_om_monitor_start(is_rollback=isRollback) + self.context.logger.log("Switching DN processes.") is_rolling = False start_time = timeit.default_timer() @@ -1446,8 +1485,19 @@ class UpgradeImpl: if self.need_rolling(isRollback) or is_rolling: self.context.logger.log("Switch DN processes for rolling upgrade.") cmd += " --rolling" + if EnvUtil.is_dss_mode( + getpass.getuser()) and DefaultValue.isgreyUpgradeNodeSpecify( + self.context.user, + DefaultValue.GREY_UPGRADE_STEP_UPGRADE_PROCESS, None, + self.context.logger): + self.context.logger.debug( + "In dss_mode, have cm configuration, upgrade all nodes together." + ) + cmd += " --upgrade_dss_config={}".format( + DssConfig.get_value_b64_handler('dss_upgrade_all', 'on')) + self.context.logger.debug( - "Command for switching DN processes: %s" % cmd) + "Command for switching DN processes: %s." % cmd) hostList = copy.deepcopy(self.context.nodeNames) self.context.sshTool.executeCommand(cmd, hostList=hostList) start_cluster_time = timeit.default_timer() @@ -2622,6 +2672,7 @@ class UpgradeImpl: cmd = ClusterCommand.getQueryStatusCmd("", outFile=tmpFile) SharedFuncs.runShellCmd(cmd, self.context.user, self.context.userProfile) + self.context.logger.debug(f"The generator cmd is {cmd}.") if not os.path.exists(tmpFile): raise Exception("Can not genetate dynamic info file") self.context.distributeFileToSpecialNode(tmpFile, @@ -4183,6 +4234,9 @@ class UpgradeImpl: output: NA """ try: + if EnvUtil.is_dss_mode(self.context.user): + # there is any dss_clear process + self.stop_strategy(is_final=False) self.start_strategy(is_final=False) self.setUpgradeFromParam(const.UPGRADE_UNSET_NUM) self.setUpgradeMode(0) diff --git a/script/local/CheckUninstall.py b/script/local/CheckUninstall.py index fc106bc..081ebb8 100644 --- a/script/local/CheckUninstall.py +++ b/script/local/CheckUninstall.py @@ -98,7 +98,6 @@ class CheckUninstall: self.__checkOSVersion() self.__checkOsUser() self.__checkInstanllPath() - self.unregister() self.logger.closeLog() def unregister(self): diff --git a/script/local/Install.py b/script/local/Install.py index 05315c7..c5b0011 100644 --- a/script/local/Install.py +++ b/script/local/Install.py @@ -351,18 +351,46 @@ class Install(LocalBaseOM): def link_dss_bin(self): - clib_path = os.path.join( - EnvUtil.getEnvironmentParameterValue("GPHOME", self.user), - 'script/gspylib/clib') - bin_path = os.path.join(self.installPath, 'bin') - if not os.path.isdir(clib_path) or not os.path.isdir(bin_path): - raise Exception('ss') - cmd = 'ln -snf {0}/cm_persist {0}/perctrl {1}'.format( - clib_path, bin_path) - status, output = subprocess.getstatusoutput(cmd) + ''' + The install user doesn't have the root permissions. + Therefore, privileges escaation is not supported. + In the preinstall process, the binary privileges is + escalated and is linked during the install process. + ''' + clib_app = os.path.realpath( + os.path.join( + EnvUtil.getEnvironmentParameterValue("GPHOME", self.user), + 'script/gspylib/clib', f'dss_app_{VersionInfo.getCommitid()}')) + dss_app = os.path.realpath( + os.path.join(os.path.dirname(self.installPath), + f'dss_app_{VersionInfo.getCommitid()}')) + + bin_path = os.path.realpath(os.path.join(self.installPath, 'bin')) + if not os.path.isdir(bin_path): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % bin_path) + if not os.path.isdir(dss_app): + raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % dss_app) + + sudo_bin = ['perctrl', 'cm_persist'] + for bin_ in sudo_bin: + clib_bin = os.path.realpath(os.path.join(clib_app, bin_)) + app_bin = os.path.realpath(os.path.join(dss_app, bin_)) + if os.path.isfile(clib_bin): + mv_cmd = r'\mv {0} {1}'.format(clib_bin, app_bin) + status, output = subprocess.getstatusoutput(mv_cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % mv_cmd + + "Error:\n%s" % output) + + link_cmd = 'ln -snf {0}/cm_persist {0}/perctrl {1}'.format( + dss_app, bin_path) + self.logger.debug(f"The cmd of the link: {link_cmd}.") + status, output = subprocess.getstatusoutput(link_cmd) if status != 0: - raise Exception("Dss bin file link failed. Output: %s" % output) - self.logger.log("Dss bin file package successfully.") + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % link_cmd + + "Error:\n%s." % output) + self.logger.log("Successfully generated the soft link.") + def decompress_cm_package(self): @@ -452,12 +480,13 @@ class Install(LocalBaseOM): # decompress CM package self.decompress_cm_package() + # change owner for tar file. + FileUtil.changeOwner(self.user, self.installPath, True) + # link bin with cap on dss mode if self.clusterInfo.enable_dss == 'on': self.link_dss_bin() - # change owner for tar file. - FileUtil.changeOwner(self.user, self.installPath, True) self.logger.log("Successfully decompressed bin file.") def __saveUpgradeVerionInfo(self): diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py index 067790d..0699f6a 100644 --- a/script/local/PreInstallUtility.py +++ b/script/local/PreInstallUtility.py @@ -52,6 +52,7 @@ from domain_utils.cluster_file.profile_file import ProfileFile from domain_utils.cluster_file.version_info import VersionInfo from domain_utils.cluster_file.package_info import PackageInfo from base_utils.os.sshd_config import SshdConfig +from base_utils.os.env_util import EnvUtil from base_utils.os.net_util import NetUtil from domain_utils.domain_common.cluster_constants import ClusterConstants from os_platform.linux_distro import LinuxDistro @@ -855,11 +856,12 @@ Common options: if self.clusterInfo.enable_dss == 'on': idx = DssConfig.get_current_dss_id_by_dn(self.clusterInfo.dbNodes, self.dbNodeInfo) - if idx != -1: + if idx != -1 and not EnvUtil.is_fuzzy_upgrade( + self.user, self.logger, self.mpprcFile): self.prepare_dss_home_path(idx) else: self.logger.debug('In dss-mode, the dn does not ' \ - 'exist on the current node.') + 'exist on the current node or in upgrade.') self.logger.debug("Successfully created paths for cluster.") @@ -915,12 +917,12 @@ Common options: Creating a disk soft link ''' - self.logger.debug("Creating disk link.") + self.logger.debug("Creating dss disk link.") context = list( UdevContext((self.user, self.group), self.clusterInfo, DiskUtil.get_scsi_dev_id)) - self.logger.debug("Checking udev directory.") + self.logger.debug("Checking dss udev directory.") if os.path.isdir(UdevContext.DSS_UDEV_DIR): rule_file = os.path.join(UdevContext.DSS_UDEV_DIR, UdevContext.DSS_UDEV_NAME) % self.user @@ -964,7 +966,7 @@ Common options: time.sleep(1) elif flags: break - self.logger.debug("Successfully created disk link.") + self.logger.debug("Successfully created dss disk link.") def prepareGaussLogPath(self): """ @@ -1101,6 +1103,13 @@ Common options: self.prepareGivenPath(installPath, needCheckEmpty) self.checkUpperPath(needCheckEmpty, installPath) + if self.clusterInfo.enable_dss: + dss_app = os.path.realpath( + os.path.join(os.path.dirname(installPath), + f'dss_app_{commitid}')) + self.logger.debug("Dss app path %s." % dss_app) + self.prepareGivenPath(dss_app, False) + self.logger.debug("Successfully created installation path.") def checkUpperPath(self, needCheckEmpty, installPath): @@ -2662,11 +2671,32 @@ Common options: FileUtil.changeMode(DefaultValue.MIN_FILE_MODE, "%s/version.cfg" % package_path) + def fix_dss_cap_permission(self): + ''' + Modifying the dss binary cap permissions. + ''' + self.logger.debug("Modifying dss cap permissions.") + clib_app = os.path.realpath( + os.path.join(self.clusterToolPath, "script/gspylib/clib", + f"dss_app_{VersionInfo.getCommitid()}")) + + dss_files = ['dsscmd', 'perctrl', 'dss_clear.sh'] + for file_ in dss_files: + FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, + os.path.join(clib_app, file_)) + + caps = ['perctrl', 'cm_persist'] + for file_ in caps: + FileUtil.change_caps(DefaultValue.CAP_WIO, + os.path.join(clib_app, file_)) + self.logger.debug("Successfully modified dss cap permissions.") + def fix_dss_dir_permission(self): ''' Modify the permissions on some DSS-related directories and escalate the permissions in binary mode. ''' + self.logger.debug("Modifying dss home permissions.") dss_home = self.clusterInfo.dss_home cfg_dir = os.path.realpath(os.path.join(dss_home, 'cfg')) log_path = os.path.realpath(os.path.join(dss_home, 'log')) @@ -2679,20 +2709,7 @@ Common options: files = ["{}/*ini".format(cfg_dir)] for file_ in files: FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, file_) - - clib_path = os.path.realpath( - os.path.join(self.clusterToolPath, "script/gspylib/clib")) - - dss_files = ['dsscmd', 'perctrl', 'dss_clear.sh'] - for file_ in dss_files: - FileUtil.changeMode(DefaultValue.BIN_FILE_MODE, - os.path.join(clib_path, file_)) - - caps = ['perctrl', 'cm_persist'] - for file_ in caps: - FileUtil.change_caps(DefaultValue.CAP_WIO, - os.path.join(clib_path, file_)) - + self.logger.debug("Successfully modified dss home permissions.") def fix_dss_disk_permission(self): ''' @@ -2732,6 +2749,9 @@ Common options: self.logger.debug('In dss-mode, the dn does not' \ ' exist on the current node.') return + self.fix_dss_cap_permission() + if EnvUtil.is_fuzzy_upgrade(self.user, self.logger, self.mpprcFile): + return self.fix_dss_dir_permission() self.fix_dss_disk_permission() self.fix_cm_disk_permission() @@ -2756,12 +2776,16 @@ Common options: ' exist on the current node.') return + if EnvUtil.is_fuzzy_upgrade(self.user, self.logger, self.mpprcFile): + self.logger.debug('In upgrade, the disk lock will not to be unregistered.') + return - clib = os.path.realpath( - os.path.join(self.clusterToolPath, 'script', 'gspylib', 'clib')) + clib_app = os.path.realpath( + os.path.join(self.clusterToolPath, 'script', 'gspylib', 'clib', + f'dss_app_{VersionInfo.getCommitid()}')) Dss.unreg_disk(self.clusterInfo.dss_home, user=self.user, - clib=clib, + clib_app=clib_app, logger=self.logger) def clean_dss_env(self, mpprc_file): diff --git a/script/local/StartInstance.py b/script/local/StartInstance.py index f2d2e82..c80ec7e 100644 --- a/script/local/StartInstance.py +++ b/script/local/StartInstance.py @@ -18,6 +18,7 @@ import sys import getopt +import getpass sys.path.append(sys.path[0] + "/../") from gspylib.common.GaussLog import GaussLog @@ -26,6 +27,8 @@ from gspylib.common.LocalBaseOM import LocalBaseOM from gspylib.common.ParameterParsecheck import Parameter from domain_utils.cluster_file.cluster_log import ClusterLog from domain_utils.domain_common.cluster_constants import ClusterConstants +from base_utils.os.env_util import EnvUtil +from gspylib.component.DSS.dss_checker import DssConfig class Start(LocalBaseOM): @@ -136,13 +139,35 @@ General options: output : NA """ isDataDirCorrect = False + is_dss_mode = EnvUtil.is_dss_mode(self.user) for dn in self.dnCons: if self.dataDir != "" and dn.instInfo.datadir != self.dataDir: continue - if self.cluster_number: - dn.start(self.time_out, self.security_mode, self.cluster_number) - else: - dn.start(self.time_out, self.security_mode) + try: + if is_dss_mode: + DssConfig.wait_for_process_start(self.logger, 'dssserver', 'dssserver -D') + DssConfig.set_cm_manual_flag(dn.instInfo.instanceId, + 'start', self.logger) + if self.cluster_number: + dn.start(self.time_out, + self.security_mode, + self.cluster_number, + is_dss_mode=is_dss_mode) + else: + dn.start(self.time_out, + self.security_mode, + is_dss_mode=is_dss_mode) + finally: + if is_dss_mode: + # recover the parameters of the cma resource file. + cma_paths = DssConfig.get_cm_inst_path(self.dbNodeInfo) + if cma_paths and DssConfig.get_cma_res_value( + cma_paths[0], key='restart_delay') != str( + DssConfig.DMS_DEFAULT_RESTART_DELAY): + DssConfig.reload_cm_resource( + self.logger, + timeout=DssConfig.DMS_DEFAULT_RESTART_DELAY) + isDataDirCorrect = True if not isDataDirCorrect: diff --git a/script/local/Uninstall.py b/script/local/Uninstall.py index dde314c..27622cb 100644 --- a/script/local/Uninstall.py +++ b/script/local/Uninstall.py @@ -108,9 +108,14 @@ class Uninstall(LocalBaseOM): ''' Deregistering a Disk in dss-mode ''' + self.logger.log("Start to unregist the lun.") gausshome = ClusterDir.getInstallDir(self.user) dsscmd = os.path.realpath(os.path.join(gausshome, 'bin', 'dsscmd')) - if os.path.isfile(dsscmd): + perctrl = os.path.realpath(os.path.join(gausshome, 'bin', 'perctrl')) + if os.path.isfile(dsscmd) and os.path.isfile(perctrl): + if not FileUtil.get_caps(perctrl): + self.logger.log("The perctrl does not have permissions.") + return dss_home = EnvUtil.get_dss_home(self.user) cfg = os.path.join(dss_home, 'cfg', 'dss_inst.ini') if os.path.isfile(cfg): @@ -119,7 +124,7 @@ class Uninstall(LocalBaseOM): else: self.logger.log(f"The {cfg} not exist.") else: - self.logger.debug("Non-dss-mode or not find dsscmd.") + self.logger.log("Non-dss-mode or not find dsscmd.") def __changeuserEnv(self): """ diff --git a/script/local/UpgradeUtility.py b/script/local/UpgradeUtility.py index 8607829..039e5ed 100644 --- a/script/local/UpgradeUtility.py +++ b/script/local/UpgradeUtility.py @@ -26,6 +26,7 @@ import os import subprocess import pwd import re +import getpass import time import timeit import traceback @@ -65,6 +66,8 @@ from domain_utils.sql_handler.sql_result import SqlResult from domain_utils.sql_handler.sql_file import SqlFile from domain_utils.domain_common.cluster_constants import ClusterConstants from base_diff.sql_commands import SqlCommands +from gspylib.component.DSS.dss_comp import Dss, DssInst +from gspylib.component.DSS.dss_checker import DssConfig @@ -141,6 +144,7 @@ class CmdOptions(): self.fromFile = False self.setType = "reload" self.isSingleInst = False + self.upgrade_dss_config = '' class OldVersionModules(): @@ -313,11 +317,12 @@ def parseCommandLine(): output: NA """ try: - opts, args = getopt.getopt(sys.argv[1:], "t:U:R:l:V:X:", - ["help", "upgrade_bak_path=", "script_type=", - "old_cluster_app_path=", "new_cluster_app_path=", "rollback", - "force", "rolling", "oldcluster_num=", "guc_string=", - "fromFile", "setType=", "HA"]) + opts, args = getopt.getopt(sys.argv[1:], "t:U:R:l:V:X:", [ + "help", "upgrade_bak_path=", "script_type=", + "old_cluster_app_path=", "new_cluster_app_path=", "rollback", + "force", "rolling", "oldcluster_num=", "guc_string=", "fromFile", + "setType=", "HA", "upgrade_dss_config=" + ]) except Exception as er: usage() raise Exception(ErrorCode.GAUSS_500["GAUSS_50000"] % str(er)) @@ -384,6 +389,8 @@ def parseLongOptions(key, value): if "=" in value and len(value.split("=")) == 2 and "'" not in value.split("=")[1]: value = value.split("=")[0] + "=" + "'%s'" % value.split("=")[1] g_opts.gucStr = value + elif key == "--upgrade_dss_config": + g_opts.upgrade_dss_config = value elif key == "--fromFile": g_opts.fromFile = True elif key == "--setType": @@ -409,6 +416,7 @@ def checkParameter(): const.ACTION_COPY_CERTS, const.ACTION_GREY_UPGRADE_CONFIG_SYNC, const.ACTION_SWITCH_DN, + const.ACTION_WAIT_OM_MONITOR, const.ACTION_GREY_RESTORE_CONFIG] and \ (not g_opts.newClusterAppPath or not g_opts.oldClusterAppPath): GaussLog.exitWithError( @@ -708,12 +716,12 @@ def touchInstanceInitFile(): g_logger.logExit(str(e)) -def reloadCmagent(): +def reloadCmagent(signal=1): """ reload the cm_agent instance, make the guc parameter working """ cmd = "ps ux | grep '%s/bin/cm_agent' | grep -v grep | awk '{print $2}' | " \ - "xargs -r -n 100 kill -1" % g_clusterInfo.appPath + "xargs -r -n 100 kill -%s" % (g_clusterInfo.appPath, str(signal)) g_logger.debug("Command for reload cm_agent:%s" % cmd) (status, output) = CmdUtil.retryGetstatusoutput(cmd, 3, 5) if status == 0: @@ -1528,7 +1536,18 @@ def backupConfig(): g_logger.debug("Backup CM cert files command: %s" % back_up_cm_cert_file_cmd) CmdExecutor.execCommandLocally(back_up_cm_cert_file_cmd) g_logger.debug("Backup CM cert files successfully.") - + + dss_cert_file_dir = os.path.realpath( + os.path.join(clusterAppPath, 'share/sslcert/dss')) + if os.path.isdir(dss_cert_file_dir): + back_up_dss_cert_file_cmd = "if [ -d '%s' ]; " \ + "then cp -r '%s' '%s'; fi" % (dss_cert_file_dir, + dss_cert_file_dir, bakPath) + g_logger.debug("Backup dss cert files command: %s" % + back_up_dss_cert_file_cmd) + CmdExecutor.execCommandLocally(back_up_dss_cert_file_cmd) + g_logger.debug("Backup dss cert files successfully.") + om_cert_file_dir = "'%s'/share/sslcert/om" % clusterAppPath back_up_om_cert_file_cmd = "if [ -d '%s' ]; " \ "then cp -r '%s' '%s'; fi" % (om_cert_file_dir, @@ -1876,10 +1895,9 @@ def restoreConfig(): cm_cert_backup_dir = os.path.realpath(os.path.join(bakPath, "cm")) cm_cert_dest_dir = os.path.realpath(os.path.join(clusterAppPath, "share", "sslcert")) - restore_cm_cert_file_cmd = "if [ -d '%s' ]; " \ - "then cp -r '%s' '%s'; fi" % (cm_cert_backup_dir, - cm_cert_backup_dir, - cm_cert_dest_dir) + restore_cm_cert_file_cmd = "if [ -d '{0}' ]; then " \ + "cp -r '{0}' '{1}'; chmod -R 400 {1}/cm/*; fi".format( + cm_cert_backup_dir, cm_cert_dest_dir) g_logger.debug("Restore CM cert files command: %s" % restore_cm_cert_file_cmd) CmdExecutor.execCommandLocally(restore_cm_cert_file_cmd) g_logger.debug("Restore CM cert files successfully.") @@ -2260,6 +2278,14 @@ def cleanInstallPath(): output : NA """ installPath = g_opts.appPath + commit_id = installPath[-8:] + if commit_id: + dss_app = os.path.realpath( + os.path.join(os.path.dirname(installPath), f'dss_app_{commit_id}')) + cmd = "(if [ -d '%s' ]; then rm -rf '%s'; fi)" % (dss_app, dss_app) + g_logger.log("Command for cleaning install path: %s." % cmd) + CmdExecutor.execCommandLocally(cmd) + if not os.path.exists(installPath): g_logger.debug(ErrorCode.GAUSS_502[ "GAUSS_50201"] % installPath + " No need to clean.") @@ -2295,6 +2321,9 @@ def cleanInstallPath(): cmd += " && (if [ -d '%s' ]; then chmod -R %d '%s'; fi)" % (cm_cert_dir, DefaultValue.KEY_DIRECTORY_MODE, cm_cert_dir) + dss_cert_dir = os.path.realpath(os.path.join(installPath, "share", "sslcert", "dss")) + cmd += " && (if [ -d '%s' ]; then chmod -R %d '%s'; fi)" % ( + dss_cert_dir, DefaultValue.KEY_DIRECTORY_MODE, dss_cert_dir) appBakPath = "%s/to_be_delete" % tmpDir cmd += " && (if [ -d '%s' ]; then chmod -R %d '%s'; fi)" % (appBakPath, DefaultValue.KEY_DIRECTORY_MODE, @@ -2379,6 +2408,23 @@ def copyCerts(): FileUtil.changeMode(DefaultValue.KEY_FILE_MODE, "%s/*" % newOmSslCerts) + if EnvUtil.is_dss_mode(getpass.getuser()) and EnvUtil.get_dss_ssl_status( + getpass.getuser()) == 'on': + old_dss_certs = os.path.join(g_opts.oldClusterAppPath, + "share/sslcert/dss") + new_dss_certs = os.path.join(g_opts.newClusterAppPath, + "share/sslcert/dss") + FileUtil.createDirectory(new_dss_certs, + mode=DefaultValue.KEY_DIRECTORY_MODE) + for cert_file in DefaultValue.GDS_CERT_LIST: + cert_ = os.path.realpath(os.path.join(old_dss_certs, cert_file)) + if FileUtil.checkFileExists(cert_): + FileUtil.cpFile(cert_, new_dss_certs) + FileUtil.changeMode(DefaultValue.MIN_FILE_MODE, "%s/*" % new_dss_certs) + + + + def prepareUpgradeSqlFolder(): """ @@ -2768,7 +2814,12 @@ def backupOldClusterCatalogPhysicalFiles(): connect to each cn and dn, connect to each database, and do backup """ + if EnvUtil.is_dss_mode(getpass.getuser()): + g_logger.log( + "In dss-enabled, the physical catalog file is not to be backuped.") + return g_logger.log("Backing up old cluster catalog physical files.") + try: InstanceList = [] # find all instances need to do backup @@ -3057,6 +3108,12 @@ def restoreOneInstanceOldClusterCatalogPhysicalFiles(instance): read database and catalog info from file connect each database, do restore """ + if EnvUtil.is_dss_mode(getpass.getuser()): + g_logger.log( + "In dss-enabled, there is no backup file" \ + " for physical catalog file to restore." + ) + return g_logger.debug("Restore instance catalog physical files. " "Instance data dir: %s" % instance.datadir) try: @@ -3218,6 +3275,12 @@ def cleanOldClusterCatalogPhysicalFiles(): connect to each cn and dn, connect to each database, and do backup """ + if EnvUtil.is_dss_mode(getpass.getuser()): + g_logger.log( + "In dss-enabled, there is no backup file" \ + " for physical catalog file to clean up." + ) + return g_logger.log("Cleaning old cluster catalog physical files.") try: # kill any pending processes that are @@ -4094,11 +4157,51 @@ def setOneInstanceGuc(instance): raise Exception(ErrorCode.GAUSS_500["GAUSS_50007"] % cmd + " Error: \n%s" % str(output)) +def switch_dss_server(): + is_dss_mode = EnvUtil.is_dss_mode(getpass.getuser()) + if not is_dss_mode: + g_logger.debug("Non-dss-enabled, no need to switch dssserver.") + return + # Start the dssserver firstly. + DssConfig.set_cm_manual_flag(DssInst.get_dss_id_from_key() + 20001, 'start', + g_logger) + g_logger.log("Start to kill dssserver.") + Dss.kill_dss_server(logger=g_logger) + g_logger.log("End to kill dssserver.") + + +def wait_for_om_monitor_start(): + is_dss_mode = EnvUtil.is_dss_mode(getpass.getuser()) + if not is_dss_mode: + g_logger.debug("Non-dss-enabled, no need to wait om_monitor.") + return + if isNeedSwitch('cm_agent', True): + DssConfig.wait_for_process_start(g_logger, 'om_monitor', + 'bin/om_monitor') + def switchDnNodeProcess(): """ function: switch node process which CN or DN exits :return: """ + is_cm_killed = False + is_dss_mode = EnvUtil.is_dss_mode(getpass.getuser()) + is_all_upgrade = DssConfig.get_value_b64_handler('dss_upgrade_all', + g_opts.upgrade_dss_config, + action='decode') == 'on' + if is_dss_mode and g_opts.rolling and not is_all_upgrade: + if isNeedSwitch('cm_agent', True): + # disabling cm from starting the dn process + DssConfig.reload_cm_resource( + g_logger, + timeout=DssConfig.DMS_TMP_RESTART_DELAY, + wait_for_start=False) + is_cm_killed = True + + if is_dss_mode and g_opts.rolling and not is_cm_killed: + if isNeedSwitch('cm_agent', True): + reloadCmagent(signal=9) + if g_opts.rolling: # for rolling upgrade, gaussdb fenced udf will be # switched after cm_agent has been switched @@ -4112,6 +4215,8 @@ def switchDnNodeProcess(): switchDn() elapsed = timeit.default_timer() - start_time g_logger.log("Time to switch DN: %s" % getTimeFormat(elapsed)) + if is_dss_mode and isNeedSwitch('dssserver'): + switch_dss_server() def switchFencedUDFProcess(): @@ -4134,7 +4239,7 @@ def switchFencedUDFProcess(): raise Exception("Failed to kill gaussdb fenced UDF master process.") -def isNeedSwitch(process, dataDir=""): +def isNeedSwitch(process, dataDir="", is_dss_mode=False): """ get the pid from ps ux command, and then get the realpth of this pid from /proc/$pid/exe, under upgrade, if we can find the new path, then we do not @@ -4157,6 +4262,20 @@ def isNeedSwitch(process, dataDir=""): r"xargs `; if [ `echo $dir | grep %s` ];then echo 'True'; " \ r"else echo 'False'; fi; done" cmd = cmd % (process, g_gausshome, dataDir, path) + elif is_dss_mode and process == 'cm_agent': + cmd = r"pidList=`ps ux | grep '\<%s\>' | grep -v 'grep'" \ + r" | awk '{print $2}' | xargs `; " \ + r"for pid in $pidList; do dir=`readlink -f /proc/$pid/exe | " \ + r"xargs `; if [ `echo $dir | grep %s` ];then echo 'True'; " \ + r"else echo 'False'; fi; done" + cmd = cmd % (process, path) + elif process == 'dssserver': + cmd = r"pidList=`ps ux | grep -E 'dssserver[ ]+-D' | grep -v 'grep'" \ + r" | awk '{print $2}' | xargs `; " \ + r"for pid in $pidList; do dir=`readlink -f /proc/$pid/exe | " \ + r"xargs `; if [ `echo $dir | grep %s` ];then echo 'True'; " \ + r"else echo 'False'; fi; done" + cmd = cmd % path else: cmd = r"pidList=`ps ux | grep '\<%s\>' | grep '%s' | grep -v 'grep'" \ r" | awk '{print $2}' | xargs `; " \ @@ -4735,6 +4854,7 @@ def checkAction(): const.ACTION_GREY_UPGRADE_CONFIG_SYNC, const.ACTION_CREATE_CM_CA_FOR_ROLLING_UPGRADE, const.ACTION_SWITCH_DN, + const.ACTION_WAIT_OM_MONITOR, const.ACTION_GET_LSN_INFO, const.ACTION_GREY_RESTORE_CONFIG, const.ACTION_GREY_RESTORE_GUC, @@ -4797,6 +4917,7 @@ def main(): const.ACTION_GREY_UPGRADE_CONFIG_SYNC: greyUpgradeSyncConfig, const.ACTION_CREATE_CM_CA_FOR_ROLLING_UPGRADE: createCmCaForRollingUpgrade, const.ACTION_SWITCH_DN: switchDnNodeProcess, + const.ACTION_WAIT_OM_MONITOR: wait_for_om_monitor_start, const.ACTION_CLEAN_GS_SECURE_FILES: clean_gs_secure_files, const.ACTION_GET_LSN_INFO: getLsnInfo, const.ACTION_GREY_RESTORE_CONFIG: greyRestoreConfig,