From 3b7cbbf11adb1f9562bd2360982728ec0f98a4e3 Mon Sep 17 00:00:00 2001 From: LiHeng Date: Tue, 16 Mar 2021 21:22:33 +0800 Subject: [PATCH] refresh replconninfo to identify cascade standby after switchover/failover --- script/gspylib/common/Common.py | 7 +- script/gspylib/common/DbClusterInfo.py | 25 +++- script/impl/om/OLAP/OmImplOLAP.py | 17 ++- script/local/Resetreplconninfo.py | 199 +++++++++++++++++++++++++ 4 files changed, 235 insertions(+), 13 deletions(-) create mode 100644 script/local/Resetreplconninfo.py diff --git a/script/gspylib/common/Common.py b/script/gspylib/common/Common.py index f3bb915..60f1f99 100644 --- a/script/gspylib/common/Common.py +++ b/script/gspylib/common/Common.py @@ -5359,7 +5359,9 @@ class ClusterInstanceConfig(): (dbInst.port + 4), pj.haIps[i], pj.haPort, pj.port + 5, pj.port + 4) - + if pj.instanceType == DefaultValue.CASCADE_STANDBY: + chanalInfo += " iscascade=true" + connInfo1.append(chanalInfo) else: for pj in iter(peerInsts): @@ -5381,7 +5383,8 @@ class ClusterInstanceConfig(): (dbInst.port + 4), pj.haIps[i], pj.haPort, pj.port + 5, (pj.port + 4)) - + if pj.instanceType == DefaultValue.CASCADE_STANDBY: + chanalInfo += " iscascade=true" connInfo1.append(chanalInfo) return connInfo1, nodename diff --git a/script/gspylib/common/DbClusterInfo.py b/script/gspylib/common/DbClusterInfo.py index ef3f929..f0a3b35 100644 --- a/script/gspylib/common/DbClusterInfo.py +++ b/script/gspylib/common/DbClusterInfo.py @@ -5979,7 +5979,12 @@ class dbClusterInfo(): def isSingleNode(self): return (self.__getDnInstanceNum() <= 1) - def createDynamicConfig(self, user, localHostName, sshtool): + def doRefreshConf(self, user, localHostName, sshtool): + self.__createDynamicConfig(user, localHostName, sshtool) + self.__create_simple_datanode_config(user, localHostName, sshtool) + self.__reset_replconninfo(user, sshtool) + + def __createDynamicConfig(self, user, localHostName, sshtool): """ function : Save cluster info into to dynamic config input : 
String,int @@ -6045,6 +6050,8 @@ class dbClusterInfo(): raise Exception(ErrorCode.GAUSS_502["GAUSS_50205"] % \ "dynamic configuration file" + " Error: \n%s" % str(e)) + + def __create_simple_datanode_config(self, user, localhostname, sshtool): simpleDNConfig = self.__getDynamicSimpleDNConfig(user) if os.path.exists(simpleDNConfig): cmd = "rm -f %s" % simpleDNConfig @@ -6058,7 +6065,7 @@ class dbClusterInfo(): try: with open(simpleDNConfig, "w") as fp: for dninfo in tempstatus: - dnstatus = dninfo.split()[-2] + dnstatus = dninfo.split()[6] dnname = dninfo.split()[1] if dnstatus not in statusdic: fp.write("%s=%d\n" % @@ -6073,7 +6080,7 @@ class dbClusterInfo(): "dynamic configuration file" + " Error: \n%s" % str(e)) try: - self.__sendDynamicCfgToAllNodes(localHostName, + self.__sendDynamicCfgToAllNodes(localhostname, simpleDNConfig, simpleDNConfig) except Exception as e: @@ -6083,6 +6090,18 @@ class dbClusterInfo(): "dynamic configuration file" + " Error: \n%s" % str(e)) + def __reset_replconninfo(self, user, sshtool): + # add for cascade + local_script = os.path.dirname(os.path.realpath(__file__)) \ + + '/../../local/Resetreplconninfo.py' + cmd = "python3 %s -U %s -t reset" % (local_script, user) + (status, output) = \ + sshtool.getSshStatusOutput(cmd, self.getClusterNodeNames()) + for node in self.getClusterNodeNames(): + if status[node] != 'Success': + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + "Error:\n%s" % output) + def __packDynamicNodeInfo(self, dbNode, localHostName, sshtool): # node id info = struct.pack("I", dbNode.id) diff --git a/script/impl/om/OLAP/OmImplOLAP.py b/script/impl/om/OLAP/OmImplOLAP.py index 44ae7db..1d326d5 100644 --- a/script/impl/om/OLAP/OmImplOLAP.py +++ b/script/impl/om/OLAP/OmImplOLAP.py @@ -203,7 +203,7 @@ class OmImplOLAP(OmImpl): self.logger.log("Starting %s." 
% startType) self.logger.log("=========================================") hostName = DefaultValue.GetHostIpOrName() - #get the newest dynaminc config and send to other node + # get the newest dynaminc config and send to other node self.clusterInfo.checkClusterDynamicConfig(self.context.user, hostName) if self.context.g_opts.nodeName == "": hostList = self.clusterInfo.getClusterNodeNames() @@ -217,10 +217,10 @@ class OmImplOLAP(OmImpl): else: time_out = self.time_out cmd = "source %s; %s -U %s -R %s -t %s --security-mode=%s" % ( - self.context.g_opts.mpprcFile, - OMCommand.getLocalScript("Local_StartInstance"), - self.context.user, self.context.clusterInfo.appPath, time_out, - self.context.g_opts.security_mode) + self.context.g_opts.mpprcFile, + OMCommand.getLocalScript("Local_StartInstance"), + self.context.user, self.context.clusterInfo.appPath, time_out, + self.context.g_opts.security_mode) if self.dataDir != "": cmd += " -D %s" % self.dataDir failedOutput = '' @@ -342,8 +342,9 @@ class OmImplOLAP(OmImpl): "No need to generate dynamic configuration file for one node.") return self.logger.log("Generating dynamic configuration file for all nodes.") - hostName = DefaultValue.GetHostIpOrName() + hostname = DefaultValue.GetHostIpOrName() sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames()) - self.context.clusterInfo.createDynamicConfig(self.context.user, - hostName, sshtool) + self.context.clusterInfo.doRefreshConf(self.context.user, hostname, + sshtool) + self.logger.log("Successfully generated dynamic configuration file.") diff --git a/script/local/Resetreplconninfo.py b/script/local/Resetreplconninfo.py new file mode 100644 index 0000000..5d7c6a3 --- /dev/null +++ b/script/local/Resetreplconninfo.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +############################################################################# +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. 
+# You can use this software according to the terms +# and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, +# WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# ---------------------------------------------------------------------------- +# Description : Resetreplconninfo.py is a utility to reset local replconninfo. +############################################################################# + +import getopt +import os +import sys +import subprocess + +sys.path.append(sys.path[0] + "/../") +from gspylib.common.GaussLog import GaussLog +from gspylib.common.Common import DefaultValue +from gspylib.common.ErrorCode import ErrorCode + +######################################################################## +# Global variables define +######################################################################## +g_opts = None + + +######################################################################## +class CmdOptions(): + """ + """ + + def __init__(self): + """ + """ + self.action = "" + self.clusterUser = "" + + +def usage(): + """ +Resetreplconninfo.py is a utility to reset replconninfos on local node. + +Usage: + python3 Resetreplconninfo.py --help + python3 Resetreplconninfo.py -U omm -t reset + +General options: + -U Cluster user. + -t reset. + --help Show help information for this utility, + and exit the command line mode. 
+ """ + print(usage.__doc__) + + +def parseCommandLine(): + """ + function: parse command line + """ + try: + opts, args = getopt.getopt(sys.argv[1:], "U:t:h", ["help"]) + except Exception as e: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e)) + + if len(args) > 0: + usage() + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] + % str(args[0])) + + global g_opts + g_opts = CmdOptions() + + for (key, value) in opts: + if key == "-h" or key == "--help": + usage() + sys.exit(0) + elif key == "-t": + g_opts.action = value + elif key == "-U": + g_opts.clusterUser = value + + +def checkParameter(): + """ + function: check parameter + """ + if g_opts.clusterUser == "": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'U' + ".") + if g_opts.action == "": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + ".") + if g_opts.action != "reset": + GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"] % 't') + + +class Resetreplconninfo(): + """ + class: Resetreplconninfo + """ + + def __init__(self): + """ + function: configure all instance on local node + """ + # get mpprc file + envfile = os.getenv('MPPDB_ENV_SEPARATE_PATH') + if envfile is not None and envfile != "": + self.userProfile = \ + envfile.replace("\\", "\\\\").replace('"', '\\"\\"') + else: + self.userProfile = "~/.bashrc" + + def __getStatusByOM(self): + """ + function :Get the environment parameter. 
+ output : String + """ + cmd = "source %s;gs_om -t status --detail" % self.userProfile + (status, output) = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] + % cmd + " Error: \n%s" % output) + return output.split("\n")[-1] + + def resetRepl(self): + """ + function: reset Repl + input : NA + output: NA + """ + status_list = self.__getStatusByOM().split('|') + repl_list = ['replconninfo' + str(i) for i in + range(1, len(status_list))] + + localhost = DefaultValue.GetHostIpOrName() + remote_ip_dict = {} + for info_all in status_list: + info = info_all.split() + if info[1] == localhost: + local_dndir = info[4] + else: + remote_ip_dict[info[2]] = info[6] + head_cmd = "source %s;" % self.userProfile + for repl in repl_list: + cmd = head_cmd + 'gs_guc check -N %s -D %s -c "%s"' % \ + (localhost, local_dndir, repl) + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + cmd + " Error:\n%s" % output) + # get remote ip and check iscascade + replinfo_all = output.split('\n')[-2].strip().split("'") + replinfo_value = replinfo_all[1].split() + for remoteip in remote_ip_dict: + if remoteip in replinfo_all[1]: + if remote_ip_dict[remoteip] == "Cascade" and \ + "iscascade=true" not in replinfo_value: + replinfo_value.append("iscascade=true") + elif remote_ip_dict[remoteip] != "Cascade" and \ + "iscascade=true" in replinfo_value: + replinfo_value.remove("iscascade=true") + else: + break + replinfo_all = \ + replinfo_all[0] + "'" + " ".join(replinfo_value) + "'" + cmd = head_cmd + 'gs_guc reload -N %s -D %s -c "%s"' % \ + (localhost, local_dndir, replinfo_all) + status, output = subprocess.getstatusoutput(cmd) + if status != 0: + raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % + cmd + " Error:\n%s" % output) + break + + +if __name__ == '__main__': + try: + # parse and check input parameters + parseCommandLine() + checkParameter() + + # reset 
replconninfos + reseter = Resetreplconninfo() + reseter.resetRepl() + + except Exception as e: + GaussLog.exitWithError(str(e)) + + sys.exit(0)