# NOTE: repository-viewer page metadata leaked into this source dump;
# converted to comments so the file remains valid Python.
# File:   openGauss-OM/script/impl/collect/OLAP/CollectImplOLAP.py
# Commit: hangjin2020 a2679f0cf7 "om repo migration", 2020-12-16 17:25:24 +08:00
# Size:   928 lines, 37 KiB, Python
# coding: UTF-8
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
# http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
import sys
import time
import subprocess
import time
import base64
import json
sys.path.append(sys.path[0] + "/../../../")
from gspylib.common.Common import DefaultValue
from gspylib.common.OMCommand import OMCommand
from gspylib.common.ErrorCode import ErrorCode
from gspylib.os.gsfile import g_file
from gspylib.os.gsplatform import g_Platform
from gspylib.os.gsOSlib import g_OSlib
from impl.collect.CollectImpl import CollectImpl
class CollectImplOLAP(CollectImpl):
    """
    OLAP implementation of the information collector: gathers OS,
    catalog, log, xlog, core, configuration, gstack and plan data from
    the cluster nodes and packs the results into a tar.gz archive.
    """
def __init__(self, collectObj):
"""
function: Constructor
input : collectObj
output: NA
"""
self.jobInfo = {}
self.nodeJobInfo = {}
super(CollectImplOLAP, self).__init__(collectObj)
def parseConfigFile(self):
"""
function: Parsing configuration files
input : NA
output: NA
"""
try:
# Init the cluster information
self.context.initClusterInfoFromStaticFile(self.context.user)
self.context.appPath = self.context.clusterInfo.appPath
# Obtain the cluster installation directory owner and group
(self.context.user, self.context.group) = g_OSlib.getPathOwner(
self.context.appPath)
if (self.context.user == "" or self.context.group == ""):
self.context.logger.logExit(ErrorCode.GAUSS_503["GAUSS_50308"])
# Match the corresponding node
for nodename in self.context.nodeName:
if not self.context.clusterInfo.getDbNodeByName(nodename):
self.context.logger.logExit(
ErrorCode.GAUSS_516["GAUSS_51619"] % nodename)
if (len(self.context.nodeName) == 0):
self.context.nodeName = \
self.context.clusterInfo.getClusterNodeNames()
self.context.initSshTool(self.context.nodeName,
DefaultValue.TIMEOUT_PSSH_COLLECTOR)
if (len(self.context.nodeName) == 1 and self.context.nodeName[
0] == DefaultValue.GetHostIpOrName()):
self.context.localMode = True
except Exception as e:
raise Exception(str(e))
self.context.logger.log("Successfully parsed the configuration file.")
    # The shell strips single and double quotes from parameters passed
    # in from outside, so '#' is used here as a placeholder for double
    # quotes when the JSON is embedded in a command line.
def formatJsonString(self, check):
"""
function: format sonString
input : string
output: json string
"""
if (self.context.isSingle or self.context.localMode):
return "\'" + json.dumps(check).replace("\"", "#") + "\'"
else:
return "\'" \
+ \
json.dumps(check).replace("$", "\$").replace("\"", "#") \
+ "\'"
def checkTmpDir(self):
"""
function: Check tmp dir, if tmp dir not exist, create it
input : TmpDirFromEnv
output: NA
"""
try:
# Create a temporary file
tmpDir = DefaultValue.getTmpDirFromEnv()
cmd = "(if [ ! -d '%s' ];then mkdir -p '%s' -m %s;fi)" \
% (tmpDir, tmpDir, DefaultValue.KEY_DIRECTORY_MODE)
DefaultValue.execCommandWithMode(
cmd,
"Check temporary directory",
self.context.sshTool,
self.context.isSingle or self.context.localMode,
self.context.mpprcFile)
except Exception as e:
self.context.logger.logExit(str(e))
def checkCommand(self):
"""
function: check command
output: Successfully command exists
"""
self.context.logger.log("check rsync command.")
# Check the system information on each node
cmd = "source %s; %s -t check_command -U %s -S %d -l %s" % (
self.context.mpprcFile,
OMCommand.getLocalScript("Local_Collect"),
self.context.user,
self.context.speedLimitFlag,
self.context.localLog)
flag = 0
failedNodeList = []
if (self.context.isSingle or self.context.localMode):
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
flag = 1
self.context.logger.log("The cmd is %s " % cmd)
self.context.logger.logExit(
"rsync command not found on %s. "
"Error:\n%s\n--speed-limit parameters cannot be used" % \
(self.context.nodeName[0], output))
else:
(status, output) = self.context.sshTool.getSshStatusOutput(
cmd, self.context.nodeName)
self.context.sshTool.parseSshOutput(self.context.nodeName)
# Gets the execution result
for node in self.context.nodeName:
if (status[node] != DefaultValue.SUCCESS):
flag = 1
failedNodeList.append(node)
if flag == 0:
self.context.logger.log("Successfully check rsync command.")
else:
self.context.logger.logExit(
"rsync command not found on hosts: %s.\n "
"--speed-limit parameters cannot be used "
% str(failedNodeList))
def createStoreDir(self):
"""
:return:
"""
resultdir = ""
# Gets the current time
currentTime = time.strftime("%Y%m%d_%H%M%S")
if (self.context.outFile is not None and self.context.outFile != ""):
# rm the tmpdir
resultdir = self.context.outFile
else:
# rm the tmpdir
resultdir = DefaultValue.getTmpDirFromEnv()
cmd = \
"if [ -d '%s'/collector_tmp_* ];" \
"then rm -rf '%s'/collector_tmp_*; fi" % (
resultdir, resultdir)
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
if status != 0:
raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd
+ "Error:\n%s" % output)
# Get the default path
targetdir = "%s/collector_tmp_%s" % (resultdir, currentTime)
self.context.outFile = "%s/collector_%s" % (targetdir, currentTime)
# Create a folder to store log information
g_file.createDirectory(self.context.outFile)
g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE,
self.context.outFile, True)
return (currentTime, targetdir, resultdir)
def createDir(self):
"""
function: create Dir
output: Successfully create dir
"""
self.context.logger.log("create Dir.")
# Check the system information on each node
cmd = "source %s; %s -t create_dir -U %s -l %s" % (
self.context.mpprcFile,
OMCommand.getLocalScript("Local_Collect"),
self.context.user,
self.context.localLog)
flag = 0
if (self.context.isSingle or self.context.localMode):
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
flag = 1
self.context.logger.log("The cmd is %s " % cmd)
self.context.logger.log(
"Failed to create dir on %s. Error:\n%s" % \
(self.context.nodeName[0], output))
else:
(status, output) = self.context.sshTool.getSshStatusOutput(
cmd,
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.nodeName)
# Gets the execution result
for node in self.context.nodeName:
if (status[node] != DefaultValue.SUCCESS):
flag = 1
self.context.logger.log(
"Failed to create dir on %s. Error:\n%s" % \
(node, str(outputMap[node])))
if (flag == 0):
self.context.logger.log("Successfully create dir.")
    def printSummaryInfo(self, resultdir, currentTime):
        """
        function: Render a fixed-width summary table (task name vs.
                  success/failed hostnames for every recorded job run)
                  and append it to Summary.log in the result directory.
        input : resultdir - root result directory
                currentTime - timestamp suffix used in directory names
        output: NA (failures are only logged at debug level)
        """
        maxNamelen = len("SUCCESS HOSTNAME")
        maxJoblen = len("success")
        jobCount = 0
        jobNameList = []
        tag = ""
        info = ""
        # Column widths grow to fit the longest hostname and job label.
        for host in self.context.nodeName:
            maxNamelen = max(maxNamelen, len(host))
        for jobName, jobInfo in self.jobInfo.items():
            subJobCount = len(jobInfo)
            jobCount += subJobCount
            # Each run of a job is shown as "<jobName>-<runIndex>".
            while subJobCount > 0:
                job = "%s-%s" % (jobName, str(subJobCount))
                maxJoblen = max(maxJoblen, len(job))
                jobNameList.append(job)
                subJobCount -= 1
        maxJoblen = maxJoblen + 4
        maxNamelen = maxNamelen + 4
        title = "%s%s%s%s%s%s%s" % ("|", "TASK NAME".center(maxJoblen), "|",
                                    "SUCCESS HOSTNAME".center(maxNamelen), "|",
                                    "FAILED HOSTNAME".center(maxNamelen), "|")
        # Build a horizontal rule the same width as the title row.
        index = len(title)
        while index > 0:
            tag = "%s%s" % (tag, "-")
            index -= 1
        info = "%s%s%s" % (info, tag, "\n")
        info = "%s%s%s%s%s%s%s%s" % (
            info, "|", "".center(maxJoblen), "|", "".center(maxNamelen), "|",
            "".center(maxNamelen), "|\n")
        info = "%s%s%s" % (info, title, "\n")
        info = "%s%s%s%s%s%s%s%s" % (
            info, "|", "".center(maxJoblen), "|", "".center(maxNamelen), "|",
            "".center(maxNamelen), "|\n")
        info = "%s%s%s" % (info, tag, "\n")
        for job in jobNameList:
            jobName = str(job.split("-")[0])
            i = int(job.split("-")[1])
            # Pad the shorter node list with None so both lists can be
            # zipped row by row (mutates self.jobInfo in place).
            len_s = len(self.jobInfo[jobName][i - 1]["successNodes"])
            len_f = len(self.jobInfo[jobName][i - 1]["failedNodes"])
            if len_s >= len_f:
                self.jobInfo[jobName][i - 1]["failedNodes"] += [None] * (
                        len_s - len_f)
            else:
                self.jobInfo[jobName][i - 1]["successNodes"] += [None] * (
                        len_f - len_s)
            isInitTitle = 0
            for s, f in zip(self.jobInfo[jobName][i - 1]["successNodes"],
                            self.jobInfo[jobName][i - 1]["failedNodes"]):
                # Only the first row of a job carries the job label.
                if isInitTitle == 1:
                    job = ""
                if str(s) == "None":
                    s = ""
                if str(f) == "None":
                    f = ""
                info = "%s%s%s%s%s%s%s%s%s" % (
                    info, "|", job.ljust(maxJoblen), "|",
                    str(s).center(maxNamelen), "|", str(f).center(maxNamelen),
                    "|",
                    "\n")
                isInitTitle = 1
            info = "%s%s%s" % (info, tag, "\n")
        # Append the rendered table to Summary.log through the shell.
        cmd = " echo '%s\n' >> %s/collector_tmp_%s/collector_%s/Summary.log" \
              % (
                  info, resultdir, currentTime, currentTime)
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            print("Generate Summary Info Failed.")
            self.context.logger.debug("The cmd is %s " % cmd)
            self.context.logger.debug(
                "Generate Summary Info Failed %s." % output)
    def printDetailSummaryInfo(self, resultdir, currentTime):
        """
        function: Render one boxed report per node/job (lists of
                  successful and failed tasks) and append each box to
                  Detail.log in the result directory.
        input : resultdir - root result directory
                currentTime - timestamp suffix used in directory names
        output: NA (failures are only logged at debug level)
        """
        statusFiedLen = len("SuccessfulTask") + 4
        for node, jobList in self.nodeJobInfo.items():
            for job in jobList:
                # Each entry is the raw json string produced by the
                # remote collect script.
                jsonJob = json.loads(job)
                successLen = 0
                jobName = jsonJob["jobName"]
                successInfoList = []
                failedInfolist = []
                Info = ""
                tag = []
                successTask = jsonJob["successTask"]
                # Group successful task names five per table row.
                for i in range(0, len(successTask), 5):
                    Task = "; ".join(successTask[i: i + 5])
                    successLen = max(successLen, len(Task))
                    successInfoList.append(Task)
                failedLen = 0
                for failedJob, reason in jsonJob["failedTask"].items():
                    failedInfo = failedJob + ": " + reason
                    failedLen = max(len(failedInfo), failedLen)
                    failedInfolist.append(failedInfo)
                title = "%s - %s - %s" % (
                    node, jobName, "Success" if failedLen == 0 else "Failed")
                # Box width must fit the longest row and the title.
                taskMaxLen = max(failedLen + 4 + 2, successLen + 4 + 2)
                maxLen = max(taskMaxLen, len(title))
                titleLen = maxLen + statusFiedLen + 1
                totalLen = titleLen + 2
                i = 0
                while i < totalLen:
                    tag.append("-")
                    i += 1
                Info = "%s%s%s" % (Info, "".join(tag), "\n")
                Info = "%s%s%s%s%s" % (
                    Info, "|", " ".center(titleLen), "|", "\n")
                Info = "%s%s%s%s%s" % (
                    Info, "|", title.center(titleLen), "|", "\n")
                Info = "%s%s%s%s%s" % (
                    Info, "|", " ".center(titleLen), "|", "\n")
                Info = "%s%s%s" % (Info, "".join(tag), "\n")
                for s in successInfoList:
                    Info = "%s%s%s%s%s%s%s" % (
                        Info, "|", "SuccessfulTask".center(statusFiedLen), "|",
                        s.center(maxLen), "|", "\n")
                    Info = "%s%s%s" % (Info, "".join(tag), "\n")
                for f in failedInfolist:
                    Info = "%s%s%s%s%s%s%s" % (
                        Info, "|", "FailedTask".center(statusFiedLen), "|",
                        f.center(maxLen), "|", "\n")
                    Info = "%s%s%s" % (Info, "".join(tag), "\n")
                Info = "%s%s" % (Info, "\n\n")
                # Append the rendered box to Detail.log via the shell.
                cmd = \
                    " echo '%s' " \
                    ">> %s/collector_tmp_%s/collector_%s/Detail.log" % (
                        Info, resultdir, currentTime, currentTime)
                (status, output) = subprocess.getstatusoutput(cmd)
                if status != 0:
                    print("Generate Detail Summary Info Failed")
                    self.context.logger.debug("The cmd is %s " % cmd)
                    self.context.logger.debug(
                        "Generate Detail Summary Info Failed %s." % output)
def generalJobInfo(self, jobName, nodeList):
if self.jobInfo.__contains__(jobName):
self.jobInfo[jobName].append(nodeList)
else:
nodes = [nodeList]
self.jobInfo[jobName] = nodes
def generalDetailInfo(self, nodeName, job):
if self.nodeJobInfo.__contains__(nodeName):
self.nodeJobInfo[nodeName].append(job)
else:
jobList = [job]
self.nodeJobInfo[nodeName] = jobList
def generalSummary(self, resultdir, currentTime):
self.printSummaryInfo(resultdir, currentTime)
self.printDetailSummaryInfo(resultdir, currentTime)
def resultCheck(self, output):
isFailed = 0
nodeList = {}
successNodeList = []
failedNodeList = []
jobName = "UNKNOWN"
try:
if self.context.isSingle or self.context.localMode:
if len(json.loads(output)["failedTask"]) > 0:
isFailed = 1
failedNodeList.append(self.context.nodeName[0])
else:
successNodeList.append(self.context.nodeName[0])
self.generalDetailInfo(self.context.nodeName[0], output)
jobName = json.loads(output)["jobName"]
else:
for node in self.context.nodeName:
if len(json.loads(str(output[node]))["failedTask"]) > 0:
isFailed = 1
failedNodeList.append(node)
else:
successNodeList.append(node)
self.generalDetailInfo(node, str(output[node]))
jobName = json.loads(output[node])["jobName"]
nodeList["successNodes"] = successNodeList
nodeList["failedNodes"] = failedNodeList
self.generalJobInfo(jobName, nodeList)
return isFailed
except Exception as e:
self.context.logger.debug("check result failed %s." % str(e))
return 1
def planResultCheck(self, output):
isFailed = 1
nodeList = {}
successNodeList = []
failedNodeList = []
jobName = "UNKNOWN"
try:
if self.context.isSingle or self.context.localMode:
if len(json.loads(output)["failedTask"]) == 0:
isFailed = 0
successNodeList.append(self.context.nodeName[0])
else:
failedNodeList.append(self.context.nodeName[0])
self.generalDetailInfo(self.context.nodeName[0], output)
jobName = json.loads(output)["jobName"]
else:
for node in self.context.nodeName:
if len(json.loads(str(output[node]))["failedTask"]) == 0:
isFailed = 0
successNodeList.append(node)
else:
failedNodeList.append(node)
self.generalDetailInfo(node, str(output[node]))
jobName = json.loads(output[node])["jobName"]
nodeList["successNodes"] = successNodeList
nodeList["failedNodes"] = failedNodeList
self.generalJobInfo(jobName, nodeList)
return isFailed
except Exception as e:
self.context.logger.debug("check plan result failed %s." % str(e))
return 1
def systemCheck(self, sysInfo):
"""
function: collected OS information
output: Successfully collected OS information
"""
self.context.logger.log("Collecting OS information.")
# Check the system information on each node
cmd = "source %s; %s -t system_check -U %s -l %s -C %s" % (
self.context.mpprcFile,
OMCommand.getLocalScript("Local_Collect"),
self.context.user,
self.context.localLog,
self.formatJsonString(sysInfo))
if (self.context.isSingle or self.context.localMode):
output = subprocess.getstatusoutput(cmd)[1]
flag = self.resultCheck(output)
else:
self.context.sshTool.getSshStatusOutput(
cmd,
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.nodeName)
# Gets the execution result
flag = self.resultCheck(outputMap)
if (flag == 0):
self.context.logger.log("Successfully collected OS information.")
else:
self.context.logger.log("The cmd is %s " % cmd)
self.context.logger.log("Failed to collect OS information.")
def databaseCheck(self, data):
"""
function: collected catalog informatics
output: Successfully collected catalog statistics.
"""
self.context.logger.log("Collecting catalog statistics.")
# Collect catalog statistics on each node
cmd = "source %s; %s -t database_check -U %s -l %s -C %s" % (
self.context.mpprcFile,
OMCommand.getLocalScript("Local_Collect"),
self.context.user,
self.context.localLog,
self.formatJsonString(data))
if (self.context.isSingle or self.context.localMode):
output = subprocess.getstatusoutput(cmd)[1]
flag = self.resultCheck(output)
else:
self.context.sshTool.getSshStatusOutput(
cmd,
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.nodeName)
# Gets the execution result
flag = self.resultCheck(outputMap)
if (flag == 0):
self.context.logger.log(
"Successfully collected catalog statistics.")
else:
self.context.logger.log("The cmd is %s " % cmd)
self.context.logger.log("Failed collected catalog statistics.")
def logCopy(self, log, l):
"""
function: collected log files
output: Successfully collected log files
"""
self.context.logger.log("Collecting %s files." % log)
# Copy the log information on each node
self.context.keyword = base64.b64encode(
bytes(self.context.keyword, 'utf-8')).decode()
cmd = \
"source %s; " \
"%s -t %s -U %s -b '%s' -e '%s' -k '%s' -l %s -s %d -S %d -C %s" \
% (self.context.mpprcFile,
OMCommand.getLocalScript("Local_Collect"),
"log_copy" if log == "Log" else (
"xlog_copy" if log == "XLog" else "core_copy"),
self.context.user,
self.context.begintime,
self.context.endtime,
self.context.keyword,
self.context.localLog,
# For local collection,
# use the max speed limit, as all nodes do it individually.
self.context.speedLimit * 1024,
self.context.speedLimitFlag,
self.formatJsonString(l)
)
if (self.context.isSingle or self.context.localMode):
output = subprocess.getstatusoutput(cmd)[1]
flag = self.resultCheck(output)
else:
timeout = int(
self.context.LOG_SIZE_PER_DAY_ONE_NODE
* self.context.duration // self.context.speedLimit \
+ self.context.LOG_SIZE_PER_DAY_ONE_NODE
* self.context.duration // self.context.TAR_SPEED)
# The timeout value should be in [10 min, 1 hour]
if (timeout < DefaultValue.TIMEOUT_PSSH_COLLECTOR):
timeout = DefaultValue.TIMEOUT_PSSH_COLLECTOR
elif (timeout > 3600):
timeout = 3600
self.context.sshTool.setTimeOut(timeout)
self.context.logger.debug(
"Collection will be timeout in %ds." % timeout)
self.context.sshTool.getSshStatusOutput(
cmd,
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.nodeName)
# Gets the execution result
flag = self.resultCheck(outputMap)
if (flag == 0):
self.context.logger.log("Successfully collected %s files." % log)
else:
self.context.logger.log("The cmd is %s " % cmd)
self.context.logger.log("Failed collected %s files." % log)
def confGstack(self, check, s):
"""
function: collected configuration files and processed stack information
output: Successfully collected configuration files
and processed stack information.
"""
self.context.logger.log("Collecting %s files." % s["TypeName"])
# Collect configuration files
# and process stack information on each node
cmd = "source %s; %s -t %s -U %s -l %s -C %s" % (
self.context.mpprcFile,
OMCommand.getLocalScript("Local_Collect"),
check,
self.context.user,
self.context.localLog,
self.formatJsonString(s))
if (self.context.isSingle or self.context.localMode):
output = subprocess.getstatusoutput(cmd)[1]
flag = self.resultCheck(output)
else:
self.context.sshTool.getSshStatusOutput(
cmd,
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.nodeName)
# Gets the execution result
flag = self.resultCheck(outputMap)
if (flag == 0):
self.context.logger.log(
"Successfully collected %s files." % s["TypeName"])
else:
self.context.logger.log("The cmd is %s " % cmd)
self.context.logger.log(
"Failed collected %s files." % s["TypeName"])
def planSimulator(self, data):
"""
function: collect plan simulator files
output: Successfully collected files.
"""
self.context.logger.log("Collecting plan simulator statistics.")
# Collect plan simulator on each node
cmd = "source %s; %s -t plan_simulator_check -U %s -l %s -C %s" % (
self.context.mpprcFile,
OMCommand.getLocalScript("Local_Collect"),
self.context.user,
self.context.localLog,
self.formatJsonString(data))
if (self.context.isSingle or self.context.localMode):
output = subprocess.getstatusoutput(cmd)[1]
flag = self.planResultCheck(output)
else:
(status, output) = self.context.sshTool.getSshStatusOutput(
cmd,
self.context.nodeName)
outputMap = self.context.sshTool.parseSshOutput(
self.context.nodeName)
# Gets the execution result
flag = self.planResultCheck(outputMap)
if (flag == 0):
self.context.logger.log("Successfully collected plan simulator.")
else:
self.context.logger.log("The cmd is %s " % cmd)
self.context.logger.log("Failed collected plan simulator.")
def copyFile(self):
"""
function: collected result files
output: Successfully collected files.
"""
self.context.logger.log("Collecting files.")
# Collect result files on each node
cmd = "source %s; %s -t copy_file -U %s -o %s -h %s -l %s" % (
self.context.mpprcFile,
OMCommand.getLocalScript("Local_Collect"),
self.context.user,
self.context.outFile,
DefaultValue.GetHostIpOrName(),
self.context.localLog)
flag = 0
if (self.context.isSingle or self.context.localMode):
cmd = cmd + (" -s %d" % self.context.speedLimit * 1024)
cmd = cmd + (" -S %d" % self.context.speedLimitFlag)
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
flag = 0
self.context.logger.log("The cmd is %s " % cmd)
self.context.logger.log(
"Failed to collect files on %s. Error:\n%s" % \
(self.context.nodeName[0], output))
else:
flag = 1
else:
parallelNum = DefaultValue.getCpuSet()
if (len(self.context.nodeName) < parallelNum):
parallelNum = len(self.context.nodeName)
speedLimitEachNodeKBs = int(
self.context.speedLimit * 1024 // parallelNum)
# In parallel mode,
# set a bandwidth to collect log files from other nodes
# to avoid too much IO for net card, which is risky for CM things.
cmd = cmd + (" -s %d" % speedLimitEachNodeKBs)
cmd = cmd + (" -S %d" % self.context.speedLimitFlag)
# The timeout value to remote copy.
timeout = self.context.LOG_SIZE_PER_DAY_ONE_NODE \
* self.context.duration * 1024 // speedLimitEachNodeKBs
# The timeout value should be in [10 min, 1 hour]
if (timeout < DefaultValue.TIMEOUT_PSSH_COLLECTOR):
timeout = DefaultValue.TIMEOUT_PSSH_COLLECTOR
elif (timeout > 3600):
timeout = 3600
self.context.sshTool.setTimeOut(timeout)
self.context.logger.debug(
"Copy logs will be timeout in %ds." % timeout)
(status, output) = self.context.sshTool.getSshStatusOutput(
cmd,
self.context.nodeName,
parallel_num=parallelNum)
self.context.sshTool.parseSshOutput(
self.context.nodeName)
# Gets the execution result
for node in self.context.nodeName:
if (status[node] == DefaultValue.SUCCESS):
flag = 1
if (flag == 0):
self.context.logger.log(
"Failed to collect files: All collection tasks failed")
else:
self.context.logger.log("Successfully collected files.")
def tarResultFiles(self, currentTime, targetdir, resultdir):
"""
:return:
"""
# tar the result and delete directory
try:
# tar the result and delete directory
tarFile = "collector_%s.tar.gz" % currentTime
destDir = "collector_%s" % currentTime
cmd = "%s && %s" % (g_Platform.getCdCmd(targetdir),
g_Platform.getCompressFilesCmd(tarFile,
destDir))
cmd += " && %s" % g_Platform.getChmodCmd(
str(DefaultValue.KEY_FILE_MODE), tarFile)
cmd += " && %s" % g_Platform.getMoveFileCmd(tarFile, "../")
cmd += " && %s '%s'" % (
g_Platform.getRemoveCmd("directory"), targetdir)
DefaultValue.execCommandLocally(cmd)
self.context.logger.log(
"All results are stored in %s/collector_%s.tar.gz." % (
resultdir, currentTime))
except Exception as e:
raise Exception(str(e))
def getCycle(self, sysInfo):
"""
function: parse interval and count
input : sysInfo
output: count, interval
"""
interval = 0
if sysInfo.__contains__('Interval'):
interval = int(sysInfo['Interval'].replace(" ", ""))
count = int(sysInfo['Count'].replace(" ", ""))
return interval, count
    def doCollector(self):
        """
        function: Entry point: run every configured collection task in
                  sequence (OS, catalog, Log/XLog/CoreDump, Config,
                  Gstack, Plan), gather the results and pack them into
                  a tar archive. Tasks with a 'Count' key are repeated
                  Count times, sleeping 'Interval' seconds in between;
                  Log/XLog/CoreDump/Config are capped at one run.
        input : NA
        output: NA
        """
        # Parsing configuration files
        self.parseConfigFile()
        # check rsync command
        if self.context.speedLimitFlag == 1:
            self.checkCommand()
        # check tmp directory
        self.checkTmpDir()
        self.createDir()
        # create store dir
        (currentTime, targetdir, resultdir) = self.createStoreDir()
        # collect OS information
        if self.context.config.__contains__('System'):
            sysList = self.context.config['System']
            for sysInfo in sysList:
                if sysInfo.__contains__('Count'):
                    (interval, count) = self.getCycle(sysInfo)
                    print("do system check interval %s : count %s" % (
                        str(interval), str(count)))
                    while count:
                        count -= 1
                        self.systemCheck(sysInfo)
                        # Sleep only between runs, not after the last.
                        if count > 0 and interval > 0:
                            time.sleep(interval)
                else:
                    self.systemCheck(sysInfo)
        # collect catalog statistics
        if self.context.config.__contains__('Database'):
            dataList = self.context.config['Database']
            for data in dataList:
                if data.__contains__('Count'):
                    (interval, count) = self.getCycle(data)
                    print("do database check interval %s : count %s" % (
                        str(interval), str(count)))
                    while count:
                        count -= 1
                        self.databaseCheck(data)
                        if count > 0 and interval > 0:
                            time.sleep(interval)
                else:
                    self.databaseCheck(data)
        # Collect log files (at most one run per entry)
        if self.context.config.__contains__('Log'):
            logList = self.context.config['Log']
            for l in logList:
                if l.__contains__('Count'):
                    (interval, count) = self.getCycle(l)
                    print("do log check interval %s : count %s" % (
                        str(interval), str(count)))
                    # Repeated log collection is not supported; warn
                    # and clamp the count to a single run.
                    if count > 1:
                        self.context.logger.log(
                            ErrorCode.GAUSS_512["GAUSS_51246"] % "Log")
                        count = 1
                    while count:
                        count -= 1
                        self.logCopy("Log", l)
                        if count > 0 and interval > 0:
                            time.sleep(interval)
                else:
                    self.logCopy("Log", l)
        # Collect xlog files (at most one run per entry)
        if self.context.config.__contains__('XLog'):
            xloglist = self.context.config['XLog']
            for l in xloglist:
                if l.__contains__('Count'):
                    (interval, count) = self.getCycle(l)
                    print("do XLog check " + str(interval) + ":" + str(count))
                    if count > 1:
                        self.context.logger.log(
                            ErrorCode.GAUSS_512["GAUSS_51246"] % "XLog")
                        count = 1
                    while count:
                        count -= 1
                        self.logCopy("XLog", l)
                        if count > 0 and interval > 0:
                            time.sleep(interval)
                else:
                    self.logCopy("XLog", l)
        # CoreDump files (at most one run per entry)
        if self.context.config.__contains__('CoreDump'):
            corelist = self.context.config['CoreDump']
            for l in corelist:
                if l.__contains__('Count'):
                    (interval, count) = self.getCycle(l)
                    print("do CoreDump check " + str(interval) + ":" + str(
                        count))
                    if count > 1:
                        self.context.logger.log(
                            ErrorCode.GAUSS_512["GAUSS_51246"] % "CoreDump")
                        count = 1
                    while count:
                        count -= 1
                        self.logCopy("CoreDump", l)
                        if count > 0 and interval > 0:
                            time.sleep(interval)
                else:
                    self.logCopy("CoreDump", l)
        # collect configuration files (at most one run per entry)
        if self.context.config.__contains__('Config'):
            clist = self.context.config['Config']
            for c in clist:
                if c.__contains__('Count'):
                    (interval, count) = self.getCycle(c)
                    print("do Config check " + str(interval) + ":" + str(
                        count))
                    if count > 1:
                        self.context.logger.log(
                            ErrorCode.GAUSS_512["GAUSS_51246"] % "Config")
                        count = 1
                    while count:
                        count -= 1
                        self.confGstack("Config", c)
                        if count > 0 and interval > 0:
                            time.sleep(interval)
                else:
                    self.confGstack("Config", c)
        # process stack information (repeatable)
        if self.context.config.__contains__('Gstack'):
            stacklist = self.context.config['Gstack']
            for s in stacklist:
                if s.__contains__('Count'):
                    (interval, count) = self.getCycle(s)
                    print("do Gstack check " + str(interval) + ":" + str(
                        count))
                    while count:
                        count -= 1
                        self.confGstack("Gstack", s)
                        if count > 0 and interval > 0:
                            time.sleep(interval)
                else:
                    self.confGstack("Gstack", s)
        # collect configuration files and process stack information
        # NOTE(review): 'Trace' collection appears to be unimplemented;
        # only a message is printed — confirm whether this is intended.
        if self.context.config.__contains__('Trace'):
            print("do config check")
        # collect plan simulator files (repeatable)
        if self.context.config.__contains__('Plan'):
            dbList = self.context.config['Plan']
            for s in dbList:
                if s.__contains__('Count'):
                    (interval, count) = self.getCycle(s)
                    print("do Plan check " + str(interval) + ":" + str(count))
                    while count:
                        count -= 1
                        self.planSimulator(s)
                        if count > 0 and interval > 0:
                            time.sleep(interval)
                else:
                    self.planSimulator(s)
        # Collect result files
        self.copyFile()
        # generate summary info
        self.generalSummary(resultdir, currentTime)
        # tar the result and delete directory
        self.tarResultFiles(currentTime, targetdir, resultdir)