#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description : LocalCollector.py is a local utility to
# collect log files and parameter files
#############################################################################

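# Illustrative invocations (hypothetical user and paths; the flags are the
# ones parsed by parseCommandLine below, and -b/-e take YYYYMMDDHHMM):
#   python3 LocalCollector.py -t system_check -U omm -l /tmp/local_collect.log
#   python3 LocalCollector.py -t log_copy -U omm -b 202001010000 -e 202001022359
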
import os
import sys
import subprocess
import getopt
import time
import re
import base64
import json
import datetime
import getpass

sys.path.append(sys.path[0] + "/../")
from gspylib.common.DbClusterInfo import dbClusterInfo
from gspylib.common.ParameterParsecheck import Parameter
from gspylib.common.GaussLog import GaussLog
from gspylib.common.Common import ClusterCommand, DefaultValue
from multiprocessing.dummy import Pool as ThreadPool
from gspylib.common.ErrorCode import ErrorCode
from base_utils.os.cmd_util import CmdUtil
from domain_utils.cluster_file.cluster_log import ClusterLog
from base_utils.os.env_util import EnvUtil
from base_utils.os.file_util import FileUtil
from base_utils.os.net_util import NetUtil
from domain_utils.domain_common.cluster_constants import ClusterConstants
from domain_utils.cluster_os.cluster_user import ClusterUser
from domain_utils.cluster_file.cluster_dir import ClusterDir
from gspylib.component.DSS.dss_comp import Dss

###########################
# instance type. only for CN/DN
###########################
INSTANCE_TYPE_UNDEFINED = -1
# master
MASTER_INSTANCE = 0
# standby
STANDBY_INSTANCE = 1
# dummy standby
DUMMY_STANDBY_INSTANCE = 2

#######################################################################
# GLOBAL VARIABLES
# g_opts: global option
# g_logger: global logger
# g_clusterInfo: global cluster information
# g_resultdir: global result directory
# g_localnodeinfo: global local node information
#######################################################################
HOSTNAME = NetUtil.GetHostIpOrName()
g_opts = None
g_logger = None
g_clusterInfo = None
g_resultdir = None
g_localnodeinfo = None
g_jobInfo = None
g_tmpdir = None
g_current_time = ""
g_need_gstack = 0
g_core_pattern = 'core-%e-%p-%t'


class CmdOptions():
    '''
    Command-line options for the local collector.
    '''

    def __init__(self):
        """
        function: Constructor
        """
        # initialize variables
        self.action = ""
        self.outputDir = ""
        self.logFile = ""
        self.nodeName = ""
        self.appPath = ""
        self.user = ""
        self.begin = ""
        self.end = ""
        self.key = ""
        # Speed limit for copying/remote-copying files, in KB/s.
        # We use KB/s so that a small bandwidth value does not
        # round down to zero during calculation.
        self.speedLimitKBs = 0
        self.speedLimitFlag = 0
        self.config = ""
        self.content = []


class JobInfo():
    """
    class: JobInfo
    """

    def __init__(self):
        '''
        Constructor
        '''
        # initialize variables
        self.jobName = ""
        self.successTask = []
        self.failedTask = {}


def checkEmpty(path):
    """
    function: check whether the path contains any files
    input : path
    output : int, 1 if empty, 0 otherwise
    """
    isEmpty = 1
    for root, dirs, files in os.walk(path, topdown=False):
        if files:
            isEmpty = 0
            break
    return isEmpty


def replaceInvalidStr(outputStr):
    """
    function: replace invalid str
    input : outputStr
    output : str
    """
    return outputStr.replace("\'", "").replace("\"", "").replace(
        "`", "").replace("echo", "e c h o").replace("\n", " ")


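# Illustrative example for replaceInvalidStr above (hypothetical input):
#   replaceInvalidStr('echo "a`b"\n') returns 'e c h o ab ' -- quotes and
#   backticks are stripped, "echo" is defanged, and newlines become spaces.
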
def sendLogFiles():
    """
    function: package and send log files back to the command node.
    :return:
    """
    g_logger.debug("Begin to remote copy log files.")
    g_logger.debug(
        "Speed limit to copy log files is %d KB/s." % g_opts.speedLimitKBs)
    # Compress the copied log file and modify the permissions in the
    # temporary directory
    tarName = "%s.tar.gz" % HOSTNAME

    path = g_tmpdir + "/%s" % HOSTNAME
    if not os.path.exists(path):
        g_logger.logExit("Result directory does not exist.")

    isEmpty = checkEmpty(path)
    if isEmpty == 1:
        # Delete the result temporary directory if the result temporary
        # directory exists
        cmd = "(if [ -d '%s' ];then rm -rf '%s';fi)" % (
            g_resultdir, g_resultdir)
        # Delete the archive if the archive is present in the temporary
        # directory
        cmd = "%s && (if [ -f '%s'/'%s' ];then rm -rf '%s'/'%s';fi)" % \
              (cmd, g_tmpdir, tarName, g_tmpdir, tarName)
        (status, output) = CmdUtil.retryGetstatusoutput(cmd)
        if status != 0:
            g_logger.logExit("Failed to delete %s." % "%s and %s" % (
                g_resultdir, tarName) + " Error:\n%s" % output)
        g_logger.logExit("All collection tasks failed.")

    cmd = "cd '%s' && tar -zcf '%s' '%s' && chmod %s '%s'" % \
          (g_tmpdir, tarName, HOSTNAME, DefaultValue.FILE_MODE, tarName)
    (status, output) = CmdUtil.retryGetstatusoutput(cmd)
    if status != 0:
        g_logger.logExit("Failed to compress %s." % ("directory %s/%s" %
                                                     (g_tmpdir, HOSTNAME))
                         + " Error: \n%s" % output)

    if g_opts.nodeName != "":
        # send the compressed backup file to the node that is
        # currently performing the backup
        if g_opts.nodeName == NetUtil.GetHostIpOrName():
            if int(g_opts.speedLimitFlag) == 1:
                cmd = "rsync --bwlimit=%d '%s'/'%s' '%s'/" % \
                      (g_opts.speedLimitKBs, g_tmpdir, tarName,
                       g_opts.outputDir)
            else:
                cmd = "cp '%s'/'%s' '%s'/" % (
                    g_tmpdir, tarName, g_opts.outputDir)
        else:
            # scp's limit parameter is specified in Kbit/s. 1 KB/s = 8 Kbit/s
            cmd = "pscp -x '-l %d' -H %s '%s'/'%s' '%s'/" % \
                  (g_opts.speedLimitKBs * 8, g_opts.nodeName, g_tmpdir,
                   tarName, g_opts.outputDir)
        (status, output) = CmdUtil.retryGetstatusoutput(cmd)
        if status != 0:
            g_logger.logExit(
                "Failed to copy %s." % tarName + " Error:\n%s" % output)

    # Delete the temporary directory if the temporary directory exists
    cmd = "(if [ -d '%s' ];then rm -rf '%s';fi)" % (g_resultdir, g_resultdir)
    # Delete the archive if the archive is present in the temporary directory
    cmd = "%s && (if [ -f '%s'/'%s' ];then rm -rf '%s'/'%s';fi)" % \
          (cmd, g_tmpdir, tarName, g_tmpdir, tarName)
    (status, output) = CmdUtil.retryGetstatusoutput(cmd)
    if status != 0:
        g_logger.logExit("Failed to delete %s. %s" % (
            "%s and %s" % (g_resultdir, tarName), " Error:\n%s" % output))


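# Illustrative bandwidth conversion used in sendLogFiles above (hypothetical
# value): with a 512 KB/s limit, the remote copy becomes "pscp -x '-l 4096'",
# since scp's -l option takes Kbit/s and 512 KB/s * 8 = 4096 Kbit/s.
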
def checkParameterEmpty(parameter, parameterName):
    """
    function: check whether the parameter is empty
    input : parameter, parameterName
    output : NA
    """
    if parameter == "":
        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
                               % parameterName)


def parseCommandLine():
    """
    function: parse the command line
    input : NA
    output: NA
    """
    global g_opts
    g_opts = CmdOptions()
    try:
        # Parse command
        opts, args = getopt.getopt(sys.argv[1:], "t:U:o:h:b:e:k:l:s:S:C:",
                                   [""])
    except getopt.GetoptError as e:
        # Error exit if an illegal parameter exists
        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e))
    if len(args) > 0:
        # Error exit if an illegal parameter exists
        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] %
                               str(args[0]))
    # Save parameters
    parameter_map = {"-t": g_opts.action, "-U": g_opts.user,
                     "-o": g_opts.outputDir, "-h": g_opts.nodeName,
                     "-l": g_opts.logFile, "-b": g_opts.begin,
                     "-e": g_opts.end, "-k": g_opts.key,
                     "-s": g_opts.speedLimitKBs, "-S": g_opts.speedLimitFlag,
                     "-C": g_opts.config}
    parameter_keys = parameter_map.keys()

    for key, value in opts:
        if key in parameter_keys:
            if key == "-C":
                value = value.replace("#", "\"")
            parameter_map[key] = value.strip()
        else:
            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % value)

        Parameter.checkParaVaild(key, value)
    g_opts.action = parameter_map["-t"]
    g_opts.user = parameter_map["-U"]
    g_opts.outputDir = parameter_map["-o"]
    g_opts.nodeName = parameter_map["-h"]
    g_opts.logFile = parameter_map["-l"]
    g_opts.begin = parameter_map["-b"]
    g_opts.end = parameter_map["-e"]
    g_opts.key = parameter_map["-k"]
    g_opts.speedLimitKBs = parameter_map["-s"]
    g_opts.speedLimitFlag = parameter_map["-S"]
    g_opts.config = parameter_map["-C"]
    # The -t parameter is required
    checkParameterEmpty(g_opts.action, "t")
    # check if the user exists and is the right user
    checkParameterEmpty(g_opts.user, "U")
    ClusterUser.checkUser(g_opts.user, False)
    # check log file
    if g_opts.logFile == "":
        g_opts.logFile = ClusterLog.getOMLogPath(
            ClusterConstants.LOCAL_LOG_FILE, g_opts.user, "", "")
    if not os.path.isabs(g_opts.logFile):
        GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log")
    if int(g_opts.speedLimitKBs) < 0:
        GaussLog.exitWithError(ErrorCode.GAUSS_526["GAUSS_53032"])

    g_opts.speedLimitKBs = int(g_opts.speedLimitKBs)

    # 1048576 KB/s = 1 GB/s, which effectively means unlimited.
    if g_opts.speedLimitKBs == 0:
        g_opts.speedLimitKBs = 1048576


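# Illustrative speed-limit handling in parseCommandLine above (hypothetical
# values): "-s 512 -S 1" caps copies at 512 KB/s via rsync --bwlimit, while
# "-s 0" is rewritten to 1048576 KB/s (1 GB/s), i.e. effectively unlimited.
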
def initGlobal():
    """
    function: init g_logger, g_clusterInfo and other global variables
    input : NA
    output: NA
    """
    global g_logger
    global g_clusterInfo
    global g_resultdir
    global g_localnodeinfo
    global g_tmpdir
    global g_current_time
    global g_core_pattern

    try:
        g_logger = GaussLog(g_opts.logFile, "LocalCollect")
        # Init the cluster information from the static configuration file
        g_clusterInfo = dbClusterInfo()
        g_clusterInfo.initFromStaticConfig(g_opts.user)
        g_tmpdir = EnvUtil.getTmpDirFromEnv()

        # Obtain the cluster installation directory
        g_opts.appPath = g_clusterInfo.appPath
        # Get the current node information
        g_localnodeinfo = g_clusterInfo.getDbNodeByName(HOSTNAME)
        # Get a temporary directory
        g_resultdir = "%s/%s" % (g_tmpdir, HOSTNAME)

        g_current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f")
    except Exception as e:
        g_logger.logExit(str(e))


def check_command():
    """
    function: check command
    input : NA
    output : NA
    """
    g_logger.debug("check Command for rsync")
    g_logger.debug(g_opts.speedLimitFlag)
    cmd = "command -v rsync"
    (status, output) = subprocess.getstatusoutput(cmd)
    if status != 0:
        g_logger.logExit(("The cmd is %s." % cmd) + output)


def create_temp_result_folder():
    """
    function: create_temp_result_folder
    output: Successfully create temp result folder
    """
    # Delete the temporary folder if a temporary folder with the same name
    # exists
    cmd = "(if [ -d '%s' ];then rm -rf '%s';fi)" % (g_resultdir, g_resultdir)
    # Create temporary folders and subfolders
    cmd = "%s && mkdir -p -m %s '%s'" % (
        cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir)
    cmd = "%s && mkdir -p -m %s '%s/systemfiles'" % (
        cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir)
    cmd = "%s && mkdir -p -m %s '%s/catalogfiles'" % (
        cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir)
    cmd = "%s && mkdir -p -m %s '%s/xlogfiles'" % (
        cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir)
    cmd = "%s && mkdir -p -m %s '%s/gstackfiles'" % (
        cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir)
    cmd = "%s && mkdir -p -m %s '%s/coreDumpfiles'" % (
        cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir)
    cmd = "%s && mkdir -p -m %s '%s/planSimulatorfiles'" % (
        cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir)
    cmd = "%s && mkdir -p -m %s '%s'/logfiles && mkdir -p -m %s " \
          "'%s/configfiles'" % \
          (cmd, DefaultValue.KEY_DIRECTORY_MODE, g_resultdir,
           DefaultValue.KEY_DIRECTORY_MODE, g_resultdir)
    g_logger.debug("Command for creating output directory: %s" % cmd)
    (status, output) = CmdUtil.retryGetstatusoutput(cmd)
    if status != 0:
        g_logger.logExit("Failed to create the %s directory." %
                         ("%s/logfiles and %s/configfiles" % (
                             g_resultdir, g_resultdir)) +
                         " Error:\n%s" % output)


def itemTitleCommand(cmds, info, dataFileName):
    """
    function: item title command
    input : cmds, info, dataFileName
    output : NA
    """
    itemTitle = "'###########################################################'"
    cmds.append("echo '\n%s' >> %s 2>&1" % (itemTitle, dataFileName))
    cmds.append("echo '#' >> %s 2>&1" % dataFileName)
    cmds.append("echo '#' %s >> %s 2>&1" % (info, dataFileName))
    cmds.append("echo '#' >> %s 2>&1" % dataFileName)
    cmds.append("echo %s >> %s 2>&1" % (itemTitle, dataFileName))


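# Illustrative banner written by itemTitleCommand above, assuming info is
# "C L U S T E R' 'I N F O" as passed in basic_info_check below:
#
#   ###########################################################
#   #
#   # C L U S T E R I N F O
#   #
#   ###########################################################
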
def basic_info_check():
    """
    function: collect basic information
    output: Successfully collected basic information
    """
    g_logger.debug("Starting collect basic info.")
    dataFileName = "%s/systemfiles/database_system_info_%s.txt" % (
        g_resultdir, datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f"))
    cmds = []
    itemTitleCommand(cmds, "C L U S T E R' 'I N F O", dataFileName)
    cmds.append("gs_om -t status --detail >> %s 2>&1" % dataFileName)

    itemTitleCommand(cmds, "V E R S I O N' 'I N F O", dataFileName)
    cmds.append("gaussdb --version >> %s 2>&1" % dataFileName)
    cmds.append("cm_agent --version >> %s 2>&1" % dataFileName)
    cmds.append("cm_server --version >> %s 2>&1" % dataFileName)
    cmds.append("gs_gtm --version >> %s 2>&1" % dataFileName)
    cmds.append("cat /proc/version >> %s 2>&1" % dataFileName)

    cmd = "cat /proc/sys/kernel/core_pattern"
    (status, output) = subprocess.getstatusoutput(cmd)
    if status != 0:
        g_logger.debug(
            "Failed to collect core dump files. Command: %s.\n Error:\n%s" % (
                cmd, output))
    core_config = str(output)
    core_pattern = core_config.split('/')[-1]
    itemTitleCommand(cmds, "C O R E' 'F I L E' 'I N F O", dataFileName)
    if core_pattern != g_core_pattern:
        cmds.append(
            "echo Failed to collect core dump files, core pattern "
            "is not core-e-p-t. >> %s 2>&1" % dataFileName)
    else:
        core_path = "/".join(core_config.split("/")[:-1])
        cmds.append("ls -lrt %s >> %s 2>&1" % (core_path, dataFileName))

    itemTitleCommand(cmds, "X L O G' 'F I L E' 'I N F O", dataFileName)
    for Inst in g_localnodeinfo.datanodes:
        cmds.append(
            "echo '\n********' dn_%d xlog file info '*******' >> %s 2>&1" % (
                Inst.instanceId, dataFileName))
        pg_xlog = Inst.datadir + "/pg_xlog"
        cmds.append("ls -lrt %s >> %s 2>&1" % (pg_xlog, dataFileName))

    for Inst in g_localnodeinfo.coordinators:
        cmds.append(
            "echo '\n********' cn_%d xlog file info '*******' >> %s 2>&1" % (
                Inst.instanceId, dataFileName))
        pg_xlog = Inst.datadir + "/pg_xlog"
        cmds.append("ls -lrt %s >> %s 2>&1" % (pg_xlog, dataFileName))

    cmd = "echo $GAUSSLOG"
    (status, output) = subprocess.getstatusoutput(cmd)
    gausslog = str(output)
    pg_log = "%s/pg_log" % gausslog

    itemTitleCommand(cmds, "P G_L O G' 'F I L E' 'I N F O", dataFileName)
    for root, dirs, files in os.walk(pg_log):
        for perDir in dirs:
            cmds.append(
                "echo '\n********' %s pg_log file info '*******' >> %s 2>&1"
                % (perDir, dataFileName))
            cmds.append(
                "ls -lrt %s/%s >> %s 2>&1" % (pg_log, perDir, dataFileName))

    # Execute each query command and redirect the results to the specified
    # file
    for cmd in cmds:
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            g_logger.debug(
                ("Failed to collect basic information. Error:\n%s." % output) +
                ("The cmd is %s " % cmd))


def system_check():
    """
    function: collect OS information
    input : NA
    output: Successfully collected OS information
    """
    g_logger.debug("Collecting OS information.")
    g_jobInfo.jobName = "Collecting OS information"
    dataFileName = "%s/systemfiles/OS_information_%s.txt" % (
        g_resultdir, datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f"))
    cmds = []
    # Add information to the document
    cmds.append(
        "echo '************************************\n* OS information"
        " for host' > %s 2>&1" % dataFileName)
    cmds.append("hostname >> %s 2>&1" % dataFileName)
    cmds.append("echo '************************************' >> %s 2>&1" %
                dataFileName)
    appendCommand(cmds, "ps ux", dataFileName)
    appendCommand(cmds, "iostat -xm 2 3", dataFileName)
    appendCommand(cmds, "free -m", dataFileName)
    # Execute each query command and redirect the results to the specified
    # file
    for cmd in cmds:
        (status, output) = subprocess.getstatusoutput(cmd)
        if ">>" in cmd:
            cmd = cmd.split(">>")[0]
        cmd = cmd.replace("\n", " ")
        if "echo" in cmd:
            continue
        if status != 0:
            if "Permission denied" in output:
                output = "can not print info to file: Permission denied"
            elif 'iostat' in cmd and 'command not found' in str(
                    output).lower().strip():
                output = ErrorCode.GAUSS_514["GAUSS_51405"] % " iostat." + str(
                    output).lstrip().strip()
            g_jobInfo.failedTask[cmd] = replaceInvalidStr(output)
            g_logger.debug(
                "Failed to collect OS information. Error:\n%s" % output)
        else:
            g_jobInfo.successTask.append(cmd)
    basic_info_check()
    # Modify the file permissions
    os.chmod(dataFileName, DefaultValue.FILE_MODE_PERMISSION)
    g_logger.log(json.dumps(g_jobInfo.__dict__))
    g_logger.debug("Successfully collected OS information.")


def appendCommand(cmds, newCommand, dataFileName):
    """
    function: make up the commands into the array
    input : cmds, newCommand, dataFileName
    output: NA
    """
    # Execute the command and output to the specified file
    cmds.append("echo '\n************************************\n* "
                "%s \n"
                "************************************' >> %s 2>&1" %
                (newCommand, dataFileName))
    if 'iostat' in newCommand:
        cmds.append("%s >> %s" % (newCommand, dataFileName))
    else:
        cmds.append("%s >> %s 2>&1" % (newCommand, dataFileName))


def database_check():
    """
    function: collect catalog statistics
    input : NA
    output: Successfully collected catalog statistics.
    """
    # Execute SQL to collect catalog statistics
    g_logger.debug("Collecting catalog statistics.")
    g_jobInfo.jobName = "Collecting catalog information"
    for dnInst in g_localnodeinfo.datanodes:
        if dnInst.instanceType == STANDBY_INSTANCE:
            continue
        sqls = []
        schema = ""
        for s in DefaultValue.DATABASE_CHECK_WHITE_LIST:
            schema += "\'%s\'," % s
        sql = "SELECT viewname FROM pg_views Where schemaname IN (%s) union " \
              "SELECT tablename FROM pg_tables Where schemaname IN (%s);" % (
                  schema[:-1], schema[:-1])
        g_logger.debug(sql)
        (status, output) = ClusterCommand.execSQLCommand(sql, g_opts.user, "",
                                                         dnInst.port)
        if status != 0:
            g_logger.debug(
                "Failed to exec SQL command. please check db status. sql: "
                "%s.\n Error: %s.\n" % (sql, output))
            g_jobInfo.failedTask["find views"] = ErrorCode.GAUSS_535[
                "GAUSS_53502"]
            g_logger.log(json.dumps(g_jobInfo.__dict__))
            raise Exception("")
        g_jobInfo.successTask.append("find views")
        V_list = output.split("\n")
        for view in g_opts.content:
            view = view.replace(" ", "")
            if len(view) > 0:
                schema = 'pg_catalog'
                if "." in view:
                    s_t = view.split(".")
                    if len(s_t) != 2:
                        g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
                            "GAUSS_53515"] % view
                        continue
                    else:
                        schema = s_t[0]
                        name = s_t[1]
                    if schema.lower() not in \
                            DefaultValue.DATABASE_CHECK_WHITE_LIST:
                        g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
                            "GAUSS_53513"] % schema
                        continue
                    if name.lower() not in V_list:
                        g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
                            "GAUSS_53514"] % (name, schema)
                        continue
                elif view.lower() not in V_list:
                    g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
                        "GAUSS_53514"] % (view, schema)
                    continue
                filepath = ("%s/catalogfiles/" % g_resultdir)
                if not os.path.exists(filepath):
                    os.makedirs(filepath)
                filename = ("%s/dn_%s_%s_%s.csv" % (
                    filepath, dnInst.instanceId, view.replace(".", "_"),
                    datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f")))
                sql = "\copy (select * from %s) to %s with csv HEADER;" % (
                    view, filename)
                (status, output) = ClusterCommand.execSQLCommand(
                    sql, g_opts.user, "", dnInst.port)
                if status != 0:
                    g_logger.debug(
                        "Failed to exec SQL command. sql %s.\n Error: %s.\n"
                        % (sql, output))
                    if "does not exist" in output:
                        g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
                            "GAUSS_53500"] % view
                    elif "Connection refused" in output:
                        g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
                            "GAUSS_53501"]
                    else:
                        g_jobInfo.failedTask[view] = ErrorCode.GAUSS_535[
                            "GAUSS_53502"]
                else:
                    g_jobInfo.successTask.append(view)
                    g_logger.debug(
                        "Successfully collected %s statistics. %s" % (
                            view, sql))
        execute_sqls(sqls, dnInst)
    g_logger.log(json.dumps(g_jobInfo.__dict__))
    g_logger.debug("Successfully collected catalog statistics.")


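# Illustrative catalog dump path produced by database_check above
# (hypothetical instance id and timestamp): collecting "pg_catalog.pg_tables"
# writes <g_resultdir>/catalogfiles/dn_6001_pg_catalog_pg_tables_<ts>.csv
# via \copy ... with csv HEADER.
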
def execute_sqls(sqls, dnInst):
    """
    function: execute the sql commands
    input : sqls, dnInst
    output: NA
    """
    # Write the formatted content to the specified file
    filePath = "%s/catalogfiles/gs_clean_%s.txt" % (
        g_resultdir, datetime.datetime.now().strftime("%Y%m%d_%H%M%S%f"))
    FileUtil.createFileInSafeMode(filePath)
    with open(filePath, "w") as f:
        f.write(
            "************************************\n"
            "* Catalog statistics for host "
            "%s \n************************************" % dnInst.hostname)
        for sql in sqls:
            # Execute each sql and write the results to a file
            f.write(
                "\n\n************************************\n %s "
                "\n************************************\n" % sql)
            output = ClusterCommand.execSQLCommand(sql, g_opts.user, "",
                                                   dnInst.port)[1]
            f.write(str(output))

        userProfile = EnvUtil.getMpprcFile()
        cmd = "source %s ; gs_clean -a -N -s -p %s" \
              % (userProfile, dnInst.port)
        f.write(
            "\n\n************************************\n %s "
            "\n************************************\n" % cmd)
        output = subprocess.getstatusoutput(cmd)[1]
        f.write(str(output))

        f.flush()
    # Modify the file permissions to 640
    os.chmod(filePath, DefaultValue.FILE_MODE_PERMISSION)


def log_check(logFileName):
    """
    function: check whether the log file name includes any keyword
    input : logFileName
    output: 1 if the file name matches, 0 otherwise
    """
    for c in g_opts.content:
        c = c.replace(" ", "").lower()
        if len(c) > 0 and c in logFileName.lower():
            return 1
    return 0


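# Illustrative example for log_check above (hypothetical keyword): with
# g_opts.content == ["dn_6001"], a file named "postgresql-dn_6001-x.log"
# returns 1, while "cm_agent-x.log" returns 0.
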
def dss_cert_replacer(logger):
    '''
    Re-generate the ciphertext of the DSS.
    '''
    logger.debug("Start to replace the ciphertext of the DSS locally")
    user = getpass.getuser()
    gausshome = ClusterDir.getInstallDir(user)
    dsscmd = os.path.realpath(os.path.join(gausshome, 'bin', 'dsscmd'))
    if os.path.isfile(dsscmd):
        dss_home = EnvUtil.get_dss_home(user)
        cfg = os.path.join(dss_home, 'cfg', 'dss_inst.ini')
        if os.path.isfile(cfg):
            Dss.write_dss_context_with_file()
            logger.debug("Successfully generated the ciphertext of the DSS.")
        else:
            logger.log(f"The {cfg} does not exist.")
    else:
        logger.debug("Non-dss-mode or dsscmd not found.")


def log_copy():
    """
    function: collect log files
    input : NA
    output: NA
    """
    g_logger.debug("Starting collect log.")
    g_jobInfo.jobName = "Collecting pg_log information"
    logfiletar = "log_%s.tar.gz" % datetime.datetime.now().strftime(
        "%Y%m%d_%H%M%S%f")
    keyword_result = "keyword_result.txt"
    deleteCmd = "cd $GAUSSLOG && if [ -d tmp_gs_collector ];" \
                "then rm -rf tmp_gs_collector; fi"

    if g_opts.key is not None and g_opts.key != "":
        g_logger.debug(
            "Keyword for collecting log in base64 encode [%s]." % g_opts.key)
        # b64decode returns bytes; decode to str for the string operations
        # below
        g_opts.key = base64.b64decode(g_opts.key).decode()
        g_logger.debug(
            "Keyword for collecting log in plain text [%s]." % g_opts.key)

    g_logger.debug(
        "Speed limit to copy log files is %d KB/s." % g_opts.speedLimitKBs)

    # Filter the log files; if a keyword is given, do not collect prf files
    if g_opts.key is not None and g_opts.key != "":
        cmd = "cd $GAUSSLOG && if [ -d tmp_gs_collector ];" \
              "then rm -rf tmp_gs_collector; " \
              "fi && (find . -type f -iname '*.log' -print)" \
              " | xargs ls --time-style='+ %Y%m%d%H%M' -ll"
    else:
        cmd = "cd $GAUSSLOG && if [ -d tmp_gs_collector ];" \
              "then rm -rf tmp_gs_collector; " \
              "fi && (find . -type f -iname '*.log' -print && " \
              "find . -type f -iname '*.prf' -print) " \
              "| xargs ls --time-style='+ %Y%m%d%H%M' -ll"
    (status, output) = subprocess.getstatusoutput(cmd)
    logFiles = output.split("\n")
    logs = []
    Directorys = []
    findFiles = 0
    # If there are log files filtered by time
    if len(logFiles[0].split()) != 2:
        for logFile in logFiles:
            logFileName = logFile.split()[6]
            logStartTime = formatTime(logFileName)
            # Skip log files whose names do not meet the format requirements
            if not logStartTime.isdigit() or len(logStartTime) != 12:
                continue
            logStartTime = int(logStartTime)
            logEndTime = int(logFile.split()[5])
            # Filter out the logs we need
            if (logEndTime > int(g_opts.begin) and logStartTime < int(
                    g_opts.end) and log_check(logFileName)):
                logs.append(logFileName)
                findFiles = 1
        if findFiles == 1:
            g_jobInfo.successTask.append("find log files")
        else:
            g_jobInfo.failedTask["find log files"] = ErrorCode.GAUSS_535[
                "GAUSS_53504"] % 'log'
        g_logger.debug("Successfully found log files.")

    else:
        g_jobInfo.failedTask["find log files"] = ErrorCode.GAUSS_535[
            "GAUSS_53505"]
        g_logger.debug("There are no log files.")

    # Make a temporary directory and copy
    cmd = "cd $GAUSSLOG && mkdir -p -m %s tmp_gs_collector" % \
          DefaultValue.DIRECTORY_MODE
    (status, output) = subprocess.getstatusoutput(cmd)
    for log in logs:
        Directorys.append(os.path.dirname(log))
    for directory in Directorys:
        cmd = "cd $GAUSSLOG && mkdir -p -m %s tmp_gs_collector/'%s'" % (
            DefaultValue.DIRECTORY_MODE, directory)
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            (status1, output1) = subprocess.getstatusoutput(deleteCmd)
            g_jobInfo.failedTask["mkdir"] = ErrorCode.GAUSS_535["GAUSS_53506"]
            g_logger.log(json.dumps(g_jobInfo.__dict__))
            g_logger.debug("Failed to mkdir. Error:\n%s." % output)
            raise Exception("")
    for log in logs:
        if int(g_opts.speedLimitFlag) == 1:
            cmd = "cd $GAUSSLOG && rsync --bwlimit=%d '%s' " \
                  "tmp_gs_collector/'%s'" % (
                      g_opts.speedLimitKBs, log, log)
        else:
            cmd = "cd $GAUSSLOG && cp '%s' tmp_gs_collector/'%s'" % (log, log)
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0 and 'Permission denied' not in output:
            (status1, output1) = subprocess.getstatusoutput(deleteCmd)
            g_jobInfo.failedTask["copy log files"] = replaceInvalidStr(output)
            g_logger.log(json.dumps(g_jobInfo.__dict__))
            g_logger.debug("Failed to copy log files. Error:\n%s." % output)
            raise Exception("")

    g_jobInfo.successTask.append("copy log files")
    g_logger.debug("Successfully copied log files.")

    # Filter zip files
    cmd = "cd $GAUSSLOG && find . -type f -iname '*.zip' -print" \
          " | xargs ls --time-style='+ %Y%m%d%H%M' -ll"
    (status, output) = subprocess.getstatusoutput(cmd)
    zipFiles = output.split("\n")
    # If there are zip files filtered by time
    if len(zipFiles[0].split()) != 2:
        for zipFile in zipFiles:
            zipFileName = zipFile.split()[6]
            logStartTime = formatTime(zipFileName)
            # Skip zip files whose names do not meet the format requirements
            if not logStartTime.isdigit() or len(logStartTime) != 12:
                continue
            logStartTime = int(logStartTime)
            logEndTime = int(zipFile.split()[5])
            # Filter out the logs we need
            if (logEndTime > int(g_opts.begin) and logStartTime < int(
                    g_opts.end)):
                zipdir = os.path.dirname(zipFileName)
                g_jobInfo.successTask.append(
                    "find log zip files: %s" % zipFileName)
                cmd = "cd $GAUSSLOG && mkdir -p -m %s tmp_gs_collector/%s " \
                      "&& unzip -o %s -d tmp_gs_collector/%s " % \
                      (DefaultValue.DIRECTORY_MODE, zipdir,
                       zipFileName, zipdir)
                (status, output) = subprocess.getstatusoutput(cmd)
                if status != 0:
                    g_jobInfo.failedTask[
                        "find log zip files"] = replaceInvalidStr(output)
                    g_logger.log(json.dumps(g_jobInfo.__dict__))
                    g_logger.debug(("Failed to filter zip files. Error:\n%s."
                                    % output) + ("The cmd is %s " % cmd))
                    raise Exception("")
        g_logger.debug("Successfully filtered zip files.")
    else:
        g_logger.debug("There are no zip files.")

    # Filter keywords
    if g_opts.key is not None and g_opts.key != "":
        if len(logs) != 0:
            g_opts.key = g_opts.key.replace('$', '\$')
            g_opts.key = g_opts.key.replace('\"', '\\\"')
            cmd = "cd $GAUSSLOG/tmp_gs_collector && "
            cmd = "%s grep \"%s\" -r * > %s/logfiles/%s" % (
                cmd, g_opts.key, g_resultdir, keyword_result)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0 and output != "":
                cmd = "rm -rf $GAUSSLOG/tmp_gs_collector"
                (status1, output1) = CmdUtil.retryGetstatusoutput(cmd)
                g_jobInfo.failedTask[
                    "filter keyword"] = "keywords: %s, Error: %s" % (
                        g_opts.key, output)
                g_logger.log(json.dumps(g_jobInfo.__dict__))
                g_logger.debug(
                    "Failed to filter keyword. Error:\n%s." % output)
                raise Exception("")
            else:
                cmd = "rm -rf $GAUSSLOG/tmp_gs_collector"
                (status, output) = CmdUtil.retryGetstatusoutput(cmd)
                g_logger.debug("Successfully filtered keyword.")
                g_jobInfo.successTask.append("filter keyword: %s" % g_opts.key)

        else:
            cmd = "touch %s/logfiles/%s && " % (g_resultdir, keyword_result)
            cmd = "%s rm -rf $GAUSSLOG/tmp_gs_collector" % cmd
            (status, output) = CmdUtil.retryGetstatusoutput(cmd)
            if status != 0:
                g_jobInfo.failedTask["touch keyword file"] = replaceInvalidStr(
                    output)
                g_logger.log(json.dumps(g_jobInfo.__dict__))
                g_logger.debug(
                    "Failed to touch keyword file. Error:\n%s." % output)
                raise Exception("")
            g_logger.debug("Successfully filtered keyword.")
    else:
        cmd = "cd $GAUSSLOG/tmp_gs_collector && tar -czf ../'%s' . && " \
              % logfiletar
        if int(g_opts.speedLimitFlag) == 1:
            cmd = "%s rsync --bwlimit=%d $GAUSSLOG/'%s' '%s'/logfiles/ && " % (
                cmd, g_opts.speedLimitKBs, logfiletar, g_resultdir)
        else:
            cmd = "%s cp $GAUSSLOG/'%s' '%s'/logfiles/ && " % (
                cmd, logfiletar, g_resultdir)
        cmd = " %s rm -rf $GAUSSLOG/tmp_gs_collector " \
              "&& rm -rf $GAUSSLOG/'%s'" % \
              (cmd, logfiletar)
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            g_jobInfo.failedTask[
                "copy result file and delete tmp file"] = replaceInvalidStr(
                    output)
            g_logger.log(json.dumps(g_jobInfo.__dict__))
            g_logger.debug("Failed to delete log files. Error:\n%s." % output)
            raise Exception("")

    subprocess.getstatusoutput("cd '%s'/logfiles/ && chmod %s *" % (
        g_resultdir, DefaultValue.FILE_MODE))
    g_logger.debug("Successfully collected log files.")
    g_logger.log(json.dumps(g_jobInfo.__dict__))


def formatTime(filename):
    """
    function: extract the start time embedded in a file name
    input : filename
    output : str, YYYYMMDDHHMM on success, "ERROR" otherwise
    """
    try:
        timelist = re.findall(r"\d\d\d\d-\d\d-\d\d_\d\d\d\d\d\d", filename)
        time1 = re.findall(r"\d+", timelist[0])
        time2 = ""
        for i in time1:
            time2 += i
        return time2[:-2]
    except Exception:
        return "ERROR"


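# Illustrative example for formatTime above (hypothetical file name):
#   formatTime("postgresql-2020-01-02_120000.log") returns "202001021200",
#   i.e. the YYYYMMDDHHMM part of the timestamp embedded in the name.
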
def xlog_copy():
    """
    function: collect xlog files
    input : NA
    output: NA
    """
    g_logger.debug("Starting collect xlog.")
    if int(g_opts.speedLimitFlag) == 1:
        g_logger.debug(
            "Speed limit to collect xlog files is %d KB/s."
            % g_opts.speedLimitKBs)
    g_jobInfo.jobName = "Collecting xlog information"
    Instances = []
    try:
        for Inst in g_localnodeinfo.datanodes:
            if "dn" in ",".join(g_opts.content).lower():
                Instances.append(Inst)
        for Inst in g_localnodeinfo.coordinators:
            if "cn" in ",".join(g_opts.content).lower():
                Instances.append(Inst)
        # parallel copy of xlog files
        if Instances:
            pool = ThreadPool(DefaultValue.getCpuSet())
            pool.map(parallel_xlog, Instances)
            pool.close()
            pool.join()
        path = "%s/xlogfiles" % g_resultdir
        if checkEmpty(path) == 0:
            cmd = " cd %s/xlogfiles " \
                  "&& tar -czf xlogfile_%s.tar.gz xlogfile_%s " \
                  "&& rm -rf xlogfile_%s" % \
                  (g_resultdir, g_current_time, g_current_time,
                   g_current_time)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                g_logger.debug(
                    "Failed to collect xlog. Command %s \n, Error %s \n" %
                    (cmd, output))
                g_jobInfo.failedTask["compress xlog files"] = \
                    ErrorCode.GAUSS_535["GAUSS_53507"] % 'tar'
            else:
                g_jobInfo.successTask.append("compress xlog files")
    except Exception as e:
        g_logger.debug(str(e))
        g_logger.log(json.dumps(g_jobInfo.__dict__))
        raise Exception(str(e))
    g_logger.debug("Successfully collected xlog.")
    g_logger.log(json.dumps(g_jobInfo.__dict__))


def getTargetFile(dir_path, fileList):
    """
    function: get target files within the time window
    input : dir_path, fileList
    output: target file list
    """
    if os.path.isfile(dir_path):
        create_time = time.strftime(
            '%Y%m%d%H%M', time.localtime(os.stat(dir_path).st_ctime))
        if int(g_opts.begin) < int(create_time) < int(g_opts.end):
            fileList.append(dir_path)
    elif os.path.isdir(dir_path):
        for s in os.listdir(dir_path):
            if "archive" in s:
                continue
            newDir = os.path.join(dir_path, s)
            getTargetFile(newDir, fileList)
    return fileList


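# Illustrative window for getTargetFile above (hypothetical bounds): with
# g_opts.begin == "202001010000" and g_opts.end == "202001022359", only files
# whose ctime formatted as YYYYMMDDHHMM falls strictly inside that window are
# returned; directory entries containing "archive" are skipped.
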
def getXlogCmd(Inst):
    """
    function: build the command that copies xlog files
    input : Inst
    output: command string
    """
    pg_xlog = Inst.datadir + "/pg_xlog"
    xlogs = getTargetFile(pg_xlog, [])
    cmd = ""
    if Inst.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR:
        if len(xlogs) == 0:
            g_jobInfo.failedTask["find cn_%s xlog files" % Inst.instanceId] = \
                ErrorCode.GAUSS_535["GAUSS_53504"] % 'xlog'
        else:
            g_jobInfo.successTask.append(
                "find cn_%s xlog files" % Inst.instanceId)
            cmd = "mkdir -p -m %s '%s/xlogfiles/xlogfile_%s/cn_%s'" % \
                  (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir,
                   g_current_time, Inst.instanceId)
            for xlog in xlogs:
                if int(g_opts.speedLimitFlag) == 1:
                    cmd = "%s && rsync --bwlimit=%d %s" \
                          " '%s/xlogfiles/xlogfile_%s/cn_%s'" % \
                          (cmd, g_opts.speedLimitKBs, xlog, g_resultdir,
                           g_current_time, Inst.instanceId)
                else:
                    cmd = "%s && cp -rf %s " \
                          "'%s/xlogfiles/xlogfile_%s/cn_%s'" % \
                          (cmd, xlog, g_resultdir, g_current_time,
                           Inst.instanceId)
    elif Inst.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE:
        if len(xlogs) == 0:
            g_jobInfo.failedTask["find dn_%s xlog files" % Inst.instanceId] = \
                ErrorCode.GAUSS_535["GAUSS_53504"] % 'xlog'
        else:
            g_jobInfo.successTask.append(
                "find dn_%s xlog files" % Inst.instanceId)
            cmd = "mkdir -p -m %s '%s/xlogfiles/xlogfile_%s/dn_%s'" % \
                  (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir,
                   g_current_time, Inst.instanceId)
            for xlog in xlogs:
                if int(g_opts.speedLimitFlag) == 1:
                    cmd = "%s && rsync --bwlimit=%d %s" \
                          " '%s/xlogfiles/xlogfile_%s/dn_%s'" % \
                          (cmd, g_opts.speedLimitKBs, xlog, g_resultdir,
                           g_current_time, Inst.instanceId)
                else:
                    cmd = "%s && cp -rf %s " \
                          "'%s/xlogfiles/xlogfile_%s/dn_%s'" % \
                          (cmd, xlog, g_resultdir, g_current_time,
                           Inst.instanceId)
    return cmd


def parallel_xlog(Inst):
    """
    parallel copy of xlog files
    """
    cmd = getXlogCmd(Inst)
    if len(cmd) > 1:
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            g_logger.debug(
                "Failed to collect xlog files. Command: %s.\n Error: %s\n" % (
                    cmd, output))
            g_jobInfo.failedTask["collect xlog files"] = replaceInvalidStr(
                output)
            raise Exception("")


def core_copy():
    """
    function: collect core files
    input : NA
    output: NA
    """
    g_logger.debug("Starting collect core dump.")
    if int(g_opts.speedLimitFlag) == 1:
        g_logger.debug(
            "Speed limit to collect core dump files is %d KB/s."
            % g_opts.speedLimitKBs)
    g_jobInfo.jobName = "Collecting Core information"
    Instances = []
    cmd = "cat /proc/sys/kernel/core_pattern"
    (status, output) = subprocess.getstatusoutput(cmd)
    if status != 0:
        g_logger.debug(
            "Failed to collect core dump files. Command: %s.\n Error:\n%s" % (
                cmd, output))
        g_jobInfo.failedTask["read core pattern"] = ErrorCode.GAUSS_535[
            "GAUSS_53507"] % 'cat'
        g_logger.log(json.dumps(g_jobInfo.__dict__))
        raise Exception("")
    core_config = str(output)
    core_pattern = core_config.split('/')[-1]
    core_path = "/".join(core_config.split("/")[:-1])
    if core_pattern != g_core_pattern:
        g_logger.debug(
            "Failed to collect core dump files, core pattern is not '%s'."
            % g_core_pattern)
        g_jobInfo.failedTask["check core pattern"] = ErrorCode.GAUSS_535[
            "GAUSS_53508"]
        g_logger.log(json.dumps(g_jobInfo.__dict__))
        raise Exception("")

    g_jobInfo.successTask.append("check core pattern")
    cmd = "mkdir -p -m %s '%s/coreDumpfiles/corefile_%s'" % \
          (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, g_current_time)
    cmd = "%s && gaussdb --version >>" \
          " %s/coreDumpfiles/corefile_%s/version.txt" % \
          (cmd, g_resultdir, g_current_time)
    (status, output) = subprocess.getstatusoutput(cmd)
    if status != 0:
        g_logger.debug(
            "Failed to collect gaussdb version info."
            " Command: %s.\n Error:\n%s" % (cmd, output))
        g_jobInfo.failedTask["check gaussdb version"] = replaceInvalidStr(
            output)
    g_jobInfo.successTask.append("check gaussdb version")

    cores = getTargetFile(core_path, [])
    if len(cores) > 0:
        g_jobInfo.successTask.append("find core files")
        isEmpty = 1
        for core in cores:
            tempName = str(core.split("/")[-1])
            if not tempName.startswith("core-"):
                g_logger.debug(
                    "WARNING: core file %s does not match core-e-p-t." % (
                        str(core.split("/")[-1])))
                continue
            p = tempName.split("-")[1]
            if "".join(p).lower() in ",".join(g_opts.content).lower():
                p_stack = "%s_stack" % p
                cmdList = []
                if p_stack in g_opts.content:
                    cmd = "gdb -q --batch --ex" \
                          " \"set height 0\" -ex \"thread apply" \
                          " all bt full\" %s %s >> " \
                          "%s/coreDumpfiles/corefile_%s/%s-stack1.txt" % (
                              p, core, g_resultdir, g_current_time,
                              core.split("/")[-1])
                    cmd += " && gdb -q --batch --ex \"set height 0\"" \
                           " -ex \"thread apply all bt\" %s %s >> " \
                           "%s/coreDumpfiles/corefile_%s/%s-stack2.txt" % (
                               p, core, g_resultdir, g_current_time,
                               core.split("/")[-1])
                    cmdList.append(cmd)

                if p in g_opts.content:
                    if int(g_opts.speedLimitFlag) == 1:
                        cmd = "rsync --bwlimit=%d %s" \
                              " '%s/coreDumpfiles/corefile_%s'" % (
                                  g_opts.speedLimitKBs, core, g_resultdir,
                                  g_current_time)
                    else:
                        cmd = "cp -rf %s '%s/coreDumpfiles/corefile_%s'" % (
                            core, g_resultdir, g_current_time)
                    cmdList.append(cmd)
                for c in cmdList:
                    (status, output) = subprocess.getstatusoutput(c)
                    if status != 0:
                        g_logger.debug(
                            "Failed to copy core dump files. Command:"
                            " %s.\n Error:\n%s" % (c, output))
                        g_jobInfo.failedTask[
                            "copy core file"] = replaceInvalidStr(output)
                    else:
                        isEmpty = 0

        if isEmpty == 0:
            cmd = "cd %s/coreDumpfiles && tar -czf corefile_%s.tar.gz" \
                  " corefile_%s && rm -rf corefile_%s" % \
                  (g_resultdir, g_current_time, g_current_time,
                   g_current_time)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                g_logger.debug(
                    "Failed to collect core dump files."
                    " Command: %s.\n Error:\n%s" % (cmd, output))
                g_jobInfo.failedTask[
                    "compress core files"] = replaceInvalidStr(output)
                g_logger.log(json.dumps(g_jobInfo.__dict__))
                raise Exception("")
            else:
                g_jobInfo.successTask.append("compress core files")
        else:
            g_jobInfo.failedTask["copy core file"] = ErrorCode.GAUSS_535[
                "GAUSS_53509"]
    else:
        g_jobInfo.failedTask["find core files"] = ErrorCode.GAUSS_535[
            "GAUSS_53504"] % 'core'

    g_logger.debug("Successfully collected core dump. %s" % cores)
    g_logger.log(json.dumps(g_jobInfo.__dict__))


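# Illustrative core file name under the expected kernel core_pattern
# 'core-%e-%p-%t' (hypothetical values): "core-gaussdb-12345-1577836800".
# core_copy above matches the second field ("gaussdb") against
# g_opts.content, and "gaussdb_stack" in g_opts.content additionally
# triggers the gdb backtrace dumps.
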
def conf_gstack(jobName):
    """
    function: collect configuration files and process stack information
    output: Successfully collected configuration files
            and processed stack information.
    """
    g_logger.debug("Collecting %s information." % jobName)
    g_jobInfo.jobName = "Collecting %s information" % jobName
    try:
        # Get all instances of the cluster
        Instances = []
        for Inst in g_localnodeinfo.datanodes:
            if "dn" in ",".join(g_opts.content).lower():
                Instances.append(Inst)
        # parallel copy of configuration files, and get gstack
        if Instances:
            pool = ThreadPool(DefaultValue.getCpuSet())
            pool.map(parallel_conf_gstack, Instances)
            pool.close()
            pool.join()
        g_jobInfo.successTask.append("collect %s information" % jobName)
        g_logger.log(json.dumps(g_jobInfo.__dict__))
    except Exception as e:
        g_logger.debug(str(e))
        g_logger.log(json.dumps(g_jobInfo.__dict__))
        raise Exception("")

    g_logger.debug(
        "Successfully collected configuration files "
        "and processed stack information.")


def plan_simulator_check():
    """
    function: collect plan simulator files
    output: Successfully collected files.
    """
    g_logger.debug("Collecting plan simulator.")
    g_jobInfo.jobName = "Collecting plan simulator information"
    haveCnInst = 0
    for cnInst in g_localnodeinfo.datanodes:
        haveCnInst = 1
        if "*" in g_opts.content:
            sql = "SELECT datname FROM pg_database" \
                  " Where datname NOT IN ('template1', 'template0');"
            (status, output) = ClusterCommand.execSQLCommand(sql, g_opts.user,
                                                             "", cnInst.port)
            if status != 0:
                g_logger.debug(
                    "Failed to exec SQL command. please "
                    "check db status. sql: %s.\n Error: %s.\n" % (
                        sql, output))
                g_jobInfo.failedTask["find database"] = ErrorCode.GAUSS_535[
                    "GAUSS_53502"]
                g_logger.log(json.dumps(g_jobInfo.__dict__))
                raise Exception("")
            g_jobInfo.successTask.append("find database")
            dbList = output.split("\n")
        else:
            dbList = g_opts.content
        for db in dbList:
            cmd = "mkdir -p -m %s '%s/planSimulatorfiles/%s'" % \
                  (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir, db)
            cmd = "%s && gs_plan_simulator.sh -m dump -d %s " \
                  "-p %d -D %s/planSimulatorfiles/%s" % \
                  (cmd, db, cnInst.port, g_resultdir, db)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                g_logger.debug(
                    "Failed to collect plan simulator. "
                    "Command %s.\n Error: %s.\n" % (cmd, output))
                g_jobInfo.failedTask["dump %s plan info" % db] = \
                    ErrorCode.GAUSS_535["GAUSS_53510"]
            else:
                g_jobInfo.successTask.append("dump %s plan info" % db)
    if haveCnInst == 0:
        g_jobInfo.failedTask["dump database plan info"] = ErrorCode.GAUSS_535[
            "GAUSS_53503"]
    g_logger.log(json.dumps(g_jobInfo.__dict__))


def getBakConfCmd(Inst):
    """
    function: get bak conf cmd
    input : Inst
    output : (cmd, pidfile)
    """
    cmd = ""
    pidfile = ""
    if Inst.instanceRole == DefaultValue.INSTANCE_ROLE_GTM:
        if g_need_gstack == 0:
            cmd = "mkdir -p -m %s '%s/configfiles/config_%s/gtm_%s'" % \
                  (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir,
                   g_current_time, Inst.instanceId)
            cmd = "%s && cp '%s'/gtm.conf '%s'/gtm.control " \
                  "'%s'/configfiles/config_%s/gtm_%s/" % \
                  (cmd, Inst.datadir, Inst.datadir, g_resultdir,
                   g_current_time, Inst.instanceId)
            if Inst.instanceType == DefaultValue.MASTER_INSTANCE:
                cmd = "%s && cp '%s'/gtm.sequence" \
                      " '%s'/configfiles/config_%s/gtm_%s/" % \
                      (cmd, Inst.datadir, g_resultdir, g_current_time,
                       Inst.instanceId)
        else:
            cmd = "mkdir -p -m %s '%s/gstackfiles/gstack_%s/gtm_%s'" % \
                  (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir,
                   g_current_time, Inst.instanceId)
            pidfile = Inst.datadir + "/gtm.pid"
            try:
                with open(pidfile, 'r') as f:
                    pid = int(f.readline())
                    if pid != 0:
                        cmd += " && gstack '%d' >" \
                               " '%s'/gtm.stack && mv " \
                               "'%s'/gtm.stack '%s'" \
                               "/gstackfiles/gstack_%s/gtm_%s/gtm_%s.stack" % \
                               (pid, Inst.datadir, Inst.datadir, g_resultdir,
                                g_current_time, Inst.instanceId,
                                Inst.instanceId)
            except Exception:
                g_jobInfo.failedTask[
                    "collect gtm_%s process stack info" % Inst.instanceId] = \
                    ErrorCode.GAUSS_535["GAUSS_53511"] % 'GTM'

    elif Inst.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR:
        if g_need_gstack == 0:
            cmd = "mkdir -p -m %s '%s/configfiles/config_%s/cn_%s'" % \
                  (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir,
                   g_current_time, Inst.instanceId)
            cmd = "%s && cp -rf '%s'/postgresql.conf '%s'" \
                  "/pg_hba.conf '%s'/global/pg_control" \
                  " '%s'/gaussdb.state %s/pg_replslot/ %s/pg_ident.conf" \
                  " '%s'/configfiles/config_%s/cn_%s/" % \
                  (cmd, Inst.datadir, Inst.datadir, Inst.datadir,
                   Inst.datadir, Inst.datadir, Inst.datadir,
                   g_resultdir, g_current_time, Inst.instanceId)
        else:
            cmd = "mkdir -p -m %s '%s/gstackfiles/gstack_%s/cn_%s'" % \
                  (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir,
                   g_current_time, Inst.instanceId)
            pidfile = Inst.datadir + "/postmaster.pid"
            try:
                with open(pidfile, 'r') as f:
                    pid = int(f.readline())
                    if pid != 0:
                        cmd = "%s && gstack '%d' > '%s'" \
                              "/cn.stack && mv '%s'/cn.stack '%s'" \
                              "/gstackfiles/gstack_%s/cn_%s/cn_%s.stack" % \
                              (cmd, pid, Inst.datadir, Inst.datadir,
                               g_resultdir, g_current_time, Inst.instanceId,
                               Inst.instanceId)
            except Exception:
                g_jobInfo.failedTask[
                    "collect cn_%s process stack info" % Inst.instanceId] = \
                    ErrorCode.GAUSS_535["GAUSS_53511"] % 'CN'

    elif Inst.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE:
        if g_need_gstack == 0:
            cmd = "mkdir -p -m %s '%s/configfiles/config_%s/dn_%s'" % \
                  (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir,
                   g_current_time, Inst.instanceId)
            cmd = "%s && cp -rf '%s'/postgresql.conf '%s'/pg_hba." \
                  "conf {}" \
                  " '%s'/gaussdb.state %s/pg_replslot/ %s/pg_ident.conf" \
                  " '%s'/configfiles/config_%s/dn_%s/" % \
                  (cmd, Inst.datadir, Inst.datadir, Inst.datadir,
                   Inst.datadir, Inst.datadir,
                   g_resultdir, g_current_time, Inst.instanceId)
            pg_conf_dir = os.path.realpath(
                os.path.join(Inst.datadir, 'global/pg_control'))
            cmd = cmd.format(pg_conf_dir if os.path.isdir(pg_conf_dir) else "")
        else:
            cmd = "mkdir -p -m %s '%s/gstackfiles/gstack_%s/dn_%s'" % \
                  (DefaultValue.KEY_DIRECTORY_MODE, g_resultdir,
                   g_current_time, Inst.instanceId)
            pidfile = Inst.datadir + "/postmaster.pid"
            try:
                with open(pidfile, 'r') as f:
                    pid = int(f.readline())
                    if pid != 0:
                        cmd = "%s && gstack '%d' > '%s'/dn.stack && mv" \
                              " '%s'/dn.stack '%s'" \
                              "/gstackfiles/gstack_%s/dn_%s/dn_%s.stack" % \
                              (cmd, pid, Inst.datadir, Inst.datadir,
                               g_resultdir, g_current_time, Inst.instanceId,
                               Inst.instanceId)
            except Exception:
                g_jobInfo.failedTask[
                    "collect dn_%s process stack info" % Inst.instanceId] = \
                    ErrorCode.GAUSS_535["GAUSS_53511"] % 'DN'
    return (cmd, pidfile)


def parallel_conf_gstack(Inst):
    """
    parallel copy of configuration files, and get gstack
    """
    (cmd, pidfile) = getBakConfCmd(Inst)
    (status, output) = subprocess.getstatusoutput(cmd)
    if status != 0:
        if "command not found" in output:
            g_jobInfo.failedTask["collect process stack info"] = \
                ErrorCode.GAUSS_535["GAUSS_53512"]
            g_logger.debug(
                "Failed to collect gstack files. "
                "Command: %s \n Error: %s.\n" % (cmd, output))
            raise Exception("")
        elif "gstack" in output:
            g_jobInfo.failedTask[
                "collect process stack info"] = replaceInvalidStr(output)
            g_logger.debug(
                "Failed to collect gstack files."
                " Command: %s \n Error: %s.\n" % (cmd, output))
            raise Exception("")
        elif "Process" in output:
            g_jobInfo.failedTask[
                "collect process stack info"] = replaceInvalidStr(output)
            g_logger.debug(
                "Failed to collect gstack files. "
                "Command: %s \n Error: %s.\n" % (cmd, output))
            raise Exception("")
        else:
            g_jobInfo.failedTask[
                "collect configuration files"] = replaceInvalidStr(output)
            g_logger.debug(
                "Failed to collect configuration files."
                " Command: %s \n Error: %s.\n" % (cmd, output))
            raise Exception("")


def parseConfig():
    """
    function: parse Config parameter
    input : NA
    output: NA
    """
    if g_opts.config != "":
        d = json.loads(g_opts.config)
        g_opts.content = list(filter(None, d['Content'].split(",")))


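# Illustrative -C payload for parseConfig above (hypothetical): since
# parseCommandLine maps '#' back to '"', passing -C '{#Content#: #dn,cn#}'
# yields g_opts.content == ["dn", "cn"].
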
def main():
    """
    main function
    """
    try:
        parseCommandLine()
        initGlobal()
        parseConfig()
        global g_jobInfo
        g_jobInfo = JobInfo()
        if g_opts.action == "check_command":
            check_command()
        elif g_opts.action == "create_dir":
            create_temp_result_folder()
        # Get system information
        elif g_opts.action == "system_check":
            system_check()
        # Get the database information
        elif g_opts.action == "database_check":
            database_check()
        # Make a copy of the log files
        elif g_opts.action == "log_copy":
            log_copy()
        elif g_opts.action == "dss_cert_replacer":
            dss_cert_replacer(g_logger)
        # Copy configuration files, and get gstack
        elif g_opts.action == "Config":
            conf_gstack("Config")
        elif g_opts.action == "Gstack":
            global g_need_gstack
            g_need_gstack = 1
            conf_gstack("Gstack")
            g_need_gstack = 0
        # Send all log files we collected to the command node.
        elif g_opts.action == "copy_file":
            sendLogFiles()
        elif g_opts.action == "xlog_copy":
            xlog_copy()
        elif g_opts.action == "plan_simulator_check":
            plan_simulator_check()
        elif g_opts.action == "core_copy":
            core_copy()
        else:
            g_logger.logExit("Unrecognized parameter: %s." % g_opts.action)
    except Exception as e:
        GaussLog.exitWithError(str(e))


if __name__ == '__main__':
    main()
    sys.exit(0)