402 lines
16 KiB
Python
402 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding:utf-8 -*-
|
|
#############################################################################
|
|
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
|
|
#
|
|
# openGauss is licensed under Mulan PSL v2.
|
|
# You can use this software according to the terms
|
|
# and conditions of the Mulan PSL v2.
|
|
# You may obtain a copy of Mulan PSL v2 at:
|
|
#
|
|
# http://license.coscl.org.cn/MulanPSL2
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OF ANY KIND,
|
|
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
# See the Mulan PSL v2 for more details.
|
|
# ----------------------------------------------------------------------------
|
|
# Description : gs_collector is a utility
|
|
# to collect information about the cluster.
|
|
#############################################################################
|
|
|
|
import os
|
|
import sys
|
|
import pwd
|
|
import time
|
|
import json
|
|
from datetime import datetime
|
|
|
|
from gspylib.common.GaussLog import GaussLog
|
|
from gspylib.common.Common import DefaultValue
|
|
from gspylib.common.ParallelBaseOM import ParallelBaseOM
|
|
from gspylib.common.ErrorCode import ErrorCode
|
|
from gspylib.common.ParameterParsecheck import Parameter
|
|
from impl.collect.OLAP.CollectImplOLAP import CollectImplOLAP
|
|
from domain_utils.cluster_file.cluster_log import ClusterLog
|
|
from base_utils.os.env_util import EnvUtil
|
|
from domain_utils.domain_common.cluster_constants import ClusterConstants
|
|
from domain_utils.cluster_os.cluster_user import ClusterUser
|
|
|
|
|
|
def my_obj_pairs_hook(lst):
    """
    function: json ``object_pairs_hook`` that rejects duplicate keys
    input : lst, the list of (key, value) pairs of one decoded JSON object
    output: dict built from the pairs, in their original order

    The first repeated key aborts through GaussLog.exitWithError with
    error GAUSS_51245. The default json decoder would instead silently
    keep the last value, hiding a broken config file.
    """
    result = {}
    for key, val in lst:
        if key in result:
            # Same key seen twice inside one JSON object.
            GaussLog.exitWithError(
                ErrorCode.GAUSS_512["GAUSS_51245"] % key)
        else:
            result[key] = val
    return result
|
|
|
|
|
|
class Collect(ParallelBaseOM):
    """
    Option holder for gs_collector.

    Parses the command line (parseCommandLine), validates it together with
    the JSON collection config (checkParameter) and keeps the results as
    instance attributes.
    """

    def __init__(self):
        ParallelBaseOM.__init__(self)
        # initialize variable
        self.host = ""
        # -f: file listing the host names
        self.inFile = ""
        # -o: output file
        self.outFile = ""
        # -h: host names (also filled from the -f file)
        self.nodeName = []
        # validated config items, grouped by their "TypeName"
        self.config = {}
        self.appPath = ""

        # "yyyymmdd hh:mm" as typed by the user; checkParameter rewrites
        # both values to "yyyymmddhhmm"
        self.begintime = ""
        self.endtime = ""
        self.keyword = ""
        # speed limit to copy/scp files, in MB/s
        self.speedLimit = 1024
        # 1 once --speed-limit was given on the command line
        self.speedLimitFlag = 0

        # config file
        self.configFile = ""

        # Our products may generate 200MB/(1DN per day),
        # So max log size is (8DN * (1master+7standbys) + 1CN) * 200MB = 13GB/node
        # Other logs, such as OM/CM/Audit we ignore them here, which are too small.
        self.LOG_SIZE_PER_DAY_ONE_NODE = 1024 * 13

        # As we test, the speed for packaging logs into a compressed tar file is 45MB/s.
        self.TAR_SPEED = 45

        # Whole days between begintime and endtime, plus one
        # (set by checkParameter).
        self.duration = 0

    #############################################################################
    # Parse and check parameters
    #############################################################################
    def usage(self):
        """
gs_collector is a utility to collect information about the cluster.

Usage:
  gs_collector -? | --help
  gs_collector -V | --version
  gs_collector --begin-time="BEGINTIME" --end-time="ENDTIME" [-h HOSTNAME | -f HOSTFILE]
    [--keyword=KEYWORD] [--speed-limit=SPEED] [-o OUTPUT] [-l LOGFILE] [-C CONFIGFILE]

General options:
      --begin-time=BEGINTIME      Time to start log file collection. Pattern:yyyymmdd hh:mm.
      --end-time=ENDTIME          Time to end log file collection. Pattern:yyyymmdd hh:mm.
      --speed-limit=SPEED         Bandwidth to copy files, a nonnegative integer, in MByte/s.
                                  0 means unlimited. Only supported if rsync command exists.
  -h                              Names of hosts whose information is to be collected.
                                  Example: host1,host2.
  -f                              File listing names of all the hosts to connect to.
      --keyword=KEYWORD           Save log files containing the keyword.
  -o                              Save the result to the specified file.
  -l                              Path of log file.
  -?, --help                      Show help information for this utility, and exit the command line mode.
  -V, --version                   Show version information.
  -C                              gs_collector config file, listing which info to collect
   # gs_collector.json example
   {
    "Collect":
    [
        {"TypeName": "name", "Content": "value", "Interval": "seconds", "Count": "counts"} # interval is in Second
    ]
   }

   # TypeName : content
   COLLECT_INFO_MAP
   {
    "System" : "HardWareInfo,RunTimeInfo",
    "Database" : "pg_locks,pg_stat_activity,pg_thread_wait_status",
    "Log" : "DataNode,ClusterManager",
    "XLog": "DataNode",
    "Config" : "DataNode",
    "Gstack" : "DataNode",
    "CoreDump": "gaussdb,GaussMaster,gs_ctl"
    "Trace": "Dump"
    "Plan": "*" # Any database name or character "*"
   }

        """
        # The docstring above doubles as the help text shown to the user.
        print(self.usage.__doc__)

    def dateCheck(self, datestr):
        """
        function: check whether a date string has the form "yyyymmdd hh:mm"
        input : datestr, the value given to --begin-time / --end-time
        output: bool, True only if the string parses as "%Y%m%d %H:%M",
                contains exactly one space, and has an 8-character date
                part and a 5-character time part
        """
        try:
            time.strptime(datestr, "%Y%m%d %H:%M")
            # strptime alone also accepts unpadded fields and other
            # whitespace separators, so the exact layout is checked too.
            # Anything but exactly two space-separated parts raises
            # ValueError on unpacking.
            date_part, time_part = datestr.split(" ")
        except (TypeError, ValueError):
            return False
        return len(date_part) == 8 and len(time_part) == 5

    def parseCommandLine(self):
        """
        function: parse the command line and store the options on self
        input : NA (options come from Parameter().ParameterCommandLine)
        output: NA; prints the usage text and exits with status 0 when
                help was requested, and exits through
                GaussLog.exitWithError when a time or the speed limit
                is malformed
        """
        # Parse command
        ParaObj = Parameter()
        ParaDict = ParaObj.ParameterCommandLine("collector")

        # If help is included in the parameter,
        # the help message is printed and exited
        if "helpFlag" in ParaDict:
            self.usage()
            sys.exit(0)
        # Save parameter nodename
        if "nodename" in ParaDict:
            self.nodeName = ParaDict.get("nodename")
        # Save parameter hostfile
        if "hostfile" in ParaDict:
            self.inFile = ParaDict.get("hostfile")
        # Save parameter begintime
        if "begintime" in ParaDict:
            self.begintime = ParaDict.get("begintime")
            # Check the begin time parameter format is correct
            if not self.dateCheck(self.begintime):
                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50003"] %
                                       ('-begin-time',
                                        "date") + " Pattern: yyyymmdd hh:mm.")
        # Save parameter endtime
        if "endtime" in ParaDict:
            self.endtime = ParaDict.get("endtime")
            # Check the end time parameter format is correct
            if not self.dateCheck(self.endtime):
                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50003"] %
                                       ('-end-time',
                                        "date") + " Pattern: yyyymmdd hh:mm.")
        # Save parameter keyword
        if "keyword" in ParaDict:
            self.keyword = ParaDict.get("keyword")
        # Save parameter outFile
        if "outFile" in ParaDict:
            self.outFile = ParaDict.get("outFile")
        # Save parameter logFile
        if "logFile" in ParaDict:
            self.logFile = ParaDict.get("logFile")

        # Get speed limit to copy/remote copy files.
        if "speedLimit" in ParaDict:
            speed = str(ParaDict.get("speedLimit"))
            # isdecimal() accepts exactly the unsigned digit strings that
            # int() can convert. isdigit() would also let through
            # characters such as superscript digits, which make int() raise.
            if not speed.isdecimal():
                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50003"] %
                                       ('-speed-limit',
                                        'a nonnegative integer'))
            self.speedLimit = int(speed)
            self.speedLimitFlag = 1

        # Save parameter configFile
        if "configFile" in ParaDict:
            self.configFile = ParaDict.get("configFile")

    def checkParameter(self):
        """
        function: validate the parsed parameters and fill in defaults
        input : NA (works on the attributes set by parseCommandLine)
        output: NA; every failed check ends in GaussLog.exitWithError

        Steps, in order: -h/-f exclusivity, host file, config file,
        begin/end time, mpprc file, current user, log file, speed limit.
        """
        # The -h and -f parameters can not be specified at the same time
        if len(self.nodeName) != 0 and self.inFile != "":
            GaussLog.exitWithError(
                ErrorCode.GAUSS_500["GAUSS_50005"] % ('h', 'f'))

        if self.inFile != "":
            self._loadHostFile()

        # Without -C, use the config file located next to this script
        if self.configFile == "":
            self.configFile = "%s/%s" % (
                os.path.dirname(os.path.realpath(__file__)),
                ClusterConstants.GS_COLLECTOR_CONFIG_FILE)
        self._loadConfigFile()

        # The config file must contribute at least one item
        if len(self.config) == 0:
            GaussLog.exitWithError(ErrorCode.GAUSS_535["GAUSS_53516"])

        self._checkTimeRange()

        # check mpprc file path
        self.mpprcFile = EnvUtil.getMpprcFile()
        # check if user exist and is the right user
        try:
            self.user = pwd.getpwuid(os.getuid()).pw_name
            ClusterUser.checkUser(self.user)
        except Exception as e:
            GaussLog.exitWithError(str(e))

        # check log file
        # NOTE(review): self.logFile is not set in Collect.__init__;
        # presumably ParallelBaseOM.__init__ initialises it - confirm.
        if self.logFile == "":
            self.logFile = ClusterLog.getOMLogPath(
                ClusterConstants.GS_COLLECTOR_LOG_FILE, self.user, "")

        # A speed limit of 0 is replaced by the default of 1024 MB/s
        if self.speedLimit == 0:
            self.speedLimit = 1024

    def _loadHostFile(self):
        """
        function: read the host names listed in the -f file into
                  self.nodeName (one per line; blank lines and repeated
                  names are skipped)
        input : NA
        output: NA; exits if the file is missing or yields no host
        """
        # Check if the hostname file exists
        if not os.path.exists(self.inFile):
            GaussLog.exitWithError(
                ErrorCode.GAUSS_502["GAUSS_50201"] % self.inFile)
        # Get the value in the hostname file
        with open(self.inFile, "r") as fp:
            for line in fp:
                node = line.strip()
                if node and node not in self.nodeName:
                    self.nodeName.append(node)
        # An error exit if the node name is not available
        if len(self.nodeName) == 0:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_502["GAUSS_50203"] % self.inFile)

    def _loadConfigFile(self):
        """
        function: parse self.configFile and collect its items into
                  self.config
        input : NA
        output: NA; exits if the file is missing, has a top-level key
                other than "Collect", or cannot be processed
        """
        # Check if the config file exists
        if not os.path.exists(self.configFile):
            GaussLog.exitWithError(
                ErrorCode.GAUSS_502["GAUSS_50201"] % self.configFile)
        # Get the value in the configFile file
        try:
            with open(self.configFile, "r") as fp:
                # my_obj_pairs_hook rejects duplicated JSON keys
                config_json = json.loads(fp.read(),
                                         object_pairs_hook=my_obj_pairs_hook)

            for key, value in config_json.items():
                if str(key) != "Collect":
                    GaussLog.exitWithError(
                        ErrorCode.GAUSS_512["GAUSS_51242"] % (
                            self.configFile, str(key)))
                for it in value:
                    self._checkConfigItem(it)
        except Exception:
            # Any parse or lookup failure (invalid JSON, unknown TypeName,
            # non-string values, ...) is reported as one generic
            # "bad config file" error.
            GaussLog.exitWithError(
                ErrorCode.GAUSS_512["GAUSS_51239"] % self.configFile)

    def _checkConfigItem(self, it):
        """
        function: validate one entry of the "Collect" list and append it
                  to self.config under its TypeName
        input : it, dict of one config entry; its "Content" value is
                rewritten in place
        output: NA; exits on unknown keys, bad Interval/Count values,
                missing TypeName/Content or unsupported content names
        """
        d_c = ""  # content allowed for the entry's TypeName
        u_c = ""  # content requested by the user
        for k, v in it.items():
            if k not in DefaultValue.COLLECT_CONF_JSON_KEY_LIST:
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_512["GAUSS_51242"]
                    % (self.configFile, str(k)))
            if k == "TypeName":
                d_c = DefaultValue.COLLECT_CONF_MAP[v]
            elif k == "Content":
                u_c = v
            elif k in ("Interval", "Count"):
                # Must be a nonnegative integer once blanks are removed;
                # isdecimal() matches what int() can convert.
                if not v.replace(" ", "").isdecimal():
                    GaussLog.exitWithError(
                        ErrorCode.GAUSS_512["GAUSS_51241"]
                        % (k, v))

        if len(u_c) > 0 and len(d_c) > 0:
            T_Name = it["TypeName"]
            it["Content"] = ""
            # Exact match on the two type names; a substring test
            # against "Plan,Database" would also accept fragments
            # such as "Data".
            if T_Name in ("Plan", "Database"):
                # Content for these types is taken over unchanged
                it["Content"] = u_c
            else:
                for c in u_c.replace(" ", "").split(","):
                    if c not in d_c:
                        GaussLog.exitWithError(
                            ErrorCode.GAUSS_512["GAUSS_51243"]
                            % (c, it['TypeName'],
                               self.configFile))
                    elif c in DefaultValue.COLLECT_CONF_CONTENT_MAP:
                        # Each mapped name is followed by a comma,
                        # so the result ends with a trailing comma.
                        it["Content"] += \
                            DefaultValue.COLLECT_CONF_CONTENT_MAP[c] + ","
                    else:
                        GaussLog.exitWithError(
                            ErrorCode.GAUSS_512["GAUSS_51244"]
                            % c)
            self.config.setdefault(T_Name, []).append(it)
        else:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_512["GAUSS_51240"])

    def _checkTimeRange(self):
        """
        function: require --begin-time and --end-time, rewrite both to
                  "yyyymmddhhmm" and compute self.duration
        input : NA
        output: NA; exits if a time is missing or the end time is
                earlier than the begin time
        """
        # An error exit if the begin time parameter is not entered
        if not self.begintime:
            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
                                   % '-begin-time' + " for [gs_collector].")
        else:
            # Strip the blank and the colon from --begin-time
            self.begintime = self.begintime.replace(" ", "").replace(":", "")

        # An error exit if the end time parameter is not entered
        if not self.endtime:
            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
                                   % '-end-time' + " for [gs_collector].")
        else:
            # Strip the blank and the colon from --end-time
            self.endtime = self.endtime.replace(" ", "").replace(":", "")

        if self.endtime and self.begintime:
            # The start time must not be later than the end time.
            # Values that passed dateCheck are fixed-width digit
            # strings, so comparing them as strings orders them by time.
            if self.endtime < self.begintime:
                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"]
                                       % "-end-time or --begin-time" +
                                       "The value of '--end-time' must"
                                       " be greater than the value "
                                       "of '--begin-time'.")
            datebegin = datetime.strptime(self.begintime, "%Y%m%d%H%M")
            dateend = datetime.strptime(self.endtime, "%Y%m%d%H%M")
            diff = dateend - datebegin
            # Whole days elapsed plus one, so any span counts at least 1
            self.duration = diff.days + 1
|
|
|
|
|
|
if __name__ == '__main__':
    # function: main
    # input : NA
    # output: NA
    #
    # Script entry point: refuse to run as root, then parse and check
    # the parameters and hand the Collect object to CollectImplOLAP.

    # check if is root user
    running_as_root = os.getuid() == 0
    if running_as_root:
        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"])
    try:
        collector = Collect()
        # Parse the command line and store the options on the object
        collector.parseCommandLine()
        # Validate the options and fill in defaults
        collector.checkParameter()
        CollectImplOLAP(collector).run()
    except Exception as err:
        # Any failure is reported through the common error exit
        GaussLog.exitWithError(str(err))
    sys.exit(0)
|