om 630后修改合入
云对接工具修改 打包脚本修改
This commit is contained in:
@ -303,6 +303,11 @@ function install_gaussdb()
|
||||
export LD_LIBRARY_PATH=$GAUSSHOME/lib:$LD_LIBRARY_PATH
|
||||
|
||||
commitid=$(LD_PRELOAD='' ${BUILD_DIR}/bin/gaussdb -V | awk '{print $5}' | cut -d ")" -f 1)
|
||||
if [ -z $commitid ]
|
||||
then
|
||||
commitid=$(date "+%Y%m%d%H%M%S")
|
||||
commitid=${commitid:4:8}
|
||||
fi
|
||||
echo "${commitid}" >>${SCRIPT_DIR}/version.cfg
|
||||
echo "End insert commitid into version.cfg" >> "$LOG_FILE" 2>&1
|
||||
}
|
||||
|
||||
@ -571,25 +571,34 @@ cmserver_ha_status_interval|int|0,2147483647|NULL|NULL|
|
||||
cmserver_self_vote_timeout|int|0,2147483647|NULL|This parameter works only when cmserver_self_vote_timeout >= cmserver_ha_heartbeat_timeout, otherwise, it will work based on cmserver_ha_heartbeat_timeout.|
|
||||
enable_transaction_read_only|bool|0,0|NULL|NULL|
|
||||
datastorage_threshold_check_interval|int|1,2592000|NULL|NULL|
|
||||
alarm_report_max_count|int|5,2592000|NULL|NULL|
|
||||
alarm_report_max_count|int|1,2592000|NULL|NULL|
|
||||
datastorage_threshold_value_check|int|1,99|NULL|NULL|
|
||||
max_datastorage_threshold_check|int|1,2592000|NULL|NULL|
|
||||
coordinator_heartbeat_timeout|int|0,2147483647|NULL|NULL|
|
||||
phony_dead_effective_time|int|0,2147483647|NULL|NULL|
|
||||
instance_keep_heartbeat_timeout|int|0,2147483647|NULL|NULL|
|
||||
cm_server_arbitrate_delay_base_time_out|int|0,2147483647|NULL|NULL|
|
||||
cm_server_arbitrate_delay_incrememtal_time_out|int|0,2147483647|NULL|NULL|
|
||||
enable_az_auto_switchover|int|0,1|NULL|NULL|
|
||||
cm_auth_method|enum|trust,gss|NULL|NULL|
|
||||
cm_krb_server_keyfile|string|0,0|NULL|NULL|
|
||||
switch_rto|int|0,2147483647|NULL|NULL|
|
||||
force_promote|int|0,1|NULL|NULL|
|
||||
az_switchover_threshold|int|1,100|NULL|NULL|
|
||||
az_check_and_arbitrate_interval|int|1,2147483647|NULL|NULL|
|
||||
az_connect_check_interval|int|1,2147483647|NULL|NULL|
|
||||
az_connect_check_delay_time|int|1,2147483647|NULL|NULL|
|
||||
cmserver_demote_delay_on_etcd_fault|int|1,2147483647|NULL|NULL|
|
||||
instance_phony_dead_restart_interval|int|1,2147483647|NULL|NULL|
|
||||
[cmagent]
|
||||
log_dir|string|0,0|NULL|NULL|
|
||||
log_file_size|int|0,2147483647|MB|NULL|
|
||||
log_min_messages|enum|debug5,debug1,log,warning,error,fatal|NULL|NULL|
|
||||
log_max_size|int|0,1024|NULL|NULL|
|
||||
log_max_count|int|0,100000|NULL|NULL|
|
||||
log_max_size|int|0,2147483647|NULL|NULL|
|
||||
log_max_count|int|0,10000|NULL|NULL|
|
||||
log_saved_days|int|0,1000|NULL|NULL|
|
||||
enable_log_compress|bool|0,0|NULL|NULL|
|
||||
alarm_report_interval|int|0,2147483647|NULL|NULL|
|
||||
alarm_report_max_count|int|5,2592000|NULL|NULL|
|
||||
alarm_report_max_count|int|1,2592000|NULL|NULL|
|
||||
alarm_component|string|0,0|NULL|NULL|
|
||||
incremental_build|bool|0,0|NULL|NULL|
|
||||
agent_report_interval|int|0,2147483647|NULL|NULL|
|
||||
@ -597,12 +606,15 @@ agent_heartbeat_timeout|int|0,2147483647|NULL|NULL|
|
||||
agent_connect_timeout|int|0,2147483647|NULL|NULL|
|
||||
agent_connect_retries|int|0,2147483647|NULL|NULL|
|
||||
agent_check_interval|int|0,2147483647|NULL|NULL|
|
||||
agent_kill_instance_timeout|int|0,2147483647|NULL|NULL|
|
||||
log_threshold_check_interval|int|0,2147483647|NULL|NULL|
|
||||
dilatation_shard_count_for_disk_capacity_alarm|int|0,2147483647|NULL|NULL|
|
||||
security_mode|bool|0,0|NULL|NULL|
|
||||
upgrade_from|int|0,4294967295|NULL|For upgrading, specify which version we are upgrading from.|
|
||||
unix_socket_directory|string|0,0|NULL|NULL|
|
||||
enable_xc_maintenance_mode|bool|0,0|NULL|NULL|
|
||||
process_cpu_affinity|int|0,2|NULL|NULL|
|
||||
enable_cn_auto_repair|bool|0,0|NULL|NULL|
|
||||
agent_phony_dead_check_interval|int|0,2147483647|NULL|NULL|
|
||||
[lcname]
|
||||
allow_concurrent_tuple_update|bool|0,0|NULL|NULL|
|
||||
prefetch_quantity|int|128,131072|kB|NULL|
|
||||
|
||||
@ -83,12 +83,32 @@ DEFAULT_TIMEOUT = 1500
|
||||
# because single clusters don't need to perform consistency checks and
|
||||
# internal communication class checks
|
||||
SINGLE_SKIP = ["CheckTimeZone", "CheckEncoding", "CheckKernelVer",
|
||||
"CheckNTPD",
|
||||
"CheckNoCheckSum", "CheckCpuCount",
|
||||
"CheckNTPD", "CheckNoCheckSum", "CheckCpuCount",
|
||||
"CheckMemInfo", "CheckDiskConfig",
|
||||
"CheckUpVer", "CheckPgxcgroup", "CheckPing",
|
||||
"CheckNetWorkDrop",
|
||||
"CheckNetSpeed"]
|
||||
"CheckNetWorkDrop", "CheckNetSpeed"]
|
||||
|
||||
SETITEM_SKIP = ["CheckCPU", "CheckTimeZone", "CheckOSVer", "CheckNTPD",
|
||||
"CheckSshdService", "CheckNoCheckSum", "CheckEtcHosts",
|
||||
"CheckCpuCount", "CheckHyperThread", "CheckMemInfo",
|
||||
"CheckKernelVer", "CheckEncoding", "CheckBootItems",
|
||||
"CheckDropCache", "CheckFilehandle", "CheckKeyProAdj",
|
||||
"CheckDiskFormat", "CheckInodeUsage", "CheckSpaceUsage",
|
||||
"CheckDiskConfig", "CheckXid", "CheckSysTabSize",
|
||||
"CheckClusterState", "CheckConfigFileDiff", "CheckUpVer",
|
||||
"CheckEnvProfile", "CheckGaussVer", "CheckPortRange",
|
||||
"CheckReadonlyMode", "CheckCatchup", "CheckProcessStatus",
|
||||
"CheckSpecialFile", "CheckCollector", "CheckLargeFile",
|
||||
"CheckProStartTime", "CheckMpprcFile", "CheckLockNum",
|
||||
"CheckCurConnCount", "CheckCursorNum", "CheckPgxcgroup",
|
||||
"CheckLockState", "CheckIdleSession", "CheckDBConnection",
|
||||
"CheckSysTable", "CheckSysTabSize", "CheckTableSpace",
|
||||
"CheckTableSkew", "CheckDNSkew", "CheckCreateView",
|
||||
"CheckHashIndex", "CheckNextvalInDefault", "CheckPgxcRedistb",
|
||||
"CheckReturnType", "CheckSysadminUser", "CheckTDDate",
|
||||
"CheckDropColumn", "CheckDiskFailure", "CheckPing",
|
||||
"CheckNetWorkDrop", "CheckUsedPort", "CheckNICModel",
|
||||
"CheckRouting", "CheckNetSpeed", "CheckDataDiskUsage"]
|
||||
|
||||
|
||||
class CmdOptions():
|
||||
@ -358,10 +378,10 @@ class CheckContext():
|
||||
input : remote host name and password map
|
||||
output : NA
|
||||
'''
|
||||
if (len(hosts) == 0 or g_opts.isSingle):
|
||||
if len(hosts) == 0 or g_opts.isSingle:
|
||||
return
|
||||
fileName = self.getCacheFile()
|
||||
if (not os.path.isfile(fileName)):
|
||||
if not os.path.isfile(fileName):
|
||||
raise CheckException("File %s is not exist or invalid" % fileName)
|
||||
try:
|
||||
pool = ThreadPool(DefaultValue.getCpuSet())
|
||||
@ -390,7 +410,8 @@ Usage:
|
||||
|
||||
General options:
|
||||
-i Health check item number.
|
||||
OLAP Example: -i CheckCPU,CheckMTU,CheckPing.
|
||||
OLAP Example: -i CheckCPU,CheckMTU,
|
||||
CheckPing.
|
||||
-e Health check scene name.
|
||||
OLAP Example: -e inspect/upgrade/slow_node/
|
||||
binary_upgrade/health/install/longtime
|
||||
@ -398,17 +419,19 @@ General options:
|
||||
-L Run the command as local mode.
|
||||
-l Path of log file.
|
||||
-o Save the result to the specified directory.
|
||||
--cid The check ID used for identify a check process,
|
||||
only for internal use.
|
||||
--cid The check ID used for identify a check
|
||||
process, only for internal use.
|
||||
--skip-root-items Skip the items with root privileges.
|
||||
--disk-threshold Set disk threshold for checking disk usage,
|
||||
only for CheckDataDiskUsage.
|
||||
--format Set the format of the result report.
|
||||
--set Set abnormal items if supported
|
||||
--time-out Set the timeout for scene check, default 1500 seconds.
|
||||
--time-out Set the timeout for scene check, default
|
||||
1500 seconds.
|
||||
--routing The network segment with business ip,
|
||||
example: 192.168.1.1:255.255.255.0
|
||||
--skip-items Skip the specified check item or setting item with scene check
|
||||
--skip-items Skip the specified check item or setting
|
||||
item with scene check
|
||||
Example: --skip-items CheckCPU,CheckMTU
|
||||
-?, --help Show help information for this utility,
|
||||
and exit the command line mode.
|
||||
@ -453,7 +476,7 @@ def parseCommandLine():
|
||||
g_opts = CmdOptions()
|
||||
ParaObj = Parameter()
|
||||
ParaDict = ParaObj.ParameterCommandLine("check")
|
||||
if ("helpFlag" in list(ParaDict.keys())):
|
||||
if "helpFlag" in list(ParaDict.keys()):
|
||||
usage()
|
||||
sys.exit(0)
|
||||
|
||||
@ -475,50 +498,51 @@ def parseCommandLine():
|
||||
raise UseBothParameterException(
|
||||
(paraNameMap[para], paraNameMap[irrelevantPara[para]]))
|
||||
|
||||
if ("itemstr" in list(ParaDict.keys())):
|
||||
if "itemstr" in list(ParaDict.keys()):
|
||||
g_opts.items = ParaDict["itemstr"]
|
||||
if ("scenes" in list(ParaDict.keys())):
|
||||
if "scenes" in list(ParaDict.keys()):
|
||||
g_opts.scene = ParaDict["scenes"]
|
||||
if ("outFile" in list(ParaDict.keys())):
|
||||
if "outFile" in list(ParaDict.keys()):
|
||||
g_context.outPath = ParaDict["outFile"]
|
||||
if ("logFile" in list(ParaDict.keys())):
|
||||
if "logFile" in list(ParaDict.keys()):
|
||||
g_opts.logFile = ParaDict["logFile"]
|
||||
if ("user" in list(ParaDict.keys())):
|
||||
if "user" in list(ParaDict.keys()):
|
||||
g_context.user = ParaDict["user"]
|
||||
if ("hostfile" in list(ParaDict.keys())):
|
||||
if "hostfile" in list(ParaDict.keys()):
|
||||
for node in g_file.readFile(ParaDict["hostfile"]):
|
||||
g_opts.nodes.append(node.strip())
|
||||
if ("cid" in list(ParaDict.keys())):
|
||||
if "cid" in list(ParaDict.keys()):
|
||||
g_context.setCheckID(ParaDict["cid"])
|
||||
g_opts.distributing = True
|
||||
if ("localMode" in list(ParaDict.keys())):
|
||||
if "localMode" in list(ParaDict.keys()):
|
||||
g_opts.localMode = True
|
||||
if ("skipRootItems" in list(ParaDict.keys())):
|
||||
if "skipRootItems" in list(ParaDict.keys()):
|
||||
g_opts.skipRootItems = True
|
||||
if ("disk-threshold" in list(ParaDict.keys())):
|
||||
if "disk-threshold" in list(ParaDict.keys()):
|
||||
g_context.thresholdDn = ParaDict["disk-threshold"]
|
||||
if ("set" in list(ParaDict.keys())):
|
||||
if "set" in list(ParaDict.keys()):
|
||||
g_context.set = True
|
||||
if ("routing" in list(ParaDict.keys())):
|
||||
if "routing" in list(ParaDict.keys()):
|
||||
g_opts.routing = ParaDict["routing"]
|
||||
if ("skipItems" in list(ParaDict.keys())):
|
||||
if "skipItems" in list(ParaDict.keys()):
|
||||
g_opts.skipItems = ParaDict["skipItems"]
|
||||
if ("nodegroup_name" in list(ParaDict.keys())):
|
||||
if "nodegroup_name" in list(ParaDict.keys()):
|
||||
g_context.LCName = ParaDict["nodegroup_name"]
|
||||
if ("shrinkNodes" in list(ParaDict.keys())):
|
||||
if "shrinkNodes" in list(ParaDict.keys()):
|
||||
g_context.ShrinkNodes = ParaDict["shrinkNodes"]
|
||||
if ("time_out" in list(ParaDict.keys())):
|
||||
if "time_out" in list(ParaDict.keys()):
|
||||
try:
|
||||
g_opts.timeout = int(ParaDict["time_out"])
|
||||
except Exception:
|
||||
raise CheckException("The parameter timeout set invalid value")
|
||||
if (g_opts.timeout < DEFAULT_TIMEOUT):
|
||||
if g_opts.timeout < DEFAULT_TIMEOUT:
|
||||
raise CheckException(
|
||||
"The timeout parameter must be set larger than default value 1500 seconds")
|
||||
"The timeout parameter must be set larger than default "
|
||||
"value 1500 seconds")
|
||||
setTimeOut()
|
||||
if ("format" in list(ParaDict.keys())):
|
||||
if "format" in list(ParaDict.keys()):
|
||||
g_opts.format = ParaDict["format"]
|
||||
if (g_opts.format not in formatList):
|
||||
if g_opts.format not in formatList:
|
||||
raise CheckException(
|
||||
"Format %s is not available,the valid format is %s" % (
|
||||
g_opts.format, ",".join(formatList)))
|
||||
@ -546,13 +570,13 @@ def checkParameter():
|
||||
|
||||
def checkuser():
|
||||
# The new node scenario does not need the -U parameter
|
||||
if (__isRoot() and not g_opts.localMode):
|
||||
if __isRoot() and not g_opts.localMode:
|
||||
g_context.user = None
|
||||
return
|
||||
# Default mode -U for the current user
|
||||
if (not __isRoot() and not g_context.user):
|
||||
if not __isRoot() and not g_context.user:
|
||||
g_context.user = SharedFuncs.getCurrentUser()
|
||||
if (g_context.user):
|
||||
if g_context.user:
|
||||
if not __isRoot() and g_context.user != SharedFuncs.getCurrentUser():
|
||||
raise CheckException(
|
||||
"The user %s is not current user" % g_context.user)
|
||||
@ -561,22 +585,22 @@ def checkuser():
|
||||
except Exception:
|
||||
raise CheckException(
|
||||
"The user %s is not a effective user." % g_context.user)
|
||||
if (user_uid == 0):
|
||||
if user_uid == 0:
|
||||
raise CheckException("The -U parameter can not be the root user.")
|
||||
isClusterUser = SharedFuncs.checkClusterUser(g_context.user,
|
||||
__getMpprcFile())
|
||||
if (isClusterUser):
|
||||
if isClusterUser:
|
||||
# get cluster information
|
||||
g_context.mpprc = __getMpprcFile()
|
||||
clusterInfo = g_context.loadClusterInfo(g_context.user)
|
||||
if (clusterInfo):
|
||||
if clusterInfo:
|
||||
g_opts.cluster = clusterInfo
|
||||
else:
|
||||
isClusterUser = False
|
||||
if (not isClusterUser):
|
||||
if not isClusterUser:
|
||||
raise CheckException(
|
||||
"The user %s is not valid cluster user" % g_context.user)
|
||||
if (g_opts.localMode or g_opts.distributing):
|
||||
if g_opts.localMode or g_opts.distributing:
|
||||
return
|
||||
|
||||
# Check cluster user trust
|
||||
@ -588,14 +612,14 @@ def checkuser():
|
||||
psshPath = os.path.join(appPath, 'script/gspylib/pssh/bin/pssh')
|
||||
cmd = "%s -H %s 'id' " % (psshPath, " -H ".join(dbNameList))
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
errorNode = []
|
||||
for result in output.split('\n'):
|
||||
if (result.strip() == ""):
|
||||
if result.strip() == "":
|
||||
continue
|
||||
resultInfo = result.split()
|
||||
# Analyze the results
|
||||
if (len(resultInfo) > 3 and resultInfo[2] == "[SUCCESS]"):
|
||||
if len(resultInfo) > 3 and resultInfo[2] == "[SUCCESS]":
|
||||
continue
|
||||
elif (len(resultInfo) > 3 and resultInfo[2] == "[FAILURE]" and
|
||||
resultInfo[3] in dbNameList):
|
||||
@ -604,7 +628,7 @@ def checkuser():
|
||||
raise CheckException(
|
||||
"Failed to check user trust. commands: %s Error:/n%s"
|
||||
% (cmd, output))
|
||||
if (errorNode):
|
||||
if errorNode:
|
||||
raise CheckException(
|
||||
"Failed to check user trust with %s" % errorNode)
|
||||
else:
|
||||
@ -613,20 +637,20 @@ def checkuser():
|
||||
|
||||
|
||||
def createPath(path, user=""):
|
||||
if (path == "/dev/null"):
|
||||
if path == "/dev/null":
|
||||
return
|
||||
if (os.path.isdir(path)):
|
||||
if os.path.isdir(path):
|
||||
# test write permissions
|
||||
if (not g_file.checkDirWriteable(path)):
|
||||
if not g_file.checkDirWriteable(path):
|
||||
raise CheckException(
|
||||
"Failed to create or delete file in the [%s]." % path)
|
||||
elif (os.path.isfile(path)):
|
||||
elif os.path.isfile(path):
|
||||
raise CheckException("The out path [%s] must be a directory." % path)
|
||||
else:
|
||||
# path does not exist; recursively create the path
|
||||
g_file.createDirectory(path, True, DefaultValue.KEY_DIRECTORY_MODE)
|
||||
# Modify the file owner
|
||||
if (__isRoot() and user):
|
||||
if __isRoot() and user:
|
||||
g_file.changeOwner(user, path)
|
||||
|
||||
|
||||
@ -651,11 +675,12 @@ def initLogFile():
|
||||
output: NA
|
||||
"""
|
||||
global g_context, g_logger
|
||||
# load the context when the script ruuning on local mode and the context was cached before
|
||||
# load the context when the script runs in local mode and the context
|
||||
# was cached before
|
||||
g_context.tmpPath = getTmpPath()
|
||||
if (g_context.isCached()):
|
||||
if g_context.isCached():
|
||||
g_context = g_context.load()
|
||||
if (__getLocalNode(g_context.nodes) in g_context.newNodes):
|
||||
if __getLocalNode(g_context.nodes) in g_context.newNodes:
|
||||
g_context.mpprc = None
|
||||
g_context.user = None
|
||||
g_context.cluster = None
|
||||
@ -667,9 +692,9 @@ def initLogFile():
|
||||
else:
|
||||
# Parameter specified first, followed by default GAUSSLOG,
|
||||
# last temporary directory
|
||||
if (g_opts.logFile):
|
||||
if g_opts.logFile:
|
||||
g_context.logFile = os.path.realpath(g_opts.logFile)
|
||||
elif (g_opts.cluster):
|
||||
elif g_opts.cluster:
|
||||
g_context.logFile = os.path.join(g_opts.cluster.logPath,
|
||||
'%s/om/gs_check.log'
|
||||
% g_context.user)
|
||||
@ -687,16 +712,16 @@ def initLogFile():
|
||||
# Load support check items by parsing the project folder
|
||||
g_context.loadSupportItems()
|
||||
# load the scene configuration
|
||||
if (g_opts.scene):
|
||||
if g_opts.scene:
|
||||
g_context.loadSceneConfiguration(g_opts.scene)
|
||||
# load cluster info
|
||||
if (g_opts.cluster):
|
||||
if g_opts.cluster:
|
||||
g_context.cluster = g_opts.cluster
|
||||
g_context.oldNodes = g_opts.cluster.getClusterSshIps()[0]
|
||||
# load nodes
|
||||
if (g_opts.nodes):
|
||||
if g_opts.nodes:
|
||||
for node in g_opts.nodes:
|
||||
if (node not in g_context.oldNodes):
|
||||
if node not in g_context.oldNodes:
|
||||
g_context.newNodes.append(node)
|
||||
g_context.nodes = g_context.oldNodes + g_context.newNodes
|
||||
|
||||
@ -716,11 +741,11 @@ def getRootUserPwd():
|
||||
g_logger.debug("Ask user input password interactive")
|
||||
for host in g_context.nodes:
|
||||
isPwdOk = SharedFuncs.verifyPasswd(host, rootuser, rootpwd)
|
||||
if (not isPwdOk):
|
||||
if not isPwdOk:
|
||||
# try to connect remote node again
|
||||
rootpwd = __retryConnection(host, rootuser)
|
||||
g_opts.pwdMap[host] = (rootuser, rootpwd)
|
||||
if (pwd.getpwnam(rootuser).pw_uid != 0):
|
||||
if pwd.getpwnam(rootuser).pw_uid != 0:
|
||||
raise CheckException("Enter the user [%s] does not have"
|
||||
" root privileges." % rootuser)
|
||||
# print message on screen
|
||||
@ -735,7 +760,7 @@ def parseCheckContext():
|
||||
"""
|
||||
global g_context
|
||||
initLogFile()
|
||||
if (g_context.isCached()):
|
||||
if g_context.isCached():
|
||||
return
|
||||
g_logger.debug("Start to parse the check items config file")
|
||||
items_all = []
|
||||
@ -744,7 +769,7 @@ def parseCheckContext():
|
||||
failedItems = []
|
||||
singleSkipList = []
|
||||
# generate the items from scene configuration
|
||||
if (g_opts.scene):
|
||||
if g_opts.scene:
|
||||
items_oldNode, failedItems = __parseScene(g_opts.scene)
|
||||
items_all += items_oldNode
|
||||
# generate the items from -i parameter value
|
||||
@ -756,23 +781,23 @@ def parseCheckContext():
|
||||
else:
|
||||
items_all.append(item)
|
||||
for item in items_all:
|
||||
if (not g_context.set and item['name'] in g_opts.skipItems):
|
||||
if not g_context.set and item['name'] in g_opts.skipItems:
|
||||
items_all.remove(item)
|
||||
continue
|
||||
if (g_context.set and item['set_permission'] == 'root'):
|
||||
if g_context.set and item['set_permission'] == 'root':
|
||||
g_context.rootItems.append(item)
|
||||
if (g_opts.skipRootItems and item['permission'] == 'root'):
|
||||
if g_opts.skipRootItems and item['permission'] == 'root':
|
||||
items_all.remove(item)
|
||||
continue
|
||||
if (item['permission'] == 'root'):
|
||||
if item['permission'] == 'root':
|
||||
g_context.rootItems.append(item)
|
||||
if (g_opts.isSingle and item['name'] in SINGLE_SKIP):
|
||||
if g_opts.isSingle and item['name'] in SINGLE_SKIP:
|
||||
singleSkipList.append(item['name'])
|
||||
continue
|
||||
if (item['name'] == "CheckRouting"):
|
||||
if (g_opts.routing):
|
||||
if item['name'] == "CheckRouting":
|
||||
if g_opts.routing:
|
||||
g_context.routing = g_opts.routing
|
||||
elif (g_opts.cluster):
|
||||
elif g_opts.cluster:
|
||||
workIP = g_opts.cluster.getDbNodeByName(
|
||||
DefaultValue.GetHostIpOrName()).backIps[0]
|
||||
g_context.routing = "%s:%s" % (
|
||||
@ -781,16 +806,16 @@ def parseCheckContext():
|
||||
raise CheckException(
|
||||
"The --routing is required when cluster dosen't exist")
|
||||
g_context.items.append(item)
|
||||
if (len(singleSkipList) != 0):
|
||||
if len(singleSkipList) != 0:
|
||||
__printOnScreen(
|
||||
"The following items are skipped when the type of cluster is"
|
||||
" single:\n[%s]" % ",".join(singleSkipList))
|
||||
if (not items_newNode):
|
||||
if not items_newNode:
|
||||
g_context.oldItems = g_context.items
|
||||
else:
|
||||
g_context.oldItems = items_oldNode
|
||||
g_context.newItems = items_newNode
|
||||
if (g_context.set and items_all):
|
||||
if g_context.set and items_all:
|
||||
# Settings will have a big impact and need to be confirmed
|
||||
confirmItem = {
|
||||
"CheckCrontabLeft": "Clear om_monitor in crond service",
|
||||
@ -798,36 +823,41 @@ def parseCheckContext():
|
||||
"'/var/log/Bigdata/','/home/omm/'",
|
||||
"CheckProcessLeft": "Kill all process with gaussdb and omm user",
|
||||
"CheckOmmUserExist": "Delete system user omm",
|
||||
"CheckPortConflict": "kill all process with occupies the 25xxx port"
|
||||
"CheckPortConflict": "kill all process with occupies "
|
||||
"the 25xxx port"
|
||||
}
|
||||
confirmMsg = ""
|
||||
for item in items_all:
|
||||
if (item['name'] in list(confirmItem.keys())):
|
||||
if item['name'] in list(confirmItem.keys()):
|
||||
confirmMsg += confirmItem[item['name']] + "\n"
|
||||
if (confirmMsg):
|
||||
confirmMsg = "Warning: Executing the settings will do the following at the [%s] node:\n" % \
|
||||
if item['name'] in SETITEM_SKIP:
|
||||
g_context.skipSetItem.append(item['name'])
|
||||
|
||||
if confirmMsg:
|
||||
confirmMsg = "Warning: Executing the settings will do " \
|
||||
"the following at the [%s] node:\n" % \
|
||||
','.join(g_context.newNodes) + confirmMsg
|
||||
__printOnScreen(confirmMsg)
|
||||
flag = input("Execution settings? (Y/N):")
|
||||
while (True):
|
||||
while True:
|
||||
# Keep prompting until the answer is one of Y/N/YES/NO
|
||||
if (not flag.upper() in ("Y", "N", "YES", "NO")):
|
||||
if not flag.upper() in ("Y", "N", "YES", "NO"):
|
||||
flag = input("Please type 'yes' or 'no': ")
|
||||
continue
|
||||
break
|
||||
if (flag.upper() in ("Y", "YES")):
|
||||
if flag.upper() in ("Y", "YES"):
|
||||
pass
|
||||
if (flag.upper() in ("N", "NO")):
|
||||
skipSetItem = []
|
||||
if flag.upper() in ("N", "NO"):
|
||||
for Item in g_context.newItems:
|
||||
if (Item['name'] in list(confirmItem.keys())):
|
||||
if Item['name'] in list(confirmItem.keys()):
|
||||
g_context.newItems.remove(Item)
|
||||
skipSetItem.append(Item['name'])
|
||||
g_context.skipSetItem.append(Item['name'])
|
||||
__printOnScreen(
|
||||
'Skip the settings for [%s]' % ','.join(skipSetItem))
|
||||
if (failedItems):
|
||||
'Skip the settings for [%s]'
|
||||
% ','.join(g_context.skipSetItem))
|
||||
if failedItems:
|
||||
raise ParseItemException(failedItems)
|
||||
if (not g_context.items):
|
||||
if not g_context.items:
|
||||
raise CheckException("No check item can be performed,"
|
||||
" please confirm the input parameters.")
|
||||
|
||||
@ -853,7 +883,7 @@ def __printOnScreen(msg):
|
||||
"""
|
||||
function: print message on screen
|
||||
"""
|
||||
if (g_opts.localMode or g_opts.distributing):
|
||||
if g_opts.localMode or g_opts.distributing:
|
||||
return
|
||||
g_logger.info(msg)
|
||||
|
||||
@ -884,7 +914,7 @@ def __getLocalNode(nodes):
|
||||
"""
|
||||
if nodes:
|
||||
for n in nodes:
|
||||
if (SharedFuncs.is_local_node(n)):
|
||||
if SharedFuncs.is_local_node(n):
|
||||
return n
|
||||
return DefaultValue.GetHostIpOrName()
|
||||
|
||||
@ -893,7 +923,7 @@ def __getSeparatedValue(value, separator=","):
|
||||
'''
|
||||
get command line value which were separated by ","
|
||||
'''
|
||||
if (separator not in value):
|
||||
if separator not in value:
|
||||
return [value]
|
||||
return value.split(separator)
|
||||
|
||||
@ -924,7 +954,7 @@ def __retryConnection(host, user):
|
||||
"Please enter password for user[%s] on the node[%s]:"
|
||||
% (user, host))
|
||||
isOK = SharedFuncs.verifyPasswd(host, user, passwd)
|
||||
if (isOK):
|
||||
if isOK:
|
||||
return passwd
|
||||
else:
|
||||
continue
|
||||
@ -938,27 +968,27 @@ def __getMpprcFile():
|
||||
"""
|
||||
# get mpprc file
|
||||
envValue = DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH")
|
||||
if (envValue is not None and os.path.isfile(envValue)):
|
||||
if envValue is not None and os.path.isfile(envValue):
|
||||
return envValue
|
||||
elif (not __isRoot() and DefaultValue.getEnv('GAUSS_ENV')):
|
||||
elif not __isRoot() and DefaultValue.getEnv('GAUSS_ENV'):
|
||||
cmd = "echo ~ 2>/dev/null"
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
raise CheckException(
|
||||
"Fetching user environment variable file failed."
|
||||
" Please setup environment variables." + "The cmd is %s" % cmd)
|
||||
else:
|
||||
return os.path.join(output, ".bashrc")
|
||||
elif (__isRoot() and g_context.user):
|
||||
elif __isRoot() and g_context.user:
|
||||
cmd = "su - %s -c 'echo ~ 2>/dev/null'" % g_context.user
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
raise CheckException(
|
||||
"Failed to get user [%s] home directory. Error: %s\n" % (
|
||||
g_context.user, output) + "The cmd is %s" % cmd)
|
||||
else:
|
||||
return os.path.join(output, ".bashrc")
|
||||
elif (__isRoot()):
|
||||
elif __isRoot():
|
||||
return ""
|
||||
else:
|
||||
raise CheckException("The separated mpprc file was not found."
|
||||
@ -986,7 +1016,7 @@ def __parseScene(sceneName):
|
||||
'''
|
||||
function: parse scene configure file
|
||||
'''
|
||||
if (not sceneName):
|
||||
if not sceneName:
|
||||
raise NotEmptyException("scene name")
|
||||
# Get scene xml
|
||||
xmlFile = "%s/config/scene_%s.xml" % (g_context.basePath, sceneName)
|
||||
@ -1003,7 +1033,7 @@ def __parseScene(sceneName):
|
||||
for elem in rootNode.findall('allowitems/item'):
|
||||
elemName = elem.attrib['name']
|
||||
# check whether the check item exists
|
||||
if (elemName not in list(g_context.supportItems.keys())):
|
||||
if elemName not in list(g_context.supportItems.keys()):
|
||||
raise NotExistException("elemName", "support items")
|
||||
# save threshold as text and parse them later
|
||||
subElem = elem.find('threshold')
|
||||
@ -1014,7 +1044,7 @@ def __parseScene(sceneName):
|
||||
# parse categories and get all items
|
||||
for category in rootNode.findall('allowcategories/category'):
|
||||
cpath = "%s/items/%s" % (g_context.basePath, category.attrib['name'])
|
||||
if (os.path.isdir(cpath)):
|
||||
if os.path.isdir(cpath):
|
||||
itemNames.extend(x[:-3] for x in os.listdir(cpath) if
|
||||
x[:-3] not in itemNames and x.endswith(".py"))
|
||||
|
||||
@ -1032,10 +1062,10 @@ def __parseScene(sceneName):
|
||||
failedItems.append(i)
|
||||
|
||||
# overwrite the threshold parameters
|
||||
if (thresholds and i in list(thresholds.keys())):
|
||||
if thresholds and i in list(thresholds.keys()):
|
||||
# parse the threshold of check item
|
||||
sceneThreshold = __parseThreshold(thresholds[i])
|
||||
if (item['threshold']):
|
||||
if item['threshold']:
|
||||
item['threshold'] = dict(item['threshold'], **sceneThreshold)
|
||||
else:
|
||||
item['threshold'] = sceneThreshold
|
||||
@ -1047,14 +1077,14 @@ def __parseOneItem(itemName):
|
||||
'''
|
||||
function: parse one check item and get the full information
|
||||
'''
|
||||
if (not itemName):
|
||||
if not itemName:
|
||||
raise NotEmptyException("Item name")
|
||||
item = {}
|
||||
# try to load check item configuration from xml file
|
||||
xmlFile = "%s/config/items.xml" % g_context.basePath
|
||||
for event, elem in ETree.iterparse(xmlFile):
|
||||
if (event == 'end'):
|
||||
if (elem.tag == 'checkitem' and elem.attrib['name'] == itemName):
|
||||
if event == 'end':
|
||||
if elem.tag == 'checkitem' and elem.attrib['name'] == itemName:
|
||||
# Parse the xml file
|
||||
item['id'] = elem.attrib['id']
|
||||
item['name'] = elem.attrib['name']
|
||||
@ -1076,7 +1106,7 @@ def __parseOneItem(itemName):
|
||||
'default')
|
||||
# Get the threshold
|
||||
threshold = elem.find('threshold')
|
||||
if (threshold is not None and threshold.text is not None):
|
||||
if threshold is not None and threshold.text is not None:
|
||||
# parse the threshold of check item
|
||||
item["threshold"] = __parseThreshold(
|
||||
threshold.text.strip())
|
||||
@ -1089,7 +1119,7 @@ def __parseAttr(elem, attr, language='zh'):
|
||||
function: parse the xml attr with language
|
||||
'''
|
||||
val = elem.find('/'.join([attr, language]))
|
||||
if (val is not None and val.text is not None):
|
||||
if val is not None and val.text is not None:
|
||||
return val.text.strip().encode('utf-8')
|
||||
return ""
|
||||
|
||||
@ -1100,7 +1130,7 @@ def __parseProperty(elem, propertyName, defaultValue):
|
||||
'''
|
||||
prop = elem.find(propertyName)
|
||||
result = defaultValue
|
||||
if (prop is not None and prop.text is not None):
|
||||
if prop is not None and prop.text is not None:
|
||||
result = prop.text.strip()
|
||||
return result
|
||||
|
||||
@ -1110,10 +1140,10 @@ def __parseThreshold(value, separator=";"):
|
||||
function: parse the threshold of check item
|
||||
'''
|
||||
result = {}
|
||||
if (separator not in value and "=" not in value):
|
||||
if separator not in value and "=" not in value:
|
||||
return result
|
||||
|
||||
if (separator not in value and "=" in value):
|
||||
if separator not in value and "=" in value:
|
||||
d = value.strip().split('=')
|
||||
result[d[0]] = d[1]
|
||||
else:
|
||||
@ -1137,11 +1167,11 @@ def getMTUValue(node):
|
||||
sshIp = node
|
||||
# get all network card information
|
||||
cmd1 = """printf \"\n\n`/sbin/ifconfig -a`\n\n\" """
|
||||
if (not g_opts.pwdMap):
|
||||
if not g_opts.pwdMap:
|
||||
output = SharedFuncs.runSshCmd(cmd1, sshIp, g_context.user)
|
||||
else:
|
||||
username, passwd = g_opts.pwdMap[node]
|
||||
if (username is None or passwd is None):
|
||||
if username is None or passwd is None:
|
||||
raise CheckException("Retrive username and password error.")
|
||||
output = SharedFuncs.runSshCmdWithPwd(cmd1, sshIp, username, passwd)
|
||||
# Separate each network card
|
||||
@ -1151,37 +1181,37 @@ def getMTUValue(node):
|
||||
mtuValue = ""
|
||||
# find network card by IP
|
||||
for eachNet in networkInfoList:
|
||||
if (eachNet.find(addr) > 0 and eachNet.find('inet') > 0):
|
||||
if eachNet.find(addr) > 0 and eachNet.find('inet') > 0:
|
||||
networkInfo = eachNet
|
||||
break
|
||||
if (not networkInfo):
|
||||
if not networkInfo:
|
||||
raise CheckException(
|
||||
"Failed to get network card information with '%s'." % node)
|
||||
# get network number
|
||||
networkNum = networkInfo.split()[0]
|
||||
# Remove : if it exists
|
||||
if (networkNum[-1] == ":"):
|
||||
if networkNum[-1] == ":":
|
||||
networkNum = networkNum[:-1]
|
||||
for eachLine in networkInfo.split('\n'):
|
||||
# get mtu Value with SuSE and redHat6.x
|
||||
if (eachLine.find('MTU') > 0):
|
||||
if eachLine.find('MTU') > 0:
|
||||
mtuValue = eachLine.split(':')[1].split(' ')[0].strip()
|
||||
break
|
||||
# get mtu Value with redHat7.x
|
||||
elif (eachLine.find('mtu') > 0):
|
||||
elif eachLine.find('mtu') > 0:
|
||||
mtuValue = eachLine.split()[-1]
|
||||
break
|
||||
else:
|
||||
continue
|
||||
if (not networkNum):
|
||||
if not networkNum:
|
||||
raise CheckException(
|
||||
"Failed to get network card number with '%s'." % node)
|
||||
if (not mtuValue):
|
||||
if not mtuValue:
|
||||
raise CheckException(
|
||||
"Failed to get network card mtu value with '%s' '%s'."
|
||||
% (node, networkNum))
|
||||
# The nodes are grouped by MTU value
|
||||
if (not mtuValue in list(g_mtuMap.keys())):
|
||||
if not mtuValue in list(g_mtuMap.keys()):
|
||||
g_mtuMap[mtuValue] = ["%s-%s" % (node, networkNum)]
|
||||
else:
|
||||
g_mtuMap[mtuValue].append("%s-%s" % (node, networkNum))
|
||||
@ -1194,10 +1224,10 @@ def preCheck():
|
||||
output: NA
|
||||
"""
|
||||
# patch ssh config
|
||||
if (__isRoot()):
|
||||
if __isRoot():
|
||||
cmd = "grep -E '^MaxStartups[\ \t]+1000' /etc/ssh/sshd_config"
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
cmd = "sed -i '/MaxStartups/d' /etc/ssh/sshd_config &&" \
|
||||
" echo 'MaxStartups 1000' >> /etc/ssh/sshd_config &&" \
|
||||
" service sshd reload"
|
||||
@ -1215,7 +1245,7 @@ def preCheck():
|
||||
except Exception as e:
|
||||
raise Exception(str(e))
|
||||
# According to the number of groups to determine whether the same
|
||||
if (len(list(g_mtuMap.keys())) > 1):
|
||||
if len(list(g_mtuMap.keys())) > 1:
|
||||
warningMsg = "Warning: The MTU value is inconsistent on all node," \
|
||||
" maybe checking will be slower or hang."
|
||||
for mtuValue in list(g_mtuMap.keys()):
|
||||
@ -1248,8 +1278,8 @@ def doCheck():
|
||||
output: NA
|
||||
"""
|
||||
# Local mode
|
||||
if (g_opts.localMode):
|
||||
if (__isDistributing()):
|
||||
if g_opts.localMode:
|
||||
if __isDistributing():
|
||||
# load check item dynamic and get the execute result
|
||||
doRunCheck()
|
||||
else:
|
||||
@ -1288,23 +1318,23 @@ def doCheck():
|
||||
# Time to hit the log
|
||||
LogCount = 0
|
||||
lastTimeProgress = -1
|
||||
while (len(nodes) and datetime.now() <= g_endTime):
|
||||
while len(nodes) and datetime.now() <= g_endTime:
|
||||
totleCount = 0
|
||||
slowNode = []
|
||||
for node in nodes:
|
||||
# Get user and password
|
||||
username, passwd = __getUserAndPwd(node)
|
||||
if (node in g_context.oldNodes):
|
||||
if node in g_context.oldNodes:
|
||||
itemCount_node = len(g_context.oldItems)
|
||||
else:
|
||||
itemCount_node = len(g_context.newItems)
|
||||
# Local execution
|
||||
if (SharedFuncs.is_local_node(node)):
|
||||
if SharedFuncs.is_local_node(node):
|
||||
checkCount = SharedFuncs.checkComplete(
|
||||
checkID, node, g_context.hostMapping[node],
|
||||
g_context.user, g_context.tmpPath)
|
||||
# Executed in new node scene
|
||||
elif (node in g_context.newNodes):
|
||||
elif node in g_context.newNodes:
|
||||
checkCount = SharedFuncs.checkComplete(
|
||||
checkID, node, g_context.hostMapping[node], username,
|
||||
g_context.tmpPath, passwd)
|
||||
@ -1318,14 +1348,14 @@ def doCheck():
|
||||
checkCount = 0
|
||||
# If there is a node check completed,
|
||||
# some nodes just started,record slow node
|
||||
if (overNodes > 0 and checkCount < 2):
|
||||
if overNodes > 0 and checkCount < 2:
|
||||
slowNode.append(node)
|
||||
if (checkCount == itemCount_node):
|
||||
if checkCount == itemCount_node:
|
||||
nodes.remove(node)
|
||||
# Record the number of completed nodes
|
||||
overNodes += 1
|
||||
if (not SharedFuncs.is_local_node(node)):
|
||||
if (node in g_context.newNodes):
|
||||
if not SharedFuncs.is_local_node(node):
|
||||
if node in g_context.newNodes:
|
||||
outItems = []
|
||||
for i in itemsName:
|
||||
outItems.append("%s/%s_%s_%s.out" % (
|
||||
@ -1353,23 +1383,23 @@ def doCheck():
|
||||
# Update execution progress
|
||||
progressInfo = totleCount // len(g_context.nodes)
|
||||
# Refresh only as the schedule changes
|
||||
if (lastTimeProgress < progressInfo <= itemCount):
|
||||
if lastTimeProgress < progressInfo <= itemCount:
|
||||
progress_manager.update("Checking...", progressInfo)
|
||||
lastTimeProgress = progressInfo
|
||||
# Suggest the slow node to log every 30 seconds
|
||||
if (slowNode and itemCount > 1 and LogCount % 30 == 0):
|
||||
if slowNode and itemCount > 1 and LogCount % 30 == 0:
|
||||
logMsg = "Warning: The node [%s] check progress" \
|
||||
" is slow." % ",".join(slowNode)
|
||||
g_logger.debug(logMsg)
|
||||
|
||||
for t in threads:
|
||||
if (t.exitcode == 1):
|
||||
if t.exitcode == 1:
|
||||
raise ThreadCheckException(t.name, t.exception)
|
||||
|
||||
for t in threads:
|
||||
t.join(1)
|
||||
|
||||
if (datetime.now() > g_endTime):
|
||||
if datetime.now() > g_endTime:
|
||||
raise TimeoutException(nodes)
|
||||
|
||||
__printOnScreen("Start to analysis the check result")
|
||||
@ -1395,11 +1425,11 @@ def doRunCheck():
|
||||
"""
|
||||
outputPath = g_context.tmpPath
|
||||
localHost = __getLocalNode(g_context.nodes)
|
||||
if (localHost in g_context.newNodes):
|
||||
if localHost in g_context.newNodes:
|
||||
items = g_context.newItems
|
||||
else:
|
||||
items = g_context.oldItems
|
||||
if (g_context.hostMapping):
|
||||
if g_context.hostMapping:
|
||||
localHost = g_context.hostMapping[localHost]
|
||||
for item in items:
|
||||
content = ""
|
||||
@ -1416,9 +1446,9 @@ def doRunCheck():
|
||||
itemResult = __analysisResult(content, item['name'])
|
||||
g_result.append(itemResult)
|
||||
# run the check process distributing and no need to clean the resource
|
||||
if (__isDistributing()):
|
||||
if __isDistributing():
|
||||
g_logger.debug("run check items done and exit the command")
|
||||
if (g_opts.format == 'default'):
|
||||
if g_opts.format == 'default':
|
||||
# Initialize the self.clusterInfo variable
|
||||
print(g_result.outputRaw())
|
||||
|
||||
@ -1448,11 +1478,11 @@ def __prepareCmd(items, user, checkid):
|
||||
userParam = ""
|
||||
checkIdParam = ""
|
||||
routingParam = ""
|
||||
if (user):
|
||||
if user:
|
||||
userParam = " -U %s " % user
|
||||
if (checkid):
|
||||
if checkid:
|
||||
checkIdParam = " --cid=%s " % checkid
|
||||
if (g_context.routing):
|
||||
if g_context.routing:
|
||||
routingParam = "--routing %s" % g_context.routing
|
||||
cmd = "%s/gs_check -i %s %s %s -L %s -o %s -l %s" % (
|
||||
cmdPath, ",".join(itemsName), userParam, checkIdParam,
|
||||
@ -1472,7 +1502,7 @@ def doLocalCheck(host):
|
||||
g_context.checkID)
|
||||
else:
|
||||
cmd = __prepareCmd(g_context.newItems, "", g_context.checkID)
|
||||
if (SharedFuncs.is_local_node(host)):
|
||||
if SharedFuncs.is_local_node(host):
|
||||
if __hasRootItems():
|
||||
SharedFuncs.runRootCmd(cmd, g_opts.pwdMap[host][0],
|
||||
g_opts.pwdMap[host][1], g_context.mpprc)
|
||||
@ -1484,7 +1514,7 @@ def doLocalCheck(host):
|
||||
else:
|
||||
# get username and password for certain node
|
||||
username, passwd = __getUserAndPwd(host)
|
||||
if (host in g_context.newNodes):
|
||||
if host in g_context.newNodes:
|
||||
SharedFuncs.runSshCmdWithPwd(cmd, host, username, passwd)
|
||||
else:
|
||||
SharedFuncs.runSshCmdWithPwd(cmd, host, username, passwd,
|
||||
@ -1496,7 +1526,7 @@ def __analysisResult(output, itemName):
|
||||
function: analysis the check result
|
||||
"""
|
||||
item_result = ItemResult.parse(output)
|
||||
if (not item_result):
|
||||
if not item_result:
|
||||
raise CheckException("analysis result occurs error")
|
||||
try:
|
||||
# load support item
|
||||
@ -1616,11 +1646,15 @@ def __checkFileExist(path, filePattern):
|
||||
def killChildProcess(node):
|
||||
checkID = g_context.checkID
|
||||
# cmd with switch users
|
||||
cmd_switch = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'|awk '{print \$2}'` """ % checkID
|
||||
cmd_switch += """ && (if [ X\"$proc_pid_list\" != X\"\" ]; then echo \"$proc_pid_list\" | xargs kill -9 ; fi)"""
|
||||
cmd_switch = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'""" \
|
||||
"""|awk '{print \$2}'` """ % checkID
|
||||
cmd_switch += """ && (if [ X\"$proc_pid_list\" != X\"\" ]; """ \
|
||||
"""then echo \"$proc_pid_list\" | xargs kill -9 ; fi)"""
|
||||
# cmd with not switch users
|
||||
cmd_current = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'|awk "{print \\\$2}"` """ % checkID
|
||||
cmd_current += """ && (if [ X"$proc_pid_list" != X"" ]; then echo "$proc_pid_list" | xargs kill -9 ; fi)"""
|
||||
cmd_current = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'""" \
|
||||
"""|awk "{print \\\$2}"` """ % checkID
|
||||
cmd_current += """ && (if [ X"$proc_pid_list" != X"" ]; then """ \
|
||||
"""echo "$proc_pid_list" | xargs kill -9 ; fi)"""
|
||||
|
||||
username, passwd = __getUserAndPwd(node)
|
||||
if SharedFuncs.is_local_node(node) and not __hasRootItems():
|
||||
@ -1633,7 +1667,7 @@ def killChildProcess(node):
|
||||
|
||||
def cleanTmpDir(node):
|
||||
# clean tmp files in all the nodes
|
||||
cmd = r"rm -rf %s" % (g_context.tmpPath)
|
||||
cmd = r"rm -rf %s" % g_context.tmpPath
|
||||
if SharedFuncs.is_local_node(node):
|
||||
SharedFuncs.runShellCmd(cmd)
|
||||
else:
|
||||
@ -1665,7 +1699,7 @@ def cleanEnvironment(skiplog=False):
|
||||
g_logger.debug(str(e))
|
||||
|
||||
# clean tmp files in all the nodes
|
||||
cmd = r"rm -rf %s" % (g_context.tmpPath)
|
||||
cmd = r"rm -rf %s" % g_context.tmpPath
|
||||
if g_opts.localMode:
|
||||
SharedFuncs.runShellCmd(cmd)
|
||||
else:
|
||||
|
||||
@ -150,8 +150,8 @@ gs_sshexkey is a utility to create SSH trust among nodes in a cluster.
|
||||
Usage:
|
||||
gs_sshexkey -? | --help
|
||||
gs_sshexkey -V | --version
|
||||
gs_sshexkey -f HOSTFILE [-W PASSWORD] [...] [--skip-hostname-set]
|
||||
[-l LOGFILE]
|
||||
gs_sshexkey -f HOSTFILE [-l LOGFILE] [--skip-hostname-set]
|
||||
|
||||
|
||||
General options:
|
||||
-f Host file containing the IP address of nodes.
|
||||
@ -478,10 +478,8 @@ General options:
|
||||
if (not os.path.exists("/etc/hosts")):
|
||||
raise Exception(ErrorCode.GAUSS_512["GAUSS_51221"] +
|
||||
" Error: \nThe /etc/hosts does not exist.")
|
||||
(status, output) = g_OSlib.getGrepValue("-v",
|
||||
" #Gauss.* IP Hosts "
|
||||
"Mapping",
|
||||
'/etc/hosts')
|
||||
cmd = "grep -v '" + HOSTS_MAPPING_FLAG + "' /etc/hosts"
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
try:
|
||||
g_file.createFile(tmpHostIpName)
|
||||
g_file.changeMode(DefaultValue.KEY_FILE_MODE, tmpHostIpName)
|
||||
@ -536,7 +534,7 @@ General options:
|
||||
ssh.close()
|
||||
raise Exception(ErrorCode.GAUSS_503["GAUSS_50317"]
|
||||
+ " Error: \n%s" % str(e))
|
||||
cmd = "grep -v '%s' %s > %s && cp %s %s && rm -rf %s" \
|
||||
cmd = "grep -v '%s' %s > %s ; cp %s %s && rm -rf %s" \
|
||||
% (" #Gauss.* IP Hosts Mapping", '/etc/hosts', tmpHostIpName,
|
||||
tmpHostIpName, '/etc/hosts', tmpHostIpName)
|
||||
channel = ssh.open_session()
|
||||
@ -631,7 +629,7 @@ General options:
|
||||
if (boolInvalidIp):
|
||||
boolInvalidIp = False
|
||||
continue
|
||||
cmd = "grep -v '%s' %s > %s && cp %s %s && rm -rf %s" % (
|
||||
cmd = "grep -v '%s' %s > %s ; cp %s %s && rm -rf %s" % (
|
||||
" #Gauss.* IP Hosts Mapping", '/etc/hosts', tmpHostIpName,
|
||||
tmpHostIpName, '/etc/hosts', tmpHostIpName)
|
||||
channel = ssh.open_session()
|
||||
|
||||
@ -774,11 +774,11 @@ class DefaultValue():
|
||||
netWorkNum = ""
|
||||
netWorkInfo = psutil.net_if_addrs()
|
||||
for nic_num in netWorkInfo.keys():
|
||||
netInfo = netWorkInfo[nic_num][0]
|
||||
if (netInfo.address == ipAddress):
|
||||
netWorkNum = nic_num
|
||||
break
|
||||
if (netWorkNum == ""):
|
||||
for netInfo in netWorkInfo[nic_num]:
|
||||
if netInfo.address == ipAddress:
|
||||
netWorkNum = nic_num
|
||||
break
|
||||
if netWorkNum == "":
|
||||
raise Exception(ErrorCode.GAUSS_506["GAUSS_50604"] % ipAddress)
|
||||
return netWorkNum
|
||||
except Exception as e:
|
||||
|
||||
@ -1525,7 +1525,12 @@ class dbClusterInfo():
|
||||
"sync_state : %s\n" %
|
||||
syncInfo.syncState)
|
||||
if syncInfo.secondPeerRole == "":
|
||||
syncInfo.secondPeerRole = "Unknown"
|
||||
outText = outText + "\n------------------------" \
|
||||
"---------------" \
|
||||
"--------------------------------\n\n"
|
||||
continue
|
||||
if syncInfo.secondSyncState == "":
|
||||
syncInfo.secondSyncState = "Unknown"
|
||||
outText = outText + (
|
||||
"secondary_state : %s\n" %
|
||||
syncInfo.secondPeerRole)
|
||||
@ -1553,8 +1558,6 @@ class dbClusterInfo():
|
||||
outText = outText + (
|
||||
"receiver_replay_location : %s\n" %
|
||||
syncInfo.secondReceiverReplayLocation)
|
||||
if syncInfo.secondSyncState == "":
|
||||
syncInfo.secondSyncState = "Unknown"
|
||||
outText = outText + (
|
||||
"sync_state : %s\n" %
|
||||
syncInfo.secondSyncState)
|
||||
@ -1847,8 +1850,7 @@ class dbClusterInfo():
|
||||
else:
|
||||
outText = outText + " "
|
||||
outText = outText + (
|
||||
"%s " % self.__getDnRole(roleStatusArray[i],
|
||||
dnInst.instanceType))
|
||||
"%s " % self.__getDnRole(dnInst.instanceType))
|
||||
if dnNodeCount == 1:
|
||||
outText = outText + ("%-7s" % "Primary")
|
||||
else:
|
||||
@ -1863,9 +1865,12 @@ class dbClusterInfo():
|
||||
except Exception as e:
|
||||
raise Exception(ErrorCode.GAUSS_516["GAUSS_51652"] % str(e))
|
||||
|
||||
def __getDnRole(self, roleStatus, instanceType):
|
||||
if roleStatus in DN_ROLE_MAP.keys():
|
||||
return DN_ROLE_MAP[roleStatus]
|
||||
def __getDnRole(self, instanceType):
|
||||
"""
|
||||
function : Get DnRole by instanceType
|
||||
input : Int
|
||||
output : String
|
||||
"""
|
||||
if instanceType == MASTER_INSTANCE:
|
||||
return "P"
|
||||
elif instanceType == STANDBY_INSTANCE:
|
||||
|
||||
@ -82,24 +82,28 @@ class Kernel(BaseComponent):
|
||||
cmd += " -o \'--securitymode\'"
|
||||
self.logger.debug("start cmd = %s" % cmd)
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if status != 0:
|
||||
if status != 0 or re.search("start failed", output):
|
||||
raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "instance"
|
||||
+ " Error: Please check the gs_ctl log for "
|
||||
"failure details.")
|
||||
"failure details." + "\n" + output)
|
||||
if re.search("another server might be running", output):
|
||||
self.logger.log(output)
|
||||
|
||||
def stop(self, stopMode="", time_out=300):
|
||||
"""
|
||||
"""
|
||||
cmd = "%s/gs_ctl stop -D %s " % (
|
||||
self.binPath, self.instInfo.datadir)
|
||||
if not self.isPidFileExist():
|
||||
return
|
||||
cmd = "%s/gs_ctl stop -D %s " % (self.binPath, self.instInfo.datadir)
|
||||
# check stop mode
|
||||
if (stopMode != ""):
|
||||
cmd += " -m %s" % stopMode
|
||||
cmd += " -m immediate"
|
||||
else:
|
||||
# check stop mode
|
||||
if stopMode != "":
|
||||
cmd += " -m %s" % stopMode
|
||||
cmd += " -t %s" % time_out
|
||||
self.logger.debug("stop cmd = %s" % cmd)
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] %
|
||||
"instance" + " Error: \n%s." % output)
|
||||
|
||||
|
||||
@ -180,7 +180,7 @@ class diskInfo():
|
||||
dev_info = os.statvfs(path)
|
||||
used = dev_info.f_blocks - dev_info.f_bfree
|
||||
valueable = dev_info.f_bavail + used
|
||||
percent = math.ceil((float(used) // valueable) * 100)
|
||||
percent = math.ceil((float(used) / valueable) * 100)
|
||||
except Exception as e:
|
||||
raise Exception(ErrorCode.GAUSS_530["GAUSS_53011"] + " disk space."
|
||||
+ "Error: %s" % str(e))
|
||||
|
||||
@ -25,6 +25,12 @@ from gspylib.inspection.common import SharedFuncs
|
||||
from gspylib.common.Common import DefaultValue
|
||||
from gspylib.inspection.common.Log import LoggerFactory
|
||||
|
||||
class GsCheckEncoder(json.JSONEncoder):
|
||||
def default(self, obj):
|
||||
if isinstance(obj, bytes):
|
||||
return str(obj, encoding='utf-8')
|
||||
return json.JSONEncoder.default(self, obj)
|
||||
|
||||
|
||||
class ResultStatus(object):
|
||||
OK = "OK"
|
||||
@ -242,4 +248,4 @@ class CheckResult(object):
|
||||
local['raw'] = localitem.raw
|
||||
localList.append(local)
|
||||
resultDic['hosts'] = localList
|
||||
return json.dumps(resultDic, indent=2)
|
||||
return json.dumps(resultDic, cls=GsCheckEncoder, indent=2)
|
||||
|
||||
@ -39,8 +39,7 @@ class CheckMpprcFile(BaseItem):
|
||||
return
|
||||
try:
|
||||
with open(mpprcFile, 'r') as fp:
|
||||
mpp_content = fp.read()
|
||||
env_list = mpp_content.split('\n')
|
||||
env_list = fp.readlines()
|
||||
while '' in env_list:
|
||||
env_list.remove('')
|
||||
# get ec content
|
||||
@ -67,13 +66,17 @@ class CheckMpprcFile(BaseItem):
|
||||
"GAUSS_ENV", "KRB5_CONFIG", "PGKRBSRVNAME",
|
||||
"KRBHOSTNAME", "ETCD_UNSUPPORTED_ARCH"]
|
||||
# black elements
|
||||
list_black = ["|", ";", "&", "$", "<", ">", "`", "\\", "'", "\"",
|
||||
"{", "}", "(", ")", "[", "]", "~", "*", "?", " ",
|
||||
list_black = ["|", ";", "&", "<", ">", "`", "\\", "'", "\"",
|
||||
"{", "}", "(", ")", "[", "]", "~", "*", "?",
|
||||
"!", "\n"]
|
||||
for env in env_list:
|
||||
env = env.strip()
|
||||
if env == "":
|
||||
continue
|
||||
if len(env.split()) != 2:
|
||||
return
|
||||
if env.split()[0] == "umask" and env.split()[1] == "077":
|
||||
continue
|
||||
for black in list_black:
|
||||
flag = env.find(black)
|
||||
if flag >= 0:
|
||||
|
||||
@ -75,14 +75,14 @@ class CheckNetSpeed(BaseItem):
|
||||
def runClient(self, self_index, ipList):
|
||||
base_listen_port = DEFAULT_LISTEN_PORT
|
||||
max_server = 10
|
||||
group = self_index / max_server
|
||||
group = self_index // max_server
|
||||
path = self.context.basePath
|
||||
port = base_listen_port + self_index % max_server
|
||||
for ip in ipList:
|
||||
index = ipList.index(ip)
|
||||
if (index == self_index):
|
||||
continue
|
||||
if (index / max_server != group):
|
||||
if (index // max_server != group):
|
||||
continue
|
||||
try:
|
||||
p = subprocess.Popen([path + "/lib/checknetspeed/speed_test",
|
||||
|
||||
@ -88,9 +88,9 @@ class CheckNTPD(BaseItem):
|
||||
def postAnalysis(self, itemResult, category="", name=""):
|
||||
errors = []
|
||||
for i in itemResult.getLocalItems():
|
||||
if (i.rst == ResultStatus.NG):
|
||||
if i.rst == ResultStatus.NG :
|
||||
errors.append("%s: %s" % (i.host, i.val))
|
||||
if (len(errors) > 0):
|
||||
if len(errors) > 0:
|
||||
itemResult.rst = ResultStatus.NG
|
||||
itemResult.analysis = "\n".join(errors)
|
||||
return itemResult
|
||||
|
||||
@ -295,9 +295,12 @@ class PlatformCommand():
|
||||
"""
|
||||
pidList = []
|
||||
for pid in psutil.pids():
|
||||
p = psutil.Process(pid)
|
||||
if procName == p.name():
|
||||
pidList.append(pid)
|
||||
try:
|
||||
p = psutil.Process(pid)
|
||||
if procName == p.name():
|
||||
pidList.append(pid)
|
||||
except psutil.NoSuchProcess:
|
||||
pass
|
||||
return pidList
|
||||
|
||||
def killProcessByProcName(self, procName, killType=2):
|
||||
|
||||
@ -230,9 +230,15 @@ class SshTool():
|
||||
g_file.removeFile(tmp_hosts)
|
||||
if output is not None:
|
||||
output = str(output, encoding='utf-8')
|
||||
GaussLog.printMessage(output.strip())
|
||||
if re.search("\[GAUSS\-", output):
|
||||
if re.search("Please enter password", output):
|
||||
GaussLog.printMessage(
|
||||
ErrorCode.GAUSS_503["GAUSS_50306"] % user)
|
||||
else:
|
||||
GaussLog.printMessage(output.strip())
|
||||
sys.exit(1)
|
||||
else:
|
||||
GaussLog.printMessage(output.strip())
|
||||
else:
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
|
||||
@ -19,6 +19,8 @@
|
||||
#############################################################################
|
||||
import subprocess
|
||||
import sys
|
||||
import re
|
||||
import time
|
||||
|
||||
sys.path.append(sys.path[0] + "/../../../../")
|
||||
from gspylib.common.DbClusterInfo import dbClusterInfo, queryCmd
|
||||
@ -221,11 +223,28 @@ class OmImplOLAP(OmImpl):
|
||||
self.context.g_opts.security_mode)
|
||||
if self.dataDir != "":
|
||||
cmd += " -D %s" % self.dataDir
|
||||
starttime = time.time()
|
||||
(statusMap, output) = self.sshTool.getSshStatusOutput(cmd, hostList)
|
||||
for nodeName in hostList:
|
||||
if statusMap[nodeName] != 'Success':
|
||||
raise Exception(
|
||||
ErrorCode.GAUSS_536["GAUSS_53600"] % (cmd, output))
|
||||
if re.search("another server might be running", output):
|
||||
self.logger.log(output)
|
||||
if startType == "cluster":
|
||||
cmd = "source %s; gs_om -t status|grep cluster_state|grep Normal" \
|
||||
% self.context.g_opts.mpprcFile
|
||||
while time.time() <= time_out + starttime:
|
||||
status = subprocess.getstatusoutput(cmd)[0]
|
||||
if status != 0:
|
||||
self.logger.log("Waiting for check cluster state...")
|
||||
time.sleep(5)
|
||||
else:
|
||||
break
|
||||
if time.time() > time_out + starttime:
|
||||
raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] % "cluster"
|
||||
+ "Start timeout, please check the process"
|
||||
" status manually")
|
||||
self.logger.log("=========================================")
|
||||
self.logger.log("Successfully started.")
|
||||
self.logger.debug("Operation succeeded: Start.")
|
||||
|
||||
@ -509,7 +509,7 @@ class PreinstallImpl:
|
||||
# the temporary Files for /etc/hosts
|
||||
tmp_hostipname = "./tmp_hostsiphostname_%d" % os.getpid()
|
||||
# Delete the line with 'HOSTS_MAPPING_FLAG' in the /etc/hosts
|
||||
cmd = "grep -v '%s' %s > %s && cp %s %s && rm -rf '%s'" % \
|
||||
cmd = "grep -v '%s' %s > %s ; cp %s %s && rm -rf '%s'" % \
|
||||
("#Gauss.* IP Hosts Mapping", '/etc/hosts', tmp_hostipname,
|
||||
tmp_hostipname, '/etc/hosts', tmp_hostipname)
|
||||
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
|
||||
@ -559,7 +559,7 @@ class PreinstallImpl:
|
||||
tmp_hostipname = "./tmp_hostsiphostname_%d" % os.getpid()
|
||||
# Delete the line with 'HOSTS_MAPPING_FLAG' in the /etc/hosts
|
||||
cmd = "if [ -f '%s' ]; then grep -v '%s' %s > %s " \
|
||||
"&& cp %s %s && rm -rf '%s'; fi" % \
|
||||
"; cp %s %s ; rm -rf '%s'; fi" % \
|
||||
('/etc/hosts', "#Gauss.* IP Hosts Mapping", '/etc/hosts',
|
||||
tmp_hostipname, tmp_hostipname, '/etc/hosts', tmp_hostipname)
|
||||
# exec the cmd on all remote nodes
|
||||
|
||||
@ -166,7 +166,7 @@ def sendLogFiles():
|
||||
cmd = "%s && (if [ -f '%s'/'%s' ];then rm -rf '%s'/'%s';fi)" % \
|
||||
(cmd, g_tmpdir, tarName, g_tmpdir, tarName)
|
||||
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_logger.logExit("Failed to delete %s." % "%s and %s" % (
|
||||
g_resultdir, tarName) + " Error:\n%s" % output)
|
||||
g_logger.logExit("All collection tasks failed")
|
||||
@ -174,16 +174,16 @@ def sendLogFiles():
|
||||
cmd = "cd '%s' && tar -zcf '%s' '%s' && chmod %s '%s'" % \
|
||||
(g_tmpdir, tarName, HOSTNAME, DefaultValue.FILE_MODE, tarName)
|
||||
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_logger.logExit("Failed to compress %s." % ("directory %s/%s" % \
|
||||
(g_tmpdir,
|
||||
HOSTNAME))
|
||||
+ " Error: \n%s" % output)
|
||||
|
||||
if (g_opts.nodeName != ""):
|
||||
if g_opts.nodeName != "":
|
||||
# send backup file which is compressed to the node that is
|
||||
# currently performing the backup
|
||||
if (g_opts.nodeName == DefaultValue.GetHostIpOrName()):
|
||||
if g_opts.nodeName == DefaultValue.GetHostIpOrName():
|
||||
if int(g_opts.speedLimitFlag) == 1:
|
||||
cmd = "rsync --bwlimit=%d '%s'/'%s' '%s'/" % \
|
||||
(g_opts.speedLimitKBs, g_tmpdir, tarName,
|
||||
@ -198,7 +198,7 @@ def sendLogFiles():
|
||||
g_opts.speedLimitKBs * 8, g_opts.nodeName, g_tmpdir, tarName,
|
||||
g_opts.outputDir)
|
||||
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_logger.logExit(
|
||||
"Failed to copy %s." % tarName + " Error:\n%s" % output)
|
||||
|
||||
@ -208,7 +208,7 @@ def sendLogFiles():
|
||||
cmd = "%s && (if [ -f '%s'/'%s' ];then rm -rf '%s'/'%s';fi)" % \
|
||||
(cmd, g_tmpdir, tarName, g_tmpdir, tarName)
|
||||
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_logger.logExit("Failed to delete %s. %s" % (
|
||||
"%s and %s" % (g_resultdir, tarName), " Error:\n%s" % output))
|
||||
|
||||
@ -219,7 +219,7 @@ def checkParameterEmpty(parameter, parameterName):
|
||||
input : parameter, parameterName
|
||||
output : NA
|
||||
"""
|
||||
if (parameter == ""):
|
||||
if parameter == "":
|
||||
GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
|
||||
% parameterName)
|
||||
|
||||
@ -239,7 +239,7 @@ def parseCommandLine():
|
||||
except getopt.GetoptError as e:
|
||||
# Error exit if an illegal parameter exists
|
||||
GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e))
|
||||
if (len(args) > 0):
|
||||
if len(args) > 0:
|
||||
# Error exit if an illegal parameter exists
|
||||
GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] %
|
||||
str(args[0]))
|
||||
@ -253,7 +253,7 @@ def parseCommandLine():
|
||||
parameter_keys = parameter_map.keys()
|
||||
|
||||
for key, value in opts:
|
||||
if (key in parameter_keys):
|
||||
if key in parameter_keys:
|
||||
if key == "-C":
|
||||
value = value.replace("#", "\"")
|
||||
parameter_map[key] = value.strip()
|
||||
@ -278,18 +278,18 @@ def parseCommandLine():
|
||||
checkParameterEmpty(g_opts.user, "U")
|
||||
DefaultValue.checkUser(g_opts.user, False)
|
||||
# check log file
|
||||
if (g_opts.logFile == ""):
|
||||
if g_opts.logFile == "":
|
||||
g_opts.logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE,
|
||||
g_opts.user, "", "")
|
||||
if (not os.path.isabs(g_opts.logFile)):
|
||||
if not os.path.isabs(g_opts.logFile):
|
||||
GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log")
|
||||
if (int(g_opts.speedLimitKBs) < 0):
|
||||
if int(g_opts.speedLimitKBs) < 0:
|
||||
GaussLog.exitWithError(ErrorCode.GAUSS_526["GAUSS_53032"])
|
||||
|
||||
g_opts.speedLimitKBs = int(g_opts.speedLimitKBs)
|
||||
|
||||
# 1048576 KB/s = 1GB/s, which means unlimited.
|
||||
if (g_opts.speedLimitKBs == 0):
|
||||
if g_opts.speedLimitKBs == 0:
|
||||
g_opts.speedLimitKBs = 1048576
|
||||
|
||||
|
||||
@ -370,7 +370,7 @@ def create_temp_result_folder():
|
||||
DefaultValue.KEY_DIRECTORY_MODE, g_resultdir)
|
||||
g_logger.debug("Command for creating output directory: %s" % cmd)
|
||||
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_logger.logExit("Failed to create the %s directory." % \
|
||||
("%s/logfiles and %s/configfiles" % (
|
||||
g_resultdir, g_resultdir)) + " Error:\n%s" % output)
|
||||
@ -460,7 +460,7 @@ def basic_info_check():
|
||||
# file
|
||||
for cmd in cmds:
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_logger.debug(
|
||||
("Failed to collect basic information. Error:\n%s." % output) +
|
||||
("The cmd is %s " % cmd))
|
||||
@ -496,7 +496,7 @@ def system_check():
|
||||
cmd = cmd.replace("\n", " ")
|
||||
if "echo" in cmd:
|
||||
continue
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
if "Permission denied" in output:
|
||||
output = "can not print info to file: Permission denied"
|
||||
g_jobInfo.failedTask[cmd] = replaceInvalidStr(output)
|
||||
@ -682,17 +682,17 @@ def matchFile(begin_t, end_t, fileTime):
|
||||
and the end time.
|
||||
"""
|
||||
# both of begin_time and end_time
|
||||
if (begin_t and end_t):
|
||||
if begin_t and end_t:
|
||||
for t in fileTime:
|
||||
if (compareTime(t, begin_t) and compareTime(end_t, t)):
|
||||
if compareTime(t, begin_t) and compareTime(end_t, t):
|
||||
return True
|
||||
# only begin_time
|
||||
elif (begin_t and (not end_t)):
|
||||
elif begin_t and (not end_t):
|
||||
for t in fileTime:
|
||||
if compareTime(t, begin_t):
|
||||
return True
|
||||
# only end_time
|
||||
elif ((not begin_t) and end_t):
|
||||
elif (not begin_t) and end_t:
|
||||
for t in fileTime:
|
||||
if compareTime(end_t, t):
|
||||
return True
|
||||
@ -858,7 +858,7 @@ def log_copy_for_zenith():
|
||||
g_logger.log(json.dumps(g_jobInfo.__dict__))
|
||||
raise Exception("")
|
||||
|
||||
if (g_opts.key):
|
||||
if g_opts.key:
|
||||
# Look for keyword matching in the dir and write to the specified file
|
||||
cmd = "echo \"\" > %s/logfiles/%s; for f in `find %s -type f`;" \
|
||||
" do grep -ai '%s' $f >> %s/logfiles/%s; done" % (
|
||||
@ -896,7 +896,7 @@ def log_copy():
|
||||
deleteCmd = "cd $GAUSSLOG && if [ -d tmp_gs_collector ];" \
|
||||
"then rm -rf tmp_gs_collector; fi"
|
||||
|
||||
if (g_opts.key is not None and g_opts.key != ""):
|
||||
if g_opts.key is not None and g_opts.key != "":
|
||||
g_logger.debug(
|
||||
"Keyword for collecting log in base64 encode [%s]." % g_opts.key)
|
||||
g_opts.key = base64.b64decode(g_opts.key)
|
||||
@ -907,7 +907,7 @@ def log_copy():
|
||||
"Speed limit to copy log files is %d KB/s." % g_opts.speedLimitKBs)
|
||||
|
||||
# Filter the log files, if has keyword, do not collect prf file
|
||||
if (g_opts.key is not None and g_opts.key != ""):
|
||||
if g_opts.key is not None and g_opts.key != "":
|
||||
cmd = "cd $GAUSSLOG && if [ -d tmp_gs_collector ];" \
|
||||
"then rm -rf tmp_gs_collector; " \
|
||||
"fi && (find . -type f -iname '*.log' -print)" \
|
||||
@ -1010,7 +1010,7 @@ def log_copy():
|
||||
(DefaultValue.DIRECTORY_MODE, zipdir,
|
||||
zipFileName, zipdir)
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_jobInfo.failedTask[
|
||||
"find log zip files"] = replaceInvalidStr(output)
|
||||
g_logger.log(json.dumps(g_jobInfo.__dict__))
|
||||
@ -1022,15 +1022,15 @@ def log_copy():
|
||||
g_logger.debug("There is no zip files.")
|
||||
|
||||
# Filter keywords
|
||||
if (g_opts.key is not None and g_opts.key != ""):
|
||||
if (len(logs) != 0):
|
||||
if g_opts.key is not None and g_opts.key != "":
|
||||
if len(logs) != 0:
|
||||
g_opts.key = g_opts.key.replace('$', '\$')
|
||||
g_opts.key = g_opts.key.replace('\"', '\\\"')
|
||||
cmd = "cd $GAUSSLOG/tmp_gs_collector && "
|
||||
cmd = "%s grep \"%s\" -r * > %s/logfiles/%s" % (
|
||||
cmd, g_opts.key, g_resultdir, keyword_result)
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0 and output != ""):
|
||||
if status != 0 and output != "":
|
||||
cmd = "rm -rf $GAUSSLOG/tmp_gs_collector"
|
||||
(status1, output1) = DefaultValue.retryGetstatusoutput(cmd)
|
||||
g_jobInfo.failedTask[
|
||||
@ -1050,7 +1050,7 @@ def log_copy():
|
||||
cmd = "touch %s/logfiles/%s && " % (g_resultdir, keyword_result)
|
||||
cmd = "%s rm -rf $GAUSSLOG/tmp_gs_collector" % cmd
|
||||
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_jobInfo.failedTask["touch keyword file"] = replaceInvalidStr(
|
||||
output)
|
||||
g_logger.log(json.dumps(g_jobInfo.__dict__))
|
||||
@ -1071,7 +1071,7 @@ def log_copy():
|
||||
"&& rm -rf $GAUSSLOG/'%s'" % \
|
||||
(cmd, logfiletar)
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_jobInfo.failedTask[
|
||||
"copy result file and delete tmp file"] = replaceInvalidStr(
|
||||
output)
|
||||
@ -1136,7 +1136,7 @@ def xlog_copy():
|
||||
(g_resultdir, g_current_time, g_current_time,
|
||||
g_current_time)
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_logger.debug(
|
||||
"Failed to collect xlog. Command %s \n, Error %s \n",
|
||||
(cmd, output))
|
||||
@ -1236,7 +1236,7 @@ def parallel_xlog(Inst):
|
||||
cmd = getXlogCmd(Inst)
|
||||
if len(cmd) > 1:
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_logger.debug(
|
||||
"Failed to collect xlog files. Command: %s.\n Error: %s\n" % (
|
||||
cmd, output))
|
||||
@ -1383,17 +1383,9 @@ def conf_gstack(jobName):
|
||||
try:
|
||||
# Gets all instances of the cluster
|
||||
Instances = []
|
||||
for Inst in g_localnodeinfo.gtms:
|
||||
if "gtm" in ",".join(g_opts.content).lower():
|
||||
Instances.append(Inst)
|
||||
for Inst in g_localnodeinfo.coordinators:
|
||||
if "cn" in ",".join(g_opts.content).lower():
|
||||
Instances.append(Inst)
|
||||
for Inst in g_localnodeinfo.datanodes:
|
||||
if "dn" in ",".join(g_opts.content).lower():
|
||||
Instances.append(Inst)
|
||||
for Inst in g_localnodeinfo.gtses:
|
||||
Instances.append(Inst)
|
||||
# parallel copy configuration files, and get gstack
|
||||
if Instances:
|
||||
pool = ThreadPool(DefaultValue.getCpuSet())
|
||||
@ -1447,7 +1439,7 @@ def plan_simulator_check():
|
||||
"-p %d -D %s/planSimulatorfiles/%s" % \
|
||||
(cmd, db, cnInst.port, g_resultdir, db)
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
g_logger.debug(
|
||||
"Failed to Collect plan simulator. "
|
||||
"Command %s.\n Error: %s.\n" % (
|
||||
@ -1470,7 +1462,7 @@ def getBakConfCmd(Inst):
|
||||
"""
|
||||
cmd = ""
|
||||
pidfile = ""
|
||||
if (Inst.instanceRole == DefaultValue.INSTANCE_ROLE_GTM):
|
||||
if Inst.instanceRole == DefaultValue.INSTANCE_ROLE_GTM:
|
||||
if g_need_gstack == 0:
|
||||
cmd = "mkdir -p -m %s '%s/configfiles/config_%s/gtm_%s'" % \
|
||||
(
|
||||
@ -1508,7 +1500,7 @@ def getBakConfCmd(Inst):
|
||||
"collect gtm_%s process stack info" % Inst.instanceId] = \
|
||||
ErrorCode.GAUSS_535["GAUSS_53511"] % 'GTM'
|
||||
|
||||
elif (Inst.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR):
|
||||
elif Inst.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR:
|
||||
if g_need_gstack == 0:
|
||||
cmd = "mkdir -p -m %s '%s/configfiles/config_%s/cn_%s'" % \
|
||||
(
|
||||
@ -1542,7 +1534,7 @@ def getBakConfCmd(Inst):
|
||||
"collect cn_%s process stack info" % Inst.instanceId] = \
|
||||
ErrorCode.GAUSS_535["GAUSS_53511"] % 'CN'
|
||||
|
||||
elif (Inst.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE):
|
||||
elif Inst.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE:
|
||||
if g_need_gstack == 0:
|
||||
cmd = "mkdir -p -m %s '%s/configfiles/config_%s/dn_%s'" % \
|
||||
(
|
||||
@ -1584,7 +1576,7 @@ def parallel_conf_gstack(Inst):
|
||||
"""
|
||||
(cmd, pidfile) = getBakConfCmd(Inst)
|
||||
(status, output) = subprocess.getstatusoutput(cmd)
|
||||
if (status != 0):
|
||||
if status != 0:
|
||||
if "command not found" in output:
|
||||
g_jobInfo.failedTask["collect process stack info"] = \
|
||||
ErrorCode.GAUSS_535["GAUSS_53512"]
|
||||
@ -1625,7 +1617,7 @@ def parseConfig():
|
||||
input : NA
|
||||
output: NA
|
||||
"""
|
||||
if (g_opts.config != ""):
|
||||
if g_opts.config != "":
|
||||
d = json.loads(g_opts.config)
|
||||
g_opts.content = d['Content'].split(",")
|
||||
|
||||
@ -1645,24 +1637,24 @@ def main():
|
||||
elif g_opts.action == "create_dir":
|
||||
create_temp_result_folder()
|
||||
# Get system information
|
||||
elif (g_opts.action == "system_check"):
|
||||
elif g_opts.action == "system_check":
|
||||
system_check()
|
||||
# Gets the database information
|
||||
elif (g_opts.action == "database_check"):
|
||||
elif g_opts.action == "database_check":
|
||||
database_check()
|
||||
# Make a copy of the log file
|
||||
elif (g_opts.action == "log_copy"):
|
||||
elif g_opts.action == "log_copy":
|
||||
log_copy()
|
||||
# Copy configuration files, and get g stack
|
||||
elif (g_opts.action == "Config"):
|
||||
elif g_opts.action == "Config":
|
||||
conf_gstack("Config")
|
||||
elif (g_opts.action == "Gstack"):
|
||||
elif g_opts.action == "Gstack":
|
||||
global g_need_gstack
|
||||
g_need_gstack = 1
|
||||
conf_gstack("Gstack")
|
||||
g_need_gstack = 0
|
||||
# Send all log files we collected to the command node.
|
||||
elif (g_opts.action == "copy_file"):
|
||||
elif g_opts.action == "copy_file":
|
||||
sendLogFiles()
|
||||
elif g_opts.action == "xlog_copy":
|
||||
xlog_copy()
|
||||
|
||||
Reference in New Issue
Block a user