om 630后修改合入

云对接工具修改
打包脚本修改
This commit is contained in:
gyt0221
2020-08-26 16:06:27 +08:00
parent ed27420caa
commit 2ca588a3d8
17 changed files with 342 additions and 255 deletions

View File

@ -303,6 +303,11 @@ function install_gaussdb()
export LD_LIBRARY_PATH=$GAUSSHOME/lib:$LD_LIBRARY_PATH
commitid=$(LD_PRELOAD='' ${BUILD_DIR}/bin/gaussdb -V | awk '{print $5}' | cut -d ")" -f 1)
if [ -z $commitid ]
then
commitid=$(date "+%Y%m%d%H%M%S")
commitid=${commitid:4:8}
fi
echo "${commitid}" >>${SCRIPT_DIR}/version.cfg
echo "End insert commitid into version.cfg" >> "$LOG_FILE" 2>&1
}

View File

@ -571,25 +571,34 @@ cmserver_ha_status_interval|int|0,2147483647|NULL|NULL|
cmserver_self_vote_timeout|int|0,2147483647|NULL|This parameter works only when cmserver_self_vote_timeout >= cmserver_ha_heartbeat_timeout, otherwise, it will work based on cmserver_ha_heartbeat_timeout.|
enable_transaction_read_only|bool|0,0|NULL|NULL|
datastorage_threshold_check_interval|int|1,2592000|NULL|NULL|
alarm_report_max_count|int|5,2592000|NULL|NULL|
alarm_report_max_count|int|1,2592000|NULL|NULL|
datastorage_threshold_value_check|int|1,99|NULL|NULL|
max_datastorage_threshold_check|int|1,2592000|NULL|NULL|
coordinator_heartbeat_timeout|int|0,2147483647|NULL|NULL|
phony_dead_effective_time|int|0,2147483647|NULL|NULL|
instance_keep_heartbeat_timeout|int|0,2147483647|NULL|NULL|
cm_server_arbitrate_delay_base_time_out|int|0,2147483647|NULL|NULL|
cm_server_arbitrate_delay_incrememtal_time_out|int|0,2147483647|NULL|NULL|
enable_az_auto_switchover|int|0,1|NULL|NULL|
cm_auth_method|enum|trust,gss|NULL|NULL|
cm_krb_server_keyfile|string|0,0|NULL|NULL|
switch_rto|int|0,2147483647|NULL|NULL|
force_promote|int|0,1|NULL|NULL|
az_switchover_threshold|int|1,100|NULL|NULL|
az_check_and_arbitrate_interval|int|1,2147483647|NULL|NULL|
az_connect_check_interval|int|1,2147483647|NULL|NULL|
az_connect_check_delay_time|int|1,2147483647|NULL|NULL|
cmserver_demote_delay_on_etcd_fault|int|1,2147483647|NULL|NULL|
instance_phony_dead_restart_interval|int|1,2147483647|NULL|NULL|
[cmagent]
log_dir|string|0,0|NULL|NULL|
log_file_size|int|0,2147483647|MB|NULL|
log_min_messages|enum|debug5,debug1,log,warning,error,fatal|NULL|NULL|
log_max_size|int|0,1024|NULL|NULL|
log_max_count|int|0,100000|NULL|NULL|
log_max_size|int|0,2147483647|NULL|NULL|
log_max_count|int|0,10000|NULL|NULL|
log_saved_days|int|0,1000|NULL|NULL|
enable_log_compress|bool|0,0|NULL|NULL|
alarm_report_interval|int|0,2147483647|NULL|NULL|
alarm_report_max_count|int|5,2592000|NULL|NULL|
alarm_report_max_count|int|1,2592000|NULL|NULL|
alarm_component|string|0,0|NULL|NULL|
incremental_build|bool|0,0|NULL|NULL|
agent_report_interval|int|0,2147483647|NULL|NULL|
@ -597,12 +606,15 @@ agent_heartbeat_timeout|int|0,2147483647|NULL|NULL|
agent_connect_timeout|int|0,2147483647|NULL|NULL|
agent_connect_retries|int|0,2147483647|NULL|NULL|
agent_check_interval|int|0,2147483647|NULL|NULL|
agent_kill_instance_timeout|int|0,2147483647|NULL|NULL|
log_threshold_check_interval|int|0,2147483647|NULL|NULL|
dilatation_shard_count_for_disk_capacity_alarm|int|0,2147483647|NULL|NULL|
security_mode|bool|0,0|NULL|NULL|
upgrade_from|int|0,4294967295|NULL|For upgrading, specify which version we are upgrading from.|
unix_socket_directory|string|0,0|NULL|NULL|
enable_xc_maintenance_mode|bool|0,0|NULL|NULL|
process_cpu_affinity|int|0,2|NULL|NULL|
enable_cn_auto_repair|bool|0,0|NULL|NULL|
agent_phony_dead_check_interval|int|0,2147483647|NULL|NULL|
[lcname]
allow_concurrent_tuple_update|bool|0,0|NULL|NULL|
prefetch_quantity|int|128,131072|kB|NULL|

View File

@ -83,12 +83,32 @@ DEFAULT_TIMEOUT = 1500
# because single clusters don't need to perform consistency checks and
# internal communication class checks
SINGLE_SKIP = ["CheckTimeZone", "CheckEncoding", "CheckKernelVer",
"CheckNTPD",
"CheckNoCheckSum", "CheckCpuCount",
"CheckNTPD", "CheckNoCheckSum", "CheckCpuCount",
"CheckMemInfo", "CheckDiskConfig",
"CheckUpVer", "CheckPgxcgroup", "CheckPing",
"CheckNetWorkDrop",
"CheckNetSpeed"]
"CheckNetWorkDrop", "CheckNetSpeed"]
SETITEM_SKIP = ["CheckCPU", "CheckTimeZone", "CheckOSVer", "CheckNTPD",
"CheckSshdService", "CheckNoCheckSum", "CheckEtcHosts",
"CheckCpuCount", "CheckHyperThread", "CheckMemInfo",
"CheckKernelVer", "CheckEncoding", "CheckBootItems",
"CheckDropCache", "CheckFilehandle", "CheckKeyProAdj",
"CheckDiskFormat", "CheckInodeUsage", "CheckSpaceUsage",
"CheckDiskConfig", "CheckXid", "CheckSysTabSize",
"CheckClusterState", "CheckConfigFileDiff", "CheckUpVer",
"CheckEnvProfile", "CheckGaussVer", "CheckPortRange",
"CheckReadonlyMode", "CheckCatchup", "CheckProcessStatus",
"CheckSpecialFile", "CheckCollector", "CheckLargeFile",
"CheckProStartTime", "CheckMpprcFile", "CheckLockNum",
"CheckCurConnCount", "CheckCursorNum", "CheckPgxcgroup",
"CheckLockState", "CheckIdleSession", "CheckDBConnection",
"CheckSysTable", "CheckSysTabSize", "CheckTableSpace",
"CheckTableSkew", "CheckDNSkew", "CheckCreateView",
"CheckHashIndex", "CheckNextvalInDefault", "CheckPgxcRedistb",
"CheckReturnType", "CheckSysadminUser", "CheckTDDate",
"CheckDropColumn", "CheckDiskFailure", "CheckPing",
"CheckNetWorkDrop", "CheckUsedPort", "CheckNICModel",
"CheckRouting", "CheckNetSpeed", "CheckDataDiskUsage"]
class CmdOptions():
@ -358,10 +378,10 @@ class CheckContext():
input : remote host name and password map
output : NA
'''
if (len(hosts) == 0 or g_opts.isSingle):
if len(hosts) == 0 or g_opts.isSingle:
return
fileName = self.getCacheFile()
if (not os.path.isfile(fileName)):
if not os.path.isfile(fileName):
raise CheckException("File %s is not exist or invalid" % fileName)
try:
pool = ThreadPool(DefaultValue.getCpuSet())
@ -390,7 +410,8 @@ Usage:
General options:
-i Health check item number.
OLAP Example: -i CheckCPU,CheckMTU,CheckPing.
OLAP Example: -i CheckCPU,CheckMTU,
CheckPing.
-e Health check scene name.
OLAP Example: -e inspect/upgrade/slow_node/
binary_upgrade/health/install/longtime
@ -398,17 +419,19 @@ General options:
-L Run the command as local mode.
-l Path of log file.
-o Save the result to the specified directory.
--cid The check ID used for identify a check process,
only for internal use.
--cid The check ID used for identify a check
process, only for internal use.
--skip-root-items Skip the items with root privileges.
--disk-threshold Set disk threshold for checking disk usage,
only for CheckDataDiskUsage.
--format Set the format of the result report.
--set Set abnormal items if supported
--time-out Set the timeout for scene check, default 1500 seconds.
--time-out Set the timeout for scene check, default
1500 seconds.
--routing The network segment with business ip,
example: 192.168.1.1:255.255.255.0
--skip-items Skip the specified check item or setting item with scene check
--skip-items Skip the specified check item or setting
item with scene check
Example: --skip-items CheckCPU,CheckMTU
-?, --help Show help information for this utility,
and exit the command line mode.
@ -453,7 +476,7 @@ def parseCommandLine():
g_opts = CmdOptions()
ParaObj = Parameter()
ParaDict = ParaObj.ParameterCommandLine("check")
if ("helpFlag" in list(ParaDict.keys())):
if "helpFlag" in list(ParaDict.keys()):
usage()
sys.exit(0)
@ -475,50 +498,51 @@ def parseCommandLine():
raise UseBothParameterException(
(paraNameMap[para], paraNameMap[irrelevantPara[para]]))
if ("itemstr" in list(ParaDict.keys())):
if "itemstr" in list(ParaDict.keys()):
g_opts.items = ParaDict["itemstr"]
if ("scenes" in list(ParaDict.keys())):
if "scenes" in list(ParaDict.keys()):
g_opts.scene = ParaDict["scenes"]
if ("outFile" in list(ParaDict.keys())):
if "outFile" in list(ParaDict.keys()):
g_context.outPath = ParaDict["outFile"]
if ("logFile" in list(ParaDict.keys())):
if "logFile" in list(ParaDict.keys()):
g_opts.logFile = ParaDict["logFile"]
if ("user" in list(ParaDict.keys())):
if "user" in list(ParaDict.keys()):
g_context.user = ParaDict["user"]
if ("hostfile" in list(ParaDict.keys())):
if "hostfile" in list(ParaDict.keys()):
for node in g_file.readFile(ParaDict["hostfile"]):
g_opts.nodes.append(node.strip())
if ("cid" in list(ParaDict.keys())):
if "cid" in list(ParaDict.keys()):
g_context.setCheckID(ParaDict["cid"])
g_opts.distributing = True
if ("localMode" in list(ParaDict.keys())):
if "localMode" in list(ParaDict.keys()):
g_opts.localMode = True
if ("skipRootItems" in list(ParaDict.keys())):
if "skipRootItems" in list(ParaDict.keys()):
g_opts.skipRootItems = True
if ("disk-threshold" in list(ParaDict.keys())):
if "disk-threshold" in list(ParaDict.keys()):
g_context.thresholdDn = ParaDict["disk-threshold"]
if ("set" in list(ParaDict.keys())):
if "set" in list(ParaDict.keys()):
g_context.set = True
if ("routing" in list(ParaDict.keys())):
if "routing" in list(ParaDict.keys()):
g_opts.routing = ParaDict["routing"]
if ("skipItems" in list(ParaDict.keys())):
if "skipItems" in list(ParaDict.keys()):
g_opts.skipItems = ParaDict["skipItems"]
if ("nodegroup_name" in list(ParaDict.keys())):
if "nodegroup_name" in list(ParaDict.keys()):
g_context.LCName = ParaDict["nodegroup_name"]
if ("shrinkNodes" in list(ParaDict.keys())):
if "shrinkNodes" in list(ParaDict.keys()):
g_context.ShrinkNodes = ParaDict["shrinkNodes"]
if ("time_out" in list(ParaDict.keys())):
if "time_out" in list(ParaDict.keys()):
try:
g_opts.timeout = int(ParaDict["time_out"])
except Exception:
raise CheckException("The parameter timeout set invalid value")
if (g_opts.timeout < DEFAULT_TIMEOUT):
if g_opts.timeout < DEFAULT_TIMEOUT:
raise CheckException(
"The timeout parameter must be set larger than default value 1500 seconds")
"The timeout parameter must be set larger than default "
"value 1500 seconds")
setTimeOut()
if ("format" in list(ParaDict.keys())):
if "format" in list(ParaDict.keys()):
g_opts.format = ParaDict["format"]
if (g_opts.format not in formatList):
if g_opts.format not in formatList:
raise CheckException(
"Format %s is not available,the valid format is %s" % (
g_opts.format, ",".join(formatList)))
@ -546,13 +570,13 @@ def checkParameter():
def checkuser():
# The new node scenario does not need the -U parameter
if (__isRoot() and not g_opts.localMode):
if __isRoot() and not g_opts.localMode:
g_context.user = None
return
# Default mode -U for the current user
if (not __isRoot() and not g_context.user):
if not __isRoot() and not g_context.user:
g_context.user = SharedFuncs.getCurrentUser()
if (g_context.user):
if g_context.user:
if not __isRoot() and g_context.user != SharedFuncs.getCurrentUser():
raise CheckException(
"The user %s is not current user" % g_context.user)
@ -561,22 +585,22 @@ def checkuser():
except Exception:
raise CheckException(
"The user %s is not a effective user." % g_context.user)
if (user_uid == 0):
if user_uid == 0:
raise CheckException("The -U parameter can not be the root user.")
isClusterUser = SharedFuncs.checkClusterUser(g_context.user,
__getMpprcFile())
if (isClusterUser):
if isClusterUser:
# get cluster information
g_context.mpprc = __getMpprcFile()
clusterInfo = g_context.loadClusterInfo(g_context.user)
if (clusterInfo):
if clusterInfo:
g_opts.cluster = clusterInfo
else:
isClusterUser = False
if (not isClusterUser):
if not isClusterUser:
raise CheckException(
"The user %s is not valid cluster user" % g_context.user)
if (g_opts.localMode or g_opts.distributing):
if g_opts.localMode or g_opts.distributing:
return
# Check cluster user trust
@ -588,14 +612,14 @@ def checkuser():
psshPath = os.path.join(appPath, 'script/gspylib/pssh/bin/pssh')
cmd = "%s -H %s 'id' " % (psshPath, " -H ".join(dbNameList))
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
errorNode = []
for result in output.split('\n'):
if (result.strip() == ""):
if result.strip() == "":
continue
resultInfo = result.split()
# Analyze the results
if (len(resultInfo) > 3 and resultInfo[2] == "[SUCCESS]"):
if len(resultInfo) > 3 and resultInfo[2] == "[SUCCESS]":
continue
elif (len(resultInfo) > 3 and resultInfo[2] == "[FAILURE]" and
resultInfo[3] in dbNameList):
@ -604,7 +628,7 @@ def checkuser():
raise CheckException(
"Failed to check user trust. commands: %s Error:/n%s"
% (cmd, output))
if (errorNode):
if errorNode:
raise CheckException(
"Failed to check user trust with %s" % errorNode)
else:
@ -613,20 +637,20 @@ def checkuser():
def createPath(path, user=""):
if (path == "/dev/null"):
if path == "/dev/null":
return
if (os.path.isdir(path)):
if os.path.isdir(path):
# test write permissions
if (not g_file.checkDirWriteable(path)):
if not g_file.checkDirWriteable(path):
raise CheckException(
"Failed to create or delete file in the [%s]." % path)
elif (os.path.isfile(path)):
elif os.path.isfile(path):
raise CheckException("The out path [%s] must be a directory." % path)
else:
# path is not exist. recursively create the path
g_file.createDirectory(path, True, DefaultValue.KEY_DIRECTORY_MODE)
# Modify the file owner
if (__isRoot() and user):
if __isRoot() and user:
g_file.changeOwner(user, path)
@ -651,11 +675,12 @@ def initLogFile():
output: NA
"""
global g_context, g_logger
# load the context when the script ruuning on local mode and the context was cached before
# load the context when the script ruuning on local mode and the context
# was cached before
g_context.tmpPath = getTmpPath()
if (g_context.isCached()):
if g_context.isCached():
g_context = g_context.load()
if (__getLocalNode(g_context.nodes) in g_context.newNodes):
if __getLocalNode(g_context.nodes) in g_context.newNodes:
g_context.mpprc = None
g_context.user = None
g_context.cluster = None
@ -667,9 +692,9 @@ def initLogFile():
else:
# Parameter specified first, followed by default GAUSSLOG,
# last temporary directory
if (g_opts.logFile):
if g_opts.logFile:
g_context.logFile = os.path.realpath(g_opts.logFile)
elif (g_opts.cluster):
elif g_opts.cluster:
g_context.logFile = os.path.join(g_opts.cluster.logPath,
'%s/om/gs_check.log'
% g_context.user)
@ -687,16 +712,16 @@ def initLogFile():
# Load support check items by parsing the project folder
g_context.loadSupportItems()
# load the scene configuration
if (g_opts.scene):
if g_opts.scene:
g_context.loadSceneConfiguration(g_opts.scene)
# load cluster info
if (g_opts.cluster):
if g_opts.cluster:
g_context.cluster = g_opts.cluster
g_context.oldNodes = g_opts.cluster.getClusterSshIps()[0]
# load nodes
if (g_opts.nodes):
if g_opts.nodes:
for node in g_opts.nodes:
if (node not in g_context.oldNodes):
if node not in g_context.oldNodes:
g_context.newNodes.append(node)
g_context.nodes = g_context.oldNodes + g_context.newNodes
@ -716,11 +741,11 @@ def getRootUserPwd():
g_logger.debug("Ask user input password interactive")
for host in g_context.nodes:
isPwdOk = SharedFuncs.verifyPasswd(host, rootuser, rootpwd)
if (not isPwdOk):
if not isPwdOk:
# try to connect remote node again
rootpwd = __retryConnection(host, rootuser)
g_opts.pwdMap[host] = (rootuser, rootpwd)
if (pwd.getpwnam(rootuser).pw_uid != 0):
if pwd.getpwnam(rootuser).pw_uid != 0:
raise CheckException("Enter the user [%s] does not have"
" root privileges." % rootuser)
# print message on screen
@ -735,7 +760,7 @@ def parseCheckContext():
"""
global g_context
initLogFile()
if (g_context.isCached()):
if g_context.isCached():
return
g_logger.debug("Start to parse the check items config file")
items_all = []
@ -744,7 +769,7 @@ def parseCheckContext():
failedItems = []
singleSkipList = []
# generate the items from scene configuration
if (g_opts.scene):
if g_opts.scene:
items_oldNode, failedItems = __parseScene(g_opts.scene)
items_all += items_oldNode
# generate the items from -i parameter value
@ -756,23 +781,23 @@ def parseCheckContext():
else:
items_all.append(item)
for item in items_all:
if (not g_context.set and item['name'] in g_opts.skipItems):
if not g_context.set and item['name'] in g_opts.skipItems:
items_all.remove(item)
continue
if (g_context.set and item['set_permission'] == 'root'):
if g_context.set and item['set_permission'] == 'root':
g_context.rootItems.append(item)
if (g_opts.skipRootItems and item['permission'] == 'root'):
if g_opts.skipRootItems and item['permission'] == 'root':
items_all.remove(item)
continue
if (item['permission'] == 'root'):
if item['permission'] == 'root':
g_context.rootItems.append(item)
if (g_opts.isSingle and item['name'] in SINGLE_SKIP):
if g_opts.isSingle and item['name'] in SINGLE_SKIP:
singleSkipList.append(item['name'])
continue
if (item['name'] == "CheckRouting"):
if (g_opts.routing):
if item['name'] == "CheckRouting":
if g_opts.routing:
g_context.routing = g_opts.routing
elif (g_opts.cluster):
elif g_opts.cluster:
workIP = g_opts.cluster.getDbNodeByName(
DefaultValue.GetHostIpOrName()).backIps[0]
g_context.routing = "%s:%s" % (
@ -781,16 +806,16 @@ def parseCheckContext():
raise CheckException(
"The --routing is required when cluster dosen't exist")
g_context.items.append(item)
if (len(singleSkipList) != 0):
if len(singleSkipList) != 0:
__printOnScreen(
"The following items are skipped when the type of cluster is"
" single:\n[%s]" % ",".join(singleSkipList))
if (not items_newNode):
if not items_newNode:
g_context.oldItems = g_context.items
else:
g_context.oldItems = items_oldNode
g_context.newItems = items_newNode
if (g_context.set and items_all):
if g_context.set and items_all:
# Settings will have a big impact and need to be confirmed
confirmItem = {
"CheckCrontabLeft": "Clear om_monitor in crond service",
@ -798,36 +823,41 @@ def parseCheckContext():
"'/var/log/Bigdata/','/home/omm/'",
"CheckProcessLeft": "Kill all process with gaussdb and omm user",
"CheckOmmUserExist": "Delete system user omm",
"CheckPortConflict": "kill all process with occupies the 25xxx port"
"CheckPortConflict": "kill all process with occupies "
"the 25xxx port"
}
confirmMsg = ""
for item in items_all:
if (item['name'] in list(confirmItem.keys())):
if item['name'] in list(confirmItem.keys()):
confirmMsg += confirmItem[item['name']] + "\n"
if (confirmMsg):
confirmMsg = "Warning: Executing the settings will do the following at the [%s] node:\n" % \
if item['name'] in SETITEM_SKIP:
g_context.skipSetItem.append(item['name'])
if confirmMsg:
confirmMsg = "Warning: Executing the settings will do " \
"the following at the [%s] node:\n" % \
','.join(g_context.newNodes) + confirmMsg
__printOnScreen(confirmMsg)
flag = input("Execution settings? (Y/N):")
while (True):
while True:
# If it is not yes or all, it has been imported
if (not flag.upper() in ("Y", "N", "YES", "NO")):
if not flag.upper() in ("Y", "N", "YES", "NO"):
flag = input("Please type 'yes' or 'no': ")
continue
break
if (flag.upper() in ("Y", "YES")):
if flag.upper() in ("Y", "YES"):
pass
if (flag.upper() in ("N", "NO")):
skipSetItem = []
if flag.upper() in ("N", "NO"):
for Item in g_context.newItems:
if (Item['name'] in list(confirmItem.keys())):
if Item['name'] in list(confirmItem.keys()):
g_context.newItems.remove(Item)
skipSetItem.append(Item['name'])
g_context.skipSetItem.append(Item['name'])
__printOnScreen(
'Skip the settings for [%s]' % ','.join(skipSetItem))
if (failedItems):
'Skip the settings for [%s]'
% ','.join(g_context.skipSetItem))
if failedItems:
raise ParseItemException(failedItems)
if (not g_context.items):
if not g_context.items:
raise CheckException("No check item can be performed,"
" please confirm the input parameters.")
@ -853,7 +883,7 @@ def __printOnScreen(msg):
"""
function: print message on screen
"""
if (g_opts.localMode or g_opts.distributing):
if g_opts.localMode or g_opts.distributing:
return
g_logger.info(msg)
@ -884,7 +914,7 @@ def __getLocalNode(nodes):
"""
if nodes:
for n in nodes:
if (SharedFuncs.is_local_node(n)):
if SharedFuncs.is_local_node(n):
return n
return DefaultValue.GetHostIpOrName()
@ -893,7 +923,7 @@ def __getSeparatedValue(value, separator=","):
'''
get command line value which were separated by ","
'''
if (separator not in value):
if separator not in value:
return [value]
return value.split(separator)
@ -924,7 +954,7 @@ def __retryConnection(host, user):
"Please enter password for user[%s] on the node[%s]:"
% (user, host))
isOK = SharedFuncs.verifyPasswd(host, user, passwd)
if (isOK):
if isOK:
return passwd
else:
continue
@ -938,27 +968,27 @@ def __getMpprcFile():
"""
# get mpprc file
envValue = DefaultValue.getEnv("MPPDB_ENV_SEPARATE_PATH")
if (envValue is not None and os.path.isfile(envValue)):
if envValue is not None and os.path.isfile(envValue):
return envValue
elif (not __isRoot() and DefaultValue.getEnv('GAUSS_ENV')):
elif not __isRoot() and DefaultValue.getEnv('GAUSS_ENV'):
cmd = "echo ~ 2>/dev/null"
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
raise CheckException(
"Fetching user environment variable file failed."
" Please setup environment variables." + "The cmd is %s" % cmd)
else:
return os.path.join(output, ".bashrc")
elif (__isRoot() and g_context.user):
elif __isRoot() and g_context.user:
cmd = "su - %s -c 'echo ~ 2>/dev/null'" % g_context.user
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
raise CheckException(
"Failed to get user [%s] home directory. Error: %s\n" % (
g_context.user, output) + "The cmd is %s" % cmd)
else:
return os.path.join(output, ".bashrc")
elif (__isRoot()):
elif __isRoot():
return ""
else:
raise CheckException("The separated mpprc file was not found."
@ -986,7 +1016,7 @@ def __parseScene(sceneName):
'''
function: parse scene configure file
'''
if (not sceneName):
if not sceneName:
raise NotEmptyException("scene name")
# Get scene xml
xmlFile = "%s/config/scene_%s.xml" % (g_context.basePath, sceneName)
@ -1003,7 +1033,7 @@ def __parseScene(sceneName):
for elem in rootNode.findall('allowitems/item'):
elemName = elem.attrib['name']
# check the check item whether exist or not
if (elemName not in list(g_context.supportItems.keys())):
if elemName not in list(g_context.supportItems.keys()):
raise NotExistException("elemName", "support items")
# save threshold as text and parse them later
subElem = elem.find('threshold')
@ -1014,7 +1044,7 @@ def __parseScene(sceneName):
# parse categories and get all items
for category in rootNode.findall('allowcategories/category'):
cpath = "%s/items/%s" % (g_context.basePath, category.attrib['name'])
if (os.path.isdir(cpath)):
if os.path.isdir(cpath):
itemNames.extend(x[:-3] for x in os.listdir(cpath) if
x[:-3] not in itemNames and x.endswith(".py"))
@ -1032,10 +1062,10 @@ def __parseScene(sceneName):
failedItems.append(i)
# overwrite the threshold parameters
if (thresholds and i in list(thresholds.keys())):
if thresholds and i in list(thresholds.keys()):
# parse the threshold of check item
sceneThreshold = __parseThreshold(thresholds[i])
if (item['threshold']):
if item['threshold']:
item['threshold'] = dict(item['threshold'], **sceneThreshold)
else:
item['threshold'] = sceneThreshold
@ -1047,14 +1077,14 @@ def __parseOneItem(itemName):
'''
function: parse one check item and get the full information
'''
if (not itemName):
if not itemName:
raise NotEmptyException("Item name")
item = {}
# try to load check item configuration from xml file
xmlFile = "%s/config/items.xml" % g_context.basePath
for event, elem in ETree.iterparse(xmlFile):
if (event == 'end'):
if (elem.tag == 'checkitem' and elem.attrib['name'] == itemName):
if event == 'end':
if elem.tag == 'checkitem' and elem.attrib['name'] == itemName:
# Parse the xml file
item['id'] = elem.attrib['id']
item['name'] = elem.attrib['name']
@ -1076,7 +1106,7 @@ def __parseOneItem(itemName):
'default')
# Get the threshold
threshold = elem.find('threshold')
if (threshold is not None and threshold.text is not None):
if threshold is not None and threshold.text is not None:
# parse the threshold of check item
item["threshold"] = __parseThreshold(
threshold.text.strip())
@ -1089,7 +1119,7 @@ def __parseAttr(elem, attr, language='zh'):
function: parse the xml attr with language
'''
val = elem.find('/'.join([attr, language]))
if (val is not None and val.text is not None):
if val is not None and val.text is not None:
return val.text.strip().encode('utf-8')
return ""
@ -1100,7 +1130,7 @@ def __parseProperty(elem, propertyName, defaultValue):
'''
prop = elem.find(propertyName)
result = defaultValue
if (prop is not None and prop.text is not None):
if prop is not None and prop.text is not None:
result = prop.text.strip()
return result
@ -1110,10 +1140,10 @@ def __parseThreshold(value, separator=";"):
function: parse the threshold of check item
'''
result = {}
if (separator not in value and "=" not in value):
if separator not in value and "=" not in value:
return result
if (separator not in value and "=" in value):
if separator not in value and "=" in value:
d = value.strip().split('=')
result[d[0]] = d[1]
else:
@ -1137,11 +1167,11 @@ def getMTUValue(node):
sshIp = node
# get all network card information
cmd1 = """printf \"\n\n`/sbin/ifconfig -a`\n\n\" """
if (not g_opts.pwdMap):
if not g_opts.pwdMap:
output = SharedFuncs.runSshCmd(cmd1, sshIp, g_context.user)
else:
username, passwd = g_opts.pwdMap[node]
if (username is None or passwd is None):
if username is None or passwd is None:
raise CheckException("Retrive username and password error.")
output = SharedFuncs.runSshCmdWithPwd(cmd1, sshIp, username, passwd)
# Separate each network card
@ -1151,37 +1181,37 @@ def getMTUValue(node):
mtuValue = ""
# find network card by IP
for eachNet in networkInfoList:
if (eachNet.find(addr) > 0 and eachNet.find('inet') > 0):
if eachNet.find(addr) > 0 and eachNet.find('inet') > 0:
networkInfo = eachNet
break
if (not networkInfo):
if not networkInfo:
raise CheckException(
"Failed to get network card information with '%s'." % node)
# get network number
networkNum = networkInfo.split()[0]
# Remove : if it exists
if (networkNum[-1] == ":"):
if networkNum[-1] == ":":
networkNum = networkNum[:-1]
for eachLine in networkInfo.split('\n'):
# get mtu Value with SuSE and redHat6.x
if (eachLine.find('MTU') > 0):
if eachLine.find('MTU') > 0:
mtuValue = eachLine.split(':')[1].split(' ')[0].strip()
break
# get mtu Value with redHat7.x
elif (eachLine.find('mtu') > 0):
elif eachLine.find('mtu') > 0:
mtuValue = eachLine.split()[-1]
break
else:
continue
if (not networkNum):
if not networkNum:
raise CheckException(
"Failed to get network card number with '%s'." % node)
if (not mtuValue):
if not mtuValue:
raise CheckException(
"Failed to get network card mtu value with '%s' '%s'."
% (node, networkNum))
# The nodes are grouped by MTU value
if (not mtuValue in list(g_mtuMap.keys())):
if not mtuValue in list(g_mtuMap.keys()):
g_mtuMap[mtuValue] = ["%s-%s" % (node, networkNum)]
else:
g_mtuMap[mtuValue].append("%s-%s" % (node, networkNum))
@ -1194,10 +1224,10 @@ def preCheck():
output: NA
"""
# patch ssh config
if (__isRoot()):
if __isRoot():
cmd = "grep -E '^MaxStartups[\ \t]+1000' /etc/ssh/sshd_config"
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
cmd = "sed -i '/MaxStartups/d' /etc/ssh/sshd_config &&" \
" echo 'MaxStartups 1000' >> /etc/ssh/sshd_config &&" \
" service sshd reload"
@ -1215,7 +1245,7 @@ def preCheck():
except Exception as e:
raise Exception(str(e))
# According to the number of groups to determine whether the same
if (len(list(g_mtuMap.keys())) > 1):
if len(list(g_mtuMap.keys())) > 1:
warningMsg = "Warning: The MTU value is inconsistent on all node," \
" maybe checking will be slower or hang."
for mtuValue in list(g_mtuMap.keys()):
@ -1248,8 +1278,8 @@ def doCheck():
output: NA
"""
# Local mode
if (g_opts.localMode):
if (__isDistributing()):
if g_opts.localMode:
if __isDistributing():
# load check item dynamic and get the execute result
doRunCheck()
else:
@ -1288,23 +1318,23 @@ def doCheck():
# Time to hit the log
LogCount = 0
lastTimeProgress = -1
while (len(nodes) and datetime.now() <= g_endTime):
while len(nodes) and datetime.now() <= g_endTime:
totleCount = 0
slowNode = []
for node in nodes:
# Get user and password
username, passwd = __getUserAndPwd(node)
if (node in g_context.oldNodes):
if node in g_context.oldNodes:
itemCount_node = len(g_context.oldItems)
else:
itemCount_node = len(g_context.newItems)
# Local execution
if (SharedFuncs.is_local_node(node)):
if SharedFuncs.is_local_node(node):
checkCount = SharedFuncs.checkComplete(
checkID, node, g_context.hostMapping[node],
g_context.user, g_context.tmpPath)
# Executed in new node scene
elif (node in g_context.newNodes):
elif node in g_context.newNodes:
checkCount = SharedFuncs.checkComplete(
checkID, node, g_context.hostMapping[node], username,
g_context.tmpPath, passwd)
@ -1318,14 +1348,14 @@ def doCheck():
checkCount = 0
# If there is a node check completed,
# some nodes just started,record slow node
if (overNodes > 0 and checkCount < 2):
if overNodes > 0 and checkCount < 2:
slowNode.append(node)
if (checkCount == itemCount_node):
if checkCount == itemCount_node:
nodes.remove(node)
# Record the number of completed nodes
overNodes += 1
if (not SharedFuncs.is_local_node(node)):
if (node in g_context.newNodes):
if not SharedFuncs.is_local_node(node):
if node in g_context.newNodes:
outItems = []
for i in itemsName:
outItems.append("%s/%s_%s_%s.out" % (
@ -1353,23 +1383,23 @@ def doCheck():
# Update execution progress
progressInfo = totleCount // len(g_context.nodes)
# Refresh only as the schedule changes
if (lastTimeProgress < progressInfo <= itemCount):
if lastTimeProgress < progressInfo <= itemCount:
progress_manager.update("Checking...", progressInfo)
lastTimeProgress = progressInfo
# Suggest the slow node to log every 30 seconds
if (slowNode and itemCount > 1 and LogCount % 30 == 0):
if slowNode and itemCount > 1 and LogCount % 30 == 0:
logMsg = "Warning: The node [%s] check progress" \
" is slow." % ",".join(slowNode)
g_logger.debug(logMsg)
for t in threads:
if (t.exitcode == 1):
if t.exitcode == 1:
raise ThreadCheckException(t.name, t.exception)
for t in threads:
t.join(1)
if (datetime.now() > g_endTime):
if datetime.now() > g_endTime:
raise TimeoutException(nodes)
__printOnScreen("Start to analysis the check result")
@ -1395,11 +1425,11 @@ def doRunCheck():
"""
outputPath = g_context.tmpPath
localHost = __getLocalNode(g_context.nodes)
if (localHost in g_context.newNodes):
if localHost in g_context.newNodes:
items = g_context.newItems
else:
items = g_context.oldItems
if (g_context.hostMapping):
if g_context.hostMapping:
localHost = g_context.hostMapping[localHost]
for item in items:
content = ""
@ -1416,9 +1446,9 @@ def doRunCheck():
itemResult = __analysisResult(content, item['name'])
g_result.append(itemResult)
# run the check process distributing and no need to clean the resource
if (__isDistributing()):
if __isDistributing():
g_logger.debug("run check items done and exit the command")
if (g_opts.format == 'default'):
if g_opts.format == 'default':
# Initialize the self.clusterInfo variable
print(g_result.outputRaw())
@ -1448,11 +1478,11 @@ def __prepareCmd(items, user, checkid):
userParam = ""
checkIdParam = ""
routingParam = ""
if (user):
if user:
userParam = " -U %s " % user
if (checkid):
if checkid:
checkIdParam = " --cid=%s " % checkid
if (g_context.routing):
if g_context.routing:
routingParam = "--routing %s" % g_context.routing
cmd = "%s/gs_check -i %s %s %s -L %s -o %s -l %s" % (
cmdPath, ",".join(itemsName), userParam, checkIdParam,
@ -1472,7 +1502,7 @@ def doLocalCheck(host):
g_context.checkID)
else:
cmd = __prepareCmd(g_context.newItems, "", g_context.checkID)
if (SharedFuncs.is_local_node(host)):
if SharedFuncs.is_local_node(host):
if __hasRootItems():
SharedFuncs.runRootCmd(cmd, g_opts.pwdMap[host][0],
g_opts.pwdMap[host][1], g_context.mpprc)
@ -1484,7 +1514,7 @@ def doLocalCheck(host):
else:
# get username and password for certain node
username, passwd = __getUserAndPwd(host)
if (host in g_context.newNodes):
if host in g_context.newNodes:
SharedFuncs.runSshCmdWithPwd(cmd, host, username, passwd)
else:
SharedFuncs.runSshCmdWithPwd(cmd, host, username, passwd,
@ -1496,7 +1526,7 @@ def __analysisResult(output, itemName):
function: analysis the check result
"""
item_result = ItemResult.parse(output)
if (not item_result):
if not item_result:
raise CheckException("analysis result occurs error")
try:
# load support item
@ -1616,11 +1646,15 @@ def __checkFileExist(path, filePattern):
def killChildProcess(node):
checkID = g_context.checkID
# cmd with switch users
cmd_switch = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'|awk '{print \$2}'` """ % checkID
cmd_switch += """ && (if [ X\"$proc_pid_list\" != X\"\" ]; then echo \"$proc_pid_list\" | xargs kill -9 ; fi)"""
cmd_switch = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'""" \
"""|awk '{print \$2}'` """ % checkID
cmd_switch += """ && (if [ X\"$proc_pid_list\" != X\"\" ]; """ \
"""then echo \"$proc_pid_list\" | xargs kill -9 ; fi)"""
# cmd with not switch users
cmd_current = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'|awk "{print \\\$2}"` """ % checkID
cmd_current += """ && (if [ X"$proc_pid_list" != X"" ]; then echo "$proc_pid_list" | xargs kill -9 ; fi)"""
cmd_current = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'""" \
"""|awk "{print \\\$2}"` """ % checkID
cmd_current += """ && (if [ X"$proc_pid_list" != X"" ]; then """ \
"""echo "$proc_pid_list" | xargs kill -9 ; fi)"""
username, passwd = __getUserAndPwd(node)
if SharedFuncs.is_local_node(node) and not __hasRootItems():
@ -1633,7 +1667,7 @@ def killChildProcess(node):
def cleanTmpDir(node):
# clean tmp files in all the nodes
cmd = r"rm -rf %s" % (g_context.tmpPath)
cmd = r"rm -rf %s" % g_context.tmpPath
if SharedFuncs.is_local_node(node):
SharedFuncs.runShellCmd(cmd)
else:
@ -1665,7 +1699,7 @@ def cleanEnvironment(skiplog=False):
g_logger.debug(str(e))
# clean tmp files in all the nodes
cmd = r"rm -rf %s" % (g_context.tmpPath)
cmd = r"rm -rf %s" % g_context.tmpPath
if g_opts.localMode:
SharedFuncs.runShellCmd(cmd)
else:

View File

@ -150,8 +150,8 @@ gs_sshexkey is a utility to create SSH trust among nodes in a cluster.
Usage:
gs_sshexkey -? | --help
gs_sshexkey -V | --version
gs_sshexkey -f HOSTFILE [-W PASSWORD] [...] [--skip-hostname-set]
[-l LOGFILE]
gs_sshexkey -f HOSTFILE [-l LOGFILE] [--skip-hostname-set]
General options:
-f Host file containing the IP address of nodes.
@ -478,10 +478,8 @@ General options:
if (not os.path.exists("/etc/hosts")):
raise Exception(ErrorCode.GAUSS_512["GAUSS_51221"] +
" Error: \nThe /etc/hosts does not exist.")
(status, output) = g_OSlib.getGrepValue("-v",
" #Gauss.* IP Hosts "
"Mapping",
'/etc/hosts')
cmd = "grep -v '" + HOSTS_MAPPING_FLAG + "' /etc/hosts"
(status, output) = subprocess.getstatusoutput(cmd)
try:
g_file.createFile(tmpHostIpName)
g_file.changeMode(DefaultValue.KEY_FILE_MODE, tmpHostIpName)
@ -536,7 +534,7 @@ General options:
ssh.close()
raise Exception(ErrorCode.GAUSS_503["GAUSS_50317"]
+ " Error: \n%s" % str(e))
cmd = "grep -v '%s' %s > %s && cp %s %s && rm -rf %s" \
cmd = "grep -v '%s' %s > %s ; cp %s %s && rm -rf %s" \
% (" #Gauss.* IP Hosts Mapping", '/etc/hosts', tmpHostIpName,
tmpHostIpName, '/etc/hosts', tmpHostIpName)
channel = ssh.open_session()
@ -631,7 +629,7 @@ General options:
if (boolInvalidIp):
boolInvalidIp = False
continue
cmd = "grep -v '%s' %s > %s && cp %s %s && rm -rf %s" % (
cmd = "grep -v '%s' %s > %s ; cp %s %s && rm -rf %s" % (
" #Gauss.* IP Hosts Mapping", '/etc/hosts', tmpHostIpName,
tmpHostIpName, '/etc/hosts', tmpHostIpName)
channel = ssh.open_session()

View File

@ -774,11 +774,11 @@ class DefaultValue():
netWorkNum = ""
netWorkInfo = psutil.net_if_addrs()
for nic_num in netWorkInfo.keys():
netInfo = netWorkInfo[nic_num][0]
if (netInfo.address == ipAddress):
netWorkNum = nic_num
break
if (netWorkNum == ""):
for netInfo in netWorkInfo[nic_num]:
if netInfo.address == ipAddress:
netWorkNum = nic_num
break
if netWorkNum == "":
raise Exception(ErrorCode.GAUSS_506["GAUSS_50604"] % ipAddress)
return netWorkNum
except Exception as e:

View File

@ -1525,7 +1525,12 @@ class dbClusterInfo():
"sync_state : %s\n" %
syncInfo.syncState)
if syncInfo.secondPeerRole == "":
syncInfo.secondPeerRole = "Unknown"
outText = outText + "\n------------------------" \
"---------------" \
"--------------------------------\n\n"
continue
if syncInfo.secondSyncState == "":
syncInfo.secondSyncState = "Unknown"
outText = outText + (
"secondary_state : %s\n" %
syncInfo.secondPeerRole)
@ -1553,8 +1558,6 @@ class dbClusterInfo():
outText = outText + (
"receiver_replay_location : %s\n" %
syncInfo.secondReceiverReplayLocation)
if syncInfo.secondSyncState == "":
syncInfo.secondSyncState = "Unknown"
outText = outText + (
"sync_state : %s\n" %
syncInfo.secondSyncState)
@ -1847,8 +1850,7 @@ class dbClusterInfo():
else:
outText = outText + " "
outText = outText + (
"%s " % self.__getDnRole(roleStatusArray[i],
dnInst.instanceType))
"%s " % self.__getDnRole(dnInst.instanceType))
if dnNodeCount == 1:
outText = outText + ("%-7s" % "Primary")
else:
@ -1863,9 +1865,12 @@ class dbClusterInfo():
except Exception as e:
raise Exception(ErrorCode.GAUSS_516["GAUSS_51652"] % str(e))
def __getDnRole(self, roleStatus, instanceType):
if roleStatus in DN_ROLE_MAP.keys():
return DN_ROLE_MAP[roleStatus]
def __getDnRole(self, instanceType):
"""
function : Get DnRole by instanceType
input : Int
output : String
"""
if instanceType == MASTER_INSTANCE:
return "P"
elif instanceType == STANDBY_INSTANCE:

View File

@ -82,24 +82,28 @@ class Kernel(BaseComponent):
cmd += " -o \'--securitymode\'"
self.logger.debug("start cmd = %s" % cmd)
(status, output) = subprocess.getstatusoutput(cmd)
if status != 0:
if status != 0 or re.search("start failed", output):
raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "instance"
+ " Error: Please check the gs_ctl log for "
"failure details.")
"failure details." + "\n" + output)
if re.search("another server might be running", output):
self.logger.log(output)
def stop(self, stopMode="", time_out=300):
"""
"""
cmd = "%s/gs_ctl stop -D %s " % (
self.binPath, self.instInfo.datadir)
if not self.isPidFileExist():
return
cmd = "%s/gs_ctl stop -D %s " % (self.binPath, self.instInfo.datadir)
# check stop mode
if (stopMode != ""):
cmd += " -m %s" % stopMode
cmd += " -m immediate"
else:
# check stop mode
if stopMode != "":
cmd += " -m %s" % stopMode
cmd += " -t %s" % time_out
self.logger.debug("stop cmd = %s" % cmd)
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] %
"instance" + " Error: \n%s." % output)

View File

@ -180,7 +180,7 @@ class diskInfo():
dev_info = os.statvfs(path)
used = dev_info.f_blocks - dev_info.f_bfree
valueable = dev_info.f_bavail + used
percent = math.ceil((float(used) // valueable) * 100)
percent = math.ceil((float(used) / valueable) * 100)
except Exception as e:
raise Exception(ErrorCode.GAUSS_530["GAUSS_53011"] + " disk space."
+ "Error: %s" % str(e))

View File

@ -25,6 +25,12 @@ from gspylib.inspection.common import SharedFuncs
from gspylib.common.Common import DefaultValue
from gspylib.inspection.common.Log import LoggerFactory
class GsCheckEncoder(json.JSONEncoder):
    """JSON encoder that renders ``bytes`` values as UTF-8 text.

    The stock :class:`json.JSONEncoder` raises ``TypeError`` on ``bytes``;
    check results may carry raw command output as bytes, so decode such
    values to ``str`` before serialization and defer everything else to
    the base implementation.
    """

    def default(self, obj):
        if not isinstance(obj, bytes):
            return json.JSONEncoder.default(self, obj)
        return obj.decode('utf-8')
class ResultStatus(object):
OK = "OK"
@ -242,4 +248,4 @@ class CheckResult(object):
local['raw'] = localitem.raw
localList.append(local)
resultDic['hosts'] = localList
return json.dumps(resultDic, indent=2)
return json.dumps(resultDic, cls=GsCheckEncoder, indent=2)

View File

@ -39,8 +39,7 @@ class CheckMpprcFile(BaseItem):
return
try:
with open(mpprcFile, 'r') as fp:
mpp_content = fp.read()
env_list = mpp_content.split('\n')
env_list = fp.readlines()
while '' in env_list:
env_list.remove('')
# get ec content
@ -67,13 +66,17 @@ class CheckMpprcFile(BaseItem):
"GAUSS_ENV", "KRB5_CONFIG", "PGKRBSRVNAME",
"KRBHOSTNAME", "ETCD_UNSUPPORTED_ARCH"]
# black elements
list_black = ["|", ";", "&", "$", "<", ">", "`", "\\", "'", "\"",
"{", "}", "(", ")", "[", "]", "~", "*", "?", " ",
list_black = ["|", ";", "&", "<", ">", "`", "\\", "'", "\"",
"{", "}", "(", ")", "[", "]", "~", "*", "?",
"!", "\n"]
for env in env_list:
env = env.strip()
if env == "":
continue
if len(env.split()) != 2:
return
if env.split()[0] == "umask" and env.split()[1] == "077":
continue
for black in list_black:
flag = env.find(black)
if flag >= 0:

View File

@ -75,14 +75,14 @@ class CheckNetSpeed(BaseItem):
def runClient(self, self_index, ipList):
base_listen_port = DEFAULT_LISTEN_PORT
max_server = 10
group = self_index / max_server
group = self_index // max_server
path = self.context.basePath
port = base_listen_port + self_index % max_server
for ip in ipList:
index = ipList.index(ip)
if (index == self_index):
continue
if (index / max_server != group):
if (index // max_server != group):
continue
try:
p = subprocess.Popen([path + "/lib/checknetspeed/speed_test",

View File

@ -88,9 +88,9 @@ class CheckNTPD(BaseItem):
def postAnalysis(self, itemResult, category="", name=""):
    """
    Aggregate per-host NTPD check results into one verdict.

    Walks every local (per-host) result; if any host reported NG, the
    overall item is marked NG and the failing hosts' messages are joined
    (one "host: value" line each) into the analysis text.

    :param itemResult: aggregated result object exposing getLocalItems()
    :param category:   unused, kept for the common postAnalysis interface
    :param name:       unused, kept for the common postAnalysis interface
    :return:           the (possibly updated) itemResult
    """
    # NOTE(review): span contained duplicated old/new diff lines
    # (both parenthesized and bare conditions); the de-duplicated
    # post-commit form is reconstructed here.
    errors = []
    for local_item in itemResult.getLocalItems():
        if local_item.rst == ResultStatus.NG:
            errors.append("%s: %s" % (local_item.host, local_item.val))
    if errors:
        itemResult.rst = ResultStatus.NG
        itemResult.analysis = "\n".join(errors)
    return itemResult

View File

@ -295,9 +295,12 @@ class PlatformCommand():
"""
pidList = []
for pid in psutil.pids():
p = psutil.Process(pid)
if procName == p.name():
pidList.append(pid)
try:
p = psutil.Process(pid)
if procName == p.name():
pidList.append(pid)
except psutil.NoSuchProcess:
pass
return pidList
def killProcessByProcName(self, procName, killType=2):

View File

@ -230,9 +230,15 @@ class SshTool():
g_file.removeFile(tmp_hosts)
if output is not None:
output = str(output, encoding='utf-8')
GaussLog.printMessage(output.strip())
if re.search("\[GAUSS\-", output):
if re.search("Please enter password", output):
GaussLog.printMessage(
ErrorCode.GAUSS_503["GAUSS_50306"] % user)
else:
GaussLog.printMessage(output.strip())
sys.exit(1)
else:
GaussLog.printMessage(output.strip())
else:
sys.exit(1)
except Exception as e:

View File

@ -19,6 +19,8 @@
#############################################################################
import subprocess
import sys
import re
import time
sys.path.append(sys.path[0] + "/../../../../")
from gspylib.common.DbClusterInfo import dbClusterInfo, queryCmd
@ -221,11 +223,28 @@ class OmImplOLAP(OmImpl):
self.context.g_opts.security_mode)
if self.dataDir != "":
cmd += " -D %s" % self.dataDir
starttime = time.time()
(statusMap, output) = self.sshTool.getSshStatusOutput(cmd, hostList)
for nodeName in hostList:
if statusMap[nodeName] != 'Success':
raise Exception(
ErrorCode.GAUSS_536["GAUSS_53600"] % (cmd, output))
if re.search("another server might be running", output):
self.logger.log(output)
if startType == "cluster":
cmd = "source %s; gs_om -t status|grep cluster_state|grep Normal" \
% self.context.g_opts.mpprcFile
while time.time() <= time_out + starttime:
status = subprocess.getstatusoutput(cmd)[0]
if status != 0:
self.logger.log("Waiting for check cluster state...")
time.sleep(5)
else:
break
if time.time() > time_out + starttime:
raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] % "cluster"
+ "Start timeout, please check the process"
" status manually")
self.logger.log("=========================================")
self.logger.log("Successfully started.")
self.logger.debug("Operation succeeded: Start.")

View File

@ -509,7 +509,7 @@ class PreinstallImpl:
# the temporary Files for /etc/hosts
tmp_hostipname = "./tmp_hostsiphostname_%d" % os.getpid()
# Delete the line with 'HOSTS_MAPPING_FLAG' in the /etc/hosts
cmd = "grep -v '%s' %s > %s && cp %s %s && rm -rf '%s'" % \
cmd = "grep -v '%s' %s > %s ; cp %s %s && rm -rf '%s'" % \
("#Gauss.* IP Hosts Mapping", '/etc/hosts', tmp_hostipname,
tmp_hostipname, '/etc/hosts', tmp_hostipname)
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
@ -559,7 +559,7 @@ class PreinstallImpl:
tmp_hostipname = "./tmp_hostsiphostname_%d" % os.getpid()
# Delete the line with 'HOSTS_MAPPING_FLAG' in the /etc/hosts
cmd = "if [ -f '%s' ]; then grep -v '%s' %s > %s " \
"&& cp %s %s && rm -rf '%s'; fi" % \
"; cp %s %s ; rm -rf '%s'; fi" % \
('/etc/hosts', "#Gauss.* IP Hosts Mapping", '/etc/hosts',
tmp_hostipname, tmp_hostipname, '/etc/hosts', tmp_hostipname)
# exec the cmd on all remote nodes

View File

@ -166,7 +166,7 @@ def sendLogFiles():
cmd = "%s && (if [ -f '%s'/'%s' ];then rm -rf '%s'/'%s';fi)" % \
(cmd, g_tmpdir, tarName, g_tmpdir, tarName)
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
if (status != 0):
if status != 0:
g_logger.logExit("Failed to delete %s." % "%s and %s" % (
g_resultdir, tarName) + " Error:\n%s" % output)
g_logger.logExit("All collection tasks failed")
@ -174,16 +174,16 @@ def sendLogFiles():
cmd = "cd '%s' && tar -zcf '%s' '%s' && chmod %s '%s'" % \
(g_tmpdir, tarName, HOSTNAME, DefaultValue.FILE_MODE, tarName)
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
if (status != 0):
if status != 0:
g_logger.logExit("Failed to compress %s." % ("directory %s/%s" % \
(g_tmpdir,
HOSTNAME))
+ " Error: \n%s" % output)
if (g_opts.nodeName != ""):
if g_opts.nodeName != "":
# send backup file which is compressed to the node that is
# currently performing the backup
if (g_opts.nodeName == DefaultValue.GetHostIpOrName()):
if g_opts.nodeName == DefaultValue.GetHostIpOrName():
if int(g_opts.speedLimitFlag) == 1:
cmd = "rsync --bwlimit=%d '%s'/'%s' '%s'/" % \
(g_opts.speedLimitKBs, g_tmpdir, tarName,
@ -198,7 +198,7 @@ def sendLogFiles():
g_opts.speedLimitKBs * 8, g_opts.nodeName, g_tmpdir, tarName,
g_opts.outputDir)
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
if (status != 0):
if status != 0:
g_logger.logExit(
"Failed to copy %s." % tarName + " Error:\n%s" % output)
@ -208,7 +208,7 @@ def sendLogFiles():
cmd = "%s && (if [ -f '%s'/'%s' ];then rm -rf '%s'/'%s';fi)" % \
(cmd, g_tmpdir, tarName, g_tmpdir, tarName)
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
if (status != 0):
if status != 0:
g_logger.logExit("Failed to delete %s. %s" % (
"%s and %s" % (g_resultdir, tarName), " Error:\n%s" % output))
@ -219,7 +219,7 @@ def checkParameterEmpty(parameter, parameterName):
input : parameter, parameterName
output : NA
"""
if (parameter == ""):
if parameter == "":
GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
% parameterName)
@ -239,7 +239,7 @@ def parseCommandLine():
except getopt.GetoptError as e:
# Error exit if an illegal parameter exists
GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e))
if (len(args) > 0):
if len(args) > 0:
# Error exit if an illegal parameter exists
GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] %
str(args[0]))
@ -253,7 +253,7 @@ def parseCommandLine():
parameter_keys = parameter_map.keys()
for key, value in opts:
if (key in parameter_keys):
if key in parameter_keys:
if key == "-C":
value = value.replace("#", "\"")
parameter_map[key] = value.strip()
@ -278,18 +278,18 @@ def parseCommandLine():
checkParameterEmpty(g_opts.user, "U")
DefaultValue.checkUser(g_opts.user, False)
# check log file
if (g_opts.logFile == ""):
if g_opts.logFile == "":
g_opts.logFile = DefaultValue.getOMLogPath(DefaultValue.LOCAL_LOG_FILE,
g_opts.user, "", "")
if (not os.path.isabs(g_opts.logFile)):
if not os.path.isabs(g_opts.logFile):
GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log")
if (int(g_opts.speedLimitKBs) < 0):
if int(g_opts.speedLimitKBs) < 0:
GaussLog.exitWithError(ErrorCode.GAUSS_526["GAUSS_53032"])
g_opts.speedLimitKBs = int(g_opts.speedLimitKBs)
# 1048576 KB/s = 1GB/s, which means unlimited.
if (g_opts.speedLimitKBs == 0):
if g_opts.speedLimitKBs == 0:
g_opts.speedLimitKBs = 1048576
@ -370,7 +370,7 @@ def create_temp_result_folder():
DefaultValue.KEY_DIRECTORY_MODE, g_resultdir)
g_logger.debug("Command for creating output directory: %s" % cmd)
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
if (status != 0):
if status != 0:
g_logger.logExit("Failed to create the %s directory." % \
("%s/logfiles and %s/configfiles" % (
g_resultdir, g_resultdir)) + " Error:\n%s" % output)
@ -460,7 +460,7 @@ def basic_info_check():
# file
for cmd in cmds:
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
g_logger.debug(
("Failed to collect basic information. Error:\n%s." % output) +
("The cmd is %s " % cmd))
@ -496,7 +496,7 @@ def system_check():
cmd = cmd.replace("\n", " ")
if "echo" in cmd:
continue
if (status != 0):
if status != 0:
if "Permission denied" in output:
output = "can not print info to file: Permission denied"
g_jobInfo.failedTask[cmd] = replaceInvalidStr(output)
@ -682,17 +682,17 @@ def matchFile(begin_t, end_t, fileTime):
and the end time.
"""
# both of begin_time and end_time
if (begin_t and end_t):
if begin_t and end_t:
for t in fileTime:
if (compareTime(t, begin_t) and compareTime(end_t, t)):
if compareTime(t, begin_t) and compareTime(end_t, t):
return True
# only begin_time
elif (begin_t and (not end_t)):
elif begin_t and (not end_t):
for t in fileTime:
if compareTime(t, begin_t):
return True
# only end_time
elif ((not begin_t) and end_t):
elif (not begin_t) and end_t:
for t in fileTime:
if compareTime(end_t, t):
return True
@ -858,7 +858,7 @@ def log_copy_for_zenith():
g_logger.log(json.dumps(g_jobInfo.__dict__))
raise Exception("")
if (g_opts.key):
if g_opts.key:
# Look for keyword matching in the dir and write to the specified file
cmd = "echo \"\" > %s/logfiles/%s; for f in `find %s -type f`;" \
" do grep -ai '%s' $f >> %s/logfiles/%s; done" % (
@ -896,7 +896,7 @@ def log_copy():
deleteCmd = "cd $GAUSSLOG && if [ -d tmp_gs_collector ];" \
"then rm -rf tmp_gs_collector; fi"
if (g_opts.key is not None and g_opts.key != ""):
if g_opts.key is not None and g_opts.key != "":
g_logger.debug(
"Keyword for collecting log in base64 encode [%s]." % g_opts.key)
g_opts.key = base64.b64decode(g_opts.key)
@ -907,7 +907,7 @@ def log_copy():
"Speed limit to copy log files is %d KB/s." % g_opts.speedLimitKBs)
# Filter the log files, if has keyword, do not collect prf file
if (g_opts.key is not None and g_opts.key != ""):
if g_opts.key is not None and g_opts.key != "":
cmd = "cd $GAUSSLOG && if [ -d tmp_gs_collector ];" \
"then rm -rf tmp_gs_collector; " \
"fi && (find . -type f -iname '*.log' -print)" \
@ -1010,7 +1010,7 @@ def log_copy():
(DefaultValue.DIRECTORY_MODE, zipdir,
zipFileName, zipdir)
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
g_jobInfo.failedTask[
"find log zip files"] = replaceInvalidStr(output)
g_logger.log(json.dumps(g_jobInfo.__dict__))
@ -1022,15 +1022,15 @@ def log_copy():
g_logger.debug("There is no zip files.")
# Filter keywords
if (g_opts.key is not None and g_opts.key != ""):
if (len(logs) != 0):
if g_opts.key is not None and g_opts.key != "":
if len(logs) != 0:
g_opts.key = g_opts.key.replace('$', '\$')
g_opts.key = g_opts.key.replace('\"', '\\\"')
cmd = "cd $GAUSSLOG/tmp_gs_collector && "
cmd = "%s grep \"%s\" -r * > %s/logfiles/%s" % (
cmd, g_opts.key, g_resultdir, keyword_result)
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0 and output != ""):
if status != 0 and output != "":
cmd = "rm -rf $GAUSSLOG/tmp_gs_collector"
(status1, output1) = DefaultValue.retryGetstatusoutput(cmd)
g_jobInfo.failedTask[
@ -1050,7 +1050,7 @@ def log_copy():
cmd = "touch %s/logfiles/%s && " % (g_resultdir, keyword_result)
cmd = "%s rm -rf $GAUSSLOG/tmp_gs_collector" % cmd
(status, output) = DefaultValue.retryGetstatusoutput(cmd)
if (status != 0):
if status != 0:
g_jobInfo.failedTask["touch keyword file"] = replaceInvalidStr(
output)
g_logger.log(json.dumps(g_jobInfo.__dict__))
@ -1071,7 +1071,7 @@ def log_copy():
"&& rm -rf $GAUSSLOG/'%s'" % \
(cmd, logfiletar)
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
g_jobInfo.failedTask[
"copy result file and delete tmp file"] = replaceInvalidStr(
output)
@ -1136,7 +1136,7 @@ def xlog_copy():
(g_resultdir, g_current_time, g_current_time,
g_current_time)
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
g_logger.debug(
"Failed to collect xlog. Command %s \n, Error %s \n",
(cmd, output))
@ -1236,7 +1236,7 @@ def parallel_xlog(Inst):
cmd = getXlogCmd(Inst)
if len(cmd) > 1:
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
g_logger.debug(
"Failed to collect xlog files. Command: %s.\n Error: %s\n" % (
cmd, output))
@ -1383,17 +1383,9 @@ def conf_gstack(jobName):
try:
# Gets all instances of the cluster
Instances = []
for Inst in g_localnodeinfo.gtms:
if "gtm" in ",".join(g_opts.content).lower():
Instances.append(Inst)
for Inst in g_localnodeinfo.coordinators:
if "cn" in ",".join(g_opts.content).lower():
Instances.append(Inst)
for Inst in g_localnodeinfo.datanodes:
if "dn" in ",".join(g_opts.content).lower():
Instances.append(Inst)
for Inst in g_localnodeinfo.gtses:
Instances.append(Inst)
# parallel copy configuration files, and get gstack
if Instances:
pool = ThreadPool(DefaultValue.getCpuSet())
@ -1447,7 +1439,7 @@ def plan_simulator_check():
"-p %d -D %s/planSimulatorfiles/%s" % \
(cmd, db, cnInst.port, g_resultdir, db)
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
g_logger.debug(
"Failed to Collect plan simulator. "
"Command %s.\n Error: %s.\n" % (
@ -1470,7 +1462,7 @@ def getBakConfCmd(Inst):
"""
cmd = ""
pidfile = ""
if (Inst.instanceRole == DefaultValue.INSTANCE_ROLE_GTM):
if Inst.instanceRole == DefaultValue.INSTANCE_ROLE_GTM:
if g_need_gstack == 0:
cmd = "mkdir -p -m %s '%s/configfiles/config_%s/gtm_%s'" % \
(
@ -1508,7 +1500,7 @@ def getBakConfCmd(Inst):
"collect gtm_%s process stack info" % Inst.instanceId] = \
ErrorCode.GAUSS_535["GAUSS_53511"] % 'GTM'
elif (Inst.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR):
elif Inst.instanceRole == DefaultValue.INSTANCE_ROLE_COODINATOR:
if g_need_gstack == 0:
cmd = "mkdir -p -m %s '%s/configfiles/config_%s/cn_%s'" % \
(
@ -1542,7 +1534,7 @@ def getBakConfCmd(Inst):
"collect cn_%s process stack info" % Inst.instanceId] = \
ErrorCode.GAUSS_535["GAUSS_53511"] % 'CN'
elif (Inst.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE):
elif Inst.instanceRole == DefaultValue.INSTANCE_ROLE_DATANODE:
if g_need_gstack == 0:
cmd = "mkdir -p -m %s '%s/configfiles/config_%s/dn_%s'" % \
(
@ -1584,7 +1576,7 @@ def parallel_conf_gstack(Inst):
"""
(cmd, pidfile) = getBakConfCmd(Inst)
(status, output) = subprocess.getstatusoutput(cmd)
if (status != 0):
if status != 0:
if "command not found" in output:
g_jobInfo.failedTask["collect process stack info"] = \
ErrorCode.GAUSS_535["GAUSS_53512"]
@ -1625,7 +1617,7 @@ def parseConfig():
input : NA
output: NA
"""
if (g_opts.config != ""):
if g_opts.config != "":
d = json.loads(g_opts.config)
g_opts.content = d['Content'].split(",")
@ -1645,24 +1637,24 @@ def main():
elif g_opts.action == "create_dir":
create_temp_result_folder()
# Get system information
elif (g_opts.action == "system_check"):
elif g_opts.action == "system_check":
system_check()
# Gets the database information
elif (g_opts.action == "database_check"):
elif g_opts.action == "database_check":
database_check()
# Make a copy of the log file
elif (g_opts.action == "log_copy"):
elif g_opts.action == "log_copy":
log_copy()
# Copy configuration files, and get g stack
elif (g_opts.action == "Config"):
elif g_opts.action == "Config":
conf_gstack("Config")
elif (g_opts.action == "Gstack"):
elif g_opts.action == "Gstack":
global g_need_gstack
g_need_gstack = 1
conf_gstack("Gstack")
g_need_gstack = 0
# Send all log files we collected to the command node.
elif (g_opts.action == "copy_file"):
elif g_opts.action == "copy_file":
sendLogFiles()
elif g_opts.action == "xlog_copy":
xlog_copy()