1765 lines
63 KiB
Python
1765 lines
63 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding:utf-8 -*-
|
|
#############################################################################
|
|
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
|
|
#
|
|
# openGauss is licensed under Mulan PSL v2.
|
|
# You can use this software according to the terms
|
|
# and conditions of the Mulan PSL v2.
|
|
# You may obtain a copy of Mulan PSL v2 at:
|
|
#
|
|
# http://license.coscl.org.cn/MulanPSL2
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OF ANY KIND,
|
|
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
# See the Mulan PSL v2 for more details.
|
|
# ----------------------------------------------------------------------------
|
|
# Description : gs_check is a utility to check cluster and database status
|
|
#############################################################################
|
|
|
|
import subprocess
|
|
import os
|
|
import sys
|
|
import re
|
|
import getpass
|
|
import time
|
|
import pwd
|
|
import pickle
|
|
# Location of this script and of the bundled C libraries shipped with it.
package_path = os.path.dirname(os.path.realpath(__file__))
ld_path = package_path + "/gspylib/clib"
# Ensure the bundled libraries are first on LD_LIBRARY_PATH. The dynamic
# loader reads this variable only at process start, so after changing the
# environment the script re-executes itself with os.execve (which replaces
# the current process and does not return).
if 'LD_LIBRARY_PATH' not in os.environ:
    os.environ['LD_LIBRARY_PATH'] = ld_path
    os.execve(os.path.realpath(__file__), sys.argv, os.environ)
if not os.environ.get('LD_LIBRARY_PATH').startswith(ld_path):
    # LD_LIBRARY_PATH exists but does not start with our clib path:
    # prepend it and re-exec once more.
    os.environ['LD_LIBRARY_PATH'] = \
        ld_path + ":" + os.environ['LD_LIBRARY_PATH']
    os.execve(os.path.realpath(__file__), sys.argv, os.environ)
|
|
|
|
import xml.etree.cElementTree as ETree
|
|
from datetime import datetime, timedelta
|
|
from multiprocessing.dummy import Pool as ThreadPool
|
|
from gspylib.inspection.common.Exception import CheckException, \
|
|
UseBothParameterException, \
|
|
SceneNotFoundException, ParseItemException, \
|
|
NotEmptyException, \
|
|
NotExistException, InterruptException, ThreadCheckException, \
|
|
ContextDumpException, ContextLoadException, \
|
|
TimeoutException
|
|
from gspylib.common.Common import DefaultValue
|
|
from gspylib.common.ParameterParsecheck import Parameter
|
|
from gspylib.inspection.common import SharedFuncs
|
|
from gspylib.inspection.common.Log import LoggerFactory
|
|
from gspylib.inspection.common.TaskPool import Watcher, CheckThread
|
|
from gspylib.inspection.common.CheckResult import CheckResult, ItemResult
|
|
from gspylib.inspection.common.CheckItem import CheckItemFactory
|
|
from gspylib.inspection.common.ProgressBar import MultiProgressManager, \
|
|
LineProgress
|
|
from gspylib.common.DbClusterInfo import dbClusterInfo
|
|
from base_utils.os.env_util import EnvUtil
|
|
from base_utils.os.file_util import FileUtil
|
|
from base_utils.os.net_util import NetUtil
|
|
from domain_utils.domain_common.cluster_constants import ClusterConstants
|
|
|
|
|
|
#############################################################################
|
|
# Global variables
|
|
# g_opts: global option
|
|
# g_logger: global logger
|
|
# g_context :global context
|
|
# g_result : global result
|
|
# g_endTime : global endTime
|
|
# DIRECTORY_MODE: global directory mode
|
|
# MPPDB_VERSION_R5 : mppdb version
|
|
# DEFAULT_TIMEOUT : time out
|
|
#############################################################################
|
|
g_logger = None      # global logger, created in initLogFile()
g_opts = None        # global CmdOptions instance, filled by parseCommandLine()
g_context = None     # global CheckContext instance
g_result = None      # global CheckResult instance
g_endTime = None     # global end time of the check run
g_mtuMap = {}        # node -> MTU value cache
g_itemResult = {}    # item name -> ItemResult cache

# default timeout (seconds) for a scene check; --time-out may only raise it
DEFAULT_TIMEOUT = 1500
# single cluster will skip these items
# because single clusters don't need to perform consistency checks and
# internal communication class checks
SINGLE_SKIP = ["CheckTimeZone", "CheckEncoding", "CheckKernelVer",
               "CheckNTPD", "CheckCpuCount",
               "CheckMemInfo", "CheckDiskConfig",
               "CheckUpVer", "CheckPgxcgroup", "CheckPing",
               "CheckNetWorkDrop", "CheckNetSpeed"]

# items that are check-only: they are skipped when --set asks for the
# abnormal items to be corrected automatically
SETITEM_SKIP = ["CheckCPU", "CheckTimeZone", "CheckOSVer", "CheckNTPD",
                "CheckSshdService", "CheckEtcHosts",
                "CheckCpuCount", "CheckHyperThread", "CheckMemInfo",
                "CheckKernelVer", "CheckEncoding", "CheckBootItems",
                "CheckDropCache", "CheckFilehandle", "CheckKeyProAdj",
                "CheckDiskFormat", "CheckInodeUsage", "CheckSpaceUsage",
                "CheckDiskConfig", "CheckXid", "CheckSysTabSize",
                "CheckClusterState", "CheckConfigFileDiff", "CheckUpVer",
                "CheckEnvProfile", "CheckGaussVer", "CheckPortRange",
                "CheckReadonlyMode", "CheckCatchup", "CheckProcessStatus",
                "CheckSpecialFile", "CheckCollector", "CheckLargeFile",
                "CheckProStartTime", "CheckMpprcFile", "CheckLockNum",
                "CheckCurConnCount", "CheckCursorNum", "CheckPgxcgroup",
                "CheckLockState", "CheckIdleSession", "CheckDBConnection",
                "CheckSysTable", "CheckSysTabSize", "CheckTableSpace",
                "CheckTableSkew", "CheckDNSkew", "CheckCreateView",
                "CheckHashIndex", "CheckNextvalInDefault", "CheckPgxcRedistb",
                "CheckReturnType", "CheckSysadminUser", "CheckTDDate",
                "CheckDropColumn", "CheckDiskFailure", "CheckPing",
                "CheckNetWorkDrop", "CheckUsedPort", "CheckNICModel",
                "CheckRouting", "CheckNetSpeed", "CheckDataDiskUsage"]
|
|
|
|
|
|
class CmdOptions():
    """
    Container for the command-line options of gs_check.

    All fields are plain attributes; parseCommandLine() fills them in
    from the parsed parameter dictionary.
    """

    def __init__(self):
        # --- identity / execution mode ---
        self.user = None
        self.localMode = False
        self.distributing = False
        self.isSingle = False
        # --- item selection ---
        self.scene = None
        self.items = None
        self.skipItems = []
        self.skipRootItems = False
        self.set = False
        # --- output control ---
        self.language = 'zh'
        self.format = 'default'
        self.outPath = None
        self.logFile = None
        self.nonPrinting = False
        # --- cluster / node information ---
        self.nodes = []
        self.cluster = None
        self.routing = None
        self.LCName = None
        self.ShrinkNodes = None
        # --- tuning / credentials ---
        self.timeout = DEFAULT_TIMEOUT
        self.thresholdDn = None
        self.pwdMap = {}
|
|
|
|
|
|
class CheckContext():
    """
    Execution context shared by one check run.

    The context carries everything a check needs (user, cluster, nodes,
    items, paths ...) and can serialize itself to a cache file so that
    local and remote executions of the same check share one state.
    """

    def __init__(self):
        """
        Constructor: set every context field to its default value.
        """
        # base directory of the inspection framework
        self.basePath = os.path.join(
            os.path.split(os.path.realpath(__file__))[0], 'gspylib',
            'inspection')
        self.user = None
        self.set = None
        self.log = None
        self.postAnalysis = False
        # check item name -> item script path (see loadSupportItems)
        self.supportItems = {}
        # scene name -> scene xml path (see loadSupportScene)
        self.supportScenes = {}
        self.items = []
        self.rootItems = []
        self.cluster = None
        self.nodes = []
        self.mpprc = None
        # unique identifier of this check run (date + seconds-of-day + pid)
        self.checkID = self.genCheckID()
        self.thresholdDn = None
        self.outPath = os.path.join(self.basePath, "output")
        self.logFile = None
        self.tmpPath = None
        # node ip/name -> real hostname (see getMapping)
        self.hostMapping = None
        self.routing = None
        self.skipSetItem = []
        self.oldNodes = []
        self.newNodes = []
        self.oldItems = []
        self.newItems = []
        self.LCName = None
        self.ShrinkNodes = None

    def genCheckID(self):
        '''
        function : generate the check ID which is unique for once checking
        input : NA
        output : checkID string (YYYYMMDD + seconds of day + pid)
        '''
        t = time.localtime(time.time())
        dateString = time.strftime("%Y%m%d", t)
        seconds = timedelta(hours=t.tm_hour, minutes=t.tm_min,
                            seconds=t.tm_sec).seconds
        pidString = str(os.getpid())
        return dateString + str(seconds) + pidString

    def setCheckID(self, checkID):
        '''
        function : set check id (used by distributed workers via --cid)
        input : checkID
        output : NA
        '''
        self.checkID = checkID

    def getCacheFile(self):
        '''
        function : build the path of the serialized context cache file
        input : NA
        output : path string
        '''
        return "%s/context_%s.cache" % (self.tmpPath, self.checkID)

    def checkMPPDBVersion(self):
        '''
        function : check mppdb version
        input : NA
        output : version string shaped like VxxxRxxxCxx
        '''
        cmd = "gsql -V"
        output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprc)
        # NOTE(review): search() returns None when the pattern is absent,
        # which would raise AttributeError here -- original behavior kept.
        return re.compile(r'V[0-9]{3}R[0-9]{3}C[0-9]{2}').search(
            output).group()

    def loadClusterInfo(self, user=None):
        '''
        function : load cluster info from static config file
        input : user (defaults to self.user)
        output : dbClusterInfo instance, or None on any failure
        '''
        u = user if user is not None else self.user
        if u is None:
            return None
        try:
            clusterInfo = dbClusterInfo()
            clusterInfo.initFromStaticConfig(u)
            return clusterInfo
        except Exception:
            # best effort: an unreadable static config means "no cluster"
            return None

    def loadSupportItems(self):
        '''
        function : load support items by scanning the disk files
        input : NA
        output : NA
        raises : NotEmptyException when no item script is found
        '''
        itemPath = "%s/items/" % self.basePath
        for (dirpath, dirnames, filenames) in os.walk(itemPath):
            for f in filenames:
                (fileName, suffix) = os.path.splitext(f)
                # item scripts are named Check*.py
                if (fileName.find("Check") == 0 and suffix == ".py"):
                    self.supportItems[fileName] = os.path.join(dirpath, f)
        if (not self.supportItems):
            raise NotEmptyException("support items")

    def loadSupportScene(self):
        '''
        function : load support scene by scanning the scene
                   configuration files in config folder
        input : NA
        output : NA
        raises : NotEmptyException when no scene file is found
        '''
        configPath = "%s/config/" % self.basePath
        for (dirpath, dirnames, filenames) in os.walk(configPath):
            for f in filenames:
                (fileName, suffix) = os.path.splitext(f)
                # scene files are named scene_<name>.xml
                if (fileName.find("scene_") == 0 and suffix == ".xml"):
                    self.supportScenes[fileName[6:]] = \
                        os.path.join(dirpath, f)
        if (not self.supportScenes):
            raise NotEmptyException("support scenes")

    def loadSceneConfiguration(self, scene):
        '''
        function : load certain scene configuration in xml file; every tag
                   under <configuration> becomes an attribute of the context
        input : scene name
        output : NA
        raises : SceneNotFoundException when the scene xml does not exist
        '''
        configFile = "%s/config/scene_%s.xml" % (self.basePath, scene)
        if not os.path.isfile(configFile):
            raise SceneNotFoundException(scene, self.supportScenes)
        rootNode = ETree.parse(configFile).getroot()
        configElem = rootNode.find('configuration')
        if configElem is not None:
            for elem in list(configElem):
                setattr(self, elem.tag, elem.text.strip())

    def isCached(self):
        '''
        function : whether the context was serialized to disk
        input : NA
        output : boolean
        '''
        return os.path.isfile(self.getCacheFile())

    def clean(self):
        '''
        function : clean the cache file
        input : NA
        output : NA
        '''
        cmd = "rm -rf %s" % self.getCacheFile()
        SharedFuncs.runShellCmd(cmd)

    def dump(self):
        '''
        function : serialize the check context to disk
        input : NA
        output : NA
        raises : ContextDumpException on any failure
        '''
        self.clean()
        try:
            # context manager closes the handle even when pickling fails
            # (the original left the file object unclosed)
            with open(self.getCacheFile(), "wb") as fp:
                pickle.dump(self, fp, True)
            SharedFuncs.chmodFile(self.getCacheFile(),
                                  DefaultValue.KEY_FILE_MODE)
        except Exception as e:
            raise ContextDumpException(e)

    def load(self, fileName=None):
        '''
        function : load the check context from disk
        input : path of the context file (defaults to the cache file)
        output : CheckContext instance, or None when no cache exists
        raises : ContextLoadException on any failure
        '''
        f = fileName if fileName is not None else self.getCacheFile()
        result = None
        if self.isCached():
            try:
                # context manager closes the handle even on error
                with open(f, "rb") as fp:
                    result = pickle.load(fp)
            except Exception as e:
                raise ContextLoadException(e)
        return result

    def getNodeName(self, host):
        '''
        function : resolve the real hostname of one node and record it
                   into self.hostMapping
        input : host ip or name
        output : NA
        '''
        if "HOST_IP" in list(os.environ.keys()):
            cmd = "echo $HOST_IP"
        else:
            cmd = "hostname"
        if SharedFuncs.is_local_node(host):
            output = SharedFuncs.runShellCmd(cmd)
        else:
            output = SharedFuncs.runSshCmd(cmd, host, self.user)
        # keep only the last non-empty line of the command output
        hostname = output.strip().split('\n')[-1].strip()
        self.hostMapping[host] = hostname

    def getMapping(self):
        '''
        function : get the ip to hostname mapping with all host
        input : NA
        output : NA
        '''
        self.hostMapping = {}
        if not self.nodes:
            return
        try:
            # resolve all nodes concurrently with a thread pool
            pool = ThreadPool(DefaultValue.getCpuSet())
            pool.map(self.getNodeName, self.nodes)
            pool.close()
            pool.join()
        except Exception as e:
            raise Exception(str(e))

    def sendTmpFile(self, host):
        '''
        function : create the tmp directory on one remote host and copy
                   the context cache file into it
        input : host
        output : NA
        '''
        cmd = "if [ ! -d %s ]; then mkdir %s -p -m %s;fi" % (
            self.tmpPath, self.tmpPath, DefaultValue.KEY_DIRECTORY_MODE)
        SharedFuncs.runSshCmd(cmd, host, self.user)
        SharedFuncs.sendFile(self.getCacheFile(), host, self.user,
                             self.tmpPath)

    def dispatch(self, hosts):
        '''
        function : send the serialized context file to remote host
        input : remote host list
        output : NA
        raises : CheckException when the cache file is missing
        '''
        if len(hosts) == 0 or g_opts.isSingle:
            return
        fileName = self.getCacheFile()
        if not os.path.isfile(fileName):
            raise CheckException("File %s is not exist or invalid" % fileName)
        try:
            # copy the cache file to all hosts concurrently
            pool = ThreadPool(DefaultValue.getCpuSet())
            pool.map(self.sendTmpFile, hosts)
            pool.close()
            pool.join()
        except Exception as e:
            raise Exception(str(e))
|
|
|
|
|
|
#############################################################################
|
|
# Parse and check parameters
|
|
#############################################################################
|
|
def usage():
    """
    gs_check is a utility to check the health status of a cluster.

    Usage:
      gs_check -? | --help
    Example:
      gs_check -i ITEM [...] [-U USER] [-L] [-l LOGFILE] [-o OUTPUTDIR]
               [--skip-root-items] [--set] [--routing]
      gs_check -e SCENE_NAME [-U USER] [-L] [-l LOGFILE] [-o OUTPUTDIR]
               [--skip-root-items] [--set] [--time-out=SECS]
               [--routing] [--skip-items]

    General options:
      -i                           Health check item number.
                                   OLAP Example: -i CheckCPU,CheckMTU,
                                   CheckPing.
      -e                           Health check scene name.
                                   OLAP Example: -e inspect/upgrade/slow_node/
                                   binary_upgrade/health/install/longtime
      -U                           Cluster user.
      -L                           Run the command as local mode.
      -l                           Path of log file.
      -o                           Save the result to the specified directory.
      --cid                        The check ID used for identify a check
                                   process, only for internal use.
      --skip-root-items            Skip the items with root privileges.
      --disk-threshold             Set disk threshold for checking disk usage,
                                   only for CheckDataDiskUsage.
      --format                     Set the format of the result report.
      --set                        Set abnormal items if supported
      --time-out                   Set the timeout for scene check, default
                                   1500 seconds.
      --routing                    The network segment with business ip,
                                   example: 192.168.1.1:255.255.255.0
      --skip-items                 Skip the specified check item or setting
                                   item with scene check
                                   Example: --skip-items CheckCPU,CheckMTU
      --non-print                  Do not print output result.
      -?, --help                   Show help information for this utility,
                                   and exit the command line mode.
      -V, --version                Show version information.
    """
    # The docstring above IS the runtime help text (printed verbatim),
    # so any wording change here changes the program's output.
    print(usage.__doc__)
|
|
|
|
|
|
def version():
    '''
    function : print the version of the check tool on stdout
    input : NA
    output: NA
    '''
    versionString = SharedFuncs.getVersion()
    print(versionString)
|
|
|
|
|
|
#########################################################
|
|
# Init global log
|
|
#########################################################
|
|
def initGlobal():
    """
    function: initialize the global variable
    input : NA
    output: NA
    """
    global g_opts, g_context, g_result
    # fresh option, context and result holders for this run
    g_opts, g_context, g_result = CmdOptions(), CheckContext(), CheckResult()
|
|
|
|
|
|
def parseCommandLine():
    """
    function: Parse command line and save to global variable
    input : NA
    output: NA
    raises: UseBothParameterException when mutually exclusive options are
            combined; CheckException on invalid --time-out / --format
    """
    global g_opts
    g_opts = CmdOptions()
    paraObj = Parameter()
    paraDict = paraObj.ParameterCommandLine("check")
    if "helpFlag" in paraDict:
        usage()
        sys.exit(0)

    # command line parameter group definition for gs_check:
    # each key conflicts with its value
    irrelevantPara = {"scenes": "itemstr", "time_out": "itemstr",
                      "skipItems": "itemstr",
                      "cid": "scenes", "nodegroup_name": "scenes",
                      "shrinkNodes": "scenes"}
    # internal parameter name -> option name shown in error messages
    paraNameMap = {"itemstr": "i", "scenes": "e", "time_out": "-time-out",
                   "skipItems": "-skip-items",
                   "cid": "-cid", "nodegroup_name": "-nodegroup-name",
                   "shrinkNodes": "-ShrinkNodes"}
    # supported report formats
    formatList = ['default', 'json']

    # position parameter can not be set at the same time
    for para, conflict in irrelevantPara.items():
        if para in paraDict and conflict in paraDict:
            raise UseBothParameterException(
                (paraNameMap[para], paraNameMap[conflict]))

    if "itemstr" in paraDict:
        g_opts.items = paraDict["itemstr"]
    if "scenes" in paraDict:
        g_opts.scene = paraDict["scenes"]
    if "outFile" in paraDict:
        g_context.outPath = paraDict["outFile"]
    if "logFile" in paraDict:
        g_opts.logFile = paraDict["logFile"]
    if "user" in paraDict:
        g_context.user = paraDict["user"]
    if "hostfile" in paraDict:
        for node in FileUtil.readFile(paraDict["hostfile"]):
            g_opts.nodes.append(node.strip())
    if "cid" in paraDict:
        # an internal check id means this process is a distributed worker
        g_context.setCheckID(paraDict["cid"])
        g_opts.distributing = True
    if "localMode" in paraDict:
        g_opts.localMode = True
    if "skipRootItems" in paraDict:
        g_opts.skipRootItems = True
    if "disk-threshold" in paraDict:
        g_context.thresholdDn = paraDict["disk-threshold"]
    if "set" in paraDict:
        g_context.set = True
    if "routing" in paraDict:
        g_opts.routing = paraDict["routing"]
    if "skipItems" in paraDict:
        g_opts.skipItems = paraDict["skipItems"]
    if "nodegroup_name" in paraDict:
        g_context.LCName = paraDict["nodegroup_name"]
    if "shrinkNodes" in paraDict:
        g_context.ShrinkNodes = paraDict["shrinkNodes"]
    if "time_out" in paraDict:
        try:
            g_opts.timeout = int(paraDict["time_out"])
        except (ValueError, TypeError):
            raise CheckException("The parameter timeout set invalid value")
        # --time-out may only increase the default timeout
        if g_opts.timeout < DEFAULT_TIMEOUT:
            raise CheckException(
                "The timeout parameter must be set larger than default "
                "value 1500 seconds")
        setTimeOut()
    if "format" in paraDict:
        g_opts.format = paraDict["format"]
        if g_opts.format not in formatList:
            raise CheckException(
                "Format %s is not available,the valid format is %s" % (
                    g_opts.format, ",".join(formatList)))
    if "nonPrinting" in paraDict:
        g_opts.nonPrinting = True
|
|
|
|
def checkParameter():
    """
    function: validate the parsed command line options and prepare the
              cluster user and output directory
    input : NA
    output: NA
    raises: CheckException on invalid combinations
    """
    ##########################################################
    if g_opts.nodes:
        raise CheckException("The --hosts parameter is not available")
    # root may only run in local or distributing mode
    if __isRoot() and not __isDistributing():
        if not g_opts.localMode:
            raise CheckException(
                "The command must be running with cluster user")
    ########################################################
    # Get the -U parameter
    ########################################################
    checkuser()

    # NOTE(review): parseCommandLine stores -o into g_context.outPath,
    # while this condition reads g_opts.outPath (initialized to None in
    # CmdOptions) -- as written this branch appears unreachable unless
    # g_opts.outPath is set elsewhere; confirm against callers.
    if (g_opts.outPath and not g_opts.localMode):
        ########################################################
        # create output path
        ########################################################
        createPath(g_opts.outPath, g_context.user)
|
|
|
|
|
|
def checkuser():
    """
    function: validate the cluster user (-U) and, unless running in local
              or distributing mode, verify ssh trust between all cluster
              nodes for that user
    input : NA
    output: NA
    raises: CheckException on any invalid user or broken trust
    """
    # Running as root and not in local mode (the new node scenario):
    # no -U parameter is needed
    if __isRoot() and not g_opts.localMode:
        g_context.user = None
        return
    # Default mode -U for the current user
    if not __isRoot() and not g_context.user:
        g_context.user = SharedFuncs.getCurrentUser()
    if g_context.user:
        # a non-root caller may only check as himself
        if not __isRoot() and g_context.user != SharedFuncs.getCurrentUser():
            raise CheckException(
                "The user %s is not current user" % g_context.user)
        try:
            user_uid = pwd.getpwnam(g_context.user).pw_uid
        except Exception:
            raise CheckException(
                "The user %s is not a effective user." % g_context.user)
        if user_uid == 0:
            raise CheckException("The -U parameter can not be the root user.")
        isClusterUser = SharedFuncs.checkClusterUser(g_context.user,
                                                     __getMpprcFile())
        if isClusterUser:
            # get cluster information
            g_context.mpprc = __getMpprcFile()
            clusterInfo = g_context.loadClusterInfo(g_context.user)
            if clusterInfo:
                g_opts.cluster = clusterInfo
            else:
                # looks like a cluster user but the static configuration
                # could not be loaded
                isClusterUser = False
        if not isClusterUser:
            raise CheckException(
                "The user %s is not valid cluster user" % g_context.user)
    if g_opts.localMode or g_opts.distributing:
        return

    # Check cluster user trust; a single-node cluster that is the local
    # host needs no trust check
    dbNameList = g_opts.cluster.getClusterNodeNames()
    if (len(dbNameList) == 1 and
            dbNameList[0] == NetUtil.GetHostIpOrName()):
        return
    appPath = EnvUtil.getEnv('GPHOME', g_opts.cluster.appPath)
    psshPath = os.path.join(appPath, 'script/gspylib/pssh/bin/pssh')
    cmd = "%s -H %s 'id' " % (psshPath, " -H ".join(dbNameList))
    (status, output) = subprocess.getstatusoutput(cmd)
    if status != 0:
        errorNode = []
        for result in output.split('\n'):
            if result.strip() == "":
                continue
            resultInfo = result.split()
            # Analyze the results
            if len(resultInfo) > 3 and resultInfo[2] == "[SUCCESS]":
                continue
            elif (len(resultInfo) > 3 and resultInfo[2] == "[FAILURE]" and
                    resultInfo[3] in dbNameList):
                errorNode.append(resultInfo[3])
            else:
                # fixed: message used a literal "/n" instead of a newline
                raise CheckException(
                    "Failed to check user trust. commands: %s Error:\n%s"
                    % (cmd, output))
        if errorNode:
            raise CheckException(
                "Failed to check user trust with %s" % errorNode)
        else:
            # fixed: message used a literal "/n" instead of a newline
            raise CheckException(
                "Failed to check user trust. Error:\n%s" % output)
|
|
|
|
|
|
def createPath(path, user=""):
    """
    function: make sure *path* exists as a writable directory, creating
              it when missing; chown it to *user* when running as root
    input : path, optional owner user
    output: NA
    raises: CheckException when path is a file or not writable
    """
    # /dev/null needs no preparation at all
    if path == ClusterConstants.DEV_NULL:
        return
    if os.path.isfile(path):
        raise CheckException("The out path [%s] must be a directory." % path)
    if os.path.isdir(path):
        # existing directory: verify we may create and delete files in it
        if not FileUtil.checkDirWriteable(path):
            raise CheckException(
                "Failed to create or delete file in the [%s]." % path)
    else:
        # missing: recursively create the directory tree
        FileUtil.createDirectory(path, True, DefaultValue.KEY_DIRECTORY_MODE)
    # hand the directory over to the cluster user when running as root
    if __isRoot() and user:
        FileUtil.changeOwner(user, path)
|
|
|
|
|
|
def getTmpPath():
    """
    function: Create (if needed) and return the temporary directory of
              this check run, including its log/ and nodes/ subfolders.
    input : NA
    output: String -- path of the temporary directory
    """
    tmpDir = os.path.join("/tmp", "check_%s" % g_context.checkID)
    for directory in (tmpDir,
                      os.path.join(tmpDir, "log"),
                      os.path.join(tmpDir, "nodes")):
        createPath(directory, g_context.user)
    return tmpDir
|
|
|
|
|
|
def initLogFile():
    """
    function: Initialize the global logger and the check context: reuse a
              cached context when one exists, otherwise build a fresh one
              from the parsed command line options.
    input : NA
    output: NA
    """
    global g_context, g_logger
    # load the context when the script ruuning on local mode and the context
    # was cached before
    g_context.tmpPath = getTmpPath()
    if g_context.isCached():
        # rebind g_context to the deserialized instance from the cache file
        g_context = g_context.load()
        # a node that is "new" has no cluster environment yet, so drop the
        # user/cluster information loaded from the cache
        if __getLocalNode(g_context.nodes) in g_context.newNodes:
            g_context.mpprc = None
            g_context.user = None
            g_context.cluster = None
        (g_logger, logFile) = LoggerFactory.getLogger('gs_check',
                                                      g_context.logFile,
                                                      g_context.user)
        g_context.log = g_logger.debug
        g_logger.debug("Load check context from cache file")
    else:
        # Parameter specified first, followed by default GAUSSLOG,
        # last temporary directory
        if g_opts.logFile:
            g_context.logFile = os.path.realpath(g_opts.logFile)
        elif g_opts.cluster:
            g_context.logFile = os.path.join(g_opts.cluster.logPath,
                                             '%s/om/gs_check.log'
                                             % g_context.user)
        else:
            g_context.logFile = os.path.join(g_context.tmpPath,
                                             'log/gs_check.log')
        (g_logger, g_context.logFile) = LoggerFactory.getLogger(
            'gs_check', g_context.logFile, g_context.user)
        # clean the cache files for reentry the command
        g_context.clean()
        # set mpprc file
        g_context.mpprc = __getMpprcFile()
        # Load support scene by parsing project folder
        g_context.loadSupportScene()
        # Load support check items by parsing the project folder
        g_context.loadSupportItems()
        # load the scene configuration
        if g_opts.scene:
            g_context.loadSceneConfiguration(g_opts.scene)
        # load cluster info
        if g_opts.cluster:
            g_context.cluster = g_opts.cluster
            g_context.oldNodes = g_opts.cluster.getClusterSshIps()[0]
        # load nodes: anything not already in the cluster is a "new" node
        if g_opts.nodes:
            for node in g_opts.nodes:
                if node not in g_context.oldNodes:
                    g_context.newNodes.append(node)
        g_context.nodes = g_context.oldNodes + g_context.newNodes
|
|
|
|
|
|
def getRootUserPwd():
    """
    function: interactively collect a root-privileged user name and its
              password, verify the password against every node, and store
              the credentials in g_opts.pwdMap.
    input : NA
    output: NA
    raises: CheckException when the entered user is not uid 0
    """
    # ask user input root password interactive when in new node scene or
    # contains items with root permission
    if __hasRootItems() and not __isRoot():
        rootItems = [i['name'] for i in g_context.rootItems]
        __printOnScreen(
            "The below items require root privileges to execute:[%s]"
            % " ".join(rootItems))
        # empty input defaults to "root"
        rootuser = input("Please enter root privileges user[root]:")\
                   or "root"
        rootpwd = getpass.getpass("Please enter password for user[%s]:"
                                  % rootuser)
        g_logger.debug("Ask user input password interactive")
        for host in g_context.nodes:
            isPwdOk = SharedFuncs.verifyPasswd(host, rootuser, rootpwd)
            if not isPwdOk:
                # try to connect remote node again (up to 3 prompts)
                rootpwd = __retryConnection(host, rootuser)
            g_opts.pwdMap[host] = (rootuser, rootpwd)
        # the entered user must really be uid 0 on the local system
        if pwd.getpwnam(rootuser).pw_uid != 0:
            raise CheckException("Enter the user [%s] does not have"
                                 " root privileges." % rootuser)
        # print message on screen
        __printOnScreen("Check root password connection successfully")
|
|
|
|
|
|
def parseCheckContext():
    """
    function: Parse check context and initialize all the context value:
              collect items from the scene or -i parameter, filter them
              (skip list, root items, single-cluster skips), confirm
              destructive settings with the user and persist the context.
    input : NA
    output: NA
    raises: ParseItemException / CheckException on invalid items
    """
    global g_context
    initLogFile()
    # a cached context means everything below already happened
    if g_context.isCached():
        return
    g_logger.debug("Start to parse the check items config file")
    items_all = []
    items_oldNode = []
    items_newNode = []
    failedItems = []
    singleSkipList = []
    # generate the items from scene configuration
    if g_opts.scene:
        items_oldNode, failedItems = __parseScene(g_opts.scene)
        items_all += items_oldNode
    # generate the items from -i parameter value
    elif (g_opts.items):
        for i in g_opts.items:
            item = __parseOneItem(i)
            if (not item):
                failedItems.append(i)
            else:
                items_all.append(item)
    # iterate a copy (items_all[:]) so removals do not disturb iteration
    for item in items_all[:]:
        if not g_context.set and item['name'] in g_opts.skipItems:
            items_all.remove(item)
            continue
        if g_context.set and item['set_permission'] == 'root':
            g_context.rootItems.append(item)
        if g_opts.skipRootItems and item['permission'] == 'root':
            items_all.remove(item)
            continue
        if item['permission'] == 'root':
            g_context.rootItems.append(item)
        # single clusters skip consistency/communication items
        if g_opts.isSingle and item['name'] in SINGLE_SKIP:
            singleSkipList.append(item['name'])
            continue
        # CheckRouting needs a routing value: --routing wins, otherwise
        # derive it from the local back ip of the cluster
        if item['name'] == "CheckRouting":
            if g_opts.routing:
                g_context.routing = g_opts.routing
            elif g_opts.cluster:
                workIP = g_opts.cluster.getDbNodeByName(
                    NetUtil.GetHostIpOrName()).backIps[0]
                g_context.routing = "%s:%s" % (
                    workIP, SharedFuncs.getMaskByIP(workIP))
            else:
                raise CheckException(
                    "The --routing is required when cluster dosen't exist")
        g_context.items.append(item)
    if len(singleSkipList) != 0:
        __printOnScreen(
            "The following items are skipped when the type of cluster is"
            " single:\n[%s]" % ",".join(singleSkipList))
    # NOTE(review): items_newNode is never populated in this function, so
    # the else branch below appears dead as written -- confirm whether a
    # new-node item split was intended here.
    if not items_newNode:
        g_context.oldItems = g_context.items
    else:
        g_context.oldItems = items_oldNode
        g_context.newItems = items_newNode
    if g_context.set and items_all:
        # Settings will have a big impact and need to be confirmed
        confirmItem = {
            "CheckCrontabLeft": "Clear om_monitor in crond service",
            "CheckDirLeft": "Delete all file in '/opt/huawei/Bigdata/',"
                            "'/var/log/Bigdata/','/home/omm/'",
            "CheckProcessLeft": "Kill all process with gaussdb and omm user",
            "CheckOmmUserExist": "Delete system user omm",
            "CheckPortConflict": "kill all process with occupies "
                                 "the 25xxx port"
        }
        confirmMsg = ""
        for item in items_all:
            if item['name'] in list(confirmItem.keys()):
                confirmMsg += confirmItem[item['name']] + "\n"
            if item['name'] in SETITEM_SKIP:
                g_context.skipSetItem.append(item['name'])

        if confirmMsg:
            confirmMsg = "Warning: Executing the settings will do " \
                         "the following at the [%s] node:\n" % \
                         ','.join(g_context.newNodes) + confirmMsg
            __printOnScreen(confirmMsg)
            flag = input("Execution settings? (Y/N):")
            while True:
                # If it is not yes or all, it has been imported
                if not flag.upper() in ("Y", "N", "YES", "NO"):
                    flag = input("Please type 'yes' or 'no': ")
                    continue
                break
            if flag.upper() in ("Y", "YES"):
                pass
            # declining moves every destructive item into the skip list
            if flag.upper() in ("N", "NO"):
                for Item in items_all:
                    if Item['name'] in list(confirmItem.keys()):
                        g_context.skipSetItem.append(Item['name'])
                __printOnScreen(
                    'Skip the settings for [%s]'
                    % ','.join(g_context.skipSetItem))
    if failedItems:
        raise ParseItemException(failedItems)
    if not g_context.items:
        raise CheckException("No check item can be performed,"
                             " please confirm the input parameters.")

    # print message on screen
    __printOnScreen("Parsing the check items config file successfully")
    getRootUserPwd()
    g_context.getMapping()
    g_context.dump()
|
|
|
|
|
|
def dispatchCached():
    """
    function: send the cached context file to every remote node, unless
              running in local, distributing or single mode
    input : NA
    output: NA
    """
    skipDispatch = (g_opts.localMode or g_opts.distributing or
                    g_opts.isSingle)
    if skipDispatch:
        return
    g_logger.debug("Start to distributing the check context dump file")
    g_context.dispatch(__getRemoteNodes(g_context.nodes))
    # confirm the distribution to the operator
    __printOnScreen(
        "Distribute the context file to remote hosts successfully")
|
|
|
|
|
|
def __printOnScreen(msg):
    """
    function: log *msg* at info level, except when running in local or
              distributing mode (workers stay silent)
    """
    if not (g_opts.localMode or g_opts.distributing):
        g_logger.info(msg)
|
|
|
|
|
|
def __isRoot():
    """
    function: whether the current process runs as root (uid 0)
    """
    uid = os.getuid()
    return uid == 0
|
|
|
|
|
|
def __hasRootItems():
    """
    function: whether any selected item requires root privileges
    """
    # rootItems may be None or an empty list; both mean "no root items"
    return bool(g_context.rootItems)
|
|
|
|
|
|
def __isDistributing():
    """
    function: whether this process is a distributed worker (--cid given)
    """
    return bool(g_opts.distributing)
|
|
|
|
def __getLocalNode(nodes):
    """
    function: return the entry of *nodes* that designates the local host,
              falling back to the local host name/ip when none matches
    """
    for candidate in nodes or []:
        if SharedFuncs.is_local_node(candidate):
            return candidate
    return NetUtil.GetHostIpOrName()
|
|
|
|
|
|
def __getSeparatedValue(value, separator=","):
    '''
    function: split a command line value on *separator*
    input : value string, optional separator (default ",")
    output: list of substrings; a value without the separator yields a
            single-element list
    '''
    # str.split already returns [value] when the separator is absent,
    # so the former explicit membership check was redundant
    return value.split(separator)
|
|
|
|
|
|
def __getNodesFromFile(fileName):
    """
    function: read node names from a host file, one per line, ignoring
              blanks, comments ('#') and duplicates
    input : fileName
    output: list of node names in file order
    """
    nodes = []
    try:
        with open(fileName, 'r') as fp:
            for raw in fp:
                entry = raw.strip()
                if not entry or entry.startswith('#') or entry in nodes:
                    continue
                nodes.append(entry)
    except Exception as e:
        raise Exception(str(e))
    return nodes
|
|
|
|
|
|
def __retryConnection(host, user):
    """
    function: re-prompt for the password of *user* on *host* and verify
              it, up to three attempts
    input : host, user
    output: the verified password
    raises: CheckException after three failed attempts
    """
    maxAttempts = 3
    for _ in range(maxAttempts):
        passwd = getpass.getpass(
            "Please enter password for user[%s] on the node[%s]:"
            % (user, host))
        if SharedFuncs.verifyPasswd(host, user, passwd):
            return passwd
    raise CheckException(
        "Verify password failed for user[%s] on the node[%s]" % (user, host))
|
|
|
|
|
|
def __getMpprcFile():
    """
    function: get separated environment variables

    Resolution order:
      1. MPPDB_ENV_SEPARATE_PATH when it points to an existing file;
      2. non-root user with GAUSS_ENV set: the current user's ~/.bashrc;
      3. root with a configured cluster user: that user's ~/.bashrc;
      4. root without a cluster user: "" (no separated mpprc file);
      otherwise a CheckException is raised.
    """
    # get mpprc file
    envValue = EnvUtil.getEnv("MPPDB_ENV_SEPARATE_PATH")
    if envValue is not None and os.path.isfile(envValue):
        return envValue
    elif not __isRoot() and EnvUtil.getEnv('GAUSS_ENV'):
        # resolve the current user's home directory through the shell
        cmd = "echo ~ 2>/dev/null"
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            raise CheckException(
                "Fetching user environment variable file failed."
                " Please setup environment variables." + "The cmd is %s" % cmd)
        else:
            return os.path.join(output, ".bashrc")
    elif __isRoot() and g_context.user:
        # resolve the cluster user's home directory by switching users
        cmd = "su - %s -c 'echo ~ 2>/dev/null'" % g_context.user
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            raise CheckException(
                "Failed to get user [%s] home directory. Error: %s\n" % (
                    g_context.user, output) + "The cmd is %s" % cmd)
        else:
            return os.path.join(output, ".bashrc")
    elif __isRoot():
        return ""
    else:
        raise CheckException("The separated mpprc file was not found."
                             " Please setup environment variables")
|
|
|
|
|
|
def __getUserAndPwd(node):
    """
    function: get username and password for certain node

    Stored per-node credentials are only needed when root items must
    run without root privileges; otherwise the cluster user is used
    without a password.
    """
    needStoredPwd = __hasRootItems() and not __isRoot()
    if needStoredPwd:
        entry = g_opts.pwdMap[node]
        return (entry[0], entry[1])
    return (g_context.user, None)
|
|
|
|
|
|
def __getRemoteNodes(hosts):
    '''
    function: get the remote host ignore the local host
    '''
    remoteHosts = []
    for host in hosts:
        if SharedFuncs.is_local_node(host):
            continue
        remoteHosts.append(host)
    return remoteHosts
|
|
|
|
|
|
def __parseScene(sceneName):
    '''
    function: parse scene configure file

    Resolves config/scene_<name>.xml, expands allowed items and
    categories, removes denied items and returns a tuple of
    (parsed items, names of items that failed to parse).

    :raise NotEmptyException:      when sceneName is empty
    :raise SceneNotFoundException: when the scene xml does not exist
    :raise NotExistException:      when an allowed item is unsupported
    '''
    if not sceneName:
        raise NotEmptyException("scene name")
    # Get scene xml
    xmlFile = "%s/config/scene_%s.xml" % (g_context.basePath, sceneName)
    if not os.path.isfile(xmlFile):
        raise SceneNotFoundException(sceneName, g_context.supportScenes)

    domTree = ETree.parse(xmlFile)
    rootNode = domTree.getroot()

    itemNames = []
    thresholds = {}

    # parse items from allow items
    for elem in rootNode.findall('allowitems/item'):
        elemName = elem.attrib['name']
        # check the check item whether exist or not
        if elemName not in list(g_context.supportItems.keys()):
            # report the offending item name itself (the old code passed
            # the literal string "elemName")
            raise NotExistException(elemName, "support items")
        # save threshold as text and parse them later
        subElem = elem.find('threshold')
        if subElem is not None:
            thresholds[elemName] = subElem.text.strip()
        itemNames.append(elemName)

    # parse categories and get all items
    for category in rootNode.findall('allowcategories/category'):
        cpath = "%s/items/%s" % (g_context.basePath, category.attrib['name'])
        if os.path.isdir(cpath):
            itemNames.extend(x[:-3] for x in os.listdir(cpath) if
                             x[:-3] not in itemNames and x.endswith(".py"))

    # parse deny items
    for elem in rootNode.findall('denyitems/item'):
        elemName = elem.attrib['name']
        if elemName in itemNames:
            itemNames.remove(elemName)

    items = []
    failedItems = []
    for i in itemNames:
        item = __parseOneItem(i)
        if not item:
            # record the failure and do NOT append the empty dict to the
            # runnable item list (the old code appended it anyway, and the
            # threshold merge below would then raise KeyError)
            failedItems.append(i)
            continue

        # overwrite the threshold parameters
        if thresholds and i in list(thresholds.keys()):
            # parse the threshold of check item
            sceneThreshold = __parseThreshold(thresholds[i])
            # an item may have no default threshold key at all, so use
            # get() instead of direct indexing
            if item.get('threshold'):
                item['threshold'] = dict(item['threshold'], **sceneThreshold)
            else:
                item['threshold'] = sceneThreshold
        items.append(item)
    return (items, failedItems)
|
|
|
|
|
|
def __parseOneItem(itemName):
    '''
    function: parse one check item and get the full information

    Scans config/items.xml for the <checkitem> whose name attribute
    matches `itemName` and returns a dict with its id, name, localized
    texts, properties and (when present) threshold. Returns an empty
    dict when the item is not found.

    :param itemName: name of the check item
    :raise NotEmptyException: when itemName is empty
    '''
    if not itemName:
        raise NotEmptyException("Item name")
    item = {}
    # try to load check item configuration from xml file
    xmlFile = "%s/config/items.xml" % g_context.basePath
    # iterparse streams the xml so the whole file is never held in memory
    for event, elem in ETree.iterparse(xmlFile):
        if event == 'end':
            if elem.tag == 'checkitem' and elem.attrib['name'] == itemName:
                # Parse the xml file
                item['id'] = elem.attrib['id']
                item['name'] = elem.attrib['name']

                # localized display texts (Chinese/English)
                item['title_zh'] = __parseAttr(elem, "title", "zh")
                item['title_en'] = __parseAttr(elem, "title", "en")
                item['suggestion_zh'] = __parseAttr(elem, "suggestion", "zh")
                item['suggestion_en'] = __parseAttr(elem, "suggestion", "en")
                item['standard_zh'] = __parseAttr(elem, "standard", "zh")
                item['standard_en'] = __parseAttr(elem, "standard", "en")
                # properties with fallback defaults
                item['category'] = __parseProperty(elem, 'category', 'other')
                item['permission'] = __parseProperty(elem, 'permission',
                                                     'user')
                item['set_permission'] = __parseProperty(elem,
                                                         'set_permission',
                                                         'user')
                item['scope'] = __parseProperty(elem, 'scope', 'all')
                item['analysis'] = __parseProperty(elem, 'analysis',
                                                   'default')
                # Get the threshold
                threshold = elem.find('threshold')
                if threshold is not None and threshold.text is not None:
                    # parse the threshold of check item
                    item["threshold"] = __parseThreshold(
                        threshold.text.strip())
                break
    return item
|
|
|
|
|
|
def __parseAttr(elem, attr, language='zh'):
    '''
    function: parse the xml attr with language

    Looks up the child element "<attr>/<language>" and returns its
    stripped text, or "" when the element or its text is missing.

    :param elem:     parent xml Element
    :param attr:     child element name, e.g. "title"
    :param language: language sub-element, "zh" or "en"
    :return: str (the old .encode('utf-8') made the found branch return
             bytes while the fallback returned str — inconsistent types)
    '''
    val = elem.find('/'.join([attr, language]))
    if val is not None and val.text is not None:
        return val.text.strip()
    return ""
|
|
|
|
|
|
def __parseProperty(elem, propertyName, defaultValue):
    '''
    function: parse the property of check item

    Returns the stripped text of the child element `propertyName`,
    falling back to `defaultValue` when the element is absent or empty.
    '''
    prop = elem.find(propertyName)
    if prop is None or prop.text is None:
        return defaultValue
    return prop.text.strip()
|
|
|
|
|
|
def __parseThreshold(value, separator=";"):
    '''
    function: parse the threshold of check item

    Parses "key=value" pairs separated by `separator` into a dict.
    Returns {} when no "=" is present. Segments without "=" are skipped
    (the old code raised IndexError on inputs such as "a=1;b"), and a
    value may itself contain "=" (only the first one splits the pair).
    '''
    result = {}
    if "=" not in value:
        return result
    for pair in value.strip().split(separator):
        pair = pair.strip()
        # skip empty or malformed segments instead of crashing
        if "=" not in pair:
            continue
        # split on the first '=' only, so values may contain '='
        key, _, val = pair.partition("=")
        result[key] = val
    return result
|
|
|
|
|
|
def getMTUValue(node):
    """
    function: collect the MTU value of the network card that carries
    `node`'s back IP and group the node into the global g_mtuMap,
    keyed by MTU value ("node-cardname" entries).

    :param node: node identifier (back IP or host name)
    :raise CheckException: when the network card, its name or its MTU
                           value cannot be determined
    """
    global g_mtuMap
    # get ip address
    # maybe backIP has no trust
    nodeName = g_context.hostMapping[node]
    if (g_context.cluster and
            nodeName in g_context.cluster.getClusterNodeNames()):
        addr = g_context.cluster.getDbNodeByName(nodeName).backIps[0]
        sshIp = g_context.cluster.getDbNodeByName(nodeName).sshIps[0]
    else:
        addr = node
        sshIp = node
    # get all network card information
    cmd1 = """printf \"\n\n`/sbin/ifconfig -a`\n\n\" """
    if not g_opts.pwdMap:
        output = SharedFuncs.runSshCmd(cmd1, sshIp, g_context.user)
    else:
        # stored credentials are required when a password map exists
        username, passwd = g_opts.pwdMap[node]
        if username is None or passwd is None:
            raise CheckException("Retrive username and password error.")
        output = SharedFuncs.runSshCmdWithPwd(cmd1, sshIp, username, passwd)
    # Separate each network card
    networkInfoList = output.strip().split('\n\n')

    networkInfo = ""
    mtuValue = ""
    # find network card by IP
    for eachNet in networkInfoList:
        if eachNet.find(addr) > 0 and eachNet.find('inet') > 0:
            networkInfo = eachNet
            break
    if not networkInfo:
        raise CheckException(
            "Failed to get network card information with '%s'." % node)
    # get network number
    networkNum = networkInfo.split()[0]
    # Remove : if it exists
    if networkNum[-1] == ":":
        networkNum = networkNum[:-1]
    for eachLine in networkInfo.split('\n'):
        # get mtu Value with SuSE and redHat6.x
        if eachLine.find('MTU') > 0:
            mtuValue = eachLine.split(':')[1].split(' ')[0].strip()
            break
        # get mtu Value with redHat7.x
        elif eachLine.find('mtu') > 0:
            mtuValue = eachLine.split()[-1]
            break
        else:
            continue
    if not networkNum:
        raise CheckException(
            "Failed to get network card number with '%s'." % node)
    if not mtuValue:
        raise CheckException(
            "Failed to get network card mtu value with '%s' '%s'."
            % (node, networkNum))
    # The nodes are grouped by MTU value
    if not mtuValue in list(g_mtuMap.keys()):
        g_mtuMap[mtuValue] = ["%s-%s" % (node, networkNum)]
    else:
        g_mtuMap[mtuValue].append("%s-%s" % (node, networkNum))
|
|
|
|
|
|
def preCheck():
    """
    function: preCheck for different scene
    input : NA
    output: NA

    When running as root, raises sshd's MaxStartups to 1000 so many
    parallel ssh sessions are accepted. For multi-node runs, gathers
    every node's MTU value in parallel and warns when they differ.
    """
    # patch ssh config
    if __isRoot():
        cmd = "grep -E '^MaxStartups[\ \t]+1000' /etc/ssh/sshd_config"
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            # MaxStartups not yet set to 1000: rewrite it and reload sshd
            cmd = "sed -i '/MaxStartups/d' /etc/ssh/sshd_config &&" \
                  " echo 'MaxStartups 1000' >> /etc/ssh/sshd_config &&" \
                  " service sshd reload"
            SharedFuncs.runShellCmd(cmd)

    # the MTU comparison only makes sense for a multi-node check
    if (g_opts.distributing or g_opts.localMode or
            g_opts.isSingle or not g_context.nodes):
        return
    # Check all node MTU value
    try:
        pool = ThreadPool(DefaultValue.getCpuSet())
        results = pool.map(getMTUValue, g_context.nodes)
        pool.close()
        pool.join()
    except Exception as e:
        raise Exception(str(e))
    # According to the number of groups to determine whether the same
    if len(list(g_mtuMap.keys())) > 1:
        warningMsg = "Warning: The MTU value is inconsistent on all node," \
                     " maybe checking will be slower or hang."
        for mtuValue in list(g_mtuMap.keys()):
            warningMsg += "\n%s: [%s]" % (
                mtuValue, ','.join(g_mtuMap[mtuValue]))
        __printOnScreen(warningMsg)
|
|
|
|
|
|
def analysisResult(item):
    """
    function: concatenate the per-node .out files of one check item,
    analyse them and cache the result in the global g_itemResult map.
    """
    global g_itemResult
    outputPath = g_context.tmpPath
    checkID = g_context.checkID
    itemName = item['name']
    # concatenate every node's output file for this item
    content = ""
    for node in g_context.nodes:
        outFile = "%s/%s_%s_%s.out" % (
            outputPath, itemName, g_context.hostMapping[node], checkID)
        content += "".join(FileUtil.readFile(outFile))
    itemResult = __analysisResult(content, itemName)
    g_itemResult[itemName] = [itemResult, itemResult.formatOutput()]
|
|
|
|
|
|
def doCheck():
    """
    function: do check process
    input : NA
    output: NA

    Local mode runs the items in-process (possibly via a root shell);
    otherwise one thread per node drives the remote checks while this
    function polls completion, updates the progress bar and finally
    analyses the collected results.
    """
    # Local mode
    if g_opts.localMode:
        if __isDistributing():
            # load check item dynamic and get the execute result
            doRunCheck()
        else:
            if not __hasRootItems() or __isRoot():
                # load check item dynamic and get the execute result
                doRunCheck()
            else:
                # check with root privileges
                doRootCheck()
    else:
        # watching the threads and response for Ctrl+C signal
        Watcher()
        threads = []
        __printOnScreen(
            "Start to health check for the cluster. Total Items:%s Nodes:%s"
            % (len(g_context.items), len(g_context.nodes)))
        for n in g_context.nodes:
            t = CheckThread("%s Thread" % n, doLocalCheck, n)
            threads.append(t)

        items = g_context.items
        itemCount = len(items)
        itemsName = [i['name'] for i in items]
        outputPath = g_context.tmpPath
        nodes = g_context.nodes[:]
        checkID = g_context.checkID
        # init progress display
        progress_manager = MultiProgressManager()
        progress_manager.put('Checking...', LineProgress(total=itemCount,
                                                         title='Checking...'))
        # fix the display format for progress bar
        newLine = '\n'
        print(newLine)
        # Check the number of completed nodes
        overNodes = 0
        # Time to hit the log
        LogCount = 0
        lastTimeProgress = -1
        while len(nodes) and datetime.now() <= g_endTime:
            totleCount = 0
            slowNode = []
            # BUGFIX: iterate over a snapshot — nodes.remove(node) below
            # would otherwise skip the element following each removed node
            for node in nodes[:]:
                # Get user and password
                username, passwd = __getUserAndPwd(node)
                if node in g_context.oldNodes:
                    itemCount_node = len(g_context.oldItems)
                else:
                    itemCount_node = len(g_context.newItems)
                # Local execution
                if SharedFuncs.is_local_node(node):
                    checkCount = SharedFuncs.checkComplete(
                        checkID, node, g_context.hostMapping[node],
                        g_context.user, g_context.tmpPath)
                # Executed in new node scene
                elif node in g_context.newNodes:
                    checkCount = SharedFuncs.checkComplete(
                        checkID, node, g_context.hostMapping[node], username,
                        g_context.tmpPath, passwd)
                else:
                    checkCount = SharedFuncs.checkComplete(
                        checkID, node, g_context.hostMapping[node],
                        g_context.user, g_context.tmpPath)
                try:
                    checkCount = int(checkCount.strip())
                except Exception:
                    # a non-numeric reply means no items finished yet
                    checkCount = 0
                # If there is a node check completed,
                # some nodes just started,record slow node
                if overNodes > 0 and checkCount < 2:
                    slowNode.append(node)
                if checkCount == itemCount_node:
                    nodes.remove(node)
                    # Record the number of completed nodes
                    overNodes += 1
                    if not SharedFuncs.is_local_node(node):
                        if node in g_context.newNodes:
                            outItems = []
                            for i in itemsName:
                                outItems.append("%s/%s_%s_%s.out" % (
                                    outputPath, i,
                                    g_context.hostMapping[node],
                                    checkID))
                            SharedFuncs.receiveFile(outItems, node, username,
                                                    outputPath, passwd)
                        else:
                            fileName = "%s/*_%s_%s.out" % (
                                outputPath, g_context.hostMapping[node],
                                checkID)
                            # Delete Files
                            SharedFuncs.receiveFile(fileName, node,
                                                    g_context.user,
                                                    outputPath)
                else:
                    totleCount += checkCount
            # All nodes check the number of completed
            totleCount += itemCount * overNodes

            # Timed and counted
            time.sleep(1)
            LogCount += 1
            # Update execution progress
            progressInfo = totleCount // len(g_context.nodes)
            # Refresh only as the schedule changes
            if lastTimeProgress < progressInfo <= itemCount:
                progress_manager.update("Checking...", progressInfo)
                lastTimeProgress = progressInfo
            # Suggest the slow node to log every 30 seconds
            if slowNode and itemCount > 1 and LogCount % 30 == 0:
                logMsg = "Warning: The node [%s] check progress" \
                         " is slow." % ",".join(slowNode)
                g_logger.debug(logMsg)

        for t in threads:
            if t.exitcode == 1:
                raise ThreadCheckException(t.name, t.exception)

        for t in threads:
            t.join(1)

        if datetime.now() > g_endTime:
            raise TimeoutException(nodes)

        __printOnScreen("Start to analysis the check result")
        try:
            # analyse every item's combined output in parallel
            pool = ThreadPool(DefaultValue.getCpuSet())
            results = pool.map(analysisResult, g_context.items)
            pool.close()
            pool.join()
        except Exception as e:
            raise Exception(str(e))
        for item in g_context.items:
            g_result.append(g_itemResult[item['name']][0])
            print(g_itemResult[item['name']][1])

        __printOnScreen("Analysis the check result successfully")
|
|
|
|
|
|
def doRunCheck():
    """
    function: load check item dynamic and get the execute result
    input : NA
    output: NA
    """
    outputPath = g_context.tmpPath
    localHost = __getLocalNode(g_context.nodes)
    # nodes listed as "new" run the new-item set, others the old set
    if localHost in g_context.newNodes:
        items = g_context.newItems
    else:
        items = g_context.oldItems
    if g_context.hostMapping:
        localHost = g_context.hostMapping[localHost]
    for item in items:
        content = ""
        modPath = g_context.supportItems[item['name']]
        # instantiate the checker for this item and execute it locally
        checker = CheckItemFactory.createItem(item['name'], modPath,
                                              item['scope'], item['analysis'])
        checker.runCheck(g_context, g_logger)

        # for local run get the content
        fileName = "%s/%s_%s_%s.out" % (
            outputPath, item['name'], localHost, g_context.checkID)

        content += "".join(FileUtil.readFile(fileName))
        itemResult = __analysisResult(content, item['name'])
        g_result.append(itemResult)
    # run the check process distributing and no need to clean the resource
    if __isDistributing():
        g_logger.debug("run check items done and exit the command")
        if g_opts.format == 'default' and not g_opts.nonPrinting:
            # Initialize the self.clusterInfo variable
            print(g_result.outputRaw())
|
|
|
|
|
|
def doRootCheck():
    """
    function: check with root privileges
    input : NA
    output: NA

    Re-invokes gs_check locally through a root shell using the stored
    credentials of the local node, then prints the child's output.
    """
    # get local node
    host = __getLocalNode(g_context.nodes)
    # prepare the command for running check
    cmd = __prepareCmd(g_context.items, g_context.user, g_context.checkID, True)
    # run root cmd
    output = SharedFuncs.runRootCmd(cmd, g_opts.pwdMap[host][0],
                                    g_opts.pwdMap[host][1], g_context.mpprc)
    # output is decoded before printing; runRootCmd presumably returns
    # bytes — TODO confirm against SharedFuncs
    print(output.decode())
|
|
|
|
|
|
def __prepareCmd(items, user, checkid, print_output=False):
    """
    function: prepare the command for running check

    Builds the gs_check command line that re-runs the given items with
    the current context's paths, optionally suppressing output.
    """
    cmdPath = os.path.realpath(os.path.dirname(__file__))
    itemsName = [i['name'] for i in items]
    # optional fragments stay empty unless their value is set
    printParam = "" if print_output else "--non-print"
    userParam = " -U %s " % user if user else ""
    checkIdParam = " --cid=%s " % checkid if checkid else ""
    if g_context.routing:
        routingParam = "--routing %s" % g_context.routing
    else:
        routingParam = ""
    cmd = "%s/gs_check -i %s %s %s -L %s -o %s -l %s %s" % (
        cmdPath, ",".join(itemsName), userParam, checkIdParam,
        routingParam, g_context.tmpPath, g_context.logFile, printParam)
    return cmd
|
|
|
|
|
|
def doLocalCheck(host):
    """
    function: running check on different threads
    input : NA
    output: NA
    """
    # prepare the command for running check
    if host in g_context.oldNodes:
        cmd = __prepareCmd(g_context.oldItems, g_context.user,
                           g_context.checkID)
    else:
        # new nodes run the new-item set without a user parameter
        cmd = __prepareCmd(g_context.newItems, "", g_context.checkID)
    if SharedFuncs.is_local_node(host):
        if __hasRootItems():
            # root items: run through a root shell with stored credentials
            SharedFuncs.runRootCmd(cmd, g_opts.pwdMap[host][0],
                                   g_opts.pwdMap[host][1], g_context.mpprc)
        else:
            SharedFuncs.runShellCmd(cmd, g_context.user, g_context.mpprc)
    else:
        if not __hasRootItems():
            SharedFuncs.runSshCmd(cmd, host, g_context.user, g_context.mpprc)
        else:
            # get username and password for certain node
            username, passwd = __getUserAndPwd(host)
            if host in g_context.newNodes:
                # new nodes: run without sourcing the mpprc file
                SharedFuncs.runSshCmdWithPwd(cmd, host, username, passwd)
            else:
                SharedFuncs.runSshCmdWithPwd(cmd, host, username, passwd,
                                             g_context.mpprc)
|
|
|
|
|
|
def __analysisResult(output, itemName):
    """
    function: analysis the check result

    :param output:   concatenated raw output of the item on all nodes
    :param itemName: name of the check item
    :return: the ItemResult after the item's postAnalysis hook ran
    :raise CheckException: when parsing or post-analysis fails
    """
    item_result = ItemResult.parse(output)
    if not item_result:
        raise CheckException("analysis result occurs error")
    try:
        # load support item
        mod_path = g_context.supportItems[itemName]
        checker = CheckItemFactory.createFrom(itemName, mod_path, g_context)
        # analysis the item result got from each node
        item_result = checker.postAnalysis(item_result)
    except Exception as e:
        raise CheckException(str(e))
    return item_result
|
|
|
|
|
|
def moveLogFile(host):
    """Fetch `host`'s gs_check log and store it with a per-host suffix."""
    tmpLog = os.path.join(g_context.tmpPath, "log/gs_check.log")
    destLog = tmpLog[:-4] + "_" + host + ".log"
    SharedFuncs.receiveFile(g_context.logFile, host, g_context.user, destLog)
|
|
|
|
|
|
def formatOutput():
    """
    function: format and zip the result package
    input : NA
    output: NA

    Writes the result file, collects local and remote logs, gathers the
    per-node .out files and tars everything into a CheckReport archive
    (unless the output path is /dev/null).
    """
    # nothing to do on a distributing worker or when no result exists
    if g_opts.distributing or not g_result:
        return

    try:
        # output the result to a file
        resultFile = os.path.join(g_context.tmpPath,
                                  "CheckResult_%s" % g_context.checkID)
        FileUtil.createFile(resultFile, True)
        FileUtil.writeFile(resultFile, [g_result.outputResult()])
    except Exception as e:
        # best effort: a failed result file must not abort the check
        if os.path.exists(resultFile):
            FileUtil.removeFile(resultFile)
        g_logger.info("Warning! Generate check result output file failed.")
        g_logger.debug(str(e))

    if g_opts.localMode:
        return

    # export the check result to excel file in output folder,
    # only export excel for certain scene
    scene = '_' + g_opts.scene if g_opts.scene else ""

    # collect the log file from remote host
    tmpLog = os.path.join(g_context.tmpPath, "log/gs_check.log")
    # Get the log file
    if g_opts.logFile or g_opts.cluster:
        FileUtil.cpFile(g_context.logFile, tmpLog[:-4] + "_" +
                        NetUtil.GetHostIpOrName() + ".log")
    else:
        FileUtil.moveFile(g_context.logFile, tmpLog[:-4] + "_" +
                          NetUtil.GetHostIpOrName() + ".log")
    hosts = __getRemoteNodes(g_context.nodes)
    if hosts:
        try:
            # pull each remote node's log in parallel
            pool = ThreadPool(DefaultValue.getCpuSet())
            results = pool.map(moveLogFile, hosts)
            pool.close()
            pool.join()
        except Exception as e:
            g_logger.info(
                "Warning! Retrieve log file from remote host failed.")
            g_logger.debug(str(e))

    # move the *.out file to nodes folder
    outputFolder = g_context.tmpPath
    checkID = g_context.checkID
    cmd = "cd %s; find . -name \'*%s.out\' -exec mv {} %s \;"\
          % (g_context.tmpPath, checkID, os.path.join(outputFolder, "nodes"))
    SharedFuncs.runShellCmd(cmd, g_context.user)

    # No check result is generated when the output is specified as /dev/null
    if g_context.outPath == ClusterConstants.DEV_NULL:
        print(g_result.outputStatistic())
        print("The inspection report has been cleared by /dev/null.")
        return
    tarFile = "%s/CheckReport%s_%s.tar.gz" %\
              (g_context.outPath, scene, g_context.checkID)
    # tar the output for this check
    tarFiles = ''
    if (__checkFileExist(os.path.join(outputFolder, "nodes"),
                         '%s.out' % checkID)):
        tarFiles += ' nodes '
    if __checkFileExist(os.path.join(outputFolder, "log"), '.log'):
        tarFiles += ' log '
    if __checkFileExist(outputFolder, '%s.zip' % checkID):
        tarFiles += ' *%s.zip ' % checkID
    if __checkFileExist(outputFolder, 'CheckResult_%s' % checkID):
        tarFiles += ' CheckResult_%s ' % checkID
    tarcmd = "cd %s;tar -zcf %s %s 2>&1; chmod %s '%s'" \
             % (outputFolder, tarFile, tarFiles,
                DefaultValue.KEY_FILE_MODE, tarFile)
    SharedFuncs.runShellCmd(tarcmd, g_context.user)

    if g_opts.format == 'default':
        print(g_result.outputStatistic())
        print("For more information please refer to %s"
              % os.path.join(outputFolder, tarFile))

    if g_opts.format == 'json':
        print(g_result.outputJson())
|
|
|
|
|
|
def __checkFileExist(path, filePattern):
    """
    function: report whether directory `path` contains an entry whose
    name matches `filePattern` (grep basic-regex semantics).
    """
    # Check the file exists
    cmd = "cd %s; ls | grep '%s' | wc -l" % (path, filePattern)
    (status, output) = subprocess.getstatusoutput(cmd)
    # a successful pipeline with a non-zero count means "exists"
    return status == 0 and output != "0"
|
|
|
|
|
|
def killChildProcess(node):
    """
    function: kill any check processes still running on `node`,
    identified by the 'cid=<checkID>' marker in their command line.
    """
    checkID = g_context.checkID
    # cmd with switch users
    # (escaping differs between the two variants because one is passed
    # through an extra shell layer when switching users)
    cmd_switch = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'""" \
                 """|awk '{print \$2}'` """ % checkID
    cmd_switch += """ && (if [ X\"$proc_pid_list\" != X\"\" ]; """ \
                  """then echo \"$proc_pid_list\" | xargs kill -9 ; fi)"""
    # cmd with not switch users
    cmd_current = """proc_pid_list=`ps -ef | grep 'cid=%s'| grep -v 'grep'""" \
                  """|awk "{print \\\$2}"` """ % checkID
    cmd_current += """ && (if [ X"$proc_pid_list" != X"" ]; then """ \
                   """echo "$proc_pid_list" | xargs kill -9 ; fi)"""

    username, passwd = __getUserAndPwd(node)
    if SharedFuncs.is_local_node(node) and not __hasRootItems():
        SharedFuncs.runShellCmd(cmd_current)
    elif __hasRootItems():
        SharedFuncs.runSshCmdWithPwd(cmd_switch, node, username, passwd)
    else:
        SharedFuncs.runSshCmd(cmd_current, node, g_context.user)
|
|
|
|
|
|
def cleanTmpDir(node):
    """Remove the temporary working directory on one node."""
    cmd = r"rm -rf %s" % g_context.tmpPath
    if SharedFuncs.is_local_node(node):
        SharedFuncs.runShellCmd(cmd)
        return
    SharedFuncs.runSshCmd(cmd, node, g_context.user)
|
|
|
|
|
|
def cleanEnvironment(skiplog=False):
    """
    function: clean the environment
    input : NA
    output: NA

    :param skiplog: True when invoked from an abnormal exit path; child
                    check processes are killed first, best-effort
    """
    # a distributing worker must not clean shared state
    if __isDistributing():
        return
    if not g_context.tmpPath:
        return
    if not g_context.nodes:
        return

    # kill child process on all hosts when exception(skip log)
    if skiplog:
        try:
            pool = ThreadPool(DefaultValue.getCpuSet())
            results = pool.map(killChildProcess, g_context.nodes)
            pool.close()
            pool.join()
        except Exception as e:
            # best effort: a failed kill must not mask the original error
            g_logger.info("Warning! Failed to kill child process.")
            g_logger.debug(str(e))

    # clean tmp files in all the nodes
    cmd = r"rm -rf %s" % g_context.tmpPath
    if g_opts.localMode:
        SharedFuncs.runShellCmd(cmd)
    else:
        try:
            pool = ThreadPool(DefaultValue.getCpuSet())
            results = pool.map(cleanTmpDir, g_context.nodes)
            pool.close()
            pool.join()
        except Exception as e:
            g_logger.info("Warning! Failed to clear tmp directory.")
            g_logger.debug(str(e))
|
|
|
|
|
|
def setTimeOut():
    """
    function: set time out
    input : NA
    output: NA
    """
    global g_endTime
    # deadline = now + user-specified timeout in seconds
    deadline = datetime.now() + timedelta(seconds=g_opts.timeout)
    g_endTime = deadline
|
|
|
|
|
|
if __name__ == '__main__':
    # main function: run the full check pipeline, exit 0 on success,
    # 1 on any failure after cleaning the environment
    try:
        initGlobal()
        parseCommandLine()
        checkParameter()
        parseCheckContext()
        preCheck()
        dispatchCached()
        doCheck()
        formatOutput()
        cleanEnvironment()
    except (InterruptException, ThreadCheckException, TimeoutException) as e:
        g_logger.error(str(e))
        # clean the environment and child process when using Ctrl+C force or
        # except or timeout to exit the command
        cleanEnvironment(True)
        sys.exit(1)
    except Exception as e:
        # the logger may not exist yet if initialization itself failed
        if not g_logger:
            sys.stdout = sys.stderr
            print(str(e))
        else:
            g_logger.error(str(e))
        cleanEnvironment()
        sys.exit(1)
    else:
        sys.exit(0)
|