387 lines
14 KiB
Python
387 lines
14 KiB
Python
# -*- coding:utf-8 -*-
|
|
#############################################################################
|
|
# Copyright (c) 2022 Huawei Technologies Co.,Ltd.
|
|
#
|
|
# openGauss is licensed under Mulan PSL v2.
|
|
# You can use this software according to the terms
|
|
# and conditions of the Mulan PSL v2.
|
|
# You may obtain a copy of Mulan PSL v2 at:
|
|
#
|
|
# http://license.coscl.org.cn/MulanPSL2
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OF ANY KIND,
|
|
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
# See the Mulan PSL v2 for more details.
|
|
# ----------------------------------------------------------------------------
|
|
# Description : dss_checker.py is a utility to check parameter.
|
|
|
|
import os
|
|
import stat
|
|
import re
|
|
import sys
|
|
import base64
|
|
import json
|
|
import getpass
|
|
import time
|
|
|
|
try:
|
|
sys.path.append(sys.path[0] + "/../../../")
|
|
from gspylib.common.ErrorCode import ErrorCode
|
|
from base_utils.security.security_checker import SecurityChecker
|
|
from domain_utils.cluster_file.cluster_dir import ClusterDir
|
|
from base_utils.os.file_util import FileUtil
|
|
from base_utils.os.cmd_util import CmdUtil
|
|
|
|
except ImportError as e:
|
|
sys.exit("[GAUSS-52200] : Unable to import module: %s." % str(e))
|
|
|
|
|
|
class DssConfig():
    '''
    Holds the DSS instance list of a cluster and groups static helpers used
    to inspect and control the DSS / CM processes.

    An instance stores three parallel sequences (ids, ips, ports) plus a port
    offset; str(instance) renders them as "id:ip:port,id:ip:port" with the
    offset already added to every port.
    '''
    # Not referenced inside this class. Presumably values for the
    # "restart_delay" attribute of the dms_res CM resource (see
    # reload_cm_resource) -- TODO confirm against the callers.
    DMS_DEFAULT_RESTART_DELAY = 1
    DMS_TMP_RESTART_DELAY = 300

    def __init__(self, attr='', unzip_str='', offset=0):
        '''
        :param attr: optional (ids, ips, ports) triple.
        :param unzip_str: optional "id:ip:port,id:ip:port" string. When it is
                          given, the parsed values replace those from attr.
        :param offset: number added to every port by __str__.
        '''
        self.ids = ''
        self.ips = ''
        self.ports = ''
        if attr:
            self.ids, self.ips, self.ports = attr
        self.offset = offset
        if unzip_str:
            # Split on both separators and drop empty fields, then de-interleave
            # the flat [id, ip, port, id, ip, port, ...] list.
            infos = list(filter(None, re.split(r':|,', unzip_str.strip(','))))
            self.ids = infos[::3]
            self.ips = infos[1::3]
            self.ports = infos[2::3]

    @staticmethod
    def init_dss_config(inst):
        '''
        Initialize dssconfig based on the DN.

        Does nothing unless inst.enable_dss is 'on'. Otherwise collects the
        'haIps' and 'port' values found under inst's 'datanodes', stores the
        rendered DssConfig string (port offset 10) in inst.dss_config and
        validates the number of volumes in inst.dss_vg_info and every port.

        :raises Exception: when the volume count does not fit the DN count.
        '''
        if inst.enable_dss != 'on':
            return

        dn_inst = DssConfig.get_simple_value(inst, ['datanodes'])
        dss_ips = DssConfig.get_simple_value(dn_inst, ['haIps'])
        dss_ports = DssConfig.get_simple_value(dn_inst, ['port'])
        dss_ids = list(range(len(dss_ips)))
        inst.dss_config = str(
            DssConfig((dss_ids, dss_ips, dss_ports), offset=10))

        infos = list(filter(None, re.split(r':|,', inst.dss_vg_info)))
        vg_count = len(infos[::2])
        dn_count = len(dss_ips)

        # We support two deployment method:
        # 1. one dss disk for xlog of each node, and one dss disk for shared data;
        # 2. one dss disk for xlogs of all nodes, and one dss disk for shared data;
        if vg_count not in (dn_count + 1, 2):
            raise Exception(
                ErrorCode.GAUSS_500['GAUSS_50026'] % 'dss_vg_info' +
                ' The number of volumes is one more than the number of dns or the number of volumes is 2.' +
                ' The number of dns is {} and the number of dss volumes is {}'.
                format(dn_count, vg_count))

        for dp in dss_ports:
            # The dms port is db port plus 20, and the dss port is db port plus 10.
            SecurityChecker.check_port_valid(
                'dataPortBase',
                int(dp),
                max_value=65535 - 20,
                des2='. In dss-mode, The DMS port number increases by 20 again')

    @staticmethod
    def get_simple_value(meta_object, award_key):
        '''
        Collect, breadth-first, every value stored under one of the wanted
        keys anywhere inside meta_object.

        type: award_key -> list
        No nesting logic is involved. Only key values are matched.

        Lists are walked element by element, dicts item by item and any
        other object that has a __dict__ through vars(). A matching list
        value is flattened into the result, a matching int is converted to
        str and any other matching value is appended unchanged. Matching
        values are not searched any further.

        :return: list of collected values in traversal order.
        '''
        # Function-scope import keeps this block self-contained.
        from collections import deque

        res = []
        # FIFO queue: popleft() is O(1) whereas list.pop(0) is O(n).
        pending = deque([meta_object])
        while pending:
            current = pending.popleft()
            if isinstance(current, list):
                pending.extend(current)
            elif isinstance(current, dict):
                for key, value in current.items():
                    if key not in award_key:
                        pending.append(value)
                    elif isinstance(value, list):
                        res.extend(value)
                    elif isinstance(value, int):
                        res.append(str(value))
                    else:
                        res.append(value)
            elif hasattr(current, '__dict__'):
                pending.append(vars(current))
        return res

    @staticmethod
    def get_current_dss_id_by_dn(db_info, cur_db_info):
        '''
        Obtains the id of the current node's dssserver.

        The id is the position, inside the 'datanodes' collected from
        db_info, of the first datanode of cur_db_info that also appears
        there.

        :return: the index, or -1 when the two have no datanode in common.
        '''
        dns = DssConfig.get_simple_value(db_info, ['datanodes'])
        cur_dns = DssConfig.get_simple_value(cur_db_info, ['datanodes'])
        # Look up the datanode that is actually shared rather than blindly
        # using cur_dns[0], which may be absent from dns.
        for cur_dn in cur_dns:
            if cur_dn in dns:
                return dns.index(cur_dn)
        return -1

    @staticmethod
    def get_value_b64_handler(key, value, action='encode'):
        '''
        Quick use of base64

        With action 'encode', return the url-safe base64 text of the JSON
        document {key: value}. With any other action, treat value as such a
        text and return what is stored under key ('' when value is blank or
        the key is missing).
        '''
        if action == 'encode':
            payload = json.dumps({key: value}).encode()
            return base64.urlsafe_b64encode(payload).decode()

        if not value.strip():
            return ''
        decoded = base64.urlsafe_b64decode(value.encode()).decode()
        return json.loads(decoded).get(key, '')

    @staticmethod
    def get_cm_inst_path(cur_db_info, inst_type='cm_agent'):
        '''
        Return the 'datadir' values found under 'cmagents' (inst_type
        'cm_agent') or 'cmservers' (inst_type 'cm_server') of cur_db_info.

        :return: list of values; empty for any other inst_type.
        '''
        if inst_type == 'cm_agent':
            group_key = 'cmagents'
        elif inst_type == 'cm_server':
            group_key = 'cmservers'
        else:
            return []
        return DssConfig.get_simple_value(
            DssConfig.get_simple_value(cur_db_info, [group_key]),
            ['datadir'])

    @staticmethod
    def check_process_exist(check_flag, user=''):
        '''
        Tell whether check_flag occurs in the output of "ps -u <user> v".

        :param check_flag: text searched for in the process listing.
        :param user: user whose processes are listed; defaults to the
                     current user.
        :raises Exception: when the ps command returns a non-zero status.
        '''
        if not user:
            user = getpass.getuser()
        check_cmd = 'ps -u {} v'.format(user)
        sts, out = CmdUtil.getstatusoutput_by_fast_popen(check_cmd)
        if sts != 0:
            raise Exception(ErrorCode.GAUSS_512["GAUSS_51252"] +
                            ' Error: {}.'.format(str(out).strip()))
        return check_flag in str(out)

    @staticmethod
    def check_process_available(logger, process, retry=5, interval=1):
        '''
        The dssserver process is not available when running.
        Then check if it is available.

        Only a process name containing 'dssserver' is probed (with
        "dsscmd lsvg", up to retry times, sleeping interval seconds between
        attempts); every other process is reported as available.

        :return: False when every probe failed, otherwise True.
        '''
        logger.debug(f'Start to check {process} available.' )
        if 'dssserver' not in process:
            return True

        cmd = 'dsscmd lsvg'
        for cur in range(retry):
            # NOTE(review): a falsy first element is treated as failure, i.e.
            # exec_by_popen is assumed to return a success flag rather than
            # an exit code -- confirm against CmdUtil.
            sts, out = CmdUtil.exec_by_popen(cmd)
            if sts:
                logger.debug(f'The dssserver is available. The result of the lsvg: {out}')
                break
            # There is '.' in the out.
            logger.debug(f'The dssserver is not available. Message: {out}')
            if cur >= retry - 1:
                return False
            time.sleep(interval)
        return True

    @staticmethod
    def set_cm_manual_flag(inst_id, flag, logger):
        '''
        Delete (flag 'start') or create (flag 'stop') the file
        bin/instance_manual_start_<inst_id> under the gauss home directory.
        Nothing happens when the file is already in the requested state or
        for any other flag.
        '''
        gauss_home = ClusterDir.get_gauss_home()
        file_ = os.path.realpath(
            os.path.join(gauss_home,
                         f'bin/instance_manual_start_{inst_id}'))
        logger.debug(
            "Start to delete or add manual flag file: {}.".format(file_))
        flag_exists = os.path.isfile(file_)
        if flag == 'start' and flag_exists:
            os.remove(file_)
            logger.debug("End to delete manual flag file: {}.".format(file_))
        elif flag == 'stop' and not flag_exists:
            FileUtil.createFileInSafeMode(file_)
            logger.debug("End to add manual flag file: {}.".format(file_))

    @staticmethod
    def get_cma_res_value(cma_path, key, res_name='dms_res'):
        '''
        Read <cma_path>/cm_resource.json and return, as a string, the value
        of key in the entry of its 'resources' list whose 'name' equals
        res_name.

        :return: '' when the file, the resource or the key does not exist.
        '''
        cma_res = os.path.join(cma_path, 'cm_resource.json')
        if not os.path.isfile(cma_res):
            return ''
        with open(cma_res, 'r') as fr:
            res_dict = json.load(fr)
        for resource in res_dict.get('resources', {}):
            if resource.get('name') == res_name:
                return str(resource.get(key, ''))
        return ''

    @staticmethod
    def wait_for_process_start(logger, flag, check_flag='', timeout=300):
        '''
        Poll until the process is listed by ps and reported as available.

        :param flag: process name used in messages and for the
                     availability check.
        :param check_flag: text searched for in the ps output; defaults to
                           flag.
        :param timeout: maximum number of polling rounds; each failed round
                        is followed by a one second sleep.
        :raises Exception: when the process did not come up in time.
        '''
        if not check_flag:
            check_flag = flag
        logger.log(f"Start to wait for {flag} to be started.")
        while timeout > 0:
            if (DssConfig.check_process_exist(check_flag=check_flag) and
                    DssConfig.check_process_available(logger, flag, retry=1)):
                break
            if timeout % 5 == 0:
                logger.debug(
                    f'The process {flag} is not running or not available.')
            timeout -= 1
            time.sleep(1)
        # "<= 0" also covers a timeout that started at or below zero or that
        # is not a whole number; a successful break leaves it positive.
        if timeout <= 0:
            raise Exception(ErrorCode.GAUSS_516['GAUSS_51657'] % flag)
        logger.log(f'The process {flag} is running.')

    @staticmethod
    def reload_cm_resource(logger, timeout=300, wait_for_start=True):
        '''
        Set restart_delay of the dms_res CM resource to timeout with cm_ctl,
        then kill the cm_agent process of the current user with kill -9.

        :param wait_for_start: when true, wait until a 'bin/cm_agent'
                               process is listed again before returning.
        :raises Exception: when the cm_ctl command or the kill command fails.
        '''
        logger.debug('Start to reload the cm resource file.')
        edit_cmd = f'cm_ctl res --edit --res_name="dms_res" ' \
                   f'--res_attr="restart_delay={timeout}"'
        logger.debug(f'The cmd of the reload: {edit_cmd}.')
        sts, out = CmdUtil.getstatusoutput_by_fast_popen(edit_cmd)
        if sts != 0:
            # Substitute the command output into the message instead of
            # emitting a literal "%s".
            raise Exception(ErrorCode.GAUSS_535["GAUSS_53507"] % edit_cmd +
                            "Error:%s." % out)

        kill_cmd = "ps ux | grep 'bin/cm_agent' | grep -v grep " \
                   "| awk '{print $2}' | xargs -r -n 100 kill -9"
        logger.debug('The cmd of the kill cm agent is: %s.' % kill_cmd)
        status, _ = CmdUtil.retryGetstatusoutput(kill_cmd, 3, 5)
        if status != 0:
            raise Exception("Failed to kill the cm agent.")
        logger.log("Successfully kill the cm agent.")
        if wait_for_start:
            DssConfig.wait_for_process_start(logger, 'cm_agent', 'bin/cm_agent')
        logger.debug("End to kill the cm agent.")

    def __str__(self):
        '''
        return dss config str

        Format: "id:ip:port,id:ip:port" where every port has self.offset
        added.
        '''
        return ','.join(
            ':'.join([str(id_), ip, str(int(port) + self.offset)])
            for id_, ip, port in zip(self.ids, self.ips, self.ports))
|
|
|
|
|
|
class DssSimpleChecker():
    '''
    Static validators for the DSS related installation parameters.
    '''

    def __init__(self):
        pass

    @staticmethod
    def check_vol_disk(device_name):
        '''
        function: Check whether the device block exists.
        :param device_name: path of the device.
        :return: None
        :raises Exception: when the path does not exist.
        '''
        try:
            # NOTE(review): only existence is verified. The original code
            # evaluated stat.S_ISBLK() on the mode but discarded the result,
            # so a path that is not a block device has always been accepted.
            os.stat(device_name)
        except FileNotFoundError:
            raise Exception(ErrorCode.GAUSS_504["GAUSS_50421"] % device_name)

    @staticmethod
    def check_dss_vg_info(vgname, dss_vg_info):
        '''
        dss_vg_info checker

        dss_vg_info must look like "name:disk,name:disk"; vgname must be one
        of the volume names and every disk path must exist.

        :raises Exception: when one of those conditions is violated.
        '''
        infos = list(filter(None, re.split(r':|,', dss_vg_info)))

        # The volume name must correspond to the disk.
        malformed = (dss_vg_info.count(':') != dss_vg_info.count(',') + 1
                     or not infos
                     or len(infos) % 2 != 0)
        if malformed:
            raise Exception(ErrorCode.GAUSS_504["GAUSS_50414"] %
                            "The volume name must correspond to the disk.")

        # The shared volume must be in vg_config.
        if vgname not in infos[::2]:
            raise Exception(ErrorCode.GAUSS_504["GAUSS_50419"] %
                            (vgname, dss_vg_info))

        for disk in infos[1::2]:
            DssSimpleChecker.check_vol_disk(disk)

    @staticmethod
    def check_dss_some_param(inst):
        '''
        Check some parameters on dss mode.

        Verifies that the required parameters are not blank, that all disks
        exist and are distinct and that the volume names are distinct. On
        success it normalizes inst.dss_vg_info (surrounding commas removed),
        sets inst.dss_pri_disks and inst.dss_shared_disks (name -> disk) and
        runs the ssl and rdma checks.

        :raises Exception: when any check fails.
        '''
        names = [
            'dss_home', 'cm_vote_disk', 'cm_share_disk', 'dss_vgname',
            'dss_vg_info'
        ]
        for pn in names:
            if not getattr(inst, pn).strip():
                raise Exception(ErrorCode.GAUSS_500["GAUSS_50012"] % pn)

        DssSimpleChecker.check_vol_disk(inst.cm_vote_disk)
        DssSimpleChecker.check_vol_disk(inst.cm_share_disk)
        inst.dss_vg_info = inst.dss_vg_info.strip(',')

        # dss_vg_info checker
        DssSimpleChecker.check_dss_vg_info(inst.dss_vgname, inst.dss_vg_info)
        infos = list(filter(None, re.split(r':|,', inst.dss_vg_info)))
        vg_names = infos[::2]
        vg_disks = infos[1::2]

        # No disk may be used twice and no volume name may repeat.
        all_disk = [inst.cm_vote_disk, inst.cm_share_disk] + vg_disks
        if len(all_disk) != len(set(all_disk)) or len(vg_names) != len(
                set(vg_names)):
            raise Exception(ErrorCode.GAUSS_504["GAUSS_50417"])

        # Split the volumes into the shared one (named dss_vgname) and the rest.
        inst.dss_pri_disks = {
            k: v
            for k, v in zip(vg_names, vg_disks) if k != inst.dss_vgname
        }
        inst.dss_shared_disks = {
            k: v
            for k, v in zip(vg_names, vg_disks) if k == inst.dss_vgname
        }

        DssSimpleChecker.check_dss_ssl_enable(inst)
        DssSimpleChecker.check_rdma(inst)

    @staticmethod
    def check_dss_ssl_enable(inst):
        '''
        Normalize inst.dss_ssl_enable to 'on' or 'off'; a blank value means
        'on'.

        :raises Exception: for any other value.
        '''
        setting = inst.dss_ssl_enable.strip()
        if setting in ['on', '']:
            inst.dss_ssl_enable = 'on'
        elif setting in ['off']:
            inst.dss_ssl_enable = 'off'
        else:
            raise Exception(ErrorCode.GAUSS_500['GAUSS_50026'] %
                            'dss_ssl_enable' + ' It\'s must be on or off')

    @staticmethod
    def check_rdma(inst):
        '''
        Validate inst.ss_interconnect_type and, for RDMA, the optional
        inst.ss_rdma_work_config, which must contain two integers
        "first last" with 0 <= first <= last < number of CPUs.

        NOTE(review): the nesting of the branches was reconstructed from a
        copy of the file without indentation; as written an empty
        ss_rdma_work_config is accepted in RDMA mode -- confirm.

        :raises Exception: when a value is not acceptable.
        '''
        if inst.ss_interconnect_type in ['TCP', '']:
            return
        if inst.ss_interconnect_type not in ['RDMA']:
            raise Exception(ErrorCode.GAUSS_500['GAUSS_50026'] %
                            'ss_interconnect_type' +
                            ' It\'s must be TCP or RDMA')
        if not inst.ss_rdma_work_config:
            return

        mat = re.findall(r'(\d+) (\d+)', inst.ss_rdma_work_config)
        if not mat:
            raise Exception(
                ErrorCode.GAUSS_500["GAUSS_50027"] %
                'ss_rdma_work_config' +
                'The format string is "int int"', )

        first_cpu, lastest_cpu = mat[0]
        # os.cpu_count() returns None when the count cannot be determined;
        # fall back to one CPU instead of failing with a TypeError.
        cpu_total = os.cpu_count() or 1
        if 0 <= int(first_cpu) <= int(lastest_cpu) < cpu_total:
            return
        raise Exception( ErrorCode.GAUSS_500["GAUSS_50027"] %
                         'ss_rdma_work_config' +
                         'The second number must be greater than the first number ' \
                         'and less than the number of CPU cores.'
                         )
|