commit 0220fe5901
@@ -1,855 +0,0 @@
From 812edfeeb0e47f14dff2077ff5f8a69b4773eaef Mon Sep 17 00:00:00 2001
From: twx980514 <tanjinbo3@huawei.com>
Date: Tue, 6 Jul 2021 17:43:08 +0800
Subject: [PATCH] del sctp check

---
 script/gs_check | 4 +-
 script/gspylib/common/GaussLog.py | 2 +-
 script/gspylib/etc/conf/check_list.conf | 11 ---
 script/gspylib/etc/conf/check_list_dws.conf | 11 ---
 .../inspection/config/check_list_V1R6C10.conf | 10 --
 .../inspection/config/check_list_V1R7C00.conf | 11 ---
 script/gspylib/inspection/config/items.xml | 38 +-------
 script/gspylib/inspection/config/scene_inspect.xml | 2 -
 script/gspylib/inspection/config/scene_install.xml | 2 -
 script/gspylib/inspection/config/scene_upgrade.xml | 1 -
 .../inspection/items/network/CheckNoCheckSum.py | 71 --------------
 .../inspection/items/network/CheckUsedPort.py | 17 +---
 .../inspection/items/os/CheckSctpService.py | 108 ---------------------
 .../gspylib/inspection/items/os/CheckSysParams.py | 32 ++----
 script/impl/preinstall/PreinstallImpl.py | 38 --------
 script/local/LocalCheck.py | 21 +---
 script/local/PreInstallUtility.py | 87 +----------------
 17 files changed, 16 insertions(+), 450 deletions(-)
 delete mode 100644 script/gspylib/inspection/items/network/CheckNoCheckSum.py
 delete mode 100644 script/gspylib/inspection/items/os/CheckSctpService.py
diff --git a/script/gs_check b/script/gs_check
index 05d5625..0f29b31 100644
--- a/script/gs_check
+++ b/script/gs_check
@@ -93,13 +93,13 @@ DEFAULT_TIMEOUT = 1500
 # because single clusters don't need to perform consistency checks and
 # internal communication class checks
 SINGLE_SKIP = ["CheckTimeZone", "CheckEncoding", "CheckKernelVer",
-               "CheckNTPD", "CheckNoCheckSum", "CheckCpuCount",
+               "CheckNTPD", "CheckCpuCount",
               "CheckMemInfo", "CheckDiskConfig",
               "CheckUpVer", "CheckPgxcgroup", "CheckPing",
               "CheckNetWorkDrop", "CheckNetSpeed"]

 SETITEM_SKIP = ["CheckCPU", "CheckTimeZone", "CheckOSVer", "CheckNTPD",
-                "CheckSshdService", "CheckNoCheckSum", "CheckEtcHosts",
+                "CheckSshdService", "CheckEtcHosts",
                "CheckCpuCount", "CheckHyperThread", "CheckMemInfo",
                "CheckKernelVer", "CheckEncoding", "CheckBootItems",
                "CheckDropCache", "CheckFilehandle", "CheckKeyProAdj",
diff --git a/script/gspylib/common/GaussLog.py b/script/gspylib/common/GaussLog.py
index bdfecf1..31957d2 100644
--- a/script/gspylib/common/GaussLog.py
+++ b/script/gspylib/common/GaussLog.py
@@ -55,7 +55,7 @@ PREINSTALL_ACTION = ["prepare_path", "check_os_Version", "create_os_user",
                     "check_os_user", "create_cluster_paths",
                     "set_os_parameter", "set_finish_flag", "set_warning_env",
                     "prepare_user_cron_service", "prepare_user_sshd_service",
-                     "set_library", "set_sctp", "set_virtualIp",
+                     "set_library", "set_virtualIp",
                     "clean_virtualIp", "check_hostname_mapping",
                     "init_gausslog", "check_envfile", "check_dir_owner",
                     "set_user_env", "set_tool_env", "gs_preinstall"]
diff --git a/script/gspylib/etc/conf/check_list.conf b/script/gspylib/etc/conf/check_list.conf
index deba792..77b7c60 100644
--- a/script/gspylib/etc/conf/check_list.conf
+++ b/script/gspylib/etc/conf/check_list.conf
@@ -10,14 +10,10 @@ net.ipv4.tcp_keepalive_time = 30
 net.ipv4.tcp_keepalive_intvl = 30
 net.ipv4.tcp_keepalive_probes = 9
 net.ipv4.tcp_retries2 = 12
-net.sctp.addip_enable = 0
 net.core.wmem_max = 21299200
 net.core.rmem_max = 21299200
 net.core.wmem_default = 21299200
 net.core.rmem_default = 21299200
-net.sctp.sctp_mem = 94500000 915000000 927000000
-net.sctp.sctp_rmem = 8192 250000 16777216
-net.sctp.sctp_wmem = 8192 250000 16777216
 kernel.sem = 250 6400000 1000 25600
 net.ipv4.tcp_rmem = 8192 250000 16777216
 net.ipv4.tcp_wmem = 8192 250000 16777216
@@ -33,8 +29,6 @@ kernel.shmmax = 18446744073709551615

 # if parameter value is not equal to ths OS's value, print the waring, and not error
 [SUGGEST:/etc/sysctl.conf]
-net.sctp.sndbuf_policy = 0
-net.sctp.rcvbuf_policy = 0
 net.ipv4.ip_local_port_range = 26000 65535
 net.ipv4.tcp_fin_timeout = 60
 net.ipv4.tcp_sack = 1
@@ -42,13 +36,8 @@ net.ipv4.tcp_timestamps = 1
 net.ipv4.tcp_retries1 = 5
 net.ipv4.tcp_syn_retries = 5
 net.ipv4.tcp_synack_retries = 5
-net.sctp.path_max_retrans = 10
-net.sctp.max_init_retransmits = 10
-net.sctp.association_max_retrans = 10
-net.sctp.hb_interval = 30000
 vm.extfrag_threshold = 500
 vm.overcommit_ratio = 90
-SctpChecksumErrors = 0

 # open file number, please set it to set '1000000'
 [/etc/security/limits.conf]
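Review note: with the net.sctp.* keys gone, every remaining entry in this file is still verified by reading the matching /proc/sys node. A minimal sketch of that comparison, assuming only the standard library (helper names are illustrative, not from the repo):

import os

def read_sysctl(key):
    """Read one kernel parameter, e.g. 'net.ipv4.tcp_retries2'."""
    path = "/proc/sys/%s" % key.replace(".", "/")
    with open(path) as handle:
        # collapse internal whitespace so '8192  250000' compares as '8192 250000'
        return " ".join(handle.read().split())

def check_expected(expected):
    """Return {key: (wanted, actual)} for every mismatching parameter."""
    mismatches = {}
    for key, want in expected.items():
        try:
            got = read_sysctl(key)
        except FileNotFoundError:
            continue  # parameter absent on this kernel; skipped, as the OM does
        if got != " ".join(str(want).split()):
            mismatches[key] = (want, got)
    return mismatches

print(check_expected({"net.ipv4.tcp_retries2": 12,
                      "kernel.sem": "250 6400000 1000 25600"}))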
diff --git a/script/gspylib/etc/conf/check_list_dws.conf b/script/gspylib/etc/conf/check_list_dws.conf
index a7f7b7c..a96f7e9 100644
--- a/script/gspylib/etc/conf/check_list_dws.conf
+++ b/script/gspylib/etc/conf/check_list_dws.conf
@@ -10,14 +10,10 @@ net.ipv4.tcp_keepalive_time = 30
 net.ipv4.tcp_keepalive_intvl = 30
 net.ipv4.tcp_keepalive_probes = 9
 net.ipv4.tcp_retries2 = 12
-net.sctp.addip_enable = 0
 net.core.wmem_max = 21299200
 net.core.rmem_max = 21299200
 net.core.wmem_default = 21299200
 net.core.rmem_default = 21299200
-net.sctp.sctp_mem = 94500000 915000000 927000000
-net.sctp.sctp_rmem = 8192 250000 16777216
-net.sctp.sctp_wmem = 8192 250000 16777216
 kernel.sem = 250 6400000 1000 25600
 net.ipv4.tcp_rmem = 8192 250000 16777216
 net.ipv4.tcp_wmem = 8192 250000 16777216
@@ -28,8 +24,6 @@ net.ipv4.tcp_max_syn_backlog = 65535
 net.core.somaxconn = 65535
 net.ipv4.tcp_syncookies = 1
 vm.overcommit_memory = 0
-net.sctp.sndbuf_policy = 0
-net.sctp.rcvbuf_policy = 0
 net.ipv4.tcp_fin_timeout = 60
 kernel.shmall = 1152921504606846720
 kernel.shmmax = 18446744073709551615
@@ -38,16 +32,11 @@ net.ipv4.tcp_timestamps = 1
 net.ipv4.tcp_retries1 = 10
 net.ipv4.tcp_syn_retries = 10
 net.ipv4.tcp_synack_retries = 10
-net.sctp.path_max_retrans = 10
-net.sctp.max_init_retransmits = 10
-net.sctp.association_max_retrans = 10
-net.sctp.hb_interval = 30000
 vm.extfrag_threshold = 500
 vm.overcommit_ratio = 90

 # if parameter value is not equal to ths OS's value, print the waring, and not error
 [SUGGEST:/etc/sysctl.conf]
-SctpChecksumErrors = 0

 # open file number, please set it to set '1000000'
 [/etc/security/limits.conf]
diff --git a/script/gspylib/inspection/config/check_list_V1R6C10.conf b/script/gspylib/inspection/config/check_list_V1R6C10.conf
index 75a2203..16c3fd2 100644
--- a/script/gspylib/inspection/config/check_list_V1R6C10.conf
+++ b/script/gspylib/inspection/config/check_list_V1R6C10.conf
@@ -10,14 +10,10 @@ net.ipv4.tcp_keepalive_time = 30
 net.ipv4.tcp_keepalive_intvl = 30
 net.ipv4.tcp_keepalive_probes = 9
 net.ipv4.tcp_retries2 = 80
-net.sctp.addip_enable = 0
 net.core.wmem_max = 21299200
 net.core.rmem_max = 21299200
 net.core.wmem_default = 21299200
 net.core.rmem_default = 21299200
-net.sctp.sctp_mem = 94500000 915000000 927000000
-net.sctp.sctp_rmem = 8192 250000 16777216
-net.sctp.sctp_wmem = 8192 250000 16777216
 kernel.sem = 250 6400000 1000 25600
 net.ipv4.tcp_rmem = 8192 250000 16777216
 net.ipv4.tcp_wmem = 8192 250000 16777216
@@ -30,8 +26,6 @@ net.ipv4.tcp_syncookies = 1
 vm.overcommit_memory = 0
 vm.panic_on_oom = 0;
 vm.oom_kill_allocating_task = 0;
-net.sctp.sndbuf_policy = 0
-net.sctp.rcvbuf_policy = 0

 # if parameter value is not equal to ths OS's value, print the waring, and not error
 [SUGGEST:/etc/sysctl.conf]
@@ -41,10 +35,6 @@ net.ipv4.tcp_timestamps = 1
 net.ipv4.tcp_retries1 = 5
 net.ipv4.tcp_syn_retries = 5
 net.ipv4.tcp_synack_retries = 5
-net.sctp.path_max_retrans = 10
-net.sctp.max_init_retransmits = 10
-net.sctp.association_max_retrans = 10
-net.sctp.hb_interval = 30000

 # open file number, please set it to set '1000000'
 [/etc/security/limits.conf]
diff --git a/script/gspylib/inspection/config/check_list_V1R7C00.conf b/script/gspylib/inspection/config/check_list_V1R7C00.conf
index 41c9334..4c150b6 100644
--- a/script/gspylib/inspection/config/check_list_V1R7C00.conf
+++ b/script/gspylib/inspection/config/check_list_V1R7C00.conf
@@ -10,14 +10,10 @@ net.ipv4.tcp_keepalive_time = 30
 net.ipv4.tcp_keepalive_intvl = 30
 net.ipv4.tcp_keepalive_probes = 9
 net.ipv4.tcp_retries2 = 80
-net.sctp.addip_enable = 0
 net.core.wmem_max = 21299200
 net.core.rmem_max = 21299200
 net.core.wmem_default = 21299200
 net.core.rmem_default = 21299200
-net.sctp.sctp_mem = 94500000 915000000 927000000
-net.sctp.sctp_rmem = 8192 250000 16777216
-net.sctp.sctp_wmem = 8192 250000 16777216
 kernel.sem = 250 6400000 1000 25600
 net.ipv4.tcp_rmem = 8192 250000 16777216
 net.ipv4.tcp_wmem = 8192 250000 16777216
@@ -30,8 +26,6 @@ net.ipv4.tcp_syncookies = 1
 vm.overcommit_memory = 0
 vm.panic_on_oom = 0
 vm.oom_kill_allocating_task = 0
-net.sctp.sndbuf_policy = 0
-net.sctp.rcvbuf_policy = 0
 kernel.shmall = 1152921504606846720
 kernel.shmmax = 18446744073709551615

@@ -43,13 +37,8 @@ net.ipv4.tcp_timestamps = 1
 net.ipv4.tcp_retries1 = 5
 net.ipv4.tcp_syn_retries = 5
 net.ipv4.tcp_synack_retries = 5
-net.sctp.path_max_retrans = 10
-net.sctp.max_init_retransmits = 10
-net.sctp.association_max_retrans = 10
-net.sctp.hb_interval = 30000
 vm.extfrag_threshold = 500
 vm.overcommit_ratio = 90
-SctpChecksumErrors = 0

 # open file number, please set it to set '1000000'
 [/etc/security/limits.conf]
diff --git a/script/gspylib/inspection/config/items.xml b/script/gspylib/inspection/config/items.xml
index 1dbac79..bb4143c 100644
--- a/script/gspylib/inspection/config/items.xml
+++ b/script/gspylib/inspection/config/items.xml
@@ -334,24 +334,6 @@
         <analysis>default</analysis>
     </checkitem>

-    <checkitem id="10026" name="CheckNoCheckSum">
-        <title>
-            <zh>检查nochecksum值是否为预期值且一致(默认为N,RedHat6.4/6.5且bond是为Y)</zh>
-            <en>Check the nochecksum</en>
-        </title>
-        <threshold/>
-        <suggestion>
-            <zh>修改nochecksum值为一致的预期值</zh>
-        </suggestion>
-        <standard>
-            <zh>检查nochecksum值,若符合预期且一致则检查项通过,否则检查项不通过</zh>
-        </standard>
-        <category>network</category>
-        <permission>root</permission>
-        <scope>all</scope>
-        <analysis>consistent</analysis>
-    </checkitem>
-
     <checkitem id="10027" name="CheckOmmUserExist">
         <title>
             <zh>检查omm用户是否已删除</zh>
@@ -456,24 +438,6 @@
         <analysis>consistent</analysis>
     </checkitem>

-    <checkitem id="10032" name="CheckSctpService">
-        <title>
-            <zh>检查sctp服务</zh>
-            <en>Check sctp service</en>
-        </title>
-        <threshold/>
-        <suggestion>
-            <zh>安装及加载sctp服务</zh>
-        </suggestion>
-        <standard>
-            <zh>stcp服务开启且写在开机自启动文件中则检查项通过,否则检查项不通过</zh>
-        </standard>
-        <category>os</category>
-        <permission>root</permission>
-        <scope>all</scope>
-        <analysis>default</analysis>
-    </checkitem>
-
     <checkitem id="10033" name="CheckHyperThread">
         <title>
             <zh>检查超线程是否打开</zh>
@@ -1841,7 +1805,7 @@
             <zh>增大net.ipv4.ip_local_port_range或降低并发</zh>
         </suggestion>
         <standard>
-            <zh>检查net.ipv4.ip_local_port_range,范围大于等于OS默认值通过;检查TCP协议随机端口数,小于总随机端口数的80%通过;检查SCTP协议随机端口数,小于总随机端口数的80%通过</zh>
+            <zh>检查net.ipv4.ip_local_port_range,范围大于等于OS默认值通过;检查TCP协议随机端口数,小于总随机端口数的80%通过</zh>
         </standard>
         <category>network</category>
         <permission>user</permission>
diff --git a/script/gspylib/inspection/config/scene_inspect.xml b/script/gspylib/inspection/config/scene_inspect.xml
index 463e4b7..3ba6da3 100644
--- a/script/gspylib/inspection/config/scene_inspect.xml
+++ b/script/gspylib/inspection/config/scene_inspect.xml
@@ -40,12 +40,10 @@
         <item name="CheckSshdConfig"/>
         <item name="CheckCrondService"/>
         <item name="CheckStack"/>
-        <item name="CheckNoCheckSum"/>
         <item name="CheckSysPortRange"/>
         <item name="CheckMemInfo"/>
         <item name="CheckHyperThread"/>
         <item name="CheckTableSpace"/>
-        <item name="CheckSctpService"/>
         <item name="CheckSysadminUser"/>
         <item name="CheckGUCConsistent"/>
         <item name="CheckMaxProcMemory"/>
diff --git a/script/gspylib/inspection/config/scene_install.xml b/script/gspylib/inspection/config/scene_install.xml
index a189193..42b9547 100644
--- a/script/gspylib/inspection/config/scene_install.xml
+++ b/script/gspylib/inspection/config/scene_install.xml
@@ -12,13 +12,11 @@
         <item name="CheckStack"/>
         <item name="CheckCrondService"/>
         <item name="CheckSshdService"/>
-        <item name="CheckSctpService"/>
         <item name="CheckSysParams">
             <threshold>
                 version=V1R7C00
             </threshold>
         </item>
-        <item name="CheckNoCheckSum"/>
         <item name="CheckDiskFormat"/>
         <item name="CheckEtcHosts"/>
         <item name="CheckHyperThread"/>
diff --git a/script/gspylib/inspection/config/scene_upgrade.xml b/script/gspylib/inspection/config/scene_upgrade.xml
index 426785a..7356a21 100644
--- a/script/gspylib/inspection/config/scene_upgrade.xml
+++ b/script/gspylib/inspection/config/scene_upgrade.xml
@@ -23,7 +23,6 @@
             version=V1R7C00
         </threshold>
     </item>
-    <item name="CheckNoCheckSum"/>
     <item name="CheckGUCValue"/>
     <item name="CheckStack"/>
     <item name="CheckDiskFormat"/>
diff --git a/script/gspylib/inspection/items/network/CheckNoCheckSum.py b/script/gspylib/inspection/items/network/CheckNoCheckSum.py
deleted file mode 100644
index 64d0e52..0000000
--- a/script/gspylib/inspection/items/network/CheckNoCheckSum.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# -*- coding:utf-8 -*-
-# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
-#
-# openGauss is licensed under Mulan PSL v2.
-# You can use this software according to the terms
-# and conditions of the Mulan PSL v2.
-# You may obtain a copy of Mulan PSL v2 at:
-#
-#          http://license.coscl.org.cn/MulanPSL2
-#
-# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
-# WITHOUT WARRANTIES OF ANY KIND,
-# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
-# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
-# See the Mulan PSL v2 for more details.
-# ----------------------------------------------------------------------------
-import os
-import platform
-from gspylib.inspection.common import SharedFuncs
-from gspylib.inspection.common.CheckItem import BaseItem
-from gspylib.inspection.common.CheckResult import ResultStatus
-from gspylib.os.gsfile import g_file
-from gspylib.os.gsnetwork import g_network
-from gspylib.os.gsfile import g_Platform
-from gspylib.common.ErrorCode import ErrorCode
-
-
-class CheckNoCheckSum(BaseItem):
-    def __init__(self):
-        super(CheckNoCheckSum, self).__init__(self.__class__.__name__)
-
-    def getOSversion(self):
-        distname, version, idnum = g_Platform.dist()
-        return distname, version
-
-    def doCheck(self):
-        if (not os.path.isfile("/sys/module/sctp/parameters/no_checksums")):
-            self.result.rst = ResultStatus.OK
-            self.result.val = "The SCTP service is not used and the" \
-                              " check item is skipped"
-            return
-        expect = "N"
-        if (self.cluster):
-            LocalNodeInfo = self.cluster.getDbNodeByName(self.host)
-            serviceIP = LocalNodeInfo.backIps[0]
-        else:
-            serviceIP = SharedFuncs.getIpByHostName(self.host)
-        for network in g_network.getAllNetworkInfo():
-            if (network.ipAddress == serviceIP):
-                networkCardNum = network.NICNum
-                networkBond = network.networkBondModeInfo
-                break
-        if (not networkCardNum or not networkBond):
-            raise Exception(ErrorCode.GAUSS_506["GAUSS_50619"])
-        (distname, version) = self.getOSversion()
-        if ((distname in ("redhat", "centos")) and
-                (version in ("6.4", "6.5")) and
-                networkBond != "BondMode Null"):
-            expect = "Y"
-
-        output = \
-            g_file.readFile('/sys/module/sctp/parameters/no_checksums')[0]
-        if (output.strip() == expect):
-            self.result.rst = ResultStatus.OK
-            self.result.val = "Nochecksum value is %s,Check items pass." \
-                              % output.strip()
-        else:
-            self.result.rst = ResultStatus.NG
-            self.result.val = "Nochecksum value(%s) is not %s," \
-                              "Check items are not passed." \
-                              % (output.strip(), expect)
diff --git a/script/gspylib/inspection/items/network/CheckUsedPort.py b/script/gspylib/inspection/items/network/CheckUsedPort.py
index 8a635ed..9718d96 100644
--- a/script/gspylib/inspection/items/network/CheckUsedPort.py
+++ b/script/gspylib/inspection/items/network/CheckUsedPort.py
@@ -46,17 +46,9 @@ class CheckUsedPort(BaseItem):

         return int(tcpUsed)

-    def getSctpUsedPort(self):
-        cmd = "cat /proc/net/sctp/assocs|" \
-              "awk '{print $12}'|sort|uniq -c |wc -l"
-        sctpUsed = SharedFuncs.runShellCmd(cmd)
-
-        return int(sctpUsed)
-
     def doCheck(self):
         portRange = self.getPortRange()
         tcpUsed = self.getTcpUsedPort()
-        sctpUsed = self.getSctpUsedPort()
         defaultPortRange = 60000 - 32768
         if (portRange < defaultPortRange):
             self.result.rst = ResultStatus.WARNING
@@ -70,14 +62,7 @@ class CheckUsedPort(BaseItem):
                               " not passed." % tcpUsed
             return

-        if (sctpUsed > portRange * 0.8):
-            self.result.rst = ResultStatus.WARNING
-            self.result.val = "sctp port used is %s," \
-                              "Check items are not passed." % sctpUsed
-            return
-
         self.result.rst = ResultStatus.OK
         self.result.val = "port range is %s,tcp port used is %s," \
-                          "sctp port used is %d,Check items pass." \
-                          % (portRange, tcpUsed, sctpUsed)
+                          "Check items pass." % (portRange, tcpUsed)
         return
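Review note: with getSctpUsedPort() removed, the check reduces to the TCP side of the same 80%-of-ephemeral-range rule. A standalone sketch of that rule for clarity; it counts ports with ss rather than the repo's SharedFuncs.runShellCmd pipeline, so treat the counting method as an assumption:

import subprocess

def ephemeral_port_range():
    """Width of the range in net.ipv4.ip_local_port_range."""
    with open("/proc/sys/net/ipv4/ip_local_port_range") as handle:
        low, high = map(int, handle.read().split())
    return high - low

def tcp_ports_in_use():
    """Count distinct local TCP endpoints reported by ss -tan."""
    out = subprocess.run(["ss", "-tan"], capture_output=True, text=True).stdout
    locals_ = {line.split()[3] for line in out.splitlines()[1:]
               if len(line.split()) > 3}
    return len(locals_)

port_range = ephemeral_port_range()
used = tcp_ports_in_use()
# warn when more than 80% of the ephemeral range is consumed, as in doCheck()
print("WARNING" if used > port_range * 0.8 else "OK", used, "/", port_range)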
diff --git a/script/gspylib/inspection/items/os/CheckSctpService.py b/script/gspylib/inspection/items/os/CheckSctpService.py
deleted file mode 100644
index 8e00810..0000000
--- a/script/gspylib/inspection/items/os/CheckSctpService.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# -*- coding:utf-8 -*-
-# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
-#
-# openGauss is licensed under Mulan PSL v2.
-# You can use this software according to the terms
-# and conditions of the Mulan PSL v2.
-# You may obtain a copy of Mulan PSL v2 at:
-#
-#          http://license.coscl.org.cn/MulanPSL2
-#
-# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
-# WITHOUT WARRANTIES OF ANY KIND,
-# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
-# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
-# See the Mulan PSL v2 for more details.
-# ----------------------------------------------------------------------------
-import subprocess
-import platform
-from gspylib.inspection.common.CheckItem import BaseItem
-from gspylib.inspection.common.CheckResult import ResultStatus
-from gspylib.common.Common import DefaultValue
-from gspylib.os.gsfile import g_Platform
-
-
-class CheckSctpService(BaseItem):
-    def __init__(self):
-        super(CheckSctpService, self).__init__(self.__class__.__name__)
-
-    def doCheck(self):
-
-        parRes = ""
-        flag = "Normal"
-        cmd = "ls -l /lib/modules/`uname -r`/kernel/net/sctp/sctp.ko*"
-        (status, output) = subprocess.getstatusoutput(cmd)
-        if (status != 0 or output == "" or output.find(
-                "No such file or directory") > 0):
-            if DefaultValue.checkDockerEnv():
-                return
-            flag = "Error"
-            parRes += "There is no sctp service."
-        else:
-            cmd = "modprobe sctp;"
-            cmd += "lsmod |grep sctp"
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if (output == ""):
-                flag = "Error"
-                parRes += "sctp service is not loaded."
-
-        cmd = "cat %s | grep '^insmod.*sctp.ko'" % DefaultValue.getOSInitFile()
-        (status, output) = subprocess.getstatusoutput(cmd)
-        if (status != 0 or output == ""):
-            if (flag == "Normal"):
-                flag = "Warning"
-            parRes += "Sctp service is not set to boot from power on."
-
-        self.result.val = parRes
-        self.result.raw = output
-        if (flag == "Error"):
-            self.result.rst = ResultStatus.NG
-        elif (flag == "Warning"):
-            self.result.rst = ResultStatus.WARNING
-        else:
-            self.result.rst = ResultStatus.OK
-            self.result.val = "Sctp service is Normal."
-
-    def doSet(self):
-        self.result.val = ""
-        parRes = ""
-        sctpFile = ""
-        initFileSuse = "/etc/init.d/boot.local"
-        initFileRedhat = "/etc/rc.d/rc.local"
-        cmd = "ls -l /lib/modules/`uname -r`/kernel/net/sctp/sctp.ko*"
-        (status, output) = subprocess.getstatusoutput(cmd)
-        if (status != 0 or output == "" or output.find(
-                "No such file or directory") > 0):
-            parRes = "There is no sctp service.\n"
-        else:
-            sctpFile = output.split()[-1]
-            cmd = "modprobe sctp;"
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if (status != 0):
-                cmd = "insmod %s >/dev/null 2>&1;lsmod |grep sctp" % sctpFile
-                (status, output) = subprocess.getstatusoutput(cmd)
-                if status != 0 or output == "":
-                    parRes = "Failed to load sctp service.\n"
-        distname, version, idnum = g_Platform.dist()
-        if (distname in ["redhat", "centos", "euleros", "openEuler"]):
-            cmd = "cat %s | grep sctp" % initFileRedhat
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if (status != 0 or output == ""):
-                cmd = "echo 'modprobe sctp' >> /etc/rc.d/rc.local;"
-                cmd += "echo" \
-                       " 'insmod %s >/dev/null 2>&1' >> /etc/rc.d/rc.local " \
-                       % sctpFile
-                (status, output) = subprocess.getstatusoutput(cmd)
-                if (status != 0):
-                    parRes += "Failed to add sctp service to boot.\n"
-        else:
-            cmd = "cat %s | grep stcp" % initFileSuse
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if (status != 0 or output == ""):
-                cmd = "echo 'modprobe sctp' >> /etc/init.d/boot.local;"
-                cmd += "echo '%s >/dev/null 2>&1' >> /etc/init.d/boot.local " \
-                       % sctpFile
-                (status, output) = subprocess.getstatusoutput(cmd)
-                if (status != 0):
-                    parRes += "Failed to add sctp service to boot."
-        self.result.val = parRes
diff --git a/script/gspylib/inspection/items/os/CheckSysParams.py b/script/gspylib/inspection/items/os/CheckSysParams.py
index 947ecc6..c15a627 100644
--- a/script/gspylib/inspection/items/os/CheckSysParams.py
+++ b/script/gspylib/inspection/items/os/CheckSysParams.py
@@ -122,21 +122,13 @@ class CheckSysParams(BaseItem):
         for key in kernelParameter:
             if (patchlevel == "1" and key == "vm.extfrag_threshold"):
                 continue
-            if (key == "sctpchecksumerrors"):
-                snmpFile = "/proc/net/sctp/snmp"
-                if (os.path.isfile(snmpFile)):
-                    output = \
-                        g_file.readFile(snmpFile, 'SctpChecksumErrors')[
-                            0].split()[1].strip()
-                else:
-                    continue
-            else:
-                sysFile = "/proc/sys/%s" % key.replace('.', '/')
-                # High version of linux no longer supports tcp_tw_recycle
-                if (not os.path.exists(
-                        sysFile) and key == "net.ipv4.tcp_tw_recycle"):
-                    continue
-                output = g_file.readFile(sysFile)[0].strip()
+
+            sysFile = "/proc/sys/%s" % key.replace('.', '/')
+            # High version of linux no longer supports tcp_tw_recycle
+            if (not os.path.exists(
+                    sysFile) and key == "net.ipv4.tcp_tw_recycle"):
+                continue
+            output = g_file.readFile(sysFile)[0].strip()
             if (len(output.split()) > 1):
                 output = ' '.join(output.split())

@@ -184,16 +176,6 @@ class CheckSysParams(BaseItem):
             checkResultList = checkResult.split('\'')
             setParameterList[checkResultList[1]] = checkResultList[5]
         self.result.val = ""
-        # The parameter sctpchecksumerrors set method is independent
-        if ("sctpchecksumerrors" in setParameterList):
-            cmd = "echo 1 > /sys/module/sctp/parameters/no_checksums"
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if (status != 0):
-                self.result.val += " " \
-                                   " Failed to enforce sysctl kernel " \
-                                   "variable 'sctpchecksumerrors'. " \
-                                   "Error: %s" % output
-            setParameterList.pop("sctpchecksumerrors")

         if (len(setParameterList) != 0):
             for key in setParameterList:
diff --git a/script/impl/preinstall/PreinstallImpl.py b/script/impl/preinstall/PreinstallImpl.py
index a35e87a..908423f 100644
--- a/script/impl/preinstall/PreinstallImpl.py
+++ b/script/impl/preinstall/PreinstallImpl.py
@@ -54,8 +54,6 @@ ACTION_PREPARE_USER_CRON_SERVICE = "prepare_user_cron_service"
 ACTION_PREPARE_USER_SSHD_SERVICE = "prepare_user_sshd_service"
 # set the dynamic link library
 ACTION_SET_LIBRARY = "set_library"
-# set sctp service
-ACTION_SET_SCTP = "set_sctp"
 # set virtual Ip
 ACTION_SET_VIRTUALIP = "set_virtualIp"
 # clean virtual Ip
@@ -1485,38 +1483,6 @@ class PreinstallImpl:
         """
         pass

-    def setSctp(self):
-        """
-        function: setting SCTP service
-        input: NA
-        output: NA
-        """
-        self.context.logger.log("Setting SCTP service.", "addStep")
-        try:
-            # set SCTP service
-            cmd = "%s -t %s -u %s -l %s" % (
-                OMCommand.getLocalScript("Local_PreInstall"),
-                ACTION_SET_SCTP,
-                self.context.user,
-                self.context.localLog)
-            # check the mpprcFile
-            if self.context.mpprcFile != "":
-                cmd += " -s '%s'" % self.context.mpprcFile
-            self.context.logger.debug("Command for setting SCTP: %s" % cmd)
-
-            # exec cmd for set SCTP
-            DefaultValue.execCommandWithMode(
-                cmd,
-                "set SCTP",
-                self.context.sshTool,
-                self.context.localMode or self.context.isSingle,
-                self.context.mpprcFile)
-        except Exception as e:
-            # failed set SCTP service
-            raise Exception(str(e))
-        # Successfully set SCTP service
-        self.context.logger.log("Successfully set SCTP service.", "constant")
-
     def setVirtualIp(self):
         """
         function: set the virtual IPs
@@ -1893,10 +1859,6 @@ class PreinstallImpl:
         self.checkOSVersion()
         # create path and set mode
         self.createDirs()
-
-        # set Sctp
-        if not DefaultValue.checkDockerEnv():
-            self.setSctp()
         # set os parameters
         self.setAndCheckOSParameter()
         # prepare cron service for user
diff --git a/script/local/LocalCheck.py b/script/local/LocalCheck.py
index 82a9efb..6e5cb6e 100644
--- a/script/local/LocalCheck.py
+++ b/script/local/LocalCheck.py
@@ -47,8 +47,7 @@ actioItemMap = {

 docker_no_need_check = ["net.core.wmem_max", "net.core.rmem_max",
                         "net.core.wmem_default", "net.core.rmem_default",
-                        "net.sctp.sctp_mem", "net.sctp.sctp_rmem",
-                        "net.sctp.sctp_wmem", "net.core.netdev_max_backlog",
+                        "net.core.netdev_max_backlog",
                         "net.ipv4.tcp_max_tw_buckets", "net.ipv4.tcp_tw_reuse",
                         "net.ipv4.tcp_tw_recycle", "net.ipv4.tcp_retries2",
                         "net.ipv4.ip_local_reserved_ports", "net.ipv4.tcp_rmem",
@@ -239,12 +238,7 @@ def checkSysctlParameter(kernelParameter, isSet):
             continue
         if (DefaultValue.checkDockerEnv() and key in docker_no_need_check):
             continue
-        # The parameter sctpchecksumerrors check method is independent
-        if (key == "sctpchecksumerrors"):
-            cmd = "cat /proc/net/sctp/snmp | grep SctpChecksumErrors" \
-                  " | awk '{print $2}'"
-        else:
-            cmd = "cat %s" % ("/proc/sys/%s" % key.replace('.', '/'))
+        cmd = "cat %s" % ("/proc/sys/%s" % key.replace('.', '/'))
         (status, output) = subprocess.getstatusoutput(cmd)
         if (status == 0):
             if (key == "vm.min_free_kbytes"
@@ -315,15 +309,6 @@ def setOSParameter(setParameterList, patchlevel):
     # vm.extfrag_threshold parameter, skip set
     if ("vm.extfrag_threshold" in setParameterList and patchlevel == "1"):
         setParameterList.pop("vm.extfrag_threshold")
-    # The parameter sctpchecksumerrors set method is independent
-    if ("sctpchecksumerrors" in setParameterList):
-        cmd = "echo 1 > /sys/module/sctp/parameters/no_checksums"
-        (status, output) = subprocess.getstatusoutput(cmd)
-        if (status != 0):
-            g_logger.debug("The cmd is %s " % cmd)
-            g_logger.log(" Failed to enforce sysctl kernel variable"
-                         " 'sctpchecksumerrors'. Error: %s" % output)
-        setParameterList.pop("sctpchecksumerrors")

     if (len(setParameterList) != 0):
         g_logger.debug("Setting sysctl parameter.")
@@ -332,7 +317,7 @@ def setOSParameter(setParameterList, patchlevel):
             g_logger.log(" Set variable '%s' to '%s'"
                          % (key, setParameterList[key]))
         cmd = "sysctl -p"
-        (status, output) = subprocess.getstatusoutput(cmd)
+        (status, _) = subprocess.getstatusoutput(cmd)
         if (status != 0):
             cmderrorinfo = "sysctl -p | grep 'No such file or directory'"
             (status, outputresult) = subprocess.getstatusoutput(cmderrorinfo)
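Review note: setOSParameter() now applies every remaining key through a plain `sysctl -p` and only re-runs it to single out "No such file or directory" complaints. A minimal sketch of that write-then-reload flow, using a scratch conf file instead of /etc/sysctl.conf (paths and keys are examples only):

import subprocess

def apply_sysctl(params, conf):
    """Write key = value pairs to a sysctl conf file, then load it."""
    with open(conf, "a") as handle:
        for key, value in params.items():
            handle.write("%s = %s\n" % (key, value))
    status, _ = subprocess.getstatusoutput("sysctl -p %s" % conf)
    if status != 0:
        # as in setOSParameter(), isolate keys the running kernel lacks
        _, missing = subprocess.getstatusoutput(
            "sysctl -p %s 2>&1 | grep 'No such file or directory'" % conf)
        return missing
    return ""

# requires root to actually set kernel parameters
print(apply_sysctl({"net.ipv4.tcp_retries2": 12}, "/tmp/om_check.conf"))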
diff --git a/script/local/PreInstallUtility.py b/script/local/PreInstallUtility.py
index cbe2a59..b4071f3 100644
--- a/script/local/PreInstallUtility.py
+++ b/script/local/PreInstallUtility.py
@@ -55,7 +55,6 @@ ACTION_SET_TOOL_ENV = "set_tool_env"
 ACTION_PREPARE_USER_CRON_SERVICE = "prepare_user_cron_service"
 ACTION_PREPARE_USER_SSHD_SERVICE = "prepare_user_sshd_service"
 ACTION_SET_LIBRARY = "set_library"
-ACTION_SET_SCTP = "set_sctp"
 ACTION_SET_VIRTUALIP = "set_virtualIp"
 ACTION_CHECK_HOSTNAME_MAPPING = "check_hostname_mapping"
 ACTION_INIT_GAUSSLOG = "init_gausslog"
@@ -256,7 +255,7 @@ Common options:
             GaussLog.exitWithError(str(e))
         parameter_list = [ACTION_CHECK_OS_VERSION, ACTION_SET_FINISH_FLAG,
                           ACTION_SET_USER_ENV, ACTION_SET_LIBRARY, \
-                          ACTION_SET_SCTP, ACTION_PREPARE_USER_CRON_SERVICE,
+                          ACTION_PREPARE_USER_CRON_SERVICE,
                           ACTION_PREPARE_USER_SSHD_SERVICE, \
                           ACTION_SET_VIRTUALIP, ACTION_INIT_GAUSSLOG,
                           ACTION_CHECK_ENVFILE, ACTION_CHECK_OS_SOFTWARE, \
@@ -1981,88 +1980,6 @@ Common options:
             self.logger.logExit(str(e))
         self.logger.debug("Successfully set ARM Optimization.")

-    def setSctp(self):
-        """
-        function: Setting SCTP
-        input : NA
-        output: NA
-        """
-        self.logger.debug("Setting SCTP.")
-        try:
-
-            key = "install ipv6 \/bin\/true"
-            confFile = "/etc/modprobe.d/*ipv6.conf"
-
-            initFile = DefaultValue.getOSInitFile()
-            cmd = "ls %s" % confFile
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if status == 0:
-                cmd = "sed -i 's/^.*\(%s.*\)/#\\1/g' %s" % (key, confFile)
-                (status, output) = subprocess.getstatusoutput(cmd)
-                if status != 0:
-                    self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50223"]
-                                        % confFile + " Error: \n%s" % output)
-                cmd = "modprobe ipv6"
-                (status, output) = subprocess.getstatusoutput(cmd)
-                if status != 0:
-                    self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd
-                                        + " Error: \n%s" % output)
-            cmd = "modprobe sctp"
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if status != 0:
-                self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd
-                                    + " Error: \n%s" % output)
-
-            cmd = "uname -r"
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if status != 0:
-                self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd
-                                    + " Error: \n%s" % output)
-
-            # Since redhat7.4 kernel module files ending in .xz
-            stcpFile = "/lib/modules/%s/kernel/net/sctp/sctp.ko" \
-                       % output.strip()
-            stcpFileXz = "/lib/modules/%s/kernel/net/sctp/sctp.ko.xz" \
-                         % output.strip()
-            if (not os.path.exists(stcpFile)) and \
-                    (not os.path.exists(stcpFileXz)):
-                output = stcpFile + " and " + stcpFileXz
-                self.logger.logExit(ErrorCode.GAUSS_502["GAUSS_50201"]
-                                    % output)
-
-            cmd_insmod = "insmod %s >/dev/null 2>&1" % stcpFileXz
-            (status, output) = subprocess.getstatusoutput(cmd_insmod)
-
-            cmd_insmod = "insmod %s >/dev/null 2>&1" % stcpFile
-            (status, output) = subprocess.getstatusoutput(cmd_insmod)
-
-            cmd = "lsmod | grep 'sctp ' | wc -l"
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if not str(output.strip()).isdigit() or int(output.strip()) == 0:
-                self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd
-                                    + " Error: \n%s" % output)
-
-            init_cmd = "sed -i '/^modprobe sctp$/d' %s &&" % initFile
-            init_cmd += "echo \"modprobe sctp\" >> %s &&" % initFile
-            init_cmd += "sed -i '/^insmod.*sctp.ko/d' %s &&" % initFile
-            init_cmd += "echo \"%s\" >> %s" % (cmd_insmod, initFile)
-            (status, output) = subprocess.getstatusoutput(init_cmd)
-            if status != 0:
-                self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"]
-                                    % init_cmd + " Error: \n%s" % output)
-
-            cmd = "sed -i \"/^sysctl -p/d\" %s &&" % initFile
-            cmd += "echo \"sysctl -p\" >> %s" % initFile
-            (status, output) = subprocess.getstatusoutput(cmd)
-            if status != 0:
-                self.logger.logExit(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd
-                                    + " Error: \n%s" % output)
-
-        except Exception as e:
-            self.logger.logExit(str(e))
-
-        self.logger.debug("Successfully set Sctp.")
-
     def checkVirtualIp(self):
         """
         function: Checking virtual IP
@@ -2915,8 +2832,6 @@ Common options:
             self.prepareUserSshdService()
         elif self.action == ACTION_SET_LIBRARY:
             self.setLibrary()
-        elif self.action == ACTION_SET_SCTP:
-            self.setSctp()
         elif self.action == ACTION_SET_VIRTUALIP:
             DefaultValue.modifyFileOwnerFromGPHOME(self.logger.logFile)
             self.setVirtualIp()
--
2.6.4.windows.1
@@ -16,12 +16,16 @@ class SqlCommands:

     @staticmethod
     def getSQLCommand(port, database=ConstantsBase.DEFAULT_DB_NAME,
-                      gsqlBin="gsql"):
+                      gsqlBin="gsql", user_name="", user_pwd=""):
         """
         function : get SQL command
         input : port, database
         output : cmd
         """
+        if user_name and user_pwd:
+            cmd = ConstantsBase.SQL_EXEC_COMMAND_WITHOUT_HOST_WITH_USER % (
+                gsqlBin, str(port), database, user_name, user_pwd)
+            return cmd
         cmd = ConstantsBase.SQL_EXEC_COMMAND_WITHOUT_HOST_WITHOUT_USER % (
             gsqlBin, str(int(port) + 1), database)
         return cmd
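Review note: the new optional user_name/user_pwd arguments switch templates, and the passwordless branch still connects on port + 1. A hedged illustration of the resulting command strings; the templates are copied from the ConstantsBase hunk below, and the "postgres" default database is an assumption standing in for ConstantsBase.DEFAULT_DB_NAME:

# the two templates from constantsbase.py
WITHOUT_USER = "%s -p %s -d %s "
WITH_USER = "%s -p %s -d %s -U %s -W '%s' "

def get_sql_command(port, database="postgres", gsql_bin="gsql",
                    user_name="", user_pwd=""):
    """Mirror of SqlCommands.getSQLCommand: with credentials use the given
    port; without them, connect on port + 1."""
    if user_name and user_pwd:
        return WITH_USER % (gsql_bin, port, database, user_name, user_pwd)
    return WITHOUT_USER % (gsql_bin, int(port) + 1, database)

print(get_sql_command(5432))                                   # gsql -p 5433 -d postgres
print(get_sql_command(5432, user_name="dr_user", user_pwd="***"))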
@@ -59,4 +59,4 @@ class ConstantsBase:

     #SQL_EXEC_COMMAND
     SQL_EXEC_COMMAND_WITHOUT_HOST_WITHOUT_USER = "%s -p %s -d %s "
-    SQL_EXEC_COMMAND_WITHOUT_HOST_WITH_USER = "%s -p %s -d %s -U %s -W %s "
+    SQL_EXEC_COMMAND_WITHOUT_HOST_WITH_USER = "%s -p %s -d %s -U %s -W '%s' "
@@ -24,6 +24,8 @@ import subprocess
 import threading
 import time
 from subprocess import PIPE, Popen
+from datetime import datetime
+from datetime import timedelta
 import pwd
 from gspylib.common.ErrorCode import ErrorCode
 from base_utils.common.exceptions import CommandNotFoundException
@@ -575,6 +577,21 @@ class CmdUtil(object):
                 break
         return status, output

+    @staticmethod
+    def retry_util_timeout(cmd, timeout, sleep_time=1):
+        """
+        retry execute cmd with giving timeout.
+        """
+        end_time = datetime.now() + timedelta(seconds=int(timeout))
+        status, output = 1, 1
+        while datetime.now() < end_time:
+            status, output = CmdUtil.getstatusoutput_by_fast_popen(cmd)
+            if status == 0:
+                break
+            else:
+                time.sleep(sleep_time)
+        return status, output
+
     @staticmethod
     def getstatusoutput_by_fast_popen(cmd):
         """
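Review note: retry_util_timeout polls a command until it succeeds or the deadline passes. A standalone sketch of the same loop, using subprocess directly instead of the repo's FastPopen wrapper:

import subprocess
import time
from datetime import datetime, timedelta

def retry_until_timeout(cmd, timeout, sleep_time=1):
    """Re-run cmd until exit status 0 or until `timeout` seconds elapse."""
    end_time = datetime.now() + timedelta(seconds=int(timeout))
    status, output = 1, ""
    while datetime.now() < end_time:
        status, output = subprocess.getstatusoutput(cmd)
        if status == 0:
            break
        time.sleep(sleep_time)  # back off before the next attempt
    return status, output

# e.g. wait up to 30 seconds for a service port to start answering
status, _ = retry_until_timeout("ss -ltn | grep -q ':5432 '", 30)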
@@ -24,6 +24,7 @@ import stat
 import subprocess
 import pwd
 import grp
+import json
 from subprocess import PIPE

 from base_utils.common.constantsbase import ConstantsBase
@@ -299,6 +300,27 @@ class FileUtil(object):
             lock.release()
         return True

+    @staticmethod
+    def write_update_file(file_path, content, authority, is_json=True):
+        """
+        Write or update file, create if not exist.
+        """
+        with os.fdopen(os.open(file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
+                               authority), "w") as fp_write:
+            if is_json:
+                json.dump(content, fp_write)
+            else:
+                fp_write.write(content)
+
+    @staticmethod
+    def write_add_file(file_path, content, authority):
+        """
+        Write or add content in file, create if not exist.
+        """
+        if not os.path.isfile(file_path):
+            FileUtil.createFileInSafeMode(file_path, mode=authority)
+        FileUtil.writeFile(file_path, [content])
+
     @staticmethod
     def withAsteriskPath(path):
         """
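Review note: write_update_file creates the file with the requested mode at open time via os.open, instead of writing first and chmod-ing afterwards, so a fresh file never appears with default umask permissions. A hedged usage example (the path and payload are illustrative):

import json
import os

def write_update_file(file_path, content, authority, is_json=True):
    """Standalone copy of the new FileUtil helper, for illustration."""
    with os.fdopen(os.open(file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
                           authority), "w") as fp_write:
        if is_json:
            json.dump(content, fp_write)
        else:
            fp_write.write(content)

# the file is born 0o600; no window where it is world-readable
write_update_file("/tmp/streaming_status.json",
                  {"masterUpgradeStatus": 0}, 0o600)
with open("/tmp/streaming_status.json") as handle:
    print(json.load(handle))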
@@ -6,14 +6,28 @@
 # Date : 2021-06-30
 # Description : security_checker.py check security conditions
 #############################################################################

+import re
 from gspylib.common.ErrorCode import ErrorCode


+class ValidationError(Exception):
+    """
+    validation base error
+    """
+    def __init__(self, error_info):
+        super().__init__(self)
+        self.error_info = error_info
+
+    def __str__(self):
+        return self.error_info
+
+
 class SecurityChecker(object):
     """check security conditions"""
     INJECTION_CHAR_LIST = ["|", ";", "&", "$", "<", ">", "`", "\\", "'", "\"", "{", "}", "(", ")",
                            "[", "]", "~", "*", "?", " ", "!", "\n"]
+    PWD_VALIDATION_PATTERN = r'^[A-Za-z0-9~!@#%^*\-_=+?,]+$'
+    IP_PATTERN = r'^((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)$'

     @staticmethod
     def check_injection_char(check_value):
@@ -27,3 +41,83 @@ class SecurityChecker(object):
         if any(rac in check_value for rac in SecurityChecker.INJECTION_CHAR_LIST):
             raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % check_value +
                             " There are illegal characters.")
+
+    @staticmethod
+    def check_is_string(description, value):
+        """
+        Check is string
+        """
+        if not isinstance(value, str):
+            raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50022'] % (description, 'string'))
+
+    @staticmethod
+    def check_max_length(description, value, max_length):
+        """
+        Check max length
+        """
+        if len(value) > max_length:
+            raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50023"] % (description, max_length))
+
+    @staticmethod
+    def check_db_injection(description, value):
+        """
+        Check db injection
+        """
+        for rac in SecurityChecker.INJECTION_CHAR_LIST:
+            if value.find(rac) > 0:
+                raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50025'] % (rac, description))
+
+    @staticmethod
+    def check_password(description, value):
+        if not re.match(SecurityChecker.PWD_VALIDATION_PATTERN, value):
+            raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50024"] % description)
+
+    @staticmethod
+    def check_db_user(description, value):
+        SecurityChecker.check_is_string(description, value)
+        SecurityChecker.check_max_length(description, value, 256)
+        SecurityChecker.check_db_injection(description, value)
+
+    @staticmethod
+    def check_db_password(description, value):
+        SecurityChecker.check_is_string(description, value)
+        SecurityChecker.check_max_length(description, value, 256)
+        SecurityChecker.check_password(description, value)
+
+    @staticmethod
+    def check_is_digit(description, value):
+        if isinstance(value, int):
+            return
+        elif isinstance(value, str):
+            if not value.isdigit():
+                raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50022']
+                                      % (description, 'integer'))
+        else:
+            raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50022']
+                                  % (description, 'int or string'))
+
+    @staticmethod
+    def check_is_list(description, value):
+        if not isinstance(value, list):
+            raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50022'] % (description, 'list'))
+
+    @staticmethod
+    def check_is_dict(description, value):
+        if not isinstance(value, dict):
+            raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50022'] % (description, 'dict'))
+
+    @staticmethod
+    def check_ip_valid(description, value):
+        SecurityChecker.check_is_string(description, value)
+        if not re.match(SecurityChecker.IP_PATTERN, value):
+            raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50024"] % description)
+
+    @staticmethod
+    def check_port_valid(description, value):
+        SecurityChecker.check_is_digit(description, value)
+        value = int(value) if not isinstance(value, int) else value
+        if value > 65535 or value < 0:
+            raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50022']
+                                  % (description, 'between 0 and 65535'))
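Review note: the new validators raise ValidationError rather than a bare Exception, so callers can distinguish bad input from internal failures. A hedged usage sketch of the same pattern, simplified to omit the repo's ErrorCode message table:

import re

class ValidationError(Exception):
    """Raised when a caller-supplied value fails validation."""

IP_PATTERN = r'^((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)$'

def check_ip_valid(description, value):
    if not isinstance(value, str) or not re.match(IP_PATTERN, value):
        raise ValidationError("invalid %s: %r" % (description, value))

def check_port_valid(description, value):
    if not str(value).isdigit() or not 0 <= int(value) <= 65535:
        raise ValidationError("invalid %s: %r" % (description, value))

try:
    check_ip_valid("backIp", "10.0.0.300")   # octet out of range, rejected
except ValidationError as err:
    print(err)
check_port_valid("dn port", 5432)            # passes silently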
@@ -87,7 +87,7 @@ class Checkperf():

     def usage(self):
         """
-gs_checkperf is a utility to check the cluster performance and SSD performance.
+gs_checkperf is a utility to check the cluster performance and SSD performance, streaming disaster cluster does not yet support.

 Usage:
   gs_checkperf -? | --help
@@ -253,6 +253,11 @@ General options:
             binPath = os.path.join(self.clusterInfo.appPath, "bin")
             g_opts.databaseSizeFile = os.path.join(binPath,
                                                    DefaultValue.DB_SIZE_FILE)
+            is_disaster_cluster = \
+                DefaultValue.cm_exist_and_is_disaster_cluster(self.clusterInfo, g_logger)
+            if is_disaster_cluster:
+                GaussLog.exitWithError(
+                    ErrorCode.GAUSS_512["GAUSS_51244"] % " Disaster cluster")
         except Exception as e:
             g_logger.logExit(str(e))
@@ -86,7 +86,7 @@ class Dropnode(ParallelBaseOM):

     def usage(self):
         """
-gs_dropnode is a utility to delete the standby node from a cluster.
+gs_dropnode is a utility to delete the standby node from a cluster, streaming cluster does not yet support.

 Usage:
   gs_dropnode -? | --help
@@ -335,6 +335,7 @@ if __name__ == "__main__":
     dropNode = Dropnode()
     dropNode.parseCommandLine()
     dropNode.initLogs()
+    DefaultValue.check_is_streaming_dr_cluster()
     dropNode.check_repeat_process()
     dropNode.checkParameters()
     dropNode.checkConnection(list(dropNode.backIpNameMap.keys()),
@@ -87,7 +87,7 @@ class Expansion(ParallelBaseOM):

     def usage(self):
         """
-gs_expansion is a utility to expansion standby node for a cluster.
+gs_expansion is a utility to expansion standby node for a cluster, streaming cluster does not yet support.

 Usage:
   gs_expansion -? | --help
new file: script/gs_sdr (95 lines)
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#############################################################################
+# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
+#
+# openGauss is licensed under Mulan PSL v2.
+# You can use this software according to the terms
+# and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+#
+#          http://license.coscl.org.cn/MulanPSL2
+#
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ----------------------------------------------------------------------------
+# Description : gs_sdr is a utility for streaming
+# disaster recovery fully options.
+#############################################################################
+
+import os
+import uuid
+
+from gspylib.common.Common import DefaultValue
+from gspylib.common.ErrorCode import ErrorCode
+from gspylib.common.GaussLog import GaussLog
+from impl.streaming_disaster_recovery.streaming_constants import StreamingConstants
+from base_utils.os.user_util import UserUtil
+from domain_utils.cluster_file.cluster_log import ClusterLog
+from impl.streaming_disaster_recovery.params_handler import ParamsHandler
+from impl.streaming_disaster_recovery.streaming_modules.\
+    streaming_diaster_recovery_start import StreamingStartHandler
+from impl.streaming_disaster_recovery.streaming_modules.\
+    streaming_disaster_recovery_stop import StreamingStopHandler
+from impl.streaming_disaster_recovery.streaming_modules.\
+    streaming_disaster_recovery_failover import StreamingFailoverHandler
+from impl.streaming_disaster_recovery.streaming_modules.\
+    streaming_disaster_recovery_switchover import StreamingSwitchoverHandler
+from impl.streaming_disaster_recovery.streaming_modules.\
+    streaming_disaster_recovery_query import StreamingQueryHandler
+
+HANDLER_MAPPING = {
+    "start": StreamingStartHandler,
+    "stop": StreamingStopHandler,
+    "switchover": StreamingSwitchoverHandler,
+    "failover": StreamingFailoverHandler,
+    "query": StreamingQueryHandler
+}
+
+
+class StreamingDisasterRecoveryBase(object):
+    def __init__(self):
+        self.params = None
+        self.user = None
+        self.log_file = None
+        self.logger = None
+        self.trace_id = uuid.uuid1().hex
+        StreamingDisasterRecoveryBase.mock_process_user_sensitive_info()
+        self.__init_globals()
+
+    @staticmethod
+    def mock_process_user_sensitive_info():
+        """mock_process_user_sensitive_info"""
+        cmdline = DefaultValue.get_proc_title("-W")
+        DefaultValue.set_proc_title(cmdline)
+
+    def __init_globals(self):
+        self.user = UserUtil.getUserInfo()['name']
+        tmp_logger_file = ClusterLog.getOMLogPath(StreamingConstants.STREAMING_LOG_FILE, self.user)
+        tmp_logger = GaussLog(tmp_logger_file, 'parse_and_validate_params', trace_id=self.trace_id)
+        self.params = ParamsHandler(tmp_logger, self.trace_id).get_valid_params()
+        self.log_file = self.params.logFile if self.params.logFile else \
+            ClusterLog.getOMLogPath(StreamingConstants.STREAMING_LOG_FILE, self.user)
+        self.logger = GaussLog(self.log_file, self.params.task, trace_id=self.trace_id)
+
+
+if __name__ == '__main__':
+    if os.getuid() == 0:
+        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"])
+
+    base = StreamingDisasterRecoveryBase()
+    handler = HANDLER_MAPPING[base.params.task](base.params, base.user,
+                                                base.logger, base.trace_id, base.log_file)
+    handler.handle_lock_file(handler.trace_id, 'create')
+    try:
+        if base.params.task in StreamingConstants.TASK_EXIST_CHECK:
+            handler.check_streaming_process_is_running()
+        handler.run()
+    except Exception as error:
+        handler.logger.error(error)
+        raise Exception(str(error))
+    finally:
+        handler.handle_lock_file(handler.trace_id, 'remove')
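Review note: gs_sdr routes each subcommand through HANDLER_MAPPING, a plain dict dispatch from task name to handler class. A minimal standalone sketch of the pattern; the handler classes here are stand-ins, not the repo's:

class StartHandler:
    def __init__(self, params):
        self.params = params

    def run(self):
        print("starting DR relationship with", self.params)

class QueryHandler(StartHandler):
    def run(self):
        print("querying DR status")

HANDLER_MAPPING = {"start": StartHandler, "query": QueryHandler}

def dispatch(task, params):
    """Look the task up, instantiate its handler, and run it."""
    if task not in HANDLER_MAPPING:
        raise SystemExit("unsupported task: %s" % task)
    HANDLER_MAPPING[task](params).run()

dispatch("query", {})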
@@ -45,6 +45,7 @@ import pwd
 import grp
 import copy
 import re
+import json

 from gspylib.common.Common import DefaultValue
 from gspylib.common.GaussLog import GaussLog
@@ -60,6 +61,23 @@ from base_utils.os.net_util import NetUtil
 from domain_utils.domain_common.cluster_constants import ClusterConstants


+class DualUpgradeShareInfo:
+    """
+    Used to record the upgrade status information of the primary and standby clusters
+
+    """
+
+    def __init__(self, jsonInfo=None):
+        # If the Json string is passed in, the Json information is used to initialize the class
+        if jsonInfo:
+            self.__dict__ = jsonInfo
+        else:
+            self.masterVersion = ""
+            self.masterUpgradeStatus = 0
+            self.standbyVersion = ""
+            self.standbyUpgradeStatus = 0
+
+
 class Upgrade(ParallelBaseOM):
     """
     The class about upgrade
@@ -90,6 +108,14 @@ class Upgrade(ParallelBaseOM):
         self.oldClusterNumber = None
         self.forceRollback = False
         self.upgrade_remain = False
+        # Record the upgrade status information under dual clusters
+        self.dualUpgradeShareInfo = None
+        # Record the primary cluster or the standby cluster, dual-primary or dual-standby
+        self.clusterType = ""
+        # Whether it is a standby cluster in a dual cluster. Convenient to judge
+        self.standbyCluster = False
+        # The path to record the information of each cluster upgrade stage in the dual cluster
+        self.upgradePhaseInfoPath = ""

     def usage(self):
         """
@@ -153,6 +179,10 @@ Option for grey upgrade
             self.upgrade_remain = True
         if "force" in ParaDict.keys():
             self.forceRollback = True
+        self.tmpDir = EnvUtil.getTmpDirFromEnv()
+        if self.tmpDir == "":
+            raise Exception(ErrorCode.GAUSS_518["GAUSS_51800"] % "$PGHOST")
+        self.upgradePhaseInfoPath = os.path.join(self.tmpDir, Const.UPGRADE_PHASE_INFO)

     def checkUser(self):
         """
@@ -299,6 +329,69 @@ Option for grey upgrade
             raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % nodeName)
         self.logger.debug("Successfully init global infos")

+        # If it is a dual-cluster, initialize the related information of the dual-cluster
+        self.initDualUpgradeInfo()
+
+    def initDualUpgradeInfo(self):
+        """
+        initialize dual cluster upgrade status information
+        If it is not a dual cluster, do not initialize
+        :return:
+        """
+        if os.path.exists(self.upgradePhaseInfoPath):
+            if self.is_inplace_upgrade and self.action not in \
+                    ["commit-upgrade", "auto-rollback", "chose-strategy"]:
+                raise Exception("Dual cluster does not support in-place upgrade")
+            self.dualUpgradeShareInfo = self.getDualUpgradeInfo(self.upgradePhaseInfoPath,
+                                                                startPost=0)
+        if not self.dualUpgradeShareInfo:
+            self.dualUpgradeShareInfo = DualUpgradeShareInfo()
+
+    @staticmethod
+    def getDualUpgradeInfo(filePath, startPost):
+        """
+        Obtain the dual-cluster upgrade status information from the file,
+        and return None if there is no record
+        :return:
+        """
+        if os.path.exists(filePath):
+            lenInfo = 0
+            with open(filePath, 'r') as shareInfo:
+                shareInfo.seek(startPost)
+                length = shareInfo.read(4)
+                if length > '':
+                    try:
+                        lenInfo = int(length)
+                    except Exception as _:
+                        lenInfo = 0
+                if lenInfo > 0:
+                    shareInfo.seek(startPost + 4)
+                    return json.loads(shareInfo.read(lenInfo), object_hook=DualUpgradeShareInfo)
+        return None
+
+    def updateDualUpgradeInfo(self, dualUpgradeShareInfo, filePath, startPost):
+        """
+        Update the upgrade information of the cluster to the dual-cluster
+        shared file /dev/my_disk_sync_disk file
+        :return:
+        """
+        if os.path.exists(filePath):
+            with os.fdopen(os.open(filePath, os.O_WRONLY, 0o600), "w") as shareInfo:
+                shareInfo.seek(startPost + Const.LENGTH_STORAGE_INFO_LEN)
+                shareInfo.write(json.dumps(dualUpgradeShareInfo, default=lambda obj: obj.__dict__))
+                length = shareInfo.tell() - (startPost + Const.LENGTH_STORAGE_INFO_LEN)
+                shareInfo.seek(startPost, 0)
+                shareInfo.write("{0:04d}".format(length))
+
+            # After the status file is updated, the standby cluster
+            # distributes the updated status file to the data directory of the DN.
+            for dbNode in self.clusterInfo.dbNodes:
+                for dnInst in dbNode.datanodes:
+                    self.sshTool.scpFiles(filePath, dnInst.datadir,
+                                          hostList=[dnInst.hostname])
+        else:
+            raise Exception("{0} file does not exist and cannot be updated".format(filePath))
+
     def distributeFileToSpecialNode(self, file, destDir, hostList):
         """
         distribute file to special node
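Review note: getDualUpgradeInfo/updateDualUpgradeInfo store the shared upgrade status as a 4-digit ASCII length header followed by a JSON payload at a fixed offset, so either cluster can rewrite its record in place. A hedged standalone sketch of that record format (HEADER_LEN stands in for Const.LENGTH_STORAGE_INFO_LEN):

import json
import os

HEADER_LEN = 4  # "%04d" length prefix

def write_record(path, obj, start=0):
    """Write a 4-digit length header, then the JSON payload, at offset `start`."""
    payload = json.dumps(obj)
    with open(path, "r+" if os.path.exists(path) else "w+") as handle:
        handle.seek(start)
        handle.write("{0:04d}".format(len(payload)))
        handle.write(payload)

def read_record(path, start=0):
    """Read the length header, then exactly that many characters of JSON."""
    with open(path) as handle:
        handle.seek(start)
        length = int(handle.read(HEADER_LEN) or 0)
        return json.loads(handle.read(length)) if length else None

write_record("/tmp/upgrade_phase_info", {"masterUpgradeStatus": 2})
print(read_record("/tmp/upgrade_phase_info"))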
@@ -17,6 +17,7 @@
 # ----------------------------------------------------------------------------
 # Description : Common is a utility with a lot of common functions
 #############################################################################
+import ctypes
 import sys
 import subprocess
 import os
@@ -28,6 +29,7 @@ import time
 import multiprocessing
 import _thread as thread
 import pwd
+import json
 import base64
 import secrets
 import string
@@ -35,6 +37,7 @@ import stat
 import csv
 import copy
 from subprocess import PIPE
+from subprocess import Popen

 # The installation starts, but the package is not decompressed completely.
 # The lib64/libz.so.1 file is incomplete, and the hashlib depends on the
@@ -106,6 +109,7 @@ from base_utils.os.cmd_util import CmdUtil
 from base_utils.os.env_util import EnvUtil
 from base_utils.os.file_util import FileUtil
 from domain_utils.cluster_file.version_info import VersionInfo
+from domain_utils.cluster_file.cluster_dir import ClusterDir
 from domain_utils.security.random_value import RandomValue
 from base_utils.os.process_util import ProcessUtil
 from domain_utils.sql_handler.sql_executor import SqlExecutor
@@ -199,6 +203,7 @@ class DefaultValue():
     FILE_MODE = 640
     FILE_MODE_PERMISSION = 0o640
     KEY_FILE_MODE = 600
+    KEY_FILE_MODE_IN_OS = 0o600
     MIN_FILE_MODE = 400
     SPE_FILE_MODE = 500
     KEY_DIRECTORY_MODE = 700
@@ -318,6 +323,9 @@ class DefaultValue():
     # FI_ELK_KRB_XML is used in elk
     FI_ELK_KRB_XML = "auth_config/elk-krb-site.xml"
     FI_KRB_CONF = "krb5.conf"
+    # cluster status
+    CLUSTER_STATUS_NORMAL = "Normal"
+    CLUSTER_STATUS_DEGRADED = "Degraded"
     ###########################
     # instance role
     ###########################
@@ -615,6 +623,60 @@ class DefaultValue():

         return NetWorkConfFile

+    @staticmethod
+    def get_remote_ips(host, mpp_file):
+        """
+        Get ips from remote host
+        """
+        cmd = "source %s && pssh -s -t 30 -H %s \"hostname -I\"" % (mpp_file, host)
+        status, output = subprocess.getstatusoutput(cmd)
+        if status == 0 and output != "":
+            ips = output.strip().split()
+            return ips
+        else:
+            raise Exception(ErrorCode.GAUSS_516['GAUSS_51632']
+                            % "check remote ips for node:%s, Error:%s." % (host, output))
+
+    @staticmethod
+    def obtain_file_content(dest_file, deduplicate=True, is_list=True):
+        """
+        function:obtains the content of each line in the file.
+        input: file dir
+        :return: file context lines list
+        """
+        result = [] if is_list else None
+        if not os.path.isfile(dest_file):
+            return result
+        with open(dest_file, "r") as fp_read:
+            if is_list:
+                for line in fp_read:
+                    result.append(line.strip('\n'))
+            else:
+                result = fp_read.read().strip()
+        if deduplicate and is_list:
+            result = list(set(result))
+        return result
+
+    @staticmethod
+    def get_all_dn_num_for_dr(file_path, dn_inst, cluster_info, logger):
+        """get_all_dn_num_for_dr_cluster"""
+        # DN inst supports a maximum of replicaNum=8 in postgresql.conf.
+        default_num = 8
+        content = DefaultValue.obtain_file_content(file_path, is_list=False)
+        if content:
+            default_num = 0
+            shards = json.loads(content)['remoteClusterConf']["shards"]
+            logger.debug("Stream cluster json shards:%s" % shards)
+            if cluster_info.isSingleInstCluster():
+                for shard in shards:
+                    default_num += len(shard)
+            else:
+                default_num += len(shards[0])
+            peer_insts = cluster_info.getPeerInstance(dn_inst)
+            default_num += len(peer_insts)
+        logger.debug("Get config replconninfo dn num:%s" % default_num)
+        return default_num
+
     @staticmethod
     def getIpByHostName():
         '''
@ -1616,6 +1678,45 @@ class DefaultValue():
|
||||
noPassIPs.append(ip)
|
||||
g_lock.release()
|
||||
|
||||
@staticmethod
|
||||
def fast_ping(node_ip):
|
||||
"""
|
||||
ping node with short timeout
|
||||
"""
|
||||
cmd = "ping %s -c 1 -w 4" % node_ip
|
||||
proc = FastPopen(cmd, stdout=PIPE, stderr=PIPE, preexec_fn=os.setsid, close_fds=True)
|
||||
proc.communicate()
|
||||
status = proc.returncode
|
||||
result = (node_ip, True) if status == 0 else (node_ip, False)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def fast_ping_on_node(on_node, from_ip, to_ip, logger):
|
||||
"""
|
||||
Ping on remote node with -I
|
||||
"""
|
||||
cmd = "ping %s -c 1 -w 4" % on_node
|
||||
proc = FastPopen(cmd, stdout=PIPE, stderr=PIPE,
|
||||
preexec_fn=os.setsid, close_fds=True)
|
||||
proc.communicate()
|
||||
status = proc.returncode
|
||||
if status != 0:
|
||||
logger.debug("Node:%s ping failed, can not execute remote check." % on_node)
|
||||
return on_node, False
|
||||
if on_node == NetUtil.GetHostIpOrName():
|
||||
cmd_remote = "ping %s -I %s -c 1 -w 4" % (to_ip, from_ip)
|
||||
else:
|
||||
cmd_remote = "source %s && pssh -s -H %s 'ping %s -I %s -c 1 -w 4'" \
|
||||
% (EnvUtil.getMpprcFile(), on_node, to_ip, from_ip)
|
||||
proc = FastPopen(cmd_remote, stdout=PIPE, stderr=PIPE,
|
||||
preexec_fn=os.setsid, close_fds=True)
|
||||
proc.communicate()
|
||||
status = proc.returncode
|
||||
result = (to_ip, True) if status == 0 else (to_ip, False)
|
||||
logger.debug("Remote ping result on node:%s, from ip:%s, to ip:%s, result:%s."
|
||||
% (on_node, from_ip, to_ip, result))
|
||||
return result
|
||||
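fast_ping deliberately returns an (ip, reachable) tuple instead of raising, which makes it easy to fan out across a cluster with the multiprocessing module imported above. A minimal sketch (ping_all and the sample addresses are illustrative, not part of the patch):

from multiprocessing import Pool

def ping_all(hosts, parallelism=8):
    # Each worker returns (ip, True/False), exactly the shape fast_ping yields.
    with Pool(parallelism) as pool:
        results = pool.map(DefaultValue.fast_ping, hosts)
    return dict(results)

# e.g. unreachable = [ip for ip, ok in ping_all(["10.0.0.1", "10.0.0.2"]).items() if not ok]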

    @staticmethod
    def checkIsPing(ips):
        """
@@ -2259,7 +2360,7 @@ class DefaultValue():
                            "Command:%s. Error:\n%s" % (cmd, output))
        targetString = output.split("Datanode")[1]
        dnPrimary = [x for x in re.split(r"[|\n]", targetString)
-                     if flagStr in x]
+                     if flagStr in x or "Main" in x]
        primaryList = []
        for dn in dnPrimary:
            primaryList.append(list(filter(None, dn.split(" ")))[1])
@@ -2866,6 +2967,283 @@ class DefaultValue():
                       "on node [{0}] successfully.".format(node.name))
        logger.log("Remove dynamic_config_file and CM metadata directory on all nodes.")

    @staticmethod
    def distribute_file_to_node(params):
        """
        Distribute file to dest node with path
        """
        dest_ip, from_path, to_path, timeout = params
        pscp_cmd = "source %s ; pscp -t %s -H %s %s %s" % (
            EnvUtil.getMpprcFile(), timeout, dest_ip, from_path, to_path)
        status, output = CmdUtil.getstatusoutput_by_fast_popen(pscp_cmd)
        return status, output, dest_ip

    @staticmethod
    def check_is_cm_cluster(logger):
        """
        Check whether cm_ctl exists.
        """
        cmd = "source %s; cm_ctl view | grep cmDataPath" % EnvUtil.getMpprcFile()
        status, output = CmdUtil.retryGetstatusoutput(cmd)
        if status != 0:
            logger.debug("Check cm_ctl failed, msg: %s." % output)
            return False
        logger.debug("Successfully checked that cm_ctl is available.")
        return True

    @staticmethod
    def is_disaster_cluster(clusterinfo):
        """
        function: determine whether the cluster is a disaster recovery cluster
        input: clusterinfo
        output: True/False
        """
        cmd = "source %s; cm_ctl view | grep cmDataPath | awk -F [:] '{print $2}' | head -n 1" % \
              EnvUtil.getMpprcFile()
        proc = FastPopen(cmd, stdout=PIPE, stderr=PIPE)
        stdout, stderr = proc.communicate()
        if proc.returncode != 0:
            raise Exception(ErrorCode.GAUSS_514['GAUSS_51400'] % cmd + "Error:\n%s" % stderr)
        cm_agent_conf_file = stdout.strip() + "/cm_agent/cm_agent.conf"
        if not os.path.isfile(cm_agent_conf_file):
            host_list = clusterinfo.getClusterNodeNames()
            cm_agent_conf_temp_file = os.path.join(EnvUtil.getTmpDirFromEnv(), "cm_agent_tmp.conf")
            for host_ip in host_list:
                get_file_cmd = g_file.SHELL_CMD_DICT["scpFileFromRemote"] % \
                               (host_ip, NetUtil.GetHostIpOrName(),
                                cm_agent_conf_file, cm_agent_conf_temp_file)
                proc = FastPopen(get_file_cmd, stdout=PIPE, stderr=PIPE)
                stdout, stderr = proc.communicate()
                if not os.path.isfile(cm_agent_conf_temp_file):
                    continue
                else:
                    break
            if os.path.isfile(cm_agent_conf_temp_file):
                with open(cm_agent_conf_temp_file, "r") as cma_conf_file:
                    content = cma_conf_file.read()
                    ret = re.findall(r'agent_backup_open *= *1|agent_backup_open *= *2', content)
                g_file.removeFile(cm_agent_conf_temp_file)
                if ret:
                    return True
                else:
                    return False
            else:
                raise Exception(ErrorCode.GAUSS_502['GAUSS_50201'] % cm_agent_conf_file)
        with open(cm_agent_conf_file, "r") as cma_conf_file:
            content = cma_conf_file.read()
            ret = re.findall(r'agent_backup_open *= *1|agent_backup_open *= *2', content)
            if ret:
                return True
            else:
                return False

    @staticmethod
    def cm_exist_and_is_disaster_cluster(clusterinfo, logger):
        """
        check that the current cluster has CM and is a disaster recovery cluster.
        """
        cm_exist = DefaultValue.check_is_cm_cluster(logger)
        if not cm_exist:
            return False
        is_disaster = DefaultValue.is_disaster_cluster(clusterinfo)
        if not is_disaster:
            return False
        return True

    @staticmethod
    def write_content_on_file(dest_file, content, authority=None):
        """
        Write content to file
        """
        authority = authority if authority else DefaultValue.KEY_FILE_MODE_IN_OS
        with os.fdopen(os.open(dest_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
                               authority), "w") as fp_write:
            fp_write.write(str(content))

    @staticmethod
    def get_data_ip_info(instance, logger):
        """
        Obtain data ip from file or cluster instance.
        """
        cluster_conf_record = os.path.join(EnvUtil.getEnv("PGHOST"),
                                           "streaming_cabin/cluster_conf_record")
        if not os.path.isfile(cluster_conf_record):
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % cluster_conf_record)
        with open(cluster_conf_record, 'r') as read_fp:
            conf_dict = json.load(read_fp)
        if not conf_dict or len(conf_dict) != 2:
            logger.debug("Failed to obtain data ip list.")
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "check data ip file")
        inst_data_ip = ""
        local_shards_list = conf_dict["localClusterConf"]["shards"]
        for shard_list in local_shards_list:
            for shard in shard_list:
                if shard["ip"] not in instance.listenIps:
                    continue
                inst_data_ip = shard["dataIp"]
        logger.debug("File record:%s, \nGot data ip:%s for instanceId:%s." %
                     (conf_dict, inst_data_ip, instance.instanceId))
        if not inst_data_ip:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain local data ip")
        return inst_data_ip

    @staticmethod
    def obtain_hadr_user_encrypt_str(cluster_info, db_user, logger, mode, ignore_res=False):
        """
        Obtain hadr user encrypted string
        """
        sql = "select value from gs_global_config where name='hadr_user_info';"
        instances = []
        for node in cluster_info.dbNodes:
            if cluster_info.isSingleInstCluster():
                for inst in node.datanodes:
                    instances.append(inst)
        for inst in instances:
            logger.debug("Obtain hadr user info string on node:%s with port:%s."
                         % (inst.hostname, inst.port))
            status, output = ClusterCommand.remoteSQLCommand(sql, db_user, inst.hostname,
                                                             inst.port, maintenance_mode=mode)
            if status == 0 and output:
                logger.debug("Successfully obtained hadr user info string.")
                return output
        if ignore_res:
            return
        logger.debug("Failed to obtain hadr user info string.")
        raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "obtain hadr user info")

    @staticmethod
    def getstatusoutput_hide_pass(joint_cmd):
        """
        Run a command via the shell's stdin so that passwords in it are not
        exposed in the process list.
        """
        proc = Popen(["sh", "-"], stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
        stdout, stderr = proc.communicate(joint_cmd)
        text = stderr or stdout
        sts = proc.returncode
        if sts is None:
            sts = 0
        if text and text[-1:] == '\n':
            text = text[:-1]
        return sts, text
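The point of getstatusoutput_hide_pass is that the command text travels over the spawned shell's stdin instead of its argv, so a password embedded in it never appears in /proc/&lt;pid&gt;/cmdline or ps output. A usage sketch (the gsql flags are illustrative assumptions, not taken from the patch):

secret_cmd = "gsql -p 5432 -U hadr_user -W 'Secret@123' -c 'select 1;'"
status, text = DefaultValue.getstatusoutput_hide_pass(secret_cmd)
# By contrast, subprocess.getstatusoutput(secret_cmd) hands the whole string,
# password included, to "sh -c", leaving it visible in the process list.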

    @staticmethod
    def decrypt_hadr_user_info(params):
        """
        Decrypt hadr user info
        """
        if len(params) != 6:
            raise Exception(ErrorCode.GAUSS_500["GAUSS_50000"] % "decrypt hadr user info")
        rand_pwd, hadr_str, cluster_info, db_user, logger, mode = params
        sql = "select pg_catalog.gs_decrypt_aes128('%s', '%s');" % (hadr_str, rand_pwd)
        instances = []
        for node in cluster_info.dbNodes:
            if cluster_info.isSingleInstCluster():
                for inst in node.datanodes:
                    instances.append(inst)
            else:
                for inst in node.coordinators:
                    instances.append(inst)
        for inst in instances:
            logger.debug("Decrypt hadr user info on node:%s with port:%s."
                         % (inst.hostname, inst.port))
            status, output = ClusterCommand.remoteSQLCommand(sql, db_user, inst.hostname,
                                                             inst.port, maintenance_mode=mode)
            if status == 0 and output and "|" in output and len(output.split("|")) == 2:
                logger.debug("Successfully decrypted hadr user info string.")
                hadr_user, hadr_pwd = output.strip().split("|")[0], output.strip().split("|")[1]
                return hadr_user, hadr_pwd
        logger.debug("Failed to decrypt hadr user info string.")
        raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "decrypt hadr user info")

    @staticmethod
    def decrypt_hadr_rand_pwd(logger):
        """
        Decrypt hadr rand pwd
        """
        db_user = pwd.getpwuid(os.getuid()).pw_name
        gauss_home = ClusterDir.getInstallDir(db_user)
        bin_path = os.path.join(os.path.realpath(gauss_home), "bin")
        if not bin_path:
            logger.debug("Failed to obtain bin path.")
            raise Exception(ErrorCode.GAUSS_518["GAUSS_51802"] % "bin path")
        cipher_file = os.path.join(EnvUtil.getTmpDirFromEnv(), "binary_upgrade/hadr.key.cipher")
        rand_file = os.path.join(EnvUtil.getTmpDirFromEnv(), "binary_upgrade/hadr.key.rand")
        if os.path.isfile(cipher_file) and os.path.isfile(rand_file):
            bin_path = os.path.join(EnvUtil.getTmpDirFromEnv(), "binary_upgrade")
        rand_pwd = AesCbcUtil.aes_cbc_decrypt_with_path(bin_path, bin_path, key_name="hadr")
        if rand_pwd:
            logger.debug("Successfully decrypted rand pwd.")
            return rand_pwd

    @staticmethod
    def get_proc_title(pwd_para_name):
        """
        Obtain the process name after sensitive information is hidden.
        """
        cmd = "cat /proc/%s/cmdline" % os.getpid()
        status, output = CmdUtil.retryGetstatusoutput(cmd)
        if status != 0 or not output:
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "proc title" + " Cmd is:%s." % cmd)
        title_str_list = []
        for title_str in output.split("\0"):
            if "=" in title_str:
                title_str_list.extend(title_str.split("="))
            else:
                title_str_list.extend(title_str.split(" "))
        if pwd_para_name in title_str_list:
            w_index = title_str_list.index(pwd_para_name)
            title_str_list[w_index], title_str_list[w_index + 1] = "", ""
        title_name = " ".join(title_str_list).strip()
        return title_name

    @staticmethod
    def set_proc_title(name):
        """
        set proc title to new name
        """
        new_name = name.encode('ascii', 'replace')
        try:
            libc = ctypes.CDLL('libc.so.6')
            proc_name = ctypes.c_char_p.in_dll(libc, '__progname_full')
            with open('/proc/self/cmdline') as fp:
                old_progname_len = len(fp.readline())
            if old_progname_len > len(new_name):
                # pad with blank chars
                new_name += b' ' * (old_progname_len - len(new_name))
            # Environment variables are already copied to the Python app zone.
            # We can still read them through the `os.environ` module,
            # so we can ignore the damage done by the following action.
            libc.strcpy(proc_name, ctypes.c_char_p(new_name))
            buff = ctypes.create_string_buffer(len(new_name) + 1)
            buff.value = new_name
            libc.prctl(15, ctypes.byref(buff), 0, 0, 0)  # 15 == PR_SET_NAME
        except Exception as err_msg:
            raise Exception(ErrorCode.GAUSS_505["GAUSS_50503"] + str(err_msg))
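Taken together, the two helpers scrub a password from an already-running process: get_proc_title rebuilds argv with the token after the password flag blanked out, and set_proc_title overwrites the in-kernel copy. A sketch, assuming "-W" is the password flag being hidden:

title = DefaultValue.get_proc_title("-W")  # argv text with "-W <value>" blanked
DefaultValue.set_proc_title(title)         # rewrite __progname_full and PR_SET_NAME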

    @staticmethod
    def check_is_streaming_dr_cluster():
        """check_is_streaming_dr_cluster"""
        stream_file = os.path.realpath(os.path.join(EnvUtil.getEnv("PGHOST"), "streaming_cabin"))
        if os.path.exists(stream_file):
            sys.exit(ErrorCode.GAUSS_512["GAUSS_51244"] % "current operate on dr cluster")

    @staticmethod
    def get_primary_dn_instance_id(inst_status="Primary", ignore=False):
        """
        function: get Primary/Standby dn instance id for centralized/distribute cluster
        :param: inst_status Primary/Standby
        :return: instance id list
        """
        cmd = r"source %s; cm_ctl query -v | grep -E 'instance_state\ *:\ %s' " \
              r"-B 4 | grep -E 'type\ *:\ Datanode' -B 5 | grep instance_id | awk " \
              r"'{print $NF}'" % (EnvUtil.getMpprcFile(), inst_status)
        (status, output) = CmdUtil.retryGetstatusoutput(cmd)
        if status != 0 or not output:
            if ignore is True:
                return []
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                            cmd + " Error: \n%s" % output)
        return output.strip().split('\n')

    @staticmethod
    def isgreyUpgradeNodeSpecify(user, step=-1, nodes=None, logger=None):
        """
@@ -2988,6 +3366,29 @@ class ClusterCommand():
    # rollback to flag of start cluster
    INSTALL_STEP_START = "Start cluster"

    @staticmethod
    def getStartCmd(nodeId=0, timeout=DefaultValue.TIMEOUT_CLUSTER_START, datadir="", azName=""):
        """
        function : Build the command that starts the whole cluster or a single node
        input : String,int,String,String
        output : String
        """
        user_profile = EnvUtil.getMpprcFile()
        cmd = "%s %s ; cm_ctl start" % (CmdUtil.SOURCE_CMD, user_profile)
        # check node id
        if nodeId > 0:
            cmd += " -n %d" % nodeId
        # check data directory
        if datadir != "":
            cmd += " -D %s" % datadir
        # check timeout
        if timeout > 0:
            cmd += " -t %d" % timeout
        # azName
        if azName != "":
            cmd += " -z%s" % azName

        return cmd
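For a concrete feel of the string being assembled (assuming CmdUtil.SOURCE_CMD is "source" and the MPPRC file resolves to the hypothetical /home/omm/.mpprc):

# ClusterCommand.getStartCmd(nodeId=2, timeout=300, azName="AZ1") would return:
#   "source /home/omm/.mpprc ; cm_ctl start -n 2 -t 300 -zAZ1"
# Note the deliberately unspaced "-zAZ1", matching the " -z%s" format above.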

    @staticmethod
    def getStopCmd(nodeId=0, stopMode="", timeout=0, datadir="", azName=""):
@@ -3152,7 +3553,8 @@ class ClusterCommand():
    @staticmethod
    def remoteSQLCommand(sql, user, host, port, ignoreError=True,
                         database="postgres", useTid=False,
-                        IsInplaceUpgrade=False):
+                        IsInplaceUpgrade=False, maintenance_mode=False,
+                        user_name="", user_pwd=""):
        """
        function : Execute sql command on remote host
        input : String,String,String,int
@@ -3220,7 +3622,10 @@ class ClusterCommand():
                gsql_cmd = SqlCommands.getSQLCommandForInplaceUpgradeBackup(
                    port, database)
            else:
-                gsql_cmd = SqlCommands.getSQLCommand(port, database)
+                gsql_cmd = SqlCommands.getSQLCommand(port, database, user_name=user_name,
+                                                     user_pwd=user_pwd)
+            if maintenance_mode:
+                gsql_cmd += " -m "
            if str(localHost) != str(host):
                sshCmd = CmdUtil.getSshCmd(host)
                if os.getuid() == 0 and user != "":
@@ -3233,16 +3638,24 @@ class ClusterCommand():
                    if ignoreError:
                        cmd += " 2>/dev/null"
                else:
-                    cmd = "%s '" % sshCmd
+                    cmd = ""
                    if mpprcFile != "" and mpprcFile is not None:
                        cmd += "source %s;" % mpprcFile
-                    cmd += "%s -f %s --output %s -t -A -X '" % (gsql_cmd,
+                    cmd += "%s -f %s --output %s -t -A -X " % (gsql_cmd,
                                                                sqlFile,
                                                                queryResultFile)
+                    if user_pwd:
+                        cmd = "echo \"%s\" | %s" % (cmd, sshCmd)
+                    else:
+                        cmd = "%s '%s'" % (sshCmd, cmd)
                    if ignoreError:
                        cmd += " 2>/dev/null"
                for i in range(RE_TIMES):
-                    (status1, output1) = subprocess.getstatusoutput(cmd)
+                    proc = FastPopen(cmd, stdout=PIPE, stderr=PIPE,
+                                     preexec_fn=os.setsid, close_fds=True)
+                    stdout, stderr = proc.communicate()
+                    output1 = stdout + stderr
+                    status1 = proc.returncode
                    if SqlFile.findErrorInSqlFile(sqlFile, output1):
                        if SqlFile.findTupleErrorInSqlFile(output1):
                            time.sleep(1)  # find tuple error --> retry
@@ -3278,7 +3691,11 @@ class ClusterCommand():
                if (ignoreError):
                    cmd += " 2>/dev/null"
                for i in range(RE_TIMES):
-                    (status1, output1) = subprocess.getstatusoutput(cmd)
+                    proc = FastPopen(cmd, stdout=PIPE, stderr=PIPE,
+                                     preexec_fn=os.setsid, close_fds=True)
+                    stdout, stderr = proc.communicate()
+                    output1 = stdout + stderr
+                    status1 = proc.returncode
                    if SqlFile.findErrorInSqlFile(sqlFile, output1):
                        if SqlFile.findTupleErrorInSqlFile(output1):
                            time.sleep(1)  # find tuple error --> retry
@@ -3778,6 +4195,83 @@ class ClusterInstanceConfig():

        return connInfo1, nodename

    @staticmethod
    def get_data_from_dcc(cluster_info, logger, user, paralist):
        """
        function: get value from dcc
        :param cluster_info: cluster info
        :param logger: logger obj
        :param user: cluster user
        :param paralist: paralist
        :return: key-value map dict
        """
        gausshome = ClusterDir.getInstallDir(user)
        cm_ctl = os.path.realpath(os.path.join(gausshome, "bin/cm_ctl"))
        if not os.path.isfile(cm_ctl):
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % "file cm_ctl")
        cms_count = 0
        etcd_count = 0
        for dbnode in cluster_info.dbNodes:
            for _ in dbnode.cmservers:
                cms_count += 1
            for _ in dbnode.etcds:
                etcd_count += 1
        if cms_count == 0 or etcd_count > 1:
            raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % paralist)
        para_value_map = {}
        for para_key in paralist:
            cmd = "source %s; %s ddb --get '%s'" % (EnvUtil.getMpprcFile(), cm_ctl, para_key)
            logger.debug("Get dcc value cmd:%s." % cmd)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error:%s" % output)
            logger.debug("Get dcc value:%s." % output)
            res = output.strip("\n").split("\n")
            if len(res) != 2:
                raise Exception(ErrorCode.GAUSS_500["GAUSS_50019"] % res)
            if res[-1].find("Key not found") > -1:
                para_value_map[para_key] = ""
                continue
            para_value_map[para_key] = res[-1].split(":")[-1].strip()
        logger.debug("Get all values from dcc component res:%s." % para_value_map)
        return para_value_map

    @staticmethod
    def set_data_on_dcc(cluster_info, logger, user, paradict):
        """
        function: set data on dcc
        :param cluster_info: cluster info
        :param logger: logger obj
        :param user: cluster user
        :param paradict: paradict
        :return: NA
        """
        gausshome = ClusterDir.getInstallDir(user)
        cm_ctl = os.path.realpath(os.path.join(gausshome, "bin/cm_ctl"))
        if not os.path.isfile(cm_ctl):
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % "file cm_ctl")
        cms_count = 0
        etcd_count = 0
        for dbnode in cluster_info.dbNodes:
            for _ in dbnode.cmservers:
                cms_count += 1
            for _ in dbnode.etcds:
                etcd_count += 1
        if cms_count == 0 or etcd_count > 1:
            raise Exception(ErrorCode.GAUSS_500["GAUSS_50011"] % paradict)
        for para_key in list(paradict.keys()):
            cmd = "source %s; %s ddb --put '%s' '%s'" % \
                  (EnvUtil.getMpprcFile(), cm_ctl, para_key, paradict[para_key])
            logger.debug("Set dcc value cmd:%s." % cmd)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error:%s" % output)
            logger.debug("Set dcc data:%s." % output)
            res = output.strip("\n").split("\n")
            if len(res) != 2:
                raise Exception(ErrorCode.GAUSS_500["GAUSS_50019"] % res)
        logger.debug("Successfully set the dcc data information.")
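A hypothetical round trip through the DCC key-value store via cm_ctl ddb (the key name is invented for illustration; only clusters with a CM server and at most one etcd pass the guard above):

ClusterInstanceConfig.set_data_on_dcc(cluster_info, logger, "omm",
                                      {"/streaming/upgrade_state": "switchover"})
values = ClusterInstanceConfig.get_data_from_dcc(cluster_info, logger, "omm",
                                                 ["/streaming/upgrade_state"])
# values == {"/streaming/upgrade_state": "switchover"}; a key that cm_ctl
# reports as "Key not found" comes back as "".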


class TempfileManagement():
    """

@@ -37,6 +37,7 @@ from domain_utils.cluster_file.version_info import VersionInfo
from domain_utils.domain_common.cluster_constants import ClusterConstants
from base_utils.common.constantsbase import ConstantsBase
from base_utils.os.env_util import EnvUtil
+from base_utils.security.security_checker import SecurityChecker

###########################
# instance role
@@ -958,6 +959,10 @@ class dbClusterInfo():
        # add for dcf
        self.enable_dcf = ""
        self.dcf_config = ""
+        self.local_stream_ip_map = []
+        self.remote_stream_ip_map = []
+        self.remote_dn_base_port = 0
+        self.local_dn_base_port = 0

    def __str__(self):
        """
@@ -1314,7 +1319,7 @@ class dbClusterInfo():
                maxAzNameLen = maxAzNameLen if maxAzNameLen > azNameLen \
                    else azNameLen
                dnNodeCount += 1
-                if roleStatus == "Primary":
+                if roleStatus in ["Primary", "Main"]:
                    primaryDbNum += 1
                    primaryDbState = dbState
                else:
@@ -3395,6 +3400,7 @@ class dbClusterInfo():
            if self.enable_dcf == "":
                i = 0
            ssdInfoList[i].extend(ssddirList)
+        self.parse_stream_cluster_info(masterNode, i)

        # dataNode syncNum
        key = "dataNode%d_syncNum" % (i + 1)
@@ -3620,6 +3626,48 @@ class dbClusterInfo():
        for inst in masterNode.datanodes:
            inst.azName = masterNode.azName

    def parse_stream_cluster_info(self, masternode, i):
        """parse_stream_cluster_info"""
        i = i + 1
        local_ip_map = self.__readNodeStrValue(masternode.name,
                                               "localStreamIpmap%s" % i, True)
        if not local_ip_map:
            return
        remote_ip_map = self.__readNodeStrValue(masternode.name,
                                                "remoteStreamIpmap%s" % i, True)
        remote_dn_port = self.__readNodeStrValue(masternode.name,
                                                 "remotedataPortBase", True)
        local_dn_port = self.__readNodeStrValue(masternode.name,
                                                "dataPortBase", True, MASTER_BASEPORT_DATA)
        if not all([local_ip_map, remote_ip_map, remote_dn_port]):
            raise Exception(
                ErrorCode.GAUSS_512["GAUSS_51236"] + " check streamInfo config is correct")
        self.local_stream_ip_map.append(dbClusterInfo.append_map_ip_into_global(local_ip_map))
        self.remote_stream_ip_map.append(dbClusterInfo.append_map_ip_into_global(remote_ip_map))
        if not remote_dn_port.isdigit() or not local_dn_port.isdigit():
            raise Exception(
                ErrorCode.GAUSS_512["GAUSS_51236"] + " check streamInfo config is correct")
        self.remote_dn_base_port = int(remote_dn_port)
        self.local_dn_base_port = int(local_dn_port)

    @staticmethod
    def append_map_ip_into_global(stream_ip_map):
        """append_map_ip_into_global"""
        shard_map = []
        ip_map_list = [i.strip().strip("),").strip(",(") for i in stream_ip_map.split("(") if i]
        for ip_map in ip_map_list:
            peer_ip_map = ip_map.split(",")
            temp_dict = dict()
            if len(peer_ip_map) != 2:
                raise Exception(ErrorCode.GAUSS_512["GAUSS_51236"] +
                                " check localStreamIpmap is correct")
            temp_dict["ip"] = peer_ip_map[0].strip()
            SecurityChecker.check_ip_valid(temp_dict["ip"], temp_dict["ip"])
            temp_dict["dataIp"] = peer_ip_map[1].strip()
            SecurityChecker.check_ip_valid(temp_dict["dataIp"], temp_dict["dataIp"])
            shard_map.append(temp_dict)
        return shard_map
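A worked example of the ipmap string this parser accepts, i.e. the value of a localStreamIpmap&lt;N&gt;/remoteStreamIpmap&lt;N&gt; entry in the cluster XML (addresses invented):

# dbClusterInfo.append_map_ip_into_global("(10.0.0.1, 192.168.0.1)(10.0.0.2, 192.168.0.2)")
# returns:
#   [{"ip": "10.0.0.1", "dataIp": "192.168.0.1"},
#    {"ip": "10.0.0.2", "dataIp": "192.168.0.2"}]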

    def __readCmaConfig(self, dbNode):
        """
        function : Read cm agent config on node.
@@ -4689,3 +4737,14 @@ class dbClusterInfo():
        :return: True or False
        """
        return self.cmscount < 1

    def getDbNodeByID(self, inputid):
        """
        function : Get node by id.
        input : node id
        output : dbNode or None
        """
        for dbNode in self.dbNodes:
            if dbNode.id == inputid:
                return dbNode
        return None

@@ -21,11 +21,13 @@
import os
import sys


sys.path.append(sys.path[0] + "/../../")
from gspylib.common.Common import DefaultValue, ClusterInstanceConfig
from gspylib.common.DbClusterInfo import dbClusterInfo
from gspylib.common.ErrorCode import ErrorCode
from domain_utils.cluster_os.cluster_user import ClusterUser
+from impl.streaming_disaster_recovery.streaming_constants import StreamingConstants

###########################
# instance type. only for CN/DN
@@ -64,6 +66,7 @@ class StatusReport():
        self.dnPrimary = 0
        self.dnStandby = 0
        self.dn_cascade_standby = 0
+        self.dn_main_standby = 0
        self.dnDummy = 0
        self.dnBuild = 0
        self.dnAbnormal = 0
@@ -124,6 +127,8 @@ class DbInstanceStatus():
        elif self.status == DbClusterStatus.INSTANCE_STATUS_CASCADE_STANDBY:
            if self.haStatus != DbClusterStatus.HA_STATUS_NORMAL:
                return False
+        elif self.status == DbClusterStatus.INSTANCE_STATUS_MAIN_STANDBY:
+            return True
        else:
            return False

@@ -231,6 +236,8 @@ class DbNodeStatus():
            report.dnDummy += 1
        elif inst.status == DbClusterStatus.INSTANCE_STATUS_CASCADE_STANDBY:
            report.dn_cascade_standby += 1
+        elif inst.status == DbClusterStatus.INSTANCE_STATUS_MAIN_STANDBY:
+            report.dn_main_standby += 1
        else:
            report.dnAbnormal += 1

@@ -400,7 +407,23 @@ class DbClusterStatus():
        "Degraded": "Degraded",
        "Unknown": "Abnormal"
    }

+    INSTANCE_STATUS_MAP_CHECK_STATUS = {
+        "Normal": "Primary",
+        "Unnormal": "Abnormal",
+        "Primary": "Primary",
+        "Standby": "Standby",
+        "Secondary": "Secondary",
+        "Pending": "Abnormal",
+        "Down": "Down",
+        "Unknown": "Abnormal",
+        "Offline": "Offline",
+        "Main Standby": "Standby",
+        "Cascade Standby": "Standby"
+    }
+    INSTANCE_STATUS_MAP_CHECK_FAILOVER = {
+        "Need repair(Disconnected)": "Normal",
+        "Need repair": "Normal"
+    }
    ###################################################################
    # instance role
    ###################################################################
@@ -418,6 +441,7 @@ class DbClusterStatus():
    INSTANCE_STATUS_PRIMARY = "Primary"
    INSTANCE_STATUS_STANDBY = "Standby"
    INSTANCE_STATUS_CASCADE_STANDBY = "Cascade Standby"
+    INSTANCE_STATUS_MAIN_STANDBY = "Main Standby"
    INSTANCE_STATUS_ABNORMAL = "Abnormal"
    INSTANCE_STATUS_DOWN = "Down"
    INSTANCE_STATUS_DUMMY = "Secondary"
@@ -432,6 +456,7 @@ class DbClusterStatus():
        "Standby": "Standby",
        "Secondary": "Secondary",
        "Cascade Standby": "Cascade Standby",
+        "Main Standby": "Main Standby",
        "Pending": "Abnormal",
        "Down": "Down",
        "Unknown": "Abnormal"
@@ -611,7 +636,29 @@ class DbClusterStatus():
                                  DbClusterStatus.OM_NODE_STATUS_ABNORMAL)
        return statusInfo

-    def initFromFile(self, filePath, isExpandScene=False):
+    def init_from_content(self, content, is_expand_scene=False, check_action=None, logger=None):
        """
        Init from content
        """
        content_list = content.split('\n')
        try:
            for line in content_list:
                line = line.strip()
                if line == "":
                    continue
                str_list = line.split(":")
                if len(str_list) != 2:
                    continue
                self.__fillField(str_list[0].strip(), str_list[1].strip(),
                                 is_expand_scene, check_action=check_action)
        except Exception as error:
            if logger:
                logger.debug("Failed to parse cluster status with error:%s, "
                             "status content:%s" % (error, content))
            raise Exception(
                ErrorCode.GAUSS_502["GAUSS_50204"] % "status content" + " Error: \n%s" % str(error))
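What init_from_content actually consumes is the "name : value" text emitted by cm_ctl query; a rough, abbreviated fragment (the real output also carries node and instance id fields that create the node/instance objects before instance_state is parsed):

#   instance_state  : Main Standby
#   HA_state        : Normal
#
# With check_action=DefaultValue.TASK_QUERY_STATUS, "Main Standby" maps to
# "Standby" via INSTANCE_STATUS_MAP_CHECK_STATUS; with
# check_action=StreamingConstants.STREAM_DISTRIBUTE_ACTION, a detail status of
# "Need repair(Disconnected)" is folded to "Normal" so a streaming failover
# check does not flag a freshly disconnected main standby as broken.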

    def initFromFile(self, filePath, isExpandScene=False, check_action=None):
        """
        function : Init from status file
        input : filePath
@@ -637,12 +684,12 @@ class DbClusterStatus():
                    continue

                self.__fillField(strList[0].strip(), strList[1].strip(),
-                                 isExpandScene)
+                                 isExpandScene, check_action=check_action)
        except Exception as e:
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] %
                            "status file" + " Error: \n%s" % str(e))

-    def __fillField(self, field, value, isExpandScene):
+    def __fillField(self, field, value, isExpandScene, check_action=None):
        """
        function : Fill field
        input : field, value
@@ -690,7 +737,10 @@ class DbClusterStatus():
            elif value == DbClusterStatus.INSTANCE_TYPE_ETCD:
                self.__curNode.etcds.append(self.__curInstance)
        elif field == "instance_state":
-            status = DbClusterStatus.INSTANCE_STATUS_MAP.get(value)
+            if check_action == DefaultValue.TASK_QUERY_STATUS:
+                status = DbClusterStatus.INSTANCE_STATUS_MAP_CHECK_STATUS.get(value)
+            else:
+                status = DbClusterStatus.INSTANCE_STATUS_MAP.get(value)
            self.__curInstance.status = \
                DbClusterStatus.INSTANCE_STATUS_ABNORMAL \
                if status is None else status
@@ -715,6 +765,11 @@ class DbClusterStatus():
                self.__curInstance.status = \
                    DbClusterStatus.INSTANCE_STATUS_ABNORMAL
            self.__curInstance.detail_status = value
+            if check_action == StreamingConstants.STREAM_DISTRIBUTE_ACTION:
+                self.__curInstance.status = \
+                    DbClusterStatus.INSTANCE_STATUS_MAP_CHECK_FAILOVER.get(value, value)
+                self.__curInstance.detail_status = \
+                    DbClusterStatus.INSTANCE_STATUS_MAP_CHECK_FAILOVER.get(value, value)
        elif field == "HA_state":
            haStatus = DbClusterStatus.HA_STATUS_MAP.get(value)
            detail_ha = value
@@ -742,5 +797,9 @@ class DbClusterStatus():
                if dataStatus is None else dataStatus
        elif field == "reason":
            self.__curInstance.reason = value
+            if check_action == StreamingConstants.STREAM_DISTRIBUTE_ACTION and \
+                    hasattr(self.__curInstance, "detail_ha") and value == "Disconnected":
+                self.__curInstance.detail_ha = \
+                    DbClusterStatus.INSTANCE_STATUS_MAP_CHECK_FAILOVER.get("Need repair", value)


@@ -101,7 +101,12 @@ class ErrorCode():
    'GAUSS_50018': "[GAUSS-50018] : The parameter value of %s is Null.",
    'GAUSS_50019': "[GAUSS-50019] : The value of %s is error.",
    'GAUSS_50020': "[GAUSS-50020] : The value of %s must be a digit.",
-    'GAUSS_50021': "[GAUSS-50021] : Failed to query %s parameter."
+    'GAUSS_50021': "[GAUSS-50021] : Failed to query %s parameter.",
+    'GAUSS_50022': "[GAUSS-50022] : The parameter '%s' should be %s.",
+    'GAUSS_50023': "[GAUSS-50023] : The parameter '%s' over max length %s.",
+    'GAUSS_50024': "[GAUSS-50024] : The parameter '%s' is invalid.",
+    'GAUSS_50025': "[GAUSS-50025] : There is illegal character '%s' in parameter %s.",
+    'GAUSS_50026': "[GAUSS-50026] : Failed to check %s parameters in the XML file."

    }

@@ -85,7 +85,7 @@ class GaussLog:
    Class to handle log file
    """

-    def __init__(self, logFile, module="", expectLevel=LOG_DEBUG):
+    def __init__(self, logFile, module="", expectLevel=LOG_DEBUG, trace_id=None):
        """
        function: Constructor
        input : NA
@@ -104,6 +104,7 @@ class GaussLog:
        self.lock = thread.allocate_lock()
        self.tmpFile = None
        self.ignoreErr = False
+        self.trace_id = trace_id

        logFileList = ""
        try:
@@ -419,9 +420,14 @@ class GaussLog:
        strTime = datetime.datetime.now()
        file_line = self.get_log_file_line()
        if (stepFlag == ""):
-            print("[%s][%d][%s][%s][%s]:%s" % (
-                strTime, self.pid, file_line, self.moduleName, level, msg),
-                file=self.fp)
+            if self.trace_id:
+                print("[%s][%s][%d][%s][%s]:%s"
+                      % (self.trace_id, strTime, self.pid, self.moduleName,
+                         level, msg), file=self.fp)
+            else:
+                print("[%s][%d][%s][%s]:%s" % (
+                    strTime, self.pid, self.moduleName, level, msg),
+                    file=self.fp)
        else:
            stepnum = self.Step(stepFlag)
            print("[%s][%d][%s][%s][%s][Step%d]:%s" % (

@@ -407,7 +407,7 @@ class DN_OLAP(Kernel):

        self.modifyDummpyStandbyConfigItem()

-    def setPghbaConfig(self, clusterAllIpList):
+    def setPghbaConfig(self, clusterAllIpList, try_reload=False):
        """
        """
        principal = None
@@ -446,12 +446,22 @@ class DN_OLAP(Kernel):
            GUCParasStrList.append(GUCParasStr)
            i = 0
            GUCParasStr = ""
+        # Used only by the streaming disaster cluster
+        streaming_dn_ips = self.get_streaming_relate_dn_ips(self.instInfo)
+        if streaming_dn_ips:
+            for dn_ip in streaming_dn_ips:
+                GUCParasStr += "-h \"host all %s %s/32 %s\" " \
+                               % (pg_user, dn_ip, METHOD_TRUST)
+                GUCParasStr += "-h \"host all all %s/32 %s\" " \
+                               % (dn_ip, METHOD_SHA)
+                ip_segment = '.'.join(dn_ip.split('.')[:2]) + ".0.0/16"
+                GUCParasStr += "-h \"host replication all %s sha256\" " % ip_segment

        if (GUCParasStr != ""):
            GUCParasStrList.append(GUCParasStr)

        for parasStr in GUCParasStrList:
-            self.doGUCConfig("set", parasStr, True)
+            self.doGUCConfig("set", parasStr, True, try_reload=try_reload)

"""
Desc:

@@ -19,6 +19,8 @@ import sys
import os
import subprocess
import re
import pwd
import json

sys.path.append(sys.path[0] + "/../../../")
from gspylib.common.ErrorCode import ErrorCode
@@ -28,6 +30,7 @@ from gspylib.common.Common import DefaultValue
from base_utils.os.cmd_util import CmdUtil
from base_utils.os.env_util import EnvUtil
from base_utils.os.file_util import FileUtil
+from base_utils.security.security_checker import SecurityChecker
from domain_utils.cluster_os.cluster_user import ClusterUser

MAX_PARA_NUMBER = 1000
@@ -403,7 +406,7 @@ class Kernel(BaseComponent):

        return tempCommonDict

-    def doGUCConfig(self, action, GUCParasStr, isHab=False):
+    def doGUCConfig(self, action, GUCParasStr, isHab=False, try_reload=False):
        """
        """
        # check instance data directory
@@ -424,6 +427,16 @@ class Kernel(BaseComponent):
        if (not os.path.exists(configFile)):
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % configFile)

+        if try_reload:
+            cmd_reload = "%s/gs_guc %s -D %s %s " % (self.binPath, 'reload',
+                                                     self.instInfo.datadir, GUCParasStr)
+            status, output = CmdUtil.retryGetstatusoutput(cmd_reload, 3, 3)
+            if status != 0:
+                self.logger.log("Failed to reload guc params with command:[%s]" % cmd_reload)
+            else:
+                self.logger.log("Successfully reloaded guc params with command:[%s]"
+                                % cmd_reload)
+                return
        cmd = "%s/gs_guc %s -D %s %s " % (self.binPath, action,
                                          self.instInfo.datadir, GUCParasStr)
        self.logger.debug("gs_guc command is: {0}".format(cmd))
@@ -456,6 +469,39 @@ class Kernel(BaseComponent):
        for parasStr in guc_paras_str_list:
            self.doGUCConfig(setMode, parasStr, False)

    def get_streaming_relate_dn_ips(self, instance):
        """
        function: Streaming disaster cluster, obtain the IP address of the DN
        with the same shards.
        input: instance
        :return: dn ip list
        """
        self.logger.debug("Start parse cluster_conf_record.")
        pg_host = EnvUtil.getEnv("PGHOST")
        config_param_file = os.path.realpath(
            os.path.join(pg_host, "streaming_cabin", "cluster_conf_record"))
        if not os.path.isfile(config_param_file):
            self.logger.debug("Streaming cluster config file not found.")
            return []

        with open(config_param_file, "r") as fp_read:
            param_dict = json.load(fp_read)
        dn_ip_list = []
        remote_cluster_conf = param_dict.get("remoteClusterConf")
        shards = remote_cluster_conf.get('shards')
        for shard in shards:
            for node_info in shard:
                shard_num = node_info.get("shardNum", '1')
                node_ip = node_info.get("dataIp")
                SecurityChecker.check_ip_valid("check ip from cluster_conf_record", node_ip)
                if not all([shard_num, node_ip]):
                    raise Exception(ErrorCode.GAUSS_516['GAUSS_51632']
                                    % "obtain remote conf from cluster_conf_record")
                if str(shard_num) == str(instance.mirrorId):
                    dn_ip_list.append(node_ip)
        self.logger.debug("Got streaming cluster pg_hba ips %s." % dn_ip_list)
        return dn_ip_list
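The shape of the cluster_conf_record JSON this method walks, sketched with invented addresses; only dataIp entries whose shardNum matches the instance's mirrorId survive the filter:

# {
#   "localClusterConf":  {"port": 15400, "shards": [[
#       {"shardNum": "1", "ip": "10.10.0.1", "dataIp": "192.168.1.1"}]]},
#   "remoteClusterConf": {"port": 15400, "shards": [[
#       {"shardNum": "1", "ip": "10.10.0.2", "dataIp": "192.168.1.2"}]]}
# }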

    def removeIpInfoOnPghbaConfig(self, ipAddressList):
        """
        """
@@ -76,7 +76,7 @@ class CheckperfImplOLAP(CheckperfImpl):
                                           dnInst.instanceId)
                if (instStatus is not None and
                        instStatus.isInstanceHealthy() and
-                        instStatus.status == "Primary"):
+                        instStatus.status in ["Primary"]):
                    normalDNList.append(dnInst)

            if (len(normalDNList) == 0):
@@ -1791,7 +1791,6 @@ class CheckperfImplOLAP(CheckperfImpl):
             pmk_last_collect_start_time, last_snapshot_id) = \
                self.getMetaData(hostname, port)
-            self.deleteExpiredSnapShots(hostname, port)

            # collect pmk stat
            self.collectPMKData(pmk_curr_collect_start_time,
                                pmk_last_collect_start_time,
@@ -1825,8 +1824,8 @@ class CheckperfImplOLAP(CheckperfImpl):
            self.handleNodeStat()
            # insert the node stat of all hosts into the cluster
            self.insertNodeStat(hostname, port,
-                               pmk_curr_collect_start_time,
-                               pmk_last_collect_start_time, last_snapshot_id)
+                                pmk_curr_collect_start_time,
+                                pmk_last_collect_start_time, last_snapshot_id)

            # display pmk stat
            showDetail = ""

@@ -370,6 +370,10 @@ class OmImplOLAP(OmImpl):
            self.logger.log(
                "No need to generate dynamic configuration file for one node.")
            return
+        if DefaultValue.cm_exist_and_is_disaster_cluster(self.context.clusterInfo, self.logger):
+            self.logger.log(
+                "Streaming disaster cluster does not need to generate dynamic configuration.")
+            return
        self.logger.log("Generating dynamic configuration file for all nodes.")
        hostname = NetUtil.GetHostIpOrName()
        sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames())
script/impl/streaming_disaster_recovery/__init__.py | 0 (new file)
script/impl/streaming_disaster_recovery/params_handler.py | 344 (new file)
@@ -0,0 +1,344 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description : params_handler.py is a utility for parsing and verifying streaming
# disaster recovery params.
#############################################################################

import os
import sys
import json
import optparse
import getpass

from impl.streaming_disaster_recovery.streaming_constants import StreamingConstants
from gspylib.common.DbClusterInfo import dbClusterInfo
from gspylib.common.ErrorCode import ErrorCode
from base_utils.security.security_checker import SecurityChecker, ValidationError
from domain_utils.cluster_file.version_info import VersionInfo


def check_streaming_start_mode(mode):
    """
    Check start mode
    """
    if mode not in ["primary", "disaster_standby"]:
        raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50011"] % ('-m', mode))


def check_xml_file(file):
    """
    Check xml file param
    """
    if not file:
        raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50001'] % 'X')
    SecurityChecker.check_is_string('xml file path', file)
    if not os.path.isfile(file):
        raise ValidationError(ErrorCode.GAUSS_502["GAUSS_50201"] % file)


def check_hadr_user(value):
    """
    Check disaster user
    """
    description = "disaster username"
    SecurityChecker.check_db_user(description, value)


def check_hadr_pwd(value):
    """
    Check disaster user password
    """
    description = "disaster user password"
    # check_db_password will be used in cloud scene
    SecurityChecker.check_db_user(description, value)


def check_wait_timeout(value):
    """
    Check wait timeout
    """
    description = "wait timeout"
    SecurityChecker.check_is_digit(description, value)


def check_local_cluster_conf(value):
    """
    Check local cluster conf
    """
    SecurityChecker.check_is_dict("localClusterConf", value)
    port = value.get('port')
    SecurityChecker.check_port_valid('port of localClusterConf', port)
    shards = value.get('shards')
    SecurityChecker.check_is_list('shards of localClusterConf', shards)
    for shard in shards:
        for node in shard:
            ip = node.get('ip')
            data_ip = node.get('dataIp')
            SecurityChecker.check_ip_valid('ip of localClusterConf', ip)
            SecurityChecker.check_ip_valid('dataIp of localClusterConf', data_ip)


def check_remote_cluster_conf(value):
    """
    Check remote cluster conf
    """
    SecurityChecker.check_is_dict("remoteClusterConf", value)
    port = value.get('port')
    SecurityChecker.check_port_valid('port of remoteClusterConf', port)
    shards = value.get('shards')
    SecurityChecker.check_is_list('shards of remoteClusterConf', shards)
    for shard in shards:
        for node in shard:
            ip = node.get('ip')
            data_ip = node.get('dataIp')
            SecurityChecker.check_ip_valid('ip of remoteClusterConf', ip)
            SecurityChecker.check_ip_valid('dataIp of remoteClusterConf', data_ip)
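Put together, the validators above describe the --json params file gs_sdr accepts for start/stop; a hypothetical minimal example (ports and addresses invented):

# {
#   "localClusterConf":  {"port": 15400, "shards": [[
#       {"ip": "10.10.0.1", "dataIp": "192.168.1.1"}]]},
#   "remoteClusterConf": {"port": 15400, "shards": [[
#       {"ip": "10.10.0.2", "dataIp": "192.168.1.2"}]]}
# }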


STREAMING_PARAMS_FOR_MODULE = {
    "start": {
        "mode": check_streaming_start_mode,
        "xml_path": check_xml_file,
        "hadrUserName": check_hadr_user,
        "hadrUserPassword": check_hadr_pwd,
        "waitingTimeout": check_wait_timeout,
        "localClusterConf": check_local_cluster_conf,
        "remoteClusterConf": check_remote_cluster_conf
    },
    "stop": {
        "xml_path": check_xml_file,
        "waitingTimeout": check_wait_timeout,
        "localClusterConf": check_local_cluster_conf,
        "remoteClusterConf": check_remote_cluster_conf
    },
    "switchover": {
        "mode": check_streaming_start_mode,
        "waitingTimeout": check_wait_timeout
    },
    "failover": {
        "waitingTimeout": check_wait_timeout,
    },
    "query": {}
}

HELP_MSG = """
gs_sdr is a utility for the full set of streaming disaster recovery options.

Usage:
  gs_sdr -? | --help
  gs_sdr -V | --version
  gs_sdr -t start -m [primary|disaster_standby] -X XMLFILE [-U DR_USERNAME] [-W DR_PASSWORD] [--json JSONFILE] [--time-out=SECS] [-l LOGFILE]
  gs_sdr -t stop -X XMLFILE|--json JSONFILE [-l LOGFILE]
  gs_sdr -t switchover -m [primary|disaster_standby] [--time-out=SECS] [-l LOGFILE]
  gs_sdr -t failover [-l LOGFILE]
  gs_sdr -t query [-l LOGFILE]
General options:
  -?, --help                     Show help information for this utility,
                                 and exit the command line mode.
  -V, --version                  Show version information.
  -t                             Task name, it could be:
                                 "start", "stop", "switchover", "failover", "query".
  -m                             Option mode, it could be:
                                 "primary", "disaster_standby".
  -U                             Disaster recovery user name.
  -W                             Disaster recovery user password.
  -X                             Path of the XML configuration file.
  -l                             Path of log file.
  --json                         Path of params file for streaming options.
  --time-out=SECS                Maximum waiting time when Main standby connects to the primary dn,
                                 default value is 1200s.
"""


class ParamsHandler(object):
    """
    Parse and check params.
    """
    def __init__(self, logger, trace_id):
        self.params = None
        self.logger = logger
        self.trace_id = trace_id

    @staticmethod
    def option_parser():
        """
        parsing parameters
        :return: param obj
        """
        parser = optparse.OptionParser(conflict_handler='resolve')
        parser.disable_interspersed_args()
        parser.epilog = "Example: gs_sdr -t " \
                        "start -m primary -X clusterConfig.xml " \
                        "--time-out=1200."
        parser.add_option('-V', "--version", dest='version_info', action='store_true',
                          help='-V|--version show version info.')
        parser.add_option('-?', "--help", dest='help_info', action='store_true',
                          help='-?|--help show help message and exit.')
        parser.add_option('-t', dest='task', type='string',
                          help='Task name. It could be "start", "stop", '
                               '"switchover", "failover", "query"')
        parser.add_option('-m', dest='mode', type='string',
                          help='Cluster run mode. It could be ["primary", "disaster_standby"].')
        parser.add_option('-U', dest='hadrusername', type='string',
                          help='hadr user name.')
        parser.add_option('-W', dest='hadruserpasswd', type='string',
                          help='hadr user password.')
        parser.add_option('-X', dest='xml_path', type='string',
                          help='Cluster config xml path.')
        parser.add_option('--json', dest='json_path', type='string',
                          help='Config json file of streaming options')
        parser.add_option('--time-out', dest='timeout', default="1200", type='string',
                          help='time out.')
        parser.add_option("-l", dest='logFile', type='string',
                          help='Path of log file.')
        return parser

    def __print_usage(self):
        """
        Print help message
        """
        if self.params.help_info:
            print(HELP_MSG)
            sys.exit(0)

    def __print_version_info(self):
        """
        Print version info
        """
        if self.params.version_info:
            print("%s %s" % (sys.argv[0].split("/")[-1],
                             VersionInfo.COMMON_VERSION))
            sys.exit(0)

    def __cluster_conf_parser(self, file_path):
        """
        Parse params in json file
        """
        if self.params.json_path:
            if not os.path.isfile(file_path):
                raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50010']
                                      % '--json' + " Json file does not exist.")
            with open(file_path, 'r') as read_fp:
                param_dict = json.load(read_fp)
            for key, value in param_dict.items():
                if key not in StreamingConstants.STREAMING_JSON_PARAMS[self.params.task]:
                    continue
                setattr(self.params, key, value)
            return
        cluster_info = dbClusterInfo()
        if not self.params.xml_path or not os.path.isfile(self.params.xml_path):
            raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50010']
                                  % '-X' + " Neither the XML file nor the json file exists.")
        cluster_info.initFromXml(self.params.xml_path)
        remote_cluster_conf = dict()
        remote_cluster_conf.setdefault("port", cluster_info.remote_dn_base_port)
        remote_cluster_conf.setdefault("shards", cluster_info.remote_stream_ip_map)
        setattr(self.params, "remoteClusterConf", remote_cluster_conf)
        self.logger.debug("Remote stream cluster conf: %s." % str(remote_cluster_conf))

        local_cluster_conf = dict()
        local_cluster_conf.setdefault("port", cluster_info.local_dn_base_port)
        local_cluster_conf.setdefault("shards", cluster_info.local_stream_ip_map)
        setattr(self.params, "localClusterConf", local_cluster_conf)
        self.logger.debug("Local stream cluster conf: %s." % str(local_cluster_conf))
        if not remote_cluster_conf["shards"] or len(remote_cluster_conf["shards"])\
                != len(local_cluster_conf["shards"]):
            raise ValidationError(ErrorCode.GAUSS_500['GAUSS_50026'] % "streaming DR")

    def __init_default_params(self):
        """
        Init params if need default value
        """
        if not self.params.timeout.isdigit():
            raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50004"] % "--time-out")
        self.params.waitingTimeout = int(self.params.timeout)

    def __parse_args(self):
        """
        Parse arguments
        """
        parser = ParamsHandler.option_parser()
        self.params, _ = parser.parse_args()
        self.__print_usage()
        self.__print_version_info()
        if not hasattr(self.params, 'task') or not self.params.task:
            raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50001"] % 't' + ".")
        if self.params.task not in StreamingConstants.STREAMING_JSON_PARAMS.keys():
            raise ValidationError(ErrorCode.GAUSS_500["GAUSS_50004"] % 't')
        # parse arguments in json/xml file
        if StreamingConstants.STREAMING_JSON_PARAMS[self.params.task]:
            self.__cluster_conf_parser(self.params.json_path)

    def __reload_hadr_user_info(self):
        """
        Input hadr user info
        """
        if self.params.task not in ["start"]:
            return
        if self.params.hadrusername and self.params.hadruserpasswd:
            self.params.hadrUserName = self.params.hadrusername
            self.params.hadrUserPassword = self.params.hadruserpasswd
            del self.params.hadruserpasswd
            return
        user_name = ""
        if not self.params.hadrusername:
            user_name = input("Please enter disaster user name:")
        self.params.hadrUserName = user_name if user_name else self.params.hadrusername
        if self.params.hadruserpasswd:
            self.params.hadrUserPassword = self.params.hadruserpasswd
            del self.params.hadruserpasswd
            return
        for i in range(3):
            user_passwd = getpass.getpass("Please enter password for [%s]:" %
                                          self.params.hadrUserName)
            user_passwd_check = getpass.getpass("Please enter the password again for [%s]:"
                                                % self.params.hadrUserName)
            if user_passwd == user_passwd_check:
                break
            if i == 2:
                self.logger.logExit("The passwords entered were inconsistent too many "
                                    "times. Authentication failed.")
            self.logger.error(
                ErrorCode.GAUSS_503["GAUSS_50306"] % user_name
                + "The two passwords are different, please enter password again.")
        self.params.hadrUserPassword = user_passwd
        del user_passwd
        del user_passwd_check
        self.logger.debug("The hadr user information is successfully loaded.")

    def get_valid_params(self):
        """
        Check params
        """
        try:
            self.__parse_args()
            self.logger.log(StreamingConstants.LOG_REMARK)
            self.logger.log('Streaming disaster recovery ' + self.params.task + ' ' + self.trace_id)
            self.logger.log(StreamingConstants.LOG_REMARK)
            self.__init_default_params()
            self.__reload_hadr_user_info()
            for param_name, validate in STREAMING_PARAMS_FOR_MODULE[self.params.task].items():
                check_value = getattr(self.params, param_name)
                if self.params.task == "stop":
                    if param_name == "xml_path" and not check_value:
                        check_value = getattr(self.params, 'json_path')
                validate(check_value)
        except ValidationError as error:
            self.logger.logExit(str(error))
        return self.params
script/impl/streaming_disaster_recovery/streaming_base.py | 2484 (new file)
(file diff suppressed because it is too large)
@@ -0,0 +1,92 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description : streaming_constants.py is a utility for defining constants
# of streaming disaster recovery.
#############################################################################


class StreamingConstants:

    # streaming files
    STREAMING_LOG_FILE = "gs_sdr.log"
    STREAMING_FILES_DIR = 'streaming_cabin'
    STREAMING_CLUSTER_STATUS_TMP_FILE = "cluster_state_tmp"
    WAL_KEEP_SEGMENTS = ".wal_keep_segments_record"
    STREAMING_CLUSTER_CONF_RECORD = "cluster_conf_record"
    GS_SECURE_FILES = "gs_secure_files"
    HADR_KEY_CIPHER = "hadr.key.cipher"
    HADR_KEY_RAND = "hadr.key.rand"
    STREAM_SWITCHOVER_STATE = ".switchover_cluster_state"
    MAX_TERM_RECORD = ".max_term_record"
    PROCESS_LOCK_FILE = 'streaming_lock_'
    STREAMING_CONFIG_XML = "streaming_config.xml"
    GUC_BACKUP_FILE = ".streaming_guc_backup"
    CLUSTER_USER_RECORD = ".cluster_user_record"

    ACTION_START = "start"
    ACTION_SWITCHOVER = "switchover"
    ACTION_FAILOVER = "failover"

    ACTION_ESTABLISH = "establish"

    # streaming query temp file
    HADR_CLUSTER_STAT = ".hadr_cluster_stat"
    HADR_FAILOVER_STAT = ".hadr_failover_stat"
    HADR_SWICHOVER_STAT = ".hadr_switchover_stat"
    HADR_ESTABLISH_STAT = ".hadr_establish_stat"

    STREAM_DISTRIBUTE_ACTION = "distribute_stream_failover"

    # GUC CHANGE MAP
    GUC_CHANGE_MAP = {"most_available_sync": "on", "synchronous_commit": "on"}

    # params in json file for each module
    STREAMING_JSON_PARAMS = {
        "start": ["localClusterConf", "remoteClusterConf"],
        "stop": ["localClusterConf", "remoteClusterConf"],
        "switchover": [],
        "failover": [],
        "query": []
    }

    # step file of each module
    STREAMING_STEP_FILES = {
        "start_primary": ".streaming_start_primary.step",
        "start_standby": ".streaming_start_standby.step",
        "stop": ".streaming_stop.step",
        "switchover_primary": ".streaming_switchover_primary.step",
        "switchover_standby": ".streaming_switchover_standby.step",
        "failover": ".streaming_failover.step",
        "query": ".streaming_query.step",
    }
    # tasks that need to check whether a process already exists
    TASK_EXIST_CHECK = ["start", "stop", "switchover", "failover"]

    # default values
    MAX_WAL_KEEP_SEGMENTS = 16384
    MAX_REPLICATION_NUMS = 8
    MAX_BUILD_TIMEOUT = 1209600
    STANDBY_START_TIMEOUT = 3600 * 24 * 7
    CHECK_PROCESS_WAIT_TIME = 3

    # backup open key
    BACKUP_OPEN = "/%s/CMServer/backup_open"

    # log remark
    LOG_REMARK = "-" * 80
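The STREAMING_STEP_FILES entries above are what make each task resumable: a handler records the last completed step under streaming_cabin, and on re-entry skips every step whose number is behind the recorded one. A rough sketch of that read/write pattern, with simplified helpers standing in for the real query_streaming_step and write_streaming_step:

import os

STREAMING_FILES_DIR = 'streaming_cabin'
STEP_FILE = ".streaming_start_primary.step"

def query_step(cabin=STREAMING_FILES_DIR):
    # No step file yet means nothing has completed: start from step 0.
    path = os.path.join(cabin, STEP_FILE)
    if not os.path.isfile(path):
        return 0
    with open(path) as fp:
        # Step records look like "3_set_wal_segments_step"; the leading
        # integer is the last step that finished.
        return int(fp.read().strip().split("_", 1)[0])

def write_step(tag, cabin=STREAMING_FILES_DIR):
    # Persist the step tag so a crashed run can resume where it stopped.
    os.makedirs(cabin, exist_ok=True)
    with open(os.path.join(cabin, STEP_FILE), "w") as fp:
        fp.write(tag)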
@@ -0,0 +1,234 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description : streaming_disaster_recovery_start.py is a utility for creating
# a relationship between the primary cluster and the standby cluster.

import os

from base_utils.security.sensitive_mask import SensitiveMask
from gspylib.common.ErrorCode import ErrorCode
from gspylib.common.Common import DefaultValue, ClusterCommand
from impl.streaming_disaster_recovery.streaming_base import StreamingBase
from impl.streaming_disaster_recovery.streaming_constants import StreamingConstants


class StreamingStartHandler(StreamingBase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _first_step_for_streaming_start(self, step):
        """
        First step for streaming start
        """
        if step >= 2:
            return
        self.logger.debug("Start first step of streaming start.")
        self.create_streaming_dir(self.streaming_file_dir)
        self.check_action_and_mode()
        self.init_cluster_status()

    def _second_step_for_streaming_start(self, step):
        """
        Second step for streaming start
        """
        if step >= 2:
            return
        self.logger.debug("Start second step of streaming start.")
        self.check_cluster_status(status_allowed=['Normal'])
        self.check_cluster_is_common()
        cm_exist = DefaultValue.check_is_cm_cluster(self.logger)
        if not cm_exist:
            self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51632"] %
                                "check cm_ctl is available for current cluster")
        self.check_is_under_upgrade()
        self.check_dn_instance_params()
        self.write_streaming_step("2_check_cluster_step")

    def _third_step_for_streaming_start(self, step):
        """
        Third step for streaming start
        """
        if step >= 3:
            return
        self.logger.debug("Start third step of streaming start.")
        self.drop_replication_slot_on_dr_cluster(only_mode="disaster_standby")
        self.prepare_gs_secure_files(only_mode='primary')
        self.build_and_distribute_key_files(only_mode='disaster_standby')
        self.get_default_wal_keep_segments(only_mode='primary')
        self.write_streaming_step("3_set_wal_segments_step")

    def drop_replication_slot_on_dr_cluster(self, only_mode=None):
        """
        Drop replication slot on dr cluster
        """
        if only_mode and self.params.mode != only_mode:
            self.logger.debug("Drop replication slot opts not for mode:%s." % self.params.mode)
            return
        sql_check = "select slot_name from pg_get_replication_slots() where slot_type='logical'"
        primary_dns = DefaultValue.get_primary_dn_instance_id("Primary", ignore=True)
        if not primary_dns:
            return
        primary_insts = [inst for node in self.cluster_info.dbNodes
                         for inst in node.datanodes if str(inst.instanceId) in primary_dns]
        dn_inst = primary_insts[0]
        self.logger.debug("Start drop node %s [%s] slots" % (dn_inst.hostname, dn_inst.instanceId))
        status, output = ClusterCommand.remoteSQLCommand(
            sql_check, self.user, dn_inst.hostname, dn_inst.port)
        self.logger.debug("Get %s all replication slots, status=%d, output: %s." %
                          (dn_inst.instanceId, status, SensitiveMask.mask_pwd(output)))
        if status == 0 and output.strip():
            drop_slots = output.strip().split('\n')
            for slot in drop_slots:
                self.logger.debug("Starting drop node %s %s" % (dn_inst.instanceId, slot.strip()))
                sql = "select * from pg_drop_replication_slot('%s');" % slot.strip()
                status_dr, output_dr = ClusterCommand.remoteSQLCommand(
                    sql, self.user, dn_inst.hostname, dn_inst.port)
                if status_dr != 0:
                    self.logger.debug("Failed to remove node %s %s with error: %s" % (
                        dn_inst.hostname, slot.strip(), SensitiveMask.mask_pwd(output_dr)))
                self.logger.debug(
                    "Successfully drop node %s %s" % (dn_inst.instanceId, slot.strip()))

    def _fourth_step_for_streaming_start(self, step):
        """
        Fourth step for streaming start
        """
        if step >= 4:
            return
        self.logger.debug("Start fourth step of streaming start.")
        self.set_wal_keep_segments(
            "reload", StreamingConstants.MAX_WAL_KEEP_SEGMENTS, only_mode='primary')
        self.write_streaming_step("4_set_wal_segments_step")

    def _fifth_step_for_streaming_start(self, step):
        """
        Fifth step for streaming start
        """
        if step >= 5:
            return
        self.logger.debug("Start fifth step of streaming start.")
        self.set_data_in_dcc(self.backup_open_key, "0", only_mode='primary')
        self.set_data_in_dcc(self.backup_open_key, "2", only_mode='disaster_standby')
        self.stop_cluster_by_node(only_mode='disaster_standby')
        self.write_streaming_step("5_set_wal_segments_step")

    def common_step_for_streaming_start(self):
        """
        Common step for streaming start between step 1 and 2
        """
        self.logger.debug("Start common config step of streaming start.")
        self.distribute_cluster_conf()
        self.update_streaming_pg_hba()
        self.config_streaming_repl_info()

    def _sixth_step_for_streaming_start(self, step):
        """
        Sixth step for streaming start
        """
        if step >= 6:
            return
        self.logger.debug("Start sixth step of streaming start.")
        self.set_cmserver_guc("backup_open", "2", "set", only_mode='disaster_standby')
        self.set_cmagent_guc("agent_backup_open", "2", "set", only_mode='disaster_standby')
        self.write_streaming_step("6_set_guc_step")

    def _seventh_step_for_streaming_start(self, step):
        """
        Seventh step for streaming start
        """
        if step >= 7:
            return
        self.logger.debug("Start seventh step of streaming start.")
        self.update_streaming_info("cluster", "restore", only_mode='disaster_standby')
        try:
            self.build_dn_instance(only_mode='disaster_standby')
        except Exception as error:
            self.update_streaming_info("cluster", "restore_fail", only_mode='disaster_standby')
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] % "build dns" + "Error:%s" % error)
        self.write_streaming_step("7_build_dn_instance_step")

    def _eighth_step_for_streaming_start(self, step):
        """
        Eighth step for streaming start
        """
        if step >= 8:
            return
        self.logger.debug("Start eighth step of streaming start.")
        self.start_cluster(cm_timeout=StreamingConstants.STANDBY_START_TIMEOUT,
                           only_mode='disaster_standby')
        self.update_streaming_info("cluster", "full_backup", only_mode='primary')
        try:
            self.wait_main_standby_connection(only_mode='primary')
        except Exception as error:
            self.update_streaming_info("cluster", "backup_fail", only_mode='primary')
            raise Exception(str(error))
        ret = self.check_cluster_status(status_allowed=['Normal'],
                                        only_check=True, check_current=True)
        query_status = "recovery" if ret else "recovery_fail"
        self.update_streaming_info("cluster", query_status, only_mode='disaster_standby')
        self.update_streaming_info("cluster", "archive", only_mode='primary')
        self.write_streaming_step("8_start_cluster_step")

    def _ninth_step_for_streaming_start(self, step):
        """
        Ninth step for streaming start
        """
        if step >= 9:
            return
        self.logger.debug("Start ninth step of streaming start.")
        self.restore_wal_keep_segments(only_mode='primary')
        self.clean_gs_secure_dir()
        self.clean_step_file()

    def _check_and_refresh_disaster_user_permission(self):
        """check and refresh disaster user permission"""
        if self.params.mode != "primary":
            return
        self.check_hadr_user(only_mode='primary')
        self.check_hadr_pwd(only_mode='primary')
        self.logger.debug("Encrypt hadr user info to database not "
                          "for mode:%s." % self.params.mode)
        hadr_cipher_path = os.path.join(self.bin_path, "hadr.key.cipher")
        hadr_rand_path = os.path.join(self.bin_path, "hadr.key.rand")
        if not os.path.isfile(hadr_cipher_path) or not os.path.isfile(hadr_rand_path):
            self.hadr_key_generator('hadr')
        user_info = DefaultValue.obtain_hadr_user_encrypt_str(self.cluster_info, self.user,
                                                              self.logger, False, True)
        if user_info:
            self.clean_global_config()
        pass_str = self.encrypt_hadr_user_info(
            'hadr', self.params.hadrUserName, self.params.hadrUserPassword)
        self.keep_hadr_user_info(pass_str)

    def run(self):
        self.logger.log("Start create streaming disaster relationship.")
        step = self.query_streaming_step()
        self._first_step_for_streaming_start(step)
        self.parse_cluster_status()
        self._check_and_refresh_disaster_user_permission()
        self._second_step_for_streaming_start(step)
        self.common_step_for_streaming_start()
        self._third_step_for_streaming_start(step)
        self._fourth_step_for_streaming_start(step)
        self._fifth_step_for_streaming_start(step)
        self._sixth_step_for_streaming_start(step)
        self._seventh_step_for_streaming_start(step)
        self._eighth_step_for_streaming_start(step)
        self._ninth_step_for_streaming_start(step)
        self.logger.log("Successfully do streaming disaster recovery start.")
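Nearly every action above takes an only_mode keyword, so the same nine-step script can run on both clusters while each side executes only the role-appropriate work. The same guard could be written once as a decorator; a hypothetical sketch (the actual code checks only_mode inline inside each helper):

import functools

def only_mode(mode):
    # Skip the wrapped action unless this handler was started with the
    # matching mode value; mirrors the only_mode= keyword checks above.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            if self.params.mode != mode:
                self.logger.debug("%s skipped for mode %s."
                                  % (func.__name__, self.params.mode))
                return None
            return func(self, *args, **kwargs)
        return wrapper
    return decorator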
@@ -0,0 +1,70 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description : streaming_disaster_recovery_failover.py is a utility for
# standby cluster failover to primary cluster.


from gspylib.common.Common import DefaultValue
from gspylib.common.ErrorCode import ErrorCode
from impl.streaming_disaster_recovery.streaming_base import StreamingBase


class StreamingFailoverHandler(StreamingBase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def run(self):
        self.logger.log("Start streaming disaster failover.")
        self.check_action_and_mode()
        step = self.check_streaming_failover_workable(check_type_step=3, check_status_step=0)
        self.check_is_under_upgrade()
        self.init_cluster_conf()
        try:
            self.streaming_failover_single_inst(step)
            self.update_streaming_info("cluster", "normal")
            self.clean_step_file()
        except Exception as error:
            self.update_streaming_info("cluster", "promote_fail")
            raise Exception(
                ErrorCode.GAUSS_516["GAUSS_51632"] % "centralize failover" + "Error:%s" % error)
        finally:
            self.remove_cluster_maintance_file()
            self.clean_streaming_dir()
        self.logger.log("Successfully do streaming disaster recovery failover.")

    def check_streaming_failover_workable(self, check_type_step=0, check_status_step=0):
        """
        Check streaming failover is workable.
        """
        self.logger.debug("Streaming disaster distribute cluster failover...")
        stream_disaster_step = self.query_streaming_step()
        if not DefaultValue.is_disaster_cluster(self.cluster_info) \
                and stream_disaster_step < check_type_step:
            self.logger.debug("The primary dn exist, do nothing except record the result file.")
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] %
                            "streaming disaster cluster failover, Because the primary cluster "
                            "does not support failover")
        cluster_normal_status = [DefaultValue.CLUSTER_STATUS_NORMAL,
                                 DefaultValue.CLUSTER_STATUS_DEGRADED]
        if stream_disaster_step < check_status_step:
            self.init_cluster_status()
        self.parse_cluster_status()
        if stream_disaster_step < check_status_step:
            self.check_cluster_status(cluster_normal_status)
        return stream_disaster_step
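Condensed to a predicate, the workability check above allows a failover only on a disaster-standby cluster, unless a previous run already recorded progress past the type-check step, so a half-finished failover can resume. An illustrative reduction, not the method itself:

def failover_allowed(is_disaster_cluster, recorded_step, check_type_step=3):
    # A cluster that still has a primary DN may only continue a failover
    # that already progressed past the type check in an earlier run.
    return is_disaster_cluster or recorded_step >= check_type_step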
@@ -0,0 +1,168 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description : streaming_disaster_recovery_query.py is a utility for
# querying streaming disaster recovery condition.

import os

from base_utils.security.sensitive_mask import SensitiveMask
from impl.streaming_disaster_recovery.streaming_constants import StreamingConstants
from gspylib.common.Common import ClusterCommand
from impl.streaming_disaster_recovery.streaming_base import StreamingBase


class StreamingQueryHandler(StreamingBase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def get_streaming_cluster_query_value(self, file_name):
        """
        Query infos from files.
        """
        file_path = os.path.realpath(os.path.join(self.streaming_file_dir, file_name))
        if not os.path.isfile(file_path) and file_name in [StreamingConstants.HADR_CLUSTER_STAT]:
            return "normal"
        if not os.path.isfile(file_path):
            return "0%"
        with open(file_path, 'r') as read_file:
            value = read_file.read().strip()
        return value

    def check_archive(self, former_status, cluster_status):
        """
        Check for archive.
        """
        self.logger.log("Start check archive.")
        if former_status.strip() not in ["archive", "archive_fail"]:
            self.logger.debug("Ignore for status:%s" % former_status)
            return
        archive_status = "archive_fail"
        if cluster_status.lower() not in ["normal", "degraded"]:
            self.logger.debug("Cluster status:%s,archive fail." % cluster_status)
            return archive_status
        if self.main_standby_ids or (not self.primary_dn_ids):
            self.logger.debug("Ignore update archive for disaster_standby cluster.")
            return archive_status
        sql_check = "select 1 from pg_catalog.pg_stat_get_wal_senders() where sync_state" \
                    "='Async' and peer_role='Standby' and peer_state='Normal';"
        dn_instances = [inst for node in self.cluster_info.dbNodes for inst in node.datanodes
                        if inst.instanceId in self.primary_dn_ids]
        self.logger.debug("Check archive with cmd:%s." % sql_check)
        if dn_instances:
            status, output = ClusterCommand.remoteSQLCommand(
                sql_check, self.user, dn_instances[0].hostname,
                dn_instances[0].port)
            if status == 0 and output and output.strip():
                archive_status = "archive"
                self.logger.debug("Successfully check archive, results:%s." %
                                  SensitiveMask.mask_pwd(output))
                return archive_status
            elif status == 0 and not output.strip():
                self.logger.debug("Check archive fail.")
                return archive_status
            else:
                self.logger.debug("Check archive status:%s, output:%s."
                                  % (status, output))
        self.logger.debug("Check archive result:%s." % archive_status)
        return archive_status

    def check_recovery(self, former_status, cluster_status="normal"):
        """
        Check for recovery.
        """
        self.logger.log("Start check recovery.")
        if former_status.strip() not in ["recovery", "recovery_fail"]:
            self.logger.debug("Ignore for check recovery status:%s" % former_status)
            return
        recovery_status = "recovery_fail"
        if cluster_status.lower() not in ["normal", "degraded"]:
            self.logger.debug("Cluster status:%s,recovery fail." % cluster_status)
            return recovery_status
        if self.primary_dn_ids or (not self.main_standby_ids):
            self.logger.debug("Ignore update recovery for primary cluster.")
            return recovery_status
        return "recovery"

    def get_max_rpo_rto(self):
        """
        Get max rpo and rto.
        """
        self.logger.log("Start check RPO & RTO.")
        rpo_sql = "SELECT current_rpo FROM dbe_perf.global_streaming_hadr_rto_and_rpo_stat;"
        rto_sql = "SELECT current_rto FROM dbe_perf.global_streaming_hadr_rto_and_rpo_stat;"
        rto_rpo_sql = rpo_sql + rto_sql
        if not self.primary_dn_ids:
            self.logger.debug("Not found primary dn in cluster, cluster status:%s, "
                              "main standby:%s." % (self.cluster_status, self.main_standby_ids))
            return "", ""
        log_info = "Execute sql [%s] on node [%s: %s] with result:%s"
        dn_instances = [inst for node in self.cluster_info.dbNodes for inst in node.datanodes
                        if inst.instanceId in self.primary_dn_ids]
        if dn_instances:
            status, output = ClusterCommand.remoteSQLCommand(
                rto_rpo_sql, self.user, dn_instances[0].hostname, dn_instances[0].port)
            if status == 0 and output:
                try:
                    rets = output.strip().split('\n')
                    length = len(rets) // 2
                    rpo_list = [int(i) for i in rets[:length]]
                    rto_list = [int(j) for j in rets[length:]]
                    max_rpo, max_rto = str(max(rpo_list)), str(max(rto_list))
                except ValueError:
                    return "", ""
                self.logger.debug("Successfully get max rpo:%s, rto:%s, output:%s"
                                  % (max_rpo, max_rto, ','.join(output.split('\n'))))
                return max_rpo, max_rto
            else:
                self.logger.debug(log_info % (rto_rpo_sql, dn_instances[0].hostname,
                                              dn_instances[0].port, ','.join(output.split('\n'))))
        return "", ""

    def run(self):
        self.logger.log("Start streaming disaster query.")
        cluster_info = self.query_cluster_info()
        if cluster_info:
            self.parse_cluster_status(current_status=cluster_info)
        self.check_is_under_upgrade()
        check_cluster_stat = self.get_streaming_cluster_query_value(
            StreamingConstants.HADR_CLUSTER_STAT)
        archive_status = self.check_archive(check_cluster_stat, self.cluster_status)
        recovery_status = self.check_recovery(check_cluster_stat, self.cluster_status)
        hadr_cluster_stat = archive_status or recovery_status or check_cluster_stat

        hadr_failover_stat = self.get_streaming_cluster_query_value(
            StreamingConstants.HADR_FAILOVER_STAT)
        hadr_switchover_stat = self.get_streaming_cluster_query_value(
            StreamingConstants.HADR_SWICHOVER_STAT)
        if hadr_cluster_stat != "promote":
            hadr_failover_stat = ""
        if hadr_cluster_stat != "switchover":
            hadr_switchover_stat = ""

        self.logger.debug("Start check max rpo and rto.")
        max_rpo, max_rto = self.get_max_rpo_rto()
        self.logger.debug("Finished check max rpo and rto.")
        values = dict()
        values["hadr_cluster_stat"] = hadr_cluster_stat
        values["hadr_failover_stat"] = hadr_failover_stat
        values["hadr_switchover_stat"] = hadr_switchover_stat
        values["RPO"] = max_rpo
        values["RTO"] = max_rto
        self.logger.log("Successfully executed streaming disaster "
                        "recovery query, result:\n%s" % values)
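Because rto_rpo_sql concatenates two statements, the result arrives as one newline-separated block with all RPO rows first, then all RTO rows; splitting the list in half recovers the two series. A worked example with made-up values for three shards:

# Hypothetical output of the combined RPO/RTO query for three shards.
output = "12\n30\n7\n45\n60\n20"
rets = output.strip().split('\n')
length = len(rets) // 2
rpo_list = [int(i) for i in rets[:length]]   # [12, 30, 7]
rto_list = [int(j) for j in rets[length:]]   # [45, 60, 20]
print(max(rpo_list), max(rto_list))          # 30 60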
@@ -0,0 +1,105 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description : streaming_disaster_recovery_stop.py is a utility for stopping
# streaming disaster recovery on primary cluster.

from impl.streaming_disaster_recovery.streaming_base import StreamingBase


class StreamingStopHandler(StreamingBase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _first_step_for_streaming_stop(self, step):
        """
        First step for streaming stop
        """
        if step >= 2:
            return
        self.logger.debug("Start first step of streaming stop.")
        self.init_cluster_status()
        self.check_action_and_mode()

    def _second_step_for_streaming_stop(self, step):
        """
        Second step for streaming stop
        """
        if step >= 2:
            return
        self.logger.debug("Start second step of streaming stop.")
        self.check_cluster_status(status_allowed=['Normal'])
        self.check_cluster_type(allowed_type='primary')
        self.check_is_under_upgrade()
        self.write_streaming_step("2_check_cluster_step")

    def _third_step_for_streaming_stop(self, step):
        """
        Third step for streaming stop
        """
        if step >= 3:
            return
        self.logger.debug("Start third step of streaming stop.")
        self.remove_all_stream_repl_infos(guc_mode="reload")
        self.remove_streaming_cluster_file()
        self.write_streaming_step("3_remove_config_step")

    def _fourth_step_for_streaming_stop(self, step):
        """
        Fourth step for streaming stop
        """
        if step >= 4:
            return
        self.logger.debug("Start fourth step of streaming stop.")
        self.remove_streaming_pg_hba()
        self.restore_guc_params()
        self.write_streaming_step("4_remove_pg_hba_step")

    def _fifth_step_for_streaming_stop(self, step):
        """
        Fifth step for streaming stop
        """
        if step >= 5:
            return
        self.logger.debug("Start fifth step of streaming stop.")
        self.streaming_clean_replication_slot()
        self.write_streaming_step("5_update_config_step")

    def _sixth_step_for_streaming_stop(self, step):
        """
        Sixth step for streaming stop
        """
        if step >= 6:
            return
        self.logger.debug("Start sixth step of streaming stop.")
        self.check_cluster_status(['Normal'])
        self.clean_global_config()
        self.update_streaming_info("cluster", "normal")
        self.clean_streaming_dir()

    def run(self):
        self.logger.log("Start remove streaming disaster relationship.")
        step = self.query_streaming_step()
        self._first_step_for_streaming_stop(step)
        self.parse_cluster_status()
        self._second_step_for_streaming_stop(step)
        self._third_step_for_streaming_stop(step)
        self._fourth_step_for_streaming_stop(step)
        self._fifth_step_for_streaming_stop(step)
        self._sixth_step_for_streaming_stop(step)
        self.logger.log("Successfully do streaming disaster recovery stop.")
@@ -0,0 +1,476 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description : streaming_disaster_recovery_switchover.py is a utility for
# changing role between primary cluster and standby cluster.

import os
import time
from datetime import datetime, timedelta

from base_utils.os.cmd_util import CmdUtil
from base_utils.os.env_util import EnvUtil
from gspylib.common.Common import DefaultValue, ClusterCommand, ClusterInstanceConfig
from gspylib.common.DbClusterStatus import DbClusterStatus
from gspylib.common.ErrorCode import ErrorCode
from gspylib.threads.parallelTool import parallelTool
from impl.streaming_disaster_recovery.streaming_base import StreamingBase
from impl.streaming_disaster_recovery.streaming_constants import StreamingConstants


class StreamingSwitchoverHandler(StreamingBase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def run(self):
        """
        streaming disaster recovery switchover
        """
        self.logger.log("Start streaming disaster switchover.")
        self.check_action_and_mode()
        self.check_switchover_workable()
        self.init_cluster_conf()
        self.check_dn_instance_params()
        self.check_is_under_upgrade()
        try:
            self.streaming_switchover_single_inst()
            self.clean_step_file()
        except Exception as error:
            if self.params.mode == "primary":
                self.update_streaming_info("cluster", "promote_fail")
            raise Exception(
                ErrorCode.GAUSS_516["GAUSS_51632"] % "switchover" + "Error:%s" % str(error))
        finally:
            self.remove_cluster_maintance_file_for_switchover()
            self.remove_cluster_maintance_file()
        self.logger.log("Successfully do streaming disaster recovery switchover.")

    def streaming_switchover_single_inst(self):
        """
        streaming disaster recovery switchover for single_inst cluster
        disaster_standby: expect primary cluster becomes standby
        primary: expect standby cluster becomes primary
        """
        self.create_cluster_maintance_file("streaming switchover")
        self.update_streaming_info("cluster", StreamingConstants.ACTION_SWITCHOVER)
        stream_disaster_step = self.query_streaming_step()
        if self.params.mode == "primary":
            end_time = datetime.now() + timedelta(seconds=self.params.waitingTimeout)
            self.logger.log("Waiting for switchover barrier.")
            while True:
                switchover_barrier_list = self.check_streaming_disaster_switchover_barrier()
                if len(switchover_barrier_list) == len(self.normal_dn_ids):
                    break
                if datetime.now() >= end_time:
                    self.restart_cluster()
                    raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] %
                                    "check switchover_barrier on all main standby dn" +
                                    " Because check timeout: %ss" %
                                    str(self.params.waitingTimeout))
                time.sleep(5)
            self.streaming_failover_single_inst(stream_disaster_step,
                                                StreamingConstants.ACTION_SWITCHOVER)
        else:
            self.add_cluster_maintance_file_for_switchover()
            try:
                if stream_disaster_step < 1:
                    self.update_streaming_info(StreamingConstants.ACTION_SWITCHOVER, "10%")
                    self.stop_cluster()
                    self.start_cluster()
                    self.streaming_disaster_set_master_cluster_in_switchover()
                    self.write_streaming_step("1_streaming_disaster_set_master_in_switchover")
                if stream_disaster_step < 2:
                    self.update_streaming_info(StreamingConstants.ACTION_SWITCHOVER, "30%")
                    ClusterInstanceConfig.set_data_on_dcc(self.cluster_info,
                                                          self.logger, self.user,
                                                          {self.backup_open_key: "2"})
                    self.stop_cluster()
                    self.write_streaming_step("2_stop_cluster_for_switchover")
                if stream_disaster_step < 3:
                    self.set_cmserver_guc("backup_open", "2", "set")
                    self.set_cmagent_guc("agent_backup_open", "2", "set")
                    self.write_streaming_step("3_set_backup_open_2_done")
                if stream_disaster_step < 4:
                    self.update_streaming_info(StreamingConstants.ACTION_SWITCHOVER, "50%")
                    self.remove_cluster_maintance_file_for_switchover()
                    self.remove_cluster_maintance_file()
                    self.start_cluster()
                    self.write_streaming_step("4_start_cluster_done")
                if stream_disaster_step < 5:
                    self.wait_for_normal(timeout=self.params.waitingTimeout,
                                         streaming_switchover="streaming_switchover")
                    self.streaming_clean_replication_slot()
                    self.update_streaming_info("cluster", "recovery")
            except Exception as error:
                self.logger.error("Failed to do streaming disaster cluster switchover, Error:"
                                  " \n%s" % str(error))
                rollback_step = self.query_streaming_step()
                self.logger.debug("Roll back switchover step:%s" % rollback_step)
                self.remove_cluster_maintance_file_for_switchover()
                self.remove_cluster_maintance_file()
                if rollback_step < 4 or (rollback_step >= 4 and
                                         self.streaming_switchover_roll_back_condition()):
                    self.streaming_switchover_roll_back(update_query=True)
                    self.clean_step_file()
                raise Exception(error)
        self.remove_hadr_switchover_process_file()

    def remove_hadr_switchover_process_file(self):
        self.logger.debug("Remove hadr switchover process file for switchover.")
        process_file = os.path.realpath(os.path.join(self.streaming_file_dir,
                                                     ".hadr_switchover_stat"))
        cmd = "if [ -f {0} ]; then rm -rf {0}; fi".format(process_file)
        self.ssh_tool.executeCommand(cmd, hostList=self.connected_nodes)
        self.logger.debug("Successfully remove switchover process on all connected nodes.")

    @staticmethod
    def clean_file_on_node(params):
        """
        clean file on dest node with path
        """
        dest_ip, dest_path, timeout = params
        cmd = "source %s && pssh -s -t %s -H %s 'if [ -f %s ]; then rm -f %s; fi'" % (
            EnvUtil.getMpprcFile(), timeout, dest_ip, dest_path, dest_path)
        status, output = CmdUtil.getstatusoutput_by_fast_popen(cmd)
        return status, output, dest_ip

    def restart_cluster(self, restart_timeout=DefaultValue.TIMEOUT_CLUSTER_START):
        """
        Restart cluster
        """
        self.logger.log("Restart cluster.")
        static_config = "%s/bin/cluster_static_config" % self.bin_path
        cm_ctl_file = "%s/bin/cm_ctl" % self.bin_path
        if not os.path.isfile(static_config):
            self.logger.debug("Checked file %s lost." % static_config)
        if not os.path.isfile(cm_ctl_file):
            self.logger.debug("Checked file %s lost." % cm_ctl_file)
        stop_cmd = ClusterCommand.getStopCmd(0, timeout=restart_timeout)
        status, output = CmdUtil.retryGetstatusoutput(stop_cmd, retry_time=0)
        self.logger.debug("Stop cluster result:[%s][%s]." % (status, output))
        start_cmd = ClusterCommand.getStartCmd(0, timeout=restart_timeout)
        status, output = CmdUtil.retryGetstatusoutput(start_cmd, retry_time=0)
        self.logger.debug("Start cluster result:[%s][%s]." % (status, output))

    def remove_cluster_maintance_file_for_switchover(self):
        """
        function: remove the cluster_maintance file
        :return: NA
        """
        self.logger.debug("Remove cluster_maintance file for switchover.")
        cluster_maintance_file = os.path.realpath(os.path.join(self.gauss_home,
                                                               "bin/cluster_maintance"))
        host_names = \
            self.get_all_connection_node_name("remove_cluster_maintance_file_for_switchover")
        try:
            pscp_params = []
            all_instances = [dn_inst for db_node in self.cluster_info.dbNodes
                             for dn_inst in db_node.datanodes]
            if not self.cluster_info.isSingleInstCluster():
                all_instances.extend([dn_inst for db_node in self.cluster_info.dbNodes
                                      for dn_inst in db_node.coordinators])
            for dn_inst in all_instances:
                if dn_inst.hostname in host_names:
                    pscp_params.append([dn_inst.hostname, os.path.join(
                        dn_inst.datadir, os.path.basename(cluster_maintance_file)), 10])
            if len(pscp_params) > 0:
                results = parallelTool.parallelExecute(self.clean_file_on_node, pscp_params)
                for ret in results:
                    if ret[0] != 0:
                        self.logger.debug("clean maintance file to node[%s] with status[%s], "
                                          "output[%s]" % (ret[-1], ret[0], ret[1]))
        except Exception as error:
            self.logger.debug(
                "Failed to remove cluster_maintance file for switchover with error: %s"
                % str(error))
        self.logger.debug("Successfully remove %s cluster_maintance file for switchover."
                          % host_names)

    def add_cluster_maintance_file_for_switchover(self):
        """
        add cluster_maintance file for streaming disaster switchover to disaster_standby
        """
        self.logger.debug("Start add cluster_maintance file for switchover.")
        try:
            cluster_maintance_file = os.path.realpath(os.path.join(self.gauss_home,
                                                                   "bin/cluster_maintance"))
            host_names = \
                self.get_all_connection_node_name("add_cluster_maintance_file_for_switchover", True)
            pscp_params = []
            all_instances = [dn_inst for db_node in self.cluster_info.dbNodes
                             for dn_inst in db_node.datanodes]
            for dn_inst in all_instances:
                if dn_inst.hostname in host_names:
                    pscp_params.append([dn_inst.hostname, cluster_maintance_file,
                                        os.path.join(dn_inst.datadir, "cluster_maintance"), 10])
            if len(pscp_params) > 0:
                results = parallelTool.parallelExecute(
                    DefaultValue.distribute_file_to_node, pscp_params)
                for ret in results:
                    if ret[0] != 0:
                        self.logger.debug("Distribute maintance file for switchover to node[%s] "
                                          "with status[%s], output[%s]" % (ret[-1], ret[0], ret[1]))
        except Exception as error:
            self.logger.debug("WARNING: Failed add cluster_maintance file for switchover, "
                              "error:%s." % (str(error)))
        self.logger.debug("Successfully add cluster_maintance file for switchover.")

    def streaming_disaster_set_master_cluster_in_switchover(self):
        """
        streaming disaster set master cluster in switchover
        """
        self.logger.debug("Starting set streaming master cluster in switchover.")
        primary_dns = [dn_inst for db_node in self.cluster_info.dbNodes
                       for dn_inst in db_node.datanodes if
                       dn_inst.instanceId in self.primary_dn_ids]
        if not primary_dns:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"]
                            % "obtain primary dns for switchover")
        if self.streaming_dr_in_switchover(primary_dns):
            if self.streaming_dr_service_truncation_check(primary_dns):
                self.logger.debug("Successfully set streaming master cluster in switchover.")

    def streaming_dr_service_truncation_check(self, primary_dns_list):
        """
        streaming dr service truncation check
        """
        self.logger.log("Waiting for truncation.")
        results = parallelTool.parallelExecute(self.concurrent_check_dr_service_truncation,
                                               primary_dns_list)
        return all(results)

    def concurrent_check_dr_service_truncation(self, dn_inst):
        """
        Wait for the log playback to complete.
        """
        self.logger.debug("Starting check node %s shardNum %s instance %s streaming service "
                          "truncation." % (dn_inst.hostname, dn_inst.mirrorId, dn_inst.instanceId))
        sql_check = "select * from gs_streaming_dr_service_truncation_check();"
        end_time = datetime.now() + timedelta(seconds=1200)
        succeed = False
        while datetime.now() < end_time:
            status, output = ClusterCommand.remoteSQLCommand(sql_check, self.user, dn_inst.hostname,
                                                             dn_inst.port)
            if status == 0 and output and output.strip() == "t":
                succeed = True
                break
            time.sleep(5)
            self.logger.debug("Retry truncation check shardNum %s in node %s instance %s." %
                              (dn_inst.mirrorId, dn_inst.hostname, dn_inst.instanceId))
        if not succeed:
            self.logger.error("Failed to execute the command: %s, Error:\n%s" % (sql_check, output))
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] %
                            "check truncate service before switchover")
        self.logger.debug("Successfully check node %s shardNum %s instance %s streaming service "
                          "truncation." % (dn_inst.hostname, dn_inst.mirrorId, dn_inst.instanceId))
        return True

    def streaming_dr_in_switchover(self, primary_dns_list):
        """
        set streaming dr in switchover
        """
        results = parallelTool.parallelExecute(self.concurrent_set_dr_in_switchover,
                                               primary_dns_list)
        return all(results)

    def concurrent_set_dr_in_switchover(self, dn_inst):
        """
        Switchover requires log truncation first
        """
        self.logger.debug("Starting set shardNum %s node %s streaming dr in switchover." %
                          (dn_inst.mirrorId, dn_inst.hostname))
        sql_cmd = "select * from gs_streaming_dr_in_switchover();"
        # We need to use the normal port to transmit service truncation,
        # not the OM port.
        port = int(dn_inst.port) - 1
        (status, output) = ClusterCommand.remoteSQLCommand(sql_cmd,
                                                           self.user, dn_inst.hostname, str(port))
        self.logger.debug("check streaming in switchover, status=%d, output: %s."
                          % (status, output))
        if status != 0 or self.find_error(output) or output.strip() != "t":
            self.logger.error("Failed to execute the command: %s, Error:\n%s" % (sql_cmd, output))
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] %
                            "generate switchover barrier before switchover")
        self.logger.debug("Successfully set shardNum %s node %s streaming dr in switchover." %
                          (dn_inst.mirrorId, dn_inst.hostname))
        return True

    def wait_for_normal(self, timeout=DefaultValue.TIMEOUT_CLUSTER_START,
                        streaming_switchover=None):
        """
        function: wait for the cluster to become Normal or Degraded
        input: NA
        output: NA
        """
        self.logger.debug("Waiting for cluster status being satisfied.")
        end_time = None if timeout <= 0 else datetime.now() + timedelta(seconds=timeout)

        check_status = 0
        while True:
            time.sleep(10)
            if end_time is not None and datetime.now() >= end_time:
                check_status = 1
                self.logger.debug("Timeout. The cluster is not available.")
                break
            # View the cluster status
            status_file = "/home/%s/gauss_check_status_%d.dat" % (self.user, os.getpid())
            cmd = ClusterCommand.getQueryStatusCmd(outFile=status_file)
            (status, output) = CmdUtil.retryGetstatusoutput(cmd, retry_time=0)
            if status != 0:
                if os.path.exists(status_file):
                    os.remove(status_file)
                self.logger.debug("Failed to obtain the cluster status. Error: \n%s" % output)
                continue
            # Determine whether the cluster status is normal or degraded
            cluster_status = DbClusterStatus()
            cluster_status.initFromFile(status_file)
            if os.path.exists(status_file):
                os.remove(status_file)
            if cluster_status.clusterStatus == "Normal":
                self.logger.log("The cluster status is Normal.")
                break
            else:
                self.logger.debug("Cluster status is %s(%s)." % (
                    cluster_status.clusterStatus, cluster_status.clusterStatusDetail))

        if check_status != 0:
            if streaming_switchover == "streaming_switchover":
                raise Exception(
                    ErrorCode.GAUSS_528["GAUSS_52800"] % (cluster_status.clusterStatus,
                                                          cluster_status.clusterStatusDetail))
            self.logger.logExit(ErrorCode.GAUSS_528["GAUSS_52800"] % (
                cluster_status.clusterStatus, cluster_status.clusterStatusDetail))
        self.logger.debug("Successfully wait for cluster status become Normal.", "constant")

    def set_auto_csn_barrier_guc(self, guc_mode, action_flag=False, roll_back=False):
        """
        auto_csn_barrier : 0 / 1
        """
        guc_value = 1 if self.params.mode == "primary" else 0
        if action_flag:
            guc_value = 0
        if roll_back:
            guc_value = 1
        self.logger.debug("Starting %s auto_csn_barrier is %s." % (guc_mode, guc_value))
        cmd = 'source %s && gs_guc %s -Z coordinator -N all -I all ' \
              '-c "auto_csn_barrier=%s"' % (self.mpp_file, guc_mode, guc_value)
        host_names = self.cluster_info.getClusterNodeNames()
        ignore_node = [node for node in host_names if node not in self.normal_node_list]
        if ignore_node:
            self.logger.debug(
                "WARNING: auto_csn_barrier need ignore host name is %s" % ignore_node)
            nodes = ",".join(ignore_node)
            cmd = cmd + " --ignore-node %s" % nodes
        self.logger.debug("Set auto_csn_barrier with cmd:%s" % cmd)
        status, output = CmdUtil.retryGetstatusoutput(cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"]
                            % "set auto_csn_barrier" + "Error:%s" % output)
        self.logger.debug("Successfully %s auto_csn_barrier is %s." % (guc_mode, guc_value))

    def streaming_switchover_roll_back(self, update_query=False):
        """
        streaming disaster cluster roll back in switchover
        """
        self.logger.log("Roll back streaming disaster cluster switchover...")
        ClusterInstanceConfig.set_data_on_dcc(self.cluster_info,
                                              self.logger, self.user,
                                              {self.backup_open_key: "0"})
        self.stop_cluster()
        self.set_cmserver_guc("backup_open", "0", "set")
        self.set_cmagent_guc("agent_backup_open", "0", "set")
        self.logger.log("Successfully modify cma and cms parameters to start according to primary "
                        "cluster mode")
        if update_query:
            self.update_streaming_info("cluster", "archive")
        self.start_cluster()
        self.logger.log("Successfully roll back streaming disaster cluster switchover.")

    def check_streaming_disaster_switchover_barrier(self):
        """
        check whether get switchover_barrier on all dn
        """
        self.logger.debug("check streaming disaster switchover barrier...")
        sql_cmd = "select * from gs_streaming_dr_get_switchover_barrier();"
        switchover_barrier_list = []
        for db_node in self.cluster_info.dbNodes:
            for dn_inst in db_node.datanodes:
                if dn_inst.instanceId not in self.normal_dn_ids:
                    self.logger.debug("Warning: Not check for abnormal instance %s %s" % (
                        dn_inst.instanceType, dn_inst.instanceId))
                    continue
                (status, output) = ClusterCommand.remoteSQLCommand(
                    sql_cmd, self.user, dn_inst.hostname, dn_inst.port, maintenance_mode=True)
                self.logger.debug("Check inst has switchover barrier, status=%d, "
                                  "output: %s." % (status, output))
                if status == 0 and output.strip() == "t":
                    self.logger.debug("Successfully check instance %s %s has switchover "
                                      "barrier." % (dn_inst.instanceType, dn_inst.instanceId))
                    switchover_barrier_list.append(dn_inst.instanceId)
        return switchover_barrier_list

    def check_switchover_workable(self):
        """
        Check switchover is workable
        """
        if not DefaultValue.is_disaster_cluster(self.cluster_info) \
                and self.params.mode == "primary":
            self.logger.debug("The primary dn exist, do nothing except record the result file.")
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] %
                            "streaming disaster cluster switchover, Because the primary cluster "
                            "[drClusterMode] parameter must be disaster_standby")
        if DefaultValue.is_disaster_cluster(self.cluster_info) and \
                self.params.mode == "disaster_standby":
            self.logger.debug("The primary dn not exist, do nothing except record the result file.")
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"] %
                            "streaming disaster cluster switchover, Because the disaster_standby "
                            "cluster [drClusterMode] parameter must be primary")
        self.logger.log("Waiting for cluster and all instances normal.")
        if self.params.mode == "primary":
            end_time = datetime.now() + timedelta(seconds=600)
            while True:
                self.init_cluster_status()
                self.parse_cluster_status()
                if self.check_cluster_status(status_allowed=['Normal'], only_check=True,
                                             is_log=False) and self.check_instances_ready_for_switchover():
                    break
                if datetime.now() >= end_time:
                    raise Exception(ErrorCode.GAUSS_516["GAUSS_51632"]
                                    % "check cluster and instances status"
                                    " with timeout: %ss" % str(600))
                time.sleep(5)
                self.logger.debug("Retry check stream disaster standby cluster status...")
        else:
            self.init_cluster_status()
            self.parse_cluster_status()
            if (not self.check_cluster_status(status_allowed=['Normal'], only_check=True,
                                              is_log=False)) \
                    or (not self.check_instances_ready_for_switchover()):
                raise Exception(ErrorCode.GAUSS_516['GAUSS_51632'] % "check cluster status")

    def check_instances_ready_for_switchover(self):
        """
        Check cns and dns is ready for switchover
        """
        dn_instances = [dn_inst.instanceId for db_node in self.cluster_info.dbNodes
                        for dn_inst in db_node.datanodes]
        if len(dn_instances) != len(self.normal_dn_ids):
            self.logger.debug("Not all dn instances is normal.")
            return False
        self.logger.debug("Successfully check cn and dn instances are normal.")
        return True
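The five handler classes above (start, stop, switchover, failover, query) share the same StreamingBase and the same run() entry point. A hypothetical dispatcher tying them together; the actual gs_sdr entry script presumably performs an equivalent task-to-class mapping, and the module paths below are assumed from the import lines shown earlier:

from impl.streaming_disaster_recovery.streaming_disaster_recovery_start import StreamingStartHandler
from impl.streaming_disaster_recovery.streaming_disaster_recovery_stop import StreamingStopHandler
from impl.streaming_disaster_recovery.streaming_disaster_recovery_switchover import StreamingSwitchoverHandler
from impl.streaming_disaster_recovery.streaming_disaster_recovery_failover import StreamingFailoverHandler
from impl.streaming_disaster_recovery.streaming_disaster_recovery_query import StreamingQueryHandler

HANDLERS = {
    "start": StreamingStartHandler,
    "stop": StreamingStopHandler,
    "switchover": StreamingSwitchoverHandler,
    "failover": StreamingFailoverHandler,
    "query": StreamingQueryHandler,
}

def run_task(task, *args, **kwargs):
    # Map the -t <task> value onto its handler class and execute it.
    handler = HANDLERS[task](*args, **kwargs)
    handler.run()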
@@ -107,6 +107,24 @@ BINARY_UPGRADE_STEP_START_NODE = 5
BINARY_UPGRADE_STEP_PRE_COMMIT = 6


# dual cluster stage
class DualClusterStage:
    """
    Dual cluster stage upgrade marking
    """
    def __init__(self):
        pass

    (STEP_UPGRADE_END,
     STEP_UPGRADE_UNFINISHED,
     STEP_UPGRADE_FINISH,
     STEP_UPGRADE_COMMIT,
     ) = list(range(0, 4))

    def __str__(self):
        pass


# grey upgrade
class GreyUpgradeStep:
    def __init__(self):
@@ -174,3 +192,17 @@ UPGRADE_VERSION_64bit_xid = 91.208
ENABLE_STREAM_REPLICATION_VERSION = "92.149"
ENABLE_STREAM_REPLICATION_NAME = "enable_stream_replication"
RELMAP_4K_VERSION = "92.420"

# streaming cluster
GS_SECURE_FILES = "gs_secure_files"
UPGRADE_PHASE_INFO = "upgrade_phase_info"
HARD_KEY_CIPHER = "hadr.key.cipher"
HARD_KEY_RAND = "hadr.key.rand"
DISASTER_RECOVERY_GUC = "backup_open"
INSTALL_TYPE_GUC = "install_type"
REMOTE_INFO_GUC = {
    "dual-standby-streamDR": "replconninfo",
    "dual-primary-streamDR": "replconninfo"
}
LENGTH_STORAGE_INFO_LEN = 4
ACTION_CLEAN_GS_SECURE_FILES = "clean_gs_secure_files"
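DualClusterStage uses tuple unpacking over range() as a lightweight enum, so the four stage names are simply the integers 0 through 3; recordDualClusterStage (seen below) presumably persists one of these values alongside a commit id. For example:

(STEP_UPGRADE_END,
 STEP_UPGRADE_UNFINISHED,
 STEP_UPGRADE_FINISH,
 STEP_UPGRADE_COMMIT,
 ) = list(range(0, 4))

# The unpacking assigns consecutive integers to the stage names.
assert STEP_UPGRADE_END == 0 and STEP_UPGRADE_COMMIT == 3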
@@ -23,6 +23,7 @@ import json
import csv
import traceback
import copy
import re

from datetime import datetime, timedelta

@@ -38,6 +39,7 @@ from gspylib.os.gsfile import g_file
from gspylib.inspection.common import SharedFuncs
from gspylib.component.CM.CM_OLAP.CM_OLAP import CM_OLAP
from impl.upgrade.UpgradeConst import GreyUpgradeStep
from impl.upgrade.UpgradeConst import DualClusterStage
import impl.upgrade.UpgradeConst as const
from base_utils.executor.cmd_executor import CmdExecutor
from base_utils.executor.local_remote_cmd import LocalRemoteCmd
@@ -82,6 +84,7 @@ class UpgradeImpl:
        self.isLargeInplaceUpgrade = False
        self.__upgrade_across_64bit_xid = False
        self.action = upgrade.action
        self.primaryDn = None

    def exitWithRetCode(self, action, succeed=True, msg=""):
        """
@@ -145,9 +148,56 @@ class UpgradeImpl:
        self.context.sshTool = SshTool(
            self.context.clusterNodes, self.context.localLog,
            DefaultValue.TIMEOUT_PSSH_BINARY_UPGRADE)
        self.initVersionInfo()
        self.initClusterConfig()
        self.initClusterType()
        self.context.logger.debug("Successfully init global infos", "constant")

    def initVersionInfo(self):
        """
        Initialize the old and new version information

        :return:
        """
        newVersionFile = VersionInfo.get_version_file()
        newClusterVersion, newClusterNumber, newCommitId = VersionInfo.get_version_info(
            newVersionFile)
        gaussHome = ClusterDir.getInstallDir(self.context.user)

        newPath = gaussHome + "_%s" % newCommitId
        oldPath = self.getClusterAppPath(const.OLD)

        if oldPath == "":
            oldPath = os.path.realpath(gaussHome)
        oldVersionFile = "%s/bin/upgrade_version" % oldPath
        try:
            (oldClusterVersion, oldClusterNumber, oldCommitId) = VersionInfo.get_version_info(
                oldVersionFile)
            self.context.logger.debug("Successfully obtained version information of "
                                      "old clusters by %s." % oldVersionFile)
        except Exception as er:
            if os.path.exists(self.context.upgradeBackupPath):
                # if upgradeBackupPath exists, it means that we do rollback first,
                # and we get the cluster version from the backup file
                possibOldVersionFile = "%s/old_upgrade_version" % self.context.upgradeBackupPath
                self.context.logger.debug(str(er))
                self.context.logger.debug("Try to get the version information "
                                          "from %s." % possibOldVersionFile)
                (oldClusterVersion, oldClusterNumber, oldCommitId) = VersionInfo.get_version_info(
                    possibOldVersionFile)
            else:
                raise Exception(str(er))

        self.context.newClusterVersion = newClusterVersion
        self.context.newClusterNumber = newClusterNumber
        self.context.oldClusterVersion = oldClusterVersion
        self.context.oldClusterNumber = oldClusterNumber
        self.context.newClusterAppPath = newPath
        self.context.oldClusterAppPath = oldPath
        self.newCommitId = newCommitId
        self.oldCommitId = oldCommitId

    def setClusterDetailInfo(self):
        """
        function: set cluster detail info
@@ -268,6 +318,8 @@ class UpgradeImpl:
                DefaultValue.TIMEOUT_PSSH_BINARY_UPGRADE)
        if action == const.ACTION_AUTO_ROLLBACK and \
                self.checkBakPathNotExists():
            if os.path.isfile(self.context.upgradePhaseInfoPath):
                self.recordDualClusterStage(self.oldCommitId, DualClusterStage.STEP_UPGRADE_END)
            self.context.logger.log("No need to rollback.")
            self.exitWithRetCode(action, True)
        else:
@@ -290,6 +342,11 @@ class UpgradeImpl:
        grey upgrade rollback if not in read only, then record the value of
        enable_transaction_read_only and set it to off
        """
        # no need to check read only mode and close enable_transaction_read_only
        if self.context.standbyCluster:
            self.context.logger.debug("no need to check read only in force or"
                                      " standby cluster mode upgrade")
            return
        try:
            self.context.logger.debug("Check if in read only mode.")
            greyUpgradeFlagFile = os.path.join(self.context.upgradeBackupPath,
@@ -481,16 +538,9 @@ class UpgradeImpl:
                % newClusterNumber)
            self.context.logger.debug("The matched upgrade strategy is: %s."
                                      % upgradeAction)
            self.context.newClusterVersion = newClusterVersion
            self.context.newClusterNumber = newClusterNumber
            self.context.oldClusterVersion = oldClusterVersion
            self.context.oldClusterNumber = oldClusterNumber
            self.context.newClusterAppPath = newPath
            self.context.oldClusterAppPath = oldPath
            self.newCommitId = newCommitId
            self.oldCommitId = oldCommitId
            return upgradeAction
        except Exception as e:
            self.clean_gs_secure_files()
            raise Exception(ErrorCode.GAUSS_529["GAUSS_52900"] % str(e)
                            + " Do nothing this time.")
@@ -665,6 +715,10 @@ class UpgradeImpl:
        """
        try:
            self.context.logger.debug("Setting up the cluster read-only mode.")
            if self.context.standbyCluster:
                self.context.logger.debug("no need to set cluster "
                                          "read only mode under force or standby cluster upgrade")
                return 0
            self.setGUCValue("default_transaction_read_only", "true")
            self.context.logger.debug("successfully set the cluster read-only mode.")
            return 0
@@ -682,6 +736,10 @@ class UpgradeImpl:
        """
        try:
            self.context.logger.debug("Canceling the cluster read-only mode.")
            if self.context.standbyCluster:
                self.context.logger.debug("no need to unset cluster "
                                          "read only mode under force or standby cluster upgrade")
                return 0
            self.setGUCValue("default_transaction_read_only", "false")
            self.context.logger.debug("Successfully cancelled the cluster read-only mode.")
            return 0
@@ -887,6 +945,8 @@ class UpgradeImpl:
        Input : gucStr the guc key:value string
        output : NA
        """
        if "dual-standby" in self.context.clusterType:
            return
        self.context.logger.debug("Start to check GUC value %s." % gucStr)
        try:
            # send cmd to that node and exec
@@ -910,6 +970,28 @@ class UpgradeImpl:
        except Exception as e:
            raise Exception(str(e))

    def backup_disaster_user_file(self):
        """backup_disaster_user_file"""
        bin_path = os.path.join(EnvUtil.getEnv("GAUSSHOME"), "bin")
        cipher_file = os.path.join(bin_path, "hadr.key.cipher")
        if os.path.isfile(cipher_file):
            FileUtil.cpFile(cipher_file, "%s/" % self.context.tmpDir)
        rand_file = os.path.join(bin_path, "hadr.key.rand")
        if os.path.isfile(rand_file):
            FileUtil.cpFile(rand_file, "%s/" % self.context.tmpDir)
        self.context.logger.debug("Back up rand and cipher file to temp dir.")

    def restore_origin_disaster_user_file(self):
        """restore_origin_disaster_user_file"""
        bin_path = os.path.join(self.context.newClusterAppPath, "bin")
        cipher_file = os.path.join(self.context.tmpDir, "hadr.key.cipher")
        if os.path.isfile(cipher_file):
            self.context.sshTool.scpFiles(cipher_file, bin_path)
        rand_file = os.path.join(self.context.tmpDir, "hadr.key.rand")
        if os.path.isfile(rand_file):
            self.context.sshTool.scpFiles(rand_file, bin_path)
        self.context.logger.debug("Restore rand and cipher file to gausshome.")

    def floatMoreThan(self, numOne, numTwo):
        """
        function: float more than
@@ -968,8 +1050,10 @@ class UpgradeImpl:
            self.distributeXml()
            # 2. check if the app path is ready and sha256 is right and others
            self.checkUpgrade()
            # 4. check the cluster pressure
            self.HASyncReplayCheck()
            if self.context.action == const.ACTION_LARGE_UPGRADE and \
                    "dual-standby" not in self.context.clusterType:
                # 4. check the cluster pressure
                self.HASyncReplayCheck()
            # 5. before do grey binary upgrade, we must make sure the
            # cluster is Normal and the database could be
            # connected, if not, exit.
@@ -983,6 +1067,12 @@ class UpgradeImpl:
            # check if it satisfies upgrade again; if it is the second loop to
            # upgrade, it can go to the upgrade-again branch
            upgradeAgain = self.canUpgradeAgain()
|
||||
if not upgradeAgain:
|
||||
self.recordDualClusterStage(self.oldCommitId,
|
||||
DualClusterStage.STEP_UPGRADE_UNFINISHED)
|
||||
self.context.logger.log("NOTICE: The directory %s will be deleted after "
|
||||
"commit-upgrade, please make sure there is no personal "
|
||||
"data." % self.context.oldClusterAppPath)
|
||||
except Exception as e:
|
||||
# before this step, the upgrade process do nothing to the cluster,
|
||||
# this time has no remaining
|
||||
@ -998,6 +1088,8 @@ class UpgradeImpl:
|
||||
if not self.doGreyBinaryRollback():
|
||||
self.exitWithRetCode(const.ACTION_AUTO_ROLLBACK, False)
|
||||
self.removeOmRollbackProgressFile()
|
||||
self.recordDualClusterStage(self.oldCommitId,
|
||||
DualClusterStage.STEP_UPGRADE_UNFINISHED)
|
||||
self.context.logger.log(
|
||||
"The directory %s will be deleted after commit-upgrade, "
|
||||
"please make sure there is no personal data." %
|
||||
@ -1021,8 +1113,14 @@ class UpgradeImpl:
|
||||
# we can not recognize if it really cannot
|
||||
# find the column, or just because the old version. So we
|
||||
# will update the catalog in the old version
|
||||
if self.context.action == const.ACTION_LARGE_UPGRADE:
|
||||
if self.context.action == const.ACTION_LARGE_UPGRADE and \
|
||||
"dual-standby" not in self.context.clusterType:
|
||||
self.updateCatalog()
|
||||
elif self.context.action == const.ACTION_LARGE_UPGRADE and \
|
||||
"dual-standby" in self.context.clusterType:
|
||||
self.setUpgradeFromParam(self.context.oldClusterNumber)
|
||||
self.reloadCmAgent()
|
||||
self.reload_cmserver()
|
||||
self.recordNodeStep(GreyUpgradeStep.STEP_SWITCH_NEW_BIN)
|
||||
self.CopyCerts()
|
||||
self.upgradeAgain()
|
||||
@ -1061,6 +1159,7 @@ class UpgradeImpl:
|
||||
# 11. switch the cluster version to new version
|
||||
self.getOneDNInst(checkNormal=True)
|
||||
self.switchBin(const.NEW)
|
||||
self.restore_origin_disaster_user_file()
|
||||
# create CA for CM
|
||||
self.create_ca_for_cm()
|
||||
self.setNewVersionGuc()
|
||||
@ -1093,14 +1192,16 @@ class UpgradeImpl:
|
||||
self.waitClusterForNormal()
|
||||
# backup global relmap file before doing upgrade-post
|
||||
self.backupGlobalRelmapFile()
|
||||
self.prepareSql("rollback-post")
|
||||
self.execRollbackUpgradedCatalog(scriptType="rollback-post")
|
||||
self.prepareSql("upgrade-post")
|
||||
self.execRollbackUpgradedCatalog(scriptType="upgrade-post")
|
||||
self.getLsnInfo()
|
||||
if "dual-standby" not in self.context.clusterType:
|
||||
self.prepareSql("rollback-post")
|
||||
self.execRollbackUpgradedCatalog(scriptType="rollback-post")
|
||||
self.prepareSql("upgrade-post")
|
||||
self.execRollbackUpgradedCatalog(scriptType="upgrade-post")
|
||||
self.getLsnInfo()
|
||||
hosts = copy.deepcopy(self.context.clusterNodes)
|
||||
self.recordNodeStep(
|
||||
GreyUpgradeStep.STEP_PRE_COMMIT, nodes=hosts)
|
||||
self.recordDualClusterStage(self.newCommitId, DualClusterStage.STEP_UPGRADE_FINISH)
|
||||
self.printPrecommitBanner()
|
||||
except Exception as e:
|
||||
hintInfo = "Nodes are new version. " \
|
||||
@ -1250,6 +1351,9 @@ class UpgradeImpl:
|
||||
try:
|
||||
self.context.logger.log("Create checkpoint before switching.")
|
||||
start_time = timeit.default_timer()
|
||||
if self.context.forceRollback or self.context.standbyCluster:
|
||||
self.context.logger.debug("No need to do checkpoint.")
|
||||
return
|
||||
# create checkpoint
|
||||
sql = "CHECKPOINT;"
|
||||
for i in range(10):
|
||||
@ -1703,6 +1807,10 @@ class UpgradeImpl:
|
||||
if not self.doInplaceBinaryRollback():
|
||||
self.exitWithRetCode(const.ACTION_AUTO_ROLLBACK, False)
|
||||
try:
|
||||
if self.context.action == const.ACTION_LARGE_UPGRADE and \
|
||||
"dual-standby" not in self.context.clusterType:
|
||||
# check the cluster pressure
|
||||
self.HASyncReplayCheck()
|
||||
self.checkUpgrade()
|
||||
|
||||
# 3. before do binary upgrade, we must make sure the cluster is
|
||||
@ -2278,6 +2386,9 @@ class UpgradeImpl:
|
||||
output: NA
|
||||
"""
|
||||
self.context.logger.debug("Preparing upgrade sql folder.")
|
||||
if self.context.standbyCluster:
|
||||
self.context.logger.debug("no need prepare upgrade sql folder under force upgrade")
|
||||
return
|
||||
hosts = self.context.clusterNodes
|
||||
cmd = "%s -t %s -U %s --upgrade_bak_path=%s -X %s -l %s" % \
|
||||
(OMCommand.getLocalScript("Local_Upgrade_Utility"),
|
||||
@ -2309,6 +2420,10 @@ class UpgradeImpl:
|
||||
self.context.logger.debug("Start to wait and check if all the standby"
|
||||
" instances have replayed all xlogs, host: %s" % \
|
||||
host.hostname)
|
||||
if self.context.standbyCluster or self.context.forceRollback:
|
||||
self.context.logger.debug("no need to do HA sync replay check "
|
||||
"under force upgrade/rollback and standby cluster mode")
|
||||
return
|
||||
self.doReplay(catchupFailedOk, host)
|
||||
self.context.logger.debug("Successfully performed the replay check "
|
||||
"of the standby instance.")
|
||||
@ -2754,10 +2869,11 @@ class UpgradeImpl:
|
||||
"""
|
||||
self.context.logger.debug("Get database list in cluster.")
|
||||
sql = "select datname from pg_database;"
|
||||
mode = True if "dual-standby" in self.context.clusterType else False
|
||||
(status, output) = ClusterCommand.remoteSQLCommand(
|
||||
sql, self.context.user,
|
||||
self.dnInst.hostname, self.dnInst.port, False,
|
||||
DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True)
|
||||
DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True, maintenance_mode=mode)
|
||||
if status != 0:
|
||||
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
|
||||
" Error: \n%s" % str(output))
|
||||
@ -2777,12 +2893,13 @@ class UpgradeImpl:
|
||||
make checkpoint
|
||||
:return:
|
||||
"""
|
||||
mode = True if "dual-standby" in self.context.clusterType else False
|
||||
sql = 'CHECKPOINT;'
|
||||
for eachdb in database_list:
|
||||
(status, output) = ClusterCommand.remoteSQLCommand(
|
||||
sql, self.context.user,
|
||||
self.dnInst.hostname, self.dnInst.port, False,
|
||||
eachdb, IsInplaceUpgrade=True)
|
||||
eachdb, IsInplaceUpgrade=True, maintenance_mode=mode)
|
||||
if status != 0:
|
||||
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
|
||||
" Error: \n%s" % str(output))
|
||||
@ -3335,6 +3452,7 @@ class UpgradeImpl:
|
||||
input : NA
|
||||
output: NA
|
||||
"""
|
||||
self.checkDualClusterCommit()
|
||||
try:
|
||||
(status, output) = self.doHealthCheck(const.OPTION_POSTCHECK)
|
||||
if status != 0:
|
||||
@ -3351,22 +3469,27 @@ class UpgradeImpl:
|
||||
# for the reenter commit, the schema may have been deleted
|
||||
if self.existTable(const.RECORD_NODE_STEP):
|
||||
self.recordNodeStep(GreyUpgradeStep.STEP_BEGIN_COMMIT)
|
||||
self.recordDualClusterStage(self.newCommitId, DualClusterStage.STEP_UPGRADE_COMMIT)
|
||||
|
||||
self.setActionFile()
|
||||
if self.context.action == const.ACTION_LARGE_UPGRADE:
|
||||
if DefaultValue.get_cm_server_num_from_static(self.context.clusterInfo) > 0:
|
||||
self.setUpgradeFromParam(const.UPGRADE_UNSET_NUM)
|
||||
self.reloadCmAgent()
|
||||
self.reload_cmserver(is_final=True)
|
||||
self.setUpgradeMode(0)
|
||||
if "dual-standby" not in self.context.clusterType:
|
||||
self.setUpgradeMode(0)
|
||||
time.sleep(10)
|
||||
if self.dropPMKSchema() != 0:
|
||||
raise Exception(ErrorCode.GAUSS_529["GAUSS_52917"])
|
||||
|
||||
self.clearOtherToolPackage()
|
||||
self.cleanInstallPath(const.OLD)
|
||||
self.dropSupportSchema()
|
||||
self.cleanBinaryUpgradeBakFiles()
|
||||
if "dual-standby" not in self.context.clusterType:
|
||||
self.dropSupportSchema()
|
||||
self.cleanConfBakOld()
|
||||
self.recordDualClusterStage(self.newCommitId, DualClusterStage.STEP_UPGRADE_END)
|
||||
self.cleanBinaryUpgradeBakFiles()
|
||||
# remove tmp global relmap file
|
||||
self.cleanTmpGlobalRelampFile()
|
||||
self.context.logger.log("Commit upgrade succeeded.")
|
||||
@ -3383,6 +3506,9 @@ class UpgradeImpl:
|
||||
"""
|
||||
try:
|
||||
self.context.logger.debug("Start to drop schema PMK.")
|
||||
if self.context.standbyCluster:
|
||||
self.context.logger.debug("no need to delete schema PMK in standby cluster mode.")
|
||||
return 0
|
||||
# execute drop commands by the CN instance
|
||||
sql = "DROP SCHEMA IF EXISTS pmk CASCADE; "
|
||||
retry_times = 0
|
||||
@ -3448,7 +3574,10 @@ class UpgradeImpl:
|
||||
try:
|
||||
self.distributeXml()
|
||||
if action == const.ACTION_AUTO_ROLLBACK:
|
||||
self.checkDualClusterRollback()
|
||||
self.clearOtherToolPackage(action)
|
||||
self.recordDualClusterStage(self.oldCommitId,
|
||||
DualClusterStage.STEP_UPGRADE_UNFINISHED)
|
||||
try:
|
||||
self.getOneDNInst(True)
|
||||
except Exception as e:
|
||||
@ -3475,12 +3604,14 @@ class UpgradeImpl:
|
||||
# consider if need to sync them, not important
|
||||
# under force upgrade, only read step from file
|
||||
maxStep = self.getNodeStep()
|
||||
self.checkDualClusterRollback()
|
||||
# if -2, it means there is no need to exec rollback
|
||||
# if under upgrade continue mode, it will do upgrade not rollback,
|
||||
# it can enter the upgrade process
|
||||
# when the binary_upgrade bak dir has some files
|
||||
if maxStep == const.BINARY_UPGRADE_NO_NEED_ROLLBACK:
|
||||
self.cleanBinaryUpgradeBakFiles(True)
|
||||
self.recordDualClusterStage(self.oldCommitId, DualClusterStage.STEP_UPGRADE_END)
|
||||
self.context.logger.log("No need to rollback.")
|
||||
return True
|
||||
|
||||
@ -3498,6 +3629,7 @@ class UpgradeImpl:
|
||||
self.recordNodeStep(
|
||||
GreyUpgradeStep.STEP_UPDATE_POST_CATALOG, nodes)
|
||||
maxStep = self.getNodeStep()
|
||||
self.checkDualClusterRollback()
|
||||
if maxStep == GreyUpgradeStep.STEP_UPDATE_POST_CATALOG:
|
||||
self.context.logger.debug(
|
||||
"Record the step %d to mark it has leaved pre-commit"
|
||||
@ -3506,7 +3638,8 @@ class UpgradeImpl:
|
||||
if self.context.action == const.ACTION_LARGE_UPGRADE\
|
||||
and \
|
||||
self.isNodeSpecifyStep(
|
||||
GreyUpgradeStep.STEP_UPDATE_POST_CATALOG):
|
||||
GreyUpgradeStep.STEP_UPDATE_POST_CATALOG)\
|
||||
and "dual-standby" not in self.context.clusterType:
|
||||
self.prepareUpgradeSqlFolder()
|
||||
self.prepareSql("rollback-post")
|
||||
self.setUpgradeMode(2)
|
||||
@ -3538,7 +3671,8 @@ class UpgradeImpl:
|
||||
self.recordNodeStep(GreyUpgradeStep.STEP_UPDATE_CATALOG)
|
||||
if maxStep >= GreyUpgradeStep.STEP_UPDATE_CATALOG and\
|
||||
self.context.action == const.ACTION_LARGE_UPGRADE:
|
||||
self.rollbackCatalog()
|
||||
if "dual-standby" not in self.context.clusterType:
|
||||
self.rollbackCatalog()
|
||||
self.recordNodeStep(GreyUpgradeStep.STEP_INIT_STATUS)
|
||||
|
||||
if maxStep >= GreyUpgradeStep.STEP_INIT_STATUS:
|
||||
@ -3546,8 +3680,10 @@ class UpgradeImpl:
|
||||
# dir will create in every node
|
||||
self.cleanInstallPath(const.NEW)
|
||||
self.getOneDNInst()
|
||||
self.dropSupportSchema()
|
||||
if "dual-standby" not in self.context.clusterType:
|
||||
self.dropSupportSchema()
|
||||
self.initOmRollbackProgressFile()
|
||||
self.recordDualClusterStage(self.oldCommitId, DualClusterStage.STEP_UPGRADE_END)
|
||||
self.cleanBinaryUpgradeBakFiles(True)
|
||||
self.cleanTmpGlobalRelampFile()
|
||||
except Exception as e:
|
||||
@ -3621,28 +3757,6 @@ class UpgradeImpl:
|
||||
"""
|
||||
self.checkActionInFile()
|
||||
|
||||
def execSqlCommandInPrimaryDN(self, sql, retryTime=3):
|
||||
self.context.logger.debug("Start to exec sql {0}.".format(sql))
|
||||
count = 0
|
||||
status, output = 1, ""
|
||||
while count < retryTime:
|
||||
|
||||
self.context.logger.debug(
|
||||
"Exec sql in dn node {0}".format(self.dnInst.hostname))
|
||||
(status, output) = ClusterCommand.remoteSQLCommand(
|
||||
sql, self.context.user,
|
||||
self.dnInst.hostname, self.dnInst.port, False,
|
||||
DefaultValue.DEFAULT_DB_NAME, IsInplaceUpgrade=True)
|
||||
self.context.logger.debug(
|
||||
"Exec sql result is, status:{0}, output is {1}".format(
|
||||
status, output))
|
||||
if status != 0 or SqlResult.findErrorInSql(output):
|
||||
count += 1
|
||||
continue
|
||||
else:
|
||||
break
|
||||
return status, output
|
||||
|
||||
def checkActionInFile(self):
|
||||
"""
|
||||
function: check whether current action is same
|
||||
@ -3884,11 +3998,12 @@ class UpgradeImpl:
|
||||
check a table exist
|
||||
:return:
|
||||
"""
|
||||
mode = True if "dual-standby" in self.context.clusterType else False
|
||||
sql = "select count(*) from pg_class where relname = '%s';" % name
|
||||
(status, output) = ClusterCommand.remoteSQLCommand(
|
||||
sql, self.context.user,
|
||||
self.dnInst.hostname, self.dnInst.port, False,
|
||||
eachdb, IsInplaceUpgrade=True)
|
||||
eachdb, IsInplaceUpgrade=True, maintenance_mode=mode)
|
||||
if status != 0 or SqlResult.findErrorInSql(output):
|
||||
raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql +
|
||||
" Error: \n%s" % str(output))
|
||||
@ -4832,7 +4947,7 @@ class UpgradeImpl:
|
||||
self.context.logger.log(
|
||||
"Failed to check upgrade environment.", "constant")
|
||||
raise Exception(str(e))
|
||||
|
||||
self.checkDualClusterUpgrade()
|
||||
self.context.logger.log(
|
||||
"Successfully checked upgrade environment.", "constant")
|
||||
|
||||
@ -5618,6 +5733,9 @@ class UpgradeImpl:
|
||||
try:
|
||||
# clean backup files
|
||||
self.cleanBackupFiles()
|
||||
# clean gs_secure_files folder
|
||||
if self.context.rollback or self.action == "commit-upgrade":
|
||||
self.clean_gs_secure_files()
|
||||
except Exception as e:
|
||||
raise Exception(str(e))
|
||||
if (isRollBack):
|
||||
@ -5838,6 +5956,7 @@ class UpgradeImpl:
|
||||
1 failed
|
||||
"""
|
||||
self.context.logger.debug("Start to check database connection.")
|
||||
mode = True if "dual-standby" in self.context.clusterType else False
|
||||
for dbNode in self.context.clusterInfo.dbNodes:
|
||||
if len(dbNode.datanodes) == 0 or dbNode.name:
|
||||
continue
|
||||
@ -5848,7 +5967,7 @@ class UpgradeImpl:
|
||||
ClusterCommand.remoteSQLCommand(
|
||||
sql, self.context.user, dnInst.hostname, dnInst.port,
|
||||
False, DefaultValue.DEFAULT_DB_NAME,
|
||||
IsInplaceUpgrade=True)
|
||||
IsInplaceUpgrade=True, maintenance_mode=mode)
|
||||
if status != 0 or not output.isdigit():
|
||||
self.context.logger.debug(
|
||||
"Failed to execute SQL on [%s]: %s. Error: \n%s" %
|
||||
@ -6272,3 +6391,501 @@ class UpgradeImpl:
|
||||
packFilePath)
|
||||
except Exception as e:
|
||||
raise Exception(str(e))
|
||||
|
||||
def getPrimaryDN(self, checkNormal):
|
||||
"""
|
||||
find primary dn in centralized cluster, which we can execute SQL commands
|
||||
"""
|
||||
try:
|
||||
self.context.logger.debug("start to get primary dn. \n"
|
||||
"checkNormal is {0}.".format(checkNormal))
|
||||
if self.context.standbyCluster or self.context.forceRollback:
|
||||
checkNormal = False
|
||||
primaryDn = None
|
||||
if not checkNormal:
|
||||
clusterNodes = self.context.oldClusterInfo.dbNodes
|
||||
for dbNode in clusterNodes:
|
||||
if len(dbNode.datanodes) == 0:
|
||||
continue
|
||||
primaryDn = dbNode.datanodes[0]
|
||||
break
|
||||
self.primaryDn = primaryDn
|
||||
else:
|
||||
primaryList, _ = DefaultValue.getPrimaryNode(self.context.userProfile, self.context.logger)
|
||||
if primaryList:
|
||||
primaryDn = primaryList[0]
|
||||
if not primaryDn:
|
||||
raise Exception(ErrorCode.GAUSS_526["GAUSS_52635"])
|
||||
for dbNode in self.context.clusterInfo.dbNodes:
|
||||
for dn in dbNode.datanodes:
|
||||
if dn.hostname == primaryDn:
|
||||
self.primaryDn = dn
|
||||
self.context.logger.debug("Successfully get primary DN from "
|
||||
"{0}.".format(self.primaryDn.hostname))
|
||||
except Exception as er:
|
||||
self.context.logger.debug("Failed to get Primary dn. Error: %s" % str(er))
|
||||
raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] % "primary dn")
|
||||
|
||||
def getPrimaryNode(self, instanceType):
|
||||
"""
|
||||
|
||||
:param instanceType:
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
self.waitClusterNormalDegrade(waitTimeOut=120)
|
||||
self.context.logger.debug("Start to get primary node.")
|
||||
postSplit = ""
|
||||
primaryFlag = "Primary"
|
||||
count = 0
|
||||
cmd, status, output = "", 0, ""
|
||||
while count < 60:
|
||||
cmd = "source {0} && cm_ctl query -Cv".format(self.context.userProfile)
|
||||
(status, output) = CmdUtil.retryGetstatusoutput(cmd, 3, 5)
|
||||
# no need to retry under force upgrade
|
||||
if status == 0:
|
||||
break
|
||||
time.sleep(2)
|
||||
count += 1
|
||||
if status != 0:
|
||||
raise Exception(
|
||||
ErrorCode.GAUSS_514["GAUSS_51400"] % "%s. Error:\n%s" % (cmd, output))
|
||||
self.context.logger.debug("the result of query is {0}, "
|
||||
"instanceType is {1}.".format(output, instanceType))
|
||||
targetString = output.split(instanceType)[1]
|
||||
if instanceType == "Datanode":
|
||||
dnPrimary = [x for x in re.split(r"[|\n]", targetString) if primaryFlag in x
|
||||
or "Main" in x]
|
||||
primaryList = []
|
||||
for dn in dnPrimary:
|
||||
primaryList.append(list(filter(None, dn.split(" ")))[1])
|
||||
return primaryList
|
||||
if instanceType == "ETCD":
|
||||
postSplit = "Cluster"
|
||||
primaryFlag = "StateLeader"
|
||||
elif instanceType == "CMServer":
|
||||
postSplit = "ETCD"
|
||||
elif instanceType == "GTM":
|
||||
postSplit = "Datanode"
|
||||
elif instanceType == "Coordinator":
|
||||
return ""
|
||||
if postSplit not in targetString:
|
||||
return ""
|
||||
primaryInfo = [x for x in re.split(r"[|\n]", targetString.split(postSplit)[0]) if
|
||||
primaryFlag in x]
|
||||
if primaryInfo == "" or primaryInfo == []:
|
||||
return ""
|
||||
primary = list(filter(None, primaryInfo[0].split(" ")))[1]
|
||||
self.context.logger.debug("get node {0}".format(primary))
|
||||
return primary
|
||||
except Exception as er:
|
||||
self.context.logger.debug("Failed to get primary node." + str(er))
|
||||
raise Exception(str(er))
|
||||
|
||||
def isGucContainDesignatedVal(self, gucName, result):
|
||||
"""
|
||||
The guc value contains the designated string.
|
||||
:return:
|
||||
"""
|
||||
sql = "show {0};".format(gucName)
|
||||
self.getPrimaryDN(True)
|
||||
mode = "primary"
|
||||
is_disaster = DefaultValue.cm_exist_and_is_disaster_cluster(self.context.clusterInfo,
|
||||
self.context.logger)
|
||||
if is_disaster:
|
||||
mode = "standby"
|
||||
(_, output) = self.execSqlCommandInPrimaryDN(sql, mode=mode)
|
||||
if result in output:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def execSqlCommandInPrimaryDN(self, sql, retryTime=3, execHost=None, mode="primary"):
|
||||
"""
|
||||
execute sql on primary dn
|
||||
:return:
|
||||
"""
|
||||
self.context.logger.debug("Start to exec sql {0}.".format(sql))
|
||||
count = 0
|
||||
status, output = 1, ""
|
||||
mode = True if "dual-standby" in self.context.clusterType or mode == "standby" else False
|
||||
while count < retryTime:
|
||||
if not execHost:
|
||||
self.getPrimaryDN(checkNormal=True)
|
||||
execHost = self.primaryDn
|
||||
self.context.logger.debug("Exec sql in dn node {0}".format(execHost.hostname))
|
||||
(status, output) = ClusterCommand.remoteSQLCommand(sql, self.context.user,
|
||||
execHost.hostname, execHost.port,
|
||||
False,
|
||||
DefaultValue.DEFAULT_DB_NAME,
|
||||
IsInplaceUpgrade=True,
|
||||
maintenance_mode=mode)
|
||||
self.context.logger.debug("Exec sql result "
|
||||
"is, status:{0}, output is {1}"
|
||||
"".format(status, output).replace("ERROR", "Log"))
|
||||
if status != 0 or SqlResult.findErrorInSql(output):
|
||||
count += 1
|
||||
continue
|
||||
else:
|
||||
break
|
||||
return status, output
|
||||
|
||||
def initClusterType(self):
|
||||
"""
|
||||
If it is a dual cluster, initialize whether the current cluster
|
||||
is the primary cluster or the standby cluster
|
||||
|
||||
:return:
|
||||
"""
|
||||
# The value of replconninfo1 must contain 'iscascade' in the DR cluster.
|
||||
isStrDRCluster = self.isGucContainDesignatedVal("replconninfo1", "iscascade")
|
||||
if isStrDRCluster:
|
||||
suffix = "-streamDR"
|
||||
else:
|
||||
self.context.logger.debug("Current cluster is not dual cluster.")
|
||||
return
|
||||
|
||||
if self.context.is_inplace_upgrade and self.context.action \
|
||||
not in ["commit-upgrade", "auto-rollback", "chose-strategy"]:
|
||||
raise Exception("Dual cluster does not support in-place upgrade")
|
||||
|
||||
if self.checkGucValIsInValGiven(const.DISASTER_RECOVERY_GUC, ["2"], fromFile=True):
|
||||
self.context.standbyCluster = True
|
||||
self.context.clusterType = "dual-standby" + suffix
|
||||
|
||||
elif self.checkGucValIsInValGiven(const.DISASTER_RECOVERY_GUC, ["0"], fromFile=True):
|
||||
self.context.clusterType = "dual-primary" + suffix
|
||||
|
||||
self.context.logger.log("NOTICE: the clusterType is {0}".format(self.context.clusterType))
|
||||
|
||||
if not self.context.is_inplace_upgrade:
|
||||
self.backup_disaster_user_file()
|
||||
|
||||
if self.context.forceRollback:
|
||||
return
|
||||
self.copyStandbyClusterUpgradeFile()
|
||||
|
||||
upgradeInfoTmp = self.context.getDualUpgradeInfo(self.context.upgradePhaseInfoPath, 0)
|
||||
if upgradeInfoTmp is not None:
|
||||
if "dual-standby" in self.context.clusterType:
|
||||
self.context.dualUpgradeShareInfo.masterVersion = upgradeInfoTmp.masterVersion
|
||||
self.context.dualUpgradeShareInfo.masterUpgradeStatus = \
|
||||
upgradeInfoTmp.masterUpgradeStatus
|
||||
else:
|
||||
self.context.dualUpgradeShareInfo.standbyVersion = upgradeInfoTmp.standbyVersion
|
||||
self.context.dualUpgradeShareInfo.standbyUpgradeStatus = \
|
||||
upgradeInfoTmp.standbyUpgradeStatus
|
||||
|
||||
self.context.updateDualUpgradeInfo(self.context.dualUpgradeShareInfo,
|
||||
filePath=self.context.upgradePhaseInfoPath,
|
||||
startPost=0)
|
||||
|
||||
def checkGucValIsInValGiven(self, gucName, valList, fromFile=False):
|
||||
"""
|
||||
Checks whether a given parameter is a given value list in a given instance list.
|
||||
"""
|
||||
self.context.logger.debug("checks whether the parameter:{0} is "
|
||||
"the value:{1}.".format(gucName, valList))
|
||||
gucStr = "{0}:{1}".format(gucName, ",".join(valList))
|
||||
try:
|
||||
self.checkParam(gucStr, fromFile)
|
||||
self.context.logger.debug("Success to check the parameter:{0} value is "
|
||||
"in the value:{1}.".format(gucName, valList))
|
||||
return True
|
||||
except Exception as _:
|
||||
return False
|
||||
|
||||
def copyStandbyClusterUpgradeFile(self):
|
||||
"""
|
||||
From the data directory of the standby cluster, copy the upgrade_phase_info file
|
||||
to the designated instance directory of the primary cluster, and distribute it
|
||||
to the upgrade backup directory of all nodes
|
||||
"""
|
||||
hardUser, hardUserPwd = self.getDisasterRecoveryUser()
|
||||
if hardUser is None or hardUser == "" or hardUserPwd is None or hardUserPwd == "":
|
||||
raise Exception("Failed to obtain the streaming disaster build user")
|
||||
dnInstance = None
|
||||
for x in range(1, 9):
|
||||
localRemoteInfo = self.getLocalRemoteHostIpAndPort("{0}{1}".format(
|
||||
const.REMOTE_INFO_GUC[self.context.clusterType], x))
|
||||
for dbNode in self.context.clusterInfo.dbNodes:
|
||||
for dnInst in dbNode.datanodes:
|
||||
self.context.logger.debug("The instance is {0}".format(dnInst.__dict__))
|
||||
if "-streamDR" in self.context.clusterType:
|
||||
dataIp = DefaultValue.get_data_ip_info(dnInst, self.context.logger)
|
||||
if localRemoteInfo.get("localhost") in dataIp and \
|
||||
localRemoteInfo.get("localport") == str(dnInst.haPort).strip():
|
||||
dnInstance = copy.deepcopy(dnInst)
|
||||
break
|
||||
if dnInstance is not None:
|
||||
try:
|
||||
self.copyAndDistributeUpgradeFile(dnInstance, localRemoteInfo)
|
||||
except Exception as err:
|
||||
self.context.logger.error("Cope file failed msg:%s." % err)
|
||||
dnInstance = None
|
||||
continue
|
||||
break
|
||||
if dnInstance is None:
|
||||
raise Exception("Unable to find a DN to connect to the standby cluster node")
|
||||
|
||||
def checkDualClusterUpgrade(self):
|
||||
"""
|
||||
Double cluster check whether it can be upgrade
|
||||
|
||||
:return:
|
||||
"""
|
||||
if "dual-standby-streamDR" not in self.context.clusterType or \
|
||||
self.context.action == const.ACTION_SMALL_UPGRADE:
|
||||
return
|
||||
self.context.logger.debug("The status of the dual-cluster standby status is {0}, version "
|
||||
"is {1}. The status of the dual-cluster master status is {2}, "
|
||||
"version is {3}".format(
|
||||
self.context.dualUpgradeShareInfo.standbyUpgradeStatus,
|
||||
self.context.dualUpgradeShareInfo.standbyVersion,
|
||||
self.context.dualUpgradeShareInfo.masterUpgradeStatus,
|
||||
self.context.dualUpgradeShareInfo.masterVersion))
|
||||
|
||||
if self.context.dualUpgradeShareInfo.masterUpgradeStatus < 2 or \
|
||||
self.context.dualUpgradeShareInfo.masterVersion != self.newCommitId:
|
||||
raise Exception("The status of the dual-cluster master is {0}. "
|
||||
"the standby cluster cannot be upgrade."
|
||||
.format(self.context.dualUpgradeShareInfo.masterUpgradeStatus))
|
||||
|
||||
def recordDualClusterStage(self, commitVersion, upgradeStage):
|
||||
"""
|
||||
Record the upgrade information of the dual cluster
|
||||
|
||||
:param commitVersion:
|
||||
:param upgradeStage:
|
||||
:return:
|
||||
"""
|
||||
if "dual-primary" in self.context.clusterType:
|
||||
self.context.dualUpgradeShareInfo.masterVersion = commitVersion
|
||||
self.context.dualUpgradeShareInfo.masterUpgradeStatus = upgradeStage
|
||||
elif "dual-standby" in self.context.clusterType:
|
||||
self.context.dualUpgradeShareInfo.standbyVersion = commitVersion
|
||||
self.context.dualUpgradeShareInfo.standbyUpgradeStatus = upgradeStage
|
||||
else:
|
||||
return
|
||||
self.context.updateDualUpgradeInfo(self.context.dualUpgradeShareInfo,
|
||||
filePath=self.context.upgradePhaseInfoPath, startPost=0)
|
||||
|
||||
def checkDualClusterRollback(self):
|
||||
"""
|
||||
Double cluster check whether it can be rollback
|
||||
|
||||
:return:
|
||||
"""
|
||||
if "dual-standby" in self.context.clusterType or \
|
||||
"dual-" not in self.context.clusterType:
|
||||
return
|
||||
self.context.logger.debug("The status of the dual-cluster standby status is {0}, version "
|
||||
"is {1}. The status of the dual-cluster master status is {2}, "
|
||||
"version is {3}".format(
|
||||
self.context.dualUpgradeShareInfo.standbyUpgradeStatus,
|
||||
self.context.dualUpgradeShareInfo.standbyVersion,
|
||||
self.context.dualUpgradeShareInfo.masterUpgradeStatus,
|
||||
self.context.dualUpgradeShareInfo.masterVersion))
|
||||
if not self.context.rollback or \
|
||||
"dual-primary" in self.context.clusterType or \
|
||||
self.context.action == const.ACTION_SMALL_UPGRADE or self.context.forceRollback:
|
||||
return
|
||||
# master cluster
|
||||
if "dual-primary" in self.context.clusterType:
|
||||
if (self.context.dualUpgradeShareInfo.standbyUpgradeStatus > 2 or
|
||||
self.context.dualUpgradeShareInfo.standbyUpgradeStatus == 0) and \
|
||||
self.context.dualUpgradeShareInfo.standbyVersion == self.newCommitId:
|
||||
raise Exception("The status of the dual-cluster standby is {0}. "
|
||||
"the master cluster cannot be rolled back."
|
||||
.format(self.context.dualUpgradeShareInfo.standbyUpgradeStatus))
|
||||
|
||||
def checkDualClusterCommit(self):
|
||||
"""
|
||||
Double cluster check whether it can be submitted
|
||||
|
||||
:return:
|
||||
"""
|
||||
if "dual-" not in self.context.clusterType:
|
||||
return
|
||||
if self.context.action == const.ACTION_SMALL_UPGRADE:
|
||||
return
|
||||
self.context.logger.debug("The status of the dual-cluster standby status is {0}, version "
|
||||
"is {1}. The status of the dual-cluster master status is {2}, "
|
||||
"version is {3}".format(
|
||||
self.context.dualUpgradeShareInfo.standbyUpgradeStatus,
|
||||
self.context.dualUpgradeShareInfo.standbyVersion,
|
||||
self.context.dualUpgradeShareInfo.masterUpgradeStatus,
|
||||
self.context.dualUpgradeShareInfo.masterVersion))
|
||||
# master cluster
|
||||
if "dual-primary" in self.context.clusterType:
|
||||
if self.context.dualUpgradeShareInfo.standbyUpgradeStatus != 0 or \
|
||||
self.context.dualUpgradeShareInfo.standbyVersion != self.newCommitId:
|
||||
raise Exception("The status of the dual-cluster standby status is {0}, "
|
||||
"version is {1}. the master cluster cannot be commit."
|
||||
.format(self.context.dualUpgradeShareInfo.standbyUpgradeStatus,
|
||||
self.context.dualUpgradeShareInfo.standbyVersion))
|
||||
if "dual-standby" in self.context.clusterType:
|
||||
if self.context.dualUpgradeShareInfo.masterUpgradeStatus != 2 or \
|
||||
self.context.dualUpgradeShareInfo.masterVersion != self.newCommitId:
|
||||
raise Exception("The status of the dual-cluster master status is {0}, "
|
||||
"version is {1}. The standby cluster cannot be commit."
|
||||
.format(self.context.dualUpgradeShareInfo.masterUpgradeStatus,
|
||||
self.context.dualUpgradeShareInfo.masterVersion))
|
||||
|
||||
def copyDirFromRemoteNode(self, remoteHost, remoteDir, targetHost, targetDir):
|
||||
"""
|
||||
SSH to the remote node, copy dir from the remote node to the specified node
|
||||
|
||||
:param remoteHost:
|
||||
:param remoteDir:
|
||||
:param targetHost:
|
||||
:param targetDir:
|
||||
:return:
|
||||
"""
|
||||
scpcmd = "pssh -s -H {0} 'source {5}; if [ -d '{1}' ];" \
|
||||
"then pscp -r -H {2} {3} {4}; fi' ".format(remoteHost, remoteDir, targetHost,
|
||||
remoteDir, targetDir,
|
||||
self.context.userProfile)
|
||||
(status, output) = CmdUtil.retryGetstatusoutput(scpcmd, 2, 5)
|
||||
if status != 0:
|
||||
raise Exception("File copy failed. Output: {0}".format(output))
|
||||
|
||||
def getLocalRemoteHostIpAndPort(self, gucName):
|
||||
"""
|
||||
Get the DN instance and the corresponding standby cluster host and port through the
|
||||
cross_cluster_replconninfo parameter
|
||||
:param gucName: cross_cluster_replconninfo parameter name
|
||||
:return: {"localhost":"", "localport":"", "remotehost":"", "remoteport":""}
|
||||
"""
|
||||
isLocal = False
|
||||
localRemoteInfo = dict()
|
||||
sql = "show {0};".format(gucName)
|
||||
self.getPrimaryDN(False)
|
||||
(status, output) = self.execSqlCommandInPrimaryDN(sql)
|
||||
if status != 0 or output == "":
|
||||
raise Exception("Failed to get GUC parameter: {0} value. Output: {1}".format(gucName,
|
||||
output))
|
||||
localIp = output.split("localhost=")[1].split("localport=")[0].strip()
|
||||
remoteIp = output.split("remotehost=")[1].split("remoteport=")[0].strip()
|
||||
|
||||
self.context.logger.debug("Success get the output {0}".format(output))
|
||||
|
||||
if "-streamDR" in self.context.clusterType:
|
||||
localPort = output.split("localport=")[1].split("localheartbeatport=")[0].strip()
|
||||
remotePort = output.split("remoteport=")[1].split("remoteheartbeatport=")[0].strip()
|
||||
|
||||
for dbNode in self.context.clusterInfo.dbNodes:
|
||||
if isLocal:
|
||||
break
|
||||
for dnInst in dbNode.datanodes:
|
||||
if remoteIp in dnInst.listenIps or remoteIp in dnInst.hostname:
|
||||
isLocal = True
|
||||
break
|
||||
self.context.logger.debug("The local flag is {0}".format(isLocal))
|
||||
|
||||
if isLocal:
|
||||
localRemoteInfo.setdefault("localhost", "no find remote host")
|
||||
else:
|
||||
localRemoteInfo.setdefault("localhost", localIp)
|
||||
|
||||
localRemoteInfo.setdefault("localport", localPort)
|
||||
localRemoteInfo.setdefault("remotehost", remoteIp)
|
||||
localRemoteInfo.setdefault("remoteport", remotePort)
|
||||
return localRemoteInfo
|
||||
|
||||
def copyAndDistributeUpgradeFile(self, dnInstance, localRemoteInfo):
|
||||
"""
|
||||
copy upgrade file
|
||||
:return:
|
||||
"""
|
||||
hardUser, hardUserPwd = self.getDisasterRecoveryUser()
|
||||
cmd_remote = 'pssh -s -H {0} \'source {8}; gs_ctl build -D {1} -b copy_upgrade_file ' \
|
||||
'-Z datanode -U {2} -P "{3}" -C "localhost={4} localport={5} remotehost={6} ' \
|
||||
'remoteport={7}"\''.format(dnInstance.hostname,
|
||||
dnInstance.datadir,
|
||||
hardUser,
|
||||
hardUserPwd,
|
||||
localRemoteInfo.get("localhost"),
|
||||
localRemoteInfo.get("localport"),
|
||||
localRemoteInfo.get("remotehost"),
|
||||
localRemoteInfo.get("remoteport"),
|
||||
self.context.userProfile)
|
||||
|
||||
cmd_remote = cmd_remote.replace(" -Z datanode", "")
|
||||
|
||||
self.context.logger.debug("Copy upgrade file with cmd: {0}.".
|
||||
format(cmd_remote.replace(hardUserPwd, "***")))
|
||||
status, output = DefaultValue.getstatusoutput_hide_pass(cmd_remote)
|
||||
if status == 0:
|
||||
self.context.logger.debug("Successfully copy upgrade file")
|
||||
else:
|
||||
raise Exception("Failed to copy files from the standby cluster. "
|
||||
"Ensure that the standby cluster version supports this function. "
|
||||
"Output: {0}".format(output))
|
||||
|
||||
remoteUpgradeInfoPath = os.path.join(dnInstance.datadir, const.UPGRADE_PHASE_INFO)
|
||||
self.copyFileFromRemoteNode(dnInstance.hostname, remoteUpgradeInfoPath,
|
||||
NetUtil.GetHostIpOrName(),
|
||||
self.context.upgradePhaseInfoPath)
|
||||
if not os.path.exists(self.context.upgradePhaseInfoPath):
|
||||
FileUtil.createFile(self.context.upgradePhaseInfoPath,
|
||||
mode=DefaultValue.KEY_FILE_MODE)
|
||||
self.context.updateDualUpgradeInfo(self.context.dualUpgradeShareInfo,
|
||||
filePath=self.context.upgradePhaseInfoPath,
|
||||
startPost=0)
|
||||
|
||||
self.context.sshTool.scpFiles(self.context.upgradePhaseInfoPath,
|
||||
self.context.tmpDir,
|
||||
hostList=self.context.clusterNodes)
|
||||
|
||||
def getDisasterRecoveryUser(self):
|
||||
"""
|
||||
Obtain special users of the streaming disaster recovery cluster for building
|
||||
:return: user name
|
||||
"""
|
||||
mode = True if "dual-standby" in self.context.clusterType else False
|
||||
user_str = DefaultValue.obtain_hadr_user_encrypt_str(
|
||||
self.context.clusterInfo, self.context.user, self.context.logger, mode)
|
||||
rand_pwd = DefaultValue.decrypt_hadr_rand_pwd(self.context.logger)
|
||||
params = rand_pwd, user_str, self.context.clusterInfo, self.context.user, \
|
||||
self.context.logger, mode
|
||||
hardUser, hardUserPwd = DefaultValue.decrypt_hadr_user_info(params)
|
||||
return hardUser, hardUserPwd
|
||||
|
||||
def copyFileFromRemoteNode(self, remoteHost, remoteFile, targetHost, targetFile):
|
||||
"""
|
||||
SSH to the remote node, copy files from the remote node to the specified node
|
||||
|
||||
:param remoteHost:
|
||||
:param remoteFile:
|
||||
:param targetHost:
|
||||
:param targetFile:
|
||||
:return:
|
||||
"""
|
||||
scpcmd = "pssh -s -H {0} 'source {5}; if [ -f '{1}' ];" \
|
||||
"then pscp -H {2} {3} {4}; fi' ".format(remoteHost, remoteFile, targetHost,
|
||||
remoteFile, targetFile,
|
||||
self.context.userProfile)
|
||||
(status, output) = CmdUtil.retryGetstatusoutput(scpcmd, 2, 5)
|
||||
if status != 0:
|
||||
raise Exception("File copy failed. Output: {0}".format(output))
|
||||
|
||||
def clean_gs_secure_files(self):
|
||||
"""
|
||||
delete gs_secure_files during rollback or commit
|
||||
"""
|
||||
try:
|
||||
self.context.logger.debug(
|
||||
"Starting to clean gs_secure_files folder in the dn data catalog.")
|
||||
cmd = "%s -t %s -U %s -l %s" % \
|
||||
(OMCommand.getLocalScript("Local_Upgrade_Utility"),
|
||||
const.ACTION_CLEAN_GS_SECURE_FILES,
|
||||
self.context.user,
|
||||
self.context.localLog)
|
||||
self.context.logger.debug("clean gs_secure_files folder:{0}".format(cmd))
|
||||
host_list = copy.deepcopy(self.context.clusterNodes)
|
||||
self.context.execCommandInSpecialNode(cmd, host_list)
|
||||
except Exception as er:
|
||||
raise Exception(str(er))
|
||||
self.context.logger.debug(
|
||||
"Successfully to clean gs_secure_files folder in the dn data catalog.")
|
||||
|
@@ -56,6 +56,7 @@ class CmdOptions():
        self.removeIps = []
        self.addIps = []
        self.dws_mode = False
        self.try_reload = False


def usage():
@@ -75,6 +76,7 @@ General options:
    -r               the signal about ignorepgHbaMiss
    --remove-ip      Remove ip address from pg_hba.conf
    --add-ip         Add ip address to pg_hba.conf
    --try-reload     Try reload guc params if can not set
    --help           Show help information for this utility,
                     and exit the command line mode.
"""
@@ -88,7 +90,7 @@ def parseCommandLine():
    try:
        opts, args = getopt.getopt(sys.argv[1:], "U:X:l:r",
                                   ["remove-ip=", "help", "dws-mode",
                                    "add-ip="])
                                    "add-ip=", "try-reload"])
    except Exception as e:
        usage()
        GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50000"] % str(e))
@@ -116,6 +118,8 @@ def parseCommandLine():
            g_opts.removeIps.append(value)
        elif (key == "--dws-mode"):
            g_opts.dws_mode = True
        elif key == "--try-reload":
            g_opts.try_reload = True
        elif (key == "--add-ip"):
            g_opts.addIps = value.split(',')
        Parameter.checkParaVaild(key, value)
@@ -154,7 +158,7 @@ class ConfigHba(LocalBaseOM):
    """

    def __init__(self, logFile, user, clusterConf, dwsMode=False,
                 ignorepgHbaMiss=False, removeIps=None):
                 ignorepgHbaMiss=False, removeIps=None, try_reload=False):
        """
        function: configure all instance on local node
        """
@@ -178,6 +182,7 @@ class ConfigHba(LocalBaseOM):
        if removeIps is None:
            removeIps = []
        self.removeIps = removeIps
        self.try_reload = try_reload

    def getAllIps(self):
        """
@@ -220,6 +225,16 @@ class ConfigHba(LocalBaseOM):
        except Exception as e:
            raise Exception(str(e))

    def remove_streaming_config(self, component):
        """
        remove dn & cn pg_hba for streaming stop
        """
        ip_segment_list = list(set(['.'.join(
            remove_ip.split('.')[:2]) + ".0.0/16" for remove_ip in self.removeIps]))
        for ip_segment in ip_segment_list:
            ip_remove_str = "-h \"host replication all %s\" " % ip_segment
            component.doGUCConfig("set", ip_remove_str, True)

    def __configAnInstance(self, component):
        """
        function: set hba config for single component
@@ -245,9 +260,10 @@ class ConfigHba(LocalBaseOM):
            self.logger.debug("The %s does not exist." % hbaFile)
            return

        component.setPghbaConfig(self.allIps)
        component.setPghbaConfig(self.allIps, try_reload=self.try_reload)
        if len(self.removeIps) != 0:
            component.removeIpInfoOnPghbaConfig(self.removeIps)
            self.remove_streaming_config(component)


if __name__ == '__main__':
@@ -266,7 +282,7 @@ if __name__ == '__main__':
        # modify Instance
        configer = ConfigHba(g_opts.logFile, g_opts.clusterUser,
                             g_opts.clusterConf, g_opts.dws_mode,
                             g_opts.ignorepgHbaMiss, g_opts.removeIps)
                             g_opts.ignorepgHbaMiss, g_opts.removeIps, g_opts.try_reload)
        configer.configHba()

    except Exception as e:
@@ -2152,6 +2152,35 @@ def backupHotpatch():
    for dbInstance in g_dbNode.gtms:
        backupInstanceHotpatchConfig(dbInstance.datadir)

def clean_gs_secure_files():
    """
    clean gs_secure_files folder
    """
    pool = ThreadPool(DefaultValue.getCpuSet())
    pool.map(clean_stream_gs_secure, g_dbNode.datanodes)
    pool.close()
    pool.join()


def clean_stream_gs_secure(dn_inst):
    """
    clean gs secure dir
    """
    temp_dir = EnvUtil.getTmpDirFromEnv()
    file_path = os.path.join(dn_inst.datadir, "gs_secure_files")
    cmd = "(if [ -d '%s' ]; then rm -rf '%s'; fi) && " % (file_path, file_path)
    cmd += "(if [ -f '%s/upgrade_phase_info' ]; then rm -f '%s/upgrade_phase_info'; " \
           "fi) &&" % (temp_dir, temp_dir)
    cmd += "(if [ -f '%s/hadr.key.cipher' ]; then rm -f '%s/hadr.key.cipher'; " \
           "fi) &&" % (temp_dir, temp_dir)
    cmd += "(if [ -f '%s/hadr.key.rand' ]; then rm -f '%s/hadr.key.rand'; " \
           "fi) &&" % (temp_dir, temp_dir)
    cmd += "(if [ -d '%s/gs_secure_files' ]; then rm -f '%s/gs_secure_files'; " \
           "fi)" % (temp_dir, temp_dir)
    g_logger.debug("Starting clean instance %s gs secure dir, cmd:%s." % (dn_inst.instanceId, cmd))
    CmdExecutor.execCommandLocally(cmd)
    g_logger.debug("Successfully clean instance %s gs secure dir." % dn_inst.instanceId)


def rollbackInstanceHotpatchConfig(instanceDataDir):
    """
@@ -4720,6 +4749,7 @@ def main():
        const.ACTION_GREY_SYNC_GUC: greySyncGuc,
        const.ACTION_GREY_UPGRADE_CONFIG_SYNC: greyUpgradeSyncConfig,
        const.ACTION_SWITCH_DN: switchDnNodeProcess,
        const.ACTION_CLEAN_GS_SECURE_FILES: clean_gs_secure_files,
        const.ACTION_GET_LSN_INFO: getLsnInfo,
        const.ACTION_GREY_RESTORE_CONFIG: greyRestoreConfig,
        const.ACTION_GREY_RESTORE_GUC: greyRestoreGuc,