public upgrade py
committed by LINxiansheng · parent 056c5a28b4 · commit fe44cd94cf

tools/upgrade/upgrade_post_checker.py (executable file, +733 lines)
@@ -0,0 +1,733 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import os
import time
import mysql.connector
from mysql.connector import errorcode
import logging
import getopt

class UpgradeParams:
  log_filename = 'upgrade_post_checker.log'
  new_version = '3.1.2'

#### --------------start : my_error.py --------------
class MyError(Exception):
  def __init__(self, value):
    self.value = value
  def __str__(self):
    return repr(self.value)
#### --------------start : actions.py------------
class Cursor:
  __cursor = None
  def __init__(self, cursor):
    self.__cursor = cursor
  def exec_sql(self, sql, print_when_succ = True):
    try:
      self.__cursor.execute(sql)
      rowcount = self.__cursor.rowcount
      if print_when_succ:
        logging.info('succeed to execute sql: %s, rowcount = %d', sql, rowcount)
      return rowcount
    except mysql.connector.Error as e:
      logging.exception('mysql connector error, fail to execute sql: %s', sql)
      raise e
    except Exception as e:
      logging.exception('normal error, fail to execute sql: %s', sql)
      raise e
  def exec_query(self, sql, print_when_succ = True):
    try:
      self.__cursor.execute(sql)
      results = self.__cursor.fetchall()
      rowcount = self.__cursor.rowcount
      if print_when_succ:
        logging.info('succeed to execute query: %s, rowcount = %d', sql, rowcount)
      return (self.__cursor.description, results)
    except mysql.connector.Error as e:
      logging.exception('mysql connector error, fail to execute sql: %s', sql)
      raise e
    except Exception as e:
      logging.exception('normal error, fail to execute sql: %s', sql)
      raise e
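
# Usage sketch (illustrative; host/credentials are placeholders):
#   conn = mysql.connector.connect(user='xxx', password='xxx',
#                                  host='127.0.0.1', port=3306,
#                                  database='oceanbase')
#   query_cur = Cursor(conn.cursor(buffered=True))
#   (desc, results) = query_cur.exec_query('select 1')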

def set_parameter(cur, parameter, value):
  sql = """alter system set {0} = '{1}'""".format(parameter, value)
  logging.info(sql)
  cur.execute(sql)
  wait_parameter_sync(cur, parameter, value)

def wait_parameter_sync(cur, key, value):
  sql = """select count(*) as cnt from oceanbase.__all_virtual_sys_parameter_stat
           where name = '{0}' and value != '{1}'""".format(key, value)
  times = 10
  while times > 0:
    logging.info(sql)
    cur.execute(sql)
    result = cur.fetchall()
    if len(result) != 1 or len(result[0]) != 1:
      logging.exception('result cnt not match')
      raise MyError('result cnt not match')
    elif result[0][0] == 0:
      logging.info("""{0} is sync, value is {1}""".format(key, value))
      break
    else:
      logging.info("""{0} is not sync, value should be {1}""".format(key, value))

    times -= 1
    if times == 0:
      logging.exception("""check {0}:{1} sync timeout""".format(key, value))
      raise MyError('check {0}:{1} sync timeout'.format(key, value))
    time.sleep(5)
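
# Flow sketch (illustrative): set_parameter() issues the ALTER SYSTEM and then
# polls __all_virtual_sys_parameter_stat until no observer still reports the
# old value, so callers can rely on the parameter being in effect:
#   set_parameter(cur, 'enable_ddl', 'True')  # returns once all observers report 'True'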

#### --------------start : opt.py --------------
help_str = \
"""
Help:
""" +\
sys.argv[0] + """ [OPTIONS]""" +\
'\n\n' +\
'-I, --help           Display this help and exit.\n' +\
'-V, --version        Output version information and exit.\n' +\
'-h, --host=name      Connect to host.\n' +\
'-P, --port=name      Port number to use for connection.\n' +\
'-u, --user=name      User for login.\n' +\
'-p, --password=name  Password to use when connecting to server. If password is\n' +\
'                     not given it\'s empty string "".\n' +\
'-m, --module=name    Modules to run. Modules should be a string combined by some of\n' +\
'                     the following strings: ddl, normal_dml, each_tenant_dml,\n' +\
'                     system_variable_dml, special_action, all. "all" represents\n' +\
'                     that all modules should be run. They are separated by ",".\n' +\
'                     For example: -m all, or --module=ddl,normal_dml,special_action\n' +\
'-l, --log-file=name  Log file path. If log file path is not given it\'s ' + os.path.splitext(sys.argv[0])[0] + '.log\n' +\
'\n\n' +\
'For example, you might run:\n' +\
sys.argv[0] + ' -h 127.0.0.1 -P 3306 -u xxx -p xxx\n'

version_str = """version 1.0.0"""

class Option:
  __g_short_name_set = set([])
  __g_long_name_set = set([])
  __short_name = None
  __long_name = None
  __is_with_param = None
  __is_local_opt = None
  __has_value = None
  __value = None
  def __init__(self, short_name, long_name, is_with_param, is_local_opt, default_value = None):
    if short_name in Option.__g_short_name_set:
      raise MyError('duplicate option short name: {0}'.format(short_name))
    elif long_name in Option.__g_long_name_set:
      raise MyError('duplicate option long name: {0}'.format(long_name))
    Option.__g_short_name_set.add(short_name)
    Option.__g_long_name_set.add(long_name)
    self.__short_name = short_name
    self.__long_name = long_name
    self.__is_with_param = is_with_param
    self.__is_local_opt = is_local_opt
    self.__has_value = False
    if default_value is not None:
      self.set_value(default_value)
  def is_with_param(self):
    return self.__is_with_param
  def get_short_name(self):
    return self.__short_name
  def get_long_name(self):
    return self.__long_name
  def has_value(self):
    return self.__has_value
  def get_value(self):
    return self.__value
  def set_value(self, value):
    self.__value = value
    self.__has_value = True
  def is_local_opt(self):
    return self.__is_local_opt
  def is_valid(self):
    return self.__short_name is not None and self.__long_name is not None and self.__has_value and self.__value is not None

g_opts = [
  Option('I', 'help', False, True),
  Option('V', 'version', False, True),
  Option('h', 'host', True, False),
  Option('P', 'port', True, False),
  Option('u', 'user', True, False),
  Option('p', 'password', True, False, ''),
  # Which module(s) to run; defaults to running all of them.
  Option('m', 'module', True, False, 'all'),
  # Log file path; each script's main() changes this to its own default.
  Option('l', 'log-file', True, False)
]

def change_opt_default_value(opt_long_name, opt_default_val):
  global g_opts
  for opt in g_opts:
    if opt.get_long_name() == opt_long_name:
      opt.set_value(opt_default_val)
      return

def has_no_local_opts():
  global g_opts
  no_local_opts = True
  for opt in g_opts:
    if opt.is_local_opt() and opt.has_value():
      no_local_opts = False
  return no_local_opts

def check_db_client_opts():
  global g_opts
  for opt in g_opts:
    if not opt.is_local_opt() and not opt.has_value():
      raise MyError('option "-{0}" has not been specified, maybe you should run "{1} --help" for help'\
          .format(opt.get_short_name(), sys.argv[0]))

def parse_option(opt_name, opt_val):
  global g_opts
  for opt in g_opts:
    if opt_name in (('-' + opt.get_short_name()), ('--' + opt.get_long_name())):
      opt.set_value(opt_val)

def parse_options(argv):
  global g_opts
  short_opt_str = ''
  long_opt_list = []
  for opt in g_opts:
    if opt.is_with_param():
      short_opt_str += opt.get_short_name() + ':'
    else:
      short_opt_str += opt.get_short_name()
  for opt in g_opts:
    if opt.is_with_param():
      long_opt_list.append(opt.get_long_name() + '=')
    else:
      long_opt_list.append(opt.get_long_name())
  (opts, args) = getopt.getopt(argv, short_opt_str, long_opt_list)
  for (opt_name, opt_val) in opts:
    parse_option(opt_name, opt_val)
  if has_no_local_opts():
    check_db_client_opts()
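
# Parsing sketch (illustrative): g_opts is compiled into getopt spec strings,
# short 'IVh:P:u:p:m:l:' and long ['help', 'version', 'host=', ...], so
#   parse_options(['-h', '127.0.0.1', '-P', '3306', '-u', 'xxx', '-p', 'xxx'])
# fills the matching Option objects, which get_opt_host()/get_opt_port()
# and friends read back below.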

def deal_with_local_opt(opt):
  if 'help' == opt.get_long_name():
    global help_str
    print(help_str)
  elif 'version' == opt.get_long_name():
    global version_str
    print(version_str)

def deal_with_local_opts():
  global g_opts
  if has_no_local_opts():
    raise MyError('no local options, can not deal with local options')
  else:
    for opt in g_opts:
      if opt.is_local_opt() and opt.has_value():
        deal_with_local_opt(opt)
        # Only handle the first local option, then stop.
        return

def get_opt_host():
  global g_opts
  for opt in g_opts:
    if 'host' == opt.get_long_name():
      return opt.get_value()

def get_opt_port():
  global g_opts
  for opt in g_opts:
    if 'port' == opt.get_long_name():
      return opt.get_value()

def get_opt_user():
  global g_opts
  for opt in g_opts:
    if 'user' == opt.get_long_name():
      return opt.get_value()

def get_opt_password():
  global g_opts
  for opt in g_opts:
    if 'password' == opt.get_long_name():
      return opt.get_value()

def get_opt_module():
  global g_opts
  for opt in g_opts:
    if 'module' == opt.get_long_name():
      return opt.get_value()

def get_opt_log_file():
  global g_opts
  for opt in g_opts:
    if 'log-file' == opt.get_long_name():
      return opt.get_value()
#### ---------------end----------------------

def config_logging_module(log_filename):
  logging.basicConfig(level=logging.INFO,
                      format='[%(asctime)s] %(levelname)s %(filename)s:%(lineno)d %(message)s',
                      datefmt='%Y-%m-%d %H:%M:%S',
                      filename=log_filename,
                      filemode='w')
  # Define the log line format.
  formatter = logging.Formatter('[%(asctime)s] %(levelname)s %(filename)s:%(lineno)d %(message)s', '%Y-%m-%d %H:%M:%S')
  #######################################
  # Define a handler that prints INFO-and-above records to sys.stdout.
  stdout_handler = logging.StreamHandler(sys.stdout)
  stdout_handler.setLevel(logging.INFO)
  # Apply the log line format.
  stdout_handler.setFormatter(formatter)
  # Attach the stdout handler to the root logger.
  logging.getLogger('').addHandler(stdout_handler)
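
# Usage sketch (illustrative): after configuration, records go both to the
# log file (via basicConfig) and to stdout (via the extra handler):
#   config_logging_module('upgrade_post_checker.log')
#   logging.info('hello')   # appears in the file and on stdout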

#### START
# 1. Check the cluster version.
def check_cluster_version(query_cur):
  # Parameter changes take effect asynchronously, and since 2.2.0 the new
  # tenant-level parameter refresh shares a timer with the system-level
  # parameter refresh, so wait a while here.
  times = 30
  sql = "select distinct value = '{0}' from oceanbase.__all_virtual_sys_parameter_stat where name='min_observer_version'".format(upgrade_params.new_version)
  while times > 0 :
    (desc, results) = query_cur.exec_query(sql)
    if len(results) == 1 and results[0][0] == 1:
      break
    time.sleep(1)
    times -= 1
  if times == 0:
    logging.warn("check cluster version timeout!")
    raise MyError('check cluster version timeout')
  else:
    logging.info("check_cluster_version success")

def check_storage_format_version(query_cur):
  # Specify the expected version for each upgrade (see OB_STORAGE_FORMAT_VERSION_MAX).
  expect_version = 4
  sql = "select value from oceanbase.__all_zone where zone = '' and name = 'storage_format_version'"
  times = 180
  while times > 0 :
    (desc, results) = query_cur.exec_query(sql)
    if len(results) == 1 and results[0][0] == expect_version:
      break
    time.sleep(10)
    times -= 1
  if times == 0:
    logging.warn("check storage format version timeout! Expected version {0}".format(expect_version))
    raise MyError('check storage format version timeout, expected version {0}'.format(expect_version))
  else:
    logging.info("check expected storage format version '{0}' success".format(expect_version))

def upgrade_table_schema_version(conn, cur):
  try:
    sql = """SELECT * FROM v$ob_cluster
             WHERE cluster_role = "PRIMARY"
             AND cluster_status = "VALID"
             AND (switchover_status = "NOT ALLOWED" OR switchover_status = "TO STANDBY") """
    (desc, results) = cur.exec_query(sql)
    is_primary = len(results) > 0
    if is_primary:
      sql = "set @@session.ob_query_timeout = 60000000;"
      logging.info(sql)
      cur.exec_sql(sql)

      sql = "alter system run job 'UPDATE_TABLE_SCHEMA_VERSION';"
      logging.info(sql)
      cur.exec_sql(sql)
    else:
      logging.info("standby cluster does not need to run job UPDATE_TABLE_SCHEMA_VERSION")
  except Exception as e:
    logging.warn("update table schema failed")
    raise MyError("update table schema failed")
  logging.info("update table schema finish")

def upgrade_storage_format_version(conn, cur):
  try:
    # enable_ddl
    set_parameter(cur, 'enable_ddl', 'True')

    # run job
    sql = "alter system run job 'UPGRADE_STORAGE_FORMAT_VERSION';"
    logging.info(sql)
    cur.execute(sql)

  except Exception as e:
    logging.warn("upgrade storage format version failed")
    raise e
  logging.info("upgrade storage format version finish")

# 2. Check that the internal-table self-inspection succeeded.
def check_root_inspection(query_cur):
  sql = "select count(*) from oceanbase.__all_virtual_upgrade_inspection where info != 'succeed'"
  times = 180
  while times > 0 :
    (desc, results) = query_cur.exec_query(sql)
    if results[0][0] == 0:
      break
    time.sleep(10)
    times -= 1
  if times == 0:
    logging.warn('check root inspection failed!')
    raise MyError('check root inspection failed')
  logging.info('check root inspection success')

# 4. Re-enable DDL.
def enable_ddl(cur):
  set_parameter(cur, 'enable_ddl', 'True')

# 5. Re-enable rebalance.
def enable_rebalance(cur):
  set_parameter(cur, 'enable_rebalance', 'True')

# 6. Re-enable re-replication.
def enable_rereplication(cur):
  set_parameter(cur, 'enable_rereplication', 'True')

# 7. Re-enable major freeze.
def enable_major_freeze(cur):
  set_parameter(cur, 'enable_major_freeze', 'True')
# 8. Let SQL run through PX (parallel execution).
def enable_px_inner_sql(query_cur, cur):
  cur.execute("alter system set _ob_enable_px_for_inner_sql = True")
  (desc, results) = query_cur.exec_query("""select cluster_role from oceanbase.v$ob_cluster""")
  if (len(results) != 1) :
    raise MyError('cluster role results is not valid')
  elif (results[0][0] == "PRIMARY"):
    tenant_id_list = fetch_tenant_ids(query_cur)
    for tenant_id in tenant_id_list:
      cur.execute("alter system change tenant tenant_id = {0}".format(tenant_id))
      sql = """set global _ob_use_parallel_execution = true"""
      logging.info("tenant_id : %d , %s", tenant_id, sql)
      cur.execute(sql)
  elif (results[0][0] == "PHYSICAL STANDBY"):
    sql = """set global _ob_use_parallel_execution = true"""
    cur.execute(sql)
    logging.info("execute sql in standby: %s", sql)
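
# Pattern sketch (illustrative; tenant_id 1001 is a placeholder): on a primary
# cluster the session hops into each tenant before setting the tenant-scoped
# global variable:
#   cur.execute("alter system change tenant tenant_id = 1001")
#   cur.execute("set global _ob_use_parallel_execution = true")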

def execute_schema_split_v2(conn, cur, query_cur, user, pwd):
  try:
    # check whether the local cluster has finished schema split
    done = check_schema_split_v2_finish(query_cur)
    if not done:
      ###### disable ddl
      set_parameter(cur, 'enable_ddl', 'False')

      # run job
      sql = """alter system run job 'SCHEMA_SPLIT_V2'"""
      logging.info("""run job 'SCHEMA_SPLIT_V2'""")
      query_cur.exec_sql(sql)
      # check schema split v2 finish
      check_schema_split_v2_finish_until_timeout(query_cur, 360)

    # the primary cluster should wait for the standby clusters' schema split results
    is_primary = check_current_cluster_is_primary(query_cur)
    if is_primary:
      standby_cluster_list = fetch_standby_cluster_infos(conn, query_cur, user, pwd)
      for standby_cluster in standby_cluster_list:
        # connect
        logging.info("start to check schema split result by cluster: cluster_id = {0}"
                     .format(standby_cluster['cluster_id']))
        logging.info("create connection : cluster_id = {0}, ip = {1}, port = {2}"
                     .format(standby_cluster['cluster_id'],
                             standby_cluster['ip'],
                             standby_cluster['port']))
        tmp_conn = mysql.connector.connect(user     = standby_cluster['user'],
                                           password = standby_cluster['pwd'],
                                           host     = standby_cluster['ip'],
                                           port     = standby_cluster['port'],
                                           database = 'oceanbase')
        tmp_cur = tmp_conn.cursor(buffered=True)
        tmp_conn.autocommit = True
        tmp_query_cur = Cursor(tmp_cur)
        # check that it is still a standby cluster
        is_primary = check_current_cluster_is_primary(tmp_query_cur)
        if is_primary:
          logging.exception("""primary cluster changed : cluster_id = {0}, ip = {1}, port = {2}"""
                            .format(standby_cluster['cluster_id'],
                                    standby_cluster['ip'],
                                    standby_cluster['port']))
          raise MyError('primary cluster changed')
        # check schema split finish
        check_schema_split_v2_finish_until_timeout(tmp_query_cur, 180)
        # close
        tmp_cur.close()
        tmp_conn.close()
        logging.info("""check schema split result success : cluster_id = {0}, ip = {1}, port = {2}"""
                     .format(standby_cluster['cluster_id'],
                             standby_cluster['ip'],
                             standby_cluster['port']))
  except Exception as e:
    logging.warn("execute schema_split_v2 failed")
    raise e
  logging.info("execute schema_split_v2 success")

def check_schema_split_v2_finish(query_cur):
  done = False
  sql = "select count(*) from oceanbase.__all_virtual_upgrade_inspection where name = 'SCHEMA_SPLIT_V2' and info = 'succeed'"
  logging.info(sql)
  (desc, results) = query_cur.exec_query(sql)
  if 1 != len(results) or 1 != len(results[0]):
    logging.warn("should have exactly one record")
    raise MyError('should have exactly one record')
  elif 1 == results[0][0]:
    done = True
  else:
    done = False
  return done

def check_schema_split_v2_finish_until_timeout(query_cur, times):
  while times > 0:
    done = check_schema_split_v2_finish(query_cur)
    if done:
      break
    else:
      times -= 1
      time.sleep(10)
  if 0 == times:
    logging.warn('check schema split v2 timeout!')
    raise MyError('check schema split v2 timeout')
  logging.info("check schema split v2 success")

def fetch_tenant_ids(query_cur):
  try:
    tenant_id_list = []
    (desc, results) = query_cur.exec_query("""select distinct tenant_id from oceanbase.__all_tenant order by tenant_id desc""")
    for r in results:
      tenant_id_list.append(r[0])
    return tenant_id_list
  except Exception as e:
    logging.exception('fail to fetch distinct tenant ids')
    raise e

def check_current_cluster_is_primary(query_cur):
  try:
    sql = """SELECT * FROM v$ob_cluster
             WHERE cluster_role = "PRIMARY"
             AND cluster_status = "VALID"
             AND (switchover_status = "NOT ALLOWED" OR switchover_status = "TO STANDBY") """
    (desc, results) = query_cur.exec_query(sql)
    is_primary = len(results) > 0
    return is_primary
  except Exception as e:
    logging.exception("""fail to check current is primary""")
    raise e

def fetch_standby_cluster_infos(conn, query_cur, user, pwd):
  try:
    is_primary = check_current_cluster_is_primary(query_cur)
    if not is_primary:
      logging.exception("""should be primary cluster""")
      raise MyError('should be primary cluster')

    standby_cluster_infos = []
    sql = """SELECT cluster_id, rootservice_list from v$ob_standby_status"""
    (desc, results) = query_cur.exec_query(sql)

    for r in results:
      standby_cluster_info = {}
      if 2 != len(r):
        logging.exception("length not match")
        raise MyError('length not match')
      standby_cluster_info['cluster_id'] = r[0]
      standby_cluster_info['user'] = user
      standby_cluster_info['pwd'] = pwd
      # construct ip/port
      address = r[1].split(";")[0] # choose the first address in rs_list
      standby_cluster_info['ip'] = str(address.split(":")[0])
      standby_cluster_info['port'] = address.split(":")[2]
      # append
      standby_cluster_infos.append(standby_cluster_info)
      logging.info("""cluster_info : cluster_id = {0}, ip = {1}, port = {2}"""
                   .format(standby_cluster_info['cluster_id'],
                           standby_cluster_info['ip'],
                           standby_cluster_info['port']))
    conn.commit()
    # check each standby cluster
    for standby_cluster_info in standby_cluster_infos:
      # connect
      logging.info("""create connection : cluster_id = {0}, ip = {1}, port = {2}"""
                   .format(standby_cluster_info['cluster_id'],
                           standby_cluster_info['ip'],
                           standby_cluster_info['port']))

      tmp_conn = mysql.connector.connect(user     = standby_cluster_info['user'],
                                         password = standby_cluster_info['pwd'],
                                         host     = standby_cluster_info['ip'],
                                         port     = standby_cluster_info['port'],
                                         database = 'oceanbase')

      tmp_cur = tmp_conn.cursor(buffered=True)
      tmp_conn.autocommit = True
      tmp_query_cur = Cursor(tmp_cur)
      is_primary = check_current_cluster_is_primary(tmp_query_cur)
      if is_primary:
        logging.exception("""primary cluster changed : cluster_id = {0}, ip = {1}, port = {2}"""
                          .format(standby_cluster_info['cluster_id'],
                                  standby_cluster_info['ip'],
                                  standby_cluster_info['port']))
        raise MyError('primary cluster changed')
      # close
      tmp_cur.close()
      tmp_conn.close()

    return standby_cluster_infos
  except Exception as e:
    logging.exception('fail to fetch standby cluster info')
    raise e
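
# Address parsing sketch (illustrative; assumes each rootservice_list entry is
# formatted 'ip:rpc_port:sql_port', with entries separated by ';'):
#   address = '10.0.0.1:2882:2881'.split(';')[0]
#   ip = address.split(':')[0]        # '10.0.0.1'
#   sql_port = address.split(':')[2]  # '2881', the port used to connect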

def check_ddl_and_dml_sync(conn, query_cur, standby_cluster_infos, tenant_ids):
  try:
    conn.commit()
    # only a primary cluster needs to check ddl and dml sync
    is_primary = check_current_cluster_is_primary(query_cur)
    if not is_primary:
      logging.exception("""should be primary cluster""")
      raise MyError('should be primary cluster')

    # fetch sys stats
    sys_infos = []
    sql = """SELECT tenant_id,
                    refreshed_schema_version,
                    min_sys_table_scn,
                    min_user_table_scn
             FROM oceanbase.v$ob_cluster_stats
             ORDER BY tenant_id desc"""
    (desc, results) = query_cur.exec_query(sql)
    if len(tenant_ids) != len(results):
      logging.exception("result not match")
      raise MyError('result not match')
    else:
      for i in range(len(results)):
        if len(results[i]) != 4:
          logging.exception("length not match")
          raise MyError('length not match')
        elif results[i][0] != tenant_ids[i]:
          logging.exception("tenant_id not match")
          raise MyError('tenant_id not match')
        else:
          sys_info = {}
          sys_info['tenant_id'] = results[i][0]
          sys_info['refreshed_schema_version'] = results[i][1]
          sys_info['min_sys_table_scn'] = results[i][2]
          sys_info['min_user_table_scn'] = results[i][3]
          logging.info("sys info : {0}".format(sys_info))
          sys_infos.append(sys_info)
    conn.commit()

    # check ddl and dml sync, cluster by cluster
    for standby_cluster_info in standby_cluster_infos:
      check_ddl_and_dml_sync_by_cluster(standby_cluster_info, sys_infos)

  except Exception as e:
    logging.exception("fail to check ddl and dml sync")
    raise e

def check_ddl_and_dml_sync_by_tenant(query_cur, sys_info):
  try:
    times = 1800 # 30min
    logging.info("start to check ddl and dml sync by tenant : {0}".format(sys_info))
    start_time = time.time()
    sql = ""
    if 1 == sys_info['tenant_id'] :
      # On a standby cluster the sys tenant's DML is not physically replicated;
      # the upgrade script writes it itself, so for the sys tenant only DDL
      # sync is validated.
      sql = """SELECT count(*)
               FROM oceanbase.v$ob_cluster_stats
               WHERE tenant_id = {0}
               AND refreshed_schema_version >= {1}
            """.format(sys_info['tenant_id'],
                       sys_info['refreshed_schema_version'])
    else:
      sql = """SELECT count(*)
               FROM oceanbase.v$ob_cluster_stats
               WHERE tenant_id = {0}
               AND refreshed_schema_version >= {1}
               AND min_sys_table_scn >= {2}
               AND min_user_table_scn >= {3}
            """.format(sys_info['tenant_id'],
                       sys_info['refreshed_schema_version'],
                       sys_info['min_sys_table_scn'],
                       sys_info['min_user_table_scn'])
    while times > 0 :
      (desc, results) = query_cur.exec_query(sql)
      if len(results) == 1 and results[0][0] == 1:
        break
      time.sleep(1)
      times -= 1
    if times == 0:
      logging.exception("check ddl and dml sync timeout! : {0}, cost = {1}"
                        .format(sys_info, time.time() - start_time))
      raise MyError('check ddl and dml sync timeout')
    else:
      logging.info("check ddl and dml sync success! : {0}, cost = {1}"
                   .format(sys_info, time.time() - start_time))

  except Exception as e:
    logging.exception("fail to check ddl and dml sync : {0}".format(sys_info))
    raise e

# Run the checks that follow an upgrade.
def do_check(my_host, my_port, my_user, my_passwd, upgrade_params):
  try:
    conn = mysql.connector.connect(user = my_user,
                                   password = my_passwd,
                                   host = my_host,
                                   port = my_port,
                                   database = 'oceanbase',
                                   raise_on_warnings = True)
    conn.autocommit = True
    cur = conn.cursor(buffered=True)
    try:
      query_cur = Cursor(cur)
      try:
        check_cluster_version(query_cur)
        upgrade_table_schema_version(conn, query_cur)
        check_root_inspection(query_cur)
        enable_ddl(cur)
        enable_rebalance(cur)
        enable_rereplication(cur)
        enable_major_freeze(cur)
        enable_px_inner_sql(query_cur, cur)
      except Exception as e:
        logging.exception('run error')
        raise e
    except Exception as e:
      logging.exception('run error')
      raise e
    finally:
      cur.close()
      conn.close()
  except mysql.connector.Error as e:
    logging.exception('connection error')
    raise e
  except Exception as e:
    logging.exception('normal error')
    raise e

if __name__ == '__main__':
  upgrade_params = UpgradeParams()
  change_opt_default_value('log-file', upgrade_params.log_filename)
  parse_options(sys.argv[1:])
  if not has_no_local_opts():
    deal_with_local_opts()
  else:
    check_db_client_opts()
    log_filename = get_opt_log_file()
    upgrade_params.log_filename = log_filename
    # Configure logging here so that the option handling above cannot
    # clobber the log file.
    config_logging_module(upgrade_params.log_filename)
    try:
      host = get_opt_host()
      port = int(get_opt_port())
      user = get_opt_user()
      password = get_opt_password()
      logging.info('parameters from cmd: host=\"%s\", port=%s, user=\"%s\", password=\"%s\", log-file=\"%s\"',
                   host, port, user, password, log_filename)
      do_check(host, port, user, password, upgrade_params)
    except mysql.connector.Error as e:
      logging.exception('mysql connector error')
      raise e
    except Exception as e:
      logging.exception('normal error')
      raise e
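
# Example invocation (mirrors the help text above; credentials are placeholders):
#   python upgrade_post_checker.py -h 127.0.0.1 -P 3306 -u xxx -p xxx
# The script connects to the 'oceanbase' database, waits for
# min_observer_version to reach 3.1.2, runs the root inspection check, then
# re-enables ddl / rebalance / rereplication / major_freeze and turns on PX
# for inner SQL.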