[scn] fix failure of mittest after refresh feature scn
This commit is contained in:
@ -12,7 +12,7 @@ import time
|
||||
|
||||
class UpgradeParams:
|
||||
log_filename = 'upgrade_post_checker.log'
|
||||
new_version = '4.1.0.0'
|
||||
new_version = '4.0.0.0'
|
||||
#### --------------start : my_error.py --------------
|
||||
class MyError(Exception):
|
||||
def __init__(self, value):
|
||||
@ -288,7 +288,7 @@ def config_logging_module(log_filenamme):
|
||||
def check_cluster_version(query_cur):
|
||||
# 一方面配置项生效是个异步生效任务,另一方面是2.2.0之后新增租户级配置项刷新,和系统级配置项刷新复用同一个timer,这里暂且等一下。
|
||||
times = 30
|
||||
sql="select distinct value = '{0}' from oceanbase.GV$OB_PARAMETERS where name='min_observer_version'".format(upgrade_params.new_version)
|
||||
sql="select distinct value = '{0}' from oceanbase.__all_virtual_sys_parameter_stat where name='min_observer_version'".format(upgrade_params.new_version)
|
||||
while times > 0 :
|
||||
(desc, results) = query_cur.exec_query(sql)
|
||||
if len(results) == 1 and results[0][0] == 1:
|
||||
@ -301,6 +301,38 @@ def check_cluster_version(query_cur):
|
||||
else:
|
||||
logging.info("check_cluster_version success")
|
||||
|
||||
def check_storage_format_version(query_cur):
|
||||
# Specified expected version each time want to upgrade (see OB_STORAGE_FORMAT_VERSION_MAX)
|
||||
expect_version = 4;
|
||||
sql = "select value from oceanbase.__all_zone where zone = '' and name = 'storage_format_version'"
|
||||
times = 180
|
||||
while times > 0 :
|
||||
(desc, results) = query_cur.exec_query(sql)
|
||||
if len(results) == 1 and results[0][0] == expect_version:
|
||||
break
|
||||
time.sleep(10)
|
||||
times -= 1
|
||||
if times == 0:
|
||||
logging.warn("check storage format version timeout! Expected version {0}".format(expect_version))
|
||||
raise e
|
||||
else:
|
||||
logging.info("check expected storage format version '{0}' success".format(expect_version))
|
||||
|
||||
def upgrade_storage_format_version(conn, cur):
|
||||
try:
|
||||
# enable_ddl
|
||||
set_parameter(cur, 'enable_ddl', 'True')
|
||||
|
||||
# run job
|
||||
sql = "alter system run job 'UPGRADE_STORAGE_FORMAT_VERSION';"
|
||||
logging.info(sql)
|
||||
cur.execute(sql)
|
||||
|
||||
except Exception, e:
|
||||
logging.warn("upgrade storage format version failed")
|
||||
raise e
|
||||
logging.info("upgrade storage format version finish")
|
||||
|
||||
# 2 检查内部表自检是否成功
|
||||
def check_root_inspection(query_cur):
|
||||
sql = "select count(*) from oceanbase.__all_virtual_upgrade_inspection where info != 'succeed'"
|
||||
@ -316,6 +348,20 @@ def check_root_inspection(query_cur):
|
||||
raise e
|
||||
logging.info('check root inspection success')
|
||||
|
||||
# 3 check standby cluster
|
||||
def check_standby_cluster(conn, query_cur, my_user, my_passwd):
|
||||
try:
|
||||
is_primary = check_current_cluster_is_primary(query_cur)
|
||||
if not is_primary:
|
||||
logging.info("""current cluster is standby cluster, just skip""")
|
||||
else:
|
||||
tenant_id_list = fetch_tenant_ids(query_cur)
|
||||
standby_cluster_infos = fetch_standby_cluster_infos(conn, query_cur, my_user, my_passwd)
|
||||
check_ddl_and_dml_sync(conn, query_cur, standby_cluster_infos, tenant_id_list)
|
||||
except Exception, e:
|
||||
logging.exception('fail to fetch standby cluster info')
|
||||
raise e
|
||||
|
||||
# 4 开ddl
|
||||
def enable_ddl(cur):
|
||||
set_parameter(cur, 'enable_ddl', 'True')
|
||||
@ -343,6 +389,216 @@ def fetch_tenant_ids(query_cur):
|
||||
logging.exception('fail to fetch distinct tenant ids')
|
||||
raise e
|
||||
|
||||
def check_current_cluster_is_primary(query_cur):
|
||||
try:
|
||||
sql = """SELECT * FROM v$ob_cluster
|
||||
WHERE cluster_role = "PRIMARY"
|
||||
AND cluster_status = "VALID"
|
||||
AND (switchover_status = "NOT ALLOWED" OR switchover_status = "TO STANDBY") """
|
||||
(desc, results) = query_cur.exec_query(sql)
|
||||
is_primary = len(results) > 0
|
||||
return is_primary
|
||||
except Exception, e:
|
||||
logging.exception("""fail to check current is primary""")
|
||||
raise e
|
||||
|
||||
def fetch_standby_cluster_infos(conn, query_cur, user, pwd):
|
||||
try:
|
||||
is_primary = check_current_cluster_is_primary(query_cur)
|
||||
if not is_primary:
|
||||
logging.exception("""should be primary cluster""")
|
||||
raise e
|
||||
|
||||
standby_cluster_infos = []
|
||||
sql = """SELECT cluster_id, rootservice_list from v$ob_standby_status"""
|
||||
(desc, results) = query_cur.exec_query(sql)
|
||||
|
||||
for r in results:
|
||||
standby_cluster_info = {}
|
||||
if 2 != len(r):
|
||||
logging.exception("length not match")
|
||||
raise e
|
||||
standby_cluster_info['cluster_id'] = r[0]
|
||||
standby_cluster_info['user'] = user
|
||||
standby_cluster_info['pwd'] = pwd
|
||||
# construct ip/port
|
||||
address = r[1].split(";")[0] # choose first address in rs_list
|
||||
standby_cluster_info['ip'] = str(address.split(":")[0])
|
||||
standby_cluster_info['port'] = address.split(":")[2]
|
||||
# append
|
||||
standby_cluster_infos.append(standby_cluster_info)
|
||||
logging.info("""cluster_info : cluster_id = {0}, ip = {1}, port = {2}"""
|
||||
.format(standby_cluster_info['cluster_id'],
|
||||
standby_cluster_info['ip'],
|
||||
standby_cluster_info['port']))
|
||||
conn.commit()
|
||||
# check standby cluster
|
||||
for standby_cluster_info in standby_cluster_infos:
|
||||
# connect
|
||||
logging.info("""create connection : cluster_id = {0}, ip = {1}, port = {2}"""
|
||||
.format(standby_cluster_info['cluster_id'],
|
||||
standby_cluster_info['ip'],
|
||||
standby_cluster_info['port']))
|
||||
|
||||
tmp_conn = mysql.connector.connect(user = standby_cluster_info['user'],
|
||||
password = standby_cluster_info['pwd'],
|
||||
host = standby_cluster_info['ip'],
|
||||
port = standby_cluster_info['port'],
|
||||
database = 'oceanbase')
|
||||
|
||||
tmp_cur = tmp_conn.cursor(buffered=True)
|
||||
tmp_conn.autocommit = True
|
||||
tmp_query_cur = Cursor(tmp_cur)
|
||||
is_primary = check_current_cluster_is_primary(tmp_query_cur)
|
||||
if is_primary:
|
||||
logging.exception("""primary cluster changed : cluster_id = {0}, ip = {1}, port = {2}"""
|
||||
.format(standby_cluster_info['cluster_id'],
|
||||
standby_cluster_info['ip'],
|
||||
standby_cluster_info['port']))
|
||||
raise e
|
||||
# close
|
||||
tmp_cur.close()
|
||||
tmp_conn.close()
|
||||
|
||||
return standby_cluster_infos
|
||||
except Exception, e:
|
||||
logging.exception('fail to fetch standby cluster info')
|
||||
raise e
|
||||
|
||||
def check_ddl_and_dml_sync(conn, query_cur, standby_cluster_infos, tenant_ids):
|
||||
try:
|
||||
conn.commit()
|
||||
# check if need check ddl and dml sync
|
||||
is_primary = check_current_cluster_is_primary(query_cur)
|
||||
if not is_primary:
|
||||
logging.exception("""should be primary cluster""")
|
||||
raise e
|
||||
|
||||
# fetch sys stats
|
||||
sys_infos = []
|
||||
sql = """SELECT tenant_id,
|
||||
refreshed_schema_version,
|
||||
min_sys_table_scn,
|
||||
min_user_table_scn
|
||||
FROM oceanbase.v$ob_cluster_stats
|
||||
ORDER BY tenant_id desc"""
|
||||
(desc, results) = query_cur.exec_query(sql)
|
||||
if len(tenant_ids) != len(results):
|
||||
logging.exception("result not match")
|
||||
raise e
|
||||
else:
|
||||
for i in range(len(results)):
|
||||
if len(results[i]) != 4:
|
||||
logging.exception("length not match")
|
||||
raise e
|
||||
elif results[i][0] != tenant_ids[i]:
|
||||
logging.exception("tenant_id not match")
|
||||
raise e
|
||||
else:
|
||||
sys_info = {}
|
||||
sys_info['tenant_id'] = results[i][0]
|
||||
sys_info['refreshed_schema_version'] = results[i][1]
|
||||
sys_info['min_sys_table_scn'] = results[i][2]
|
||||
sys_info['min_user_table_scn'] = results[i][3]
|
||||
logging.info("sys info : {0}".format(sys_info))
|
||||
sys_infos.append(sys_info)
|
||||
conn.commit()
|
||||
|
||||
# check ddl and dml by cluster
|
||||
for standby_cluster_info in standby_cluster_infos:
|
||||
check_ddl_and_dml_sync_by_cluster(standby_cluster_info, sys_infos)
|
||||
|
||||
except Exception, e:
|
||||
logging.exception("fail to check ddl and dml sync")
|
||||
raise e
|
||||
|
||||
def check_ddl_and_dml_sync_by_cluster(standby_cluster_info, sys_infos):
|
||||
try:
|
||||
# connect
|
||||
logging.info("start to check ddl and dml sync by cluster: cluster_id = {0}"
|
||||
.format(standby_cluster_info['cluster_id']))
|
||||
logging.info("create connection : cluster_id = {0}, ip = {1}, port = {2}"
|
||||
.format(standby_cluster_info['cluster_id'],
|
||||
standby_cluster_info['ip'],
|
||||
standby_cluster_info['port']))
|
||||
tmp_conn = mysql.connector.connect(user = standby_cluster_info['user'],
|
||||
password = standby_cluster_info['pwd'],
|
||||
host = standby_cluster_info['ip'],
|
||||
port = standby_cluster_info['port'],
|
||||
database = 'oceanbase',
|
||||
raise_on_warnings = True)
|
||||
tmp_cur = tmp_conn.cursor(buffered=True)
|
||||
tmp_conn.autocommit = True
|
||||
tmp_query_cur = Cursor(tmp_cur)
|
||||
is_primary = check_current_cluster_is_primary(tmp_query_cur)
|
||||
if is_primary:
|
||||
logging.exception("""primary cluster changed : cluster_id = {0}, ip = {1}, port = {2}"""
|
||||
.format(standby_cluster_info['cluster_id'],
|
||||
standby_cluster_info['ip'],
|
||||
standby_cluster_info['port']))
|
||||
raise e
|
||||
|
||||
for sys_info in sys_infos:
|
||||
check_ddl_and_dml_sync_by_tenant(tmp_query_cur, sys_info)
|
||||
|
||||
# close
|
||||
tmp_cur.close()
|
||||
tmp_conn.close()
|
||||
logging.info("""check_ddl_and_dml_sync_by_cluster success : cluster_id = {0}, ip = {1}, port = {2}"""
|
||||
.format(standby_cluster_info['cluster_id'],
|
||||
standby_cluster_info['ip'],
|
||||
standby_cluster_info['port']))
|
||||
|
||||
except Exception, e:
|
||||
logging.exception("""fail to check ddl and dml sync : cluster_id = {0}, ip = {1}, port = {2}"""
|
||||
.format(standby_cluster_info['cluster_id'],
|
||||
standby_cluster_info['ip'],
|
||||
standby_cluster_info['port']))
|
||||
raise e
|
||||
|
||||
def check_ddl_and_dml_sync_by_tenant(query_cur, sys_info):
|
||||
try:
|
||||
times = 1800 # 30min
|
||||
logging.info("start to check ddl and dml sync by tenant : {0}".format(sys_info))
|
||||
start_time = time.time()
|
||||
sql = ""
|
||||
if 1 == sys_info['tenant_id'] :
|
||||
# 备库系统租户DML不走物理同步,需要升级脚本负责写入,系统租户仅校验DDL同步
|
||||
sql = """SELECT count(*)
|
||||
FROM oceanbase.v$ob_cluster_stats
|
||||
WHERE tenant_id = {0}
|
||||
AND refreshed_schema_version >= {1}
|
||||
""".format(sys_info['tenant_id'],
|
||||
sys_info['refreshed_schema_version'])
|
||||
else:
|
||||
sql = """SELECT count(*)
|
||||
FROM oceanbase.v$ob_cluster_stats
|
||||
WHERE tenant_id = {0}
|
||||
AND refreshed_schema_version >= {1}
|
||||
AND min_sys_table_scn >= {2}
|
||||
AND min_user_table_scn >= {3}
|
||||
""".format(sys_info['tenant_id'],
|
||||
sys_info['refreshed_schema_version'],
|
||||
sys_info['min_sys_table_scn'],
|
||||
sys_info['min_user_table_scn'])
|
||||
while times > 0 :
|
||||
(desc, results) = query_cur.exec_query(sql)
|
||||
if len(results) == 1 and results[0][0] == 1:
|
||||
break;
|
||||
time.sleep(1)
|
||||
times -= 1
|
||||
if times == 0:
|
||||
logging.exception("check ddl and dml sync timeout! : {0}, cost = {1}"
|
||||
.format(sys_info, time.time() - start_time))
|
||||
raise e
|
||||
else:
|
||||
logging.info("check ddl and dml sync success! : {0}, cost = {1}"
|
||||
.format(sys_info, time.time() - start_time))
|
||||
|
||||
except Exception, e:
|
||||
logging.exception("fail to check ddl and dml sync : {0}".format(sys_info))
|
||||
raise e
|
||||
|
||||
# 开始升级后的检查
|
||||
def do_check(my_host, my_port, my_user, my_passwd, upgrade_params):
|
||||
try:
|
||||
@ -358,7 +614,10 @@ def do_check(my_host, my_port, my_user, my_passwd, upgrade_params):
|
||||
query_cur = Cursor(cur)
|
||||
try:
|
||||
check_cluster_version(query_cur)
|
||||
#upgrade_storage_format_version(conn, cur)
|
||||
#check_storage_format_version(query_cur)
|
||||
check_root_inspection(query_cur)
|
||||
check_standby_cluster(conn, query_cur, my_user, my_passwd)
|
||||
enable_ddl(cur)
|
||||
enable_rebalance(cur)
|
||||
enable_rereplication(cur)
|
||||
|
||||
Reference in New Issue
Block a user