[CP] [UPGRADE] Calculate default timeout according to tenant count
This commit is contained in:
parent
901bf26411
commit
9093d820a0
@ -127,16 +127,45 @@ def set_parameter(cur, parameter, value, timeout = 0):
|
|||||||
cur.execute(sql)
|
cur.execute(sql)
|
||||||
wait_parameter_sync(cur, False, parameter, value, timeout)
|
wait_parameter_sync(cur, False, parameter, value, timeout)
|
||||||
|
|
||||||
|
def set_session_timeout(cur, seconds):
|
||||||
|
sql = "set @@session.ob_query_timeout = {0}".format(seconds * 1000 * 1000)
|
||||||
|
logging.info(sql)
|
||||||
|
cur.execute(sql)
|
||||||
|
|
||||||
|
def set_default_timeout_by_tenant(cur, timeout, timeout_per_tenant, min_timeout):
|
||||||
|
if timeout > 0:
|
||||||
|
logging.info("use timeout from opt, timeout(s):{0}".format(timeout))
|
||||||
|
else:
|
||||||
|
query_cur = QueryCursor(cur)
|
||||||
|
tenant_id_list = fetch_tenant_ids(query_cur)
|
||||||
|
cal_timeout = len(tenant_id_list) * timeout_per_tenant
|
||||||
|
timeout = (cal_timeout if cal_timeout > min_timeout else min_timeout)
|
||||||
|
logging.info("use default timeout caculated by tenants, "
|
||||||
|
"timeout(s):{0}, tenant_count:{1}, "
|
||||||
|
"timeout_per_tenant(s):{2}, min_timeout(s):{3}"
|
||||||
|
.format(timeout, len(tenant_id_list), timeout_per_tenant, min_timeout))
|
||||||
|
|
||||||
|
return timeout
|
||||||
|
|
||||||
def set_tenant_parameter(cur, parameter, value, timeout = 0):
|
def set_tenant_parameter(cur, parameter, value, timeout = 0):
|
||||||
|
|
||||||
tenants_list = []
|
tenants_list = []
|
||||||
if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
||||||
tenants_list = ['all']
|
tenants_list = ['all']
|
||||||
else:
|
else:
|
||||||
tenants_list = ['sys', 'all_user', 'all_meta']
|
tenants_list = ['sys', 'all_user', 'all_meta']
|
||||||
|
|
||||||
|
query_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
|
||||||
|
set_session_timeout(cur, query_timeout)
|
||||||
|
|
||||||
for tenants in tenants_list:
|
for tenants in tenants_list:
|
||||||
sql = """alter system set {0} = '{1}' tenant = '{2}'""".format(parameter, value, tenants)
|
sql = """alter system set {0} = '{1}' tenant = '{2}'""".format(parameter, value, tenants)
|
||||||
logging.info(sql)
|
logging.info(sql)
|
||||||
cur.execute(sql)
|
cur.execute(sql)
|
||||||
|
|
||||||
|
set_session_timeout(cur, 10)
|
||||||
|
|
||||||
wait_parameter_sync(cur, True, parameter, value, timeout)
|
wait_parameter_sync(cur, True, parameter, value, timeout)
|
||||||
|
|
||||||
def get_ori_enable_ddl(cur, timeout):
|
def get_ori_enable_ddl(cur, timeout):
|
||||||
@ -225,7 +254,20 @@ def wait_parameter_sync(cur, is_tenant_config, key, value, timeout):
|
|||||||
table_name = "GV$OB_PARAMETERS" if not is_tenant_config else "__all_virtual_tenant_parameter_info"
|
table_name = "GV$OB_PARAMETERS" if not is_tenant_config else "__all_virtual_tenant_parameter_info"
|
||||||
sql = """select count(*) as cnt from oceanbase.{0}
|
sql = """select count(*) as cnt from oceanbase.{0}
|
||||||
where name = '{1}' and value != '{2}'""".format(table_name, key, value)
|
where name = '{1}' and value != '{2}'""".format(table_name, key, value)
|
||||||
times = (timeout if timeout > 0 else 60) / 5
|
|
||||||
|
wait_timeout = 0
|
||||||
|
query_timeout = 0
|
||||||
|
if not is_tenant_config or timeout > 0:
|
||||||
|
wait_timeout = (timeout if timeout > 0 else 60)
|
||||||
|
query_timeout = wait_timeout
|
||||||
|
else:
|
||||||
|
# is_tenant_config & timeout not set
|
||||||
|
wait_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
query_timeout = set_default_timeout_by_tenant(cur, timeout, 2, 60)
|
||||||
|
|
||||||
|
set_session_timeout(cur, query_timeout)
|
||||||
|
|
||||||
|
times = wait_timeout / 5
|
||||||
while times >= 0:
|
while times >= 0:
|
||||||
logging.info(sql)
|
logging.info(sql)
|
||||||
cur.execute(sql)
|
cur.execute(sql)
|
||||||
@ -245,6 +287,8 @@ def wait_parameter_sync(cur, is_tenant_config, key, value, timeout):
|
|||||||
raise e
|
raise e
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
|
set_session_timeout(cur, 10)
|
||||||
|
|
||||||
def do_begin_upgrade(cur, timeout):
|
def do_begin_upgrade(cur, timeout):
|
||||||
|
|
||||||
if not check_parameter(cur, False, "enable_upgrade_mode", "True"):
|
if not check_parameter(cur, False, "enable_upgrade_mode", "True"):
|
||||||
@ -316,24 +360,38 @@ def do_suspend_merge(cur, timeout):
|
|||||||
tenants_list = ['all']
|
tenants_list = ['all']
|
||||||
else:
|
else:
|
||||||
tenants_list = ['sys', 'all_user', 'all_meta']
|
tenants_list = ['sys', 'all_user', 'all_meta']
|
||||||
|
|
||||||
|
query_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
|
||||||
|
set_session_timeout(cur, query_timeout)
|
||||||
|
|
||||||
for tenants in tenants_list:
|
for tenants in tenants_list:
|
||||||
action_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
action_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
||||||
rollback_sql = "alter system resume merge tenant = {0}".format(tenants)
|
rollback_sql = "alter system resume merge tenant = {0}".format(tenants)
|
||||||
logging.info(action_sql)
|
logging.info(action_sql)
|
||||||
cur.execute(action_sql)
|
cur.execute(action_sql)
|
||||||
|
|
||||||
|
set_session_timeout(cur, 10)
|
||||||
|
|
||||||
def do_resume_merge(cur, timeout):
|
def do_resume_merge(cur, timeout):
|
||||||
tenants_list = []
|
tenants_list = []
|
||||||
if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
||||||
tenants_list = ['all']
|
tenants_list = ['all']
|
||||||
else:
|
else:
|
||||||
tenants_list = ['sys', 'all_user', 'all_meta']
|
tenants_list = ['sys', 'all_user', 'all_meta']
|
||||||
|
|
||||||
|
query_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
|
||||||
|
set_session_timeout(cur, query_timeout)
|
||||||
|
|
||||||
for tenants in tenants_list:
|
for tenants in tenants_list:
|
||||||
action_sql = "alter system resume merge tenant = {0}".format(tenants)
|
action_sql = "alter system resume merge tenant = {0}".format(tenants)
|
||||||
rollback_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
rollback_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
||||||
logging.info(action_sql)
|
logging.info(action_sql)
|
||||||
cur.execute(action_sql)
|
cur.execute(action_sql)
|
||||||
|
|
||||||
|
set_session_timeout(cur, 10)
|
||||||
|
|
||||||
class Cursor:
|
class Cursor:
|
||||||
__cursor = None
|
__cursor = None
|
||||||
def __init__(self, cursor):
|
def __init__(self, cursor):
|
||||||
|
@ -21,17 +21,7 @@ def do_upgrade(conn, cur, timeout, user, pwd):
|
|||||||
else:
|
else:
|
||||||
run_upgrade_job(conn, cur, "UPGRADE_VIRTUAL_SCHEMA", timeout)
|
run_upgrade_job(conn, cur, "UPGRADE_VIRTUAL_SCHEMA", timeout)
|
||||||
|
|
||||||
# just to make __all_virtual_upgrade_inspection avaliable
|
run_root_inspection(cur, timeout)
|
||||||
timeout_ts = (timeout if timeout > 0 else 600) * 1000 * 1000
|
|
||||||
sql = "set @@session.ob_query_timeout = {0}".format(timeout_ts)
|
|
||||||
logging.info(sql)
|
|
||||||
cur.execute(sql)
|
|
||||||
sql = "alter system run job 'root_inspection'"
|
|
||||||
logging.info(sql)
|
|
||||||
cur.execute(sql)
|
|
||||||
sql = "set @@session.ob_query_timeout = 10000000"
|
|
||||||
logging.info(sql)
|
|
||||||
cur.execute(sql)
|
|
||||||
####========******####======== actions begin ========####******========####
|
####========******####======== actions begin ========####******========####
|
||||||
upgrade_syslog_level(conn, cur)
|
upgrade_syslog_level(conn, cur)
|
||||||
return
|
return
|
||||||
@ -47,7 +37,6 @@ def upgrade_syslog_level(conn, cur):
|
|||||||
info_cnt = result[0][0]
|
info_cnt = result[0][0]
|
||||||
if info_cnt > 0:
|
if info_cnt > 0:
|
||||||
actions.set_parameter(cur, "syslog_level", "WDIAG")
|
actions.set_parameter(cur, "syslog_level", "WDIAG")
|
||||||
|
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
logging.warn("upgrade syslog level failed!")
|
logging.warn("upgrade syslog level failed!")
|
||||||
raise e
|
raise e
|
||||||
@ -62,6 +51,18 @@ def query(cur, sql):
|
|||||||
def get_tenant_ids(cur):
|
def get_tenant_ids(cur):
|
||||||
return [_[0] for _ in query(cur, 'select tenant_id from oceanbase.__all_tenant')]
|
return [_[0] for _ in query(cur, 'select tenant_id from oceanbase.__all_tenant')]
|
||||||
|
|
||||||
|
def run_root_inspection(cur, timeout):
|
||||||
|
|
||||||
|
query_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 10, 600)
|
||||||
|
|
||||||
|
actions.set_session_timeout(cur, query_timeout)
|
||||||
|
|
||||||
|
sql = "alter system run job 'root_inspection'"
|
||||||
|
logging.info(sql)
|
||||||
|
cur.execute(sql)
|
||||||
|
|
||||||
|
actions.set_session_timeout(cur, 10)
|
||||||
|
|
||||||
def upgrade_across_version(cur):
|
def upgrade_across_version(cur):
|
||||||
current_data_version = actions.get_current_data_version()
|
current_data_version = actions.get_current_data_version()
|
||||||
int_current_data_version = actions.get_version(current_data_version)
|
int_current_data_version = actions.get_version(current_data_version)
|
||||||
@ -167,7 +168,9 @@ def check_can_run_upgrade_job(cur, job_name):
|
|||||||
|
|
||||||
def check_upgrade_job_result(cur, job_name, timeout, max_used_job_id):
|
def check_upgrade_job_result(cur, job_name, timeout, max_used_job_id):
|
||||||
try:
|
try:
|
||||||
times = (timeout if timeout > 0 else 3600) / 10
|
wait_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 100, 3600)
|
||||||
|
|
||||||
|
times = wait_timeout / 10
|
||||||
while (times >= 0):
|
while (times >= 0):
|
||||||
sql = """select job_status, rs_svr_ip, rs_svr_port, gmt_create from oceanbase.__all_rootservice_job
|
sql = """select job_status, rs_svr_ip, rs_svr_port, gmt_create from oceanbase.__all_rootservice_job
|
||||||
where job_type = '{0}' and job_id > {1} order by job_id desc limit 1
|
where job_type = '{0}' and job_id > {1} order by job_id desc limit 1
|
||||||
|
@ -73,7 +73,7 @@ sys.argv[0] + """ [OPTIONS]""" +\
|
|||||||
' that all modules should be run. They are splitted by ",".\n' +\
|
' that all modules should be run. They are splitted by ",".\n' +\
|
||||||
' For example: -m all, or --module=ddl,normal_dml,special_action\n' +\
|
' For example: -m all, or --module=ddl,normal_dml,special_action\n' +\
|
||||||
'-l, --log-file=name Log file path. If log file path is not given it\'s ' + os.path.splitext(sys.argv[0])[0] + '.log\n' +\
|
'-l, --log-file=name Log file path. If log file path is not given it\'s ' + os.path.splitext(sys.argv[0])[0] + '.log\n' +\
|
||||||
'-t, --timeout=name check timeout, default: 600(s).\n' + \
|
'-t, --timeout=name check timeout.\n' + \
|
||||||
'-z, --zone=name If zone is not specified, check all servers status in cluster. \n' +\
|
'-z, --zone=name If zone is not specified, check all servers status in cluster. \n' +\
|
||||||
' Otherwise, only check servers status in specified zone. \n' + \
|
' Otherwise, only check servers status in specified zone. \n' + \
|
||||||
'\n\n' +\
|
'\n\n' +\
|
||||||
@ -135,8 +135,7 @@ Option('p', 'password', True, False, ''),\
|
|||||||
Option('m', 'module', True, False, 'all'),\
|
Option('m', 'module', True, False, 'all'),\
|
||||||
# 日志文件路径,不同脚本的main函数中中会改成不同的默认值
|
# 日志文件路径,不同脚本的main函数中中会改成不同的默认值
|
||||||
Option('l', 'log-file', True, False),\
|
Option('l', 'log-file', True, False),\
|
||||||
# 一些检查的超时时间,默认是600s
|
Option('t', 'timeout', True, False, 0),\
|
||||||
Option('t', 'timeout', True, False, '600'),\
|
|
||||||
Option('z', 'zone', True, False, ''),\
|
Option('z', 'zone', True, False, ''),\
|
||||||
]\
|
]\
|
||||||
|
|
||||||
@ -288,13 +287,38 @@ def check_zone_valid(query_cur, zone):
|
|||||||
else:
|
else:
|
||||||
logging.info("zone is empty, check all servers in cluster")
|
logging.info("zone is empty, check all servers in cluster")
|
||||||
|
|
||||||
|
def fetch_tenant_ids(query_cur):
|
||||||
|
try:
|
||||||
|
tenant_id_list = []
|
||||||
|
(desc, results) = query_cur.exec_query("""select distinct tenant_id from oceanbase.__all_tenant order by tenant_id desc""")
|
||||||
|
for r in results:
|
||||||
|
tenant_id_list.append(r[0])
|
||||||
|
return tenant_id_list
|
||||||
|
except Exception, e:
|
||||||
|
logging.exception('fail to fetch distinct tenant ids')
|
||||||
|
raise e
|
||||||
|
|
||||||
|
def set_default_timeout_by_tenant(query_cur, timeout, timeout_per_tenant, min_timeout):
|
||||||
|
if timeout > 0:
|
||||||
|
logging.info("use timeout from opt, timeout(s):{0}".format(timeout))
|
||||||
|
else:
|
||||||
|
tenant_id_list = fetch_tenant_ids(query_cur)
|
||||||
|
cal_timeout = len(tenant_id_list) * timeout_per_tenant
|
||||||
|
timeout = (cal_timeout if cal_timeout > min_timeout else min_timeout)
|
||||||
|
logging.info("use default timeout caculated by tenants, "
|
||||||
|
"timeout(s):{0}, tenant_count:{1}, "
|
||||||
|
"timeout_per_tenant(s):{2}, min_timeout(s):{3}"
|
||||||
|
.format(timeout, len(tenant_id_list), timeout_per_tenant, min_timeout))
|
||||||
|
|
||||||
|
return timeout
|
||||||
|
|
||||||
#### START ####
|
#### START ####
|
||||||
# 0. 检查server版本是否严格一致
|
# 0. 检查server版本是否严格一致
|
||||||
def check_server_version_by_zone(query_cur, zone):
|
def check_server_version_by_zone(query_cur, zone):
|
||||||
if zone == '':
|
if zone == '':
|
||||||
logging.info("skip check server version by cluster")
|
logging.info("skip check server version by cluster")
|
||||||
else:
|
else:
|
||||||
sql = """select distinct(substring_index(build_version, '_', 1)) from __all_server where zone = '{0}'""".format(zone);
|
sql = """select distinct(substring_index(build_version, '_', 1)) from oceanbase.__all_server where zone = '{0}'""".format(zone);
|
||||||
(desc, results) = query_cur.exec_query(sql);
|
(desc, results) = query_cur.exec_query(sql);
|
||||||
if len(results) != 1:
|
if len(results) != 1:
|
||||||
raise MyError("servers build_version not match")
|
raise MyError("servers build_version not match")
|
||||||
@ -304,8 +328,9 @@ def check_server_version_by_zone(query_cur, zone):
|
|||||||
# 1. 检查paxos副本是否同步, paxos副本是否缺失
|
# 1. 检查paxos副本是否同步, paxos副本是否缺失
|
||||||
def check_paxos_replica(query_cur, timeout):
|
def check_paxos_replica(query_cur, timeout):
|
||||||
# 1.1 检查paxos副本是否同步
|
# 1.1 检查paxos副本是否同步
|
||||||
sql = """select count(*) from GV$OB_LOG_STAT where in_sync = 'NO'"""
|
sql = """select count(*) from oceanbase.GV$OB_LOG_STAT where in_sync = 'NO'"""
|
||||||
check_until_timeout(query_cur, sql, 0, timeout)
|
wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 10, 600)
|
||||||
|
check_until_timeout(query_cur, sql, 0, wait_timeout)
|
||||||
|
|
||||||
# 1.2 检查paxos副本是否有缺失 TODO
|
# 1.2 检查paxos副本是否有缺失 TODO
|
||||||
logging.info('check paxos replica success')
|
logging.info('check paxos replica success')
|
||||||
@ -315,26 +340,29 @@ def check_observer_status(query_cur, zone, timeout):
|
|||||||
sql = """select count(*) from oceanbase.__all_server where (start_service_time <= 0 or status='inactive')"""
|
sql = """select count(*) from oceanbase.__all_server where (start_service_time <= 0 or status='inactive')"""
|
||||||
if zone != '':
|
if zone != '':
|
||||||
sql += """ and zone = '{0}'""".format(zone)
|
sql += """ and zone = '{0}'""".format(zone)
|
||||||
check_until_timeout(query_cur, sql, 0, timeout)
|
wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 10, 600)
|
||||||
|
check_until_timeout(query_cur, sql, 0, wait_timeout)
|
||||||
|
|
||||||
# 3. 检查schema是否刷新成功
|
# 3. 检查schema是否刷新成功
|
||||||
def check_schema_status(query_cur, timeout):
|
def check_schema_status(query_cur, timeout):
|
||||||
sql = """select if (a.cnt = b.cnt, 1, 0) as passed from (select count(*) as cnt from oceanbase.__all_virtual_server_schema_info where refreshed_schema_version > 1 and refreshed_schema_version % 8 = 0) as a join (select count(*) as cnt from oceanbase.__all_server join oceanbase.__all_tenant) as b"""
|
sql = """select if (a.cnt = b.cnt, 1, 0) as passed from (select count(*) as cnt from oceanbase.__all_virtual_server_schema_info where refreshed_schema_version > 1 and refreshed_schema_version % 8 = 0) as a join (select count(*) as cnt from oceanbase.__all_server join oceanbase.__all_tenant) as b"""
|
||||||
check_until_timeout(query_cur, sql, 1, timeout)
|
wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 30, 600)
|
||||||
|
check_until_timeout(query_cur, sql, 1, wait_timeout)
|
||||||
|
|
||||||
# 4. check major finish
|
# 4. check major finish
|
||||||
def check_major_merge(query_cur, timeout):
|
def check_major_merge(query_cur, timeout):
|
||||||
need_check = 0
|
need_check = 0
|
||||||
(desc, results) = query_cur.exec_query("""select distinct value from GV$OB_PARAMETERs where name = 'enable_major_freeze';""")
|
(desc, results) = query_cur.exec_query("""select distinct value from oceanbase.GV$OB_PARAMETERS where name = 'enable_major_freeze';""")
|
||||||
if len(results) != 1:
|
if len(results) != 1:
|
||||||
need_check = 1
|
need_check = 1
|
||||||
elif results[0][0] != 'True':
|
elif results[0][0] != 'True':
|
||||||
need_check = 1
|
need_check = 1
|
||||||
if need_check == 1:
|
if need_check == 1:
|
||||||
sql = """select count(1) from CDB_OB_MAJOR_COMPACTION where (GLOBAL_BROADCAST_SCN > LAST_SCN or STATUS != 'IDLE')"""
|
wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 30, 600)
|
||||||
check_until_timeout(query_cur, sql, 0, timeout)
|
sql = """select count(1) from oceanbase.CDB_OB_MAJOR_COMPACTION where (GLOBAL_BROADCAST_SCN > LAST_SCN or STATUS != 'IDLE')"""
|
||||||
sql2 = """select /*+ query_timeout(1000000000) */ count(1) from __all_virtual_tablet_compaction_info where max_received_scn > finished_scn and max_received_scn > 0"""
|
check_until_timeout(query_cur, sql, 0, wait_timeout)
|
||||||
check_until_timeout(query_cur, sql2, 0, timeout)
|
sql2 = """select /*+ query_timeout(1000000000) */ count(1) from oceanbase.__all_virtual_tablet_compaction_info where max_received_scn > finished_scn and max_received_scn > 0"""
|
||||||
|
check_until_timeout(query_cur, sql2, 0, wait_timeout)
|
||||||
|
|
||||||
def check_until_timeout(query_cur, sql, value, timeout):
|
def check_until_timeout(query_cur, sql, value, timeout):
|
||||||
times = timeout / 10
|
times = timeout / 10
|
||||||
@ -366,7 +394,6 @@ def do_check(my_host, my_port, my_user, my_passwd, upgrade_params, timeout, need
|
|||||||
raise_on_warnings = True)
|
raise_on_warnings = True)
|
||||||
conn.autocommit = True
|
conn.autocommit = True
|
||||||
cur = conn.cursor(buffered=True)
|
cur = conn.cursor(buffered=True)
|
||||||
timeout = timeout if timeout > 0 else 600
|
|
||||||
try:
|
try:
|
||||||
query_cur = QueryCursor(cur)
|
query_cur = QueryCursor(cur)
|
||||||
check_zone_valid(query_cur, zone)
|
check_zone_valid(query_cur, zone)
|
||||||
|
@ -135,16 +135,45 @@
|
|||||||
# cur.execute(sql)
|
# cur.execute(sql)
|
||||||
# wait_parameter_sync(cur, False, parameter, value, timeout)
|
# wait_parameter_sync(cur, False, parameter, value, timeout)
|
||||||
#
|
#
|
||||||
|
#def set_session_timeout(cur, seconds):
|
||||||
|
# sql = "set @@session.ob_query_timeout = {0}".format(seconds * 1000 * 1000)
|
||||||
|
# logging.info(sql)
|
||||||
|
# cur.execute(sql)
|
||||||
|
#
|
||||||
|
#def set_default_timeout_by_tenant(cur, timeout, timeout_per_tenant, min_timeout):
|
||||||
|
# if timeout > 0:
|
||||||
|
# logging.info("use timeout from opt, timeout(s):{0}".format(timeout))
|
||||||
|
# else:
|
||||||
|
# query_cur = QueryCursor(cur)
|
||||||
|
# tenant_id_list = fetch_tenant_ids(query_cur)
|
||||||
|
# cal_timeout = len(tenant_id_list) * timeout_per_tenant
|
||||||
|
# timeout = (cal_timeout if cal_timeout > min_timeout else min_timeout)
|
||||||
|
# logging.info("use default timeout caculated by tenants, "
|
||||||
|
# "timeout(s):{0}, tenant_count:{1}, "
|
||||||
|
# "timeout_per_tenant(s):{2}, min_timeout(s):{3}"
|
||||||
|
# .format(timeout, len(tenant_id_list), timeout_per_tenant, min_timeout))
|
||||||
|
#
|
||||||
|
# return timeout
|
||||||
|
#
|
||||||
#def set_tenant_parameter(cur, parameter, value, timeout = 0):
|
#def set_tenant_parameter(cur, parameter, value, timeout = 0):
|
||||||
|
#
|
||||||
# tenants_list = []
|
# tenants_list = []
|
||||||
# if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
# if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
||||||
# tenants_list = ['all']
|
# tenants_list = ['all']
|
||||||
# else:
|
# else:
|
||||||
# tenants_list = ['sys', 'all_user', 'all_meta']
|
# tenants_list = ['sys', 'all_user', 'all_meta']
|
||||||
|
#
|
||||||
|
# query_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
#
|
||||||
|
# set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
# for tenants in tenants_list:
|
# for tenants in tenants_list:
|
||||||
# sql = """alter system set {0} = '{1}' tenant = '{2}'""".format(parameter, value, tenants)
|
# sql = """alter system set {0} = '{1}' tenant = '{2}'""".format(parameter, value, tenants)
|
||||||
# logging.info(sql)
|
# logging.info(sql)
|
||||||
# cur.execute(sql)
|
# cur.execute(sql)
|
||||||
|
#
|
||||||
|
# set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
# wait_parameter_sync(cur, True, parameter, value, timeout)
|
# wait_parameter_sync(cur, True, parameter, value, timeout)
|
||||||
#
|
#
|
||||||
#def get_ori_enable_ddl(cur, timeout):
|
#def get_ori_enable_ddl(cur, timeout):
|
||||||
@ -233,7 +262,20 @@
|
|||||||
# table_name = "GV$OB_PARAMETERS" if not is_tenant_config else "__all_virtual_tenant_parameter_info"
|
# table_name = "GV$OB_PARAMETERS" if not is_tenant_config else "__all_virtual_tenant_parameter_info"
|
||||||
# sql = """select count(*) as cnt from oceanbase.{0}
|
# sql = """select count(*) as cnt from oceanbase.{0}
|
||||||
# where name = '{1}' and value != '{2}'""".format(table_name, key, value)
|
# where name = '{1}' and value != '{2}'""".format(table_name, key, value)
|
||||||
# times = (timeout if timeout > 0 else 60) / 5
|
#
|
||||||
|
# wait_timeout = 0
|
||||||
|
# query_timeout = 0
|
||||||
|
# if not is_tenant_config or timeout > 0:
|
||||||
|
# wait_timeout = (timeout if timeout > 0 else 60)
|
||||||
|
# query_timeout = wait_timeout
|
||||||
|
# else:
|
||||||
|
# # is_tenant_config & timeout not set
|
||||||
|
# wait_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
# query_timeout = set_default_timeout_by_tenant(cur, timeout, 2, 60)
|
||||||
|
#
|
||||||
|
# set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
|
# times = wait_timeout / 5
|
||||||
# while times >= 0:
|
# while times >= 0:
|
||||||
# logging.info(sql)
|
# logging.info(sql)
|
||||||
# cur.execute(sql)
|
# cur.execute(sql)
|
||||||
@ -253,6 +295,8 @@
|
|||||||
# raise e
|
# raise e
|
||||||
# time.sleep(5)
|
# time.sleep(5)
|
||||||
#
|
#
|
||||||
|
# set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
#def do_begin_upgrade(cur, timeout):
|
#def do_begin_upgrade(cur, timeout):
|
||||||
#
|
#
|
||||||
# if not check_parameter(cur, False, "enable_upgrade_mode", "True"):
|
# if not check_parameter(cur, False, "enable_upgrade_mode", "True"):
|
||||||
@ -324,24 +368,38 @@
|
|||||||
# tenants_list = ['all']
|
# tenants_list = ['all']
|
||||||
# else:
|
# else:
|
||||||
# tenants_list = ['sys', 'all_user', 'all_meta']
|
# tenants_list = ['sys', 'all_user', 'all_meta']
|
||||||
|
#
|
||||||
|
# query_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
#
|
||||||
|
# set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
# for tenants in tenants_list:
|
# for tenants in tenants_list:
|
||||||
# action_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
# action_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
||||||
# rollback_sql = "alter system resume merge tenant = {0}".format(tenants)
|
# rollback_sql = "alter system resume merge tenant = {0}".format(tenants)
|
||||||
# logging.info(action_sql)
|
# logging.info(action_sql)
|
||||||
# cur.execute(action_sql)
|
# cur.execute(action_sql)
|
||||||
#
|
#
|
||||||
|
# set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
#def do_resume_merge(cur, timeout):
|
#def do_resume_merge(cur, timeout):
|
||||||
# tenants_list = []
|
# tenants_list = []
|
||||||
# if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
# if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
||||||
# tenants_list = ['all']
|
# tenants_list = ['all']
|
||||||
# else:
|
# else:
|
||||||
# tenants_list = ['sys', 'all_user', 'all_meta']
|
# tenants_list = ['sys', 'all_user', 'all_meta']
|
||||||
|
#
|
||||||
|
# query_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
#
|
||||||
|
# set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
# for tenants in tenants_list:
|
# for tenants in tenants_list:
|
||||||
# action_sql = "alter system resume merge tenant = {0}".format(tenants)
|
# action_sql = "alter system resume merge tenant = {0}".format(tenants)
|
||||||
# rollback_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
# rollback_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
||||||
# logging.info(action_sql)
|
# logging.info(action_sql)
|
||||||
# cur.execute(action_sql)
|
# cur.execute(action_sql)
|
||||||
#
|
#
|
||||||
|
# set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
#class Cursor:
|
#class Cursor:
|
||||||
# __cursor = None
|
# __cursor = None
|
||||||
# def __init__(self, cursor):
|
# def __init__(self, cursor):
|
||||||
@ -1334,17 +1392,7 @@
|
|||||||
# else:
|
# else:
|
||||||
# run_upgrade_job(conn, cur, "UPGRADE_VIRTUAL_SCHEMA", timeout)
|
# run_upgrade_job(conn, cur, "UPGRADE_VIRTUAL_SCHEMA", timeout)
|
||||||
#
|
#
|
||||||
# # just to make __all_virtual_upgrade_inspection avaliable
|
# run_root_inspection(cur, timeout)
|
||||||
# timeout_ts = (timeout if timeout > 0 else 600) * 1000 * 1000
|
|
||||||
# sql = "set @@session.ob_query_timeout = {0}".format(timeout_ts)
|
|
||||||
# logging.info(sql)
|
|
||||||
# cur.execute(sql)
|
|
||||||
# sql = "alter system run job 'root_inspection'"
|
|
||||||
# logging.info(sql)
|
|
||||||
# cur.execute(sql)
|
|
||||||
# sql = "set @@session.ob_query_timeout = 10000000"
|
|
||||||
# logging.info(sql)
|
|
||||||
# cur.execute(sql)
|
|
||||||
#####========******####======== actions begin ========####******========####
|
#####========******####======== actions begin ========####******========####
|
||||||
# upgrade_syslog_level(conn, cur)
|
# upgrade_syslog_level(conn, cur)
|
||||||
# return
|
# return
|
||||||
@ -1360,7 +1408,6 @@
|
|||||||
# info_cnt = result[0][0]
|
# info_cnt = result[0][0]
|
||||||
# if info_cnt > 0:
|
# if info_cnt > 0:
|
||||||
# actions.set_parameter(cur, "syslog_level", "WDIAG")
|
# actions.set_parameter(cur, "syslog_level", "WDIAG")
|
||||||
#
|
|
||||||
# except Exception, e:
|
# except Exception, e:
|
||||||
# logging.warn("upgrade syslog level failed!")
|
# logging.warn("upgrade syslog level failed!")
|
||||||
# raise e
|
# raise e
|
||||||
@ -1375,6 +1422,18 @@
|
|||||||
#def get_tenant_ids(cur):
|
#def get_tenant_ids(cur):
|
||||||
# return [_[0] for _ in query(cur, 'select tenant_id from oceanbase.__all_tenant')]
|
# return [_[0] for _ in query(cur, 'select tenant_id from oceanbase.__all_tenant')]
|
||||||
#
|
#
|
||||||
|
#def run_root_inspection(cur, timeout):
|
||||||
|
#
|
||||||
|
# query_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 10, 600)
|
||||||
|
#
|
||||||
|
# actions.set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
|
# sql = "alter system run job 'root_inspection'"
|
||||||
|
# logging.info(sql)
|
||||||
|
# cur.execute(sql)
|
||||||
|
#
|
||||||
|
# actions.set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
#def upgrade_across_version(cur):
|
#def upgrade_across_version(cur):
|
||||||
# current_data_version = actions.get_current_data_version()
|
# current_data_version = actions.get_current_data_version()
|
||||||
# int_current_data_version = actions.get_version(current_data_version)
|
# int_current_data_version = actions.get_version(current_data_version)
|
||||||
@ -1480,7 +1539,9 @@
|
|||||||
#
|
#
|
||||||
#def check_upgrade_job_result(cur, job_name, timeout, max_used_job_id):
|
#def check_upgrade_job_result(cur, job_name, timeout, max_used_job_id):
|
||||||
# try:
|
# try:
|
||||||
# times = (timeout if timeout > 0 else 3600) / 10
|
# wait_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 100, 3600)
|
||||||
|
#
|
||||||
|
# times = wait_timeout / 10
|
||||||
# while (times >= 0):
|
# while (times >= 0):
|
||||||
# sql = """select job_status, rs_svr_ip, rs_svr_port, gmt_create from oceanbase.__all_rootservice_job
|
# sql = """select job_status, rs_svr_ip, rs_svr_port, gmt_create from oceanbase.__all_rootservice_job
|
||||||
# where job_type = '{0}' and job_id > {1} order by job_id desc limit 1
|
# where job_type = '{0}' and job_id > {1} order by job_id desc limit 1
|
||||||
@ -2217,7 +2278,8 @@
|
|||||||
# fail_list.append("""still has tenant snapshot task, upgrade is not allowed temporarily""")
|
# fail_list.append("""still has tenant snapshot task, upgrade is not allowed temporarily""")
|
||||||
# else:
|
# else:
|
||||||
# logging.info('check tenant snapshot task success')
|
# logging.info('check tenant snapshot task success')
|
||||||
## 19. 检查是否有租户在升到4.3.0版本之前已将binlog_row_image设为MINIMAL
|
#
|
||||||
|
## 17. 检查是否有租户在升到4.3.0版本之前已将binlog_row_image设为MINIMAL
|
||||||
#def check_variable_binlog_row_image(query_cur):
|
#def check_variable_binlog_row_image(query_cur):
|
||||||
## 4.3.0.0之前的版本,MINIMAL模式生成的日志CDC无法正常消费(DELETE日志).
|
## 4.3.0.0之前的版本,MINIMAL模式生成的日志CDC无法正常消费(DELETE日志).
|
||||||
## 4.3.0版本开始,MINIMAL模式做了改进,支持CDC消费,需要在升级到4.3.0.0之后再打开.
|
## 4.3.0版本开始,MINIMAL模式做了改进,支持CDC消费,需要在升级到4.3.0.0之后再打开.
|
||||||
@ -2405,7 +2467,7 @@
|
|||||||
#' that all modules should be run. They are splitted by ",".\n' +\
|
#' that all modules should be run. They are splitted by ",".\n' +\
|
||||||
#' For example: -m all, or --module=ddl,normal_dml,special_action\n' +\
|
#' For example: -m all, or --module=ddl,normal_dml,special_action\n' +\
|
||||||
#'-l, --log-file=name Log file path. If log file path is not given it\'s ' + os.path.splitext(sys.argv[0])[0] + '.log\n' +\
|
#'-l, --log-file=name Log file path. If log file path is not given it\'s ' + os.path.splitext(sys.argv[0])[0] + '.log\n' +\
|
||||||
#'-t, --timeout=name check timeout, default: 600(s).\n' + \
|
#'-t, --timeout=name check timeout.\n' + \
|
||||||
#'-z, --zone=name If zone is not specified, check all servers status in cluster. \n' +\
|
#'-z, --zone=name If zone is not specified, check all servers status in cluster. \n' +\
|
||||||
#' Otherwise, only check servers status in specified zone. \n' + \
|
#' Otherwise, only check servers status in specified zone. \n' + \
|
||||||
#'\n\n' +\
|
#'\n\n' +\
|
||||||
@ -2467,8 +2529,7 @@
|
|||||||
#Option('m', 'module', True, False, 'all'),\
|
#Option('m', 'module', True, False, 'all'),\
|
||||||
## 日志文件路径,不同脚本的main函数中中会改成不同的默认值
|
## 日志文件路径,不同脚本的main函数中中会改成不同的默认值
|
||||||
#Option('l', 'log-file', True, False),\
|
#Option('l', 'log-file', True, False),\
|
||||||
## 一些检查的超时时间,默认是600s
|
#Option('t', 'timeout', True, False, 0),\
|
||||||
#Option('t', 'timeout', True, False, '600'),\
|
|
||||||
#Option('z', 'zone', True, False, ''),\
|
#Option('z', 'zone', True, False, ''),\
|
||||||
#]\
|
#]\
|
||||||
#
|
#
|
||||||
@ -2620,13 +2681,38 @@
|
|||||||
# else:
|
# else:
|
||||||
# logging.info("zone is empty, check all servers in cluster")
|
# logging.info("zone is empty, check all servers in cluster")
|
||||||
#
|
#
|
||||||
|
#def fetch_tenant_ids(query_cur):
|
||||||
|
# try:
|
||||||
|
# tenant_id_list = []
|
||||||
|
# (desc, results) = query_cur.exec_query("""select distinct tenant_id from oceanbase.__all_tenant order by tenant_id desc""")
|
||||||
|
# for r in results:
|
||||||
|
# tenant_id_list.append(r[0])
|
||||||
|
# return tenant_id_list
|
||||||
|
# except Exception, e:
|
||||||
|
# logging.exception('fail to fetch distinct tenant ids')
|
||||||
|
# raise e
|
||||||
|
#
|
||||||
|
#def set_default_timeout_by_tenant(query_cur, timeout, timeout_per_tenant, min_timeout):
|
||||||
|
# if timeout > 0:
|
||||||
|
# logging.info("use timeout from opt, timeout(s):{0}".format(timeout))
|
||||||
|
# else:
|
||||||
|
# tenant_id_list = fetch_tenant_ids(query_cur)
|
||||||
|
# cal_timeout = len(tenant_id_list) * timeout_per_tenant
|
||||||
|
# timeout = (cal_timeout if cal_timeout > min_timeout else min_timeout)
|
||||||
|
# logging.info("use default timeout caculated by tenants, "
|
||||||
|
# "timeout(s):{0}, tenant_count:{1}, "
|
||||||
|
# "timeout_per_tenant(s):{2}, min_timeout(s):{3}"
|
||||||
|
# .format(timeout, len(tenant_id_list), timeout_per_tenant, min_timeout))
|
||||||
|
#
|
||||||
|
# return timeout
|
||||||
|
#
|
||||||
##### START ####
|
##### START ####
|
||||||
## 0. 检查server版本是否严格一致
|
## 0. 检查server版本是否严格一致
|
||||||
#def check_server_version_by_zone(query_cur, zone):
|
#def check_server_version_by_zone(query_cur, zone):
|
||||||
# if zone == '':
|
# if zone == '':
|
||||||
# logging.info("skip check server version by cluster")
|
# logging.info("skip check server version by cluster")
|
||||||
# else:
|
# else:
|
||||||
# sql = """select distinct(substring_index(build_version, '_', 1)) from __all_server where zone = '{0}'""".format(zone);
|
# sql = """select distinct(substring_index(build_version, '_', 1)) from oceanbase.__all_server where zone = '{0}'""".format(zone);
|
||||||
# (desc, results) = query_cur.exec_query(sql);
|
# (desc, results) = query_cur.exec_query(sql);
|
||||||
# if len(results) != 1:
|
# if len(results) != 1:
|
||||||
# raise MyError("servers build_version not match")
|
# raise MyError("servers build_version not match")
|
||||||
@ -2636,8 +2722,9 @@
|
|||||||
## 1. 检查paxos副本是否同步, paxos副本是否缺失
|
## 1. 检查paxos副本是否同步, paxos副本是否缺失
|
||||||
#def check_paxos_replica(query_cur, timeout):
|
#def check_paxos_replica(query_cur, timeout):
|
||||||
# # 1.1 检查paxos副本是否同步
|
# # 1.1 检查paxos副本是否同步
|
||||||
# sql = """select count(*) from GV$OB_LOG_STAT where in_sync = 'NO'"""
|
# sql = """select count(*) from oceanbase.GV$OB_LOG_STAT where in_sync = 'NO'"""
|
||||||
# check_until_timeout(query_cur, sql, 0, timeout)
|
# wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 10, 600)
|
||||||
|
# check_until_timeout(query_cur, sql, 0, wait_timeout)
|
||||||
#
|
#
|
||||||
# # 1.2 检查paxos副本是否有缺失 TODO
|
# # 1.2 检查paxos副本是否有缺失 TODO
|
||||||
# logging.info('check paxos replica success')
|
# logging.info('check paxos replica success')
|
||||||
@ -2647,26 +2734,29 @@
|
|||||||
# sql = """select count(*) from oceanbase.__all_server where (start_service_time <= 0 or status='inactive')"""
|
# sql = """select count(*) from oceanbase.__all_server where (start_service_time <= 0 or status='inactive')"""
|
||||||
# if zone != '':
|
# if zone != '':
|
||||||
# sql += """ and zone = '{0}'""".format(zone)
|
# sql += """ and zone = '{0}'""".format(zone)
|
||||||
# check_until_timeout(query_cur, sql, 0, timeout)
|
# wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 10, 600)
|
||||||
|
# check_until_timeout(query_cur, sql, 0, wait_timeout)
|
||||||
#
|
#
|
||||||
## 3. 检查schema是否刷新成功
|
## 3. 检查schema是否刷新成功
|
||||||
#def check_schema_status(query_cur, timeout):
|
#def check_schema_status(query_cur, timeout):
|
||||||
# sql = """select if (a.cnt = b.cnt, 1, 0) as passed from (select count(*) as cnt from oceanbase.__all_virtual_server_schema_info where refreshed_schema_version > 1 and refreshed_schema_version % 8 = 0) as a join (select count(*) as cnt from oceanbase.__all_server join oceanbase.__all_tenant) as b"""
|
# sql = """select if (a.cnt = b.cnt, 1, 0) as passed from (select count(*) as cnt from oceanbase.__all_virtual_server_schema_info where refreshed_schema_version > 1 and refreshed_schema_version % 8 = 0) as a join (select count(*) as cnt from oceanbase.__all_server join oceanbase.__all_tenant) as b"""
|
||||||
# check_until_timeout(query_cur, sql, 1, timeout)
|
# wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 30, 600)
|
||||||
|
# check_until_timeout(query_cur, sql, 1, wait_timeout)
|
||||||
#
|
#
|
||||||
## 4. check major finish
|
## 4. check major finish
|
||||||
#def check_major_merge(query_cur, timeout):
|
#def check_major_merge(query_cur, timeout):
|
||||||
# need_check = 0
|
# need_check = 0
|
||||||
# (desc, results) = query_cur.exec_query("""select distinct value from GV$OB_PARAMETERs where name = 'enable_major_freeze';""")
|
# (desc, results) = query_cur.exec_query("""select distinct value from oceanbase.GV$OB_PARAMETERS where name = 'enable_major_freeze';""")
|
||||||
# if len(results) != 1:
|
# if len(results) != 1:
|
||||||
# need_check = 1
|
# need_check = 1
|
||||||
# elif results[0][0] != 'True':
|
# elif results[0][0] != 'True':
|
||||||
# need_check = 1
|
# need_check = 1
|
||||||
# if need_check == 1:
|
# if need_check == 1:
|
||||||
# sql = """select count(1) from CDB_OB_MAJOR_COMPACTION where (GLOBAL_BROADCAST_SCN > LAST_SCN or STATUS != 'IDLE')"""
|
# wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 30, 600)
|
||||||
# check_until_timeout(query_cur, sql, 0, timeout)
|
# sql = """select count(1) from oceanbase.CDB_OB_MAJOR_COMPACTION where (GLOBAL_BROADCAST_SCN > LAST_SCN or STATUS != 'IDLE')"""
|
||||||
# sql2 = """select /*+ query_timeout(1000000000) */ count(1) from __all_virtual_tablet_compaction_info where max_received_scn > finished_scn and max_received_scn > 0"""
|
# check_until_timeout(query_cur, sql, 0, wait_timeout)
|
||||||
# check_until_timeout(query_cur, sql2, 0, timeout)
|
# sql2 = """select /*+ query_timeout(1000000000) */ count(1) from oceanbase.__all_virtual_tablet_compaction_info where max_received_scn > finished_scn and max_received_scn > 0"""
|
||||||
|
# check_until_timeout(query_cur, sql2, 0, wait_timeout)
|
||||||
#
|
#
|
||||||
#def check_until_timeout(query_cur, sql, value, timeout):
|
#def check_until_timeout(query_cur, sql, value, timeout):
|
||||||
# times = timeout / 10
|
# times = timeout / 10
|
||||||
@ -2698,7 +2788,6 @@
|
|||||||
# raise_on_warnings = True)
|
# raise_on_warnings = True)
|
||||||
# conn.autocommit = True
|
# conn.autocommit = True
|
||||||
# cur = conn.cursor(buffered=True)
|
# cur = conn.cursor(buffered=True)
|
||||||
# timeout = timeout if timeout > 0 else 600
|
|
||||||
# try:
|
# try:
|
||||||
# query_cur = QueryCursor(cur)
|
# query_cur = QueryCursor(cur)
|
||||||
# check_zone_valid(query_cur, zone)
|
# check_zone_valid(query_cur, zone)
|
||||||
@ -2795,8 +2884,14 @@
|
|||||||
# # check compatible sync
|
# # check compatible sync
|
||||||
# parameter_count = int(server_count) * int(tenant_count)
|
# parameter_count = int(server_count) * int(tenant_count)
|
||||||
# current_data_version = actions.get_current_data_version()
|
# current_data_version = actions.get_current_data_version()
|
||||||
|
#
|
||||||
|
# query_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 2, 60)
|
||||||
|
# actions.set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
# sql = """select count(*) as cnt from oceanbase.__all_virtual_tenant_parameter_info where name = 'compatible' and value = '{0}' and tenant_id in ({1})""".format(current_data_version, tenant_ids_str)
|
# sql = """select count(*) as cnt from oceanbase.__all_virtual_tenant_parameter_info where name = 'compatible' and value = '{0}' and tenant_id in ({1})""".format(current_data_version, tenant_ids_str)
|
||||||
# times = (timeout if timeout > 0 else 60) / 5
|
#
|
||||||
|
# wait_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
# times = wait_timeout / 5
|
||||||
# while times >= 0:
|
# while times >= 0:
|
||||||
# logging.info(sql)
|
# logging.info(sql)
|
||||||
# cur.execute(sql)
|
# cur.execute(sql)
|
||||||
@ -2816,6 +2911,8 @@
|
|||||||
# raise e
|
# raise e
|
||||||
# time.sleep(5)
|
# time.sleep(5)
|
||||||
#
|
#
|
||||||
|
# actions.set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
# # check target_data_version/current_data_version from __all_core_table
|
# # check target_data_version/current_data_version from __all_core_table
|
||||||
# int_current_data_version = actions.get_version(current_data_version)
|
# int_current_data_version = actions.get_version(current_data_version)
|
||||||
# sql = "select count(*) from __all_virtual_core_table where column_name in ('target_data_version', 'current_data_version') and column_value = {0} and tenant_id in ({1})".format(int_current_data_version, tenant_ids_str)
|
# sql = "select count(*) from __all_virtual_core_table where column_name in ('target_data_version', 'current_data_version') and column_value = {0} and tenant_id in ({1})".format(int_current_data_version, tenant_ids_str)
|
||||||
@ -2830,16 +2927,20 @@
|
|||||||
# logging.info("all tenant's target_data_version/current_data_version are match with {0}".format(current_data_version))
|
# logging.info("all tenant's target_data_version/current_data_version are match with {0}".format(current_data_version))
|
||||||
#
|
#
|
||||||
## 3 检查内部表自检是否成功
|
## 3 检查内部表自检是否成功
|
||||||
#def check_root_inspection(query_cur, timeout):
|
#def check_root_inspection(cur, query_cur, timeout):
|
||||||
# sql = "select count(*) from oceanbase.__all_virtual_upgrade_inspection where info != 'succeed'"
|
# sql = "select count(*) from oceanbase.__all_virtual_upgrade_inspection where info != 'succeed'"
|
||||||
# times = timeout if timeout > 0 else 180
|
#
|
||||||
# while times > 0 :
|
# wait_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 10, 600)
|
||||||
|
#
|
||||||
|
# times = wait_timeout / 10
|
||||||
|
# while times >= 0 :
|
||||||
# (desc, results) = query_cur.exec_query(sql)
|
# (desc, results) = query_cur.exec_query(sql)
|
||||||
# if results[0][0] == 0:
|
# if results[0][0] == 0:
|
||||||
# break
|
# break
|
||||||
# time.sleep(10)
|
# time.sleep(10)
|
||||||
# times -= 1
|
# times -= 1
|
||||||
# if times == 0:
|
#
|
||||||
|
# if times == -1:
|
||||||
# logging.warn('check root inspection failed!')
|
# logging.warn('check root inspection failed!')
|
||||||
# raise e
|
# raise e
|
||||||
# logging.info('check root inspection success')
|
# logging.info('check root inspection success')
|
||||||
@ -2867,7 +2968,7 @@
|
|||||||
# try:
|
# try:
|
||||||
# check_cluster_version(cur, timeout)
|
# check_cluster_version(cur, timeout)
|
||||||
# check_data_version(cur, query_cur, timeout)
|
# check_data_version(cur, query_cur, timeout)
|
||||||
# check_root_inspection(query_cur, timeout)
|
# check_root_inspection(cur, query_cur, timeout)
|
||||||
# enable_ddl(cur, timeout)
|
# enable_ddl(cur, timeout)
|
||||||
# enable_rebalance(cur, timeout)
|
# enable_rebalance(cur, timeout)
|
||||||
# enable_rereplication(cur, timeout)
|
# enable_rereplication(cur, timeout)
|
||||||
|
@ -41,8 +41,14 @@ def check_data_version(cur, query_cur, timeout):
|
|||||||
# check compatible sync
|
# check compatible sync
|
||||||
parameter_count = int(server_count) * int(tenant_count)
|
parameter_count = int(server_count) * int(tenant_count)
|
||||||
current_data_version = actions.get_current_data_version()
|
current_data_version = actions.get_current_data_version()
|
||||||
|
|
||||||
|
query_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 2, 60)
|
||||||
|
actions.set_session_timeout(cur, query_timeout)
|
||||||
|
|
||||||
sql = """select count(*) as cnt from oceanbase.__all_virtual_tenant_parameter_info where name = 'compatible' and value = '{0}' and tenant_id in ({1})""".format(current_data_version, tenant_ids_str)
|
sql = """select count(*) as cnt from oceanbase.__all_virtual_tenant_parameter_info where name = 'compatible' and value = '{0}' and tenant_id in ({1})""".format(current_data_version, tenant_ids_str)
|
||||||
times = (timeout if timeout > 0 else 60) / 5
|
|
||||||
|
wait_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
times = wait_timeout / 5
|
||||||
while times >= 0:
|
while times >= 0:
|
||||||
logging.info(sql)
|
logging.info(sql)
|
||||||
cur.execute(sql)
|
cur.execute(sql)
|
||||||
@ -62,6 +68,8 @@ def check_data_version(cur, query_cur, timeout):
|
|||||||
raise e
|
raise e
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
|
actions.set_session_timeout(cur, 10)
|
||||||
|
|
||||||
# check target_data_version/current_data_version from __all_core_table
|
# check target_data_version/current_data_version from __all_core_table
|
||||||
int_current_data_version = actions.get_version(current_data_version)
|
int_current_data_version = actions.get_version(current_data_version)
|
||||||
sql = "select count(*) from __all_virtual_core_table where column_name in ('target_data_version', 'current_data_version') and column_value = {0} and tenant_id in ({1})".format(int_current_data_version, tenant_ids_str)
|
sql = "select count(*) from __all_virtual_core_table where column_name in ('target_data_version', 'current_data_version') and column_value = {0} and tenant_id in ({1})".format(int_current_data_version, tenant_ids_str)
|
||||||
@ -76,16 +84,20 @@ def check_data_version(cur, query_cur, timeout):
|
|||||||
logging.info("all tenant's target_data_version/current_data_version are match with {0}".format(current_data_version))
|
logging.info("all tenant's target_data_version/current_data_version are match with {0}".format(current_data_version))
|
||||||
|
|
||||||
# 3 检查内部表自检是否成功
|
# 3 检查内部表自检是否成功
|
||||||
def check_root_inspection(query_cur, timeout):
|
def check_root_inspection(cur, query_cur, timeout):
|
||||||
sql = "select count(*) from oceanbase.__all_virtual_upgrade_inspection where info != 'succeed'"
|
sql = "select count(*) from oceanbase.__all_virtual_upgrade_inspection where info != 'succeed'"
|
||||||
times = timeout if timeout > 0 else 180
|
|
||||||
while times > 0 :
|
wait_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 10, 600)
|
||||||
|
|
||||||
|
times = wait_timeout / 10
|
||||||
|
while times >= 0 :
|
||||||
(desc, results) = query_cur.exec_query(sql)
|
(desc, results) = query_cur.exec_query(sql)
|
||||||
if results[0][0] == 0:
|
if results[0][0] == 0:
|
||||||
break
|
break
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
times -= 1
|
times -= 1
|
||||||
if times == 0:
|
|
||||||
|
if times == -1:
|
||||||
logging.warn('check root inspection failed!')
|
logging.warn('check root inspection failed!')
|
||||||
raise e
|
raise e
|
||||||
logging.info('check root inspection success')
|
logging.info('check root inspection success')
|
||||||
@ -113,7 +125,7 @@ def do_check(conn, cur, query_cur, timeout):
|
|||||||
try:
|
try:
|
||||||
check_cluster_version(cur, timeout)
|
check_cluster_version(cur, timeout)
|
||||||
check_data_version(cur, query_cur, timeout)
|
check_data_version(cur, query_cur, timeout)
|
||||||
check_root_inspection(query_cur, timeout)
|
check_root_inspection(cur, query_cur, timeout)
|
||||||
enable_ddl(cur, timeout)
|
enable_ddl(cur, timeout)
|
||||||
enable_rebalance(cur, timeout)
|
enable_rebalance(cur, timeout)
|
||||||
enable_rereplication(cur, timeout)
|
enable_rereplication(cur, timeout)
|
||||||
|
@ -135,16 +135,45 @@
|
|||||||
# cur.execute(sql)
|
# cur.execute(sql)
|
||||||
# wait_parameter_sync(cur, False, parameter, value, timeout)
|
# wait_parameter_sync(cur, False, parameter, value, timeout)
|
||||||
#
|
#
|
||||||
|
#def set_session_timeout(cur, seconds):
|
||||||
|
# sql = "set @@session.ob_query_timeout = {0}".format(seconds * 1000 * 1000)
|
||||||
|
# logging.info(sql)
|
||||||
|
# cur.execute(sql)
|
||||||
|
#
|
||||||
|
#def set_default_timeout_by_tenant(cur, timeout, timeout_per_tenant, min_timeout):
|
||||||
|
# if timeout > 0:
|
||||||
|
# logging.info("use timeout from opt, timeout(s):{0}".format(timeout))
|
||||||
|
# else:
|
||||||
|
# query_cur = QueryCursor(cur)
|
||||||
|
# tenant_id_list = fetch_tenant_ids(query_cur)
|
||||||
|
# cal_timeout = len(tenant_id_list) * timeout_per_tenant
|
||||||
|
# timeout = (cal_timeout if cal_timeout > min_timeout else min_timeout)
|
||||||
|
# logging.info("use default timeout caculated by tenants, "
|
||||||
|
# "timeout(s):{0}, tenant_count:{1}, "
|
||||||
|
# "timeout_per_tenant(s):{2}, min_timeout(s):{3}"
|
||||||
|
# .format(timeout, len(tenant_id_list), timeout_per_tenant, min_timeout))
|
||||||
|
#
|
||||||
|
# return timeout
|
||||||
|
#
|
||||||
#def set_tenant_parameter(cur, parameter, value, timeout = 0):
|
#def set_tenant_parameter(cur, parameter, value, timeout = 0):
|
||||||
|
#
|
||||||
# tenants_list = []
|
# tenants_list = []
|
||||||
# if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
# if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
||||||
# tenants_list = ['all']
|
# tenants_list = ['all']
|
||||||
# else:
|
# else:
|
||||||
# tenants_list = ['sys', 'all_user', 'all_meta']
|
# tenants_list = ['sys', 'all_user', 'all_meta']
|
||||||
|
#
|
||||||
|
# query_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
#
|
||||||
|
# set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
# for tenants in tenants_list:
|
# for tenants in tenants_list:
|
||||||
# sql = """alter system set {0} = '{1}' tenant = '{2}'""".format(parameter, value, tenants)
|
# sql = """alter system set {0} = '{1}' tenant = '{2}'""".format(parameter, value, tenants)
|
||||||
# logging.info(sql)
|
# logging.info(sql)
|
||||||
# cur.execute(sql)
|
# cur.execute(sql)
|
||||||
|
#
|
||||||
|
# set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
# wait_parameter_sync(cur, True, parameter, value, timeout)
|
# wait_parameter_sync(cur, True, parameter, value, timeout)
|
||||||
#
|
#
|
||||||
#def get_ori_enable_ddl(cur, timeout):
|
#def get_ori_enable_ddl(cur, timeout):
|
||||||
@ -233,7 +262,20 @@
|
|||||||
# table_name = "GV$OB_PARAMETERS" if not is_tenant_config else "__all_virtual_tenant_parameter_info"
|
# table_name = "GV$OB_PARAMETERS" if not is_tenant_config else "__all_virtual_tenant_parameter_info"
|
||||||
# sql = """select count(*) as cnt from oceanbase.{0}
|
# sql = """select count(*) as cnt from oceanbase.{0}
|
||||||
# where name = '{1}' and value != '{2}'""".format(table_name, key, value)
|
# where name = '{1}' and value != '{2}'""".format(table_name, key, value)
|
||||||
# times = (timeout if timeout > 0 else 60) / 5
|
#
|
||||||
|
# wait_timeout = 0
|
||||||
|
# query_timeout = 0
|
||||||
|
# if not is_tenant_config or timeout > 0:
|
||||||
|
# wait_timeout = (timeout if timeout > 0 else 60)
|
||||||
|
# query_timeout = wait_timeout
|
||||||
|
# else:
|
||||||
|
# # is_tenant_config & timeout not set
|
||||||
|
# wait_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
# query_timeout = set_default_timeout_by_tenant(cur, timeout, 2, 60)
|
||||||
|
#
|
||||||
|
# set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
|
# times = wait_timeout / 5
|
||||||
# while times >= 0:
|
# while times >= 0:
|
||||||
# logging.info(sql)
|
# logging.info(sql)
|
||||||
# cur.execute(sql)
|
# cur.execute(sql)
|
||||||
@ -253,6 +295,8 @@
|
|||||||
# raise e
|
# raise e
|
||||||
# time.sleep(5)
|
# time.sleep(5)
|
||||||
#
|
#
|
||||||
|
# set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
#def do_begin_upgrade(cur, timeout):
|
#def do_begin_upgrade(cur, timeout):
|
||||||
#
|
#
|
||||||
# if not check_parameter(cur, False, "enable_upgrade_mode", "True"):
|
# if not check_parameter(cur, False, "enable_upgrade_mode", "True"):
|
||||||
@ -324,24 +368,38 @@
|
|||||||
# tenants_list = ['all']
|
# tenants_list = ['all']
|
||||||
# else:
|
# else:
|
||||||
# tenants_list = ['sys', 'all_user', 'all_meta']
|
# tenants_list = ['sys', 'all_user', 'all_meta']
|
||||||
|
#
|
||||||
|
# query_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
#
|
||||||
|
# set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
# for tenants in tenants_list:
|
# for tenants in tenants_list:
|
||||||
# action_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
# action_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
||||||
# rollback_sql = "alter system resume merge tenant = {0}".format(tenants)
|
# rollback_sql = "alter system resume merge tenant = {0}".format(tenants)
|
||||||
# logging.info(action_sql)
|
# logging.info(action_sql)
|
||||||
# cur.execute(action_sql)
|
# cur.execute(action_sql)
|
||||||
#
|
#
|
||||||
|
# set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
#def do_resume_merge(cur, timeout):
|
#def do_resume_merge(cur, timeout):
|
||||||
# tenants_list = []
|
# tenants_list = []
|
||||||
# if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
# if get_min_cluster_version(cur) < get_version("4.2.1.0"):
|
||||||
# tenants_list = ['all']
|
# tenants_list = ['all']
|
||||||
# else:
|
# else:
|
||||||
# tenants_list = ['sys', 'all_user', 'all_meta']
|
# tenants_list = ['sys', 'all_user', 'all_meta']
|
||||||
|
#
|
||||||
|
# query_timeout = set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
#
|
||||||
|
# set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
# for tenants in tenants_list:
|
# for tenants in tenants_list:
|
||||||
# action_sql = "alter system resume merge tenant = {0}".format(tenants)
|
# action_sql = "alter system resume merge tenant = {0}".format(tenants)
|
||||||
# rollback_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
# rollback_sql = "alter system suspend merge tenant = {0}".format(tenants)
|
||||||
# logging.info(action_sql)
|
# logging.info(action_sql)
|
||||||
# cur.execute(action_sql)
|
# cur.execute(action_sql)
|
||||||
#
|
#
|
||||||
|
# set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
#class Cursor:
|
#class Cursor:
|
||||||
# __cursor = None
|
# __cursor = None
|
||||||
# def __init__(self, cursor):
|
# def __init__(self, cursor):
|
||||||
@ -1334,17 +1392,7 @@
|
|||||||
# else:
|
# else:
|
||||||
# run_upgrade_job(conn, cur, "UPGRADE_VIRTUAL_SCHEMA", timeout)
|
# run_upgrade_job(conn, cur, "UPGRADE_VIRTUAL_SCHEMA", timeout)
|
||||||
#
|
#
|
||||||
# # just to make __all_virtual_upgrade_inspection avaliable
|
# run_root_inspection(cur, timeout)
|
||||||
# timeout_ts = (timeout if timeout > 0 else 600) * 1000 * 1000
|
|
||||||
# sql = "set @@session.ob_query_timeout = {0}".format(timeout_ts)
|
|
||||||
# logging.info(sql)
|
|
||||||
# cur.execute(sql)
|
|
||||||
# sql = "alter system run job 'root_inspection'"
|
|
||||||
# logging.info(sql)
|
|
||||||
# cur.execute(sql)
|
|
||||||
# sql = "set @@session.ob_query_timeout = 10000000"
|
|
||||||
# logging.info(sql)
|
|
||||||
# cur.execute(sql)
|
|
||||||
#####========******####======== actions begin ========####******========####
|
#####========******####======== actions begin ========####******========####
|
||||||
# upgrade_syslog_level(conn, cur)
|
# upgrade_syslog_level(conn, cur)
|
||||||
# return
|
# return
|
||||||
@ -1360,7 +1408,6 @@
|
|||||||
# info_cnt = result[0][0]
|
# info_cnt = result[0][0]
|
||||||
# if info_cnt > 0:
|
# if info_cnt > 0:
|
||||||
# actions.set_parameter(cur, "syslog_level", "WDIAG")
|
# actions.set_parameter(cur, "syslog_level", "WDIAG")
|
||||||
#
|
|
||||||
# except Exception, e:
|
# except Exception, e:
|
||||||
# logging.warn("upgrade syslog level failed!")
|
# logging.warn("upgrade syslog level failed!")
|
||||||
# raise e
|
# raise e
|
||||||
@ -1375,6 +1422,18 @@
|
|||||||
#def get_tenant_ids(cur):
|
#def get_tenant_ids(cur):
|
||||||
# return [_[0] for _ in query(cur, 'select tenant_id from oceanbase.__all_tenant')]
|
# return [_[0] for _ in query(cur, 'select tenant_id from oceanbase.__all_tenant')]
|
||||||
#
|
#
|
||||||
|
#def run_root_inspection(cur, timeout):
|
||||||
|
#
|
||||||
|
# query_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 10, 600)
|
||||||
|
#
|
||||||
|
# actions.set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
|
# sql = "alter system run job 'root_inspection'"
|
||||||
|
# logging.info(sql)
|
||||||
|
# cur.execute(sql)
|
||||||
|
#
|
||||||
|
# actions.set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
#def upgrade_across_version(cur):
|
#def upgrade_across_version(cur):
|
||||||
# current_data_version = actions.get_current_data_version()
|
# current_data_version = actions.get_current_data_version()
|
||||||
# int_current_data_version = actions.get_version(current_data_version)
|
# int_current_data_version = actions.get_version(current_data_version)
|
||||||
@ -1480,7 +1539,9 @@
|
|||||||
#
|
#
|
||||||
#def check_upgrade_job_result(cur, job_name, timeout, max_used_job_id):
|
#def check_upgrade_job_result(cur, job_name, timeout, max_used_job_id):
|
||||||
# try:
|
# try:
|
||||||
# times = (timeout if timeout > 0 else 3600) / 10
|
# wait_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 100, 3600)
|
||||||
|
#
|
||||||
|
# times = wait_timeout / 10
|
||||||
# while (times >= 0):
|
# while (times >= 0):
|
||||||
# sql = """select job_status, rs_svr_ip, rs_svr_port, gmt_create from oceanbase.__all_rootservice_job
|
# sql = """select job_status, rs_svr_ip, rs_svr_port, gmt_create from oceanbase.__all_rootservice_job
|
||||||
# where job_type = '{0}' and job_id > {1} order by job_id desc limit 1
|
# where job_type = '{0}' and job_id > {1} order by job_id desc limit 1
|
||||||
@ -2217,7 +2278,8 @@
|
|||||||
# fail_list.append("""still has tenant snapshot task, upgrade is not allowed temporarily""")
|
# fail_list.append("""still has tenant snapshot task, upgrade is not allowed temporarily""")
|
||||||
# else:
|
# else:
|
||||||
# logging.info('check tenant snapshot task success')
|
# logging.info('check tenant snapshot task success')
|
||||||
## 19. 检查是否有租户在升到4.3.0版本之前已将binlog_row_image设为MINIMAL
|
#
|
||||||
|
## 17. 检查是否有租户在升到4.3.0版本之前已将binlog_row_image设为MINIMAL
|
||||||
#def check_variable_binlog_row_image(query_cur):
|
#def check_variable_binlog_row_image(query_cur):
|
||||||
## 4.3.0.0之前的版本,MINIMAL模式生成的日志CDC无法正常消费(DELETE日志).
|
## 4.3.0.0之前的版本,MINIMAL模式生成的日志CDC无法正常消费(DELETE日志).
|
||||||
## 4.3.0版本开始,MINIMAL模式做了改进,支持CDC消费,需要在升级到4.3.0.0之后再打开.
|
## 4.3.0版本开始,MINIMAL模式做了改进,支持CDC消费,需要在升级到4.3.0.0之后再打开.
|
||||||
@ -2405,7 +2467,7 @@
|
|||||||
#' that all modules should be run. They are splitted by ",".\n' +\
|
#' that all modules should be run. They are splitted by ",".\n' +\
|
||||||
#' For example: -m all, or --module=ddl,normal_dml,special_action\n' +\
|
#' For example: -m all, or --module=ddl,normal_dml,special_action\n' +\
|
||||||
#'-l, --log-file=name Log file path. If log file path is not given it\'s ' + os.path.splitext(sys.argv[0])[0] + '.log\n' +\
|
#'-l, --log-file=name Log file path. If log file path is not given it\'s ' + os.path.splitext(sys.argv[0])[0] + '.log\n' +\
|
||||||
#'-t, --timeout=name check timeout, default: 600(s).\n' + \
|
#'-t, --timeout=name check timeout.\n' + \
|
||||||
#'-z, --zone=name If zone is not specified, check all servers status in cluster. \n' +\
|
#'-z, --zone=name If zone is not specified, check all servers status in cluster. \n' +\
|
||||||
#' Otherwise, only check servers status in specified zone. \n' + \
|
#' Otherwise, only check servers status in specified zone. \n' + \
|
||||||
#'\n\n' +\
|
#'\n\n' +\
|
||||||
@ -2467,8 +2529,7 @@
|
|||||||
#Option('m', 'module', True, False, 'all'),\
|
#Option('m', 'module', True, False, 'all'),\
|
||||||
## 日志文件路径,不同脚本的main函数中中会改成不同的默认值
|
## 日志文件路径,不同脚本的main函数中中会改成不同的默认值
|
||||||
#Option('l', 'log-file', True, False),\
|
#Option('l', 'log-file', True, False),\
|
||||||
## 一些检查的超时时间,默认是600s
|
#Option('t', 'timeout', True, False, 0),\
|
||||||
#Option('t', 'timeout', True, False, '600'),\
|
|
||||||
#Option('z', 'zone', True, False, ''),\
|
#Option('z', 'zone', True, False, ''),\
|
||||||
#]\
|
#]\
|
||||||
#
|
#
|
||||||
@ -2620,13 +2681,38 @@
|
|||||||
# else:
|
# else:
|
||||||
# logging.info("zone is empty, check all servers in cluster")
|
# logging.info("zone is empty, check all servers in cluster")
|
||||||
#
|
#
|
||||||
|
#def fetch_tenant_ids(query_cur):
|
||||||
|
# try:
|
||||||
|
# tenant_id_list = []
|
||||||
|
# (desc, results) = query_cur.exec_query("""select distinct tenant_id from oceanbase.__all_tenant order by tenant_id desc""")
|
||||||
|
# for r in results:
|
||||||
|
# tenant_id_list.append(r[0])
|
||||||
|
# return tenant_id_list
|
||||||
|
# except Exception, e:
|
||||||
|
# logging.exception('fail to fetch distinct tenant ids')
|
||||||
|
# raise e
|
||||||
|
#
|
||||||
|
#def set_default_timeout_by_tenant(query_cur, timeout, timeout_per_tenant, min_timeout):
|
||||||
|
# if timeout > 0:
|
||||||
|
# logging.info("use timeout from opt, timeout(s):{0}".format(timeout))
|
||||||
|
# else:
|
||||||
|
# tenant_id_list = fetch_tenant_ids(query_cur)
|
||||||
|
# cal_timeout = len(tenant_id_list) * timeout_per_tenant
|
||||||
|
# timeout = (cal_timeout if cal_timeout > min_timeout else min_timeout)
|
||||||
|
# logging.info("use default timeout caculated by tenants, "
|
||||||
|
# "timeout(s):{0}, tenant_count:{1}, "
|
||||||
|
# "timeout_per_tenant(s):{2}, min_timeout(s):{3}"
|
||||||
|
# .format(timeout, len(tenant_id_list), timeout_per_tenant, min_timeout))
|
||||||
|
#
|
||||||
|
# return timeout
|
||||||
|
#
|
||||||
##### START ####
|
##### START ####
|
||||||
## 0. 检查server版本是否严格一致
|
## 0. 检查server版本是否严格一致
|
||||||
#def check_server_version_by_zone(query_cur, zone):
|
#def check_server_version_by_zone(query_cur, zone):
|
||||||
# if zone == '':
|
# if zone == '':
|
||||||
# logging.info("skip check server version by cluster")
|
# logging.info("skip check server version by cluster")
|
||||||
# else:
|
# else:
|
||||||
# sql = """select distinct(substring_index(build_version, '_', 1)) from __all_server where zone = '{0}'""".format(zone);
|
# sql = """select distinct(substring_index(build_version, '_', 1)) from oceanbase.__all_server where zone = '{0}'""".format(zone);
|
||||||
# (desc, results) = query_cur.exec_query(sql);
|
# (desc, results) = query_cur.exec_query(sql);
|
||||||
# if len(results) != 1:
|
# if len(results) != 1:
|
||||||
# raise MyError("servers build_version not match")
|
# raise MyError("servers build_version not match")
|
||||||
@ -2636,8 +2722,9 @@
|
|||||||
## 1. 检查paxos副本是否同步, paxos副本是否缺失
|
## 1. 检查paxos副本是否同步, paxos副本是否缺失
|
||||||
#def check_paxos_replica(query_cur, timeout):
|
#def check_paxos_replica(query_cur, timeout):
|
||||||
# # 1.1 检查paxos副本是否同步
|
# # 1.1 检查paxos副本是否同步
|
||||||
# sql = """select count(*) from GV$OB_LOG_STAT where in_sync = 'NO'"""
|
# sql = """select count(*) from oceanbase.GV$OB_LOG_STAT where in_sync = 'NO'"""
|
||||||
# check_until_timeout(query_cur, sql, 0, timeout)
|
# wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 10, 600)
|
||||||
|
# check_until_timeout(query_cur, sql, 0, wait_timeout)
|
||||||
#
|
#
|
||||||
# # 1.2 检查paxos副本是否有缺失 TODO
|
# # 1.2 检查paxos副本是否有缺失 TODO
|
||||||
# logging.info('check paxos replica success')
|
# logging.info('check paxos replica success')
|
||||||
@ -2647,26 +2734,29 @@
|
|||||||
# sql = """select count(*) from oceanbase.__all_server where (start_service_time <= 0 or status='inactive')"""
|
# sql = """select count(*) from oceanbase.__all_server where (start_service_time <= 0 or status='inactive')"""
|
||||||
# if zone != '':
|
# if zone != '':
|
||||||
# sql += """ and zone = '{0}'""".format(zone)
|
# sql += """ and zone = '{0}'""".format(zone)
|
||||||
# check_until_timeout(query_cur, sql, 0, timeout)
|
# wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 10, 600)
|
||||||
|
# check_until_timeout(query_cur, sql, 0, wait_timeout)
|
||||||
#
|
#
|
||||||
## 3. 检查schema是否刷新成功
|
## 3. 检查schema是否刷新成功
|
||||||
#def check_schema_status(query_cur, timeout):
|
#def check_schema_status(query_cur, timeout):
|
||||||
# sql = """select if (a.cnt = b.cnt, 1, 0) as passed from (select count(*) as cnt from oceanbase.__all_virtual_server_schema_info where refreshed_schema_version > 1 and refreshed_schema_version % 8 = 0) as a join (select count(*) as cnt from oceanbase.__all_server join oceanbase.__all_tenant) as b"""
|
# sql = """select if (a.cnt = b.cnt, 1, 0) as passed from (select count(*) as cnt from oceanbase.__all_virtual_server_schema_info where refreshed_schema_version > 1 and refreshed_schema_version % 8 = 0) as a join (select count(*) as cnt from oceanbase.__all_server join oceanbase.__all_tenant) as b"""
|
||||||
# check_until_timeout(query_cur, sql, 1, timeout)
|
# wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 30, 600)
|
||||||
|
# check_until_timeout(query_cur, sql, 1, wait_timeout)
|
||||||
#
|
#
|
||||||
## 4. check major finish
|
## 4. check major finish
|
||||||
#def check_major_merge(query_cur, timeout):
|
#def check_major_merge(query_cur, timeout):
|
||||||
# need_check = 0
|
# need_check = 0
|
||||||
# (desc, results) = query_cur.exec_query("""select distinct value from GV$OB_PARAMETERs where name = 'enable_major_freeze';""")
|
# (desc, results) = query_cur.exec_query("""select distinct value from oceanbase.GV$OB_PARAMETERS where name = 'enable_major_freeze';""")
|
||||||
# if len(results) != 1:
|
# if len(results) != 1:
|
||||||
# need_check = 1
|
# need_check = 1
|
||||||
# elif results[0][0] != 'True':
|
# elif results[0][0] != 'True':
|
||||||
# need_check = 1
|
# need_check = 1
|
||||||
# if need_check == 1:
|
# if need_check == 1:
|
||||||
# sql = """select count(1) from CDB_OB_MAJOR_COMPACTION where (GLOBAL_BROADCAST_SCN > LAST_SCN or STATUS != 'IDLE')"""
|
# wait_timeout = set_default_timeout_by_tenant(query_cur, timeout, 30, 600)
|
||||||
# check_until_timeout(query_cur, sql, 0, timeout)
|
# sql = """select count(1) from oceanbase.CDB_OB_MAJOR_COMPACTION where (GLOBAL_BROADCAST_SCN > LAST_SCN or STATUS != 'IDLE')"""
|
||||||
# sql2 = """select /*+ query_timeout(1000000000) */ count(1) from __all_virtual_tablet_compaction_info where max_received_scn > finished_scn and max_received_scn > 0"""
|
# check_until_timeout(query_cur, sql, 0, wait_timeout)
|
||||||
# check_until_timeout(query_cur, sql2, 0, timeout)
|
# sql2 = """select /*+ query_timeout(1000000000) */ count(1) from oceanbase.__all_virtual_tablet_compaction_info where max_received_scn > finished_scn and max_received_scn > 0"""
|
||||||
|
# check_until_timeout(query_cur, sql2, 0, wait_timeout)
|
||||||
#
|
#
|
||||||
#def check_until_timeout(query_cur, sql, value, timeout):
|
#def check_until_timeout(query_cur, sql, value, timeout):
|
||||||
# times = timeout / 10
|
# times = timeout / 10
|
||||||
@ -2698,7 +2788,6 @@
|
|||||||
# raise_on_warnings = True)
|
# raise_on_warnings = True)
|
||||||
# conn.autocommit = True
|
# conn.autocommit = True
|
||||||
# cur = conn.cursor(buffered=True)
|
# cur = conn.cursor(buffered=True)
|
||||||
# timeout = timeout if timeout > 0 else 600
|
|
||||||
# try:
|
# try:
|
||||||
# query_cur = QueryCursor(cur)
|
# query_cur = QueryCursor(cur)
|
||||||
# check_zone_valid(query_cur, zone)
|
# check_zone_valid(query_cur, zone)
|
||||||
@ -2795,8 +2884,14 @@
|
|||||||
# # check compatible sync
|
# # check compatible sync
|
||||||
# parameter_count = int(server_count) * int(tenant_count)
|
# parameter_count = int(server_count) * int(tenant_count)
|
||||||
# current_data_version = actions.get_current_data_version()
|
# current_data_version = actions.get_current_data_version()
|
||||||
|
#
|
||||||
|
# query_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 2, 60)
|
||||||
|
# actions.set_session_timeout(cur, query_timeout)
|
||||||
|
#
|
||||||
# sql = """select count(*) as cnt from oceanbase.__all_virtual_tenant_parameter_info where name = 'compatible' and value = '{0}' and tenant_id in ({1})""".format(current_data_version, tenant_ids_str)
|
# sql = """select count(*) as cnt from oceanbase.__all_virtual_tenant_parameter_info where name = 'compatible' and value = '{0}' and tenant_id in ({1})""".format(current_data_version, tenant_ids_str)
|
||||||
# times = (timeout if timeout > 0 else 60) / 5
|
#
|
||||||
|
# wait_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 10, 60)
|
||||||
|
# times = wait_timeout / 5
|
||||||
# while times >= 0:
|
# while times >= 0:
|
||||||
# logging.info(sql)
|
# logging.info(sql)
|
||||||
# cur.execute(sql)
|
# cur.execute(sql)
|
||||||
@ -2816,6 +2911,8 @@
|
|||||||
# raise e
|
# raise e
|
||||||
# time.sleep(5)
|
# time.sleep(5)
|
||||||
#
|
#
|
||||||
|
# actions.set_session_timeout(cur, 10)
|
||||||
|
#
|
||||||
# # check target_data_version/current_data_version from __all_core_table
|
# # check target_data_version/current_data_version from __all_core_table
|
||||||
# int_current_data_version = actions.get_version(current_data_version)
|
# int_current_data_version = actions.get_version(current_data_version)
|
||||||
# sql = "select count(*) from __all_virtual_core_table where column_name in ('target_data_version', 'current_data_version') and column_value = {0} and tenant_id in ({1})".format(int_current_data_version, tenant_ids_str)
|
# sql = "select count(*) from __all_virtual_core_table where column_name in ('target_data_version', 'current_data_version') and column_value = {0} and tenant_id in ({1})".format(int_current_data_version, tenant_ids_str)
|
||||||
@ -2830,16 +2927,20 @@
|
|||||||
# logging.info("all tenant's target_data_version/current_data_version are match with {0}".format(current_data_version))
|
# logging.info("all tenant's target_data_version/current_data_version are match with {0}".format(current_data_version))
|
||||||
#
|
#
|
||||||
## 3 检查内部表自检是否成功
|
## 3 检查内部表自检是否成功
|
||||||
#def check_root_inspection(query_cur, timeout):
|
#def check_root_inspection(cur, query_cur, timeout):
|
||||||
# sql = "select count(*) from oceanbase.__all_virtual_upgrade_inspection where info != 'succeed'"
|
# sql = "select count(*) from oceanbase.__all_virtual_upgrade_inspection where info != 'succeed'"
|
||||||
# times = timeout if timeout > 0 else 180
|
#
|
||||||
# while times > 0 :
|
# wait_timeout = actions.set_default_timeout_by_tenant(cur, timeout, 10, 600)
|
||||||
|
#
|
||||||
|
# times = wait_timeout / 10
|
||||||
|
# while times >= 0 :
|
||||||
# (desc, results) = query_cur.exec_query(sql)
|
# (desc, results) = query_cur.exec_query(sql)
|
||||||
# if results[0][0] == 0:
|
# if results[0][0] == 0:
|
||||||
# break
|
# break
|
||||||
# time.sleep(10)
|
# time.sleep(10)
|
||||||
# times -= 1
|
# times -= 1
|
||||||
# if times == 0:
|
#
|
||||||
|
# if times == -1:
|
||||||
# logging.warn('check root inspection failed!')
|
# logging.warn('check root inspection failed!')
|
||||||
# raise e
|
# raise e
|
||||||
# logging.info('check root inspection success')
|
# logging.info('check root inspection success')
|
||||||
@ -2867,7 +2968,7 @@
|
|||||||
# try:
|
# try:
|
||||||
# check_cluster_version(cur, timeout)
|
# check_cluster_version(cur, timeout)
|
||||||
# check_data_version(cur, query_cur, timeout)
|
# check_data_version(cur, query_cur, timeout)
|
||||||
# check_root_inspection(query_cur, timeout)
|
# check_root_inspection(cur, query_cur, timeout)
|
||||||
# enable_ddl(cur, timeout)
|
# enable_ddl(cur, timeout)
|
||||||
# enable_rebalance(cur, timeout)
|
# enable_rebalance(cur, timeout)
|
||||||
# enable_rereplication(cur, timeout)
|
# enable_rereplication(cur, timeout)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user