[scn] fix mittest failures after the SCN refresh feature

This commit is contained in:
obdev
2022-11-28 01:46:42 +00:00
committed by ob-robot
parent 49a02f3304
commit 54b64a7263
1898 changed files with 255804 additions and 280809 deletions

View File

@ -340,3 +340,214 @@ def fetch_tenant_ids(query_cur):
logging.exception('fail to fetch distinct tenant ids')
raise e
def check_current_cluster_is_primary(query_cur):
  """Return True if the cluster behind query_cur is a usable PRIMARY cluster.

  A cluster qualifies when v$ob_cluster reports cluster_role = PRIMARY,
  cluster_status = VALID, and switchover_status is either "NOT ALLOWED" or
  "TO STANDBY".

  :param query_cur: cursor wrapper exposing exec_query(sql) -> (desc, rows)
  :return: bool, True when at least one matching row exists
  :raises: re-raises any error from query execution after logging it
  """
  try:
    # NOTE(review): string literals are double-quoted; this relies on the
    # server NOT running in ANSI_QUOTES sql_mode -- confirm deployment defaults.
    sql = """SELECT * FROM v$ob_cluster
             WHERE cluster_role = "PRIMARY"
             AND cluster_status = "VALID"
             AND (switchover_status = "NOT ALLOWED" OR switchover_status = "TO STANDBY") """
    (desc, results) = query_cur.exec_query(sql)
    return len(results) > 0
  except Exception:
    # Bare `raise` (instead of the py2-only `except Exception, e` + `raise e`)
    # keeps the original traceback and works on both Python 2.6+ and 3.
    logging.exception("""fail to check current is primary""")
    raise
def fetch_standby_cluster_infos(conn, query_cur, user, pwd):
  """Collect connection info for every standby cluster registered on the primary.

  Must run against the PRIMARY cluster. For each standby found in
  v$ob_standby_status, builds a dict with keys cluster_id/user/pwd/ip/port,
  then opens a short-lived probe connection to verify that standby has not
  itself become primary (i.e. no switchover happened underneath us).

  :param conn: open connection to the primary (committed before/after probing)
  :param query_cur: cursor wrapper on the primary
  :param user: user name to use when connecting to standby clusters
  :param pwd: password to use when connecting to standby clusters
  :return: list of standby cluster info dicts
  :raises Exception: when not on the primary, on malformed rows, or when a
      probed standby reports itself as primary
  """
  try:
    if not check_current_cluster_is_primary(query_cur):
      logging.exception("""should be primary cluster""")
      # Original code did `raise e` with `e` undefined, which masked the real
      # error with a NameError; raise an explicit exception instead.
      raise Exception("should be primary cluster")
    standby_cluster_infos = []
    sql = """SELECT cluster_id, rootservice_list from v$ob_standby_status"""
    (desc, results) = query_cur.exec_query(sql)
    for r in results:
      if 2 != len(r):
        logging.exception("length not match")
        raise Exception("length not match")
      standby_cluster_info = {}
      standby_cluster_info['cluster_id'] = r[0]
      standby_cluster_info['user'] = user
      standby_cluster_info['pwd'] = pwd
      # rootservice_list is a ';'-separated list of addresses; take the first.
      # NOTE(review): assumes each address has 3 ':'-separated fields with the
      # SQL port at index 2 (ip:rpc_port:sql_port) -- confirm with rs_list format.
      address = r[1].split(";")[0] # choose first address in rs_list
      standby_cluster_info['ip'] = str(address.split(":")[0])
      standby_cluster_info['port'] = address.split(":")[2]
      standby_cluster_infos.append(standby_cluster_info)
      logging.info("""cluster_info : cluster_id = {0}, ip = {1}, port = {2}"""
                   .format(standby_cluster_info['cluster_id'],
                           standby_cluster_info['ip'],
                           standby_cluster_info['port']))
    conn.commit()
    # Probe each standby cluster to make sure the primary has not switched.
    for standby_cluster_info in standby_cluster_infos:
      logging.info("""create connection : cluster_id = {0}, ip = {1}, port = {2}"""
                   .format(standby_cluster_info['cluster_id'],
                           standby_cluster_info['ip'],
                           standby_cluster_info['port']))
      tmp_conn = mysql.connector.connect(user = standby_cluster_info['user'],
                                         password = standby_cluster_info['pwd'],
                                         host = standby_cluster_info['ip'],
                                         port = standby_cluster_info['port'],
                                         database = 'oceanbase',
                                         raise_on_warnings = True)
      try:
        # try/finally so the probe connection/cursor are closed even when the
        # primary-changed check below raises (the original leaked them).
        tmp_cur = tmp_conn.cursor(buffered=True)
        try:
          tmp_conn.autocommit = True
          tmp_query_cur = QueryCursor(tmp_cur)
          if check_current_cluster_is_primary(tmp_query_cur):
            logging.exception("""primary cluster changed : cluster_id = {0}, ip = {1}, port = {2}"""
                              .format(standby_cluster_info['cluster_id'],
                                      standby_cluster_info['ip'],
                                      standby_cluster_info['port']))
            raise Exception("primary cluster changed")
        finally:
          tmp_cur.close()
      finally:
        tmp_conn.close()
    return standby_cluster_infos
  except Exception:
    logging.exception('fail to fetch standby cluster info')
    raise
def check_ddl_and_dml_sync(conn, query_cur, standby_cluster_infos, tenant_ids):
  """Verify every standby cluster has caught up with the primary's DDL/DML.

  Reads per-tenant sync baselines (schema version plus sys/user table SCNs)
  from the primary's v$ob_cluster_stats, then waits for each standby cluster
  to reach those baselines via check_ddl_and_dml_sync_by_cluster.

  :param conn: open connection to the primary
  :param query_cur: cursor wrapper on the primary
  :param standby_cluster_infos: dicts from fetch_standby_cluster_infos()
  :param tenant_ids: expected tenant ids; must match the query result row-for-row
  :raises Exception: when not on the primary or the stats rows are inconsistent
  """
  try:
    conn.commit()
    # Only meaningful when run on the primary cluster.
    if not check_current_cluster_is_primary(query_cur):
      logging.exception("""should be primary cluster""")
      # Original `raise e` referenced an undefined name; raise explicitly.
      raise Exception("should be primary cluster")
    # Fetch per-tenant sync baselines from the primary.
    sys_infos = []
    sql = """SELECT tenant_id,
                    refreshed_schema_version,
                    min_sys_table_scn,
                    min_user_table_scn
             FROM oceanbase.v$ob_cluster_stats
             ORDER BY tenant_id desc"""
    (desc, results) = query_cur.exec_query(sql)
    if len(tenant_ids) != len(results):
      logging.exception("result not match")
      raise Exception("result not match")
    # NOTE(review): the row-by-row id comparison assumes tenant_ids is also
    # ordered by tenant_id desc to line up with the ORDER BY -- confirm caller.
    for i in range(len(results)):
      if len(results[i]) != 4:
        logging.exception("length not match")
        raise Exception("length not match")
      if results[i][0] != tenant_ids[i]:
        logging.exception("tenant_id not match")
        raise Exception("tenant_id not match")
      sys_info = {}
      sys_info['tenant_id'] = results[i][0]
      sys_info['refreshed_schema_version'] = results[i][1]
      sys_info['min_sys_table_scn'] = results[i][2]
      sys_info['min_user_table_scn'] = results[i][3]
      logging.info("sys info : {0}".format(sys_info))
      sys_infos.append(sys_info)
    conn.commit()
    # Check ddl and dml sync cluster by cluster.
    for standby_cluster_info in standby_cluster_infos:
      check_ddl_and_dml_sync_by_cluster(standby_cluster_info, sys_infos)
  except Exception:
    logging.exception("fail to check ddl and dml sync")
    raise
def check_ddl_and_dml_sync_by_cluster(standby_cluster_info, sys_infos):
  """Wait for one standby cluster to reach the primary's per-tenant baselines.

  Connects to the standby described by standby_cluster_info, re-checks that it
  is still a standby (not a newly-promoted primary), then waits tenant by
  tenant via check_ddl_and_dml_sync_by_tenant.

  :param standby_cluster_info: dict with cluster_id/user/pwd/ip/port
  :param sys_infos: per-tenant baseline dicts from check_ddl_and_dml_sync()
  :raises Exception: when the standby turned primary or any tenant times out
  """
  try:
    logging.info("start to check ddl and dml sync by cluster: cluster_id = {0}"
                 .format(standby_cluster_info['cluster_id']))
    logging.info("create connection : cluster_id = {0}, ip = {1}, port = {2}"
                 .format(standby_cluster_info['cluster_id'],
                         standby_cluster_info['ip'],
                         standby_cluster_info['port']))
    tmp_conn = mysql.connector.connect(user = standby_cluster_info['user'],
                                       password = standby_cluster_info['pwd'],
                                       host = standby_cluster_info['ip'],
                                       port = standby_cluster_info['port'],
                                       database = 'oceanbase',
                                       raise_on_warnings = True)
    try:
      # try/finally so connection/cursor are released even on failure
      # (the original only closed them on the success path).
      tmp_cur = tmp_conn.cursor(buffered=True)
      try:
        tmp_conn.autocommit = True
        tmp_query_cur = QueryCursor(tmp_cur)
        if check_current_cluster_is_primary(tmp_query_cur):
          logging.exception("""primary cluster changed : cluster_id = {0}, ip = {1}, port = {2}"""
                            .format(standby_cluster_info['cluster_id'],
                                    standby_cluster_info['ip'],
                                    standby_cluster_info['port']))
          # Original `raise e` referenced an undefined name; raise explicitly.
          raise Exception("primary cluster changed")
        for sys_info in sys_infos:
          check_ddl_and_dml_sync_by_tenant(tmp_query_cur, sys_info)
      finally:
        tmp_cur.close()
    finally:
      tmp_conn.close()
    logging.info("""check_ddl_and_dml_sync_by_cluster success : cluster_id = {0}, ip = {1}, port = {2}"""
                 .format(standby_cluster_info['cluster_id'],
                         standby_cluster_info['ip'],
                         standby_cluster_info['port']))
  except Exception:
    logging.exception("""fail to check ddl and dml sync : cluster_id = {0}, ip = {1}, port = {2}"""
                      .format(standby_cluster_info['cluster_id'],
                              standby_cluster_info['ip'],
                              standby_cluster_info['port']))
    raise
def check_ddl_and_dml_sync_by_tenant(query_cur, sys_info):
  """Poll one standby tenant until it reaches the primary's sync baseline.

  Polls v$ob_cluster_stats on the standby once per second for up to 30
  minutes, waiting for refreshed_schema_version (and, for non-sys tenants,
  min_sys_table_scn / min_user_table_scn) to reach the primary's values.

  :param query_cur: cursor wrapper on the standby cluster
  :param sys_info: dict with tenant_id / refreshed_schema_version /
      min_sys_table_scn / min_user_table_scn baselines from the primary
  :raises Exception: on timeout or any query failure
  """
  try:
    times = 1800 # 30min: 1800 polls x 1s sleep
    logging.info("start to check ddl and dml sync by tenant : {0}".format(sys_info))
    start_time = time.time()
    sql = ""
    if 1 == sys_info['tenant_id'] :
      # Sys-tenant DML on a standby is not physically replicated; the upgrade
      # script writes it itself, so only DDL (schema version) is verified here.
      sql = """SELECT count(*)
               FROM oceanbase.v$ob_cluster_stats
               WHERE tenant_id = {0}
               AND refreshed_schema_version >= {1}
            """.format(sys_info['tenant_id'],
                       sys_info['refreshed_schema_version'])
    else:
      sql = """SELECT count(*)
               FROM oceanbase.v$ob_cluster_stats
               WHERE tenant_id = {0}
               AND refreshed_schema_version >= {1}
               AND min_sys_table_scn >= {2}
               AND min_user_table_scn >= {3}
            """.format(sys_info['tenant_id'],
                       sys_info['refreshed_schema_version'],
                       sys_info['min_sys_table_scn'],
                       sys_info['min_user_table_scn'])
    while times > 0 :
      (desc, results) = query_cur.exec_query(sql)
      # count(*) == 1 means the tenant row satisfies all baseline conditions.
      if len(results) == 1 and results[0][0] == 1:
        break
      time.sleep(1)
      times -= 1
    if times == 0:
      logging.exception("check ddl and dml sync timeout! : {0}, cost = {1}"
                        .format(sys_info, time.time() - start_time))
      # Original `raise e` referenced an undefined name, so the timeout path
      # crashed with a NameError instead of a meaningful error; fix that.
      raise Exception("check ddl and dml sync timeout")
    else:
      logging.info("check ddl and dml sync success! : {0}, cost = {1}"
                   .format(sys_info, time.time() - start_time))
  except Exception:
    logging.exception("fail to check ddl and dml sync : {0}".format(sys_info))
    raise