Fix ret = -4016 when switching leader

2022-12-22 05:11:31 +00:00
parent a6ae5c21dc
commit a3b6b8ad72
1 changed files with 26 additions and 12 deletions
--- a/src/rootserver/backup/ob_backup_data_set_task_mgr.cpp
+++ b/src/rootserver/backup/ob_backup_data_set_task_mgr.cpp
@ -724,8 +724,13 @@ int ObBackupSetTaskMgr::get_dst_server_(const ObLSID &ls_id, ObAddr &dst)
  } else if (OB_ISNULL(lst_operator)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("[DATA_BACKUP]lst_operator ptr is null", K(ret));
-  } else if (OB_FAIL(lst_operator->get(cluster_id, tenant_id,
-             ls_id, share::ObLSTable::DEFAULT_MODE, ls_info))) {
+  } else {
+    // During a leader switch, the new leader may not yet be reported to __all_ls_meta_table, so the lookup can find no leader.
+    // An ownerless election may also take more than 30s to choose a leader.
+    // So we retry to tolerate this scenario, with an absolute timeout set 30s from now.
+    const int64_t abs_timeout = ObTimeUtility::current_time() + 30 * 1000 * 1000;
+    do {
+      if (OB_FAIL(lst_operator->get(cluster_id, tenant_id, ls_id, share::ObLSTable::DEFAULT_MODE, ls_info))) {
        LOG_WARN("[DATA_BACKUP]failed to get log stream info", K(ret), K(cluster_id), K(tenant_id), K(ls_id));
      } else {
        const ObLSInfo::ReplicaArray &replica_array = ls_info.get_replicas();
@ -737,11 +742,20 @@ int ObBackupSetTaskMgr::get_dst_server_(const ObLSID &ls_id, ObAddr &dst)
          }
        }
      }
+      if (!dst.is_valid()) {
+        // wait 100 ms for next retry.
+        usleep(100 * 1000);
+        if(OB_FAIL(lease_service_->check_lease())) {
+          LOG_WARN("failed to check lease", K(ret));
+        }
+      }
+    } while (OB_SUCC(ret) && !dst.is_valid() && ObTimeUtility::current_time() < abs_timeout);
+  }

  if (OB_FAIL(ret)) {
  } else if (!dst.is_valid()) {
-    ret = OB_ERR_UNEXPECTED;
-    LOG_WARN("[DATA_BACKUP]no server", K(ret), K(set_task_attr_));
+    ret = OB_LEADER_NOT_EXIST;
+    LOG_WARN("[DATA_BACKUP]no leader be found", K(ret), K(ls_id), K(set_task_attr_));
  }
  return ret;
 }