Fix ret = -4016 when switching leader

This commit is contained in:
hamstersox
2022-12-22 05:11:31 +00:00
committed by ob-robot
parent a6ae5c21dc
commit a3b6b8ad72

View File

@ -724,8 +724,13 @@ int ObBackupSetTaskMgr::get_dst_server_(const ObLSID &ls_id, ObAddr &dst)
} else if (OB_ISNULL(lst_operator)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("[DATA_BACKUP]lst_operator ptr is null", K(ret));
} else if (OB_FAIL(lst_operator->get(cluster_id, tenant_id,
ls_id, share::ObLSTable::DEFAULT_MODE, ls_info))) {
} else {
// When the leader changes, the new leader may not be reported to __all_ls_meta_table in time, so we may find no leader.
// An ownerless election may also take more than 30s to choose a leader.
// So we retry to tolerate this scenario, with an absolute timeout set 30s in the future.
const int64_t abs_timeout = ObTimeUtility::current_time() + 30 * 1000 * 1000;
do {
if (OB_FAIL(lst_operator->get(cluster_id, tenant_id, ls_id, share::ObLSTable::DEFAULT_MODE, ls_info))) {
LOG_WARN("[DATA_BACKUP]failed to get log stream info", K(ret), K(cluster_id), K(tenant_id), K(ls_id));
} else {
const ObLSInfo::ReplicaArray &replica_array = ls_info.get_replicas();
@ -737,11 +742,20 @@ int ObBackupSetTaskMgr::get_dst_server_(const ObLSID &ls_id, ObAddr &dst)
}
}
}
if (!dst.is_valid()) {
// wait 100 ms for next retry.
usleep(100 * 1000);
if(OB_FAIL(lease_service_->check_lease())) {
LOG_WARN("failed to check lease", K(ret));
}
}
} while (OB_SUCC(ret) && !dst.is_valid() && ObTimeUtility::current_time() < abs_timeout);
}
if (OB_FAIL(ret)) {
} else if (!dst.is_valid()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("[DATA_BACKUP]no server", K(ret), K(set_task_attr_));
ret = OB_LEADER_NOT_EXIST;
LOG_WARN("[DATA_BACKUP]no leader be found", K(ret), K(ls_id), K(set_task_attr_));
}
return ret;
}