[RS] Fix RS startup failure when get_master_rs times out

This commit is contained in:
maosy
2021-07-27 15:15:14 +08:00
committed by wangzelin.wzl
parent f0726b6b6c
commit 85d7d1e6f8
2 changed files with 23 additions and 16 deletions

View File

@ -364,10 +364,15 @@ int ObRsMgr::do_detect_master_rs_v2(common::ObIArray<common::ObAddr>& rs_list)
ObGetRootserverRoleResult result; ObGetRootserverRoleResult result;
const int64_t cluster_id = GCONF.cluster_id; const int64_t cluster_id = GCONF.cluster_id;
bool has_rs = false; bool has_rs = false;
int64_t timeout = 0;
if (ObTimeoutCtx::get_ctx().is_timeout_set() && !ObTimeoutCtx::get_ctx().is_timeouted()) {
timeout = ObTimeoutCtx::get_ctx().get_timeout();
}
FOREACH_CNT(server, rs_list) FOREACH_CNT(server, rs_list)
{ {
result.reset(); result.reset();
if (OB_FAIL(do_detect_master_rs_v3(*server, cluster_id, result))) { if (OB_FAIL(do_detect_master_rs_v3(*server, cluster_id, timeout, result))) {
// LOG_WARN("detect master rootservice failed", K(ret), "server", *server); // LOG_WARN("detect master rootservice failed", K(ret), "server", *server);
} else { } else {
has_rs = true; has_rs = true;
@ -438,14 +443,14 @@ int ObRsMgr::do_detect_master_rs_v3(const ObIArray<ObAddr>& server_list, ObParti
bool has_rs = false; bool has_rs = false;
int tmp_ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS;
ObGetRootserverRoleResult result; ObGetRootserverRoleResult result;
int64_t timeout = 0;
if (ObTimeoutCtx::get_ctx().is_timeout_set() && !ObTimeoutCtx::get_ctx().is_timeouted()) {
timeout = ObTimeoutCtx::get_ctx().get_timeout();
}
FOREACH_CNT(server, server_list) FOREACH_CNT(server, server_list)
{ {
result.reset(); result.reset();
if (ObTimeoutCtx::get_ctx().is_timeout_set() && ObTimeoutCtx::get_ctx().is_timeouted()) { if (OB_FAIL(do_detect_master_rs_v3(*server, cluster_id, timeout, result))) {
ret = OB_TIMEOUT;
LOG_WARN("detect master rs timeout", KR(ret));
break;
} else if (OB_FAIL(do_detect_master_rs_v3(*server, cluster_id, result))) {
// LOG_WARN("detect master rootservice failed", K(ret), "server", *server); // LOG_WARN("detect master rootservice failed", K(ret), "server", *server);
} else { } else {
// if RS exists, return the memory data of RS directly. // if RS exists, return the memory data of RS directly.
@ -468,17 +473,14 @@ int ObRsMgr::do_detect_master_rs_v3(const ObIArray<ObAddr>& server_list, ObParti
} }
int ObRsMgr::do_detect_master_rs_v3( int ObRsMgr::do_detect_master_rs_v3(
const ObAddr& dst_server, const int64_t cluster_id, ObGetRootserverRoleResult& result) const ObAddr& dst_server, const int64_t cluster_id, const int64_t rpc_timeout, ObGetRootserverRoleResult& result)
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
result.reset(); result.reset();
result.role_ = ObRoleMgr::OB_SLAVE; result.role_ = ObRoleMgr::OB_SLAVE;
result.zone_.reset(); result.zone_.reset();
ObCurTraceId::Guard guard(GCTX.self_addr_); ObCurTraceId::Guard guard(GCTX.self_addr_);
int64_t timeout = DETECT_MASTER_TIMEOUT; int64_t timeout = max(DETECT_MASTER_TIMEOUT, rpc_timeout);
if (ObTimeoutCtx::get_ctx().is_timeout_set()) {
timeout = ObTimeoutCtx::get_ctx().get_timeout();
}
if (OB_UNLIKELY(!inited_)) { if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT; ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret)); LOG_WARN("not init", KR(ret));
@ -489,14 +491,14 @@ int ObRsMgr::do_detect_master_rs_v3(
ObCurTraceId::Guard guard(GCTX.self_addr_); ObCurTraceId::Guard guard(GCTX.self_addr_);
if (GET_MIN_CLUSTER_VERSION() < CLUSTER_VERSION_2220) { if (GET_MIN_CLUSTER_VERSION() < CLUSTER_VERSION_2220) {
if (OB_FAIL(rpc_proxy_->to_addr(dst_server).timeout(timeout).get_root_server_status(result))) { if (OB_FAIL(rpc_proxy_->to_addr(dst_server).timeout(timeout).get_root_server_status(result))) {
LOG_WARN("failed to get rootserver role", KR(ret), K(dst_server)); LOG_WARN("failed to get rootserver role", K(ret), K(dst_server), K(timeout));
} }
} else { } else {
if (OB_FAIL(rpc_proxy_->to_addr(dst_server) if (OB_FAIL(rpc_proxy_->to_addr(dst_server)
.timeout(timeout) .timeout(timeout)
.dst_cluster_id(cluster_id) .dst_cluster_id(cluster_id)
.get_master_root_server(result))) { .get_master_root_server(result))) {
LOG_WARN("fail to get rootserver role", KR(ret), K(dst_server), K(cluster_id)); LOG_WARN("fail to get rootserver role", K(ret), K(dst_server), K(cluster_id), K(timeout));
} }
} }
} }
@ -544,6 +546,11 @@ int ObRsMgr::get_remote_cluster_master_rs(const int64_t cluster_id, common::ObAd
} else { } else {
bool found = false; bool found = false;
ObGetRootserverRoleResult result; ObGetRootserverRoleResult result;
int64_t timeout = 0;
if (ObTimeoutCtx::get_ctx().is_timeout_set() && !ObTimeoutCtx::get_ctx().is_timeouted()) {
timeout = ObTimeoutCtx::get_ctx().get_timeout();
}
for (int64_t i = 0; i < addr_agent_.get_agent_num(); ++i) { for (int64_t i = 0; i < addr_agent_.get_agent_num(); ++i) {
if (OB_FAIL(addr_agent_.fetch_rslist_by_agent_idx(i, cluster_id, new_list, new_readonly_list, cluster_type))) { if (OB_FAIL(addr_agent_.fetch_rslist_by_agent_idx(i, cluster_id, new_list, new_readonly_list, cluster_type))) {
LOG_WARN("fetch rs list failed", K(ret), K(cluster_id), K(i)); LOG_WARN("fetch rs list failed", K(ret), K(cluster_id), K(i));
@ -552,7 +559,7 @@ int ObRsMgr::get_remote_cluster_master_rs(const int64_t cluster_id, common::ObAd
for (int64_t i = 0; i < new_list.count(); ++i) { for (int64_t i = 0; i < new_list.count(); ++i) {
const ObAddr& dst_server = new_list.at(i).server_; const ObAddr& dst_server = new_list.at(i).server_;
result.reset(); result.reset();
if (OB_FAIL(do_detect_master_rs_v3(dst_server, cluster_id, result))) { if (OB_FAIL(do_detect_master_rs_v3(dst_server, cluster_id, timeout, result))) {
} else { } else {
addr = result.replica_.server_; addr = result.replica_.server_;
LOG_INFO("new master rootserver found", "rootservice", addr, K(cluster_id)); LOG_INFO("new master rootserver found", "rootservice", addr, K(cluster_id));

View File

@ -100,8 +100,8 @@ private:
int renew_master_rootserver_v2(); int renew_master_rootserver_v2();
int do_detect_master_rs_v2(common::ObIArray<common::ObAddr>& rs_list); int do_detect_master_rs_v2(common::ObIArray<common::ObAddr>& rs_list);
int renew_master_rootserver_v3(); int renew_master_rootserver_v3();
int do_detect_master_rs_v3( int do_detect_master_rs_v3(const common::ObAddr& dst_server, const int64_t cluster_id, const int64_t rpc_timeout,
const common::ObAddr& dst_server, const int64_t cluster_id, obrpc::ObGetRootserverRoleResult& result); obrpc::ObGetRootserverRoleResult& result);
private: private:
static const int64_t DETECT_MASTER_TIMEOUT = 1 * 1000 * 1000; // 1s static const int64_t DETECT_MASTER_TIMEOUT = 1 * 1000 * 1000; // 1s