[RS] Fix RS startup failure when get_master_rs times out
This commit is contained in:
@ -364,10 +364,15 @@ int ObRsMgr::do_detect_master_rs_v2(common::ObIArray<common::ObAddr>& rs_list)
|
|||||||
ObGetRootserverRoleResult result;
|
ObGetRootserverRoleResult result;
|
||||||
const int64_t cluster_id = GCONF.cluster_id;
|
const int64_t cluster_id = GCONF.cluster_id;
|
||||||
bool has_rs = false;
|
bool has_rs = false;
|
||||||
|
int64_t timeout = 0;
|
||||||
|
if (ObTimeoutCtx::get_ctx().is_timeout_set() && !ObTimeoutCtx::get_ctx().is_timeouted()) {
|
||||||
|
timeout = ObTimeoutCtx::get_ctx().get_timeout();
|
||||||
|
}
|
||||||
|
|
||||||
FOREACH_CNT(server, rs_list)
|
FOREACH_CNT(server, rs_list)
|
||||||
{
|
{
|
||||||
result.reset();
|
result.reset();
|
||||||
if (OB_FAIL(do_detect_master_rs_v3(*server, cluster_id, result))) {
|
if (OB_FAIL(do_detect_master_rs_v3(*server, cluster_id, timeout, result))) {
|
||||||
// LOG_WARN("detect master rootservice failed", K(ret), "server", *server);
|
// LOG_WARN("detect master rootservice failed", K(ret), "server", *server);
|
||||||
} else {
|
} else {
|
||||||
has_rs = true;
|
has_rs = true;
|
||||||
@ -438,14 +443,14 @@ int ObRsMgr::do_detect_master_rs_v3(const ObIArray<ObAddr>& server_list, ObParti
|
|||||||
bool has_rs = false;
|
bool has_rs = false;
|
||||||
int tmp_ret = OB_SUCCESS;
|
int tmp_ret = OB_SUCCESS;
|
||||||
ObGetRootserverRoleResult result;
|
ObGetRootserverRoleResult result;
|
||||||
|
int64_t timeout = 0;
|
||||||
|
if (ObTimeoutCtx::get_ctx().is_timeout_set() && !ObTimeoutCtx::get_ctx().is_timeouted()) {
|
||||||
|
timeout = ObTimeoutCtx::get_ctx().get_timeout();
|
||||||
|
}
|
||||||
FOREACH_CNT(server, server_list)
|
FOREACH_CNT(server, server_list)
|
||||||
{
|
{
|
||||||
result.reset();
|
result.reset();
|
||||||
if (ObTimeoutCtx::get_ctx().is_timeout_set() && ObTimeoutCtx::get_ctx().is_timeouted()) {
|
if (OB_FAIL(do_detect_master_rs_v3(*server, cluster_id, timeout, result))) {
|
||||||
ret = OB_TIMEOUT;
|
|
||||||
LOG_WARN("detect master rs timeout", KR(ret));
|
|
||||||
break;
|
|
||||||
} else if (OB_FAIL(do_detect_master_rs_v3(*server, cluster_id, result))) {
|
|
||||||
// LOG_WARN("detect master rootservice failed", K(ret), "server", *server);
|
// LOG_WARN("detect master rootservice failed", K(ret), "server", *server);
|
||||||
} else {
|
} else {
|
||||||
// if RS exists, return the memory data of RS directly.
|
// if RS exists, return the memory data of RS directly.
|
||||||
@ -468,17 +473,14 @@ int ObRsMgr::do_detect_master_rs_v3(const ObIArray<ObAddr>& server_list, ObParti
|
|||||||
}
|
}
|
||||||
|
|
||||||
int ObRsMgr::do_detect_master_rs_v3(
|
int ObRsMgr::do_detect_master_rs_v3(
|
||||||
const ObAddr& dst_server, const int64_t cluster_id, ObGetRootserverRoleResult& result)
|
const ObAddr& dst_server, const int64_t cluster_id, const int64_t rpc_timeout, ObGetRootserverRoleResult& result)
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
result.reset();
|
result.reset();
|
||||||
result.role_ = ObRoleMgr::OB_SLAVE;
|
result.role_ = ObRoleMgr::OB_SLAVE;
|
||||||
result.zone_.reset();
|
result.zone_.reset();
|
||||||
ObCurTraceId::Guard guard(GCTX.self_addr_);
|
ObCurTraceId::Guard guard(GCTX.self_addr_);
|
||||||
int64_t timeout = DETECT_MASTER_TIMEOUT;
|
int64_t timeout = max(DETECT_MASTER_TIMEOUT, rpc_timeout);
|
||||||
if (ObTimeoutCtx::get_ctx().is_timeout_set()) {
|
|
||||||
timeout = ObTimeoutCtx::get_ctx().get_timeout();
|
|
||||||
}
|
|
||||||
if (OB_UNLIKELY(!inited_)) {
|
if (OB_UNLIKELY(!inited_)) {
|
||||||
ret = OB_NOT_INIT;
|
ret = OB_NOT_INIT;
|
||||||
LOG_WARN("not init", KR(ret));
|
LOG_WARN("not init", KR(ret));
|
||||||
@ -489,14 +491,14 @@ int ObRsMgr::do_detect_master_rs_v3(
|
|||||||
ObCurTraceId::Guard guard(GCTX.self_addr_);
|
ObCurTraceId::Guard guard(GCTX.self_addr_);
|
||||||
if (GET_MIN_CLUSTER_VERSION() < CLUSTER_VERSION_2220) {
|
if (GET_MIN_CLUSTER_VERSION() < CLUSTER_VERSION_2220) {
|
||||||
if (OB_FAIL(rpc_proxy_->to_addr(dst_server).timeout(timeout).get_root_server_status(result))) {
|
if (OB_FAIL(rpc_proxy_->to_addr(dst_server).timeout(timeout).get_root_server_status(result))) {
|
||||||
LOG_WARN("failed to get rootserver role", KR(ret), K(dst_server));
|
LOG_WARN("failed to get rootserver role", K(ret), K(dst_server), K(timeout));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (OB_FAIL(rpc_proxy_->to_addr(dst_server)
|
if (OB_FAIL(rpc_proxy_->to_addr(dst_server)
|
||||||
.timeout(timeout)
|
.timeout(timeout)
|
||||||
.dst_cluster_id(cluster_id)
|
.dst_cluster_id(cluster_id)
|
||||||
.get_master_root_server(result))) {
|
.get_master_root_server(result))) {
|
||||||
LOG_WARN("fail to get rootserver role", KR(ret), K(dst_server), K(cluster_id));
|
LOG_WARN("fail to get rootserver role", K(ret), K(dst_server), K(cluster_id), K(timeout));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -544,6 +546,11 @@ int ObRsMgr::get_remote_cluster_master_rs(const int64_t cluster_id, common::ObAd
|
|||||||
} else {
|
} else {
|
||||||
bool found = false;
|
bool found = false;
|
||||||
ObGetRootserverRoleResult result;
|
ObGetRootserverRoleResult result;
|
||||||
|
int64_t timeout = 0;
|
||||||
|
if (ObTimeoutCtx::get_ctx().is_timeout_set() && !ObTimeoutCtx::get_ctx().is_timeouted()) {
|
||||||
|
timeout = ObTimeoutCtx::get_ctx().get_timeout();
|
||||||
|
}
|
||||||
|
|
||||||
for (int64_t i = 0; i < addr_agent_.get_agent_num(); ++i) {
|
for (int64_t i = 0; i < addr_agent_.get_agent_num(); ++i) {
|
||||||
if (OB_FAIL(addr_agent_.fetch_rslist_by_agent_idx(i, cluster_id, new_list, new_readonly_list, cluster_type))) {
|
if (OB_FAIL(addr_agent_.fetch_rslist_by_agent_idx(i, cluster_id, new_list, new_readonly_list, cluster_type))) {
|
||||||
LOG_WARN("fetch rs list failed", K(ret), K(cluster_id), K(i));
|
LOG_WARN("fetch rs list failed", K(ret), K(cluster_id), K(i));
|
||||||
@ -552,7 +559,7 @@ int ObRsMgr::get_remote_cluster_master_rs(const int64_t cluster_id, common::ObAd
|
|||||||
for (int64_t i = 0; i < new_list.count(); ++i) {
|
for (int64_t i = 0; i < new_list.count(); ++i) {
|
||||||
const ObAddr& dst_server = new_list.at(i).server_;
|
const ObAddr& dst_server = new_list.at(i).server_;
|
||||||
result.reset();
|
result.reset();
|
||||||
if (OB_FAIL(do_detect_master_rs_v3(dst_server, cluster_id, result))) {
|
if (OB_FAIL(do_detect_master_rs_v3(dst_server, cluster_id, timeout, result))) {
|
||||||
} else {
|
} else {
|
||||||
addr = result.replica_.server_;
|
addr = result.replica_.server_;
|
||||||
LOG_INFO("new master rootserver found", "rootservice", addr, K(cluster_id));
|
LOG_INFO("new master rootserver found", "rootservice", addr, K(cluster_id));
|
||||||
|
|||||||
@ -100,8 +100,8 @@ private:
|
|||||||
int renew_master_rootserver_v2();
|
int renew_master_rootserver_v2();
|
||||||
int do_detect_master_rs_v2(common::ObIArray<common::ObAddr>& rs_list);
|
int do_detect_master_rs_v2(common::ObIArray<common::ObAddr>& rs_list);
|
||||||
int renew_master_rootserver_v3();
|
int renew_master_rootserver_v3();
|
||||||
int do_detect_master_rs_v3(
|
int do_detect_master_rs_v3(const common::ObAddr& dst_server, const int64_t cluster_id, const int64_t rpc_timeout,
|
||||||
const common::ObAddr& dst_server, const int64_t cluster_id, obrpc::ObGetRootserverRoleResult& result);
|
obrpc::ObGetRootserverRoleResult& result);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static const int64_t DETECT_MASTER_TIMEOUT = 1 * 1000 * 1000; // 1s
|
static const int64_t DETECT_MASTER_TIMEOUT = 1 * 1000 * 1000; // 1s
|
||||||
|
|||||||
Reference in New Issue
Block a user