[CP] [42x] Support retrying check_member_list_and_learner_list when creating a new LS
This commit is contained in:
@ -19,7 +19,6 @@
|
|||||||
#include "share/ob_define.h"
|
#include "share/ob_define.h"
|
||||||
#include "lib/ob_running_mode.h"
|
#include "lib/ob_running_mode.h"
|
||||||
|
|
||||||
|
|
||||||
namespace oceanbase
|
namespace oceanbase
|
||||||
{
|
{
|
||||||
using namespace common;
|
using namespace common;
|
||||||
@ -303,7 +302,7 @@ int ObLSTableUpdater::batch_process_tasks(
|
|||||||
} else if (1 != tasks.count()) {
|
} else if (1 != tasks.count()) {
|
||||||
ret = OB_ERR_UNEXPECTED;
|
ret = OB_ERR_UNEXPECTED;
|
||||||
LOG_WARN("unexpected task count", KR(ret), "tasks count", tasks.count());
|
LOG_WARN("unexpected task count", KR(ret), "tasks count", tasks.count());
|
||||||
} else {
|
} else {
|
||||||
const ObLSTableUpdateTask &task = tasks.at(0);
|
const ObLSTableUpdateTask &task = tasks.at(0);
|
||||||
const uint64_t tenant_id = task.get_tenant_id();
|
const uint64_t tenant_id = task.get_tenant_id();
|
||||||
const ObLSID &ls_id = task.get_ls_id();
|
const ObLSID &ls_id = task.get_ls_id();
|
||||||
|
|||||||
@ -711,65 +711,91 @@ int ObLSCreator::persist_ls_member_list_(const common::ObMemberList &member_list
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ERRSIM_POINT_DEF(ERRSIM_CHECK_MEMBER_LIST_SAME_ERROR);
|
||||||
|
// Performs ONE pass of the "all replicas reported" check for this log stream.
//
// Fetches the LS info for (GCONF.cluster_id, tenant_id_, id_) through
// GCTX.lst_operator_ and verifies that every server listed in member_list and
// in learner_list can be found in that info.
//
// @param member_list   members to look up; must satisfy member_list.is_valid()
// @param learner_list  learners to look up; note that no validity check is
//                      applied to it here, it is only iterated
// @return OB_SUCCESS          every listed server was found in the fetched info
//         OB_STATE_NOT_MATCH  the first server that could not be found
//         OB_INVALID_ARGUMENT lst_operator_ is null, or this object or
//                             member_list is invalid
//         otherwise the error returned by the injection point, by
//         lst_operator_->get(), or by get_server_by_index()
//
// This function does not sleep or retry; it only reports the result.
int ObLSCreator::inner_check_member_list_and_learner_list_(
|
||||||
|
const common::ObMemberList &member_list,
|
||||||
|
const common::GlobalLearnerList &learner_list)
|
||||||
|
{
|
||||||
|
int ret = OB_SUCCESS;
|
||||||
|
ObLSInfo ls_info_to_check;
|
||||||
|
if (OB_UNLIKELY(ERRSIM_CHECK_MEMBER_LIST_SAME_ERROR)) { // presumably a test-only error-injection hook; evaluated before any other check
|
||||||
|
ret = ERRSIM_CHECK_MEMBER_LIST_SAME_ERROR; // return the injected value as-is
|
||||||
|
} else if (OB_ISNULL(GCTX.lst_operator_)
|
||||||
|
|| OB_UNLIKELY(!is_valid() || !member_list.is_valid())) {
|
||||||
|
ret = OB_INVALID_ARGUMENT;
|
||||||
|
LOG_WARN("invalid argument", KR(ret), K(member_list));
|
||||||
|
} else if (OB_FAIL(GCTX.lst_operator_->get(
|
||||||
|
GCONF.cluster_id, tenant_id_, id_,
|
||||||
|
share::ObLSTable::DEFAULT_MODE, ls_info_to_check))) {
|
||||||
|
LOG_WARN("fail to get ls info", KR(ret), K_(tenant_id), K_(id));
|
||||||
|
} else {
|
||||||
|
// check that every server in member_list has a replica in the fetched ls info (__all_ls_meta_table)
|
||||||
|
for (int64_t i = 0; OB_SUCC(ret) && i < member_list.get_member_number(); ++i) { // OB_SUCC(ret) ends the loop at the first failure
|
||||||
|
const share::ObLSReplica *replica = nullptr; // output of find(); not read afterwards
|
||||||
|
common::ObAddr server;
|
||||||
|
if (OB_FAIL(member_list.get_server_by_index(i, server))) {
|
||||||
|
LOG_WARN("fail to get server by index", KR(ret), K(i), K(member_list));
|
||||||
|
} else {
|
||||||
|
int tmp_ret = ls_info_to_check.find(server, replica); // kept separate from ret so the lookup error is only logged
|
||||||
|
if (OB_SUCCESS == tmp_ret) {
|
||||||
|
// good, replica exists, bypass
|
||||||
|
} else {
|
||||||
|
ret = OB_STATE_NOT_MATCH; // any find() failure is reported as "not matched yet"
|
||||||
|
LOG_WARN("has replica only in member list, need try again", KR(ret), KR(tmp_ret),
|
||||||
|
K(member_list), K(ls_info_to_check), K(i), K(server));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// check that every server in learner_list has a replica in the fetched ls info (__all_ls_meta_table); skipped entirely if the member_list pass already failed
|
||||||
|
for (int64_t i = 0; OB_SUCC(ret) && i < learner_list.get_member_number(); ++i) {
|
||||||
|
const share::ObLSReplica *replica = nullptr; // output of find(); not read afterwards
|
||||||
|
common::ObAddr server;
|
||||||
|
if (OB_FAIL(learner_list.get_server_by_index(i, server))) {
|
||||||
|
LOG_WARN("fail to get server by index", KR(ret), K(i), K(learner_list));
|
||||||
|
} else {
|
||||||
|
int tmp_ret = ls_info_to_check.find(server, replica); // kept separate from ret so the lookup error is only logged
|
||||||
|
if (OB_SUCCESS == tmp_ret) {
|
||||||
|
// good, replica exists, bypass
|
||||||
|
} else {
|
||||||
|
ret = OB_STATE_NOT_MATCH; // any find() failure is reported as "not matched yet"
|
||||||
|
LOG_WARN("has replica only in learner list, need try again", KR(ret), KR(tmp_ret),
|
||||||
|
K(learner_list), K(ls_info_to_check), K(i), K(server));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
int ObLSCreator::check_member_list_and_learner_list_all_in_meta_table_(
|
int ObLSCreator::check_member_list_and_learner_list_all_in_meta_table_(
|
||||||
const common::ObMemberList &member_list,
|
const common::ObMemberList &member_list,
|
||||||
const common::GlobalLearnerList &learner_list)
|
const common::GlobalLearnerList &learner_list)
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
bool has_replica_only_in_member_list_or_learner_list = true;
|
|
||||||
ObLSInfo ls_info_to_check;
|
|
||||||
const int64_t retry_interval_us = 1000l * 1000l; // 1s
|
const int64_t retry_interval_us = 1000l * 1000l; // 1s
|
||||||
ObTimeoutCtx ctx;
|
ObTimeoutCtx ctx;
|
||||||
if (OB_ISNULL(GCTX.lst_operator_)
|
int tmp_ret = OB_SUCCESS;
|
||||||
|| OB_UNLIKELY(!is_valid() || !member_list.is_valid())) {
|
|
||||||
|
if (OB_UNLIKELY(!is_valid() || !member_list.is_valid())) {
|
||||||
ret = OB_INVALID_ARGUMENT;
|
ret = OB_INVALID_ARGUMENT;
|
||||||
LOG_WARN("invalid argument", KR(ret), K(member_list));
|
LOG_WARN("invalid argument", KR(ret), K(member_list));
|
||||||
} else if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(ctx, GCONF.internal_sql_execute_timeout))) {
|
} else if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(ctx, GCONF.internal_sql_execute_timeout))) {
|
||||||
LOG_WARN("failed to set default timeout", KR(ret));
|
LOG_WARN("failed to set default timeout", KR(ret));
|
||||||
} else {
|
} else {
|
||||||
while (OB_SUCC(ret) && has_replica_only_in_member_list_or_learner_list) {
|
while (OB_SUCC(ret)) {
|
||||||
has_replica_only_in_member_list_or_learner_list = false;
|
|
||||||
if (ctx.is_timeouted()) {
|
if (ctx.is_timeouted()) {
|
||||||
ret = OB_TIMEOUT;
|
ret = OB_TIMEOUT;
|
||||||
LOG_WARN("wait member list all reported to meta table timeout", KR(ret), K(member_list), K_(tenant_id), K_(id));
|
LOG_WARN("wait member list and learner list all reported to meta table timeout",
|
||||||
} else if (OB_FAIL(GCTX.lst_operator_->get(GCONF.cluster_id, tenant_id_, id_, share::ObLSTable::DEFAULT_MODE, ls_info_to_check))) {
|
KR(ret), K(member_list), K(learner_list), K_(tenant_id), K_(id));
|
||||||
LOG_WARN("fail to get ls info", KR(ret), K_(tenant_id), K_(id));
|
} else if (OB_SUCCESS != (tmp_ret = inner_check_member_list_and_learner_list_(
|
||||||
|
member_list, learner_list))) {
|
||||||
|
LOG_WARN("fail to check member list and learner list all reported", KR(tmp_ret),
|
||||||
|
K_(tenant_id), K_(id), K(member_list), K(learner_list));
|
||||||
|
// has replica only in member_list or learner_list, need try again later
|
||||||
|
ob_usleep(retry_interval_us);
|
||||||
} else {
|
} else {
|
||||||
for (int64_t i = 0; OB_SUCC(ret) && i < member_list.get_member_number(); ++i) {
|
// good, all replicas in member_list and learner_list has already reported
|
||||||
const share::ObLSReplica *replica = nullptr;
|
break;
|
||||||
common::ObAddr server;
|
|
||||||
if (OB_FAIL(member_list.get_server_by_index(i, server))) {
|
|
||||||
LOG_WARN("fail to get server by index", KR(ret), K(i), K(member_list));
|
|
||||||
} else {
|
|
||||||
int tmp_ret = ls_info_to_check.find(server, replica);
|
|
||||||
if (OB_SUCCESS == tmp_ret) {
|
|
||||||
// replica exists, bypass
|
|
||||||
} else {
|
|
||||||
has_replica_only_in_member_list_or_learner_list = true;
|
|
||||||
LOG_INFO("has replica only in member list", KR(tmp_ret), K(member_list), K(ls_info_to_check), K(i), K(server));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int64_t i = 0; OB_SUCC(ret) && i < learner_list.get_member_number(); ++i) {
|
|
||||||
const share::ObLSReplica *replica = nullptr;
|
|
||||||
common::ObAddr server;
|
|
||||||
if (OB_FAIL(learner_list.get_server_by_index(i, server))) {
|
|
||||||
LOG_WARN("fail to get server by index", KR(ret), K(i), K(learner_list));
|
|
||||||
} else {
|
|
||||||
int tmp_ret = ls_info_to_check.find(server, replica);
|
|
||||||
if (OB_SUCCESS == tmp_ret) {
|
|
||||||
// replica exists, bypass
|
|
||||||
} else {
|
|
||||||
has_replica_only_in_member_list_or_learner_list = true;
|
|
||||||
LOG_INFO("has replica only in learner list", KR(tmp_ret), K(learner_list), K(ls_info_to_check), K(i), K(server));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (OB_SUCC(ret) && has_replica_only_in_member_list_or_learner_list) {
|
|
||||||
ob_usleep(retry_interval_us);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -125,6 +125,9 @@ private:
|
|||||||
int check_member_list_and_learner_list_all_in_meta_table_(
|
int check_member_list_and_learner_list_all_in_meta_table_(
|
||||||
const common::ObMemberList &member_list,
|
const common::ObMemberList &member_list,
|
||||||
const common::GlobalLearnerList &learner_list);
|
const common::GlobalLearnerList &learner_list);
|
||||||
|
int inner_check_member_list_and_learner_list_(
|
||||||
|
const common::ObMemberList &member_list,
|
||||||
|
const common::GlobalLearnerList &learner_list);
|
||||||
int construct_paxos_replica_number_to_persist_(
|
int construct_paxos_replica_number_to_persist_(
|
||||||
const int64_t paxos_replica_num,
|
const int64_t paxos_replica_num,
|
||||||
const int64_t arb_replica_num,
|
const int64_t arb_replica_num,
|
||||||
|
|||||||
Reference in New Issue
Block a user