[CP] [fix] avoid loop when registering parents for R replicas
This commit is contained in:
@ -54,9 +54,9 @@ bool check_children_valid(const std::vector<PalfHandleImplGuard*> &palf_list, co
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ret = all_children.learner_addr_equal(all_learner);
|
bool bool_ret = all_children.learner_addr_equal(all_learner);
|
||||||
PALF_LOG(INFO, "check_children", K(ret), K(all_children), K(all_learner));
|
PALF_LOG(INFO, "check_children", K(ret), K(all_children), K(all_learner));
|
||||||
return ret;
|
return bool_ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool check_parent(const std::vector<PalfHandleImplGuard*> &palf_list, const LogLearnerList &all_learner, const ObAddr &parent)
|
bool check_parent(const std::vector<PalfHandleImplGuard*> &palf_list, const LogLearnerList &all_learner, const ObAddr &parent)
|
||||||
@ -1003,6 +1003,145 @@ TEST_F(TestObSimpleLogClusterConfigChange, test_switch_leader)
|
|||||||
PALF_LOG(INFO, "end test switch_leader", K(id));
|
PALF_LOG(INFO, "end test switch_leader", K(id));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 1. 3F(beijing), 4R(shanghai)
|
||||||
|
// 2. the client submits logs to F replicas, but R replicas cannot receive logs
|
||||||
|
// 3. switch an R to F; the R replica must be one of the children of another R.
|
||||||
|
// Due to step 2, the R will not receive the reconfiguration log
|
||||||
|
// 4. enable the remaining R replicas to re-register parents
|
||||||
|
// 5. check that no loop exists between R replicas
|
||||||
|
TEST_F(TestObSimpleLogClusterConfigChange, learner_loop)
|
||||||
|
{
|
||||||
|
SET_CASE_LOG_FILE(TEST_NAME, "learner_loop");
|
||||||
|
int ret = OB_SUCCESS;
|
||||||
|
const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
|
||||||
|
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
|
||||||
|
int64_t leader_idx = 0;
|
||||||
|
PalfHandleImplGuard leader;
|
||||||
|
LogLearnerList all_learner;
|
||||||
|
const ObMemberList &node_list = get_node_list();
|
||||||
|
std::vector<PalfHandleImplGuard*> palf_list;
|
||||||
|
common::ObRegion beijing_region("BEIJING");
|
||||||
|
common::ObRegion shanghai_region("SHANGHAI");
|
||||||
|
|
||||||
|
EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, &loc_cb, leader_idx, leader));
|
||||||
|
EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
|
||||||
|
loc_cb.leader_ = get_cluster()[leader_idx]->get_addr();
|
||||||
|
|
||||||
|
// 1. init
|
||||||
|
for (int64_t i = 3; i < ObSimpleLogClusterTestBase::node_cnt_; ++i) {
|
||||||
|
common::ObMember added_learner;
|
||||||
|
EXPECT_EQ(OB_SUCCESS, node_list.get_member_by_index(i, added_learner));
|
||||||
|
LogLearner learner(added_learner.get_server(), 1);
|
||||||
|
EXPECT_EQ(OB_SUCCESS, all_learner.add_learner(learner));
|
||||||
|
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_learner(added_learner, CONFIG_CHANGE_TIMEOUT));
|
||||||
|
}
|
||||||
|
|
||||||
|
// set region, version 42x
|
||||||
|
// for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++) {
|
||||||
|
// const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
|
||||||
|
// if (leader.palf_handle_impl_->config_mgr_.alive_paxos_memberlist_.contains(addr)) {
|
||||||
|
// get_cluster()[0]->get_locality_manager()->set_server_region(addr, beijing_region);
|
||||||
|
// } else {
|
||||||
|
// get_cluster()[0]->get_locality_manager()->set_server_region(addr, shanghai_region);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// for (auto palf_handle: palf_list) { palf_handle->palf_handle_impl_->update_self_region_(); }
|
||||||
|
|
||||||
|
// set region, version 421, master
|
||||||
|
LogMemberRegionMap region_map;
|
||||||
|
EXPECT_EQ(OB_SUCCESS, region_map.init("localmap", OB_MAX_MEMBER_NUMBER));
|
||||||
|
for (int i = 0; i < ObSimpleLogClusterTestBase::member_cnt_; i++) {
|
||||||
|
const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
|
||||||
|
if (leader.palf_handle_impl_->config_mgr_.alive_paxos_memberlist_.contains(addr)) {
|
||||||
|
EXPECT_EQ(OB_SUCCESS, palf_list[i]->palf_handle_impl_->set_region(beijing_region));
|
||||||
|
region_map.insert(addr, beijing_region);
|
||||||
|
} else {
|
||||||
|
EXPECT_EQ(OB_SUCCESS, palf_list[i]->palf_handle_impl_->set_region(shanghai_region));
|
||||||
|
region_map.insert(addr, shanghai_region);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// notify the leader of the member regions recorded in region_map
|
||||||
|
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->set_paxos_member_region_map(region_map));
|
||||||
|
|
||||||
|
// check topo
|
||||||
|
EXPECT_UNTIL_EQ(true, check_children_valid(palf_list, all_learner));
|
||||||
|
EXPECT_UNTIL_EQ(1, leader.palf_handle_impl_->config_mgr_.children_.get_member_number());
|
||||||
|
EXPECT_UNTIL_EQ(0, palf_list[1]->palf_handle_impl_->config_mgr_.children_.get_member_number());
|
||||||
|
EXPECT_UNTIL_EQ(0, palf_list[2]->palf_handle_impl_->config_mgr_.children_.get_member_number());
|
||||||
|
ObAddr same_parent, any_child;
|
||||||
|
int64_t any_child_idx = -1;
|
||||||
|
for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++)
|
||||||
|
{
|
||||||
|
const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
|
||||||
|
if (leader.palf_handle_impl_->config_mgr_.children_.contains(addr)) {
|
||||||
|
EXPECT_UNTIL_EQ(palf_list[i]->palf_handle_impl_->config_mgr_.parent_, leader.palf_handle_impl_->self_);
|
||||||
|
same_parent = addr;
|
||||||
|
PALF_LOG(INFO, "SAME_PARENT", K(id), K(addr), K(same_parent));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EXPECT_TRUE(same_parent.is_valid());
|
||||||
|
for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++)
|
||||||
|
{
|
||||||
|
const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
|
||||||
|
if (all_learner.contains(addr) && addr != same_parent) {
|
||||||
|
EXPECT_UNTIL_EQ(same_parent, palf_list[i]->palf_handle_impl_->config_mgr_.parent_);
|
||||||
|
any_child = addr;
|
||||||
|
any_child_idx = i;
|
||||||
|
PALF_LOG(INFO, "CHECK_PARENT", K(id), K(addr), K(same_parent));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 2. replicating logs to all F replicas
|
||||||
|
EXPECT_NE(-1, any_child_idx);
|
||||||
|
EXPECT_UNTIL_EQ(leader.palf_handle_impl_->config_mgr_.log_ms_meta_.curr_.config_.config_version_,
|
||||||
|
palf_list[any_child_idx]->palf_handle_impl_->config_mgr_.log_ms_meta_.curr_.config_.config_version_);
|
||||||
|
EXPECT_UNTIL_EQ(leader.palf_handle_impl_->get_max_lsn().val_, leader.palf_handle_impl_->get_end_lsn().val_);
|
||||||
|
for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++) {
|
||||||
|
const common::ObAddr &addr = palf_list[i]->palf_handle_impl_->self_;
|
||||||
|
if (true == all_learner.contains(addr)) {
|
||||||
|
block_pcode(i, ObRpcPacketCode::OB_LOG_PUSH_REQ);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
|
||||||
|
EXPECT_UNTIL_EQ(leader.palf_handle_impl_->get_max_lsn().val_, leader.palf_handle_impl_->get_end_lsn().val_);
|
||||||
|
|
||||||
|
// 3. switch a R replica to F
|
||||||
|
LogConfigVersion config_version;
|
||||||
|
ASSERT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_config_version(config_version));
|
||||||
|
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_learner_to_acceptor(ObMember(any_child, -1), 4, config_version, CONFIG_CHANGE_TIMEOUT));
|
||||||
|
|
||||||
|
// 4. enable the remaining R replicas to re-register parents
|
||||||
|
leader.palf_handle_impl_->config_mgr_.children_.reset();
|
||||||
|
for (auto palf_handle: palf_list) {
|
||||||
|
const common::ObAddr addr = palf_handle->palf_handle_impl_->self_;
|
||||||
|
if (true == all_learner.contains(addr) && addr != any_child) {
|
||||||
|
palf_handle->palf_handle_impl_->config_mgr_.retire_parent_();
|
||||||
|
palf_handle->palf_handle_impl_->config_mgr_.register_parent_();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. check that no remaining R replica has its own parent among its children
|
||||||
|
for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++) {
|
||||||
|
const common::ObAddr &addr = palf_list[i]->palf_handle_impl_->self_;
|
||||||
|
if (true == all_learner.contains(addr)) {
|
||||||
|
block_pcode(i, ObRpcPacketCode::OB_LOG_PUSH_REQ);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sleep(2);
|
||||||
|
|
||||||
|
EXPECT_EQ(OB_SUCCESS, all_learner.remove_learner(any_child));
|
||||||
|
for (auto palf_handle: palf_list) {
|
||||||
|
const common::ObAddr addr = palf_handle->palf_handle_impl_->self_;
|
||||||
|
if (true == all_learner.contains(addr)) {
|
||||||
|
EXPECT_UNTIL_EQ(true, palf_handle->palf_handle_impl_->config_mgr_.parent_.is_valid());
|
||||||
|
EXPECT_UNTIL_EQ(false, palf_handle->palf_handle_impl_->config_mgr_.children_.contains(palf_handle->palf_handle_impl_->config_mgr_.parent_));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
revert_cluster_palf_handle_guard(palf_list);
|
||||||
|
PALF_LOG(INFO, "end test learner_loop", K(id));
|
||||||
|
}
|
||||||
|
|
||||||
} // end unittest
|
} // end unittest
|
||||||
} // end oceanbase
|
} // end oceanbase
|
||||||
|
|
||||||
|
|||||||
@ -2711,6 +2711,16 @@ int LogConfigMgr::check_parent_health()
|
|||||||
if (IS_NOT_INIT) {
|
if (IS_NOT_INIT) {
|
||||||
ret = OB_NOT_INIT;
|
ret = OB_NOT_INIT;
|
||||||
} else {
|
} else {
|
||||||
|
{
|
||||||
|
SpinLockGuard parent_guard(parent_lock_);
|
||||||
|
SpinLockGuard child_guard(child_lock_);
|
||||||
|
// break learner loop
|
||||||
|
if (parent_.is_valid() &&
|
||||||
|
children_.contains(parent_) &&
|
||||||
|
OB_FAIL(retire_parent_())) {
|
||||||
|
PALF_LOG(WARN, "retire_parent_ failed", KR(ret), K_(palf_id), K_(self));
|
||||||
|
}
|
||||||
|
}
|
||||||
SpinLockGuard guard(parent_lock_);
|
SpinLockGuard guard(parent_lock_);
|
||||||
const int64_t curr_time_us = common::ObTimeUtility::current_time();
|
const int64_t curr_time_us = common::ObTimeUtility::current_time();
|
||||||
const bool is_registering_timeout = (is_registering_() && curr_time_us - last_submit_register_req_time_us_ > PALF_CHILD_RESEND_REGISTER_INTERVAL_US);
|
const bool is_registering_timeout = (is_registering_() && curr_time_us - last_submit_register_req_time_us_ > PALF_CHILD_RESEND_REGISTER_INTERVAL_US);
|
||||||
|
|||||||
@ -596,7 +596,6 @@ private:
|
|||||||
int pre_sync_config_log_and_mode_meta_(const common::ObMember &server,
|
int pre_sync_config_log_and_mode_meta_(const common::ObMember &server,
|
||||||
const int64_t proposal_id,
|
const int64_t proposal_id,
|
||||||
const bool is_arb_replica);
|
const bool is_arb_replica);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// inner_config_meta_ is protected by RWLock in PalfHandleImpl,
|
// inner_config_meta_ is protected by RWLock in PalfHandleImpl,
|
||||||
// any read/write ops to inner_config_meta_ should acquire RLock/WLock in PalfHandleImpl.
|
// any read/write ops to inner_config_meta_ should acquire RLock/WLock in PalfHandleImpl.
|
||||||
|
|||||||
Reference in New Issue
Block a user