Reviewer notes (text before the first "diff --git" line is skipped by git apply / patch).

What this patch does:
  * mittest/logservice/test_ob_simple_log_config_change.cpp
      - check_children_valid() returns the result of learner_addr_equal() through a
        dedicated bool (bool_ret) instead of storing it in `ret`.
      - adds the learner_loop test case.
  * src/logservice/palf/log_config_mgr.cpp
      - LogConfigMgr::check_parent_health() first takes parent_lock_ and child_lock_ and
        calls retire_parent_() when parent_ is valid and is also contained in children_.
  * src/logservice/palf/log_config_mgr.h
      - removes one blank line before a `private:` label.

NOTE(review): check_children_valid() still logs K(ret) only; bool_ret, the value that is
  actually returned, is not logged - confirm whether it should be added to the log line.
NOTE(review): the region set-up loop in learner_loop iterates member_cnt_ while every other
  loop in the test iterates node_cnt_; if member_cnt_ is smaller than node_cnt_ the learners
  never reach the shanghai branch - confirm this is intended.
NOTE(review): step 5 of learner_loop calls block_pcode() on the same learners that step 2
  already blocked - confirm the second call is intentional.
NOTE(review): in check_parent_health() a failing retire_parent_() leaves its error code in
  `ret` while execution continues, and parent_lock_ is released and then taken again right
  after the new scope - confirm both are acceptable for the code that follows.

diff --git a/mittest/logservice/test_ob_simple_log_config_change.cpp b/mittest/logservice/test_ob_simple_log_config_change.cpp
index edcf1a1169..b39a7fc1c4 100644
--- a/mittest/logservice/test_ob_simple_log_config_change.cpp
+++ b/mittest/logservice/test_ob_simple_log_config_change.cpp
@@ -54,9 +54,9 @@ bool check_children_valid(const std::vector<PalfHandleImplGuard*> &palf_list, co
       }
     }
   }
-  ret = all_children.learner_addr_equal(all_learner);
+  bool bool_ret = all_children.learner_addr_equal(all_learner);
   PALF_LOG(INFO, "check_children", K(ret), K(all_children), K(all_learner));
-  return ret;
+  return bool_ret;
 }
 
 bool check_parent(const std::vector<PalfHandleImplGuard*> &palf_list, const LogLearnerList &all_learner, const ObAddr &parent)
@@ -1003,6 +1003,145 @@ TEST_F(TestObSimpleLogClusterConfigChange, test_switch_leader)
   PALF_LOG(INFO, "end test switch_leader", K(id));
 }
 
+// 1. 3F(beijing), 4R(shanghai)
+// 2. the client submits logs to F replicas, but R replicas cannot receive logs
+// 3. switch an R to F; that R replica must be one of the children of another R.
+//    Due to step 2, the R will not receive the reconfiguration log
+// 4. let the remaining R replicas re-register their parents
+// 5. check that no R replica has its own parent among its children (no loop)
+TEST_F(TestObSimpleLogClusterConfigChange, learner_loop)
+{
+  SET_CASE_LOG_FILE(TEST_NAME, "learner_loop");
+  int ret = OB_SUCCESS;
+  const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
+  const int64_t id = ATOMIC_AAF(&palf_id_, 1);
+  int64_t leader_idx = 0;
+  PalfHandleImplGuard leader;
+  LogLearnerList all_learner;
+  const ObMemberList &node_list = get_node_list();
+  std::vector<PalfHandleImplGuard*> palf_list;
+  common::ObRegion beijing_region("BEIJING");
+  common::ObRegion shanghai_region("SHANGHAI");
+
+  EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, &loc_cb, leader_idx, leader));
+  EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
+  loc_cb.leader_ = get_cluster()[leader_idx]->get_addr();
+
+  // 1. init: add every node from index 3 onwards as a learner
+  for (int64_t i = 3; i < ObSimpleLogClusterTestBase::node_cnt_; ++i) {
+    common::ObMember added_learner;
+    EXPECT_EQ(OB_SUCCESS, node_list.get_member_by_index(i, added_learner));
+    LogLearner learner(added_learner.get_server(), 1);
+    EXPECT_EQ(OB_SUCCESS, all_learner.add_learner(learner));
+    EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_learner(added_learner, CONFIG_CHANGE_TIMEOUT));
+  }
+
+  // set region, version 42x
+  // for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++) {
+  //   const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
+  //   if (leader.palf_handle_impl_->config_mgr_.alive_paxos_memberlist_.contains(addr)) {
+  //     get_cluster()[0]->get_locality_manager()->set_server_region(addr, beijing_region);
+  //   } else {
+  //     get_cluster()[0]->get_locality_manager()->set_server_region(addr, shanghai_region);
+  //   }
+  // }
+  // for (auto palf_handle: palf_list) { palf_handle->palf_handle_impl_->update_self_region_(); }
+
+  // set region, version 421, master
+  LogMemberRegionMap region_map;
+  EXPECT_EQ(OB_SUCCESS, region_map.init("localmap", OB_MAX_MEMBER_NUMBER));
+  for (int i = 0; i < ObSimpleLogClusterTestBase::member_cnt_; i++) {
+    const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
+    if (leader.palf_handle_impl_->config_mgr_.alive_paxos_memberlist_.contains(addr)) {
+      EXPECT_EQ(OB_SUCCESS, palf_list[i]->palf_handle_impl_->set_region(beijing_region));
+      region_map.insert(addr, beijing_region);
+    } else {
+      EXPECT_EQ(OB_SUCCESS, palf_list[i]->palf_handle_impl_->set_region(shanghai_region));
+      region_map.insert(addr, shanghai_region);
+    }
+  }
+  // hand the collected address -> region map to the leader
+  EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->set_paxos_member_region_map(region_map));
+
+  // check topo
+  EXPECT_UNTIL_EQ(true, check_children_valid(palf_list, all_learner));
+  EXPECT_UNTIL_EQ(1, leader.palf_handle_impl_->config_mgr_.children_.get_member_number());
+  EXPECT_UNTIL_EQ(0, palf_list[1]->palf_handle_impl_->config_mgr_.children_.get_member_number());
+  EXPECT_UNTIL_EQ(0, palf_list[2]->palf_handle_impl_->config_mgr_.children_.get_member_number());
+  ObAddr same_parent, any_child;
+  int64_t any_child_idx = -1;
+  for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++)
+  {
+    const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
+    if (leader.palf_handle_impl_->config_mgr_.children_.contains(addr)) {
+      EXPECT_UNTIL_EQ(palf_list[i]->palf_handle_impl_->config_mgr_.parent_, leader.palf_handle_impl_->self_);
+      same_parent = addr;
+      PALF_LOG(INFO, "SAME_PARENT", K(id), K(addr), K(same_parent));
+      break;
+    }
+  }
+  EXPECT_TRUE(same_parent.is_valid());
+  for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++)
+  {
+    const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
+    if (all_learner.contains(addr) && addr != same_parent) {
+      EXPECT_UNTIL_EQ(same_parent, palf_list[i]->palf_handle_impl_->config_mgr_.parent_);
+      any_child = addr;
+      any_child_idx = i;
+      PALF_LOG(INFO, "CHECK_PARENT", K(id), K(addr), K(same_parent));
+    }
+  }
+  // 2. replicating logs to all F replicas
+  EXPECT_NE(-1, any_child_idx);
+  EXPECT_UNTIL_EQ(leader.palf_handle_impl_->config_mgr_.log_ms_meta_.curr_.config_.config_version_,
+                  palf_list[any_child_idx]->palf_handle_impl_->config_mgr_.log_ms_meta_.curr_.config_.config_version_);
+  EXPECT_UNTIL_EQ(leader.palf_handle_impl_->get_max_lsn().val_, leader.palf_handle_impl_->get_end_lsn().val_);
+  for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++) {
+    const common::ObAddr &addr = palf_list[i]->palf_handle_impl_->self_;
+    if (true == all_learner.contains(addr)) {
+      block_pcode(i, ObRpcPacketCode::OB_LOG_PUSH_REQ);
+    }
+  }
+  EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
+  EXPECT_UNTIL_EQ(leader.palf_handle_impl_->get_max_lsn().val_, leader.palf_handle_impl_->get_end_lsn().val_);
+
+  // 3. switch a R replica to F
+  LogConfigVersion config_version;
+  ASSERT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_config_version(config_version));
+  EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_learner_to_acceptor(ObMember(any_child, -1), 4, config_version, CONFIG_CHANGE_TIMEOUT));
+
+  // 4. let the remaining R replicas re-register their parents
+  leader.palf_handle_impl_->config_mgr_.children_.reset();
+  for (auto palf_handle: palf_list) {
+    const common::ObAddr addr = palf_handle->palf_handle_impl_->self_;
+    if (true == all_learner.contains(addr) && addr != any_child) {
+      palf_handle->palf_handle_impl_->config_mgr_.retire_parent_();
+      palf_handle->palf_handle_impl_->config_mgr_.register_parent_();
+    }
+  }
+
+  // 5. check loop
+  for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++) {
+    const common::ObAddr &addr = palf_list[i]->palf_handle_impl_->self_;
+    if (true == all_learner.contains(addr)) {
+      block_pcode(i, ObRpcPacketCode::OB_LOG_PUSH_REQ);
+    }
+  }
+  sleep(2);
+
+  EXPECT_EQ(OB_SUCCESS, all_learner.remove_learner(any_child));
+  for (auto palf_handle: palf_list) {
+    const common::ObAddr addr = palf_handle->palf_handle_impl_->self_;
+    if (true == all_learner.contains(addr)) {
+      EXPECT_UNTIL_EQ(true, palf_handle->palf_handle_impl_->config_mgr_.parent_.is_valid());
+      EXPECT_UNTIL_EQ(false, palf_handle->palf_handle_impl_->config_mgr_.children_.contains(palf_handle->palf_handle_impl_->config_mgr_.parent_));
+    }
+  }
+
+  revert_cluster_palf_handle_guard(palf_list);
+  PALF_LOG(INFO, "end test learner_loop", K(id));
+}
+
 } // end unittest
 } // end oceanbase
 
diff --git a/src/logservice/palf/log_config_mgr.cpp b/src/logservice/palf/log_config_mgr.cpp
index b87493f26e..69cc3ced7a 100755
--- a/src/logservice/palf/log_config_mgr.cpp
+++ b/src/logservice/palf/log_config_mgr.cpp
@@ -2711,6 +2711,16 @@ int LogConfigMgr::check_parent_health()
   if (IS_NOT_INIT) {
     ret = OB_NOT_INIT;
   } else {
+    {
+      SpinLockGuard parent_guard(parent_lock_);
+      SpinLockGuard child_guard(child_lock_);
+      // break learner loop: retire a parent that is also listed among our own children
+      if (parent_.is_valid() &&
+          children_.contains(parent_) &&
+          OB_FAIL(retire_parent_())) {
+        PALF_LOG(WARN, "retire_parent_ failed", KR(ret), K_(palf_id), K_(self));
+      }
+    }
     SpinLockGuard guard(parent_lock_);
     const int64_t curr_time_us = common::ObTimeUtility::current_time();
     const bool is_registering_timeout = (is_registering_() && curr_time_us - last_submit_register_req_time_us_ > PALF_CHILD_RESEND_REGISTER_INTERVAL_US);
diff --git a/src/logservice/palf/log_config_mgr.h b/src/logservice/palf/log_config_mgr.h
index c1f7d81184..a2cf66eaef 100755
--- a/src/logservice/palf/log_config_mgr.h
+++ b/src/logservice/palf/log_config_mgr.h
@@ -596,7 +596,6 @@ private:
   int pre_sync_config_log_and_mode_meta_(const common::ObMember &server,
                                          const int64_t proposal_id,
                                          const bool is_arb_replica);
-
 private:
   // inner_config_meta_ is protected by RWLock in PalfHandleImpl,
   // any read/write ops to inner_config_meta_ should acquire RLock/WLock in PalfHandleImpl.