[CP] [fix] avoid loop when registering parents for R replicas
This commit is contained in:
		@ -54,9 +54,9 @@ bool check_children_valid(const std::vector<PalfHandleImplGuard*> &palf_list, co
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  ret = all_children.learner_addr_equal(all_learner);
 | 
			
		||||
  bool bool_ret = all_children.learner_addr_equal(all_learner);
 | 
			
		||||
  PALF_LOG(INFO, "check_children", K(ret), K(all_children), K(all_learner));
 | 
			
		||||
  return ret;
 | 
			
		||||
  return bool_ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool check_parent(const std::vector<PalfHandleImplGuard*> &palf_list, const LogLearnerList &all_learner, const ObAddr &parent)
 | 
			
		||||
@ -1003,6 +1003,145 @@ TEST_F(TestObSimpleLogClusterConfigChange, test_switch_leader)
 | 
			
		||||
  PALF_LOG(INFO, "end test switch_leader", K(id));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 1. 3F(beijing), 4R(shanghai)
 | 
			
		||||
// 2. the client submits logs to F replicas, but R replicas can not receive logs
 | 
			
		||||
// 3. switch a R to F, the R replica must be one of the children of another R.
 | 
			
		||||
//    Due to step 2, the R will not receive the reconfiguration log
 | 
			
		||||
// 4. enable the remaining R re-register parents
 | 
			
		||||
// 5. check loop between R replicas
 | 
			
		||||
TEST_F(TestObSimpleLogClusterConfigChange, learner_loop)
 | 
			
		||||
{
 | 
			
		||||
  SET_CASE_LOG_FILE(TEST_NAME, "learner_loop");
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
 | 
			
		||||
	const int64_t id = ATOMIC_AAF(&palf_id_, 1);
 | 
			
		||||
	int64_t leader_idx = 0;
 | 
			
		||||
  PalfHandleImplGuard leader;
 | 
			
		||||
  LogLearnerList all_learner;
 | 
			
		||||
  const ObMemberList &node_list = get_node_list();
 | 
			
		||||
  std::vector<PalfHandleImplGuard*> palf_list;
 | 
			
		||||
  common::ObRegion beijing_region("BEIJING");
 | 
			
		||||
  common::ObRegion shanghai_region("SHANGHAI");
 | 
			
		||||
 | 
			
		||||
	EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, &loc_cb, leader_idx, leader));
 | 
			
		||||
  EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
 | 
			
		||||
  loc_cb.leader_ = get_cluster()[leader_idx]->get_addr();
 | 
			
		||||
 | 
			
		||||
  // 1. init
 | 
			
		||||
  for (int64_t i = 3; i < ObSimpleLogClusterTestBase::node_cnt_; ++i) {
 | 
			
		||||
    common::ObMember added_learner;
 | 
			
		||||
    EXPECT_EQ(OB_SUCCESS, node_list.get_member_by_index(i, added_learner));
 | 
			
		||||
    LogLearner learner(added_learner.get_server(), 1);
 | 
			
		||||
    EXPECT_EQ(OB_SUCCESS, all_learner.add_learner(learner));
 | 
			
		||||
    EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_learner(added_learner, CONFIG_CHANGE_TIMEOUT));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // set region, version 42x
 | 
			
		||||
  // for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++) {
 | 
			
		||||
  //   const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
 | 
			
		||||
  //   if (leader.palf_handle_impl_->config_mgr_.alive_paxos_memberlist_.contains(addr)) {
 | 
			
		||||
  //     get_cluster()[0]->get_locality_manager()->set_server_region(addr, beijing_region);
 | 
			
		||||
  //   } else {
 | 
			
		||||
  //     get_cluster()[0]->get_locality_manager()->set_server_region(addr, shanghai_region);
 | 
			
		||||
  //   }
 | 
			
		||||
  // }
 | 
			
		||||
  // for (auto palf_handle: palf_list) { palf_handle->palf_handle_impl_->update_self_region_(); }
 | 
			
		||||
 | 
			
		||||
  // set region, version 421, master
 | 
			
		||||
  LogMemberRegionMap region_map;
 | 
			
		||||
  EXPECT_EQ(OB_SUCCESS, region_map.init("localmap", OB_MAX_MEMBER_NUMBER));
 | 
			
		||||
  for (int i = 0; i < ObSimpleLogClusterTestBase::member_cnt_; i++) {
 | 
			
		||||
    const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
 | 
			
		||||
    if (leader.palf_handle_impl_->config_mgr_.alive_paxos_memberlist_.contains(addr)) {
 | 
			
		||||
      EXPECT_EQ(OB_SUCCESS, palf_list[i]->palf_handle_impl_->set_region(beijing_region));
 | 
			
		||||
      region_map.insert(addr, beijing_region);
 | 
			
		||||
    } else {
 | 
			
		||||
      EXPECT_EQ(OB_SUCCESS, palf_list[i]->palf_handle_impl_->set_region(shanghai_region));
 | 
			
		||||
      region_map.insert(addr, shanghai_region);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  // notify leader region of follower i has changed
 | 
			
		||||
  EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->set_paxos_member_region_map(region_map));
 | 
			
		||||
 | 
			
		||||
  // check topo
 | 
			
		||||
  EXPECT_UNTIL_EQ(true, check_children_valid(palf_list, all_learner));
 | 
			
		||||
  EXPECT_UNTIL_EQ(1, leader.palf_handle_impl_->config_mgr_.children_.get_member_number());
 | 
			
		||||
  EXPECT_UNTIL_EQ(0, palf_list[1]->palf_handle_impl_->config_mgr_.children_.get_member_number());
 | 
			
		||||
  EXPECT_UNTIL_EQ(0, palf_list[2]->palf_handle_impl_->config_mgr_.children_.get_member_number());
 | 
			
		||||
  ObAddr same_parent, any_child;
 | 
			
		||||
  int64_t any_child_idx = -1;
 | 
			
		||||
  for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++)
 | 
			
		||||
  {
 | 
			
		||||
    const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
 | 
			
		||||
    if (leader.palf_handle_impl_->config_mgr_.children_.contains(addr)) {
 | 
			
		||||
      EXPECT_UNTIL_EQ(palf_list[i]->palf_handle_impl_->config_mgr_.parent_, leader.palf_handle_impl_->self_);
 | 
			
		||||
      same_parent = addr;
 | 
			
		||||
      PALF_LOG(INFO, "SAME_PARENT", K(id), K(addr), K(same_parent));
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  EXPECT_TRUE(same_parent.is_valid());
 | 
			
		||||
  for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++)
 | 
			
		||||
  {
 | 
			
		||||
    const common::ObAddr addr = palf_list[i]->palf_handle_impl_->self_;
 | 
			
		||||
    if (all_learner.contains(addr) && addr != same_parent) {
 | 
			
		||||
      EXPECT_UNTIL_EQ(same_parent, palf_list[i]->palf_handle_impl_->config_mgr_.parent_);
 | 
			
		||||
      any_child = addr;
 | 
			
		||||
      any_child_idx = i;
 | 
			
		||||
      PALF_LOG(INFO, "CHECK_PARENT", K(id), K(addr), K(same_parent));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  // 2. replicating logs to all F replicas
 | 
			
		||||
  EXPECT_NE(-1, any_child_idx);
 | 
			
		||||
  EXPECT_UNTIL_EQ(leader.palf_handle_impl_->config_mgr_.log_ms_meta_.curr_.config_.config_version_,
 | 
			
		||||
      palf_list[any_child_idx]->palf_handle_impl_->config_mgr_.log_ms_meta_.curr_.config_.config_version_);
 | 
			
		||||
  EXPECT_UNTIL_EQ(leader.palf_handle_impl_->get_max_lsn().val_, leader.palf_handle_impl_->get_end_lsn().val_);
 | 
			
		||||
  for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++) {
 | 
			
		||||
    const common::ObAddr &addr = palf_list[i]->palf_handle_impl_->self_;
 | 
			
		||||
    if (true == all_learner.contains(addr)) {
 | 
			
		||||
      block_pcode(i, ObRpcPacketCode::OB_LOG_PUSH_REQ);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
 | 
			
		||||
  EXPECT_UNTIL_EQ(leader.palf_handle_impl_->get_max_lsn().val_, leader.palf_handle_impl_->get_end_lsn().val_);
 | 
			
		||||
 | 
			
		||||
  // 3. switch a R replica to F
 | 
			
		||||
  LogConfigVersion config_version;
 | 
			
		||||
  ASSERT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_config_version(config_version));
 | 
			
		||||
  EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_learner_to_acceptor(ObMember(any_child, -1), 4, config_version, CONFIG_CHANGE_TIMEOUT));
 | 
			
		||||
 | 
			
		||||
  // 4. enable the remaining R re-register parents
 | 
			
		||||
  leader.palf_handle_impl_->config_mgr_.children_.reset();
 | 
			
		||||
  for (auto palf_handle: palf_list) {
 | 
			
		||||
    const common::ObAddr addr = palf_handle->palf_handle_impl_->self_;
 | 
			
		||||
    if (true == all_learner.contains(addr) && addr != any_child) {
 | 
			
		||||
      palf_handle->palf_handle_impl_->config_mgr_.retire_parent_();
 | 
			
		||||
      palf_handle->palf_handle_impl_->config_mgr_.register_parent_();
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // 5. check loop
 | 
			
		||||
  for (int i = 0; i < ObSimpleLogClusterTestBase::node_cnt_; i++) {
 | 
			
		||||
    const common::ObAddr &addr = palf_list[i]->palf_handle_impl_->self_;
 | 
			
		||||
    if (true == all_learner.contains(addr)) {
 | 
			
		||||
      block_pcode(i, ObRpcPacketCode::OB_LOG_PUSH_REQ);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  sleep(2);
 | 
			
		||||
 | 
			
		||||
  EXPECT_EQ(OB_SUCCESS, all_learner.remove_learner(any_child));
 | 
			
		||||
  for (auto palf_handle: palf_list) {
 | 
			
		||||
    const common::ObAddr addr = palf_handle->palf_handle_impl_->self_;
 | 
			
		||||
    if (true == all_learner.contains(addr)) {
 | 
			
		||||
      EXPECT_UNTIL_EQ(true, palf_handle->palf_handle_impl_->config_mgr_.parent_.is_valid());
 | 
			
		||||
      EXPECT_UNTIL_EQ(false, palf_handle->palf_handle_impl_->config_mgr_.children_.contains(palf_handle->palf_handle_impl_->config_mgr_.parent_));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  revert_cluster_palf_handle_guard(palf_list);
 | 
			
		||||
  PALF_LOG(INFO, "end test learner_loop", K(id));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // end unittest
 | 
			
		||||
} // end oceanbase
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -2711,6 +2711,16 @@ int LogConfigMgr::check_parent_health()
 | 
			
		||||
  if (IS_NOT_INIT) {
 | 
			
		||||
    ret = OB_NOT_INIT;
 | 
			
		||||
  } else {
 | 
			
		||||
    {
 | 
			
		||||
      SpinLockGuard parent_guard(parent_lock_);
 | 
			
		||||
      SpinLockGuard child_guard(child_lock_);
 | 
			
		||||
      // break learner loop: retire the parent when it is also one of our children
 | 
			
		||||
      if (parent_.is_valid() &&
 | 
			
		||||
          children_.contains(parent_) &&
 | 
			
		||||
          OB_FAIL(retire_parent_())) {
 | 
			
		||||
        PALF_LOG(WARN, "retire_parent_ failed", KR(ret), K_(palf_id), K_(self));
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    SpinLockGuard guard(parent_lock_);
 | 
			
		||||
    const int64_t curr_time_us = common::ObTimeUtility::current_time();
 | 
			
		||||
    const bool is_registering_timeout = (is_registering_() && curr_time_us - last_submit_register_req_time_us_ > PALF_CHILD_RESEND_REGISTER_INTERVAL_US);
 | 
			
		||||
 | 
			
		||||
@ -596,7 +596,6 @@ private:
 | 
			
		||||
  int pre_sync_config_log_and_mode_meta_(const common::ObMember &server,
 | 
			
		||||
                                         const int64_t proposal_id,
 | 
			
		||||
                                         const bool is_arb_replica);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  // inner_config_meta_ is protected by RWLock in PalfHandleImpl,
 | 
			
		||||
  // any read/write ops to inner_config_meta_ should acquire RLock/WLock in PalfHandleImpl.
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user