[PALF] fix the leader being revoked when the PALF group has been degraded

This commit is contained in:
BinChenn 2023-03-21 16:56:50 +00:00 committed by ob-robot
parent a1c3c9cf20
commit 1ec233bb5d
3 changed files with 31 additions and 12 deletions

View File

@ -139,6 +139,27 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_degrade_upgrade)
palf_list[another_f_idx]->palf_handle_impl_->enable_vote();
EXPECT_TRUE(is_upgraded(leader, id));
// test revoking the leader when arb service is degrading
block_all_net(another_f_idx);
const common::ObAddr follower_addr = get_cluster()[another_f_idx]->get_addr();
LogConfigChangeArgs args(common::ObMember(follower_addr, 1), 0, DEGRADE_ACCEPTOR_TO_LEARNER);
int64_t ele_epoch;
common::ObRole ele_role;
int64_t proposal_id = leader.palf_handle_impl_->state_mgr_.get_proposal_id();
leader.palf_handle_impl_->election_.get_role(ele_role, ele_epoch);
LogConfigVersion config_version;
EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config_(args, proposal_id, ele_epoch, config_version));
EXPECT_FALSE(leader.palf_handle_impl_->config_mgr_.alive_paxos_memberlist_.contains(follower_addr));
EXPECT_TRUE(leader.palf_handle_impl_->config_mgr_.applied_alive_paxos_memberlist_.contains(follower_addr));
EXPECT_EQ(leader.palf_handle_impl_->config_mgr_.state_, 1);
// reset status, supposing the lease has expired
block_net(leader_idx, another_f_idx);
leader.palf_handle_impl_->config_mgr_.reset_status();
EXPECT_TRUE(is_degraded(leader, another_f_idx));
unblock_net(leader_idx, another_f_idx);
unblock_all_net(another_f_idx);
revert_cluster_palf_handle_guard(palf_list);
leader.reset();
delete_paxos_group(id);
@ -177,8 +198,6 @@ TEST_F(TestObSimpleLogClusterArbService, test_4f1a_degrade_upgrade)
EXPECT_TRUE(is_degraded(leader, another_f1_idx));
EXPECT_TRUE(is_degraded(leader, another_f2_idx));
// 确保lease过期,验证loc_cb是否可以找到leader拉日志
sleep(5);
unblock_all_net(another_f1_idx);
unblock_all_net(another_f2_idx);
loc_cb.leader_ = leader.palf_handle_impl_->self_;
@ -225,7 +244,6 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_reconfirm_degrade_upgrade)
int64_t new_leader_idx = -1;
PalfHandleImplGuard new_leader;
EXPECT_EQ(OB_SUCCESS, get_leader(id, new_leader, new_leader_idx));
sleep(5);
loc_cb.leader_ = new_leader.palf_handle_impl_->self_;
unblock_net(leader_idx, another_f_idx);
unblock_net(leader_idx, arb_replica_idx);
@ -283,7 +301,6 @@ TEST_F(TestObSimpleLogClusterArbService, test_4f1a_reconfirm_degrade_upgrade)
EXPECT_TRUE(is_degraded(new_leader, another_f1_idx));
EXPECT_TRUE(is_degraded(new_leader, leader_idx));
sleep(5);
loc_cb.leader_ = new_leader.palf_handle_impl_->self_;
// restart two servers
unblock_all_net(leader_idx);

View File

@ -941,16 +941,16 @@ int LogConfigMgr::check_config_change_args_(const LogConfigChangeArgs &args, boo
} else if (OB_FAIL(get_curr_member_list(curr_member_list, curr_replica_num))) {
PALF_LOG(WARN, "get_curr_member_list failed", KR(ret), K_(palf_id), K_(self), K(args));
} else {
const ObMemberList &log_sync_member_list = log_ms_meta_.curr_.log_sync_memberlist_;
const common::GlobalLearnerList &curr_learner_list = log_ms_meta_.curr_.learnerlist_;
const common::GlobalLearnerList &degraded_learnerlist = log_ms_meta_.curr_.degraded_learnerlist_;
const ObMemberList &log_sync_member_list = config_meta_.curr_.log_sync_memberlist_;
const common::GlobalLearnerList &curr_learner_list = config_meta_.curr_.learnerlist_;
const common::GlobalLearnerList &degraded_learnerlist = config_meta_.curr_.degraded_learnerlist_;
const common::ObMember &member = args.server_;
const int64_t new_replica_num = args.new_replica_num_;
const bool is_in_log_sync_memberlist = log_sync_member_list.contains(member);
const bool is_in_degraded_learnerlist = degraded_learnerlist.contains(member);
const bool is_in_learnerlist = curr_learner_list.contains(member);
const bool is_arb_replica = (log_ms_meta_.curr_.arbitration_member_ == member);
const bool has_arb_replica = (log_ms_meta_.curr_.arbitration_member_.is_valid());
const bool is_arb_replica = (config_meta_.curr_.arbitration_member_ == member);
const bool has_arb_replica = (config_meta_.curr_.arbitration_member_.is_valid());
switch (args.type_) {
case CHANGE_REPLICA_NUM:
{
@ -1032,7 +1032,7 @@ int LogConfigMgr::check_config_change_args_(const LogConfigChangeArgs &args, boo
PALF_LOG(INFO, "arb replica already exists, but new_replica_num not equal to curr val", KR(ret), K_(palf_id), K_(self),
K_(log_ms_meta), K(member), K(new_replica_num), K_(alive_paxos_replica_num));
}
} else if (log_ms_meta_.curr_.arbitration_member_.is_valid()) {
} else if (true == has_arb_replica) {
ret = OB_INVALID_ARGUMENT;
PALF_LOG(WARN, "arbitration replica exists, can not add_arb_member", KR(ret), K_(palf_id), K_(self), K_(log_ms_meta), K(member));
}
@ -1127,7 +1127,7 @@ int LogConfigMgr::check_config_change_args_(const LogConfigChangeArgs &args, boo
}
} else if (!is_in_degraded_learnerlist && is_in_log_sync_memberlist) {
// degrade operation can only be done when there is arbitration replica in paxos group
if (args.type_ == DEGRADE_ACCEPTOR_TO_LEARNER && !log_ms_meta_.curr_.arbitration_member_.is_valid()) {
if (args.type_ == DEGRADE_ACCEPTOR_TO_LEARNER && false == has_arb_replica) {
ret = OB_INVALID_ARGUMENT;
PALF_LOG(WARN, "arb member is invalid, can't degrade", KR(ret), K_(palf_id), K_(self), K_(log_ms_meta), K(member));
}
@ -1479,7 +1479,7 @@ int LogConfigMgr::generate_new_config_info_(const int64_t proposal_id,
int ret = OB_SUCCESS;
const LogConfigChangeType cc_type = args.type_;
const common::ObMember member = args.server_;
new_config_info = log_ms_meta_.curr_;
new_config_info = config_meta_.curr_;
int64_t curr_replica_num = -1;
if (INVALID_PROPOSAL_ID == proposal_id || !args.is_valid()) {
ret = OB_INVALID_ARGUMENT;

View File

@ -609,6 +609,8 @@ TEST_F(TestLogConfigMgr, test_apply_config_meta)
// memberlist will not be applied right now when there is arb member, so use alive_paxos_memberlist_
bool member_equal = (cm.alive_paxos_memberlist_.member_addr_equal(expect_member_list));
EXPECT_TRUE(member_equal);
// apply config meta
cm.config_meta_ = cm.log_ms_meta_;
// remove(C, 5)
cm.reset_status();
LogConfigChangeArgs remove_c_arg(ObMember(addr3, -1), 5, palf::REMOVE_MEMBER);