[fix] fix the committed_end_lsn can not be advanced after removing member
This commit is contained in:
parent
14f411060e
commit
eeacd9c6bc
@ -362,6 +362,67 @@ TEST_F(TestObSimpleLogClusterConfigChangeMockEle, test_remove_if_another_rebuild
|
||||
PALF_LOG(INFO, "end test test_committed_end_lsn_after_removing_member", K(id));
|
||||
}
|
||||
|
||||
// 1. (ABCD), A is the leader, D crashed
|
||||
// 2. remove C from member list (ABCD) and match_lsn_map, committed_end_lsn is 100 and last_submit_end_lsn is 200.
|
||||
// 2. because committed_end_lsn is smaller than last_submit_end_lsn, the leader will use prev_member_list (ABCD) to generate committed_end_lsn
|
||||
// 3. C is not in match_lsn_map, D crashed, the leader may can not generate committed_end_lsn
|
||||
TEST_F(TestObSimpleLogClusterConfigChangeMockEle, test_committed_end_lsn_after_removing_member2)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
const int64_t id = ATOMIC_AAF(&palf_id_, 1);
|
||||
const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
|
||||
SET_CASE_LOG_FILE(TEST_NAME, "test_committed_end_lsn_after_removing_member");
|
||||
PALF_LOG(INFO, "begin test test_committed_end_lsn_after_removing_member", K(id));
|
||||
{
|
||||
int64_t leader_idx = 0;
|
||||
PalfHandleImplGuard leader;
|
||||
std::vector<PalfHandleImplGuard*> palf_list;
|
||||
EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_mock_election(id, leader_idx, leader));
|
||||
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 200, id));
|
||||
EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
|
||||
|
||||
const int64_t b_idx = (leader_idx + 1) % 4;
|
||||
const int64_t c_idx = (leader_idx + 2) % 4;
|
||||
const int64_t d_idx = (leader_idx + 3) % 4;
|
||||
const common::ObAddr b_addr = get_cluster()[b_idx]->get_addr();
|
||||
const common::ObAddr c_addr = get_cluster()[c_idx]->get_addr();
|
||||
const common::ObAddr d_addr = get_cluster()[d_idx]->get_addr();
|
||||
PalfHandleImplGuard *a_handle = palf_list[leader_idx];
|
||||
PalfHandleImplGuard *b_handle = palf_list[b_idx];
|
||||
PalfHandleImplGuard *c_handle = palf_list[c_idx];
|
||||
PalfHandleImplGuard *d_handle = palf_list[d_idx];
|
||||
LogConfigVersion config_version;
|
||||
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_config_version(config_version));
|
||||
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_member(common::ObMember(d_addr, 1), 4, config_version, CONFIG_CHANGE_TIMEOUT));
|
||||
|
||||
// 1. D crashed
|
||||
block_all_net(d_idx);
|
||||
|
||||
// 2. leader can not commit logs
|
||||
block_pcode(leader_idx, ObRpcPacketCode::OB_LOG_PUSH_RESP);
|
||||
EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
|
||||
sleep(1);
|
||||
EXPECT_GT(leader.palf_handle_impl_->sw_.last_submit_lsn_, leader.palf_handle_impl_->sw_.committed_end_lsn_);
|
||||
|
||||
// 3. remove C
|
||||
EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->remove_member(common::ObMember(c_addr, 1), 3, CONFIG_CHANGE_TIMEOUT));
|
||||
EXPECT_GT(leader.palf_handle_impl_->sw_.last_submit_lsn_, leader.palf_handle_impl_->sw_.committed_end_lsn_);
|
||||
EXPECT_EQ(leader.palf_handle_impl_->config_mgr_.reconfig_barrier_.prev_end_lsn_, leader.palf_handle_impl_->sw_.last_submit_end_lsn_);
|
||||
EXPECT_GT(leader.palf_handle_impl_->config_mgr_.reconfig_barrier_.prev_end_lsn_, leader.palf_handle_impl_->sw_.committed_end_lsn_);
|
||||
|
||||
// 3. leader can commit logs
|
||||
unblock_pcode(leader_idx, ObRpcPacketCode::OB_LOG_PUSH_RESP);
|
||||
|
||||
// 4. check if the leader can commit logs after C has been removed from match_lsn_map
|
||||
EXPECT_UNTIL_EQ(leader.palf_handle_impl_->sw_.committed_end_lsn_, leader.palf_handle_impl_->sw_.last_submit_end_lsn_);
|
||||
|
||||
leader.reset();
|
||||
revert_cluster_palf_handle_guard(palf_list);
|
||||
}
|
||||
delete_paxos_group(id);
|
||||
PALF_LOG(INFO, "end test test_committed_end_lsn_after_removing_member", K(id));
|
||||
}
|
||||
|
||||
} // end unittest
|
||||
} // end oceanbase
|
||||
|
||||
|
@ -437,8 +437,10 @@ int LogConfigMgr::get_alive_member_list_with_arb(
|
||||
|
||||
// require rlock of PalfHandleImpl
|
||||
int LogConfigMgr::get_log_sync_member_list_for_generate_committed_lsn(
|
||||
ObMemberList &member_list,
|
||||
int64_t &replica_num,
|
||||
ObMemberList &prev_member_list,
|
||||
int64_t &prev_replica_num,
|
||||
ObMemberList &curr_member_list,
|
||||
int64_t &curr_replica_num,
|
||||
bool &is_before_barrier,
|
||||
LSN &barrier_lsn) const
|
||||
{
|
||||
@ -451,6 +453,9 @@ int LogConfigMgr::get_log_sync_member_list_for_generate_committed_lsn(
|
||||
if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
PALF_LOG(WARN, "LogConfigMgr not init", KR(ret));
|
||||
} else if (OB_FAIL(curr_member_list.deep_copy(log_ms_meta_.curr_.config_.log_sync_memberlist_))) {
|
||||
PALF_LOG(WARN, "deep_copy member_list failed", KR(ret), K_(palf_id), K_(self));
|
||||
} else if (FALSE_IT(curr_replica_num = log_ms_meta_.curr_.config_.log_sync_replica_num_)) {
|
||||
} else if (OB_UNLIKELY(prev_committed_end_lsn < reconfig_barrier_.prev_end_lsn_ &&
|
||||
reconfig_barrier_.prev_end_lsn_.is_valid() &&
|
||||
prev_mode_pid == reconfig_barrier_.prev_mode_pid_)) {
|
||||
@ -465,16 +470,12 @@ int LogConfigMgr::get_log_sync_member_list_for_generate_committed_lsn(
|
||||
// be used only when the reconfir_barrier_.prev_mode_pid_ is equal to current mode
|
||||
// proposal_id. That means access mode hasn’t been changed (PALF hasn’t been flashed back)
|
||||
// since last reconfiguration.
|
||||
if (OB_FAIL(member_list.deep_copy(log_ms_meta_.prev_.config_.log_sync_memberlist_))) {
|
||||
if (OB_FAIL(prev_member_list.deep_copy(log_ms_meta_.prev_.config_.log_sync_memberlist_))) {
|
||||
PALF_LOG(WARN, "deep_copy member_list failed", KR(ret), K_(palf_id), K_(self));
|
||||
} else {
|
||||
replica_num = log_ms_meta_.prev_.config_.log_sync_replica_num_;
|
||||
prev_replica_num = log_ms_meta_.prev_.config_.log_sync_replica_num_;
|
||||
}
|
||||
} else if (OB_FAIL(member_list.deep_copy(log_ms_meta_.curr_.config_.log_sync_memberlist_))) {
|
||||
PALF_LOG(WARN, "deep_copy member_list failed", KR(ret), K_(palf_id), K_(self));
|
||||
} else {
|
||||
replica_num = log_ms_meta_.curr_.config_.log_sync_replica_num_;
|
||||
}
|
||||
} else { }
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -406,8 +406,10 @@ public:
|
||||
// return OB_SUCCESS if success
|
||||
// else return other errno
|
||||
virtual int get_log_sync_member_list_for_generate_committed_lsn(
|
||||
ObMemberList &member_list,
|
||||
int64_t &replica_num,
|
||||
ObMemberList &prev_member_list,
|
||||
int64_t &prev_replica_num,
|
||||
ObMemberList &curr_member_list,
|
||||
int64_t &curr_replica_num,
|
||||
bool &is_before_barrier,
|
||||
LSN &barrier_lsn) const;
|
||||
virtual int get_arbitration_member(common::ObMember &arb_member) const;
|
||||
@ -498,11 +500,12 @@ public:
|
||||
SpinLockGuard guard(lock_);
|
||||
int64_t pos = 0;
|
||||
J_OBJ_START();
|
||||
J_KV(K_(palf_id), K_(self), K_(alive_paxos_memberlist), K_(alive_paxos_replica_num), \
|
||||
K_(log_ms_meta), K_(checking_barrier), K_(reconfig_barrier), K_(persistent_config_version), \
|
||||
K_(ms_ack_list), K_(resend_config_version), K_(resend_log_list), \
|
||||
K_(last_submit_config_log_time_us), K_(region), K_(paxos_member_region_map), \
|
||||
K_(register_time_us), K_(parent), K_(parent_keepalive_time_us), \
|
||||
J_KV(K_(palf_id), K_(self), K_(alive_paxos_memberlist), K_(alive_paxos_replica_num), \
|
||||
K_(log_ms_meta), K_(running_args), K_(state), K_(checking_barrier), K_(reconfig_barrier), \
|
||||
K_(persistent_config_version), K_(ms_ack_list), K_(resend_config_version), K_(resend_log_list), \
|
||||
K_(last_submit_config_log_time_us), K_(need_change_config_bkgd), K_(bkgd_config_version), \
|
||||
K_(region), K_(paxos_member_region_map), \
|
||||
K_(register_time_us), K_(parent), K_(parent_keepalive_time_us), \
|
||||
K_(last_submit_register_req_time_us), K_(children), K_(last_submit_keepalive_time_us), KP(this));
|
||||
J_OBJ_END();
|
||||
return pos;
|
||||
|
@ -2598,27 +2598,31 @@ int64_t LogSlidingWindow::get_start_id() const
|
||||
int LogSlidingWindow::gen_committed_end_lsn_(LSN &new_committed_end_lsn)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObMemberList member_list;
|
||||
int64_t replica_num = 0;
|
||||
LSN result_lsn;
|
||||
ObMemberList curr_member_list, prev_member_list;
|
||||
int64_t curr_replica_num = 0, prev_replica_num = 0;
|
||||
LSN curr_result_lsn, prev_result_lsn;
|
||||
bool is_before_barrier = false;
|
||||
LSN barrier_lsn;
|
||||
if (OB_FAIL(mm_->get_log_sync_member_list_for_generate_committed_lsn(member_list,
|
||||
replica_num, is_before_barrier, barrier_lsn))) {
|
||||
if (OB_FAIL(mm_->get_log_sync_member_list_for_generate_committed_lsn(prev_member_list,
|
||||
prev_replica_num, curr_member_list, curr_replica_num, is_before_barrier, barrier_lsn))) {
|
||||
PALF_LOG(WARN, "get_log_sync_member_list failed", K(ret), K_(palf_id), K_(self));
|
||||
} else if (OB_FAIL(get_majority_lsn_(member_list, replica_num, result_lsn))) {
|
||||
} else if (OB_FAIL(get_majority_lsn_(curr_member_list, curr_replica_num, curr_result_lsn))) {
|
||||
PALF_LOG(WARN, "get_majority_lsn failed", K(ret), K_(palf_id), K_(self));
|
||||
} else if (OB_UNLIKELY(true == is_before_barrier) &&
|
||||
OB_FAIL(get_majority_lsn_(prev_member_list, prev_replica_num, prev_result_lsn))) {
|
||||
PALF_LOG(WARN, "get_majority_lsn failed", K(ret), K_(palf_id), K_(self));
|
||||
} else {
|
||||
// Note: the leader generates committed_end_lsn based on different memberlists before
|
||||
// and after a reconfiguration, barrier_lsn is the boundary.
|
||||
// - Logs which is before barrier_lsn should be committed by previous memberlist.
|
||||
// - Logs which is before barrier_lsn could be committed by previous/current memberlist.
|
||||
LSN result_lsn = (OB_UNLIKELY(is_before_barrier))? MAX(prev_result_lsn, curr_result_lsn): curr_result_lsn;
|
||||
// - Logs which is after barrier_lsn should be committed by current memberlist.
|
||||
// - If current committed_end_lsn is smaller than barrier_lsn, then new committed_end_lsn
|
||||
// generated by previous memberlist must be smaller than barrier_lsn.
|
||||
// For example, memberlist:{A} + replica:B. After adding B successfully, Logs after the
|
||||
// barrier may have been persisted by A, but not B. The leader A can not commit logs after
|
||||
// the barrier with memberlist:{A}.
|
||||
result_lsn = (is_before_barrier)? MIN(result_lsn, barrier_lsn): result_lsn;
|
||||
result_lsn = (OB_UNLIKELY(is_before_barrier))? MIN(result_lsn, barrier_lsn): result_lsn;
|
||||
// Note: The leader is not allowed to generate new committed_end_lsn while changing configs with arb.
|
||||
// 1. {A, B, C(arb)}, A is the leader, end_lsns of A and B are both 100.
|
||||
// 2. B crashes and A decicdes to degrade B to a learner, A changes memberlist to {A, C(arb)} and
|
||||
|
@ -113,8 +113,10 @@ public:
|
||||
return ret;
|
||||
}
|
||||
int get_log_sync_member_list_for_generate_committed_lsn(
|
||||
ObMemberList &member_list,
|
||||
int64_t &replica_num,
|
||||
ObMemberList &prev_member_list,
|
||||
int64_t &prev_replica_num,
|
||||
ObMemberList &curr_member_list,
|
||||
int64_t &curr_replica_num,
|
||||
bool &is_before_barrier,
|
||||
LSN &barrier_lsn) const
|
||||
{
|
||||
@ -126,20 +128,19 @@ public:
|
||||
if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
PALF_LOG(WARN, "LogConfigMgr not init", KR(ret));
|
||||
} else if (OB_FAIL(curr_member_list.deep_copy(log_ms_meta_.curr_.config_.log_sync_memberlist_))) {
|
||||
PALF_LOG(WARN, "deep_copy member_list failed", KR(ret), K_(palf_id), K_(self));
|
||||
} else if (FALSE_IT(curr_replica_num = log_ms_meta_.curr_.config_.log_sync_replica_num_)) {
|
||||
} else if (OB_UNLIKELY(prev_committed_end_lsn < reconfig_barrier_.prev_end_lsn_ &&
|
||||
reconfig_barrier_.prev_end_lsn_.is_valid())) {
|
||||
is_before_barrier = true;
|
||||
barrier_lsn = reconfig_barrier_.prev_end_lsn_;
|
||||
if (OB_FAIL(member_list.deep_copy(log_ms_meta_.prev_.config_.log_sync_memberlist_))) {
|
||||
if (OB_FAIL(prev_member_list.deep_copy(log_ms_meta_.prev_.config_.log_sync_memberlist_))) {
|
||||
PALF_LOG(WARN, "deep_copy member_list failed", KR(ret), K_(palf_id), K_(self));
|
||||
} else {
|
||||
replica_num = log_ms_meta_.prev_.config_.log_sync_replica_num_;
|
||||
prev_replica_num = log_ms_meta_.prev_.config_.log_sync_replica_num_;
|
||||
}
|
||||
} else if (OB_FAIL(member_list.deep_copy(log_ms_meta_.curr_.config_.log_sync_memberlist_))) {
|
||||
PALF_LOG(WARN, "deep_copy member_list failed", KR(ret), K_(palf_id), K_(self));
|
||||
} else {
|
||||
replica_num = log_ms_meta_.curr_.config_.log_sync_replica_num_;
|
||||
}
|
||||
} else { }
|
||||
return ret;
|
||||
}
|
||||
int get_alive_member_list_with_arb(common::ObMemberList &member_list, int64_t &replica_num) const
|
||||
|
Loading…
x
Reference in New Issue
Block a user