[fix] fix committed_end_lsn failing to advance after removing a member
This commit is contained in:
@ -437,8 +437,10 @@ int LogConfigMgr::get_alive_member_list_with_arb(
|
||||
|
||||
// require rlock of PalfHandleImpl
|
||||
int LogConfigMgr::get_log_sync_member_list_for_generate_committed_lsn(
|
||||
ObMemberList &member_list,
|
||||
int64_t &replica_num,
|
||||
ObMemberList &prev_member_list,
|
||||
int64_t &prev_replica_num,
|
||||
ObMemberList &curr_member_list,
|
||||
int64_t &curr_replica_num,
|
||||
bool &is_before_barrier,
|
||||
LSN &barrier_lsn) const
|
||||
{
|
||||
@ -451,6 +453,9 @@ int LogConfigMgr::get_log_sync_member_list_for_generate_committed_lsn(
|
||||
if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
PALF_LOG(WARN, "LogConfigMgr not init", KR(ret));
|
||||
} else if (OB_FAIL(curr_member_list.deep_copy(log_ms_meta_.curr_.config_.log_sync_memberlist_))) {
|
||||
PALF_LOG(WARN, "deep_copy member_list failed", KR(ret), K_(palf_id), K_(self));
|
||||
} else if (FALSE_IT(curr_replica_num = log_ms_meta_.curr_.config_.log_sync_replica_num_)) {
|
||||
} else if (OB_UNLIKELY(prev_committed_end_lsn < reconfig_barrier_.prev_end_lsn_ &&
|
||||
reconfig_barrier_.prev_end_lsn_.is_valid() &&
|
||||
prev_mode_pid == reconfig_barrier_.prev_mode_pid_)) {
|
||||
@ -465,16 +470,12 @@ int LogConfigMgr::get_log_sync_member_list_for_generate_committed_lsn(
|
||||
// be used only when the reconfig_barrier_.prev_mode_pid_ is equal to current mode
|
||||
// proposal_id. That means access mode hasn’t been changed (PALF hasn’t been flashed back)
|
||||
// since last reconfiguration.
|
||||
if (OB_FAIL(member_list.deep_copy(log_ms_meta_.prev_.config_.log_sync_memberlist_))) {
|
||||
if (OB_FAIL(prev_member_list.deep_copy(log_ms_meta_.prev_.config_.log_sync_memberlist_))) {
|
||||
PALF_LOG(WARN, "deep_copy member_list failed", KR(ret), K_(palf_id), K_(self));
|
||||
} else {
|
||||
replica_num = log_ms_meta_.prev_.config_.log_sync_replica_num_;
|
||||
prev_replica_num = log_ms_meta_.prev_.config_.log_sync_replica_num_;
|
||||
}
|
||||
} else if (OB_FAIL(member_list.deep_copy(log_ms_meta_.curr_.config_.log_sync_memberlist_))) {
|
||||
PALF_LOG(WARN, "deep_copy member_list failed", KR(ret), K_(palf_id), K_(self));
|
||||
} else {
|
||||
replica_num = log_ms_meta_.curr_.config_.log_sync_replica_num_;
|
||||
}
|
||||
} else { }
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@ -406,8 +406,10 @@ public:
|
||||
// return OB_SUCCESS if success
|
||||
// else return other errno
|
||||
virtual int get_log_sync_member_list_for_generate_committed_lsn(
|
||||
ObMemberList &member_list,
|
||||
int64_t &replica_num,
|
||||
ObMemberList &prev_member_list,
|
||||
int64_t &prev_replica_num,
|
||||
ObMemberList &curr_member_list,
|
||||
int64_t &curr_replica_num,
|
||||
bool &is_before_barrier,
|
||||
LSN &barrier_lsn) const;
|
||||
virtual int get_arbitration_member(common::ObMember &arb_member) const;
|
||||
@ -498,11 +500,12 @@ public:
|
||||
SpinLockGuard guard(lock_);
|
||||
int64_t pos = 0;
|
||||
J_OBJ_START();
|
||||
J_KV(K_(palf_id), K_(self), K_(alive_paxos_memberlist), K_(alive_paxos_replica_num), \
|
||||
K_(log_ms_meta), K_(checking_barrier), K_(reconfig_barrier), K_(persistent_config_version), \
|
||||
K_(ms_ack_list), K_(resend_config_version), K_(resend_log_list), \
|
||||
K_(last_submit_config_log_time_us), K_(region), K_(paxos_member_region_map), \
|
||||
K_(register_time_us), K_(parent), K_(parent_keepalive_time_us), \
|
||||
J_KV(K_(palf_id), K_(self), K_(alive_paxos_memberlist), K_(alive_paxos_replica_num), \
|
||||
K_(log_ms_meta), K_(running_args), K_(state), K_(checking_barrier), K_(reconfig_barrier), \
|
||||
K_(persistent_config_version), K_(ms_ack_list), K_(resend_config_version), K_(resend_log_list), \
|
||||
K_(last_submit_config_log_time_us), K_(need_change_config_bkgd), K_(bkgd_config_version), \
|
||||
K_(region), K_(paxos_member_region_map), \
|
||||
K_(register_time_us), K_(parent), K_(parent_keepalive_time_us), \
|
||||
K_(last_submit_register_req_time_us), K_(children), K_(last_submit_keepalive_time_us), KP(this));
|
||||
J_OBJ_END();
|
||||
return pos;
|
||||
|
||||
@ -2598,27 +2598,31 @@ int64_t LogSlidingWindow::get_start_id() const
|
||||
int LogSlidingWindow::gen_committed_end_lsn_(LSN &new_committed_end_lsn)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObMemberList member_list;
|
||||
int64_t replica_num = 0;
|
||||
LSN result_lsn;
|
||||
ObMemberList curr_member_list, prev_member_list;
|
||||
int64_t curr_replica_num = 0, prev_replica_num = 0;
|
||||
LSN curr_result_lsn, prev_result_lsn;
|
||||
bool is_before_barrier = false;
|
||||
LSN barrier_lsn;
|
||||
if (OB_FAIL(mm_->get_log_sync_member_list_for_generate_committed_lsn(member_list,
|
||||
replica_num, is_before_barrier, barrier_lsn))) {
|
||||
if (OB_FAIL(mm_->get_log_sync_member_list_for_generate_committed_lsn(prev_member_list,
|
||||
prev_replica_num, curr_member_list, curr_replica_num, is_before_barrier, barrier_lsn))) {
|
||||
PALF_LOG(WARN, "get_log_sync_member_list failed", K(ret), K_(palf_id), K_(self));
|
||||
} else if (OB_FAIL(get_majority_lsn_(member_list, replica_num, result_lsn))) {
|
||||
} else if (OB_FAIL(get_majority_lsn_(curr_member_list, curr_replica_num, curr_result_lsn))) {
|
||||
PALF_LOG(WARN, "get_majority_lsn failed", K(ret), K_(palf_id), K_(self));
|
||||
} else if (OB_UNLIKELY(true == is_before_barrier) &&
|
||||
OB_FAIL(get_majority_lsn_(prev_member_list, prev_replica_num, prev_result_lsn))) {
|
||||
PALF_LOG(WARN, "get_majority_lsn failed", K(ret), K_(palf_id), K_(self));
|
||||
} else {
|
||||
// Note: the leader generates committed_end_lsn based on different memberlists before
|
||||
// and after a reconfiguration, barrier_lsn is the boundary.
|
||||
// - Logs which are before barrier_lsn should be committed by the previous memberlist.
|
||||
// - Logs which are before barrier_lsn could be committed by either the previous or the current memberlist.
|
||||
LSN result_lsn = (OB_UNLIKELY(is_before_barrier))? MAX(prev_result_lsn, curr_result_lsn): curr_result_lsn;
|
||||
// - Logs which are after barrier_lsn should be committed by the current memberlist.
|
||||
// - If current committed_end_lsn is smaller than barrier_lsn, then new committed_end_lsn
|
||||
// generated by previous memberlist must be smaller than barrier_lsn.
|
||||
// For example, memberlist:{A} + replica:B. After adding B successfully, Logs after the
|
||||
// barrier may have been persisted by A, but not B. The leader A can not commit logs after
|
||||
// the barrier with memberlist:{A}.
|
||||
result_lsn = (is_before_barrier)? MIN(result_lsn, barrier_lsn): result_lsn;
|
||||
result_lsn = (OB_UNLIKELY(is_before_barrier))? MIN(result_lsn, barrier_lsn): result_lsn;
|
||||
// Note: The leader is not allowed to generate new committed_end_lsn while changing configs with arb.
|
||||
// 1. {A, B, C(arb)}, A is the leader, end_lsns of A and B are both 100.
|
||||
// 2. B crashes and A decides to degrade B to a learner, A changes memberlist to {A, C(arb)} and
|
||||
|
||||
Reference in New Issue
Block a user