Fix: the log barrier before adding a member is too loose
This commit is contained in:
@ -1004,7 +1004,6 @@ int LogConfigMgr::pre_sync_config_log(const common::ObMember &server, const int6
|
||||
int ret = OB_SUCCESS;
|
||||
SpinLockGuard guard(lock_);
|
||||
common::ObMemberList member_list;
|
||||
LogMeta log_meta;
|
||||
if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
} else if (false == server.is_valid()) {
|
||||
@ -1012,16 +1011,8 @@ int LogConfigMgr::pre_sync_config_log(const common::ObMember &server, const int6
|
||||
} else if (CHANGING == state_) {
|
||||
ret = OB_EAGAIN;
|
||||
} else if (FALSE_IT(member_list.add_member(server))) {
|
||||
} else if (OB_FAIL(log_meta.generate_by_default(AccessMode::APPEND))) {
|
||||
PALF_LOG(WARN, "generate_default_log_meta failed", KR(ret), K_(palf_id), K_(self));
|
||||
// Generate default log barrier for pre_sync config log. Therefore, the added member will
|
||||
// accept this config log as long as receiving it, without waiting for log barrier.
|
||||
// The config meta has been accepted by majority, so it's safe to skip log barrier
|
||||
} else if (OB_FAIL(log_engine_->submit_change_config_meta_req(member_list, proposal_id,
|
||||
INVALID_PROPOSAL_ID,
|
||||
log_meta.get_log_snapshot_meta().base_lsn_,
|
||||
log_meta.get_log_prepare_meta().log_proposal_id_,
|
||||
log_ms_meta_))) {
|
||||
prev_log_proposal_id_, prev_lsn_, prev_mode_pid_, log_ms_meta_))) {
|
||||
PALF_LOG(WARN, "submit_change_config_meta_req failed", KR(ret), K_(palf_id), K_(self), K(proposal_id), K(server));
|
||||
}
|
||||
return ret;
|
||||
|
||||
@ -93,6 +93,7 @@ const int64_t PALF_CHILD_RESEND_REGISTER_INTERVAL_US = 4 * 1000 * 1000L; //
|
||||
const int64_t PALF_CHECK_PARENT_CHILD_INTERVAL_US = 1 * 1000 * 1000; // 1000ms
|
||||
const int64_t PALF_DUMP_DEBUG_INFO_INTERVAL_US = 10 * 1000 * 1000; // 10s
|
||||
constexpr int64_t INVALID_PROPOSAL_ID = INT64_MAX;
|
||||
constexpr int64_t PALF_INITIAL_PROPOSAL_ID = 0;
|
||||
|
||||
inline int64_t max_proposal_id(const int64_t a, const int64_t b)
|
||||
{
|
||||
|
||||
@ -40,7 +40,7 @@ int LogMeta::generate_by_default(const AccessMode &access_mode)
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
PALF_LOG(INFO, "invalid argument", KPC(this), K(access_mode));
|
||||
} else {
|
||||
const int64_t init_log_proposal_id(0);
|
||||
const int64_t init_log_proposal_id(PALF_INITIAL_PROPOSAL_ID);
|
||||
const LSN init_base_lsn(PALF_INITIAL_LSN_VAL);
|
||||
SCN init_ref_scn;
|
||||
init_ref_scn.set_min();
|
||||
@ -73,7 +73,8 @@ int LogMeta::generate_by_palf_base_info(const PalfBaseInfo &palf_base_info, cons
|
||||
} else {
|
||||
const int64_t prev_log_proposal_id = palf_base_info.prev_log_info_.log_proposal_id_;
|
||||
const SCN &prev_scn = palf_base_info.prev_log_info_.scn_;
|
||||
const int64_t init_log_proposal_id = (prev_log_proposal_id != INVALID_PROPOSAL_ID)? prev_log_proposal_id: 0;
|
||||
const int64_t init_log_proposal_id = (prev_log_proposal_id != INVALID_PROPOSAL_ID)? \
|
||||
prev_log_proposal_id: PALF_INITIAL_PROPOSAL_ID;
|
||||
const SCN init_ref_scn = (prev_scn.is_valid() ? prev_scn: SCN::min_scn());
|
||||
LogConfigInfo init_config_info;
|
||||
LogConfigVersion init_config_version;
|
||||
|
||||
@ -1015,6 +1015,7 @@ bool PalfHandleImpl::check_follower_sync_status_(const LogConfigChangeArgs &args
|
||||
(void) sw_.get_committed_end_lsn(first_leader_committed_end_lsn);
|
||||
|
||||
added_member_has_new_version = true;
|
||||
const int64_t max_log_gap_time = PALF_LEADER_ACTIVE_SYNC_TIMEOUT_US / 4;
|
||||
if (new_member_list.get_member_number() == 0) {
|
||||
} else if (FALSE_IT(conn_timeout_us = half_timeout_us / (new_member_list.get_member_number()))) {
|
||||
} else if (OB_FAIL(sync_get_committed_end_lsn_(args, new_member_list, new_replica_num,
|
||||
@ -1049,16 +1050,16 @@ bool PalfHandleImpl::check_follower_sync_status_(const LogConfigChangeArgs &args
|
||||
PALF_LOG(WARN, "follwer is not sync with leader after waiting 500 ms", KPC(this), K(sync_speed_gap),
|
||||
K(bool_ret), K(second_committed_end_lsn), K(second_leader_committed_end_lsn));
|
||||
} else if (FALSE_IT(expected_sync_time_s = (second_leader_committed_end_lsn - second_committed_end_lsn) / sync_speed_gap)) {
|
||||
} else if ((expected_sync_time_s * 1E6) <= half_timeout_us) {
|
||||
} else if ((expected_sync_time_s * 1E6) <= max_log_gap_time) {
|
||||
bool_ret = true;
|
||||
PALF_LOG(INFO, "majority of new_member_list are sync with leader, start config change",
|
||||
KPC(this), K(bool_ret), K(second_committed_end_lsn), K(first_committed_end_lsn), K(sync_speed_gap),
|
||||
K(second_leader_committed_end_lsn), K(half_timeout_us));
|
||||
K(second_leader_committed_end_lsn), K(max_log_gap_time));
|
||||
} else {
|
||||
bool_ret = false;
|
||||
PALF_LOG(INFO, "majority of new_member_list are far behind, can not change member",
|
||||
KPC(this), K(bool_ret), K(second_committed_end_lsn), K(first_committed_end_lsn), K(sync_speed_gap),
|
||||
K(second_leader_committed_end_lsn), K(half_timeout_us));
|
||||
K(second_leader_committed_end_lsn), K(max_log_gap_time));
|
||||
}
|
||||
}
|
||||
bool_ret = bool_ret && added_member_has_new_version;
|
||||
@ -3061,7 +3062,7 @@ int PalfHandleImpl::receive_config_log(const common::ObAddr &server,
|
||||
false == meta.is_valid()) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
PALF_LOG(WARN, "invalid argument", KR(ret), KPC(this), K(server),
|
||||
K(msg_proposal_id), K(prev_lsn), K(meta));
|
||||
K(msg_proposal_id), K(prev_lsn), K(prev_mode_pid), K(meta));
|
||||
} else if (OB_FAIL(try_update_proposal_id_(server, msg_proposal_id))) {
|
||||
PALF_LOG(WARN, "try_update_proposal_id_ failed", KR(ret), KPC(this), K(server), K(msg_proposal_id));
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user