Fix: the log barrier checked before adding a member is too loose

This commit is contained in:
obdev
2022-12-06 15:35:53 +00:00
committed by ob-robot
parent 65c78d50fc
commit 754dba3795
4 changed files with 10 additions and 16 deletions

View File

@ -1004,7 +1004,6 @@ int LogConfigMgr::pre_sync_config_log(const common::ObMember &server, const int6
int ret = OB_SUCCESS;
SpinLockGuard guard(lock_);
common::ObMemberList member_list;
LogMeta log_meta;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
} else if (false == server.is_valid()) {
@ -1012,16 +1011,8 @@ int LogConfigMgr::pre_sync_config_log(const common::ObMember &server, const int6
} else if (CHANGING == state_) {
ret = OB_EAGAIN;
} else if (FALSE_IT(member_list.add_member(server))) {
} else if (OB_FAIL(log_meta.generate_by_default(AccessMode::APPEND))) {
PALF_LOG(WARN, "generate_default_log_meta failed", KR(ret), K_(palf_id), K_(self));
// Generate default log barrier for pre_sync config log. Therefore, the added member will
// accept this config log as long as receiving it, without waiting for log barrier.
// The config meta has been accepted by majority, so it's safe to skip log barrier
} else if (OB_FAIL(log_engine_->submit_change_config_meta_req(member_list, proposal_id,
INVALID_PROPOSAL_ID,
log_meta.get_log_snapshot_meta().base_lsn_,
log_meta.get_log_prepare_meta().log_proposal_id_,
log_ms_meta_))) {
prev_log_proposal_id_, prev_lsn_, prev_mode_pid_, log_ms_meta_))) {
PALF_LOG(WARN, "submit_change_config_meta_req failed", KR(ret), K_(palf_id), K_(self), K(proposal_id), K(server));
}
return ret;

View File

@ -93,6 +93,7 @@ const int64_t PALF_CHILD_RESEND_REGISTER_INTERVAL_US = 4 * 1000 * 1000L; //
// Intervals below are expressed in microseconds (see the per-line unit notes).
const int64_t PALF_CHECK_PARENT_CHILD_INTERVAL_US = 1 * 1000 * 1000; // 1000ms
const int64_t PALF_DUMP_DEBUG_INFO_INTERVAL_US = 10 * 1000 * 1000; // 10s
// Sentinel proposal id (INT64_MAX); callers compare against it to detect that
// no valid proposal id is available.
constexpr int64_t INVALID_PROPOSAL_ID = INT64_MAX;
// Proposal id that a freshly generated log meta starts from; also used as the
// fallback when the previous log's proposal id equals INVALID_PROPOSAL_ID.
constexpr int64_t PALF_INITIAL_PROPOSAL_ID = 0;
inline int64_t max_proposal_id(const int64_t a, const int64_t b)
{

View File

@ -40,7 +40,7 @@ int LogMeta::generate_by_default(const AccessMode &access_mode)
ret = OB_INVALID_ARGUMENT;
PALF_LOG(INFO, "invalid argument", KPC(this), K(access_mode));
} else {
const int64_t init_log_proposal_id(0);
const int64_t init_log_proposal_id(PALF_INITIAL_PROPOSAL_ID);
const LSN init_base_lsn(PALF_INITIAL_LSN_VAL);
SCN init_ref_scn;
init_ref_scn.set_min();
@ -73,7 +73,8 @@ int LogMeta::generate_by_palf_base_info(const PalfBaseInfo &palf_base_info, cons
} else {
const int64_t prev_log_proposal_id = palf_base_info.prev_log_info_.log_proposal_id_;
const SCN &prev_scn = palf_base_info.prev_log_info_.scn_;
const int64_t init_log_proposal_id = (prev_log_proposal_id != INVALID_PROPOSAL_ID)? prev_log_proposal_id: 0;
const int64_t init_log_proposal_id = (prev_log_proposal_id != INVALID_PROPOSAL_ID)? \
prev_log_proposal_id: PALF_INITIAL_PROPOSAL_ID;
const SCN init_ref_scn = (prev_scn.is_valid() ? prev_scn: SCN::min_scn());
LogConfigInfo init_config_info;
LogConfigVersion init_config_version;

View File

@ -1015,6 +1015,7 @@ bool PalfHandleImpl::check_follower_sync_status_(const LogConfigChangeArgs &args
(void) sw_.get_committed_end_lsn(first_leader_committed_end_lsn);
added_member_has_new_version = true;
const int64_t max_log_gap_time = PALF_LEADER_ACTIVE_SYNC_TIMEOUT_US / 4;
if (new_member_list.get_member_number() == 0) {
} else if (FALSE_IT(conn_timeout_us = half_timeout_us / (new_member_list.get_member_number()))) {
} else if (OB_FAIL(sync_get_committed_end_lsn_(args, new_member_list, new_replica_num,
@ -1049,16 +1050,16 @@ bool PalfHandleImpl::check_follower_sync_status_(const LogConfigChangeArgs &args
PALF_LOG(WARN, "follwer is not sync with leader after waiting 500 ms", KPC(this), K(sync_speed_gap),
K(bool_ret), K(second_committed_end_lsn), K(second_leader_committed_end_lsn));
} else if (FALSE_IT(expected_sync_time_s = (second_leader_committed_end_lsn - second_committed_end_lsn) / sync_speed_gap)) {
} else if ((expected_sync_time_s * 1E6) <= half_timeout_us) {
} else if ((expected_sync_time_s * 1E6) <= max_log_gap_time) {
bool_ret = true;
PALF_LOG(INFO, "majority of new_member_list are sync with leader, start config change",
KPC(this), K(bool_ret), K(second_committed_end_lsn), K(first_committed_end_lsn), K(sync_speed_gap),
K(second_leader_committed_end_lsn), K(half_timeout_us));
K(second_leader_committed_end_lsn), K(max_log_gap_time));
} else {
bool_ret = false;
PALF_LOG(INFO, "majority of new_member_list are far behind, can not change member",
KPC(this), K(bool_ret), K(second_committed_end_lsn), K(first_committed_end_lsn), K(sync_speed_gap),
K(second_leader_committed_end_lsn), K(half_timeout_us));
K(second_leader_committed_end_lsn), K(max_log_gap_time));
}
}
bool_ret = bool_ret && added_member_has_new_version;
@ -3061,7 +3062,7 @@ int PalfHandleImpl::receive_config_log(const common::ObAddr &server,
false == meta.is_valid()) {
ret = OB_INVALID_ARGUMENT;
PALF_LOG(WARN, "invalid argument", KR(ret), KPC(this), K(server),
K(msg_proposal_id), K(prev_lsn), K(meta));
K(msg_proposal_id), K(prev_lsn), K(prev_mode_pid), K(meta));
} else if (OB_FAIL(try_update_proposal_id_(server, msg_proposal_id))) {
PALF_LOG(WARN, "try_update_proposal_id_ failed", KR(ret), KPC(this), K(server), K(msg_proposal_id));
} else {