From 754dba3795768bc213d08a040b20294110b38c31 Mon Sep 17 00:00:00 2001 From: obdev Date: Tue, 6 Dec 2022 15:35:53 +0000 Subject: [PATCH] fix the log barrier before adding member is too loose --- src/logservice/palf/log_config_mgr.cpp | 11 +---------- src/logservice/palf/log_define.h | 1 + src/logservice/palf/log_meta.cpp | 5 +++-- src/logservice/palf/palf_handle_impl.cpp | 9 +++++---- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/logservice/palf/log_config_mgr.cpp b/src/logservice/palf/log_config_mgr.cpp index 64ef0bfc5e..364cc8feea 100644 --- a/src/logservice/palf/log_config_mgr.cpp +++ b/src/logservice/palf/log_config_mgr.cpp @@ -1004,7 +1004,6 @@ int LogConfigMgr::pre_sync_config_log(const common::ObMember &server, const int6 int ret = OB_SUCCESS; SpinLockGuard guard(lock_); common::ObMemberList member_list; - LogMeta log_meta; if (IS_NOT_INIT) { ret = OB_NOT_INIT; } else if (false == server.is_valid()) { @@ -1012,16 +1011,8 @@ int LogConfigMgr::pre_sync_config_log(const common::ObMember &server, const int6 } else if (CHANGING == state_) { ret = OB_EAGAIN; } else if (FALSE_IT(member_list.add_member(server))) { - } else if (OB_FAIL(log_meta.generate_by_default(AccessMode::APPEND))) { - PALF_LOG(WARN, "generate_default_log_meta failed", KR(ret), K_(palf_id), K_(self)); - // Generate default log barrier for pre_sync config log. Therefore, the added member will - // accept this config log as long as receiving it, without waiting for log barrier. - // The config meta has been accepted by majority, so it's safe to skip log barrier } else if (OB_FAIL(log_engine_->submit_change_config_meta_req(member_list, proposal_id, - INVALID_PROPOSAL_ID, - log_meta.get_log_snapshot_meta().base_lsn_, - log_meta.get_log_prepare_meta().log_proposal_id_, - log_ms_meta_))) { + prev_log_proposal_id_, prev_lsn_, prev_mode_pid_, log_ms_meta_))) { PALF_LOG(WARN, "submit_change_config_meta_req failed", KR(ret), K_(palf_id), K_(self), K(proposal_id), K(server)); } return ret; diff --git a/src/logservice/palf/log_define.h b/src/logservice/palf/log_define.h index 73b76bf5a0..3f66fb2224 100644 --- a/src/logservice/palf/log_define.h +++ b/src/logservice/palf/log_define.h @@ -93,6 +93,7 @@ const int64_t PALF_CHILD_RESEND_REGISTER_INTERVAL_US = 4 * 1000 * 1000L; // const int64_t PALF_CHECK_PARENT_CHILD_INTERVAL_US = 1 * 1000 * 1000; // 1000ms const int64_t PALF_DUMP_DEBUG_INFO_INTERVAL_US = 10 * 1000 * 1000; // 10s constexpr int64_t INVALID_PROPOSAL_ID = INT64_MAX; +constexpr int64_t PALF_INITIAL_PROPOSAL_ID = 0; inline int64_t max_proposal_id(const int64_t a, const int64_t b) { diff --git a/src/logservice/palf/log_meta.cpp b/src/logservice/palf/log_meta.cpp index f955977078..1157ffe7c8 100644 --- a/src/logservice/palf/log_meta.cpp +++ b/src/logservice/palf/log_meta.cpp @@ -40,7 +40,7 @@ int LogMeta::generate_by_default(const AccessMode &access_mode) ret = OB_INVALID_ARGUMENT; PALF_LOG(INFO, "invalid argument", KPC(this), K(access_mode)); } else { - const int64_t init_log_proposal_id(0); + const int64_t init_log_proposal_id(PALF_INITIAL_PROPOSAL_ID); const LSN init_base_lsn(PALF_INITIAL_LSN_VAL); SCN init_ref_scn; init_ref_scn.set_min(); @@ -73,7 +73,8 @@ int LogMeta::generate_by_palf_base_info(const PalfBaseInfo &palf_base_info, cons } else { const int64_t prev_log_proposal_id = palf_base_info.prev_log_info_.log_proposal_id_; const SCN &prev_scn = palf_base_info.prev_log_info_.scn_; - const int64_t init_log_proposal_id = (prev_log_proposal_id != INVALID_PROPOSAL_ID)? prev_log_proposal_id: 0; + const int64_t init_log_proposal_id = (prev_log_proposal_id != INVALID_PROPOSAL_ID)? \ + prev_log_proposal_id: PALF_INITIAL_PROPOSAL_ID; const SCN init_ref_scn = (prev_scn.is_valid() ? prev_scn: SCN::min_scn()); LogConfigInfo init_config_info; LogConfigVersion init_config_version; diff --git a/src/logservice/palf/palf_handle_impl.cpp b/src/logservice/palf/palf_handle_impl.cpp index 00e44ab807..894159592f 100644 --- a/src/logservice/palf/palf_handle_impl.cpp +++ b/src/logservice/palf/palf_handle_impl.cpp @@ -1015,6 +1015,7 @@ bool PalfHandleImpl::check_follower_sync_status_(const LogConfigChangeArgs &args (void) sw_.get_committed_end_lsn(first_leader_committed_end_lsn); added_member_has_new_version = true; + const int64_t max_log_gap_time = PALF_LEADER_ACTIVE_SYNC_TIMEOUT_US / 4; if (new_member_list.get_member_number() == 0) { } else if (FALSE_IT(conn_timeout_us = half_timeout_us / (new_member_list.get_member_number()))) { } else if (OB_FAIL(sync_get_committed_end_lsn_(args, new_member_list, new_replica_num, @@ -1049,16 +1050,16 @@ bool PalfHandleImpl::check_follower_sync_status_(const LogConfigChangeArgs &args PALF_LOG(WARN, "follwer is not sync with leader after waiting 500 ms", KPC(this), K(sync_speed_gap), K(bool_ret), K(second_committed_end_lsn), K(second_leader_committed_end_lsn)); } else if (FALSE_IT(expected_sync_time_s = (second_leader_committed_end_lsn - second_committed_end_lsn) / sync_speed_gap)) { - } else if ((expected_sync_time_s * 1E6) <= half_timeout_us) { + } else if ((expected_sync_time_s * 1E6) <= max_log_gap_time) { bool_ret = true; PALF_LOG(INFO, "majority of new_member_list are sync with leader, start config change", KPC(this), K(bool_ret), K(second_committed_end_lsn), K(first_committed_end_lsn), K(sync_speed_gap), - K(second_leader_committed_end_lsn), K(half_timeout_us)); + K(second_leader_committed_end_lsn), K(max_log_gap_time)); } else { bool_ret = false; PALF_LOG(INFO, "majority of new_member_list are far behind, can not change member", KPC(this), K(bool_ret), K(second_committed_end_lsn), K(first_committed_end_lsn), K(sync_speed_gap), - K(second_leader_committed_end_lsn), K(half_timeout_us)); + K(second_leader_committed_end_lsn), K(max_log_gap_time)); } } bool_ret = bool_ret && added_member_has_new_version; @@ -3061,7 +3062,7 @@ int PalfHandleImpl::receive_config_log(const common::ObAddr &server, false == meta.is_valid()) { ret = OB_INVALID_ARGUMENT; PALF_LOG(WARN, "invalid argument", KR(ret), KPC(this), K(server), - K(msg_proposal_id), K(prev_lsn), K(meta)); + K(msg_proposal_id), K(prev_lsn), K(prev_mode_pid), K(meta)); } else if (OB_FAIL(try_update_proposal_id_(server, msg_proposal_id))) { PALF_LOG(WARN, "try_update_proposal_id_ failed", KR(ret), KPC(this), K(server), K(msg_proposal_id)); } else {