diff --git a/src/logservice/palf/log_reconfirm.cpp b/src/logservice/palf/log_reconfirm.cpp index 08ccec642c..e265a3cdf0 100644 --- a/src/logservice/palf/log_reconfirm.cpp +++ b/src/logservice/palf/log_reconfirm.cpp @@ -156,14 +156,14 @@ int LogReconfirm::init_reconfirm_() ObMemberList member_list; int64_t replica_num = 0; if (OB_FAIL(mm_->get_alive_member_list_with_arb(member_list, replica_num))) { - PALF_LOG(WARN, "get_alive_member_list_with_arb failed", K(ret), K_(palf_id)); + PALF_LOG(WARN, "get_alive_member_list_with_arb failed", K_(palf_id)); } else if (!member_list.contains(self_)) { ret = OB_ERR_UNEXPECTED; - PALF_LOG(WARN, "self is not in curr_member_list", K(ret), K_(palf_id), K(member_list), K(self_)); + PALF_LOG(WARN, "self is not in curr_member_list", K_(palf_id), K(member_list), K(self_)); } else if (OB_FAIL(curr_paxos_follower_list_.deep_copy(member_list))) { - PALF_LOG(ERROR, "deep_copy failed", K(ret), K_(palf_id)); + PALF_LOG(ERROR, "deep_copy failed", K_(palf_id)); } else if (OB_FAIL(curr_paxos_follower_list_.remove_server(self_))) { - PALF_LOG(ERROR, "remove_server failed", K(ret), K_(palf_id), K_(curr_paxos_follower_list), K_(self), KP(mm_)); + PALF_LOG(ERROR, "remove_server failed", K_(palf_id), K_(curr_paxos_follower_list), K_(self), KP(mm_)); } else { majority_cnt_ = replica_num / 2 + 1; PALF_LOG(INFO, "init_reconfirm_ success", K(ret), K_(palf_id), K(member_list), K_(self), K_(majority_cnt), K(replica_num)); @@ -179,9 +179,9 @@ int LogReconfirm::submit_prepare_log_() int64_t max_flushed_log_pid = INVALID_PROPOSAL_ID; const int64_t now_us = ObTimeUtility::current_time(); if (OB_FAIL(sw_->get_max_flushed_log_info(unused_prev_lsn, max_flushed_end_lsn, max_flushed_log_pid))) { - PALF_LOG(WARN, "get_max_flushed_log_info failed", K(ret), K_(palf_id)); + PALF_LOG(WARN, "get_max_flushed_log_info failed", K_(palf_id)); } else if (OB_EAGAIN != (ret = mode_mgr_->reconfirm_mode_meta())) { - PALF_LOG(WARN, "reconfirm_mode_meta failed", K(ret), K_(palf_id)); + PALF_LOG(WARN, "reconfirm_mode_meta failed", K_(palf_id)); } else { // clear prepare_log_ack_list_ prepare_log_ack_list_.reset(); @@ -216,10 +216,10 @@ int LogReconfirm::submit_prepare_log_() new_proposal_id_ = old_proposal_id + 1; if (OB_FAIL(state_mgr_->handle_prepare_request(self_, new_proposal_id_))) { - PALF_LOG(WARN, "handle_prepare_request failed", K(ret), K_(palf_id), K_(self), K(new_proposal_id_)); + PALF_LOG(WARN, "handle_prepare_request failed", K_(palf_id), K_(self), K(new_proposal_id_)); } else if (curr_paxos_follower_list_.is_valid() && OB_FAIL(log_engine_->submit_prepare_meta_req(curr_paxos_follower_list_, new_proposal_id_))) { - PALF_LOG(WARN, "submit_prepare_meta_req failed", K(ret), K_(palf_id), K_(self), K_(new_proposal_id)); + PALF_LOG(WARN, "submit_prepare_meta_req failed", K_(palf_id), K_(self), K_(new_proposal_id)); } else { const int64_t old_prepare_time_us = last_submit_prepare_req_time_us_; last_submit_prepare_req_time_us_ = now_us; @@ -257,18 +257,18 @@ int LogReconfirm::handle_prepare_response(const common::ObAddr &server, } else if (!server.is_valid() || INVALID_PROPOSAL_ID == src_proposal_id || INVALID_PROPOSAL_ID == accept_proposal_id || !last_lsn.is_valid()) { ret = OB_INVALID_ARGUMENT; - PALF_LOG(WARN, "invalid arguments", K(ret), K_(palf_id), K(server), K(src_proposal_id), + PALF_LOG(WARN, "invalid arguments", K_(palf_id), K(server), K(src_proposal_id), K(accept_proposal_id), K(last_lsn), K(committed_end_lsn)); // NB: no need change 'majority_max_log_server_' after FETCH_MAX_LOG_LSN } else if (src_proposal_id != new_proposal_id_ || FETCH_MAX_LOG_LSN != state_) { ret = OB_STATE_NOT_MATCH; - PALF_LOG(WARN, "state not match", K(ret), K_(palf_id), K(server), K(src_proposal_id), K_(new_proposal_id), + PALF_LOG(WARN, "state not match", K_(palf_id), K(server), K(src_proposal_id), K_(new_proposal_id), K(state_)); } else if (false == curr_paxos_follower_list_.contains(server)) { - PALF_LOG(WARN, "receive prepare req not in follower_list", K(ret), K_(palf_id), K(server), + PALF_LOG(WARN, "receive prepare req not in follower_list", K_(palf_id), K(server), K_(curr_paxos_follower_list)); } else if (OB_FAIL(prepare_log_ack_list_.add_server(server))) { - PALF_LOG(WARN, "prepare_log_ack_list_ add_sever failed", K(ret), K_(palf_id), K(server)); + PALF_LOG(WARN, "prepare_log_ack_list_ add_sever failed", K_(palf_id), K(server)); } else if (INVALID_PROPOSAL_ID != majority_max_accept_pid_ && accept_proposal_id < majority_max_accept_pid_) { // accept_proposal_id is smaller than cur majority value, ignore @@ -368,7 +368,7 @@ int LogReconfirm::try_fetch_log_() || (sw_start_id == last_record_sw_start_id_ && now_us - last_fetch_log_time_us_ >= PALF_FETCH_LOG_INTERVAL_US)) { if (OB_FAIL(sw_->try_fetch_log_for_reconfirm(majority_max_log_server_, majority_max_lsn_, is_fetched))) { - PALF_LOG(WARN, "try_fetch_log_for_reconfirm failed", K(ret), K_(palf_id)); + PALF_LOG(WARN, "try_fetch_log_for_reconfirm failed", K_(palf_id)); } else if (is_fetched) { // send fetch req success last_record_sw_start_id_ = sw_start_id; @@ -444,13 +444,13 @@ int LogReconfirm::reconfirm() ObLockGuard guard(lock_); if (IS_NOT_INIT) { ret = OB_NOT_INIT; - PALF_LOG(WARN, "LogReconfirm is not init", K(ret), K_(self), K_(palf_id)); + PALF_LOG(WARN, "LogReconfirm is not init", K_(self), K_(palf_id)); } else { const int64_t now_us = ObTimeUtility::current_time(); switch (state_) { case INITED: { if (OB_FAIL(init_reconfirm_())) { - PALF_LOG(WARN, "init reconfirm failed", K(ret), K_(palf_id)); + PALF_LOG(WARN, "init reconfirm failed", K_(palf_id)); } else { state_ = WAITING_LOG_FLUSHED; PALF_EVENT("Reconfirm come into WAITING_LOG_FLUSHED state", palf_id_, K_(self), K_(majority_cnt), @@ -460,9 +460,9 @@ int LogReconfirm::reconfirm() } case WAITING_LOG_FLUSHED: { if (OB_FAIL(wait_all_log_flushed_())) { - PALF_LOG(WARN, "wait_all_log_flushed_ failed", K(ret), K_(palf_id)); + PALF_LOG(WARN, "wait_all_log_flushed_ failed", K_(palf_id)); } else if (OB_FAIL(submit_prepare_log_())) { - PALF_LOG(WARN, "submit_prepare_log_ failed", K(ret), K_(palf_id)); + PALF_LOG(WARN, "submit_prepare_log_ failed", K_(palf_id)); } else { state_ = FETCH_MAX_LOG_LSN; PALF_EVENT("Reconfirm come into FETCH_MAX_LOG_LSN state", palf_id_, K_(self), K_(majority_max_accept_pid)); @@ -478,7 +478,7 @@ int LogReconfirm::reconfirm() if (RECONFIRM_PREPARE_RETRY_INTERVAL_US <= cost_ts) { ret = submit_prepare_log_(); PALF_LOG(WARN, "prepare_log_ack_list_ is not majority in expected interval, submit_prepare_log_ again", - K(ret), K(cost_ts), K(prepare_log_ack_list_), K(majority_cnt_), K_(palf_id)); + K(cost_ts), K(prepare_log_ack_list_), K(majority_cnt_), K_(palf_id)); } } else { state_ = RECONFIRM_MODE_META; @@ -499,7 +499,7 @@ int LogReconfirm::reconfirm() PALF_EVENT("Reconfirm come into RECONFIRM_FETCH_LOG state", palf_id_, K(prepare_log_ack_list_)); } } else if (OB_EAGAIN != ret) { - PALF_LOG(WARN, "reconfirm_mode_meta failed", K(ret), K_(palf_id)); + PALF_LOG(WARN, "reconfirm_mode_meta failed", K_(palf_id)); } if (state_ != RECONFIRM_FETCH_LOG) { break; @@ -514,10 +514,10 @@ int LogReconfirm::reconfirm() // For this case, followers' match_lsn at A will be 100, and it has no chance to be updated during reconfirm, // which will lead to waiting majority sync timeout. if (OB_FAIL(ack_log_with_end_lsn_())) { - PALF_LOG(WARN, "ack_log_with_end_lsn_", K(ret), K_(follower_end_lsn_list)); + PALF_LOG(WARN, "ack_log_with_end_lsn_", K_(follower_end_lsn_list)); } else if (majority_max_log_server_ != self_ && !is_fetch_log_finished_()) { if (OB_FAIL(try_fetch_log_())) { - PALF_LOG(WARN, "try_fetch_log_ failed", K(ret), K_(palf_id)); + PALF_LOG(WARN, "try_fetch_log_ failed", K_(palf_id)); } } else { state_ = RECONFIRMING; @@ -546,10 +546,10 @@ int LogReconfirm::reconfirm() K_(self), K_(palf_id), K_(majority_max_lsn)); } } else if (OB_FAIL(sw_->get_max_flushed_log_info(last_lsn, last_end_lsn, last_log_proposal_id))) { - PALF_LOG(WARN, "get_max_flushed_log_info failed", K(ret), K_(self), K_(palf_id)); + PALF_LOG(WARN, "get_max_flushed_log_info failed", K_(self), K_(palf_id)); } else if (OB_FAIL(mm_->confirm_start_working_log(new_proposal_id_, leader_epoch, sw_config_version_)) && OB_EAGAIN != ret) { - PALF_LOG(WARN, "confirm_start_working_log failed", K(ret), K_(self), K_(palf_id)); + PALF_LOG(WARN, "confirm_start_working_log failed", K_(self), K_(palf_id)); } else { // 记录进入start_working阶段时的end_lsn saved_end_lsn_ = last_end_lsn; @@ -573,7 +573,7 @@ int LogReconfirm::reconfirm() const LSN curr_max_lsn = sw_->get_max_lsn(); if (curr_max_lsn != saved_end_lsn_) { ret = OB_ERR_UNEXPECTED; - PALF_LOG(ERROR, "max_lsn is not equal to saved_end_lsn_, unexpected", K(ret), K_(palf_id), K_(self), + PALF_LOG(ERROR, "max_lsn is not equal to saved_end_lsn_, unexpected", K_(palf_id), K_(self), K(curr_max_lsn), K_(saved_end_lsn)); } else if (OB_FAIL(sw_->try_advance_committed_end_lsn(saved_end_lsn_))) { // start_working达成多数派后,leader推进committed_end_lsn至之前记录的位点(预期等于当前的max_lsn) @@ -605,7 +605,8 @@ int LogReconfirm::reconfirm() if (state_ == FINISHED) { ret = OB_SUCCESS; } else if (OB_SUCCESS != ret && OB_EAGAIN != ret) { - PALF_LOG(ERROR, "reconfirm failed", K(ret), K_(palf_id), K_(state)); + // election leader may has changed, so ret is maybe OB_NOT_MASTER. + PALF_LOG(WARN, "reconfirm failed", K_(palf_id), K_(state)); } else { ret = OB_EAGAIN; if (REACH_TIME_INTERVAL(10 * 1000)) { diff --git a/src/logservice/palf/log_sliding_window.cpp b/src/logservice/palf/log_sliding_window.cpp index 9e7b7ac33a..b9516caf19 100644 --- a/src/logservice/palf/log_sliding_window.cpp +++ b/src/logservice/palf/log_sliding_window.cpp @@ -893,16 +893,21 @@ int LogSlidingWindow::handle_next_submit_log_(bool &is_committed_lsn_updated) const LSN prev_lsn = log_task->get_prev_lsn(); const int64_t prev_log_pid = log_task->get_prev_proposal_id(); log_task->unlock(); - if (!state_mgr_->is_leader_active() + if (OB_UNLIKELY(last_submit_end_lsn != begin_lsn)) { + ret = OB_ERR_UNEXPECTED; + PALF_LOG(ERROR, "Current log's begin_lsn is not continuous with last_submit_end_lsn, unexpected", + K_(palf_id), K_(self), K(prev_lsn), K(last_submit_log_id), K(last_submit_lsn), + K(last_submit_end_lsn), K(last_submit_log_pid), K(tmp_log_id), KPC(log_task)); + } else if (!state_mgr_->is_leader_active() && tmp_log_id > FIRST_VALID_LOG_ID // the first log doesn't need check prev_proposal_id && (last_submit_lsn != prev_lsn || last_submit_log_pid != prev_log_pid)) { ret = OB_STATE_NOT_MATCH; - PALF_LOG(WARN, "prev_proposal_id does not match, cannot submit this log", K(ret), K_(palf_id), K_(self), + PALF_LOG(WARN, "prev_proposal_id does not match, cannot submit this log", K_(palf_id), K_(self), K(prev_lsn), K(prev_log_pid), K(last_submit_log_id), K(last_submit_lsn), K(last_submit_log_pid), K(tmp_log_id), KPC(log_task)); } else if (OB_FAIL(generate_group_entry_header_(tmp_log_id, log_task, group_entry_header, group_log_data_checksum))) { - PALF_LOG(WARN, "generate_group_entry_header_ failed", K(ret), K_(palf_id), K_(self)); + PALF_LOG(WARN, "generate_group_entry_header_ failed", K_(palf_id), K_(self)); } else { // set flag for rollback accum_checksum is_accum_checksum_acquired = true; diff --git a/src/logservice/palf/log_state_mgr.cpp b/src/logservice/palf/log_state_mgr.cpp index a9f2f3ee7e..bb83f4286f 100644 --- a/src/logservice/palf/log_state_mgr.cpp +++ b/src/logservice/palf/log_state_mgr.cpp @@ -840,7 +840,8 @@ bool LogStateMgr::is_reconfirm_state_changed_(int &ret) PALF_LOG(TRACE, "is_reconfirm_state_changed_", K(bool_ret)); bool_ret = true; } else { - PALF_LOG(ERROR, "reconfirm failed", K(ret), K_(palf_id)); + // election leader may has changed, so ret is maybe OB_NOT_MASTER. + PALF_LOG(WARN, "reconfirm failed", K_(palf_id)); bool_ret = true; } return bool_ret;