Modify some debug logs of palf.

This commit is contained in:
obdev
2023-02-08 11:04:55 +08:00
committed by ob-robot
parent 52d03796af
commit 0a63534f58
3 changed files with 36 additions and 29 deletions

View File

@ -156,14 +156,14 @@ int LogReconfirm::init_reconfirm_()
ObMemberList member_list;
int64_t replica_num = 0;
if (OB_FAIL(mm_->get_alive_member_list_with_arb(member_list, replica_num))) {
PALF_LOG(WARN, "get_alive_member_list_with_arb failed", K(ret), K_(palf_id));
PALF_LOG(WARN, "get_alive_member_list_with_arb failed", K_(palf_id));
} else if (!member_list.contains(self_)) {
ret = OB_ERR_UNEXPECTED;
PALF_LOG(WARN, "self is not in curr_member_list", K(ret), K_(palf_id), K(member_list), K(self_));
PALF_LOG(WARN, "self is not in curr_member_list", K_(palf_id), K(member_list), K(self_));
} else if (OB_FAIL(curr_paxos_follower_list_.deep_copy(member_list))) {
PALF_LOG(ERROR, "deep_copy failed", K(ret), K_(palf_id));
PALF_LOG(ERROR, "deep_copy failed", K_(palf_id));
} else if (OB_FAIL(curr_paxos_follower_list_.remove_server(self_))) {
PALF_LOG(ERROR, "remove_server failed", K(ret), K_(palf_id), K_(curr_paxos_follower_list), K_(self), KP(mm_));
PALF_LOG(ERROR, "remove_server failed", K_(palf_id), K_(curr_paxos_follower_list), K_(self), KP(mm_));
} else {
majority_cnt_ = replica_num / 2 + 1;
PALF_LOG(INFO, "init_reconfirm_ success", K(ret), K_(palf_id), K(member_list), K_(self), K_(majority_cnt), K(replica_num));
@ -179,9 +179,9 @@ int LogReconfirm::submit_prepare_log_()
int64_t max_flushed_log_pid = INVALID_PROPOSAL_ID;
const int64_t now_us = ObTimeUtility::current_time();
if (OB_FAIL(sw_->get_max_flushed_log_info(unused_prev_lsn, max_flushed_end_lsn, max_flushed_log_pid))) {
PALF_LOG(WARN, "get_max_flushed_log_info failed", K(ret), K_(palf_id));
PALF_LOG(WARN, "get_max_flushed_log_info failed", K_(palf_id));
} else if (OB_EAGAIN != (ret = mode_mgr_->reconfirm_mode_meta())) {
PALF_LOG(WARN, "reconfirm_mode_meta failed", K(ret), K_(palf_id));
PALF_LOG(WARN, "reconfirm_mode_meta failed", K_(palf_id));
} else {
// clear prepare_log_ack_list_
prepare_log_ack_list_.reset();
@ -216,10 +216,10 @@ int LogReconfirm::submit_prepare_log_()
new_proposal_id_ = old_proposal_id + 1;
if (OB_FAIL(state_mgr_->handle_prepare_request(self_, new_proposal_id_))) {
PALF_LOG(WARN, "handle_prepare_request failed", K(ret), K_(palf_id), K_(self), K(new_proposal_id_));
PALF_LOG(WARN, "handle_prepare_request failed", K_(palf_id), K_(self), K(new_proposal_id_));
} else if (curr_paxos_follower_list_.is_valid()
&& OB_FAIL(log_engine_->submit_prepare_meta_req(curr_paxos_follower_list_, new_proposal_id_))) {
PALF_LOG(WARN, "submit_prepare_meta_req failed", K(ret), K_(palf_id), K_(self), K_(new_proposal_id));
PALF_LOG(WARN, "submit_prepare_meta_req failed", K_(palf_id), K_(self), K_(new_proposal_id));
} else {
const int64_t old_prepare_time_us = last_submit_prepare_req_time_us_;
last_submit_prepare_req_time_us_ = now_us;
@ -257,18 +257,18 @@ int LogReconfirm::handle_prepare_response(const common::ObAddr &server,
} else if (!server.is_valid() || INVALID_PROPOSAL_ID == src_proposal_id
|| INVALID_PROPOSAL_ID == accept_proposal_id || !last_lsn.is_valid()) {
ret = OB_INVALID_ARGUMENT;
PALF_LOG(WARN, "invalid arguments", K(ret), K_(palf_id), K(server), K(src_proposal_id),
PALF_LOG(WARN, "invalid arguments", K_(palf_id), K(server), K(src_proposal_id),
K(accept_proposal_id), K(last_lsn), K(committed_end_lsn));
// NB: no need change 'majority_max_log_server_' after FETCH_MAX_LOG_LSN
} else if (src_proposal_id != new_proposal_id_ || FETCH_MAX_LOG_LSN != state_) {
ret = OB_STATE_NOT_MATCH;
PALF_LOG(WARN, "state not match", K(ret), K_(palf_id), K(server), K(src_proposal_id), K_(new_proposal_id),
PALF_LOG(WARN, "state not match", K_(palf_id), K(server), K(src_proposal_id), K_(new_proposal_id),
K(state_));
} else if (false == curr_paxos_follower_list_.contains(server)) {
PALF_LOG(WARN, "receive prepare req not in follower_list", K(ret), K_(palf_id), K(server),
PALF_LOG(WARN, "receive prepare req not in follower_list", K_(palf_id), K(server),
K_(curr_paxos_follower_list));
} else if (OB_FAIL(prepare_log_ack_list_.add_server(server))) {
PALF_LOG(WARN, "prepare_log_ack_list_ add_sever failed", K(ret), K_(palf_id), K(server));
PALF_LOG(WARN, "prepare_log_ack_list_ add_sever failed", K_(palf_id), K(server));
} else if (INVALID_PROPOSAL_ID != majority_max_accept_pid_
&& accept_proposal_id < majority_max_accept_pid_) {
// accept_proposal_id is smaller than cur majority value, ignore
@ -368,7 +368,7 @@ int LogReconfirm::try_fetch_log_()
|| (sw_start_id == last_record_sw_start_id_
&& now_us - last_fetch_log_time_us_ >= PALF_FETCH_LOG_INTERVAL_US)) {
if (OB_FAIL(sw_->try_fetch_log_for_reconfirm(majority_max_log_server_, majority_max_lsn_, is_fetched))) {
PALF_LOG(WARN, "try_fetch_log_for_reconfirm failed", K(ret), K_(palf_id));
PALF_LOG(WARN, "try_fetch_log_for_reconfirm failed", K_(palf_id));
} else if (is_fetched) {
// send fetch req success
last_record_sw_start_id_ = sw_start_id;
@ -444,13 +444,13 @@ int LogReconfirm::reconfirm()
ObLockGuard<ObSpinLock> guard(lock_);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
PALF_LOG(WARN, "LogReconfirm is not init", K(ret), K_(self), K_(palf_id));
PALF_LOG(WARN, "LogReconfirm is not init", K_(self), K_(palf_id));
} else {
const int64_t now_us = ObTimeUtility::current_time();
switch (state_) {
case INITED: {
if (OB_FAIL(init_reconfirm_())) {
PALF_LOG(WARN, "init reconfirm failed", K(ret), K_(palf_id));
PALF_LOG(WARN, "init reconfirm failed", K_(palf_id));
} else {
state_ = WAITING_LOG_FLUSHED;
PALF_EVENT("Reconfirm come into WAITING_LOG_FLUSHED state", palf_id_, K_(self), K_(majority_cnt),
@ -460,9 +460,9 @@ int LogReconfirm::reconfirm()
}
case WAITING_LOG_FLUSHED: {
if (OB_FAIL(wait_all_log_flushed_())) {
PALF_LOG(WARN, "wait_all_log_flushed_ failed", K(ret), K_(palf_id));
PALF_LOG(WARN, "wait_all_log_flushed_ failed", K_(palf_id));
} else if (OB_FAIL(submit_prepare_log_())) {
PALF_LOG(WARN, "submit_prepare_log_ failed", K(ret), K_(palf_id));
PALF_LOG(WARN, "submit_prepare_log_ failed", K_(palf_id));
} else {
state_ = FETCH_MAX_LOG_LSN;
PALF_EVENT("Reconfirm come into FETCH_MAX_LOG_LSN state", palf_id_, K_(self), K_(majority_max_accept_pid));
@ -478,7 +478,7 @@ int LogReconfirm::reconfirm()
if (RECONFIRM_PREPARE_RETRY_INTERVAL_US <= cost_ts) {
ret = submit_prepare_log_();
PALF_LOG(WARN, "prepare_log_ack_list_ is not majority in expected interval, submit_prepare_log_ again",
K(ret), K(cost_ts), K(prepare_log_ack_list_), K(majority_cnt_), K_(palf_id));
K(cost_ts), K(prepare_log_ack_list_), K(majority_cnt_), K_(palf_id));
}
} else {
state_ = RECONFIRM_MODE_META;
@ -499,7 +499,7 @@ int LogReconfirm::reconfirm()
PALF_EVENT("Reconfirm come into RECONFIRM_FETCH_LOG state", palf_id_, K(prepare_log_ack_list_));
}
} else if (OB_EAGAIN != ret) {
PALF_LOG(WARN, "reconfirm_mode_meta failed", K(ret), K_(palf_id));
PALF_LOG(WARN, "reconfirm_mode_meta failed", K_(palf_id));
}
if (state_ != RECONFIRM_FETCH_LOG) {
break;
@ -514,10 +514,10 @@ int LogReconfirm::reconfirm()
// For this case, followers' match_lsn at A will be 100, and it has no chance to be updated during reconfirm,
// which will lead to waiting majority sync timeout.
if (OB_FAIL(ack_log_with_end_lsn_())) {
PALF_LOG(WARN, "ack_log_with_end_lsn_", K(ret), K_(follower_end_lsn_list));
PALF_LOG(WARN, "ack_log_with_end_lsn_", K_(follower_end_lsn_list));
} else if (majority_max_log_server_ != self_ && !is_fetch_log_finished_()) {
if (OB_FAIL(try_fetch_log_())) {
PALF_LOG(WARN, "try_fetch_log_ failed", K(ret), K_(palf_id));
PALF_LOG(WARN, "try_fetch_log_ failed", K_(palf_id));
}
} else {
state_ = RECONFIRMING;
@ -546,10 +546,10 @@ int LogReconfirm::reconfirm()
K_(self), K_(palf_id), K_(majority_max_lsn));
}
} else if (OB_FAIL(sw_->get_max_flushed_log_info(last_lsn, last_end_lsn, last_log_proposal_id))) {
PALF_LOG(WARN, "get_max_flushed_log_info failed", K(ret), K_(self), K_(palf_id));
PALF_LOG(WARN, "get_max_flushed_log_info failed", K_(self), K_(palf_id));
} else if (OB_FAIL(mm_->confirm_start_working_log(new_proposal_id_, leader_epoch, sw_config_version_))
&& OB_EAGAIN != ret) {
PALF_LOG(WARN, "confirm_start_working_log failed", K(ret), K_(self), K_(palf_id));
PALF_LOG(WARN, "confirm_start_working_log failed", K_(self), K_(palf_id));
} else {
// 记录进入start_working阶段时的end_lsn
saved_end_lsn_ = last_end_lsn;
@ -573,7 +573,7 @@ int LogReconfirm::reconfirm()
const LSN curr_max_lsn = sw_->get_max_lsn();
if (curr_max_lsn != saved_end_lsn_) {
ret = OB_ERR_UNEXPECTED;
PALF_LOG(ERROR, "max_lsn is not equal to saved_end_lsn_, unexpected", K(ret), K_(palf_id), K_(self),
PALF_LOG(ERROR, "max_lsn is not equal to saved_end_lsn_, unexpected", K_(palf_id), K_(self),
K(curr_max_lsn), K_(saved_end_lsn));
} else if (OB_FAIL(sw_->try_advance_committed_end_lsn(saved_end_lsn_))) {
// start_working达成多数派后,leader推进committed_end_lsn至之前记录的位点(预期等于当前的max_lsn)
@ -605,7 +605,8 @@ int LogReconfirm::reconfirm()
if (state_ == FINISHED) {
ret = OB_SUCCESS;
} else if (OB_SUCCESS != ret && OB_EAGAIN != ret) {
PALF_LOG(ERROR, "reconfirm failed", K(ret), K_(palf_id), K_(state));
// election leader may has changed, so ret is maybe OB_NOT_MASTER.
PALF_LOG(WARN, "reconfirm failed", K_(palf_id), K_(state));
} else {
ret = OB_EAGAIN;
if (REACH_TIME_INTERVAL(10 * 1000)) {

View File

@ -893,16 +893,21 @@ int LogSlidingWindow::handle_next_submit_log_(bool &is_committed_lsn_updated)
const LSN prev_lsn = log_task->get_prev_lsn();
const int64_t prev_log_pid = log_task->get_prev_proposal_id();
log_task->unlock();
if (!state_mgr_->is_leader_active()
if (OB_UNLIKELY(last_submit_end_lsn != begin_lsn)) {
ret = OB_ERR_UNEXPECTED;
PALF_LOG(ERROR, "Current log's begin_lsn is not continuous with last_submit_end_lsn, unexpected",
K_(palf_id), K_(self), K(prev_lsn), K(last_submit_log_id), K(last_submit_lsn),
K(last_submit_end_lsn), K(last_submit_log_pid), K(tmp_log_id), KPC(log_task));
} else if (!state_mgr_->is_leader_active()
&& tmp_log_id > FIRST_VALID_LOG_ID // the first log doesn't need check prev_proposal_id
&& (last_submit_lsn != prev_lsn || last_submit_log_pid != prev_log_pid)) {
ret = OB_STATE_NOT_MATCH;
PALF_LOG(WARN, "prev_proposal_id does not match, cannot submit this log", K(ret), K_(palf_id), K_(self),
PALF_LOG(WARN, "prev_proposal_id does not match, cannot submit this log", K_(palf_id), K_(self),
K(prev_lsn), K(prev_log_pid), K(last_submit_log_id), K(last_submit_lsn), K(last_submit_log_pid),
K(tmp_log_id), KPC(log_task));
} else if (OB_FAIL(generate_group_entry_header_(tmp_log_id, log_task, group_entry_header,
group_log_data_checksum))) {
PALF_LOG(WARN, "generate_group_entry_header_ failed", K(ret), K_(palf_id), K_(self));
PALF_LOG(WARN, "generate_group_entry_header_ failed", K_(palf_id), K_(self));
} else {
// set flag for rollback accum_checksum
is_accum_checksum_acquired = true;

View File

@ -840,7 +840,8 @@ bool LogStateMgr::is_reconfirm_state_changed_(int &ret)
PALF_LOG(TRACE, "is_reconfirm_state_changed_", K(bool_ret));
bool_ret = true;
} else {
PALF_LOG(ERROR, "reconfirm failed", K(ret), K_(palf_id));
// election leader may has changed, so ret is maybe OB_NOT_MASTER.
PALF_LOG(WARN, "reconfirm failed", K_(palf_id));
bool_ret = true;
}
return bool_ret;