[Standby] restore sync status optimization

This commit is contained in:
obdev 2024-05-30 14:46:30 +00:00 committed by ob-robot
parent a78b006e5a
commit 92caf6bb03
5 changed files with 60 additions and 13 deletions

View File

@ -305,7 +305,8 @@ int ObLogSysTableQueryer::do_query_(const uint64_t tenant_id,
LOG_WARN("errsim do query error", K(ERRSIM_FETCH_LOG_SYS_QUERY_FAILED));
}
if (OB_NOT_NULL(err_handler_) && (-ER_CONNECT_FAILED == ret || -ER_ACCESS_DENIED_ERROR == ret
|| OB_SERVER_IS_INIT == ret || OB_TENANT_NOT_EXIST == ret || OB_TENANT_NOT_IN_SERVER == ret)) {
|| OB_SERVER_IS_INIT == ret || OB_TENANT_NOT_EXIST == ret || OB_TENANT_NOT_IN_SERVER == ret
|| OB_SIZE_OVERFLOW == ret || OB_TIMEOUT == ret)) {
err_handler_->handle_error(share::SYS_LS, logfetcher::IObLogErrHandler::ErrType::FETCH_LOG, trace_id,
palf::LSN(palf::LOG_INVALID_LSN_VAL)/*no need to pass lsn*/, ret, "%s");
}

View File

@ -64,9 +64,12 @@ const char *restore_comment_str[static_cast<int>(RestoreSyncStatus::MAX_RESTORE_
"Log source can not be accessed, the replication account may be incorrect or the privelege is insufficient",
"Log source is unreachable, the log source access point may be unavailable",
"Fetch log time out",
"Restore suspend, the standby has synchronized to recovery until scn",
"Restore suspend, the log stream has synchronized to recovery until scn",
"Standby binary version is lower than primary data version, standby need to upgrade",
"Primary tenant has been dropped",
"Waiting log stream created",
"Query primary failed",
"Restore handler has no leader",
"Unexpected exceptions",
};
@ -82,6 +85,9 @@ const char *restore_status_str[static_cast<int>(RestoreSyncStatus::MAX_RESTORE_S
"RESTORE SUSPEND",
"STANDBY NEED UPGRADE",
"PRIMARY TENANT DROPPED",
"WAITING LS CREATED",
"QUERY PRIMARY FAILED",
"RESTORE HANDLER HAS NO LEADER",
"NOT AVAILABLE",
};
@ -545,9 +551,10 @@ void ObLogRestoreHandler::mark_error(share::ObTaskId &trace_id,
|| (OB_TENANT_NOT_IN_SERVER == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)
|| (OB_IN_STOP_STATE == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)
|| (OB_SERVER_IS_INIT == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)
|| (OB_ERR_OUT_OF_LOWER_BOUND == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)) {
|| (OB_ERR_OUT_OF_LOWER_BOUND == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)
|| (OB_SIZE_OVERFLOW == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)) {
CLOG_LOG(WARN, "fetch log failed in restore", KPC(parent_), KPC(this));
} else if(OB_SUCCESS != ret_code) {
} else if (OB_SUCCESS != ret_code) {
CLOG_LOG(ERROR, "fatal error occur in restore", KPC(parent_), KPC(this));
}
}
@ -1053,10 +1060,29 @@ int ObLogRestoreHandler::get_ls_restore_status_info(RestoreStatusInfo &restore_s
bool error_exist = false;
bool is_leader = true;
RestoreSyncStatus sync_status;
ObRole palf_role = FOLLOWER;
int64_t palf_proposal_id = -1;
bool is_pending_state = true;
if (!is_strong_leader(role_)) {
if (OB_FAIL(palf_handle_.get_role(palf_role, palf_proposal_id, is_pending_state))) {
CLOG_LOG(WARN, "fail to get palf role", K(ret), K_(id));
} else if (LEADER != palf_role || true == is_pending_state) {
CLOG_LOG(TRACE, "palf is not leader when get ls restore status info", K_(id), K(palf_role), K(is_pending_state));
} else if (LEADER == palf_role && !is_strong_leader(role_)) {
is_leader = false;
CLOG_LOG(TRACE, "restore not leader", K(role_));
CLOG_LOG(WARN, "restore handler not leader", K_(id), K(role_), K(palf_role));
restore_status_info.ls_id_ = id_;
restore_status_info.err_code_ = OB_NOT_MASTER;
restore_status_info.sync_lsn_ = 0;
restore_status_info.sync_scn_ = SCN::min_scn();
if (OB_FAIL(get_restore_sync_status(restore_status_info.err_code_, context_.error_context_.error_type_, sync_status))) {
CLOG_LOG(WARN, "fail to get restore sync status", K_(restore_status_info.err_code), K(sync_status));
} else if (OB_FALSE_IT(restore_status_info.sync_status_ = sync_status)) { // set sync_status before get_restore_comment
} else if (OB_FAIL(restore_status_info.get_restore_comment())) {
CLOG_LOG(WARN, "fail to get restore comment", K(sync_status));
} else {
CLOG_LOG(TRACE, "success to get error code and message", K(restore_status_info));
}
} else if (OB_FAIL(palf_handle_.get_end_lsn(lsn))) {
CLOG_LOG(WARN, "fail to get end lsn when get ls restore status info");
} else if (OB_FAIL(palf_handle_.get_end_scn(scn))) {
@ -1083,7 +1109,7 @@ int ObLogRestoreHandler::get_ls_restore_status_info(RestoreStatusInfo &restore_s
restore_status_info.sync_lsn_ = lsn.val_;
restore_status_info.sync_scn_ = scn;
if (OB_FAIL(restore_status_info.get_restore_comment())) {
CLOG_LOG(WARN, "fail to get comment", K(sync_status));
CLOG_LOG(WARN, "fail to get restore comment", K(sync_status));
} else {
CLOG_LOG(TRACE, "success to get error code and message", K(restore_status_info));
}
@ -1092,13 +1118,17 @@ int ObLogRestoreHandler::get_ls_restore_status_info(RestoreStatusInfo &restore_s
}
int ObLogRestoreHandler::get_restore_sync_status(int ret_code,
ObLogRestoreErrorContext::ErrorType error_type,
const ObLogRestoreErrorContext::ErrorType error_type,
RestoreSyncStatus &sync_status)
{
int ret = OB_SUCCESS;
// RESTORE_SYNC_RESTORE_HANDLER_HAS_NO_LEADER
if (OB_NOT_MASTER == ret_code) {
sync_status = RestoreSyncStatus::RESTORE_SYNC_RESTORE_HANDLER_HAS_NO_LEADER;
}
// RESTORE_SYNC_SOURCE_HAS_A_GAP
if ((OB_ERR_OUT_OF_LOWER_BOUND == ret_code
else if ((OB_ERR_OUT_OF_LOWER_BOUND == ret_code
|| OB_ARCHIVE_ROUND_NOT_CONTINUOUS == ret_code
|| OB_ARCHIVE_LOG_RECYCLED == ret_code)
&& ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) {
@ -1134,6 +1164,14 @@ int ObLogRestoreHandler::get_restore_sync_status(int ret_code,
else if (OB_ERR_RESTORE_PRIMARY_TENANT_DROPPED == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) {
sync_status = RestoreSyncStatus::RESTORE_SYNC_PRIMARY_IS_DROPPED;
}
// RESTORE_SYNC_WAITING_LS_CREATED
else if (OB_LS_NOT_EXIST == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) {
sync_status = RestoreSyncStatus::RESTORE_SYNC_WAITING_LS_CREATED;
}
// RESTORE_SYNC_QUERY_PRIMARY_FAILED
else if (OB_SIZE_OVERFLOW == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) {
sync_status = RestoreSyncStatus::RESTORE_SYNC_QUERY_PRIMARY_FAILED;
}
// RESTORE_SYNC_NOT_AVAILABLE
else if (OB_SUCCESS != ret_code) {
sync_status = RestoreSyncStatus::RESTORE_SYNC_NOT_AVAILABLE;
@ -1230,7 +1268,7 @@ int RestoreStatusInfo::get_restore_comment()
if (OB_FAIL(comment_.assign_fmt("%s", restore_comment_str[int(sync_status_)]))) {
CLOG_LOG(WARN, "fail to assign comment", K_(sync_status));
} else {
CLOG_LOG(TRACE, "success to get restore status comment", K_(sync_status));
CLOG_LOG(TRACE, "success to get restore status comment", K_(sync_status), K_(comment));
}
return ret;
}

View File

@ -87,7 +87,10 @@ enum class RestoreSyncStatus {
RESTORE_SYNC_SUSPEND = 8,
RESTORE_SYNC_STANDBY_NEED_UPGRADE = 9,
RESTORE_SYNC_PRIMARY_IS_DROPPED = 10,
RESTORE_SYNC_NOT_AVAILABLE = 11,
RESTORE_SYNC_WAITING_LS_CREATED = 11,
RESTORE_SYNC_QUERY_PRIMARY_FAILED = 12,
RESTORE_SYNC_RESTORE_HANDLER_HAS_NO_LEADER = 13,
RESTORE_SYNC_NOT_AVAILABLE = 14,
MAX_RESTORE_SYNC_STATUS
};

View File

@ -123,7 +123,7 @@ int ObVirtualLSLogRestoreStatus::inner_get_next_row(common::ObNewRow *&row)
&& OB_FAIL(insert_ls_restore_status_info_(restore_status_info))) {
SERVER_LOG(WARN, "fail to insert ls restore status info", K(restore_status_info));
} else {
SERVER_LOG(TRACE, "iterate sys log_stream success", K(ls));
SERVER_LOG(TRACE, "iterate sys log_stream success", K(ls->get_ls_id()));
scanner_.add_row(cur_row_);
}
}

View File

@ -910,6 +910,11 @@ int ObRecoveryLSService::process_ls_operator_in_trans_(
//can not be creating, must be created or other status
ret = OB_EAGAIN;
LOG_WARN("ls not created, need wait", KR(ret), K(ls_status));
int tmp_ret = OB_SUCCESS;
//reuse the OB_LS_NOT_EXIST error code, which in this scenario means the ls has not been created yet.
if (OB_TMP_FAIL(init_restore_status(sync_scn, OB_LS_NOT_EXIST))) {
LOG_WARN("failed to init restore status", KR(tmp_ret), K(sync_scn));
}
} else {
ObLSStatus target_status = share::OB_LS_EMPTY;
if (share::is_ls_create_end_op(ls_attr.get_ls_operation_type())) {
@ -1067,7 +1072,7 @@ int ObRecoveryLSService::report_sys_ls_recovery_stat_in_trans_(
K(ls_recovery_stat), K(tenant_info));
} else {
last_report_ts_ = ObTimeUtility::current_time();
if (!only_update_readable_scn) {
if (!only_update_readable_scn && sync_scn >= restore_status_.sync_scn_) {
//if sync_scn has been reported, restore_status needs to be reset
restore_status_.reset();
}