[Standby] restore sync status optimization
This commit is contained in:
parent
a78b006e5a
commit
92caf6bb03
@ -305,7 +305,8 @@ int ObLogSysTableQueryer::do_query_(const uint64_t tenant_id,
|
||||
LOG_WARN("errsim do query error", K(ERRSIM_FETCH_LOG_SYS_QUERY_FAILED));
|
||||
}
|
||||
if (OB_NOT_NULL(err_handler_) && (-ER_CONNECT_FAILED == ret || -ER_ACCESS_DENIED_ERROR == ret
|
||||
|| OB_SERVER_IS_INIT == ret || OB_TENANT_NOT_EXIST == ret || OB_TENANT_NOT_IN_SERVER == ret)) {
|
||||
|| OB_SERVER_IS_INIT == ret || OB_TENANT_NOT_EXIST == ret || OB_TENANT_NOT_IN_SERVER == ret
|
||||
|| OB_SIZE_OVERFLOW == ret || OB_TIMEOUT == ret)) {
|
||||
err_handler_->handle_error(share::SYS_LS, logfetcher::IObLogErrHandler::ErrType::FETCH_LOG, trace_id,
|
||||
palf::LSN(palf::LOG_INVALID_LSN_VAL)/*no need to pass lsn*/, ret, "%s");
|
||||
}
|
||||
|
@ -64,9 +64,12 @@ const char *restore_comment_str[static_cast<int>(RestoreSyncStatus::MAX_RESTORE_
|
||||
"Log source can not be accessed, the replication account may be incorrect or the privelege is insufficient",
|
||||
"Log source is unreachable, the log source access point may be unavailable",
|
||||
"Fetch log time out",
|
||||
"Restore suspend, the standby has synchronized to recovery until scn",
|
||||
"Restore suspend, the log stream has synchronized to recovery until scn",
|
||||
"Standby binary version is lower than primary data version, standby need to upgrade",
|
||||
"Primary tenant has been dropped",
|
||||
"Waiting log stream created",
|
||||
"Query primary failed",
|
||||
"Restore handler has no leader",
|
||||
"Unexpected exceptions",
|
||||
};
|
||||
|
||||
@ -82,6 +85,9 @@ const char *restore_status_str[static_cast<int>(RestoreSyncStatus::MAX_RESTORE_S
|
||||
"RESTORE SUSPEND",
|
||||
"STANDBY NEED UPGRADE",
|
||||
"PRIMARY TENANT DROPPED",
|
||||
"WAITING LS CREATED",
|
||||
"QUERY PRIMARY FAILED",
|
||||
"RESTORE HANDLER HAS NO LEADER",
|
||||
"NOT AVAILABLE",
|
||||
};
|
||||
|
||||
@ -545,9 +551,10 @@ void ObLogRestoreHandler::mark_error(share::ObTaskId &trace_id,
|
||||
|| (OB_TENANT_NOT_IN_SERVER == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)
|
||||
|| (OB_IN_STOP_STATE == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)
|
||||
|| (OB_SERVER_IS_INIT == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)
|
||||
|| (OB_ERR_OUT_OF_LOWER_BOUND == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)) {
|
||||
|| (OB_ERR_OUT_OF_LOWER_BOUND == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)
|
||||
|| (OB_SIZE_OVERFLOW == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)) {
|
||||
CLOG_LOG(WARN, "fetch log failed in restore", KPC(parent_), KPC(this));
|
||||
} else if(OB_SUCCESS != ret_code) {
|
||||
} else if (OB_SUCCESS != ret_code) {
|
||||
CLOG_LOG(ERROR, "fatal error occur in restore", KPC(parent_), KPC(this));
|
||||
}
|
||||
}
|
||||
@ -1053,10 +1060,29 @@ int ObLogRestoreHandler::get_ls_restore_status_info(RestoreStatusInfo &restore_s
|
||||
bool error_exist = false;
|
||||
bool is_leader = true;
|
||||
RestoreSyncStatus sync_status;
|
||||
ObRole palf_role = FOLLOWER;
|
||||
int64_t palf_proposal_id = -1;
|
||||
bool is_pending_state = true;
|
||||
|
||||
if (!is_strong_leader(role_)) {
|
||||
if (OB_FAIL(palf_handle_.get_role(palf_role, palf_proposal_id, is_pending_state))) {
|
||||
CLOG_LOG(WARN, "fail to get palf role", K(ret), K_(id));
|
||||
} else if (LEADER != palf_role || true == is_pending_state) {
|
||||
CLOG_LOG(TRACE, "palf is not leader when get ls restore status info", K_(id), K(palf_role), K(is_pending_state));
|
||||
} else if (LEADER == palf_role && !is_strong_leader(role_)) {
|
||||
is_leader = false;
|
||||
CLOG_LOG(TRACE, "restore not leader", K(role_));
|
||||
CLOG_LOG(WARN, "restore handler not leader", K_(id), K(role_), K(palf_role));
|
||||
restore_status_info.ls_id_ = id_;
|
||||
restore_status_info.err_code_ = OB_NOT_MASTER;
|
||||
restore_status_info.sync_lsn_ = 0;
|
||||
restore_status_info.sync_scn_ = SCN::min_scn();
|
||||
if (OB_FAIL(get_restore_sync_status(restore_status_info.err_code_, context_.error_context_.error_type_, sync_status))) {
|
||||
CLOG_LOG(WARN, "fail to get restore sync status", K_(restore_status_info.err_code), K(sync_status));
|
||||
} else if (OB_FALSE_IT(restore_status_info.sync_status_ = sync_status)) { // set sync_status before get_restore_comment
|
||||
} else if (OB_FAIL(restore_status_info.get_restore_comment())) {
|
||||
CLOG_LOG(WARN, "fail to get restore comment", K(sync_status));
|
||||
} else {
|
||||
CLOG_LOG(TRACE, "success to get error code and message", K(restore_status_info));
|
||||
}
|
||||
} else if (OB_FAIL(palf_handle_.get_end_lsn(lsn))) {
|
||||
CLOG_LOG(WARN, "fail to get end lsn when get ls restore status info");
|
||||
} else if (OB_FAIL(palf_handle_.get_end_scn(scn))) {
|
||||
@ -1083,7 +1109,7 @@ int ObLogRestoreHandler::get_ls_restore_status_info(RestoreStatusInfo &restore_s
|
||||
restore_status_info.sync_lsn_ = lsn.val_;
|
||||
restore_status_info.sync_scn_ = scn;
|
||||
if (OB_FAIL(restore_status_info.get_restore_comment())) {
|
||||
CLOG_LOG(WARN, "fail to get comment", K(sync_status));
|
||||
CLOG_LOG(WARN, "fail to get restore comment", K(sync_status));
|
||||
} else {
|
||||
CLOG_LOG(TRACE, "success to get error code and message", K(restore_status_info));
|
||||
}
|
||||
@ -1092,13 +1118,17 @@ int ObLogRestoreHandler::get_ls_restore_status_info(RestoreStatusInfo &restore_s
|
||||
}
|
||||
|
||||
int ObLogRestoreHandler::get_restore_sync_status(int ret_code,
|
||||
ObLogRestoreErrorContext::ErrorType error_type,
|
||||
const ObLogRestoreErrorContext::ErrorType error_type,
|
||||
RestoreSyncStatus &sync_status)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
||||
// RESTORE_SYNC_RESTORE_HANDLER_HAS_NO_LEADER
|
||||
if (OB_NOT_MASTER == ret_code) {
|
||||
sync_status = RestoreSyncStatus::RESTORE_SYNC_RESTORE_HANDLER_HAS_NO_LEADER;
|
||||
}
|
||||
// RESTORE_SYNC_SOURCE_HAS_A_GAP
|
||||
if ((OB_ERR_OUT_OF_LOWER_BOUND == ret_code
|
||||
else if ((OB_ERR_OUT_OF_LOWER_BOUND == ret_code
|
||||
|| OB_ARCHIVE_ROUND_NOT_CONTINUOUS == ret_code
|
||||
|| OB_ARCHIVE_LOG_RECYCLED == ret_code)
|
||||
&& ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) {
|
||||
@ -1134,6 +1164,14 @@ int ObLogRestoreHandler::get_restore_sync_status(int ret_code,
|
||||
else if (OB_ERR_RESTORE_PRIMARY_TENANT_DROPPED == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) {
|
||||
sync_status = RestoreSyncStatus::RESTORE_SYNC_PRIMARY_IS_DROPPED;
|
||||
}
|
||||
// RESTORE_SYNC_WAITING_LS_CREATED
|
||||
else if (OB_LS_NOT_EXIST == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) {
|
||||
sync_status = RestoreSyncStatus::RESTORE_SYNC_WAITING_LS_CREATED;
|
||||
}
|
||||
// RESTORE_SYNC_QUERY_PRIMARY_FAILED
|
||||
else if (OB_SIZE_OVERFLOW == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) {
|
||||
sync_status = RestoreSyncStatus::RESTORE_SYNC_QUERY_PRIMARY_FAILED;
|
||||
}
|
||||
// RESTORE_SYNC_NOT_AVAILABLE
|
||||
else if (OB_SUCCESS != ret_code) {
|
||||
sync_status = RestoreSyncStatus::RESTORE_SYNC_NOT_AVAILABLE;
|
||||
@ -1230,7 +1268,7 @@ int RestoreStatusInfo::get_restore_comment()
|
||||
if (OB_FAIL(comment_.assign_fmt("%s", restore_comment_str[int(sync_status_)]))) {
|
||||
CLOG_LOG(WARN, "fail to assign comment", K_(sync_status));
|
||||
} else {
|
||||
CLOG_LOG(TRACE, "success to get restore status comment", K_(sync_status));
|
||||
CLOG_LOG(TRACE, "success to get restore status comment", K_(sync_status), K_(comment));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -87,7 +87,10 @@ enum class RestoreSyncStatus {
|
||||
RESTORE_SYNC_SUSPEND = 8,
|
||||
RESTORE_SYNC_STANDBY_NEED_UPGRADE = 9,
|
||||
RESTORE_SYNC_PRIMARY_IS_DROPPED = 10,
|
||||
RESTORE_SYNC_NOT_AVAILABLE = 11,
|
||||
RESTORE_SYNC_WAITING_LS_CREATED = 11,
|
||||
RESTORE_SYNC_QUERY_PRIMARY_FAILED = 12,
|
||||
RESTORE_SYNC_RESTORE_HANDLER_HAS_NO_LEADER = 13,
|
||||
RESTORE_SYNC_NOT_AVAILABLE = 14,
|
||||
MAX_RESTORE_SYNC_STATUS
|
||||
};
|
||||
|
||||
|
@ -123,7 +123,7 @@ int ObVirtualLSLogRestoreStatus::inner_get_next_row(common::ObNewRow *&row)
|
||||
&& OB_FAIL(insert_ls_restore_status_info_(restore_status_info))) {
|
||||
SERVER_LOG(WARN, "fail to insert ls restore status info", K(restore_status_info));
|
||||
} else {
|
||||
SERVER_LOG(TRACE, "iterate sys log_stream success", K(ls));
|
||||
SERVER_LOG(TRACE, "iterate sys log_stream success", K(ls->get_ls_id()));
|
||||
scanner_.add_row(cur_row_);
|
||||
}
|
||||
}
|
||||
|
@ -910,6 +910,11 @@ int ObRecoveryLSService::process_ls_operator_in_trans_(
|
||||
//can not be creating, must be created or other status
|
||||
ret = OB_EAGAIN;
|
||||
LOG_WARN("ls not created, need wait", KR(ret), K(ls_status));
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
//reuse OB_LS_NOT_EXIST error code, which means the ls has not been created yet in this scenario.
|
||||
if (OB_TMP_FAIL(init_restore_status(sync_scn, OB_LS_NOT_EXIST))) {
|
||||
LOG_WARN("failed to init restore status", KR(tmp_ret), K(sync_scn));
|
||||
}
|
||||
} else {
|
||||
ObLSStatus target_status = share::OB_LS_EMPTY;
|
||||
if (share::is_ls_create_end_op(ls_attr.get_ls_operation_type())) {
|
||||
@ -1067,7 +1072,7 @@ int ObRecoveryLSService::report_sys_ls_recovery_stat_in_trans_(
|
||||
K(ls_recovery_stat), K(tenant_info));
|
||||
} else {
|
||||
last_report_ts_ = ObTimeUtility::current_time();
|
||||
if (!only_update_readable_scn) {
|
||||
if (!only_update_readable_scn && sync_scn >= restore_status_.sync_scn_) {
|
||||
//if sync_scn has been reported, restore_status needs to be reset
|
||||
restore_status_.reset();
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user