diff --git a/src/logservice/logrouteservice/ob_log_systable_queryer.cpp b/src/logservice/logrouteservice/ob_log_systable_queryer.cpp index 7f42db81c..50f1f8810 100644 --- a/src/logservice/logrouteservice/ob_log_systable_queryer.cpp +++ b/src/logservice/logrouteservice/ob_log_systable_queryer.cpp @@ -305,7 +305,8 @@ int ObLogSysTableQueryer::do_query_(const uint64_t tenant_id, LOG_WARN("errsim do query error", K(ERRSIM_FETCH_LOG_SYS_QUERY_FAILED)); } if (OB_NOT_NULL(err_handler_) && (-ER_CONNECT_FAILED == ret || -ER_ACCESS_DENIED_ERROR == ret - || OB_SERVER_IS_INIT == ret || OB_TENANT_NOT_EXIST == ret || OB_TENANT_NOT_IN_SERVER == ret)) { + || OB_SERVER_IS_INIT == ret || OB_TENANT_NOT_EXIST == ret || OB_TENANT_NOT_IN_SERVER == ret + || OB_SIZE_OVERFLOW == ret || OB_TIMEOUT == ret)) { err_handler_->handle_error(share::SYS_LS, logfetcher::IObLogErrHandler::ErrType::FETCH_LOG, trace_id, palf::LSN(palf::LOG_INVALID_LSN_VAL)/*no need to pass lsn*/, ret, "%s"); } diff --git a/src/logservice/restoreservice/ob_log_restore_handler.cpp b/src/logservice/restoreservice/ob_log_restore_handler.cpp index 8e8e912e2..2c6551a4d 100644 --- a/src/logservice/restoreservice/ob_log_restore_handler.cpp +++ b/src/logservice/restoreservice/ob_log_restore_handler.cpp @@ -64,9 +64,12 @@ const char *restore_comment_str[static_cast(RestoreSyncStatus::MAX_RESTORE_ "Log source can not be accessed, the replication account may be incorrect or the privelege is insufficient", "Log source is unreachable, the log source access point may be unavailable", "Fetch log time out", - "Restore suspend, the standby has synchronized to recovery until scn", + "Restore suspend, the log stream has synchronized to recovery until scn", "Standby binary version is lower than primary data version, standby need to upgrade", "Primary tenant has been dropped", + "Waiting log stream created", + "Query primary failed", + "Restore handler has no leader", "Unexpected exceptions", }; @@ -82,6 +85,9 @@ const char 
*restore_status_str[static_cast(RestoreSyncStatus::MAX_RESTORE_S "RESTORE SUSPEND", "STANDBY NEED UPGRADE", "PRIMARY TENANT DROPPED", + "WAITING LS CREATED", + "QUERY PRIMARY FAILED", + "RESTORE HANDLER HAS NO LEADER", "NOT AVAILABLE", }; @@ -545,9 +551,10 @@ void ObLogRestoreHandler::mark_error(share::ObTaskId &trace_id, || (OB_TENANT_NOT_IN_SERVER == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) || (OB_IN_STOP_STATE == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) || (OB_SERVER_IS_INIT == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) - || (OB_ERR_OUT_OF_LOWER_BOUND == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)) { + || (OB_ERR_OUT_OF_LOWER_BOUND == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) + || (OB_SIZE_OVERFLOW == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type)) { CLOG_LOG(WARN, "fetch log failed in restore", KPC(parent_), KPC(this)); - } else if(OB_SUCCESS != ret_code) { + } else if (OB_SUCCESS != ret_code) { CLOG_LOG(ERROR, "fatal error occur in restore", KPC(parent_), KPC(this)); } } @@ -1053,10 +1060,29 @@ int ObLogRestoreHandler::get_ls_restore_status_info(RestoreStatusInfo &restore_s bool error_exist = false; bool is_leader = true; RestoreSyncStatus sync_status; + ObRole palf_role = FOLLOWER; + int64_t palf_proposal_id = -1; + bool is_pending_state = true; - if (!is_strong_leader(role_)) { + if (OB_FAIL(palf_handle_.get_role(palf_role, palf_proposal_id, is_pending_state))) { + CLOG_LOG(WARN, "fail to get palf role", K(ret), K_(id)); + } else if (LEADER != palf_role || true == is_pending_state) { + CLOG_LOG(TRACE, "palf is not leader when get ls restore status info", K_(id), K(palf_role), K(is_pending_state)); + } else if (LEADER == palf_role && !is_strong_leader(role_)) { is_leader = false; - CLOG_LOG(TRACE, "restore not leader", K(role_)); + CLOG_LOG(WARN, "restore handler not leader", 
K_(id), K(role_), K(palf_role)); + restore_status_info.ls_id_ = id_; + restore_status_info.err_code_ = OB_NOT_MASTER; + restore_status_info.sync_lsn_ = 0; + restore_status_info.sync_scn_ = SCN::min_scn(); + if (OB_FAIL(get_restore_sync_status(restore_status_info.err_code_, context_.error_context_.error_type_, sync_status))) { + CLOG_LOG(WARN, "fail to get restore sync status", K_(restore_status_info.err_code), K(sync_status)); + } else if (OB_FALSE_IT(restore_status_info.sync_status_ = sync_status)) { // set sync_status before get_restore_comment + } else if (OB_FAIL(restore_status_info.get_restore_comment())) { + CLOG_LOG(WARN, "fail to get restore comment", K(sync_status)); + } else { + CLOG_LOG(TRACE, "success to get error code and message", K(restore_status_info)); + } } else if (OB_FAIL(palf_handle_.get_end_lsn(lsn))) { CLOG_LOG(WARN, "fail to get end lsn when get ls restore status info"); } else if (OB_FAIL(palf_handle_.get_end_scn(scn))) { @@ -1083,7 +1109,7 @@ int ObLogRestoreHandler::get_ls_restore_status_info(RestoreStatusInfo &restore_s restore_status_info.sync_lsn_ = lsn.val_; restore_status_info.sync_scn_ = scn; if (OB_FAIL(restore_status_info.get_restore_comment())) { - CLOG_LOG(WARN, "fail to get comment", K(sync_status)); + CLOG_LOG(WARN, "fail to get restore comment", K(sync_status)); } else { CLOG_LOG(TRACE, "success to get error code and message", K(restore_status_info)); } @@ -1092,13 +1118,17 @@ int ObLogRestoreHandler::get_ls_restore_status_info(RestoreStatusInfo &restore_s } int ObLogRestoreHandler::get_restore_sync_status(int ret_code, - ObLogRestoreErrorContext::ErrorType error_type, + const ObLogRestoreErrorContext::ErrorType error_type, RestoreSyncStatus &sync_status) { int ret = OB_SUCCESS; + // RESTORE_SYNC_RESTORE_HANDLER_HAS_NO_LEADER + if (OB_NOT_MASTER == ret_code) { + sync_status = RestoreSyncStatus::RESTORE_SYNC_RESTORE_HANDLER_HAS_NO_LEADER; + } // RESTORE_SYNC_SOURCE_HAS_A_GAP - if ((OB_ERR_OUT_OF_LOWER_BOUND == ret_code + else 
if ((OB_ERR_OUT_OF_LOWER_BOUND == ret_code || OB_ARCHIVE_ROUND_NOT_CONTINUOUS == ret_code || OB_ARCHIVE_LOG_RECYCLED == ret_code) && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) { @@ -1134,6 +1164,14 @@ int ObLogRestoreHandler::get_restore_sync_status(int ret_code, else if (OB_ERR_RESTORE_PRIMARY_TENANT_DROPPED == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) { sync_status = RestoreSyncStatus::RESTORE_SYNC_PRIMARY_IS_DROPPED; } + // RESTORE_SYNC_WAITING_LS_CREATED + else if (OB_LS_NOT_EXIST == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) { + sync_status = RestoreSyncStatus::RESTORE_SYNC_WAITING_LS_CREATED; + } + // RESTORE_SYNC_QUERY_PRIMARY_FAILED + else if (OB_SIZE_OVERFLOW == ret_code && ObLogRestoreErrorContext::ErrorType::FETCH_LOG == error_type) { + sync_status = RestoreSyncStatus::RESTORE_SYNC_QUERY_PRIMARY_FAILED; + } // RESTORE_SYNC_NOT_AVAILABLE else if (OB_SUCCESS != ret_code) { sync_status = RestoreSyncStatus::RESTORE_SYNC_NOT_AVAILABLE; @@ -1230,7 +1268,7 @@ int RestoreStatusInfo::get_restore_comment() if (OB_FAIL(comment_.assign_fmt("%s", restore_comment_str[int(sync_status_)]))) { CLOG_LOG(WARN, "fail to assign comment", K_(sync_status)); } else { - CLOG_LOG(TRACE, "success to get restore status comment", K_(sync_status)); + CLOG_LOG(TRACE, "success to get restore status comment", K_(sync_status), K_(comment)); } return ret; } diff --git a/src/logservice/restoreservice/ob_log_restore_handler.h b/src/logservice/restoreservice/ob_log_restore_handler.h index e45ecb7a6..4f8ee1de4 100644 --- a/src/logservice/restoreservice/ob_log_restore_handler.h +++ b/src/logservice/restoreservice/ob_log_restore_handler.h @@ -87,7 +87,10 @@ enum class RestoreSyncStatus { RESTORE_SYNC_SUSPEND = 8, RESTORE_SYNC_STANDBY_NEED_UPGRADE = 9, RESTORE_SYNC_PRIMARY_IS_DROPPED = 10, - RESTORE_SYNC_NOT_AVAILABLE = 11, + RESTORE_SYNC_WAITING_LS_CREATED = 11, + RESTORE_SYNC_QUERY_PRIMARY_FAILED = 12, + 
RESTORE_SYNC_RESTORE_HANDLER_HAS_NO_LEADER = 13, + RESTORE_SYNC_NOT_AVAILABLE = 14, MAX_RESTORE_SYNC_STATUS }; diff --git a/src/observer/virtual_table/ob_all_virtual_ls_log_restore_status.cpp b/src/observer/virtual_table/ob_all_virtual_ls_log_restore_status.cpp index 17a806590..e791e9c7e 100644 --- a/src/observer/virtual_table/ob_all_virtual_ls_log_restore_status.cpp +++ b/src/observer/virtual_table/ob_all_virtual_ls_log_restore_status.cpp @@ -123,7 +123,7 @@ int ObVirtualLSLogRestoreStatus::inner_get_next_row(common::ObNewRow *&row) && OB_FAIL(insert_ls_restore_status_info_(restore_status_info))) { SERVER_LOG(WARN, "fail to insert ls restore status info", K(restore_status_info)); } else { - SERVER_LOG(TRACE, "iterate sys log_stream success", K(ls)); + SERVER_LOG(TRACE, "iterate sys log_stream success", K(ls->get_ls_id())); scanner_.add_row(cur_row_); } } diff --git a/src/rootserver/ob_recovery_ls_service.cpp b/src/rootserver/ob_recovery_ls_service.cpp index f122f77e0..7b164bcf2 100755 --- a/src/rootserver/ob_recovery_ls_service.cpp +++ b/src/rootserver/ob_recovery_ls_service.cpp @@ -910,6 +910,11 @@ int ObRecoveryLSService::process_ls_operator_in_trans_( //can not be creating, must be created or other status ret = OB_EAGAIN; LOG_WARN("ls not created, need wait", KR(ret), K(ls_status)); + int tmp_ret = OB_SUCCESS; + //reuse OB_LS_NOT_EXIST error code, which means the ls has not created in this scenario. 
+ if (OB_TMP_FAIL(init_restore_status(sync_scn, OB_LS_NOT_EXIST))) { + LOG_WARN("failed to init restore status", KR(tmp_ret), K(sync_scn)); + } } else { ObLSStatus target_status = share::OB_LS_EMPTY; if (share::is_ls_create_end_op(ls_attr.get_ls_operation_type())) { @@ -1067,7 +1072,7 @@ int ObRecoveryLSService::report_sys_ls_recovery_stat_in_trans_( K(ls_recovery_stat), K(tenant_info)); } else { last_report_ts_ = ObTimeUtility::current_time(); - if (!only_update_readable_scn) { + if (!only_update_readable_scn && sync_scn >= restore_status_.sync_scn_) { //如果汇报了sync_scn,需要把restore_status重置掉 restore_status_.reset(); }