[LogFetcher] Fix the bug where a failed add_ls would leave the standby stuck

This commit is contained in:
zxlzxlzxlzxlzxl 2024-03-21 07:51:57 +00:00 committed by ob-robot
parent 073f07895c
commit b2525ff486
3 changed files with 55 additions and 1 deletions

View File

@ -15,6 +15,10 @@
#include "share/rc/ob_tenant_base.h" // MTL_ID
#include "ob_log_fetch_stream_container_mgr.h"
#ifdef ERRSIM
// Error-injection point, compiled only when ERRSIM is defined.
// ObFsContainerMgr::add_fsc checks it right before fsc_pool_.alloc(); when it
// evaluates to a failure code, add_fsc takes the "ERRSIM: failed to alloc fsc"
// branch instead of allocating.
ERRSIM_POINT_DEF(LOGFETCHER_ALLOC_FSC_FAILED);
#endif
namespace oceanbase
{
namespace logfetcher
@ -111,6 +115,10 @@ int ObFsContainerMgr::add_fsc(const FetchStreamType stype,
ret = OB_INVALID_ERROR;
LOG_ERROR("invalid argument", KR(ret), K(stype), K(rpc_), K(stream_worker_),
K(progress_controller_));
#ifdef ERRSIM
} else if (OB_FAIL(LOGFETCHER_ALLOC_FSC_FAILED)) {
LOG_ERROR("ERRSIM: failed to alloc fsc");
#endif
} else if (OB_FAIL(fsc_pool_.alloc(fsc))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_ERROR("allocate fsc from pool failed", KR(ret), K(tls_id), K(fsc));

View File

@ -443,6 +443,34 @@ int ObLogFetcher::add_ls(
K(start_parameters));
}
if (OB_FAIL(ret)) {
  // Failure post-processing: add_ls did not complete, so undo whatever may already
  // have been registered for this LS. Each step is best-effort: its result is kept
  // in tmp_ret only, so the original error in ret is still what the caller receives.
  // OB_ENTRY_NOT_EXIST from a step is logged at INFO level as "done" rather than
  // as a warning.
  int tmp_ret = OB_SUCCESS;
  logservice::TenantLSID failed_tls_id(source_tenant_id_, ls_id);

  // Step 1: recycle the LS in ls_fetch_mgr_.
  if (OB_TMP_FAIL(ls_fetch_mgr_.recycle_ls(failed_tls_id))) {
    if (OB_ENTRY_NOT_EXIST != tmp_ret) {
      LOG_WARN_RET(tmp_ret, "failed to recycle ls in failure post process", K(failed_tls_id));
    } else {
      LOG_INFO("tls_id is not in ls_fetch_mgr, recycle done", K(failed_tls_id));
    }
  }

  // Step 2: remove the fetch stream container registered for this LS.
  if (OB_TMP_FAIL(fs_container_mgr_.remove_fsc(failed_tls_id))) {
    if (OB_ENTRY_NOT_EXIST != tmp_ret) {
      // The message names the failing step so the warning is actionable on its own.
      LOG_WARN_RET(tmp_ret, "failed to remove fsc in fs_container_mgr", K(failed_tls_id));
    } else {
      LOG_INFO("tls_id not exist in fs_container_mgr_, remove done", K(failed_tls_id));
    }
  }

  // Step 3: remove the LS from ls_fetch_mgr_.
  if (OB_TMP_FAIL(ls_fetch_mgr_.remove_ls(failed_tls_id))) {
    if (OB_ENTRY_NOT_EXIST != tmp_ret) {
      LOG_WARN_RET(tmp_ret, "failed to remove ls in ls_fetch_mgr ", K(failed_tls_id));
    } else {
      LOG_INFO("tls_id not exist in ls_fetch_mgr_, remove done", K(failed_tls_id));
    }
  }
}
return ret;
}

View File

@ -106,6 +106,10 @@ void ObLogLSFetchMgr::destroy()
LOG_INFO("destroy LS fetch mgr succ");
}
#ifdef ERRSIM
// Error-injection points, compiled only when ERRSIM is defined. Both are checked
// inside ObLogLSFetchMgr::add_ls:
// - LOG_FETCHER_ALLOC_LS_CTX_ADD_INFO_FAIL is tested just before
//   ls_ctx_add_info_factory_->alloc();
// - LOG_FETCHER_ALLOC_LS_CTX_FAIL is tested just before ls_ctx_factory_->alloc().
// When a point evaluates to a failure code, add_ls takes the matching "ERRSIM: ..."
// error branch instead of performing the allocation.
ERRSIM_POINT_DEF(LOG_FETCHER_ALLOC_LS_CTX_ADD_INFO_FAIL);
ERRSIM_POINT_DEF(LOG_FETCHER_ALLOC_LS_CTX_FAIL);
#endif
int ObLogLSFetchMgr::add_ls(
const logservice::TenantLSID &tls_id,
const ObLogFetcherStartParameters &start_parameters,
@ -138,6 +142,11 @@ int ObLogLSFetchMgr::add_ls(
LOG_ERROR("init_tls_info_ failed", KR(ret), K(tls_id), K(tls_id_str));
}
// alloc a part trans resolver
#ifdef ERRSIM
else if (OB_FAIL(LOG_FETCHER_ALLOC_LS_CTX_ADD_INFO_FAIL)) {
LOG_ERROR("ERRSIM: failed to alloc ls_ctx_add_info", K(tls_id));
}
#endif
else if (OB_FAIL(ls_ctx_add_info_factory_->alloc(tls_id_str, ls_ctx_add_info))) {
LOG_ERROR("alloc ObILogFetcherLSCtxAddInfo fail", KR(ret), K(tls_id_str));
} else if (OB_ISNULL(ls_ctx_add_info)) {
@ -147,6 +156,11 @@ int ObLogLSFetchMgr::add_ls(
LOG_ERROR("init part trans resolver fail", KR(ret), K(tls_id), K(start_tstamp_ns));
}
// alloc a LSFetchCtx
#ifdef ERRSIM
else if (OB_FAIL(LOG_FETCHER_ALLOC_LS_CTX_FAIL)) {
LOG_ERROR("ERRSIM: failed to alloc ls_fetch_ctx", K(tls_id));
}
#endif
else if (OB_FAIL(ls_ctx_factory_->alloc(ctx))) {
LOG_ERROR("alloc LSFetchCtx fail", KR(ret));
} else if (OB_ISNULL(ctx)) {
@ -290,7 +304,11 @@ int ObLogLSFetchMgr::remove_ls(const logservice::TenantLSID &tls_id)
// remove node from map first to guarantee the correctness of the concurrent operation on the map
if (OB_FAIL(ctx_map_.erase(tls_id, fetch_ctx))) {
LOG_ERROR("erase LSFetchCtx from map fail", KR(ret), K(tls_id));
if (OB_ENTRY_NOT_EXIST != ret) {
LOG_ERROR("erase LSFetchCtx from map fail", KR(ret), K(tls_id));
} else {
LOG_WARN("erase LSFetchCtx from map fail, ctx not exist", K(tls_id));
}
} else if (OB_ISNULL(fetch_ctx)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("LSFetchCtx is NULL, unexcepted error", KR(ret), K(tls_id), K(fetch_ctx));