[FIX] creating a memtable for DML may loop infinitely

This commit is contained in:
ZenoWang 2024-04-26 12:20:43 +00:00 committed by ob-robot
parent 6a23c064bc
commit af51225977
4 changed files with 35 additions and 20 deletions

View File

@ -659,9 +659,9 @@ int ObDDLIncStartReplayExecutor::do_replay_(ObTabletHandle &tablet_handle)
if (lob_meta_tablet_id.is_valid()) {
tmp_ret_lob = ls_->sync_tablet_freeze_for_direct_load(lob_meta_tablet_id);
}
ret = tmp_ret | tmp_ret_lob;
if (OB_FAIL(ret)) {
LOG_ERROR("sync freeze failed", KR(ret), KR(tmp_ret), KR(tmp_ret_lob), K(tablet_id), K(lob_meta_tablet_id));
if (OB_SUCCESS != (tmp_ret | tmp_ret_lob)) {
ret = OB_EAGAIN;
LOG_WARN("sync freeze failed", KR(ret), KR(tmp_ret), KR(tmp_ret_lob), K(tablet_id), K(lob_meta_tablet_id));
}
}
}
@ -727,9 +727,9 @@ int ObDDLIncCommitReplayExecutor::do_replay_(ObTabletHandle &tablet_handle)
if (lob_meta_tablet_id.is_valid()) {
tmp_ret_lob = ls_->sync_tablet_freeze_for_direct_load(lob_meta_tablet_id);
}
ret = tmp_ret | tmp_ret_lob;
if (OB_FAIL(ret)) {
LOG_ERROR("sync freeze failed", KR(ret), KR(tmp_ret), KR(tmp_ret_lob), K(tablet_id), K(lob_meta_tablet_id));
if (OB_SUCCESS != (tmp_ret | tmp_ret_lob)) {
ret = OB_EAGAIN;
LOG_WARN("sync freeze failed", KR(ret), KR(tmp_ret), KR(tmp_ret_lob), K(tablet_id), K(lob_meta_tablet_id));
}
}
}

View File

@ -1913,7 +1913,7 @@ int ObLS::tablet_freeze_with_rewrite_meta(const ObTabletID &tablet_id,
}
/**
* @brief Used for async freeze task
* @brief Used for both async and sync freeze
*
* @param tablet_id tablet to be frozen
* @param epoch used to check whether the logstream has gone offline
@ -1930,9 +1930,13 @@ int ObLS::tablet_freeze_task_for_direct_load(const ObTabletID &tablet_id, const
} else if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
STORAGE_LOG(WARN, "ls is not inited", K(ret));
} else if (OB_UNLIKELY(is_offline()) || ATOMIC_LOAD(&switch_epoch_) != epoch) {
} else if (OB_UNLIKELY(is_offline())) {
ret = OB_LS_OFFLINE;
LOG_WARN("ls has offlined", K(ret), K_(ls_meta));
} else if (ATOMIC_LOAD(&switch_epoch_) != epoch) {
// happens in the async freeze case: this ls has gone offline and come back online since the task was created
ret = OB_SUCCESS;
FLOG_INFO("quit freeze because logstream epoch has changed", K(ret), K(tablet_id), K(epoch), K(ls_meta_));
} else if (OB_FAIL(ls_freezer_.tablet_freeze_task_for_direct_load(tablet_id, result))) {
LOG_WARN("tablet force freeze failed", K(ret), K(tablet_id));
} else {
@ -1940,17 +1944,23 @@ int ObLS::tablet_freeze_task_for_direct_load(const ObTabletID &tablet_id, const
}
if (OB_FAIL(ret)) {
if (OB_NOT_INIT == ret || OB_NOT_RUNNING == ret || OB_LS_OFFLINE == ret) {
STORAGE_LOG(INFO, "reset ret code to stop retry", KR(ret));
ret = OB_SUCCESS;
} else {
// reset ret to EAGAIN to retry freeze
ret = OB_EAGAIN;
int origin_ret = ret;
// reset ret to EAGAIN to retry freeze
ret = OB_EAGAIN;
if (REACH_TIME_INTERVAL(1LL * 1000LL * 1000LL /* 1 second */)) {
STORAGE_LOG(INFO, "reset ret code to stop retry", KR(ret), KR(origin_ret));
}
}
return ret;
}
/**
* @brief Sync freeze only retries for a limited time.
*
* @param tablet_id
* @param max_retry_time
* @return int
*/
int ObLS::sync_tablet_freeze_for_direct_load(const ObTabletID &tablet_id, const int64_t max_retry_time)
{
int ret = OB_SUCCESS;
@ -1962,9 +1972,6 @@ int ObLS::sync_tablet_freeze_for_direct_load(const ObTabletID &tablet_id, const
ret = OB_SUCCESS;
ObFuture<int> result;
if (OB_FAIL(tablet_freeze_task_for_direct_load(tablet_id, epoch, &result))) {
if (REACH_TIME_INTERVAL(1LL * 1000LL * 1000LL /* 1 second */)) {
LOG_INFO("fail to start tablet freeze. need retry", K(ret), K(tablet_id));
}
usleep(RETRY_INTERVAL);
} else if (OB_FAIL(ls_freezer_.wait_freeze_finished(result))) {
STORAGE_LOG(WARN, "freeze task failed", KR(ret));

View File

@ -874,7 +874,7 @@ public:
const uint64_t epoch,
ObFuture<int> *result = nullptr);
int sync_tablet_freeze_for_direct_load(const ObTabletID &tablet_id,
const int64_t max_retry_time = 3600LL * 1000LL * 1000LL /*1 hour*/);
const int64_t max_retry_time = 5LL * 1000LL * 1000LL /*5 seconds*/);
void async_tablet_freeze_for_direct_load(const ObTabletID &tablet_id);
DELEGATE_WITH_RET(ls_freezer_, wait_freeze_finished, int);

View File

@ -180,6 +180,10 @@ int ObStorageTableGuard::refresh_and_protect_memtable()
}
} while ((OB_SUCC(ret) || OB_ENTRY_NOT_EXIST == ret || OB_EAGAIN == ret) && need_retry);
if (OB_LS_OFFLINE == ret) {
ret = OB_EAGAIN;
STORAGE_LOG(INFO, "reset ret code to OB_EAGAIN to avoid error log", KR(ret), K(ls_id), K(tablet_id));
}
return ret;
}
@ -192,13 +196,17 @@ int ObStorageTableGuard::create_data_memtable_(const share::ObLSID &ls_id,
ObLSHandle ls_handle;
ObTabletHandle tmp_handle;
SCN clog_checkpoint_scn;
ObLS *ls = nullptr;
if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) {
LOG_WARN("failed to get log stream", K(ret), K(ls_id), K(tablet_id));
} else if (OB_UNLIKELY(!ls_handle.is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, invalid ls handle", K(ret), K(ls_handle), K(ls_id), K(tablet_id));
} else if (OB_FAIL(ls_handle.get_ls()->get_tablet_svr()->get_tablet(
tablet_id, tmp_handle, 0, ObMDSGetTabletMode::READ_WITHOUT_CHECK))) {
} else if (FALSE_IT(ls = ls_handle.get_ls())) {
} else if (ls->is_offline()) {
ret = OB_LS_OFFLINE;
FLOG_INFO("create data memtable failed because of ls offline", KR(ret), K(ls_id), K(tablet_id));
} else if (OB_FAIL(ls->get_tablet_svr()->get_tablet(tablet_id, tmp_handle, 0, ObMDSGetTabletMode::READ_WITHOUT_CHECK))) {
LOG_WARN("fail to get tablet", K(ret), K(ls_id), K(tablet_id));
} else if (FALSE_IT(clog_checkpoint_scn = tmp_handle.get_obj()->get_tablet_meta().clog_checkpoint_scn_)) {
} else if (replay_scn_ > clog_checkpoint_scn) {