Write remove_ls_slog after removing all tablets from ObLSTabletService

This commit is contained in:
obdev
2024-02-09 19:49:22 +00:00
committed by ob-robot
parent ad2f0027ec
commit e2e70a40a0
4 changed files with 55 additions and 16 deletions

View File

@ -728,7 +728,7 @@ int ObTenantMetaMemMgr::gc_tablet(ObTablet *tablet)
if (OB_SUCC(ret) && OB_FAIL(push_tablet_into_gc_queue(tablet))) {
LOG_WARN("fail to push tablet into gc queue", K(ret), KPC(tablet));
}
#ifdef OB_BUILD_RPM
#ifndef OB_BUILD_RPM
FLOG_INFO("push tablet into gc queue", K(ret), KP(tablet), K(common::lbt()));
#endif
return ret;

View File

@ -1470,6 +1470,19 @@ int ObTenantCheckpointSlogHandler::parse(
}
break;
}
case ObRedoLogSubType::OB_REDO_LOG_EMPTY_SHELL_TABLET: {
ObEmptyShellTabletLog slog_entry;
snprintf(slog_name, ObStorageLogReplayer::MAX_SLOG_NAME_LEN, "empty shell tablet slog: ");
if (OB_FAIL(slog_entry.deserialize_id(buf, len, pos))) {
LOG_WARN("failed to deserialize empty shell tablet_id_", K(ret));
} else if (0 > fprintf(stream, "%s\n%s\n", slog_name, to_cstring(slog_entry))) {
ret = OB_IO_ERROR;
LOG_WARN("Fail to print slog to file.", K(ret));
}
break;
}
case ObRedoLogSubType::OB_REDO_LOG_UPDATE_DUP_TABLE_LS: {
ObDupTableCkptLog slog_entry;
snprintf(slog_name, ObStorageLogReplayer::MAX_SLOG_NAME_LEN, "update dup table ls meta slog: ");

View File

@ -696,9 +696,9 @@ int ObLSService::gc_ls_after_replay_slog()
usleep(SLEEP_TS);
}
} while (tmp_ret != OB_SUCCESS);
remove_ls_(ls);
remove_ls_(ls, true/*remove_from_disk*/, false/*write_slog*/);
} else if (ls_status.is_zombie_state()) {
remove_ls_(ls);
remove_ls_(ls, true/*remove_from_disk*/, false/*write_slog*/);
}
}
}
@ -953,6 +953,7 @@ int ObLSService::safe_remove_ls_(ObLSHandle handle, const bool remove_from_disk)
const ObLSID &ls_id = ls->get_ls_id();
static const int64_t SLEEP_TS = 100_ms;
ObLSLockGuard lock_ls(ls);
const bool write_slog = remove_from_disk;
if (OB_ISNULL(task = (ObLSSafeDestroyTask*)ob_malloc(sizeof(ObLSSafeDestroyTask),
"LSSafeDestroy"))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
@ -960,15 +961,12 @@ int ObLSService::safe_remove_ls_(ObLSHandle handle, const bool remove_from_disk)
} else if (FALSE_IT(task = new(task) ObLSSafeDestroyTask())) {
} else if (remove_from_disk && OB_FAIL(ls->set_remove_state())) {
LOG_WARN("ls set remove state failed", KR(ret), K(ls_id));
// set ls to remove state and prevent slog write
} else if(remove_from_disk && OB_FAIL(write_remove_ls_slog_(ls_id))) {
LOG_WARN("fail to write remove ls slog", K(ret));
} else if (OB_FAIL(task->init(MTL_ID(),
handle,
this))) {
LOG_WARN("init safe destroy task failed", K(ret));
} else {
remove_ls_(ls, remove_from_disk);
remove_ls_(ls, remove_from_disk, write_slog);
// try until success.
while (OB_FAIL(gc_service->add_safe_destroy_task(*task))) {
if (REACH_TIME_INTERVAL(1_min)) { // every minute
@ -985,25 +983,53 @@ int ObLSService::safe_remove_ls_(ObLSHandle handle, const bool remove_from_disk)
return ret;
}
void ObLSService::remove_ls_(ObLS *ls, const bool remove_from_disk)
void ObLSService::remove_ls_(ObLS *ls, const bool remove_from_disk, const bool write_slog)
{
int ret = OB_SUCCESS;
const share::ObLSID &ls_id = ls->get_ls_id();
static const int64_t SLEEP_TS = 100_ms;
int64_t retry_cnt = 0;
int64_t success_step = 0;
do {
if (OB_FAIL(ls->prepare_for_safe_destroy())) {
LOG_WARN("prepare safe destroy failed", K(ret), KPC(ls));
} else if (remove_from_disk && OB_FAIL(ls->remove_ls())) {
LOG_WARN("remove ls from disk failed", K(ret), K(remove_from_disk), K(ls_id));
} else if (OB_FAIL(remove_ls_from_map_(ls_id))) {
LOG_WARN("remove log stream from map fail", K(ret), K(ls_id));
// prepare_for_safe_destroy must run first so that all tablets are removed from ObLSTabletService before the
// remove_ls_slog is written. Once the tablets have been removed, no further update_tablet_slog will be written.
// Otherwise, an update_tablet_slog could be written concurrently with the remove_ls_slog and fall behind it,
// which would cause replay to create an invalid tablet during restart.
ret = OB_SUCCESS;
if (success_step < 1) {
if (OB_FAIL(ls->prepare_for_safe_destroy())) {
LOG_WARN("prepare safe destroy failed", K(ret), KPC(ls));
} else {
success_step = 1;
}
}
if (success_step < 2 && OB_SUCC(ret)) {
if(write_slog && OB_FAIL(write_remove_ls_slog_(ls_id))) {
LOG_WARN("fail to write remove ls slog", K(ret));
} else {
success_step = 2;
}
}
if (success_step < 3 && OB_SUCC(ret)) {
if (remove_from_disk && OB_FAIL(ls->remove_ls())) {
LOG_WARN("remove ls from disk failed", K(ret), K(remove_from_disk), K(ls_id));
} else {
success_step = 3;
}
}
if (success_step < 4 && OB_SUCC(ret)) {
if (OB_FAIL(remove_ls_from_map_(ls_id))) {
LOG_WARN("remove log stream from map fail", K(ret), K(ls_id));
} else {
success_step = 4;
}
}
if (OB_FAIL(ret)) {
retry_cnt++;
ob_usleep(SLEEP_TS);
if (retry_cnt % 100 == 0) {
LOG_ERROR("remove_ls_ cost too much time", K(ret), KP(ls), K(ls_id));
LOG_ERROR("remove_ls_ cost too much time", K(ret), KP(ls), K(ls_id), K(success_step));
}
}
} while (OB_FAIL(ret));

View File

@ -190,7 +190,7 @@ private:
int write_abort_create_ls_slog_(const share::ObLSID &ls_id) const;
int write_remove_ls_slog_(const share::ObLSID &ls_id) const;
int remove_ls_from_map_(const share::ObLSID &ls_id);
void remove_ls_(ObLS *ls, const bool remove_from_disk = true);
void remove_ls_(ObLS *ls, const bool remove_from_disk, const bool write_slog);
int safe_remove_ls_(ObLSHandle handle, const bool remove_from_disk);
int replay_update_ls_(const ObLSMeta &ls_meta);
int restore_update_ls_(const ObLSMetaPackage &meta_package);