Enhance the clog replay check to prevent replaying clog after a tablet-delete or finish-transfer-out transaction has been committed

This commit is contained in:
hiddenbomb
2024-03-26 02:45:53 +00:00
committed by ob-robot
parent 0251ce4679
commit d57716b902
6 changed files with 64 additions and 9 deletions

View File

@ -24,6 +24,7 @@
#include "observer/net/ob_ingress_bw_alloc_service.h"
#include "observer/ob_srv_network_frame.h"
#include "observer/report/ob_i_meta_report.h"
#include "observer/omt/ob_tenant_config_mgr.h"
#include "rootserver/freeze/ob_major_freeze_service.h"
#include "rootserver/tenant_snapshot/ob_tenant_snapshot_scheduler.h"
#include "rootserver/restore/ob_clone_scheduler.h"
@ -46,6 +47,7 @@
#include "share/leak_checker/obj_leak_checker.h"
#include "share/ob_ls_id.h"
#include "share/ob_global_autoinc_service.h"
#include "share/ob_force_print_log.h"
#include "sql/das/ob_das_id_service.h"
#include "storage/compaction/ob_tenant_tablet_scheduler.h"
#include "storage/ls/ob_ls.h"
@ -1780,6 +1782,8 @@ int ObLS::replay_get_tablet(
&& ObTabletStatus::TRANSFER_OUT_DELETED != tablet_status) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet is empty shell but user data is unexpected", K(ret), KPC(tablet));
} else if (OB_FAIL(check_tablet_status_and_scn(data, scn))) {
LOG_ERROR("fail to check tablet status and scn", K(ret), K(ls_id), K(tablet_id), K(data), K(scn));
} else {
ret = OB_OBSOLETE_CLOG_NEED_SKIP;
LOG_INFO("tablet is already deleted, need skip", KR(ret), K(ls_id), K(tablet_id), K(scn));
@ -1800,6 +1804,8 @@ int ObLS::replay_get_tablet(
LOG_INFO("latest transaction has not committed yet, should retry", KR(ret), K(ls_id), K(tablet_id),
K(scn), "clog_checkpoint_scn", tablet->get_clog_checkpoint_scn(), K(data));
}
} else if (OB_FAIL(check_tablet_status_and_scn(data, scn))) {
LOG_ERROR("fail to check tablet status and scn", K(ret), K(ls_id), K(tablet_id), K(data), K(scn));
}
}
@ -1810,6 +1816,30 @@ int ObLS::replay_get_tablet(
return ret;
}
// Sanity check used while replaying clog against a tablet: detects a log
// whose scn is at or beyond the commit scn recorded for the tablet's
// deletion (status DELETED or TRANSFER_OUT_DELETED).
//
// @param data  tablet create/delete mds user data; tablet_status_ and
//              delete_commit_scn_ are the fields read here.
// @param scn   scn of the log being replayed.
// @return OB_SUCCESS when the tablet status is neither DELETED nor
//         TRANSFER_OUT_DELETED, when scn is not >= delete_commit_scn_, or
//         when the tenant config item
//         _allow_skip_replay_redo_after_detete_tablet is set (a forced
//         warning is printed in that case);
//         OB_ERR_UNEXPECTED when the tenant config guard is invalid or the
//         config item is not set.
int ObLS::check_tablet_status_and_scn(
    const ObTabletCreateDeleteMdsUserData &data,
    const share::SCN &scn)
{
  int ret = OB_SUCCESS;

  const bool is_deleted_status =
      (ObTabletStatus::DELETED == data.tablet_status_)
      || (ObTabletStatus::TRANSFER_OUT_DELETED == data.tablet_status_);
  // scn is only compared once the status is known to be a deleted one.
  const bool is_log_after_delete = is_deleted_status && (scn >= data.delete_commit_scn_);

  if (OB_UNLIKELY(is_log_after_delete)) {
    // The tenant config is fetched only on this abnormal path.
    omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));

    if (OB_UNLIKELY(!tenant_config.is_valid())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("tenant config is invalid", K(ret));
    } else if (!tenant_config->_allow_skip_replay_redo_after_detete_tablet) {
      // Default behaviour: report the unexpected log to the caller.
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("scn is bigger than tablet delete commit scn", K(ret), K(data), K(scn));
    } else {
      // Emergency switch is on: keep ret as OB_SUCCESS and only leave a trace.
      FLOG_WARN("scn is bigger than tablet delete commit scn, allow to skip replaying this clog for emergency",
          K(ret), K(data), K(scn));
    }
  }

  return ret;
}
int ObLS::logstream_freeze(const int64_t trace_id, const bool is_sync, const int64_t abs_timeout_ts)
{
int ret = OB_SUCCESS;