Fix transfer out and then transfer in restart replay clog bug

This commit is contained in:
godyangfight
2023-06-27 21:48:06 +00:00
committed by ob-robot
parent d0b5dd3226
commit 1af10b5313
9 changed files with 51 additions and 36 deletions

View File

@ -81,8 +81,8 @@ int ObTabletReplayExecutor::execute(const share::SCN &scn, const share::ObLSID &
} else if (can_skip_replay) {
// do nothing
} else if (CLICK_FAIL(replay_get_tablet_(ls_handle, tablet_id, scn, tablet_handle))) {
if (OB_TABLET_NOT_EXIST == ret) {
CLOG_LOG(INFO, "tablet not exist, replay skip", K(ret), K(ls_id), K(scn));
if (OB_OBSOLETE_CLOG_NEED_SKIP == ret) {
CLOG_LOG(INFO, "clog is already obsolete, should skip replay", K(ret), K(ls_id), K(scn));
ret = OB_SUCCESS;
} else if (OB_EAGAIN == ret) {
CLOG_LOG_LIMIT(WARN, "need retry to get tablet", K(ret), K(ls_id), K(scn));

File diff suppressed because one or more lines are too long

View File

@ -1793,7 +1793,7 @@ DEFINE_ERROR(OB_TRANSFER_DETECT_ACTIVE_TRANS, -7114, -1, "HY000", "transfer dete
DEFINE_ERROR(OB_TRANSFER_SRC_LS_NOT_EXIST, -7115, -1, "HY000", "transfer src ls does not exist");
DEFINE_ERROR(OB_TRANSFER_SRC_TABLET_NOT_EXIST, -7116, -1, "HY000", "transfer src tablet does not exist");
DEFINE_ERROR(OB_LS_NEED_REBUILD, -7117, -1, "HY000", "ls need rebuild");
DEFINE_ERROR(OB_OBSOLETE_CLOG_NEED_SKIP, -7118, -1, "HY000", "obsolete clog need skip");
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//error code for gis -7201 ---- -7300
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -1403,6 +1403,7 @@ constexpr int OB_TRANSFER_DETECT_ACTIVE_TRANS = -7114;
constexpr int OB_TRANSFER_SRC_LS_NOT_EXIST = -7115;
constexpr int OB_TRANSFER_SRC_TABLET_NOT_EXIST = -7116;
constexpr int OB_LS_NEED_REBUILD = -7117;
constexpr int OB_OBSOLETE_CLOG_NEED_SKIP = -7118;
constexpr int OB_ERR_INVALID_XML_DATATYPE = -7402;
constexpr int OB_ERR_XML_MISSING_COMMA = -7403;
constexpr int OB_ERR_INVALID_XPATH_EXPRESSION = -7404;
@ -3386,6 +3387,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219;
#define OB_TRANSFER_SRC_LS_NOT_EXIST__USER_ERROR_MSG "transfer src ls does not exist"
#define OB_TRANSFER_SRC_TABLET_NOT_EXIST__USER_ERROR_MSG "transfer src tablet does not exist"
#define OB_LS_NEED_REBUILD__USER_ERROR_MSG "ls need rebuild"
#define OB_OBSOLETE_CLOG_NEED_SKIP__USER_ERROR_MSG "obsolete clog need skip"
#define OB_ERR_GIS_DIFFERENT_SRIDS__USER_ERROR_MSG "Binary geometry function %s given two geometries of different srids: %u and %u, which should have been identical."
#define OB_ERR_GIS_UNSUPPORTED_ARGUMENT__USER_ERROR_MSG "Calling geometry function %s with unsupported types of arguments."
#define OB_ERR_GIS_UNKNOWN_ERROR__USER_ERROR_MSG "Unknown GIS error occurred in function %s."
@ -5479,6 +5481,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219;
#define OB_TRANSFER_SRC_LS_NOT_EXIST__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7115, transfer src ls does not exist"
#define OB_TRANSFER_SRC_TABLET_NOT_EXIST__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7116, transfer src tablet does not exist"
#define OB_LS_NEED_REBUILD__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7117, ls need rebuild"
#define OB_OBSOLETE_CLOG_NEED_SKIP__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7118, obsolete clog need skip"
#define OB_ERR_GIS_DIFFERENT_SRIDS__ORA_USER_ERROR_MSG "ORA-00600: Binary geometry function %s given two geometries of different srids: %u and %u, which should have been identical."
#define OB_ERR_GIS_UNSUPPORTED_ARGUMENT__ORA_USER_ERROR_MSG "ORA-00600: Calling geometry function %s with unsupported types of arguments."
#define OB_ERR_GIS_UNKNOWN_ERROR__ORA_USER_ERROR_MSG "ORA-00600: Unknown GIS error occurred in function %s."
@ -5944,7 +5947,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219;
#define OB_ERR_DATA_TOO_LONG_MSG_FMT_V2__ORA_USER_ERROR_MSG "ORA-12899: value too large for column %.*s (actual: %ld, maximum: %ld)"
#define OB_ERR_INVALID_DATE_MSG_FMT_V2__ORA_USER_ERROR_MSG "ORA-01861: Incorrect datetime value for column '%.*s' at row %ld"
extern int g_all_ob_errnos[2089];
extern int g_all_ob_errnos[2090];
const char *ob_error_name(const int oberr);
const char* ob_error_cause(const int oberr);

View File

@ -1443,6 +1443,7 @@ int ObLS::replay_get_tablet_no_check(const common::ObTabletID &tablet_id,
if (OB_TABLET_NOT_EXIST != ret) {
LOG_WARN("failed to get tablet", K(ret), K(key));
} else if (scn <= tablet_change_checkpoint_scn) {
ret = OB_OBSOLETE_CLOG_NEED_SKIP;
LOG_WARN("tablet already gc", K(ret), K(key), K(scn), K(tablet_change_checkpoint_scn));
} else if (OB_FAIL(MTL(ObLogService*)->get_log_replay_service()->get_max_replayed_scn(ls_meta_.ls_id_, max_scn))) {
LOG_WARN("failed to get_max_replayed_scn", KR(ret), K_(ls_meta), K(scn), K(tablet_id));
@ -1461,7 +1462,9 @@ int ObLS::replay_get_tablet_no_check(const common::ObTabletID &tablet_id,
ret = OB_EAGAIN;
LOG_INFO("tablet does not exist, but need retry", KR(ret), K(key), K(scn), K(tablet_change_checkpoint_scn), K(max_scn));
} else {
LOG_INFO("tablet already gc, but scn is more than tablet_change_checkpoint_scn", KR(ret), K(key), K(scn), K(tablet_change_checkpoint_scn), K(max_scn));
ret = OB_OBSOLETE_CLOG_NEED_SKIP;
LOG_INFO("tablet already gc, but scn is more than tablet_change_checkpoint_scn", KR(ret),
K(key), K(scn), K(tablet_change_checkpoint_scn), K(max_scn));
}
}
}
@ -1479,41 +1482,37 @@ int ObLS::replay_get_tablet(const common::ObTabletID &tablet_id,
{
int ret = OB_SUCCESS;
ObTabletHandle tablet_handle;
ObTablet *tablet = nullptr;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", KR(ret));
} else if (OB_FAIL(replay_get_tablet_no_check(tablet_id, scn, tablet_handle))) {
LOG_WARN("failed to get tablet", K(ret), K(tablet_id), K(ls_meta_.ls_id_));
} else {
} else if (tablet_id.is_ls_inner_tablet()) {
// do nothing
} else if (OB_ISNULL(tablet = tablet_handle.get_obj())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet should not be NULL", K(ret), KP(tablet), K(tablet_id), K(scn));
} else if (tablet->is_empty_shell()) {
ObTabletStatus::Status tablet_status = ObTabletStatus::MAX;
ObTabletCreateDeleteMdsUserData data;
bool is_commited = false;
if (tablet_id.is_ls_inner_tablet()) {
// do nothing
} else if (OB_FAIL(tablet_handle.get_obj()->ObITabletMdsInterface::get_latest_tablet_status(data, is_commited))) {
if (OB_EMPTY_RESULT == ret) {
LOG_WARN("rewrite errcode to EAGAIN", KR(ret), K(tablet_id), K(ls_meta_.ls_id_));
ret = OB_EAGAIN;
} else {
LOG_WARN("failed to get CreateDeleteMdsUserData", KR(ret), K(tablet_id), K(ls_meta_.ls_id_));
}
} else if (FALSE_IT(tablet_status = data.get_tablet_status())) {
} else if (ObTabletStatus::NORMAL == tablet_status
|| ObTabletStatus::TRANSFER_OUT == tablet_status
|| ObTabletStatus::TRANSFER_IN == tablet_status) {
// do nothing
} else if (ObTabletStatus::DELETED == tablet_status
|| ObTabletStatus::TRANSFER_OUT_DELETED == tablet_status) {
// tablet shell
ret = OB_TABLET_NOT_EXIST;
LOG_INFO("tablet is already deleted", KR(ret), K(tablet_id), K(ls_meta_.ls_id_), K(scn));
} else {
bool is_committed = false;
if (OB_FAIL(tablet->get_latest_tablet_status(data, is_committed))) {
LOG_WARN("failed to get latest tablet status", K(ret), KPC(tablet));
} else if (!is_committed) {
ret = OB_ERR_UNEXPECTED;
LOG_INFO("invalid status", KR(ret), K(tablet_id), K(ls_meta_.ls_id_), K(tablet_status), K(scn));
LOG_WARN("tablet is empty shell but user data is uncommitted, unexpected", K(ret), KPC(tablet));
} else if (FALSE_IT(tablet_status = data.get_tablet_status())) {
} else if (ObTabletStatus::DELETED != tablet_status
&& ObTabletStatus::TRANSFER_OUT_DELETED != tablet_status) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet is empty shell but user data is unexpected", K(ret), KPC(tablet));
} else {
ret = OB_OBSOLETE_CLOG_NEED_SKIP;
LOG_INFO("tablet is already deleted, need skip", KR(ret), K(tablet_id), K(ls_meta_.ls_id_), K(scn));
}
}
if (OB_SUCC(ret)) {
handle = tablet_handle;
}

View File

@ -113,8 +113,8 @@ int ObMediumCompactionClogHandler::inner_replay(
} else if (OB_FAIL(tablet_id.deserialize(buffer, buffer_size, new_pos))) {
LOG_WARN("fail to deserialize tablet id", K(ret), K(buffer_size), K(pos), K(tablet_id));
} else if (OB_FAIL(ls_->replay_get_tablet(tablet_id, scn, handle))) {
if (OB_TABLET_NOT_EXIST == ret) {
LOG_INFO("tablet not exist", K(ret), K(tablet_id));
if (OB_OBSOLETE_CLOG_NEED_SKIP == ret) {
LOG_INFO("clog is obsolete, should skip replay", K(ret), K(tablet_id));
ret = OB_SUCCESS;
} else {
LOG_WARN("failed to get tablet", K(ret), K(tablet_id));

View File

@ -145,7 +145,7 @@ int ObStorageSchemaRecorder::inner_replay_clog(
ObTabletHandle tmp_tablet_handle;
if (OB_FAIL(replay_get_tablet_handle(ls_id_, tablet_id_, scn, tmp_tablet_handle))) {
if (OB_TABLET_NOT_EXIST == ret) {
if (OB_OBSOLETE_CLOG_NEED_SKIP == ret) {
ret = OB_SUCCESS;
} else {
LOG_WARN("failed to get tablet handle", K(ret), K_(tablet_id), K(scn));

View File

@ -118,9 +118,9 @@ int ObTabletBindingHelper::get_tablet_for_new_mds(const ObLS &ls, const ObTablet
const bool for_replay = replay_scn.is_valid();
if (for_replay) {
if (OB_FAIL(ls.replay_get_tablet(tablet_id, replay_scn, handle))) {
if (OB_TABLET_NOT_EXIST == ret) {
if (OB_OBSOLETE_CLOG_NEED_SKIP == ret) {
ret = OB_NO_NEED_UPDATE;
LOG_WARN("tablet does not exist", K(ret));
LOG_WARN("clog is obsolete, should skip replay", K(ret));
} else {
LOG_WARN("failed to get tablet", K(ret));
}

View File

@ -604,7 +604,7 @@ int ObTxReplayExecutor::replay_one_row_in_memtable_(ObMutatorRowHeader &row_head
ObTabletHandle tablet_handle;
if (OB_FAIL(ls_->replay_get_tablet(row_head.tablet_id_, log_ts_ns_, tablet_handle))) {
if (OB_TABLET_NOT_EXIST == ret) {
if (OB_OBSOLETE_CLOG_NEED_SKIP == ret) {
ctx_->force_no_need_replay_checksum();
ret = OB_SUCCESS;
TRANS_LOG(WARN, "[Replay Tx] tablet gc, skip this log entry", K(ret), K(row_head.tablet_id_),