Revert Fix transfer out and then transfer in restart replay clog bug

This commit is contained in:
godyangfight
2023-06-24 11:12:27 +00:00
committed by ob-robot
parent 5d06ee2487
commit ceac8bd13c
9 changed files with 22 additions and 50 deletions

View File

@ -81,8 +81,8 @@ int ObTabletReplayExecutor::execute(const share::SCN &scn, const share::ObLSID &
} else if (can_skip_replay) {
// do nothing
} else if (CLICK_FAIL(replay_get_tablet_(ls_handle, tablet_id, scn, tablet_handle))) {
if (OB_OBSOLETE_CLOG_NEED_SKIP == ret) {
CLOG_LOG(INFO, "clog is already obsolete, should skip replay", K(ret), K(ls_id), K(scn));
if (OB_TABLET_NOT_EXIST == ret) {
CLOG_LOG(INFO, "tablet not exist, replay skip", K(ret), K(ls_id), K(scn));
ret = OB_SUCCESS;
} else if (OB_EAGAIN == ret) {
CLOG_LOG_LIMIT(WARN, "need retry to get tablet", K(ret), K(ls_id), K(scn));

File diff suppressed because one or more lines are too long

View File

@ -1791,7 +1791,7 @@ DEFINE_ERROR(OB_TRANSFER_DETECT_ACTIVE_TRANS, -7114, -1, "HY000", "transfer dete
DEFINE_ERROR(OB_TRANSFER_SRC_LS_NOT_EXIST, -7115, -1, "HY000", "transfer src ls does not exist");
DEFINE_ERROR(OB_TRANSFER_SRC_TABLET_NOT_EXIST, -7116, -1, "HY000", "transfer src tablet does not exist");
DEFINE_ERROR(OB_LS_NEED_REBUILD, -7117, -1, "HY000", "ls need rebuild");
DEFINE_ERROR(OB_OBSOLETE_CLOG_NEED_SKIP, -7118, -1, "HY000", "obsolete clog need skip");
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//error code for gis -7201 ---- -7300
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -1401,7 +1401,6 @@ constexpr int OB_TRANSFER_DETECT_ACTIVE_TRANS = -7114;
constexpr int OB_TRANSFER_SRC_LS_NOT_EXIST = -7115;
constexpr int OB_TRANSFER_SRC_TABLET_NOT_EXIST = -7116;
constexpr int OB_LS_NEED_REBUILD = -7117;
constexpr int OB_OBSOLETE_CLOG_NEED_SKIP = -7118;
constexpr int OB_ERR_INVALID_XML_DATATYPE = -7402;
constexpr int OB_ERR_XML_MISSING_COMMA = -7403;
constexpr int OB_ERR_INVALID_XPATH_EXPRESSION = -7404;
@ -3383,7 +3382,6 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219;
#define OB_TRANSFER_SRC_LS_NOT_EXIST__USER_ERROR_MSG "transfer src ls does not exist"
#define OB_TRANSFER_SRC_TABLET_NOT_EXIST__USER_ERROR_MSG "transfer src tablet does not exist"
#define OB_LS_NEED_REBUILD__USER_ERROR_MSG "ls need rebuild"
#define OB_OBSOLETE_CLOG_NEED_SKIP__USER_ERROR_MSG "obsolete clog need skip"
#define OB_ERR_GIS_DIFFERENT_SRIDS__USER_ERROR_MSG "Binary geometry function %s given two geometries of different srids: %u and %u, which should have been identical."
#define OB_ERR_GIS_UNSUPPORTED_ARGUMENT__USER_ERROR_MSG "Calling geometry function %s with unsupported types of arguments."
#define OB_ERR_GIS_UNKNOWN_ERROR__USER_ERROR_MSG "Unknown GIS error occurred in function %s."
@ -5475,7 +5473,6 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219;
#define OB_TRANSFER_SRC_LS_NOT_EXIST__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7115, transfer src ls does not exist"
#define OB_TRANSFER_SRC_TABLET_NOT_EXIST__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7116, transfer src tablet does not exist"
#define OB_LS_NEED_REBUILD__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7117, ls need rebuild"
#define OB_OBSOLETE_CLOG_NEED_SKIP__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -7118, obsolete clog need skip"
#define OB_ERR_GIS_DIFFERENT_SRIDS__ORA_USER_ERROR_MSG "ORA-00600: Binary geometry function %s given two geometries of different srids: %u and %u, which should have been identical."
#define OB_ERR_GIS_UNSUPPORTED_ARGUMENT__ORA_USER_ERROR_MSG "ORA-00600: Calling geometry function %s with unsupported types of arguments."
#define OB_ERR_GIS_UNKNOWN_ERROR__ORA_USER_ERROR_MSG "ORA-00600: Unknown GIS error occurred in function %s."
@ -5941,7 +5938,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219;
#define OB_ERR_DATA_TOO_LONG_MSG_FMT_V2__ORA_USER_ERROR_MSG "ORA-12899: value too large for column %.*s (actual: %ld, maximum: %ld)"
#define OB_ERR_INVALID_DATE_MSG_FMT_V2__ORA_USER_ERROR_MSG "ORA-01861: Incorrect datetime value for column '%.*s' at row %ld"
extern int g_all_ob_errnos[2088];
extern int g_all_ob_errnos[2087];
const char *ob_error_name(const int oberr);
const char* ob_error_cause(const int oberr);

View File

@ -1444,7 +1444,6 @@ int ObLS::replay_get_tablet_no_check(const common::ObTabletID &tablet_id,
LOG_WARN("failed to get tablet", K(ret), K(key));
} else if (scn <= tablet_change_checkpoint_scn) {
LOG_WARN("tablet already gc", K(ret), K(key), K(scn), K(tablet_change_checkpoint_scn));
ret = OB_OBSOLETE_CLOG_NEED_SKIP;
} else if (OB_FAIL(MTL(ObLogService*)->get_log_replay_service()->get_max_replayed_scn(ls_meta_.ls_id_, max_scn))) {
LOG_WARN("failed to get_max_replayed_scn", KR(ret), K_(ls_meta), K(scn), K(tablet_id));
}
@ -1463,7 +1462,6 @@ int ObLS::replay_get_tablet_no_check(const common::ObTabletID &tablet_id,
LOG_INFO("tablet does not exist, but need retry", KR(ret), K(key), K(scn), K(tablet_change_checkpoint_scn), K(max_scn));
} else {
LOG_INFO("tablet already gc, but scn is more than tablet_change_checkpoint_scn", KR(ret), K(key), K(scn), K(tablet_change_checkpoint_scn), K(max_scn));
ret = OB_OBSOLETE_CLOG_NEED_SKIP;
}
}
}
@ -1481,25 +1479,18 @@ int ObLS::replay_get_tablet(const common::ObTabletID &tablet_id,
{
int ret = OB_SUCCESS;
ObTabletHandle tablet_handle;
ObTablet *tablet = nullptr;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", KR(ret));
} else if (OB_FAIL(replay_get_tablet_no_check(tablet_id, scn, tablet_handle))) {
LOG_WARN("failed to get tablet", K(ret), K(tablet_id), K(ls_meta_.ls_id_));
} else if (OB_ISNULL(tablet = tablet_handle.get_obj())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet should not be NULL", K(ret), K(tablet_id), K(scn));
} else {
ObTabletStatus::Status tablet_status = ObTabletStatus::MAX;
ObTabletCreateDeleteMdsUserData data;
bool is_commited = false;
if (tablet_id.is_ls_inner_tablet()) {
// do nothing
} else if (tablet->get_clog_checkpoint_scn() >= scn) {
ret = OB_OBSOLETE_CLOG_NEED_SKIP;
LOG_WARN("replay scn is smaller than tablet clog checkpoint scn, need skip", K(ret), KPC(tablet), K(scn));
} else if (OB_FAIL(tablet_handle.get_obj()->ObITabletMdsInterface::get_latest_tablet_status(data, is_commited))) {
if (OB_EMPTY_RESULT == ret) {
LOG_WARN("rewrite errcode to EAGAIN", KR(ret), K(tablet_id), K(ls_meta_.ls_id_));
@ -1508,21 +1499,18 @@ int ObLS::replay_get_tablet(const common::ObTabletID &tablet_id,
LOG_WARN("failed to get CreateDeleteMdsUserData", KR(ret), K(tablet_id), K(ls_meta_.ls_id_));
}
} else if (FALSE_IT(tablet_status = data.get_tablet_status())) {
} else if (tablet->is_empty_shell()) {
if (ObTabletStatus::DELETED != tablet_status
&& ObTabletStatus::TRANSFER_OUT_DELETED != tablet_status) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("tablet is empty shell but tablet status is not deleted or transfer out deleted",
KR(ret), K(tablet_id), K(ls_meta_.ls_id_), K(scn), K(tablet_status));
} else if (ObTabletStatus::NORMAL == tablet_status
|| ObTabletStatus::TRANSFER_OUT == tablet_status
|| ObTabletStatus::TRANSFER_IN == tablet_status) {
// do nothing
} else if (ObTabletStatus::DELETED == tablet_status
|| ObTabletStatus::TRANSFER_OUT_DELETED == tablet_status) {
// tablet shell
ret = OB_TABLET_NOT_EXIST;
LOG_INFO("tablet is already deleted", KR(ret), K(tablet_id), K(ls_meta_.ls_id_), K(scn));
} else {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("tablet is already be empty shell but still has data clog", KR(ret), K(tablet_id), K(ls_meta_.ls_id_), K(scn));
}
} else {
//There will be cases when the mds has been persisted but the clog_checkpoint_scn is still relatively small.
//There is no problem with primary database, but for the standby database, the read timestamp of tenant's
//standby machine is relatively small but no valid data can be read because the data is filtered when skip
//ObTabletStatus::DELETED == tablet_status or ObTabletStatus::TRANSFER_OUT_DELETED == tablet_status
LOG_INFO("invalid status", KR(ret), K(tablet_id), K(ls_meta_.ls_id_), K(tablet_status), K(scn));
}
}

View File

@ -113,8 +113,8 @@ int ObMediumCompactionClogHandler::inner_replay(
} else if (OB_FAIL(tablet_id.deserialize(buffer, buffer_size, new_pos))) {
LOG_WARN("fail to deserialize tablet id", K(ret), K(buffer_size), K(pos), K(tablet_id));
} else if (OB_FAIL(ls_->replay_get_tablet(tablet_id, scn, handle))) {
if (OB_OBSOLETE_CLOG_NEED_SKIP == ret) {
LOG_INFO("clog is obsolete, should skip replay", K(ret), K(tablet_id));
if (OB_TABLET_NOT_EXIST == ret) {
LOG_INFO("tablet not exist", K(ret), K(tablet_id));
ret = OB_SUCCESS;
} else {
LOG_WARN("failed to get tablet", K(ret), K(tablet_id));

View File

@ -145,7 +145,7 @@ int ObStorageSchemaRecorder::inner_replay_clog(
ObTabletHandle tmp_tablet_handle;
if (OB_FAIL(replay_get_tablet_handle(ls_id_, tablet_id_, scn, tmp_tablet_handle))) {
if (OB_OBSOLETE_CLOG_NEED_SKIP == ret) {
if (OB_TABLET_NOT_EXIST == ret) {
ret = OB_SUCCESS;
} else {
LOG_WARN("failed to get tablet handle", K(ret), K_(tablet_id), K(scn));

View File

@ -118,9 +118,9 @@ int ObTabletBindingHelper::get_tablet_for_new_mds(const ObLS &ls, const ObTablet
const bool for_replay = replay_scn.is_valid();
if (for_replay) {
if (OB_FAIL(ls.replay_get_tablet(tablet_id, replay_scn, handle))) {
if (OB_OBSOLETE_CLOG_NEED_SKIP == ret) {
if (OB_TABLET_NOT_EXIST == ret) {
ret = OB_NO_NEED_UPDATE;
LOG_WARN("clog is obsolete, should skip replay", K(ret));
LOG_WARN("tablet does not exist", K(ret));
} else {
LOG_WARN("failed to get tablet", K(ret));
}

View File

@ -604,7 +604,7 @@ int ObTxReplayExecutor::replay_one_row_in_memtable_(ObMutatorRowHeader &row_head
ObTabletHandle tablet_handle;
if (OB_FAIL(ls_->replay_get_tablet(row_head.tablet_id_, log_ts_ns_, tablet_handle))) {
if (OB_OBSOLETE_CLOG_NEED_SKIP == ret) {
if (OB_TABLET_NOT_EXIST == ret) {
ctx_->force_no_need_replay_checksum();
ret = OB_SUCCESS;
TRANS_LOG(WARN, "[Replay Tx] tablet gc, skip this log entry", K(ret), K(row_head.tablet_id_),