Allow replay to skip invalid redo logs after a tablet delete has committed.

Adds the tenant parameter _allow_skip_replay_redo_after_detete_tablet
(default "FALSE"). ObLS::replay_get_tablet now calls the new helper
ObLS::check_tablet_status_and_scn in two branches, and
ObTxReplayExecutor::replay_row_ consults the same parameter when
prepare_memtable_replay_ fails with OB_TABLET_NOT_EXIST.

NOTE(review): line breaks and leading indentation inside the hunks were
reconstructed from a whitespace-collapsed copy of this patch (blank context
lines were placed from the hunk-header counts); verify against the tree
before applying.
NOTE(review): the parameter name spells "detete" - presumably "delete" was
meant. It is kept unchanged so that every reference stays consistent; confirm
before release.
NOTE(review): the "index" hashes are those of the original patch and do not
reflect the description fix and the comments added below.

diff --git a/src/share/parameter/ob_parameter_seed.ipp b/src/share/parameter/ob_parameter_seed.ipp
index eec503f995..2fbf5a31d8 100644
--- a/src/share/parameter/ob_parameter_seed.ipp
+++ b/src/share/parameter/ob_parameter_seed.ipp
@@ -1895,6 +1895,10 @@ DEF_BOOL(_enable_choose_migration_source_policy, OB_TENANT_PARAMETER, "True",
 DEF_BOOL(_global_enable_rich_vector_format, OB_CLUSTER_PARAMETER, "True",
          "Control whether use rich vector format in vectorization engine",
          ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
+DEF_BOOL(_allow_skip_replay_redo_after_detete_tablet, OB_TENANT_PARAMETER, "FALSE",
+         "allow skip replay invalid redo log after tablet delete transaction is committed. "
+         "The default value is FALSE. Value: TRUE means we allow skip replaying this invalid redo log, False means we do not allow such behavior.",
+         ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
 
 //check os params
 DEF_BOOL(strict_check_os_params, OB_CLUSTER_PARAMETER, "False",
diff --git a/src/storage/ls/ob_ls.cpp b/src/storage/ls/ob_ls.cpp
index a5cd0edfb1..a205eb4a47 100644
--- a/src/storage/ls/ob_ls.cpp
+++ b/src/storage/ls/ob_ls.cpp
@@ -24,6 +24,7 @@
 #include "observer/net/ob_ingress_bw_alloc_service.h"
 #include "observer/ob_srv_network_frame.h"
 #include "observer/report/ob_i_meta_report.h"
+#include "observer/omt/ob_tenant_config_mgr.h"
 #include "rootserver/freeze/ob_major_freeze_service.h"
 #include "rootserver/tenant_snapshot/ob_tenant_snapshot_scheduler.h"
 #include "rootserver/restore/ob_clone_scheduler.h"
@@ -46,6 +47,7 @@
 #include "share/leak_checker/obj_leak_checker.h"
 #include "share/ob_ls_id.h"
 #include "share/ob_global_autoinc_service.h"
+#include "share/ob_force_print_log.h"
 #include "sql/das/ob_das_id_service.h"
 #include "storage/compaction/ob_tenant_tablet_scheduler.h"
 #include "storage/ls/ob_ls.h"
@@ -1780,6 +1782,8 @@ int ObLS::replay_get_tablet(
         && ObTabletStatus::TRANSFER_OUT_DELETED != tablet_status) {
       ret = OB_ERR_UNEXPECTED;
       LOG_WARN("tablet is empty shell but user data is unexpected", K(ret), KPC(tablet));
+    } else if (OB_FAIL(check_tablet_status_and_scn(data, scn))) {
+      LOG_ERROR("fail to check tablet status and scn", K(ret), K(ls_id), K(tablet_id), K(data), K(scn));
     } else {
       ret = OB_OBSOLETE_CLOG_NEED_SKIP;
       LOG_INFO("tablet is already deleted, need skip", KR(ret), K(ls_id), K(tablet_id), K(scn));
@@ -1800,6 +1804,8 @@ int ObLS::replay_get_tablet(
         LOG_INFO("latest transaction has not committed yet, should retry", KR(ret), K(ls_id), K(tablet_id),
             K(scn), "clog_checkpoint_scn", tablet->get_clog_checkpoint_scn(), K(data));
       }
+    } else if (OB_FAIL(check_tablet_status_and_scn(data, scn))) {
+      LOG_ERROR("fail to check tablet status and scn", K(ret), K(ls_id), K(tablet_id), K(data), K(scn));
     }
   }
 
@@ -1810,6 +1816,37 @@ int ObLS::replay_get_tablet(
   return ret;
 }
 
+// Guards against replaying a clog whose scn is not below the delete commit
+// scn of a tablet that is already deleted.
+// Returns OB_SUCCESS when data.tablet_status_ is neither DELETED nor
+// TRANSFER_OUT_DELETED, when scn is below data.delete_commit_scn_, or when the
+// tenant parameter _allow_skip_replay_redo_after_detete_tablet is true (the
+// case is then reported via FLOG_WARN). Returns OB_ERR_UNEXPECTED when the
+// tenant config guard is invalid or the parameter is false.
+int ObLS::check_tablet_status_and_scn(
+    const ObTabletCreateDeleteMdsUserData &data,
+    const share::SCN &scn)
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_UNLIKELY((ObTabletStatus::DELETED == data.tablet_status_ || ObTabletStatus::TRANSFER_OUT_DELETED == data.tablet_status_)
+      && scn >= data.delete_commit_scn_)) {
+    omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
+    if (OB_UNLIKELY(!tenant_config.is_valid())) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("tenant config is invalid", K(ret));
+    } else if (tenant_config->_allow_skip_replay_redo_after_detete_tablet) {
+      FLOG_WARN("scn is bigger than tablet delete commit scn, allow to skip replaying this clog for emergency",
+          K(ret), K(data), K(scn));
+    } else {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("scn is bigger than tablet delete commit scn", K(ret), K(data), K(scn));
+    }
+  }
+
+  return ret;
+}
+
 int ObLS::logstream_freeze(const int64_t trace_id, const bool is_sync, const int64_t abs_timeout_ts)
 {
   int ret = OB_SUCCESS;
diff --git a/src/storage/ls/ob_ls.h b/src/storage/ls/ob_ls.h
index 362ca7e3f5..87879a53cf 100644
--- a/src/storage/ls/ob_ls.h
+++ b/src/storage/ls/ob_ls.h
@@ -85,7 +85,7 @@ class ObCompactionScheduleIterator;
 }
 namespace storage
 {
-const static int64_t LS_INNER_TABLET_FROZEN_TIMESTAMP = 1;
+class ObTabletCreateDeleteMdsUserData;
 
 struct ObLSVTInfo
 {
@@ -928,7 +928,10 @@ public:
   DELEGATE_WITH_RET(reserved_snapshot_mgr_, add_dependent_medium_tablet, int);
   DELEGATE_WITH_RET(reserved_snapshot_mgr_, del_dependent_medium_tablet, int);
   int set_ls_migration_gc(bool &allow_gc);
-
+private:
+  static int check_tablet_status_and_scn(
+      const ObTabletCreateDeleteMdsUserData &data,
+      const share::SCN &scn);
 private:
   // StorageBaseUtil
   // table manager: create, remove and guard get.
diff --git a/src/storage/tablet/ob_tablet.cpp b/src/storage/tablet/ob_tablet.cpp
index fee26b18d2..21495e3c78 100644
--- a/src/storage/tablet/ob_tablet.cpp
+++ b/src/storage/tablet/ob_tablet.cpp
@@ -5508,7 +5508,8 @@ int ObTablet::get_ddl_info(int64_t &schema_version, int64_t &schema_refreshed_ts
   return ret;
 }
 
-int ObTablet::get_rec_log_scn(SCN &rec_scn) {
+int ObTablet::get_rec_log_scn(SCN &rec_scn)
+{
   int ret = OB_SUCCESS;
   rec_scn = SCN::max_scn();
   ObTableHandleV2 handle;
diff --git a/src/storage/tx/ob_tx_replay_executor.cpp b/src/storage/tx/ob_tx_replay_executor.cpp
index e8b86a3f1b..82aef5ae6a 100644
--- a/src/storage/tx/ob_tx_replay_executor.cpp
+++ b/src/storage/tx/ob_tx_replay_executor.cpp
@@ -11,6 +11,7 @@
  */
 
 #include "share/throttle/ob_throttle_unit.h"
+#include "observer/omt/ob_tenant_config_mgr.h"
 #include "storage/ls/ob_ls.h"
 #include "storage/ls/ob_ls_tx_service.h"
 #include "storage/memtable/ob_memtable.h"
@@ -765,7 +766,9 @@ int ObTxReplayExecutor::replay_row_(storage::ObStoreCtx &store_ctx,
                                     memtable::ObMemtableMutatorIterator *mmi_ptr)
 {
   int ret = OB_SUCCESS;
-  common::ObTimeGuard timeguard("replay_row_in_memtable", 10 * 1000);
+  const share::ObLSID &ls_id = tablet->get_ls_id();
+  const common::ObTabletID &tablet_id = tablet->get_tablet_id();
+  common::ObTimeGuard timeguard("replay_row_in_memtable", 10_ms);
   ObIMemtable *mem_ptr = nullptr;
   ObMemtable *data_mem_ptr = nullptr;
   ObStorageTableGuard w_guard(tablet, store_ctx, true, true, log_ts_ns_);
@@ -774,12 +777,27 @@ int ObTxReplayExecutor::replay_row_(storage::ObStoreCtx &store_ctx,
     TRANS_LOG(WARN, "[Replay Tx] invaild arguments", K(ret), KP(mmi_ptr));
   } else if (FALSE_IT(timeguard.click("start"))) {
   } else if (OB_FAIL(prepare_memtable_replay_(w_guard, mem_ptr))) {
-    if (OB_NO_NEED_UPDATE != ret) {
-      TRANS_LOG(WARN, "[Replay Tx] prepare for replay failed", K(ret), KP(mem_ptr), KP(mmi_ptr));
+    if (OB_NO_NEED_UPDATE == ret) {
+      TRANS_LOG(DEBUG, "[Replay Tx] Not need replay row for tablet",
+                K(ret), K(ls_id), K(tablet_id), K(log_ts_ns_),
+                K(tx_part_log_no_), K(mmi_ptr->get_row_head()));
+    } else if (OB_TABLET_NOT_EXIST == ret) {
+      // Tablet is missing: only downgrade to OB_NO_NEED_UPDATE when the tenant
+      // explicitly enabled _allow_skip_replay_redo_after_detete_tablet.
+      omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
+      if (OB_UNLIKELY(!tenant_config.is_valid())) {
+        ret = OB_ERR_UNEXPECTED;
+        TRANS_LOG(WARN, "tenant config is invalid", K(ret));
+      } else if (tenant_config->_allow_skip_replay_redo_after_detete_tablet) {
+        ret = OB_NO_NEED_UPDATE;
+        TRANS_LOG(WARN, "[Replay Tx] tablet does not exist while preparing memtable for replay, allow to skip this clog replaying for emergency",
+                  K(ret), K(ls_id), K(tablet_id), K_(log_ts_ns));
+      } else {
+        TRANS_LOG(ERROR, "[Replay Tx] tablet does not exist while preparing memtable for replay",
+                  K(ret), K(ls_id), K(tablet_id), K_(log_ts_ns));
+      }
     } else {
-      TRANS_LOG(DEBUG, "[Replay Tx] Not need replay row for tablet", K(log_ts_ns_),
-                K(tx_part_log_no_), K(mmi_ptr->get_row_head()),
-                K(tablet->get_tablet_meta().tablet_id_));
+      TRANS_LOG(WARN, "[Replay Tx] prepare for replay failed", K(ret), K(ls_id), K(tablet_id), KP(mem_ptr), KP(mmi_ptr));
     }
     // dynamic_cast will check whether this is really a ObMemtable.
   } else if (OB_ISNULL(data_mem_ptr = static_cast<ObMemtable *>(mem_ptr))) {
diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result
index 5b42d77e4a..9656444f45 100644
--- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result
+++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result
@@ -254,6 +254,7 @@ writing_throttling_maximum_duration
 writing_throttling_trigger_percentage
 zone
 _advance_checkpoint_timeout
+_allow_skip_replay_redo_after_detete_tablet
 _audit_mode
 _auto_broadcast_tablet_location_rate_limit
 _auto_drop_recovering_auxiliary_tenant