Enhance the clog replay checking logic to prevent replaying clog after the tablet delete / finish-transfer-out transaction is committed
This commit is contained in:
		| @ -1895,6 +1895,10 @@ DEF_BOOL(_enable_choose_migration_source_policy, OB_TENANT_PARAMETER, "True", | ||||
| // Cluster-level boolean parameter (default "True") controlling whether the vectorization | ||||
| // engine uses the rich vector format. | ||||
| DEF_BOOL(_global_enable_rich_vector_format, OB_CLUSTER_PARAMETER, "True", | ||||
|          "Control whether use rich vector format in vectorization engine", | ||||
|          ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); | ||||
| // Tenant-level emergency switch, disabled by default. When enabled, the replay path is | ||||
| // allowed to skip redo logs that target a tablet whose delete transaction has already | ||||
| // committed, instead of reporting an error. | ||||
| // NOTE: the misspelling "detete" in the parameter name is kept on purpose, because the | ||||
| // name is referenced by the replay code and by the parameter list used in tests. | ||||
| DEF_BOOL(_allow_skip_replay_redo_after_detete_tablet, OB_TENANT_PARAMETER, "FALSE", | ||||
|          "allow skip replay invalid redo log after tablet delete transaction is committed. " | ||||
|          "The default value is FALSE. Value: TRUE means we allow skip replaying this invalid redo log, False means we do not allow such behavior.", | ||||
|          ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); | ||||
|  | ||||
| //check os params | ||||
| DEF_BOOL(strict_check_os_params, OB_CLUSTER_PARAMETER, "False", | ||||
|  | ||||
| @ -24,6 +24,7 @@ | ||||
| #include "observer/net/ob_ingress_bw_alloc_service.h" | ||||
| #include "observer/ob_srv_network_frame.h" | ||||
| #include "observer/report/ob_i_meta_report.h" | ||||
| #include "observer/omt/ob_tenant_config_mgr.h" | ||||
| #include "rootserver/freeze/ob_major_freeze_service.h" | ||||
| #include "rootserver/tenant_snapshot/ob_tenant_snapshot_scheduler.h" | ||||
| #include "rootserver/restore/ob_clone_scheduler.h" | ||||
| @ -46,6 +47,7 @@ | ||||
| #include "share/leak_checker/obj_leak_checker.h" | ||||
| #include "share/ob_ls_id.h" | ||||
| #include "share/ob_global_autoinc_service.h" | ||||
| #include "share/ob_force_print_log.h" | ||||
| #include "sql/das/ob_das_id_service.h" | ||||
| #include "storage/compaction/ob_tenant_tablet_scheduler.h" | ||||
| #include "storage/ls/ob_ls.h" | ||||
| @ -1780,6 +1782,8 @@ int ObLS::replay_get_tablet( | ||||
|         && ObTabletStatus::TRANSFER_OUT_DELETED != tablet_status) { | ||||
|       ret = OB_ERR_UNEXPECTED; | ||||
|       LOG_WARN("tablet is empty shell but user data is unexpected", K(ret), KPC(tablet)); | ||||
|     } else if (OB_FAIL(check_tablet_status_and_scn(data, scn))) { | ||||
|       LOG_ERROR("fail to check tablet status and scn", K(ret), K(ls_id), K(tablet_id), K(data), K(scn)); | ||||
|     } else { | ||||
|       ret = OB_OBSOLETE_CLOG_NEED_SKIP; | ||||
|       LOG_INFO("tablet is already deleted, need skip", KR(ret), K(ls_id), K(tablet_id), K(scn)); | ||||
| @ -1800,6 +1804,8 @@ int ObLS::replay_get_tablet( | ||||
|         LOG_INFO("latest transaction has not committed yet, should retry", KR(ret), K(ls_id), K(tablet_id), | ||||
|             K(scn), "clog_checkpoint_scn", tablet->get_clog_checkpoint_scn(), K(data)); | ||||
|       } | ||||
|     } else if (OB_FAIL(check_tablet_status_and_scn(data, scn))) { | ||||
|       LOG_ERROR("fail to check tablet status and scn", K(ret), K(ls_id), K(tablet_id), K(data), K(scn)); | ||||
|     } | ||||
|   } | ||||
|  | ||||
| @ -1810,6 +1816,30 @@ int ObLS::replay_get_tablet( | ||||
|   return ret; | ||||
| } | ||||
|  | ||||
| // Replay guard for tablets that have already been deleted. | ||||
| // | ||||
| // If the tablet status in `data` is DELETED or TRANSFER_OUT_DELETED and `scn` is not | ||||
| // smaller than data.delete_commit_scn_, the log is not expected to be replayed: | ||||
| //   - tenant config invalid                                    -> OB_ERR_UNEXPECTED | ||||
| //   - _allow_skip_replay_redo_after_detete_tablet is enabled   -> OB_SUCCESS (force-logged warning) | ||||
| //   - otherwise                                                -> OB_ERR_UNEXPECTED | ||||
| // In every other case OB_SUCCESS is returned without touching the tenant config. | ||||
| // | ||||
| // @param data  tablet create/delete mds user data (tablet_status_, delete_commit_scn_) | ||||
| // @param scn   scn to compare against the delete commit scn | ||||
| // @return OB_SUCCESS or OB_ERR_UNEXPECTED as described above | ||||
| int ObLS::check_tablet_status_and_scn( | ||||
|     const ObTabletCreateDeleteMdsUserData &data, | ||||
|     const share::SCN &scn) | ||||
| { | ||||
|   int ret = OB_SUCCESS; | ||||
|   const bool is_tablet_deleted = ObTabletStatus::DELETED == data.tablet_status_ | ||||
|       || ObTabletStatus::TRANSFER_OUT_DELETED == data.tablet_status_; | ||||
|  | ||||
|   // The scn comparison is only evaluated once the status says the tablet is gone. | ||||
|   if (OB_UNLIKELY(is_tablet_deleted && scn >= data.delete_commit_scn_)) { | ||||
|     omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID())); | ||||
|     if (tenant_config.is_valid()) { | ||||
|       if (tenant_config->_allow_skip_replay_redo_after_detete_tablet) { | ||||
|         // Emergency switch is on: keep ret as OB_SUCCESS and only leave a trace in the log. | ||||
|         FLOG_WARN("scn is bigger than tablet delete commit scn, allow to skip replaying this clog for emergency", | ||||
|             K(ret), K(data), K(scn)); | ||||
|       } else { | ||||
|         ret = OB_ERR_UNEXPECTED; | ||||
|         LOG_WARN("scn is bigger than tablet delete commit scn", K(ret), K(data), K(scn)); | ||||
|       } | ||||
|     } else { | ||||
|       ret = OB_ERR_UNEXPECTED; | ||||
|       LOG_WARN("tenant config is invalid", K(ret)); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   return ret; | ||||
| } | ||||
|  | ||||
| int ObLS::logstream_freeze(const int64_t trace_id, const bool is_sync, const int64_t abs_timeout_ts) | ||||
| { | ||||
|   int ret = OB_SUCCESS; | ||||
|  | ||||
| @ -85,7 +85,7 @@ class ObCompactionScheduleIterator; | ||||
| } | ||||
| namespace storage | ||||
| { | ||||
| const static int64_t LS_INNER_TABLET_FROZEN_TIMESTAMP = 1; | ||||
| class ObTabletCreateDeleteMdsUserData; | ||||
|  | ||||
| struct ObLSVTInfo | ||||
| { | ||||
| @ -928,7 +928,10 @@ public: | ||||
|   DELEGATE_WITH_RET(reserved_snapshot_mgr_, add_dependent_medium_tablet, int); | ||||
|   DELEGATE_WITH_RET(reserved_snapshot_mgr_, del_dependent_medium_tablet, int); | ||||
|   int set_ls_migration_gc(bool &allow_gc); | ||||
|  | ||||
| private: | ||||
|   // Replay guard: when `data` reports tablet status DELETED or TRANSFER_OUT_DELETED and | ||||
|   // `scn` is not smaller than data.delete_commit_scn_, returns OB_ERR_UNEXPECTED unless the | ||||
|   // tenant config _allow_skip_replay_redo_after_detete_tablet is enabled (an invalid tenant | ||||
|   // config also yields OB_ERR_UNEXPECTED). Returns OB_SUCCESS in all other cases. | ||||
|   static int check_tablet_status_and_scn( | ||||
|       const ObTabletCreateDeleteMdsUserData &data, | ||||
|       const share::SCN &scn); | ||||
| private: | ||||
|   // StorageBaseUtil | ||||
|   // table manager: create, remove and guard get. | ||||
|  | ||||
| @ -5508,7 +5508,8 @@ int ObTablet::get_ddl_info(int64_t &schema_version, int64_t &schema_refreshed_ts | ||||
|   return ret; | ||||
| } | ||||
|  | ||||
| int ObTablet::get_rec_log_scn(SCN &rec_scn) { | ||||
| int ObTablet::get_rec_log_scn(SCN &rec_scn) | ||||
| { | ||||
|   int ret = OB_SUCCESS; | ||||
|   rec_scn = SCN::max_scn(); | ||||
|   ObTableHandleV2 handle; | ||||
|  | ||||
| @ -11,6 +11,7 @@ | ||||
|  */ | ||||
|  | ||||
| #include "share/throttle/ob_throttle_unit.h" | ||||
| #include "observer/omt/ob_tenant_config_mgr.h" | ||||
| #include "storage/ls/ob_ls.h" | ||||
| #include "storage/ls/ob_ls_tx_service.h" | ||||
| #include "storage/memtable/ob_memtable.h" | ||||
| @ -765,7 +766,9 @@ int ObTxReplayExecutor::replay_row_(storage::ObStoreCtx &store_ctx, | ||||
|                                     memtable::ObMemtableMutatorIterator *mmi_ptr) | ||||
| { | ||||
|   int ret = OB_SUCCESS; | ||||
|   common::ObTimeGuard timeguard("replay_row_in_memtable", 10 * 1000); | ||||
|   const share::ObLSID &ls_id = tablet->get_ls_id(); | ||||
|   const common::ObTabletID &tablet_id = tablet->get_tablet_id(); | ||||
|   common::ObTimeGuard timeguard("replay_row_in_memtable", 10_ms); | ||||
|   ObIMemtable *mem_ptr = nullptr; | ||||
|   ObMemtable *data_mem_ptr = nullptr; | ||||
|   ObStorageTableGuard w_guard(tablet, store_ctx, true, true, log_ts_ns_); | ||||
| @ -774,12 +777,25 @@ int ObTxReplayExecutor::replay_row_(storage::ObStoreCtx &store_ctx, | ||||
|     TRANS_LOG(WARN, "[Replay Tx] invaild arguments", K(ret), KP(mmi_ptr)); | ||||
|   } else if (FALSE_IT(timeguard.click("start"))) { | ||||
|   } else if (OB_FAIL(prepare_memtable_replay_(w_guard, mem_ptr))) { | ||||
|     if (OB_NO_NEED_UPDATE != ret) { | ||||
|       TRANS_LOG(WARN, "[Replay Tx] prepare for replay failed", K(ret), KP(mem_ptr), KP(mmi_ptr)); | ||||
|     if (OB_NO_NEED_UPDATE == ret) { | ||||
|       TRANS_LOG(DEBUG, "[Replay Tx] Not need replay row for tablet", | ||||
|                 K(ret), K(ls_id), K(tablet_id), K(log_ts_ns_), | ||||
|                 K(tx_part_log_no_), K(mmi_ptr->get_row_head())); | ||||
|     } else if (OB_TABLET_NOT_EXIST == ret) { | ||||
|       omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID())); | ||||
|       if (OB_UNLIKELY(!tenant_config.is_valid())) { | ||||
|         ret = OB_ERR_UNEXPECTED; | ||||
|         TRANS_LOG(WARN, "tenant config is invalid", K(ret)); | ||||
|       } else if (tenant_config->_allow_skip_replay_redo_after_detete_tablet) { | ||||
|         ret = OB_NO_NEED_UPDATE; | ||||
|         TRANS_LOG(WARN, "[Replay Tx] tablet does not exist while preparing memtable for replay, allow to skip this clog replaying for emergency", | ||||
|             K(ret), K(ls_id), K(tablet_id), K_(log_ts_ns)); | ||||
|       } else { | ||||
|       TRANS_LOG(DEBUG, "[Replay Tx] Not need replay row for tablet", K(log_ts_ns_), | ||||
|                 K(tx_part_log_no_), K(mmi_ptr->get_row_head()), | ||||
|                 K(tablet->get_tablet_meta().tablet_id_)); | ||||
|         TRANS_LOG(ERROR, "[Replay Tx] tablet does not exist while preparing memtable for replay", | ||||
|             K(ret), K(ls_id), K(tablet_id), K_(log_ts_ns)); | ||||
|       } | ||||
|     } else { | ||||
|       TRANS_LOG(WARN, "[Replay Tx] prepare for replay failed", K(ret), K(ls_id), K(tablet_id), KP(mem_ptr), KP(mmi_ptr)); | ||||
|     } | ||||
|     // dynamic_cast will check whether this is really a ObMemtable. | ||||
|   } else if (OB_ISNULL(data_mem_ptr = static_cast<ObMemtable *>(mem_ptr))) { | ||||
|  | ||||
| @ -254,6 +254,7 @@ writing_throttling_maximum_duration | ||||
| writing_throttling_trigger_percentage | ||||
| zone | ||||
| _advance_checkpoint_timeout | ||||
| _allow_skip_replay_redo_after_detete_tablet | ||||
| _audit_mode | ||||
| _auto_broadcast_tablet_location_rate_limit | ||||
| _auto_drop_recovering_auxiliary_tenant | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	 hiddenbomb
					hiddenbomb