diff --git a/src/share/ob_debug_sync_point.h b/src/share/ob_debug_sync_point.h index d8cf21903d..5ab7454ffd 100644 --- a/src/share/ob_debug_sync_point.h +++ b/src/share/ob_debug_sync_point.h @@ -417,6 +417,9 @@ class ObString; ACT(BEFORE_BACKUP_TASK_FINISH,)\ ACT(BEFORE_UPDATE_LS_META_TABLE,)\ ACT(BEFORE_LOAD_ARCHIVE_ROUND,)\ + ACT(BEFORE_MIG_DDL_TABLE_MERGE_TASK,)\ + ACT(BEFORE_COPY_DDL_SSTABLE,)\ + ACT(BEFORE_DDL_WRITE_PREPARE_LOG,)\ ACT(MAX_DEBUG_SYNC_POINT,) DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF); diff --git a/src/share/parameter/ob_parameter_seed.ipp b/src/share/parameter/ob_parameter_seed.ipp index d0fee577d0..32c27b104e 100644 --- a/src/share/parameter/ob_parameter_seed.ipp +++ b/src/share/parameter/ob_parameter_seed.ipp @@ -1291,6 +1291,21 @@ DEF_STR_LIST(sanity_whitelist, OB_CLUSTER_PARAMETER, "", "vip who wouldn't leadi DEF_TIME(_advance_checkpoint_timeout, OB_CLUSTER_PARAMETER, "30m", "[10s,180m]", "the timeout for backup/migrate advance checkpoint Range: [10s,180m]", ObParameterAttr(Section::ROOT_SERVICE, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); +ERRSIM_DEF_INT(errsim_migration_tablet_id, OB_CLUSTER_PARAMETER, "0", "[0,)", + "the tablet id that migration want to insert error" + "Range: [0,) in integer", + ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); +ERRSIM_DEF_INT(errsim_max_ddl_sstable_count, OB_CLUSTER_PARAMETER, "0", "[0,)", + "max ddl sstable count in errsim mode" + "Range: [0,) in integer", + ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); +ERRSIM_DEF_INT(errsim_max_ddl_block_count, OB_CLUSTER_PARAMETER, "0", "[0,)", + "max ddl block count in errsim mode" + "Range: [0,) in integer", + ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); +ERRSIM_DEF_STR(errsim_migration_src_server_addr, OB_CLUSTER_PARAMETER, "", + "the server dest ls choose as src when in errsim mode", + 
ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); DEF_BOOL(enable_cgroup, OB_CLUSTER_PARAMETER, "True", "when set to false, cgroup will not init; when set to true but cgroup root dir is not ready, print ERROR", diff --git a/src/storage/ddl/ob_ddl_merge_task.cpp b/src/storage/ddl/ob_ddl_merge_task.cpp index cf633935de..1de7924b81 100644 --- a/src/storage/ddl/ob_ddl_merge_task.cpp +++ b/src/storage/ddl/ob_ddl_merge_task.cpp @@ -288,7 +288,13 @@ int ObDDLTableMergeTask::init(const ObDDLTableMergeDagParam &ddl_dag_param) int ObDDLTableMergeTask::process() { int ret = OB_SUCCESS; - const int64_t MAX_DDL_SSTABLE = 128; + int64_t MAX_DDL_SSTABLE = 128; +#ifdef ERRSIM + if (0 != GCONF.errsim_max_ddl_sstable_count) { + MAX_DDL_SSTABLE = GCONF.errsim_max_ddl_sstable_count; + LOG_INFO("set max ddl sstable in errsim mode", K(MAX_DDL_SSTABLE)); + } +#endif LOG_INFO("ddl merge task start process", K(*this)); ObTabletHandle tablet_handle; ObDDLKvMgrHandle ddl_kv_mgr_handle; @@ -312,6 +318,14 @@ int ObDDLTableMergeTask::process() LOG_WARN("get ddl sstable handles failed", K(ret)); } else if (ddl_sstable_handles.get_count() >= MAX_DDL_SSTABLE || merge_param_.is_commit_) { DEBUG_SYNC(BEFORE_DDL_TABLE_MERGE_TASK); +#ifdef ERRSIM + static int64_t counter = 0; + counter++; + if (counter >= 2) { + DEBUG_SYNC(BEFORE_MIG_DDL_TABLE_MERGE_TASK); + } +#endif + ObTabletDDLParam ddl_param; ObTableHandleV2 table_handle; bool is_data_complete = false; diff --git a/src/storage/ddl/ob_ddl_redo_log_writer.cpp b/src/storage/ddl/ob_ddl_redo_log_writer.cpp index 490bec2944..62e610670a 100644 --- a/src/storage/ddl/ob_ddl_redo_log_writer.cpp +++ b/src/storage/ddl/ob_ddl_redo_log_writer.cpp @@ -23,6 +23,7 @@ #include "storage/tx_storage/ob_ls_service.h" #include "storage/tx/ob_ts_mgr.h" #include "storage/ddl/ob_tablet_ddl_kv_mgr.h" +#include "observer/ob_server_event_history_table_operator.h" using namespace oceanbase::common; using namespace oceanbase::storage; @@ 
-1018,6 +1019,14 @@ int ObDDLSSTableRedoWriter::write_prepare_log(const ObITable::TableKey &table_ke { int ret = OB_SUCCESS; +#ifdef ERRSIM + SERVER_EVENT_SYNC_ADD("storage_ddl", "before_write_prepare_log", + "table_key", table_key, + "table_id", table_id, + "execution_id", execution_id, + "ddl_task_id", ddl_task_id); + DEBUG_SYNC(BEFORE_DDL_WRITE_PREPARE_LOG); +#endif prepare_log_ts = 0; ObLS *ls = nullptr; ObDDLPrepareLog log; diff --git a/src/storage/ddl/ob_ddl_struct.cpp b/src/storage/ddl/ob_ddl_struct.cpp index 6cccb4e2cd..aeab466673 100644 --- a/src/storage/ddl/ob_ddl_struct.cpp +++ b/src/storage/ddl/ob_ddl_struct.cpp @@ -210,6 +210,12 @@ int ObDDLKV::set_macro_block(const ObDDLMacroBlock &macro_block) int ret = OB_SUCCESS; const int64_t MAX_DDL_BLOCK_COUNT = 10L * 1024L * 1024L * 1024L / OB_SERVER_BLOCK_MGR.get_macro_block_size(); int64_t freeze_block_count = MAX_DDL_BLOCK_COUNT; +#ifdef ERRSIM + if (0 != GCONF.errsim_max_ddl_block_count) { + freeze_block_count = GCONF.errsim_max_ddl_block_count; + LOG_INFO("ddl set macro block count", K(freeze_block_count)); + } +#endif if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ddl kv is not init", K(ret)); diff --git a/src/storage/high_availability/ob_ls_migration.cpp b/src/storage/high_availability/ob_ls_migration.cpp index 8b27d77677..45824c5f7f 100644 --- a/src/storage/high_availability/ob_ls_migration.cpp +++ b/src/storage/high_availability/ob_ls_migration.cpp @@ -1085,8 +1085,8 @@ int ObStartMigrationTask::choose_src_() int64_t local_clog_checkpoint_ts = 0; if (OB_FAIL(get_local_ls_checkpoint_ts_(local_clog_checkpoint_ts))) { LOG_WARN("failed to get local ls checkpoint ts", K(ret)); - } else if (OB_FAIL(src_provider.init(tenant_id, storage_rpc_))) { - LOG_WARN("failed to init src provider", K(ret), K(tenant_id)); + } else if (OB_FAIL(src_provider.init(tenant_id, ctx_->arg_.type_, storage_rpc_))) { + LOG_WARN("failed to init src provider", K(ret), K(tenant_id), "type", ctx_->arg_.type_); } else if 
(OB_FAIL(src_provider.choose_ob_src(ls_id, local_clog_checkpoint_ts, src_info))) { LOG_WARN("failed to choose ob src", K(ret), K(tenant_id), K(ls_id), K(local_clog_checkpoint_ts)); } else if (OB_FAIL(fetch_ls_info_(tenant_id, ls_id, src_info.src_addr_, ls_info))) { diff --git a/src/storage/high_availability/ob_storage_ha_src_provider.cpp b/src/storage/high_availability/ob_storage_ha_src_provider.cpp index ba4b5edd14..6f68c2748d 100644 --- a/src/storage/high_availability/ob_storage_ha_src_provider.cpp +++ b/src/storage/high_availability/ob_storage_ha_src_provider.cpp @@ -18,13 +18,18 @@ namespace oceanbase { namespace storage { -ObStorageHASrcProvider::ObStorageHASrcProvider() : is_inited_(false), tenant_id_(OB_INVALID_ID), storage_rpc_(nullptr) +ObStorageHASrcProvider::ObStorageHASrcProvider() + : is_inited_(false), + tenant_id_(OB_INVALID_ID), + type_(ObMigrationOpType::MAX_LS_OP), + storage_rpc_(nullptr) {} ObStorageHASrcProvider::~ObStorageHASrcProvider() {} -int ObStorageHASrcProvider::init(const uint64_t tenant_id, storage::ObStorageRpc *storage_rpc) +int ObStorageHASrcProvider::init(const uint64_t tenant_id, const ObMigrationOpType::TYPE &type, + storage::ObStorageRpc *storage_rpc) { int ret = OB_SUCCESS; if (is_inited_) { @@ -35,6 +40,7 @@ int ObStorageHASrcProvider::init(const uint64_t tenant_id, storage::ObStorageRpc LOG_WARN("get invalid argument", K(ret), K(tenant_id), K(ls_id), KP(storage_rpc)); } else { tenant_id_ = tenant_id; + type_ = type; storage_rpc_ = storage_rpc; is_inited_ = true; } @@ -62,8 +68,27 @@ int ObStorageHASrcProvider::choose_ob_src(const share::ObLSID &ls_id, const int6 } else { src_info.src_addr_ = chosen_src_addr; src_info.cluster_id_ = GCONF.cluster_id; - SERVER_EVENT_ADD( - "storage_ha", "choose_src", "tenant_id", tenant_id_, "ls_id", ls_id.id(), "src_addr", chosen_src_addr); +#ifdef ERRSIM + if (ObMigrationOpType::ADD_LS_OP == type_ || ObMigrationOpType::MIGRATE_LS_OP == type_) { + const ObString &errsim_server = 
GCONF.errsim_migration_src_server_addr.str(); + if (!errsim_server.empty()) { + common::ObAddr tmp_errsim_addr; + if (OB_FAIL(tmp_errsim_addr.parse_from_string(errsim_server))) { + LOG_WARN("failed to parse from string", K(ret), K(errsim_server)); + } else { + src_info.src_addr_ = tmp_errsim_addr; + src_info.cluster_id_ = GCONF.cluster_id; + LOG_INFO("storage ha choose errsim src", K(tmp_errsim_addr)); + } + } + } +#endif + SERVER_EVENT_ADD("storage_ha", "choose_src", + "tenant_id", tenant_id_, + "ls_id", ls_id.id(), + "src_addr", src_info.src_addr_, + "op_type", ObMigrationOpType::get_str(type_)); + } return ret; } diff --git a/src/storage/high_availability/ob_storage_ha_src_provider.h b/src/storage/high_availability/ob_storage_ha_src_provider.h index d51e3a8c92..4a6513d961 100644 --- a/src/storage/high_availability/ob_storage_ha_src_provider.h +++ b/src/storage/high_availability/ob_storage_ha_src_provider.h @@ -24,7 +24,7 @@ class ObStorageHASrcProvider { public: ObStorageHASrcProvider(); virtual ~ObStorageHASrcProvider(); - int init(const uint64_t tenant_id, storage::ObStorageRpc *storage_rpc); + int init(const uint64_t tenant_id, const ObMigrationOpType::TYPE &type, storage::ObStorageRpc *storage_rpc); int choose_ob_src(const share::ObLSID &ls_id, const int64_t local_clog_checkpoint_ts, ObStorageHASrcInfo &src_info); @@ -41,6 +41,7 @@ private: private: bool is_inited_; uint64_t tenant_id_; + ObMigrationOpType::TYPE type_; storage::ObStorageRpc *storage_rpc_; DISALLOW_COPY_AND_ASSIGN(ObStorageHASrcProvider); }; diff --git a/src/storage/high_availability/ob_storage_ha_tablet_builder.cpp b/src/storage/high_availability/ob_storage_ha_tablet_builder.cpp index 8f975d10b1..60fb19759c 100644 --- a/src/storage/high_availability/ob_storage_ha_tablet_builder.cpp +++ b/src/storage/high_availability/ob_storage_ha_tablet_builder.cpp @@ -533,6 +533,15 @@ int ObStorageHATabletsBuilder::build_copy_tablet_sstable_info_arg_( ObTablet *tablet = nullptr; arg.reset(); +#ifdef ERRSIM 
+ const int64_t errsim_tablet_id = GCONF.errsim_migration_tablet_id; + if (errsim_tablet_id == tablet_id.id()) { + SERVER_EVENT_SYNC_ADD("storage_ha", "before_copy_ddl_sstable", + "tablet_id", tablet_id); + DEBUG_SYNC(BEFORE_COPY_DDL_SSTABLE); + } +#endif + if (!is_inited_) { ret = OB_NOT_INIT; LOG_WARN("storage ha tablets builder do not init", K(ret)); @@ -703,6 +712,14 @@ int ObStorageHATabletsBuilder::get_need_copy_ddl_sstable_range_( need_copy_log_ts_range.start_log_ts_ = ddl_start_log_ts; need_copy_log_ts_range.end_log_ts_ = ddl_checkpoint_ts; } +#ifdef ERRSIM + LOG_INFO("ddl checkpoint pushed", K(ddl_checkpoint_pushed), K(ddl_sstable_array), K(ddl_start_log_ts), K(ddl_checkpoint_ts)); + SERVER_EVENT_SYNC_ADD("storage_ha", "get_need_copy_ddl_sstable_range", + "tablet_id", tablet->get_tablet_meta().tablet_id_, + "dest_ddl_checkpoint_pushed", ddl_checkpoint_pushed, + "start_log_ts", need_copy_log_ts_range.start_log_ts_, + "end_log_ts", need_copy_log_ts_range.end_log_ts_); +#endif } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("checkpoint ts should be greater than start ts", @@ -718,12 +735,15 @@ int ObStorageHATabletsBuilder::get_ddl_sstable_min_start_log_ts_( { int ret = OB_SUCCESS; ObArray sstables; - min_start_log_ts = 0; + min_start_log_ts = INT64_MAX; if (!is_inited_) { ret = OB_NOT_INIT; LOG_WARN("storage ha tables builder do not init", K(ret)); - } else if (ddl_sstable_array.count() > 0 && OB_FAIL(ddl_sstable_array.get_all_tables(sstables))) { + } else if (ddl_sstable_array.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ddl sstable should not be empty", K(ret)); + } else if (OB_FAIL(ddl_sstable_array.get_all_tables(sstables))) { LOG_WARN("failed to get all tables", K(ret), K(param_)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < sstables.count(); ++i) {