diff --git a/deps/oblib/src/lib/utility/ob_tracepoint.h b/deps/oblib/src/lib/utility/ob_tracepoint.h index d20df8a7b..8e9ca1f2c 100644 --- a/deps/oblib/src/lib/utility/ob_tracepoint.h +++ b/deps/oblib/src/lib/utility/ob_tracepoint.h @@ -576,6 +576,7 @@ class EventTable EN_DDL_COMPACT_FAIL = 513, EN_DDL_RELEASE_DDL_KV_FAIL = 514, EN_DDL_REPORT_CHECKSUM_FAIL = 515, + /* Tracepoint evaluated under ERRSIM in ObComplementDataDag::report_replica_build_status before the build-status response is sent to RS. */ EN_DDL_REPORT_REPLICA_BUILD_STATUS_FAIL = 516, // 600-700 For PX use EN_PX_SQC_EXECUTE_FAILED = 600, diff --git a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp index bf0b0a9b7..109ca4a24 100644 --- a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp +++ b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp @@ -99,7 +99,7 @@ int ObDDLSingleReplicaExecutor::schedule_task() for (int64_t i = 0; OB_SUCC(ret) && i < build_infos.count(); ++i) { ObPartitionBuildInfo &build_info = build_infos.at(i); if (ObPartitionBuildStat::BUILD_INIT == build_info.stat_|| - ObPartitionBuildStat::BUILD_RETRY == build_info.stat_) { + /* need_schedule() is true for BUILD_RETRY, and for BUILD_REQUESTED once more than PARTITION_BUILD_HEART_BEAT_TIME has elapsed since heart_beat_time_. */ build_info.need_schedule()) { // get leader of partition ObAddr leader_addr; obrpc::ObDDLBuildSingleReplicaRequestArg arg; @@ -147,6 +147,7 @@ int ObDDLSingleReplicaExecutor::schedule_task() continue; } else if (OB_SUCCESS == ret_array.at(i)) { build_infos.at(idx).stat_ = ObPartitionBuildStat::BUILD_REQUESTED; + /* Stamp the time of the successful send so need_schedule() can time out a partition that stays in BUILD_REQUESTED. */ build_infos.at(idx).heart_beat_time_ = ObTimeUtility::current_time(); LOG_INFO("rpc send successfully", K(source_tablet_ids_.at(idx)), K(dest_tablet_ids_.at(idx))); } else if (ObIDDLTask::in_ddl_retry_white_list(ret_array.at(i))) { build_infos.at(idx).stat_ = ObPartitionBuildStat::BUILD_RETRY; @@ -154,6 +155,7 @@ int ObDDLSingleReplicaExecutor::schedule_task() } else { build_infos.at(idx).stat_ = ObPartitionBuildStat::BUILD_FAILED; build_infos.at(idx).ret_code_ = ret_array.at(i); + /* NOTE(review): need_schedule() reads heart_beat_time_ only in BUILD_REQUESTED, so on this BUILD_FAILED path the stamp appears to be informational (it is printed via TO_STRING_KV) - confirm. */ build_infos.at(idx).heart_beat_time_ = ObTimeUtility::current_time(); LOG_INFO("task is failed", K(build_infos.at(idx)), 
K(source_tablet_ids_.at(idx)), K(dest_tablet_ids_.at(idx))); } } @@ -183,7 +185,7 @@ int ObDDLSingleReplicaExecutor::check_build_end(bool &is_end, int64_t &ret_code) if (OB_SUCC(ret) && !is_end) { for (int64_t i = 0; OB_SUCC(ret) && i < build_infos.count(); ++i) { succ_cnt += ObPartitionBuildStat::BUILD_SUCCEED == build_infos.at(i).stat_; - need_schedule |= (ObPartitionBuildStat::BUILD_RETRY == build_infos.at(i).stat_); + /* Besides BUILD_RETRY, this now also flags BUILD_REQUESTED partitions whose heart_beat_time_ is older than PARTITION_BUILD_HEART_BEAT_TIME. */ need_schedule |= build_infos.at(i).need_schedule(); } if (OB_SUCC(ret) && build_infos.count() == succ_cnt) { is_end = true; diff --git a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.h b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.h index b3e8fec92..7371d0f86 100644 --- a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.h +++ b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.h @@ -82,14 +82,21 @@ private: struct ObPartitionBuildInfo final { public: + /* Maximum age of heart_beat_time_ before a BUILD_REQUESTED partition is scheduled again; presumably 10 seconds, assuming ObTimeUtility::current_time() returns microseconds - TODO confirm. */ static const int64_t PARTITION_BUILD_HEART_BEAT_TIME = 10 * 1000 * 1000; ObPartitionBuildInfo() - : ret_code_(common::OB_SUCCESS), stat_(ObPartitionBuildStat::BUILD_INIT) + : ret_code_(common::OB_SUCCESS), stat_(ObPartitionBuildStat::BUILD_INIT), heart_beat_time_(0) {} ~ObPartitionBuildInfo() = default; - TO_STRING_KV(K_(ret_code), K_(stat)); + /* Returns true when stat_ is BUILD_RETRY, or when stat_ is BUILD_REQUESTED and the time elapsed since heart_beat_time_ exceeds PARTITION_BUILD_HEART_BEAT_TIME. Reads the current time on every call. */ bool need_schedule() const { + return ObPartitionBuildStat::BUILD_RETRY == stat_ + || (ObPartitionBuildStat::BUILD_REQUESTED == stat_ + && ObTimeUtility::current_time() - heart_beat_time_ > PARTITION_BUILD_HEART_BEAT_TIME); + } + TO_STRING_KV(K_(ret_code), K_(stat), K_(heart_beat_time)); public: int64_t ret_code_; ObPartitionBuildStat stat_; + /* Value of ObTimeUtility::current_time() recorded by schedule_task when a send result is stored; initialized to 0 by the constructor. */ int64_t heart_beat_time_; }; private: uint64_t tenant_id_; diff --git a/src/storage/ddl/ob_complement_data_task.cpp b/src/storage/ddl/ob_complement_data_task.cpp index 9e30a3e74..c9b70f89e 100644 --- a/src/storage/ddl/ob_complement_data_task.cpp +++ b/src/storage/ddl/ob_complement_data_task.cpp @@ -504,6 +504,12 @@ int ObComplementDataDag::report_replica_build_status() ret = 
OB_INVALID_ARGUMENT; LOG_WARN("invalid param", K(ret), K(param_)); } else { +#ifdef ERRSIM + /* ERRSIM builds only: E(...) presumably yields an injected error code when the tracepoint is armed and OB_SUCCESS otherwise - TODO confirm the macro semantics. */ if (OB_SUCC(ret)) { + ret = E(EventTable::EN_DDL_REPORT_REPLICA_BUILD_STATUS_FAIL) OB_SUCCESS; + LOG_INFO("report replica build status errsim", K(ret)); + } +#endif obrpc::ObDDLBuildSingleReplicaResponseArg arg; ObAddr rs_addr; arg.tenant_id_ = param_.tenant_id_; @@ -517,7 +523,8 @@ int ObComplementDataDag::report_replica_build_status() arg.task_id_ = param_.task_id_; arg.execution_id_ = param_.execution_id_; FLOG_INFO("send replica build status response to RS", K(ret), K(context_.complement_data_ret_), K(arg)); - if (OB_ISNULL(GCTX.rs_rpc_proxy_) || OB_ISNULL(GCTX.rs_mgr_)) { + /* ret may already be non-success here (e.g. from the ERRSIM injection earlier in this branch); the empty body keeps that code and skips the else-if chain that looks up the RS address. */ if (OB_FAIL(ret)) { + } else if (OB_ISNULL(GCTX.rs_rpc_proxy_) || OB_ISNULL(GCTX.rs_mgr_)) { ret = OB_ERR_SYS; LOG_WARN("innner system error, rootserver rpc proxy or rs mgr must not be NULL", K(ret), K(GCTX)); } else if (OB_FAIL(GCTX.rs_mgr_->get_master_root_server(rs_addr))) {