Fix drop column retry when the tablet leader switches

This commit is contained in:
Charles0429
2022-11-08 07:06:26 +00:00
committed by wangzelin.wzl
parent c36622e1c8
commit 5a7d53fa60
4 changed files with 22 additions and 5 deletions

View File

@ -576,6 +576,7 @@ class EventTable
EN_DDL_COMPACT_FAIL = 513, EN_DDL_COMPACT_FAIL = 513,
EN_DDL_RELEASE_DDL_KV_FAIL = 514, EN_DDL_RELEASE_DDL_KV_FAIL = 514,
EN_DDL_REPORT_CHECKSUM_FAIL = 515, EN_DDL_REPORT_CHECKSUM_FAIL = 515,
EN_DDL_REPORT_REPLICA_BUILD_STATUS_FAIL = 516,
// 600-700 For PX use // 600-700 For PX use
EN_PX_SQC_EXECUTE_FAILED = 600, EN_PX_SQC_EXECUTE_FAILED = 600,

View File

@ -99,7 +99,7 @@ int ObDDLSingleReplicaExecutor::schedule_task()
for (int64_t i = 0; OB_SUCC(ret) && i < build_infos.count(); ++i) { for (int64_t i = 0; OB_SUCC(ret) && i < build_infos.count(); ++i) {
ObPartitionBuildInfo &build_info = build_infos.at(i); ObPartitionBuildInfo &build_info = build_infos.at(i);
if (ObPartitionBuildStat::BUILD_INIT == build_info.stat_|| if (ObPartitionBuildStat::BUILD_INIT == build_info.stat_||
ObPartitionBuildStat::BUILD_RETRY == build_info.stat_) { build_info.need_schedule()) {
// get leader of partition // get leader of partition
ObAddr leader_addr; ObAddr leader_addr;
obrpc::ObDDLBuildSingleReplicaRequestArg arg; obrpc::ObDDLBuildSingleReplicaRequestArg arg;
@ -147,6 +147,7 @@ int ObDDLSingleReplicaExecutor::schedule_task()
continue; continue;
} else if (OB_SUCCESS == ret_array.at(i)) { } else if (OB_SUCCESS == ret_array.at(i)) {
build_infos.at(idx).stat_ = ObPartitionBuildStat::BUILD_REQUESTED; build_infos.at(idx).stat_ = ObPartitionBuildStat::BUILD_REQUESTED;
build_infos.at(idx).heart_beat_time_ = ObTimeUtility::current_time();
LOG_INFO("rpc send successfully", K(source_tablet_ids_.at(idx)), K(dest_tablet_ids_.at(idx))); LOG_INFO("rpc send successfully", K(source_tablet_ids_.at(idx)), K(dest_tablet_ids_.at(idx)));
} else if (ObIDDLTask::in_ddl_retry_white_list(ret_array.at(i))) { } else if (ObIDDLTask::in_ddl_retry_white_list(ret_array.at(i))) {
build_infos.at(idx).stat_ = ObPartitionBuildStat::BUILD_RETRY; build_infos.at(idx).stat_ = ObPartitionBuildStat::BUILD_RETRY;
@ -154,6 +155,7 @@ int ObDDLSingleReplicaExecutor::schedule_task()
} else { } else {
build_infos.at(idx).stat_ = ObPartitionBuildStat::BUILD_FAILED; build_infos.at(idx).stat_ = ObPartitionBuildStat::BUILD_FAILED;
build_infos.at(idx).ret_code_ = ret_array.at(i); build_infos.at(idx).ret_code_ = ret_array.at(i);
build_infos.at(idx).heart_beat_time_ = ObTimeUtility::current_time();
LOG_INFO("task is failed", K(build_infos.at(idx)), K(source_tablet_ids_.at(idx)), K(dest_tablet_ids_.at(idx))); LOG_INFO("task is failed", K(build_infos.at(idx)), K(source_tablet_ids_.at(idx)), K(dest_tablet_ids_.at(idx)));
} }
} }
@ -183,7 +185,7 @@ int ObDDLSingleReplicaExecutor::check_build_end(bool &is_end, int64_t &ret_code)
if (OB_SUCC(ret) && !is_end) { if (OB_SUCC(ret) && !is_end) {
for (int64_t i = 0; OB_SUCC(ret) && i < build_infos.count(); ++i) { for (int64_t i = 0; OB_SUCC(ret) && i < build_infos.count(); ++i) {
succ_cnt += ObPartitionBuildStat::BUILD_SUCCEED == build_infos.at(i).stat_; succ_cnt += ObPartitionBuildStat::BUILD_SUCCEED == build_infos.at(i).stat_;
need_schedule |= (ObPartitionBuildStat::BUILD_RETRY == build_infos.at(i).stat_); need_schedule |= build_infos.at(i).need_schedule();
} }
if (OB_SUCC(ret) && build_infos.count() == succ_cnt) { if (OB_SUCC(ret) && build_infos.count() == succ_cnt) {
is_end = true; is_end = true;

View File

@ -82,14 +82,21 @@ private:
// Per-partition build record: holds a return code, the current build state,
// and (in the new version of this hunk) a heart-beat timestamp.
struct ObPartitionBuildInfo final struct ObPartitionBuildInfo final
{ {
public: public:
// Threshold that need_schedule() compares against the time elapsed since
// heart_beat_time_.
// NOTE(review): 10 * 1000 * 1000 is presumably 10 seconds expressed in
// microseconds -- confirm the unit returned by ObTimeUtility::current_time().
static const int64_t PARTITION_BUILD_HEART_BEAT_TIME = 10 * 1000 * 1000;
// Default state: OB_SUCCESS return code, BUILD_INIT state, heart-beat time 0.
ObPartitionBuildInfo() ObPartitionBuildInfo()
: ret_code_(common::OB_SUCCESS), stat_(ObPartitionBuildStat::BUILD_INIT) : ret_code_(common::OB_SUCCESS), stat_(ObPartitionBuildStat::BUILD_INIT), heart_beat_time_(0)
{} {}
~ObPartitionBuildInfo() = default; ~ObPartitionBuildInfo() = default;
// need_schedule() returns true when the state is BUILD_RETRY, or when the
// state is BUILD_REQUESTED and more than PARTITION_BUILD_HEART_BEAT_TIME has
// elapsed since heart_beat_time_ (i.e. a sent request has gone unanswered
// for longer than the threshold).
TO_STRING_KV(K_(ret_code), K_(stat)); bool need_schedule() const {
return ObPartitionBuildStat::BUILD_RETRY == stat_
|| (ObPartitionBuildStat::BUILD_REQUESTED == stat_
&& ObTimeUtility::current_time() - heart_beat_time_ > PARTITION_BUILD_HEART_BEAT_TIME);
}
TO_STRING_KV(K_(ret_code), K_(stat), K_(heart_beat_time));
public: public:
// Result code recorded for this partition's build.
int64_t ret_code_; int64_t ret_code_;
// Current build state (BUILD_INIT / BUILD_REQUESTED / BUILD_RETRY / ...).
ObPartitionBuildStat stat_; ObPartitionBuildStat stat_;
// Timestamp taken from ObTimeUtility::current_time(); schedule_task() sets it
// when it marks the partition BUILD_REQUESTED or BUILD_FAILED.
int64_t heart_beat_time_;
}; };
private: private:
uint64_t tenant_id_; uint64_t tenant_id_;

View File

@ -504,6 +504,12 @@ int ObComplementDataDag::report_replica_build_status()
ret = OB_INVALID_ARGUMENT; ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid param", K(ret), K(param_)); LOG_WARN("invalid param", K(ret), K(param_));
} else { } else {
#ifdef ERRSIM
if (OB_SUCC(ret)) {
ret = E(EventTable::EN_DDL_REPORT_REPLICA_BUILD_STATUS_FAIL) OB_SUCCESS;
LOG_INFO("report replica build status errsim", K(ret));
}
#endif
obrpc::ObDDLBuildSingleReplicaResponseArg arg; obrpc::ObDDLBuildSingleReplicaResponseArg arg;
ObAddr rs_addr; ObAddr rs_addr;
arg.tenant_id_ = param_.tenant_id_; arg.tenant_id_ = param_.tenant_id_;
@ -517,7 +523,8 @@ int ObComplementDataDag::report_replica_build_status()
arg.task_id_ = param_.task_id_; arg.task_id_ = param_.task_id_;
arg.execution_id_ = param_.execution_id_; arg.execution_id_ = param_.execution_id_;
FLOG_INFO("send replica build status response to RS", K(ret), K(context_.complement_data_ret_), K(arg)); FLOG_INFO("send replica build status response to RS", K(ret), K(context_.complement_data_ret_), K(arg));
if (OB_ISNULL(GCTX.rs_rpc_proxy_) || OB_ISNULL(GCTX.rs_mgr_)) { if (OB_FAIL(ret)) {
} else if (OB_ISNULL(GCTX.rs_rpc_proxy_) || OB_ISNULL(GCTX.rs_mgr_)) {
ret = OB_ERR_SYS; ret = OB_ERR_SYS;
LOG_WARN("innner system error, rootserver rpc proxy or rs mgr must not be NULL", K(ret), K(GCTX)); LOG_WARN("innner system error, rootserver rpc proxy or rs mgr must not be NULL", K(ret), K(GCTX));
} else if (OB_FAIL(GCTX.rs_mgr_->get_master_root_server(rs_addr))) { } else if (OB_FAIL(GCTX.rs_mgr_->get_master_root_server(rs_addr))) {