From 87005aa5b26a79b694fd85ff35eee17151d36ccd Mon Sep 17 00:00:00 2001 From: Siyang Tang <82279870+TangSiyang2001@users.noreply.github.com> Date: Sat, 12 Oct 2024 22:29:10 +0800 Subject: [PATCH] [fix](delete) Fix potential delete job stuck until timeout if exception happened in FE DeleteJob execution (#41672) (#41765) pick: #41672 A failed task should also count down the count down latch to prevent the job from getting stuck. --- .../src/main/java/org/apache/doris/master/MasterImpl.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java index 4870b3a582..a1acd72974 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java @@ -433,6 +433,9 @@ public class MasterImpl { } catch (MetaNotFoundException e) { AgentTaskQueue.removeTask(backendId, TTaskType.REALTIME_PUSH, signature); LOG.warn("finish push replica error", e); + if (pushTask.getPushType() == TPushType.DELETE) { + pushTask.countDownLatch(backendId, pushTabletId); + } } finally { olapTable.writeUnlock(); }