From f1b81fafd45b70c287e54bf31048119befedb0a0 Mon Sep 17 00:00:00 2001 From: yujun Date: Fri, 20 Oct 2023 19:35:16 +0800 Subject: [PATCH] [improvement](tablet clone) partition balance should invalidate tablet move in cache when sched failed (#25602) --- .../org/apache/doris/clone/MovesCacheMap.java | 12 ++++++++++++ .../apache/doris/clone/PartitionRebalancer.java | 7 ++++++- .../java/org/apache/doris/clone/Rebalancer.java | 3 +++ .../org/apache/doris/clone/TabletScheduler.java | 17 +++++++++++++---- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/MovesCacheMap.java b/fe/fe-core/src/main/java/org/apache/doris/clone/MovesCacheMap.java index bd14abfea2..b08dc72f77 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/MovesCacheMap.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/MovesCacheMap.java @@ -92,6 +92,18 @@ public class MovesCacheMap { return null; } + public void invalidateTablet(TabletSchedCtx tabletCtx) { + Map mediumMoves = cacheMap.get(tabletCtx.getTag()); + if (mediumMoves != null) { + MovesCache cache = mediumMoves.get(tabletCtx.getStorageMedium()); + if (cache != null) { + cache.get().invalidate(tabletCtx.getTabletId()); + } else { + mediumMoves.values().forEach(it -> it.get().invalidate(tabletCtx.getTabletId())); + } + } + } + // For given tablet ctx, find it in cacheMap public Pair getTabletMove(TabletSchedCtx tabletCtx) { for (Map mediumMap : cacheMap.values()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java index d5b0c3d32d..c8aa345501 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java @@ -306,13 +306,18 @@ public class PartitionRebalancer extends Rebalancer { // To be improved } + @Override + public void onTabletFailed(TabletSchedCtx tabletCtx) { + movesCacheMap.invalidateTablet(tabletCtx); + } + @Override public Long getToDeleteReplicaId(TabletSchedCtx tabletCtx) { // We don't invalidate the cached move here, cuz the redundant repair progress is just started. // The move should be invalidated by TTL or Algo.CheckMoveCompleted() Pair pair = movesCacheMap.getTabletMove(tabletCtx); if (pair != null) { - Preconditions.checkState(pair.second != -1L); + //Preconditions.checkState(pair.second != -1L); return pair.second; } else { return (long) -1; diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java index bee23747bc..f339418055 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java @@ -100,6 +100,9 @@ public abstract class Rebalancer { return -1L; } + public void onTabletFailed(TabletSchedCtx tabletCtx) { + } + public void updateLoadStatistic(Map statisticMap) { this.statisticMap = statisticMap; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index 25ff14068d..b3cbe4546b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -1234,13 +1234,15 @@ public class TabletScheduler extends MasterDaemon { List alternativeTablets = rebalancer.selectAlternativeTablets(); Collections.shuffle(alternativeTablets); for (TabletSchedCtx tabletCtx : alternativeTablets) { - if (addTablet(tabletCtx, false) == AddResult.ADDED) { + if (needAddBalanceNum > 0 && addTablet(tabletCtx, false) == AddResult.ADDED) { needAddBalanceNum--; - if (needAddBalanceNum <= 0) { - return; - } + } else { + rebalancer.onTabletFailed(tabletCtx); } } + if (needAddBalanceNum <= 0) { + return; + } if (Config.disable_disk_balance) { LOG.info("disk balance is disabled. skip selecting tablets for disk balance"); return; @@ -1448,6 +1450,13 @@ public class TabletScheduler extends MasterDaemon { private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, Status status, String reason) { + if (state == TabletSchedCtx.State.CANCELLED || state == TabletSchedCtx.State.UNEXPECTED) { + if (tabletCtx.getType() == TabletSchedCtx.Type.BALANCE + && tabletCtx.getBalanceType() == TabletSchedCtx.BalanceType.BE_BALANCE) { + rebalancer.onTabletFailed(tabletCtx); + } + } + // use 2 steps to avoid nested database lock and synchronized.(releaseTabletCtx() may hold db lock) // remove the tablet ctx, so that no other process can see it removeTabletCtx(tabletCtx, reason);