[improvement](tablet clone) partition balance should invalidate tablet move in cache when sched failed (#25602)
This commit is contained in:
@ -92,6 +92,18 @@ public class MovesCacheMap {
|
||||
return null;
|
||||
}
|
||||
|
||||
public void invalidateTablet(TabletSchedCtx tabletCtx) {
|
||||
Map<TStorageMedium, MovesCache> mediumMoves = cacheMap.get(tabletCtx.getTag());
|
||||
if (mediumMoves != null) {
|
||||
MovesCache cache = mediumMoves.get(tabletCtx.getStorageMedium());
|
||||
if (cache != null) {
|
||||
cache.get().invalidate(tabletCtx.getTabletId());
|
||||
} else {
|
||||
mediumMoves.values().forEach(it -> it.get().invalidate(tabletCtx.getTabletId()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For given tablet ctx, find it in cacheMap
|
||||
public Pair<PartitionRebalancer.TabletMove, Long> getTabletMove(TabletSchedCtx tabletCtx) {
|
||||
for (Map<TStorageMedium, MovesCache> mediumMap : cacheMap.values()) {
|
||||
|
||||
@ -306,13 +306,18 @@ public class PartitionRebalancer extends Rebalancer {
|
||||
// To be improved
|
||||
}
|
||||
|
||||
/**
 * Failure hook for a tablet: forwards the tablet context to
 * {@code movesCacheMap.invalidateTablet(...)} so its cached move is invalidated.
 *
 * @param tabletCtx the tablet that failed
 */
@Override
|
||||
public void onTabletFailed(TabletSchedCtx tabletCtx) {
|
||||
movesCacheMap.invalidateTablet(tabletCtx);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long getToDeleteReplicaId(TabletSchedCtx tabletCtx) {
|
||||
// We don't invalidate the cached move here, because the redundant repair process has only just started.
|
||||
// The move should be invalidated by TTL or Algo.CheckMoveCompleted()
|
||||
Pair<TabletMove, Long> pair = movesCacheMap.getTabletMove(tabletCtx);
|
||||
if (pair != null) {
|
||||
Preconditions.checkState(pair.second != -1L);
|
||||
//Preconditions.checkState(pair.second != -1L);
|
||||
return pair.second;
|
||||
} else {
|
||||
return (long) -1;
|
||||
|
||||
@ -100,6 +100,9 @@ public abstract class Rebalancer {
|
||||
return -1L;
|
||||
}
|
||||
|
||||
/**
 * Failure hook for a tablet. The default implementation does nothing;
 * subclasses may override it to react to the failure.
 *
 * @param tabletCtx the tablet that failed
 */
public void onTabletFailed(TabletSchedCtx tabletCtx) {
|
||||
}
|
||||
|
||||
/**
 * Replaces the statistic map held by this rebalancer with the given one.
 * The reference is stored as-is; no copy is made.
 *
 * @param statisticMap load statistics keyed by tag
 */
public void updateLoadStatistic(Map<Tag, LoadStatisticForTag> statisticMap) {
|
||||
this.statisticMap = statisticMap;
|
||||
}
|
||||
|
||||
@ -1234,13 +1234,15 @@ public class TabletScheduler extends MasterDaemon {
|
||||
List<TabletSchedCtx> alternativeTablets = rebalancer.selectAlternativeTablets();
|
||||
Collections.shuffle(alternativeTablets);
|
||||
for (TabletSchedCtx tabletCtx : alternativeTablets) {
|
||||
if (addTablet(tabletCtx, false) == AddResult.ADDED) {
|
||||
if (needAddBalanceNum > 0 && addTablet(tabletCtx, false) == AddResult.ADDED) {
|
||||
needAddBalanceNum--;
|
||||
if (needAddBalanceNum <= 0) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
rebalancer.onTabletFailed(tabletCtx);
|
||||
}
|
||||
}
|
||||
if (needAddBalanceNum <= 0) {
|
||||
return;
|
||||
}
|
||||
if (Config.disable_disk_balance) {
|
||||
LOG.info("disk balance is disabled. skip selecting tablets for disk balance");
|
||||
return;
|
||||
@ -1448,6 +1450,13 @@ public class TabletScheduler extends MasterDaemon {
|
||||
|
||||
|
||||
private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, Status status, String reason) {
|
||||
if (state == TabletSchedCtx.State.CANCELLED || state == TabletSchedCtx.State.UNEXPECTED) {
|
||||
if (tabletCtx.getType() == TabletSchedCtx.Type.BALANCE
|
||||
&& tabletCtx.getBalanceType() == TabletSchedCtx.BalanceType.BE_BALANCE) {
|
||||
rebalancer.onTabletFailed(tabletCtx);
|
||||
}
|
||||
}
|
||||
|
||||
// use 2 steps to avoid nested database lock and synchronized.(releaseTabletCtx() may hold db lock)
|
||||
// remove the tablet ctx, so that no other process can see it
|
||||
removeTabletCtx(tabletCtx, reason);
|
||||
|
||||
Reference in New Issue
Block a user