Patch summary: adds LOG.debug statements to the tablet clone/repair scheduling code so
that every "skip this replica / backend / path" decision records why it was skipped.
Two loops per file that used "if (hash != -1) { use it }" are inverted into
"if (hash == -1) { log; continue; }" guard clauses; the selection result is unchanged.
NOTE(review): indentation and blank context lines were reconstructed from the hunk line
counts, and generic type arguments appear to have been stripped from some context lines
(raw Comparable / List); verify against the target files before applying.

diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
index 47befaaccb..d040b8053c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
@@ -303,6 +303,8 @@ public class BackendLoadStatistic {
             RootPathLoadStatistic pathStatistic = pathStatistics.get(i);
             // if this is a supplement task, ignore the storage medium
             if (!isSupplement && pathStatistic.getStorageMedium() != medium) {
+                LOG.debug("backend {} path {}'s storage medium is not {} storage medium, actual: {}",
+                        beId, pathStatistic.getPath(), medium, pathStatistic.getStorageMedium());
                 continue;
             }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index 990153a00a..b889a70430 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -488,6 +488,7 @@ public class TabletSchedCtx implements Comparable {
         if (backend == null) {
             // containsBE() is currently only used for choosing dest backend to do clone task.
             // return true so that it won't choose this backend.
+            LOG.debug("dest backend {} does not exist, skip. tablet: {}", beId, tabletId);
             return true;
         }
         String host = backend.getHost();
@@ -495,13 +496,18 @@ public class TabletSchedCtx implements Comparable {
             Backend be = infoService.getBackend(replica.getBackendId());
             if (be == null) {
                 // BE has been dropped, skip it
+                LOG.debug("replica's backend {} does not exist, skip. tablet: {}", replica.getBackendId(), tabletId);
                 continue;
             }
             if (!Config.allow_replica_on_same_host && !FeConstants.runningUnitTest && host.equals(be.getHost())) {
+                LOG.debug("replica's backend {} is on same host {}, skip. tablet: {}",
+                        replica.getBackendId(), host, tabletId);
                 return true;
             }
 
             if (replica.getBackendId() == beId) {
+                LOG.debug("replica's backend {} is same as dest backend {}, skip. tablet: {}",
+                        replica.getBackendId(), beId, tabletId);
                 return true;
             }
         }
@@ -557,24 +563,34 @@ public class TabletSchedCtx implements Comparable {
         List candidates = Lists.newArrayList();
         for (Replica replica : tablet.getReplicas()) {
             if (exceptBeId != -1 && replica.getBackendId() == exceptBeId) {
+                LOG.debug("replica's backend {} is same as except backend {}, skip. tablet: {}",
+                        replica.getBackendId(), exceptBeId, tabletId);
                 continue;
             }
 
             if (replica.isBad() || replica.tooSlow()) {
+                LOG.debug("replica {} is bad({}) or too slow({}), skip. tablet: {}",
+                        replica.getId(), replica.isBad(), replica.tooSlow(), tabletId);
                 continue;
             }
 
             Backend be = infoService.getBackend(replica.getBackendId());
             if (be == null || !be.isAlive()) {
                 // backend which is in decommission can still be the source backend
+                LOG.debug("replica's backend {} does not exist or is not alive, skip. tablet: {}",
+                        replica.getBackendId(), tabletId);
                 continue;
             }
 
             if (replica.getLastFailedVersion() > 0) {
+                LOG.debug("replica {} has failed version {}, skip. tablet: {}",
+                        replica.getId(), replica.getLastFailedVersion(), tabletId);
                 continue;
             }
 
             if (!replica.checkVersionCatchUp(visibleVersion, false)) {
+                LOG.debug("replica {} version {} has not catch up to visible version {}, skip. tablet: {}",
+                        replica.getId(), replica.getVersion(), visibleVersion, tabletId);
                 continue;
             }
 
@@ -591,14 +607,19 @@ public class TabletSchedCtx implements Comparable {
         for (Replica srcReplica : candidates) {
             PathSlot slot = backendsWorkingSlots.get(srcReplica.getBackendId());
             if (slot == null) {
+                LOG.debug("replica's backend {} does not have working slot, skip. tablet: {}",
+                        srcReplica.getBackendId(), tabletId);
                 continue;
             }
 
             long srcPathHash = slot.takeSlot(srcReplica.getPathHash());
-            if (srcPathHash != -1) {
-                setSrc(srcReplica);
-                return;
+            if (srcPathHash == -1) {
+                LOG.debug("replica's backend {} does not have available slot, skip. tablet: {}",
+                        srcReplica.getBackendId(), tabletId);
+                continue;
             }
+            setSrc(srcReplica);
+            return;
         }
         throw new SchedException(Status.SCHEDULE_FAILED, "unable to find source slot");
     }
@@ -629,11 +650,15 @@ public class TabletSchedCtx implements Comparable {
         Replica chosenReplica = null;
         for (Replica replica : tablet.getReplicas()) {
             if (replica.isBad()) {
+                LOG.debug("replica {} is bad, skip. tablet: {}",
+                        replica.getId(), tabletId);
                 continue;
             }
 
             Backend be = infoService.getBackend(replica.getBackendId());
             if (be == null || !be.isScheduleAvailable()) {
+                LOG.debug("replica's backend {} does not exist or is not scheduler available, skip. tablet: {}",
+                        replica.getBackendId(), tabletId);
                 continue;
             }
 
@@ -644,10 +669,14 @@ public class TabletSchedCtx implements Comparable {
                     && ((replica.getVersion() == visibleVersion)
                     || replica.getVersion() > visibleVersion) && replica.getState() != ReplicaState.DECOMMISSION) {
                 // skip healthy replica
+                LOG.debug("replica {} version {} is healthy, visible version {}, replica state {}, skip. tablet: {}",
+                        replica.getId(), replica.getVersion(), visibleVersion, replica.getState(), tabletId);
                 continue;
             }
 
             if (replica.needFurtherRepair()) {
+                LOG.debug("replica {} need further repair, choose it. tablet: {}",
+                        replica.getId(), tabletId);
                 chosenReplica = replica;
                 break;
             }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index 7d2e7cc3b9..470cf0a669 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -1311,11 +1311,15 @@ public class TabletScheduler extends MasterDaemon {
         List allFitPaths = Lists.newArrayList();
         for (BackendLoadStatistic bes : beStatistics) {
             if (!bes.isAvailable()) {
+                LOG.debug("backend {} is not available, skip. tablet: {}", bes.getBeId(), tabletCtx.getTabletId());
                 continue;
             }
 
             // exclude BE which already has replica of this tablet or another BE at same host has this replica
             if (tabletCtx.filterDestBE(bes.getBeId())) {
+                LOG.debug("backend {} already has replica of this tablet or another BE "
+                                + "at same host has this replica, skip. tablet: {}",
+                        bes.getBeId(), tabletCtx.getTabletId());
                 continue;
             }
 
@@ -1323,9 +1327,13 @@ public class TabletScheduler extends MasterDaemon {
             // Else, check the tag.
             if (forColocate) {
                 if (!tabletCtx.getColocateBackendsSet().contains(bes.getBeId())) {
+                    LOG.debug("backend {} is not in colocate backend set, skip. tablet: {}",
+                            bes.getBeId(), tabletCtx.getTabletId());
                     continue;
                 }
             } else if (!bes.getTag().equals(tag)) {
+                LOG.debug("backend {}'s tag {} is not equal to tablet's tag {}, skip. tablet: {}",
+                        bes.getBeId(), bes.getTag(), tag, tabletCtx.getTabletId());
                 continue;
             }
 
@@ -1334,6 +1342,7 @@ public class TabletScheduler extends MasterDaemon {
                     resultPaths, tabletCtx.getTabletStatus() != TabletStatus.REPLICA_RELOCATING
                     /* if REPLICA_RELOCATING, then it is not a supplement task */);
             if (!st.ok()) {
+                LOG.debug("unable to find path for tablet: {}. {}", tabletCtx, st);
                 // This is to solve, when we decommission some BEs with SSD disks,
                 // if there are no SSD disks on the remaining BEs, it will be impossible to select a
                 // suitable destination path.
@@ -1361,31 +1370,50 @@ public class TabletScheduler extends MasterDaemon {
         // we try to find a path with specified media type, if not find, arbitrarily use one.
         for (RootPathLoadStatistic rootPathLoadStatistic : allFitPaths) {
             if (rootPathLoadStatistic.getStorageMedium() != tabletCtx.getStorageMedium()) {
+                LOG.debug("backend {}'s path {}'s storage medium {} "
+                                + "is not equal to tablet's storage medium {}, skip. tablet: {}",
+                        rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+                        rootPathLoadStatistic.getStorageMedium(), tabletCtx.getStorageMedium(),
+                        tabletCtx.getTabletId());
                 continue;
             }
 
             PathSlot slot = backendsWorkingSlots.get(rootPathLoadStatistic.getBeId());
             if (slot == null) {
+                LOG.debug("backend {}'s path {}'s slot is null, skip. tablet: {}",
+                        rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
                 continue;
             }
 
             long pathHash = slot.takeSlot(rootPathLoadStatistic.getPathHash());
-            if (pathHash != -1) {
-                return rootPathLoadStatistic;
+            if (pathHash == -1) {
+                LOG.debug("backend {}'s path {}'s slot is full, skip. tablet: {}",
+                        rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
+                continue;
             }
+            return rootPathLoadStatistic;
         }
 
         // no root path with specified media type is found, get arbitrary one.
         for (RootPathLoadStatistic rootPathLoadStatistic : allFitPaths) {
             PathSlot slot = backendsWorkingSlots.get(rootPathLoadStatistic.getBeId());
             if (slot == null) {
+                LOG.debug("backend {}'s path {}'s slot is null, skip. tablet: {}",
+                        rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
                 continue;
             }
 
             long pathHash = slot.takeSlot(rootPathLoadStatistic.getPathHash());
-            if (pathHash != -1) {
-                return rootPathLoadStatistic;
+            if (pathHash == -1) {
+                LOG.debug("backend {}'s path {}'s slot is full, skip. tablet: {}",
+                        rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
+                continue;
             }
+            return rootPathLoadStatistic;
         }
 
         throw new SchedException(Status.SCHEDULE_FAILED, "unable to find dest path which can be fit in");
@@ -1712,10 +1740,12 @@ public class TabletScheduler extends MasterDaemon {
 
             Slot slot = pathSlots.get(pathHash);
             if (slot == null) {
+                LOG.debug("path {} is not exist", pathHash);
                 return -1;
             }
             slot.rectify();
             if (slot.available <= 0) {
+                LOG.debug("path {} has no available slot", pathHash);
                 return -1;
             }
             slot.available--;