From d98d2d491f95def39a2c3e55c2417e7d05af41f9 Mon Sep 17 00:00:00 2001 From: yujun Date: Fri, 24 Nov 2023 18:09:49 +0800 Subject: [PATCH] [chore](tablet scheduler) update sched ctx err (#27514) --- .../apache/doris/clone/BeLoadRebalancer.java | 6 ++-- .../apache/doris/clone/TabletScheduler.java | 29 ++++++++++--------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java index acc11e921f..82079946e0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java @@ -290,7 +290,7 @@ public class BeLoadRebalancer extends Rebalancer { } } if (!setSource) { - throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "unable to take src slot"); + throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "unable to take src backend slot"); } // Select a low load backend as destination. 
@@ -333,7 +333,7 @@ public class BeLoadRebalancer extends Rebalancer { } if (candidates.isEmpty()) { - throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "unable to find low backend"); + throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "unable to find low dest backend"); } List candFitPaths = Lists.newArrayList(); @@ -370,6 +370,6 @@ public class BeLoadRebalancer extends Rebalancer { } throw new SchedException(Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT, - "unable to find low backend"); + "beload waiting for dest backend slot"); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index dcf503b8ad..77b2d44055 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -387,16 +387,6 @@ public class TabletScheduler extends MasterDaemon { AgentBatchTask batchTask = new AgentBatchTask(); for (TabletSchedCtx tabletCtx : currentBatch) { try { - if (Config.disable_tablet_scheduler) { - // do not schedule more tablet is tablet scheduler is disabled. 
- throw new SchedException(Status.FINISHED, "tablet scheduler is disabled"); - } - if (Config.disable_balance && tabletCtx.getType() == Type.BALANCE) { - tabletCtx.setSchedFailedCode(SubCode.DIAGNOSE_IGNORE); - finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE, - "config disable balance"); - continue; - } scheduleTablet(tabletCtx, batchTask); } catch (SchedException e) { tabletCtx.setErrMsg(e.getMessage()); @@ -429,6 +419,7 @@ public class TabletScheduler extends MasterDaemon { tabletCtx.getTabletId(), e); stat.counterTabletScheduledFailed.incrementAndGet(); tabletCtx.setSchedFailedCode(SubCode.NONE); + tabletCtx.setErrMsg(e.getMessage()); finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage()); continue; } @@ -470,6 +461,14 @@ public class TabletScheduler extends MasterDaemon { * Try to schedule a single tablet. */ private void scheduleTablet(TabletSchedCtx tabletCtx, AgentBatchTask batchTask) throws SchedException { + if (Config.disable_tablet_scheduler) { + // do not schedule more tablets if the tablet scheduler is disabled. 
+ throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "tablet scheduler is disabled"); + } + if (Config.disable_balance && tabletCtx.getType() == Type.BALANCE) { + throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "balance is disabled"); + } + long currentTime = System.currentTimeMillis(); tabletCtx.setLastSchedTime(currentTime); tabletCtx.setLastVisitedTime(currentTime); @@ -1458,7 +1457,7 @@ public class TabletScheduler extends MasterDaemon { if (hasBePath) { throw new SchedException(Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT, - "waiting for dest replica slot"); + "scheduler waiting for dest backend slot"); } else { throw new SchedException(Status.UNRECOVERABLE, "unable to find dest path which can be fit in"); @@ -1663,8 +1662,9 @@ public class TabletScheduler extends MasterDaemon { updateDestPathHash(tabletCtx); finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, Status.FINISHED, "finished"); } else { - finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE, - request.getTaskStatus().getErrorMsgs().get(0)); + String errMsg = request.getTaskStatus().getErrorMsgs().get(0); + tabletCtx.setErrMsg(errMsg); + finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE, errMsg); } return true; @@ -1792,6 +1792,7 @@ public class TabletScheduler extends MasterDaemon { // Set "resetReplicaState" to true because // the timeout task should also be considered as UNRECOVERABLE, // so need to reset replica state. 
+ t.setErrMsg("timeout"); finalizeTabletCtx(t, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE, "timeout"); stat.counterCloneTaskTimeout.incrementAndGet(); }); @@ -1971,7 +1972,7 @@ public class TabletScheduler extends MasterDaemon { LOG.debug("path hash is not set.", new Exception()); } throw new SchedException(Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT, - "path hash is not set"); + "backend " + beId + " path hash is not set"); } Slot slot = pathSlots.get(pathHash);