From 3d5b90befe6a0143e2f45c2a175c75856e26f390 Mon Sep 17 00:00:00 2001 From: yujun Date: Fri, 4 Aug 2023 14:09:02 +0800 Subject: [PATCH] [fix](tablet clone) fix not add colocate replica and print some logs #22378 --- .../ColocateTableCheckerAndBalancer.java | 16 ++++++++++ .../org/apache/doris/clone/TabletChecker.java | 2 +- .../apache/doris/clone/TabletSchedCtx.java | 31 ++++++++++++++----- .../apache/doris/clone/TabletScheduler.java | 9 ++---- .../apache/doris/master/ReportHandler.java | 8 ++++- .../java/org/apache/doris/task/CloneTask.java | 2 +- 6 files changed, 51 insertions(+), 17 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java index b34bc926dd..5c18c2bd46 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java @@ -29,6 +29,7 @@ import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Tablet; import org.apache.doris.catalog.Tablet.TabletStatus; +import org.apache.doris.clone.TabletChecker.CheckerCounter; import org.apache.doris.clone.TabletSchedCtx.Priority; import org.apache.doris.clone.TabletScheduler.AddResult; import org.apache.doris.common.Config; @@ -201,6 +202,9 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { * If every replicas match the backends in group, mark that group as stable. */ private void matchGroup() { + long start = System.currentTimeMillis(); + CheckerCounter counter = new CheckerCounter(); + Env env = Env.getCurrentEnv(); SystemInfoService infoService = Env.getCurrentSystemInfo(); ColocateTableIndex colocateIndex = env.getColocateTableIndex(); @@ -244,6 +248,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { backendBucketsSeq.size() + " vs. " + index.getTablets().size()); int idx = 0; for (Long tabletId : index.getTabletIdsInOrder()) { + counter.totalTabletNum++; Set bucketsSeq = backendBucketsSeq.get(idx); Preconditions.checkState(bucketsSeq.size() == replicationNum, bucketsSeq.size() + " vs. " + replicationNum); @@ -251,11 +256,13 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { TabletStatus st = tablet.getColocateHealthStatus( visibleVersion, replicaAlloc, bucketsSeq); if (st != TabletStatus.HEALTHY) { + counter.unhealthyTabletNum++; unstableReason = String.format("get unhealthy tablet %d in colocate table." + " status: %s", tablet.getId(), st); LOG.debug(unstableReason); if (!tablet.readyToBeRepaired(infoService, Priority.NORMAL)) { + counter.tabletNotReady++; continue; } @@ -275,6 +282,10 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { // skip this group and check next one. LOG.info("tablet scheduler return: {}. stop colocate table check", res.name()); break OUT; + } else if (res == AddResult.ADDED) { + counter.addToSchedulerTabletNum++; + } else { + counter.tabletInScheduler++; } } idx++; @@ -293,6 +304,11 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon { colocateIndex.markGroupUnstable(groupId, unstableReason, true); } } // end for groups + + long cost = System.currentTimeMillis() - start; + LOG.info("finished to check tablets. unhealth/total/added/in_sched/not_ready: {}/{}/{}/{}/{}, cost: {} ms", + counter.unhealthyTabletNum, counter.totalTabletNum, counter.addToSchedulerTabletNum, + counter.tabletInScheduler, counter.tabletNotReady, cost); } /* diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java index d9330e0f16..2b8bddbf9c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java @@ -214,7 +214,7 @@ public class TabletChecker extends MasterDaemon { LOG.debug(stat.incrementalBrief()); } - private static class CheckerCounter { + public static class CheckerCounter { public long totalTabletNum = 0; public long unhealthyTabletNum = 0; public long addToSchedulerTabletNum = 0; diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java index 876b8d10fe..3c2d2b148c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java @@ -127,7 +127,6 @@ public class TabletSchedCtx implements Comparable { RUNNING, // tablet is being scheduled FINISHED, // task is finished CANCELLED, // task is failed - TIMEOUT, // task is timeout UNEXPECTED // other unexpected errors } @@ -656,7 +655,9 @@ public class TabletSchedCtx implements Comparable { */ public void chooseDestReplicaForVersionIncomplete(Map backendsWorkingSlots) throws SchedException { - List candidates = Lists.newArrayList(); + List decommissionCand = Lists.newArrayList(); + List colocateCand = Lists.newArrayList(); + List notColocateCand = Lists.newArrayList(); for (Replica replica : tablet.getReplicas()) { if (replica.isBad()) { LOG.debug("replica {} is bad, skip. tablet: {}", @@ -671,21 +672,35 @@ public class TabletSchedCtx implements Comparable { continue; } - // check version and replica state. - // if the replica's state is DECOMMISSION, it may be chose as dest replica, - // and its state will be set to NORMAL later. + // not enough version completed replicas, then try add back the decommission replica. + if (replica.getState() == ReplicaState.DECOMMISSION) { + decommissionCand.add(replica); + continue; + } + if (replica.getLastFailedVersion() <= 0 - && replica.getVersion() >= visibleVersion - && replica.getState() != ReplicaState.DECOMMISSION) { + && replica.getVersion() >= visibleVersion) { // skip healthy replica LOG.debug("replica {} version {} is healthy, visible version {}, replica state {}, skip. tablet: {}", replica.getId(), replica.getVersion(), visibleVersion, replica.getState(), tabletId); continue; } - candidates.add(replica); + if (colocateBackendsSet != null && colocateBackendsSet.contains(replica.getBackendId())) { + colocateCand.add(replica); + } else { + notColocateCand.add(replica); + } } + List candidates = null; + if (!colocateCand.isEmpty()) { + candidates = colocateCand; + } else if (!notColocateCand.isEmpty()) { + candidates = notColocateCand; + } else { + candidates = decommissionCand; + } if (candidates.isEmpty()) { throw new SchedException(Status.UNRECOVERABLE, "unable to choose dest replica"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index de168634e3..20e0c171f6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -1483,6 +1483,7 @@ public class TabletScheduler extends MasterDaemon { TabletStatus st = tablet.getColocateHealthStatus( partition.getVisibleVersion(), replicaAlloc, backendsSet); statusPair = Pair.of(st, Priority.HIGH); + tabletCtx.setColocateGroupBackendIds(backendsSet); } else { List aliveBeIds = infoService.getAllBackendIds(true); statusPair = tablet.getHealthStatusWithPriority( @@ -1525,7 +1526,7 @@ public class TabletScheduler extends MasterDaemon { runningTablets.remove(tabletCtx.getTabletId()); allTabletTypes.remove(tabletCtx.getTabletId()); schedHistory.add(tabletCtx); - LOG.info("remove the tablet {}. because: {}", tabletCtx.getTabletId(), reason); + LOG.info("remove the tablet {}. because: {}", tabletCtx, reason); } // get next batch of tablets from queue. @@ -1703,10 +1704,6 @@ public class TabletScheduler extends MasterDaemon { List timeoutTablets = Lists.newArrayList(); synchronized (this) { runningTablets.values().stream().filter(TabletSchedCtx::isTimeout).forEach(timeoutTablets::add); - - for (TabletSchedCtx tabletSchedCtx : timeoutTablets) { - removeTabletCtx(tabletSchedCtx, "timeout"); - } } // 2. release ctx @@ -1714,7 +1711,7 @@ public class TabletScheduler extends MasterDaemon { // Set "resetReplicaState" to true because // the timeout task should also be considered as UNRECOVERABLE, // so need to reset replica state. - releaseTabletCtx(t, TabletSchedCtx.State.CANCELLED, true); + finalizeTabletCtx(t, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE, "timeout"); stat.counterCloneTaskTimeout.incrementAndGet(); }); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index 055652a0b9..3a9c01eb3c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -1163,6 +1163,7 @@ public class ReportHandler extends Daemon { // colocate table will delete Replica in meta when balance // but we need to rely on MetaNotFoundException to decide whether delete the tablet in backend // if the tablet is healthy, delete it. + boolean isColocateBackend = false; ColocateTableIndex colocateTableIndex = Env.getCurrentColocateIndex(); if (colocateTableIndex.isColocateTable(olapTable.getId())) { ColocateTableIndex.GroupId groupId = colocateTableIndex.getGroup(tableId); @@ -1176,6 +1177,10 @@ public class ReportHandler extends Daemon { if (status == TabletStatus.HEALTHY) { return false; } + + if (backendsSet.contains(backendId)) { + isColocateBackend = true; + } } SystemInfoService infoService = Env.getCurrentSystemInfo(); @@ -1183,7 +1188,8 @@ public class ReportHandler extends Daemon { Pair status = tablet.getHealthStatusWithPriority(infoService, visibleVersion, replicaAlloc, aliveBeIds); - if (status.first == TabletStatus.VERSION_INCOMPLETE || status.first == TabletStatus.REPLICA_MISSING + if (isColocateBackend || status.first == TabletStatus.VERSION_INCOMPLETE + || status.first == TabletStatus.REPLICA_MISSING || status.first == TabletStatus.UNRECOVERABLE) { long lastFailedVersion = -1L; diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/CloneTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/CloneTask.java index d614bbb24e..019fded640 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/CloneTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/CloneTask.java @@ -98,7 +98,7 @@ public class CloneTask extends AgentTask { sb.append("tablet id: ").append(tabletId).append(", replica id: ").append(replicaId).append(", schema hash: ") .append(schemaHash); sb.append(", storageMedium: ").append(storageMedium.name()); - sb.append(", visible version(hash): ").append(visibleVersion); + sb.append(", visible version: ").append(visibleVersion); sb.append(", src backend: ").append(srcBackends.get(0).getHost()) .append(", src path hash: ").append(srcPathHash); sb.append(", src backend: ").append(srcBackends.get(0).getHost()).append(", src path hash: ")