[feature](diagnose) diagnose for cluster balance (#26085)
This commit is contained in:
@ -245,7 +245,7 @@ public class BeLoadRebalancer extends Rebalancer {
|
||||
clusterStat.getBackendStatisticByClass(lowBe, midBe, highBe, tabletCtx.getStorageMedium());
|
||||
|
||||
if (lowBe.isEmpty() && highBe.isEmpty()) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "cluster is balance");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "cluster is balance");
|
||||
}
|
||||
|
||||
// if all low backends is not available, return
|
||||
@ -265,12 +265,14 @@ public class BeLoadRebalancer extends Rebalancer {
|
||||
}
|
||||
Backend be = infoService.getBackend(replica.getBackendId());
|
||||
if (be == null) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "backend is dropped: " + replica.getBackendId());
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"backend is dropped: " + replica.getBackendId());
|
||||
}
|
||||
hosts.add(be.getHost());
|
||||
}
|
||||
if (!hasHighReplica) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "no replica on high load backend");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"no replica on high load backend");
|
||||
}
|
||||
|
||||
// select a replica as source
|
||||
@ -288,7 +290,7 @@ public class BeLoadRebalancer extends Rebalancer {
|
||||
}
|
||||
}
|
||||
if (!setSource) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "unable to take src slot");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "unable to take src slot");
|
||||
}
|
||||
|
||||
// Select a low load backend as destination.
|
||||
@ -331,7 +333,7 @@ public class BeLoadRebalancer extends Rebalancer {
|
||||
}
|
||||
|
||||
if (candidates.isEmpty()) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "unable to find low backend");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "unable to find low backend");
|
||||
}
|
||||
|
||||
List<BePathLoadStatPair> candFitPaths = Lists.newArrayList();
|
||||
|
||||
@ -26,6 +26,7 @@ import org.apache.doris.catalog.Replica;
|
||||
import org.apache.doris.catalog.TabletInvertedIndex;
|
||||
import org.apache.doris.catalog.TabletMeta;
|
||||
import org.apache.doris.clone.SchedException.Status;
|
||||
import org.apache.doris.clone.SchedException.SubCode;
|
||||
import org.apache.doris.clone.TabletSchedCtx.BalanceType;
|
||||
import org.apache.doris.clone.TabletSchedCtx.Priority;
|
||||
import org.apache.doris.clone.TabletScheduler.PathSlot;
|
||||
@ -296,23 +297,26 @@ public class DiskRebalancer extends Rebalancer {
|
||||
Replica replica = invertedIndex.getReplica(tabletCtx.getTabletId(), tabletCtx.getTempSrcBackendId());
|
||||
// check src replica still there
|
||||
if (replica == null || replica.getPathHash() != tabletCtx.getTempSrcPathHash()) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "src replica may be rebalanced");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "src replica may be rebalanced");
|
||||
}
|
||||
// ignore empty replicas as they do not make disk more balance
|
||||
if (replica.getDataSize() == 0) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "size of src replica is zero");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "size of src replica is zero");
|
||||
}
|
||||
Database db = Env.getCurrentInternalCatalog().getDbOrException(tabletCtx.getDbId(),
|
||||
s -> new SchedException(Status.UNRECOVERABLE, "db " + tabletCtx.getDbId() + " does not exist"));
|
||||
s -> new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"db " + tabletCtx.getDbId() + " does not exist"));
|
||||
OlapTable tbl = (OlapTable) db.getTableOrException(tabletCtx.getTblId(),
|
||||
s -> new SchedException(Status.UNRECOVERABLE, "tbl " + tabletCtx.getTblId() + " does not exist"));
|
||||
s -> new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"tbl " + tabletCtx.getTblId() + " does not exist"));
|
||||
DataProperty dataProperty = tbl.getPartitionInfo().getDataProperty(tabletCtx.getPartitionId());
|
||||
if (dataProperty == null) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "data property is null");
|
||||
}
|
||||
String storagePolicy = dataProperty.getStoragePolicy();
|
||||
if (!Strings.isNullOrEmpty(storagePolicy)) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "disk balance not support for cooldown storage");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"disk balance not support for cooldown storage");
|
||||
}
|
||||
|
||||
// check src slot
|
||||
@ -323,7 +327,7 @@ public class DiskRebalancer extends Rebalancer {
|
||||
}
|
||||
long pathHash = slot.takeBalanceSlot(replica.getPathHash());
|
||||
if (pathHash == -1) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "unable to take src slot");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "unable to take src slot");
|
||||
}
|
||||
// after take src slot, we can set src replica now
|
||||
tabletCtx.setSrc(replica);
|
||||
@ -343,7 +347,7 @@ public class DiskRebalancer extends Rebalancer {
|
||||
if (pathHigh.contains(replica.getPathHash())) {
|
||||
pathLow.addAll(pathMid);
|
||||
} else if (!pathMid.contains(replica.getPathHash())) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "src path is low load");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "src path is low load");
|
||||
}
|
||||
// check if this migration task can make the be's disks more balance.
|
||||
List<RootPathLoadStatistic> availPaths = Lists.newArrayList();
|
||||
@ -380,7 +384,7 @@ public class DiskRebalancer extends Rebalancer {
|
||||
}
|
||||
|
||||
if (!setDest) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "unable to find low load path");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "unable to find low load path");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -20,6 +20,7 @@ package org.apache.doris.clone;
|
||||
import org.apache.doris.catalog.Replica;
|
||||
import org.apache.doris.catalog.TabletInvertedIndex;
|
||||
import org.apache.doris.catalog.TabletMeta;
|
||||
import org.apache.doris.clone.SchedException.SubCode;
|
||||
import org.apache.doris.clone.TabletScheduler.PathSlot;
|
||||
import org.apache.doris.common.Config;
|
||||
import org.apache.doris.common.Pair;
|
||||
@ -261,7 +262,7 @@ public class PartitionRebalancer extends Rebalancer {
|
||||
if (slot.takeBalanceSlot(srcReplica.getPathHash()) != -1) {
|
||||
tabletCtx.setSrc(srcReplica);
|
||||
} else {
|
||||
throw new SchedException(SchedException.Status.SCHEDULE_FAILED, SchedException.SubCode.WAITING_SLOT,
|
||||
throw new SchedException(SchedException.Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT,
|
||||
"no slot for src replica " + srcReplica + ", pathHash " + srcReplica.getPathHash());
|
||||
}
|
||||
|
||||
@ -279,7 +280,7 @@ public class PartitionRebalancer extends Rebalancer {
|
||||
.map(RootPathLoadStatistic::getPathHash).collect(Collectors.toSet());
|
||||
long pathHash = slot.takeAnAvailBalanceSlotFrom(availPath);
|
||||
if (pathHash == -1) {
|
||||
throw new SchedException(SchedException.Status.SCHEDULE_FAILED, SchedException.SubCode.WAITING_SLOT,
|
||||
throw new SchedException(SchedException.Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT,
|
||||
"paths has no available balance slot: " + availPath);
|
||||
}
|
||||
|
||||
|
||||
@ -31,6 +31,7 @@ public class SchedException extends Exception {
|
||||
NONE,
|
||||
WAITING_DECOMMISSION,
|
||||
WAITING_SLOT,
|
||||
DIAGNOSE_IGNORE, // proc '/diagnose/cluster_balance' will ignore this error
|
||||
}
|
||||
|
||||
private Status status;
|
||||
|
||||
@ -324,6 +324,10 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
|
||||
this.lastVisitedTime = lastVisitedTime;
|
||||
}
|
||||
|
||||
public long getLastVisitedTime() {
|
||||
return lastVisitedTime;
|
||||
}
|
||||
|
||||
public void setFinishedTime(long finishedTime) {
|
||||
this.finishedTime = finishedTime;
|
||||
}
|
||||
@ -424,6 +428,18 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
|
||||
this.errMsg = errMsg;
|
||||
}
|
||||
|
||||
public String getErrMsg() {
|
||||
return errMsg;
|
||||
}
|
||||
|
||||
public SubCode getSchedFailedCode() {
|
||||
return schedFailedCode;
|
||||
}
|
||||
|
||||
public void setSchedFailedCode(SubCode code) {
|
||||
schedFailedCode = code;
|
||||
}
|
||||
|
||||
public CloneTask getCloneTask() {
|
||||
return cloneTask;
|
||||
}
|
||||
@ -613,7 +629,7 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
|
||||
}
|
||||
|
||||
if (candidates.isEmpty()) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "unable to find source replica");
|
||||
throw new SchedException(Status.UNRECOVERABLE, "unable to find copy source replica");
|
||||
}
|
||||
|
||||
// choose a replica which slot is available from candidates.
|
||||
@ -637,7 +653,7 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
|
||||
return;
|
||||
}
|
||||
throw new SchedException(Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT,
|
||||
"unable to find source slot");
|
||||
"waiting for source replica's slot");
|
||||
}
|
||||
|
||||
/*
|
||||
@ -718,7 +734,7 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
|
||||
|
||||
if (candidates.isEmpty()) {
|
||||
if (furtherRepairs.isEmpty()) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "unable to choose dest replica");
|
||||
throw new SchedException(Status.UNRECOVERABLE, "unable to choose copy dest replica");
|
||||
}
|
||||
|
||||
boolean allCatchup = true;
|
||||
@ -741,7 +757,7 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
|
||||
if (slot == null || !slot.hasAvailableSlot(replica.getPathHash())) {
|
||||
if (!replica.needFurtherRepair()) {
|
||||
throw new SchedException(Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT,
|
||||
"replica " + replica + " has not slot");
|
||||
"dest replica " + replica + " has no slot");
|
||||
}
|
||||
|
||||
continue;
|
||||
@ -1063,20 +1079,23 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
|
||||
|
||||
// 1. check the tablet status first
|
||||
Database db = Env.getCurrentInternalCatalog().getDbOrException(dbId,
|
||||
s -> new SchedException(Status.UNRECOVERABLE, "db " + dbId + " does not exist"));
|
||||
s -> new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"db " + dbId + " does not exist"));
|
||||
OlapTable olapTable = (OlapTable) db.getTableOrException(tblId,
|
||||
s -> new SchedException(Status.UNRECOVERABLE, "tbl " + tabletId + " does not exist"));
|
||||
s -> new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"tbl " + tabletId + " does not exist"));
|
||||
olapTable.writeLockOrException(new SchedException(Status.UNRECOVERABLE, "table "
|
||||
+ olapTable.getName() + " does not exist"));
|
||||
try {
|
||||
Partition partition = olapTable.getPartition(partitionId);
|
||||
if (partition == null) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "partition does not exist");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"partition does not exist");
|
||||
}
|
||||
|
||||
MaterializedIndex index = partition.getIndex(indexId);
|
||||
if (index == null) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "index does not exist");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "index does not exist");
|
||||
}
|
||||
|
||||
if (schemaHash != olapTable.getSchemaHashByIndexId(indexId)) {
|
||||
@ -1116,7 +1135,7 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
|
||||
// check if replica exist
|
||||
Replica replica = tablet.getReplicaByBackendId(destBackendId);
|
||||
if (replica == null) {
|
||||
throw new SchedException(Status.UNRECOVERABLE,
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"replica does not exist. backend id: " + destBackendId);
|
||||
}
|
||||
|
||||
|
||||
@ -393,6 +393,7 @@ public class TabletScheduler extends MasterDaemon {
|
||||
throw new SchedException(Status.FINISHED, "tablet scheduler is disabled");
|
||||
}
|
||||
if (Config.disable_balance && tabletCtx.getType() == Type.BALANCE) {
|
||||
tabletCtx.setSchedFailedCode(SubCode.DIAGNOSE_IGNORE);
|
||||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE,
|
||||
"config disable balance");
|
||||
continue;
|
||||
@ -420,6 +421,7 @@ public class TabletScheduler extends MasterDaemon {
|
||||
Preconditions.checkState(e.getStatus() == Status.UNRECOVERABLE, e.getStatus());
|
||||
// discard
|
||||
stat.counterTabletScheduledDiscard.incrementAndGet();
|
||||
tabletCtx.setSchedFailedCode(e.getSubCode());
|
||||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage());
|
||||
}
|
||||
continue;
|
||||
@ -427,6 +429,7 @@ public class TabletScheduler extends MasterDaemon {
|
||||
LOG.warn("got unexpected exception, discard this schedule. tablet: {}",
|
||||
tabletCtx.getTabletId(), e);
|
||||
stat.counterTabletScheduledFailed.incrementAndGet();
|
||||
tabletCtx.setSchedFailedCode(SubCode.NONE);
|
||||
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage());
|
||||
continue;
|
||||
}
|
||||
@ -475,11 +478,13 @@ public class TabletScheduler extends MasterDaemon {
|
||||
|
||||
Pair<TabletStatus, TabletSchedCtx.Priority> statusPair;
|
||||
Database db = Env.getCurrentInternalCatalog().getDbOrException(tabletCtx.getDbId(),
|
||||
s -> new SchedException(Status.UNRECOVERABLE, "db " + tabletCtx.getDbId() + " does not exist"));
|
||||
s -> new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"db " + tabletCtx.getDbId() + " does not exist"));
|
||||
OlapTable tbl = (OlapTable) db.getTableOrException(tabletCtx.getTblId(),
|
||||
s -> new SchedException(Status.UNRECOVERABLE, "tbl " + tabletCtx.getTblId() + " does not exist"));
|
||||
tbl.writeLockOrException(new SchedException(Status.UNRECOVERABLE, "table "
|
||||
+ tbl.getName() + " does not exist"));
|
||||
s -> new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"tbl " + tabletCtx.getTblId() + " does not exist"));
|
||||
tbl.writeLockOrException(new SchedException(Status.UNRECOVERABLE,
|
||||
"table " + tbl.getName() + " does not exist"));
|
||||
try {
|
||||
long tabletId = tabletCtx.getTabletId();
|
||||
|
||||
@ -489,12 +494,12 @@ public class TabletScheduler extends MasterDaemon {
|
||||
|
||||
Partition partition = tbl.getPartition(tabletCtx.getPartitionId());
|
||||
if (partition == null) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "partition does not exist");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "partition does not exist");
|
||||
}
|
||||
|
||||
MaterializedIndex idx = partition.getIndex(tabletCtx.getIndexId());
|
||||
if (idx == null) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "index does not exist");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "index does not exist");
|
||||
}
|
||||
|
||||
ReplicaAllocation replicaAlloc = null;
|
||||
@ -503,7 +508,8 @@ public class TabletScheduler extends MasterDaemon {
|
||||
if (isColocateTable) {
|
||||
GroupId groupId = colocateTableIndex.getGroup(tbl.getId());
|
||||
if (groupId == null) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "colocate group does not exist");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"colocate group does not exist");
|
||||
}
|
||||
ColocateGroupSchema groupSchema = colocateTableIndex.getGroupSchema(groupId);
|
||||
if (groupSchema == null) {
|
||||
@ -543,19 +549,20 @@ public class TabletScheduler extends MasterDaemon {
|
||||
}
|
||||
}
|
||||
|
||||
if (tabletCtx.getType() == TabletSchedCtx.Type.BALANCE && tableState != OlapTableState.NORMAL) {
|
||||
// If table is under ALTER process, do not allow to do balance.
|
||||
throw new SchedException(Status.UNRECOVERABLE, "table's state is not NORMAL");
|
||||
}
|
||||
|
||||
if (tabletCtx.getType() == TabletSchedCtx.Type.BALANCE) {
|
||||
if (tableState != OlapTableState.NORMAL) {
|
||||
// If table is under ALTER process, do not allow to do balance.
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"table's state is not NORMAL");
|
||||
}
|
||||
|
||||
try {
|
||||
DatabaseTransactionMgr dbTransactionMgr
|
||||
= Env.getCurrentGlobalTransactionMgr().getDatabaseTransactionMgr(db.getId());
|
||||
for (TransactionState transactionState : dbTransactionMgr.getPreCommittedTxnList()) {
|
||||
if (transactionState.getTableIdList().contains(tbl.getId())) {
|
||||
// If table releate to transaction with precommitted status, do not allow to do balance.
|
||||
throw new SchedException(Status.UNRECOVERABLE,
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"There exists PRECOMMITTED transaction related to table");
|
||||
}
|
||||
}
|
||||
@ -572,18 +579,19 @@ public class TabletScheduler extends MasterDaemon {
|
||||
// The WAITING_STABLE state is an exception. This state indicates that the table is
|
||||
// executing an alter job, but the alter job is in a PENDING state and is waiting for
|
||||
// the table to become stable. In this case, we allow the tablet repair to proceed.
|
||||
throw new SchedException(Status.UNRECOVERABLE,
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"table is in alter process, but tablet status is " + statusPair.first.name());
|
||||
}
|
||||
|
||||
tabletCtx.setTabletStatus(statusPair.first);
|
||||
if (statusPair.first == TabletStatus.HEALTHY && tabletCtx.getType() == TabletSchedCtx.Type.REPAIR) {
|
||||
throw new SchedException(Status.UNRECOVERABLE, "tablet is healthy");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "tablet is healthy");
|
||||
} else if (statusPair.first != TabletStatus.HEALTHY
|
||||
&& tabletCtx.getType() == TabletSchedCtx.Type.BALANCE) {
|
||||
// we select an unhealthy tablet to do balance, which is not right.
|
||||
// so here we stop this task.
|
||||
throw new SchedException(Status.UNRECOVERABLE, "tablet is unhealthy when doing balance");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"tablet is unhealthy when doing balance");
|
||||
}
|
||||
|
||||
// for disk balance more accurately, we only schedule tablet when has lastly stat info about disk
|
||||
@ -667,7 +675,7 @@ public class TabletScheduler extends MasterDaemon {
|
||||
handleReplicaTooSlow(tabletCtx);
|
||||
break;
|
||||
case UNRECOVERABLE:
|
||||
throw new SchedException(Status.UNRECOVERABLE, "tablet is unrecoverable");
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "tablet is unrecoverable");
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1090,8 +1098,9 @@ public class TabletScheduler extends MasterDaemon {
|
||||
if (!otherCatchup) {
|
||||
LOG.info("can not delete only one replica, tabletId = {} replicaId = {}", tabletCtx.getTabletId(),
|
||||
replica.getId());
|
||||
throw new SchedException(Status.UNRECOVERABLE, "the only one latest replia can not be dropped, tabletId = "
|
||||
+ tabletCtx.getTabletId() + ", replicaId = " + replica.getId());
|
||||
throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE,
|
||||
"the only one latest replia can not be dropped, tabletId = "
|
||||
+ tabletCtx.getTabletId() + ", replicaId = " + replica.getId());
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1450,7 +1459,7 @@ public class TabletScheduler extends MasterDaemon {
|
||||
|
||||
if (hasBePath) {
|
||||
throw new SchedException(Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT,
|
||||
"unable to find dest path which can be fit in");
|
||||
"waiting for dest replica slot");
|
||||
} else {
|
||||
throw new SchedException(Status.UNRECOVERABLE,
|
||||
"unable to find dest path which can be fit in");
|
||||
@ -1790,18 +1799,27 @@ public class TabletScheduler extends MasterDaemon {
|
||||
}
|
||||
|
||||
public List<List<String>> getPendingTabletsInfo(int limit) {
|
||||
List<TabletSchedCtx> tabletCtxs = getCopiedTablets(pendingTablets, limit);
|
||||
return collectTabletCtx(tabletCtxs);
|
||||
return collectTabletCtx(getPendingTablets(limit));
|
||||
}
|
||||
|
||||
public List<TabletSchedCtx> getPendingTablets(int limit) {
|
||||
return getCopiedTablets(pendingTablets, limit);
|
||||
}
|
||||
|
||||
public List<List<String>> getRunningTabletsInfo(int limit) {
|
||||
List<TabletSchedCtx> tabletCtxs = getCopiedTablets(runningTablets.values(), limit);
|
||||
return collectTabletCtx(tabletCtxs);
|
||||
return collectTabletCtx(getRunningTablets(limit));
|
||||
}
|
||||
|
||||
public List<TabletSchedCtx> getRunningTablets(int limit) {
|
||||
return getCopiedTablets(runningTablets.values(), limit);
|
||||
}
|
||||
|
||||
public List<List<String>> getHistoryTabletsInfo(int limit) {
|
||||
List<TabletSchedCtx> tabletCtxs = getCopiedTablets(schedHistory, limit);
|
||||
return collectTabletCtx(tabletCtxs);
|
||||
return collectTabletCtx(getHistoryTablets(limit));
|
||||
}
|
||||
|
||||
public List<TabletSchedCtx> getHistoryTablets(int limit) {
|
||||
return getCopiedTablets(schedHistory, limit);
|
||||
}
|
||||
|
||||
private List<List<String>> collectTabletCtx(List<TabletSchedCtx> tabletCtxs) {
|
||||
|
||||
@ -0,0 +1,307 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.common.proc;
|
||||
|
||||
import org.apache.doris.catalog.ColocateTableIndex.GroupId;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.catalog.TabletInvertedIndex;
|
||||
import org.apache.doris.clone.BackendLoadStatistic;
|
||||
import org.apache.doris.clone.BackendLoadStatistic.Classification;
|
||||
import org.apache.doris.clone.LoadStatisticForTag;
|
||||
import org.apache.doris.clone.RootPathLoadStatistic;
|
||||
import org.apache.doris.clone.SchedException.SubCode;
|
||||
import org.apache.doris.clone.TabletSchedCtx;
|
||||
import org.apache.doris.clone.TabletScheduler;
|
||||
import org.apache.doris.common.Config;
|
||||
import org.apache.doris.common.proc.DiagnoseProcDir.DiagnoseItem;
|
||||
import org.apache.doris.common.proc.DiagnoseProcDir.DiagnoseStatus;
|
||||
import org.apache.doris.common.proc.DiagnoseProcDir.SubProcDir;
|
||||
import org.apache.doris.common.proc.TabletHealthProcDir.DBTabletStatistic;
|
||||
import org.apache.doris.ha.FrontendNodeType;
|
||||
import org.apache.doris.resource.Tag;
|
||||
import org.apache.doris.system.SystemInfoService;
|
||||
import org.apache.doris.thrift.TStorageMedium;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/*
|
||||
* show proc "/diagnose/cluster_balance";
|
||||
*/
|
||||
public class DiagnoseClusterBalanceProcDir extends SubProcDir {
|
||||
|
||||
@Override
|
||||
public List<DiagnoseItem> getDiagnoseResult() {
|
||||
long now = System.currentTimeMillis();
|
||||
long minToMs = 60 * 1000L;
|
||||
|
||||
Env env = Env.getCurrentEnv();
|
||||
TabletScheduler tabletScheduler = env.getTabletScheduler();
|
||||
List<TabletSchedCtx> pendingTablets = tabletScheduler.getPendingTablets(1);
|
||||
List<TabletSchedCtx> runningTablets = tabletScheduler.getRunningTablets(1);
|
||||
List<TabletSchedCtx> historyTablets = tabletScheduler.getHistoryTablets(10000);
|
||||
long historyLastVisitTime = historyTablets.stream()
|
||||
.mapToLong(tablet -> Math.max(tablet.getCreateTime(), tablet.getLastVisitedTime()))
|
||||
.max().orElse(-1);
|
||||
boolean schedReady = env.getFrontends(null).stream().anyMatch(
|
||||
fe -> fe.isAlive() && now >= fe.getLastStartupTime() + 1 * minToMs
|
||||
&& (fe.getRole() == FrontendNodeType.MASTER || fe.getRole() == FrontendNodeType.FOLLOWER));
|
||||
boolean schedRecent = !pendingTablets.isEmpty() || !runningTablets.isEmpty()
|
||||
|| historyLastVisitTime >= now - 15 * minToMs;
|
||||
|
||||
List<DiagnoseItem> items = Lists.newArrayList();
|
||||
items.add(diagnoseTabletHealth(schedReady, schedRecent));
|
||||
DiagnoseItem baseBalance = diagnoseBaseBalance(schedReady, schedRecent);
|
||||
items.add(baseBalance);
|
||||
items.add(diagnoseDiskBalance(schedReady, schedRecent, baseBalance.status == DiagnoseStatus.OK));
|
||||
items.add(diagnoseColocateRebalance(schedReady, schedRecent));
|
||||
items.add(diagnoseHistorySched(historyTablets,
|
||||
items.stream().allMatch(item -> item.status == DiagnoseStatus.OK)));
|
||||
|
||||
return items;
|
||||
}
|
||||
|
||||
private DiagnoseItem diagnoseTabletHealth(boolean schedReady, boolean schedRecent) {
|
||||
DiagnoseItem tabletHealth = new DiagnoseItem();
|
||||
tabletHealth.name = "Tablet Health";
|
||||
tabletHealth.status = DiagnoseStatus.OK;
|
||||
|
||||
Env env = Env.getCurrentEnv();
|
||||
List<DBTabletStatistic> statistics = env.getInternalCatalog().getDbIds().parallelStream()
|
||||
// skip information_schema database
|
||||
.flatMap(id -> Stream.of(id == 0 ? null : env.getInternalCatalog().getDbNullable(id)))
|
||||
.filter(Objects::nonNull).map(DBTabletStatistic::new)
|
||||
// sort by dbName
|
||||
.sorted(Comparator.comparing(db -> db.db.getFullName())).collect(Collectors.toList());
|
||||
|
||||
DBTabletStatistic total = statistics.stream().reduce(new DBTabletStatistic(), DBTabletStatistic::reduce);
|
||||
if (total.tabletNum != total.healthyNum) {
|
||||
tabletHealth.status = DiagnoseStatus.ERROR;
|
||||
tabletHealth.content = String.format("healthy tablet num %s < total tablet num %s",
|
||||
total.healthyNum, total.tabletNum);
|
||||
tabletHealth.detailCmd = "show proc \"/cluster_health/tablet_health\";";
|
||||
boolean changeWarning = total.unrecoverableNum == 0;
|
||||
if (Config.disable_tablet_scheduler) {
|
||||
tabletHealth.suggestion = "has disable tablet balance, ensure master fe config: "
|
||||
+ "disable_tablet_scheduler = false";
|
||||
} else if (!schedReady) {
|
||||
tabletHealth.suggestion = "check all fe are ready, and then wait some minutes for "
|
||||
+ "sheduler to migrate tablets";
|
||||
} else if (schedRecent) {
|
||||
tabletHealth.suggestion = "tablet is still scheduling, run 'show proc \"/cluster_balance\"'";
|
||||
} else {
|
||||
changeWarning = false;
|
||||
}
|
||||
if (changeWarning) {
|
||||
tabletHealth.status = DiagnoseStatus.WARNING;
|
||||
}
|
||||
}
|
||||
|
||||
return tabletHealth;
|
||||
}
|
||||
|
||||
private DiagnoseItem diagnoseBaseBalance(boolean schedReady, boolean schedRecent) {
|
||||
SystemInfoService infoService = Env.getCurrentSystemInfo();
|
||||
Map<Tag, LoadStatisticForTag> loadStatisticMap = Env.getCurrentEnv().getTabletScheduler().getStatisticMap();
|
||||
|
||||
DiagnoseItem baseBalance = new DiagnoseItem();
|
||||
baseBalance.status = DiagnoseStatus.OK;
|
||||
|
||||
// check base balance
|
||||
List<Long> availableBeIds = infoService.getAllBackendIds(true).stream()
|
||||
.filter(beId -> infoService.checkBackendScheduleAvailable(beId))
|
||||
.collect(Collectors.toList());
|
||||
boolean isPartitionBal = Config.tablet_rebalancer_type.equalsIgnoreCase("partition");
|
||||
if (isPartitionBal) {
|
||||
TabletInvertedIndex invertedIndex = Env.getCurrentInvertedIndex();
|
||||
baseBalance.name = "Partition Balance";
|
||||
List<Integer> tabletNums = availableBeIds.stream()
|
||||
.map(beId -> invertedIndex.getTabletNumByBackendId(beId))
|
||||
.collect(Collectors.toList());
|
||||
int minTabletNum = tabletNums.stream().mapToInt(v -> v).min().orElse(0);
|
||||
int maxTabletNum = tabletNums.stream().mapToInt(v -> v).max().orElse(0);
|
||||
if (maxTabletNum <= Math.max(minTabletNum * Config.diagnose_balance_max_tablet_num_ratio,
|
||||
minTabletNum + Config.diagnose_balance_max_tablet_num_diff)) {
|
||||
baseBalance.status = DiagnoseStatus.OK;
|
||||
} else {
|
||||
baseBalance.status = DiagnoseStatus.ERROR;
|
||||
baseBalance.content = String.format("tablets not balance, be %s has %s tablets, be %s has %s tablets",
|
||||
availableBeIds.get(availableBeIds.indexOf(minTabletNum)), minTabletNum,
|
||||
availableBeIds.get(availableBeIds.indexOf(maxTabletNum)), maxTabletNum);
|
||||
baseBalance.detailCmd = "show backends";
|
||||
}
|
||||
} else {
|
||||
baseBalance.name = "BeLoad Balance";
|
||||
baseBalance.status = DiagnoseStatus.OK;
|
||||
OUTER1:
|
||||
for (LoadStatisticForTag stat : loadStatisticMap.values()) {
|
||||
for (TStorageMedium storageMedium : TStorageMedium.values()) {
|
||||
List<Long> lowBEs = stat.getBackendLoadStatistics().stream()
|
||||
.filter(be -> availableBeIds.contains(be.getBeId())
|
||||
&& be.getClazz(storageMedium) == Classification.LOW)
|
||||
.map(BackendLoadStatistic::getBeId)
|
||||
.collect(Collectors.toList());
|
||||
List<Long> highBEs = stat.getBackendLoadStatistics().stream()
|
||||
.filter(be -> availableBeIds.contains(be.getBeId())
|
||||
&& be.getClazz(storageMedium) == Classification.HIGH)
|
||||
.map(BackendLoadStatistic::getBeId)
|
||||
.collect(Collectors.toList());
|
||||
if (!lowBEs.isEmpty() || !highBEs.isEmpty()) {
|
||||
baseBalance.status = DiagnoseStatus.ERROR;
|
||||
baseBalance.content = String.format("backend load not balance for tag %s, storage medium %s, "
|
||||
+ "low load backends %s, high load backends %s",
|
||||
stat.getTag(), storageMedium.name().toUpperCase(), lowBEs, highBEs);
|
||||
baseBalance.detailCmd = String.format("show proc \"/cluster_balance/cluster_load_stat/%s/%s\"",
|
||||
stat.getTag().toKey(), storageMedium.name().toUpperCase());
|
||||
break OUTER1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (baseBalance.status != DiagnoseStatus.OK) {
|
||||
if (Config.disable_tablet_scheduler || Config.disable_balance) {
|
||||
baseBalance.suggestion = "has disable tablet balance, ensure master fe config: "
|
||||
+ "disable_tablet_scheduler = false, disable_balance = false";
|
||||
} else if (!schedReady) {
|
||||
baseBalance.suggestion = "check all fe are ready, and then wait some minutes for "
|
||||
+ "sheduler to migrate tablets";
|
||||
baseBalance.status = DiagnoseStatus.WARNING;
|
||||
} else if (schedRecent) {
|
||||
baseBalance.suggestion = "tablet is still scheduling, run 'show proc \"/cluster_balance\"'";
|
||||
baseBalance.status = DiagnoseStatus.WARNING;
|
||||
}
|
||||
}
|
||||
|
||||
return baseBalance;
|
||||
}
|
||||
|
||||
|
||||
private DiagnoseItem diagnoseDiskBalance(boolean schedReady, boolean schedRecent, boolean baseBalanceOk) {
|
||||
DiagnoseItem diskBalance = new DiagnoseItem();
|
||||
diskBalance.name = "Disk Balance";
|
||||
diskBalance.status = DiagnoseStatus.OK;
|
||||
|
||||
Map<Tag, LoadStatisticForTag> loadStatisticMap = Env.getCurrentEnv().getTabletScheduler().getStatisticMap();
|
||||
|
||||
OUTER2:
|
||||
for (LoadStatisticForTag stat : loadStatisticMap.values()) {
|
||||
List<RootPathLoadStatistic> lowPaths = Lists.newArrayList();
|
||||
List<RootPathLoadStatistic> midPaths = Lists.newArrayList();
|
||||
List<RootPathLoadStatistic> highPaths = Lists.newArrayList();
|
||||
for (TStorageMedium storageMedium : TStorageMedium.values()) {
|
||||
for (BackendLoadStatistic beStat : stat.getBackendLoadStatistics()) {
|
||||
lowPaths.clear();
|
||||
midPaths.clear();
|
||||
highPaths.clear();
|
||||
beStat.getPathStatisticByClass(lowPaths, midPaths, highPaths, storageMedium);
|
||||
if (!lowPaths.isEmpty() || !highPaths.isEmpty()) {
|
||||
diskBalance.status = DiagnoseStatus.ERROR;
|
||||
diskBalance.content = String.format("backend %s is not disk balance, low paths { %s }, "
|
||||
+ "high paths { %s }", beStat.getBeId(),
|
||||
lowPaths.stream().map(RootPathLoadStatistic::getPath).collect(Collectors.toList()),
|
||||
highPaths.stream().map(RootPathLoadStatistic::getPath).collect(Collectors.toList()));
|
||||
diskBalance.detailCmd = String.format(
|
||||
"show proc \"/cluster_balance/cluster_load_stat/%s/%s/%s\"",
|
||||
stat.getTag().toKey(), storageMedium.name().toUpperCase(), beStat.getBeId());
|
||||
break OUTER2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (diskBalance.status != DiagnoseStatus.OK) {
|
||||
if (Config.disable_tablet_scheduler || Config.disable_balance || Config.disable_disk_balance) {
|
||||
diskBalance.suggestion = "has disable tablet balance, ensure master fe config: "
|
||||
+ "disable_tablet_scheduler = false, disable_balance = false, disable_disk_balance = false";
|
||||
} else if (!schedReady) {
|
||||
diskBalance.suggestion = "check all fe are ready, and then wait some minutes for "
|
||||
+ "sheduler to migrate tablets";
|
||||
diskBalance.status = DiagnoseStatus.WARNING;
|
||||
} else if (schedRecent) {
|
||||
diskBalance.suggestion = "tablet is still scheduling, run 'show proc \"/cluster_balance\"'";
|
||||
diskBalance.status = DiagnoseStatus.WARNING;
|
||||
} else if (!baseBalanceOk) {
|
||||
diskBalance.suggestion = "disk balance run after all be balance, need them finished";
|
||||
diskBalance.status = DiagnoseStatus.WARNING;
|
||||
}
|
||||
}
|
||||
|
||||
return diskBalance;
|
||||
}
|
||||
|
||||
private DiagnoseItem diagnoseColocateRebalance(boolean schedReady, boolean schedRecent) {
|
||||
DiagnoseItem colocateBalance = new DiagnoseItem();
|
||||
colocateBalance.status = DiagnoseStatus.OK;
|
||||
colocateBalance.name = "Colocate Group Stable";
|
||||
Set<GroupId> unstableGroups = Env.getCurrentEnv().getColocateTableIndex().getUnstableGroupIds();
|
||||
if (!unstableGroups.isEmpty()) {
|
||||
colocateBalance.status = DiagnoseStatus.ERROR;
|
||||
colocateBalance.content = String.format("colocate groups are unstable: %s", unstableGroups);
|
||||
colocateBalance.detailCmd = "show proc \"/colocation_group\"";
|
||||
if (Config.disable_tablet_scheduler || Config.disable_colocate_balance) {
|
||||
colocateBalance.suggestion = "has disable tablet balance, ensure master fe config: "
|
||||
+ "disable_tablet_scheduler = false, disable_colocate_balance = false";
|
||||
} else if (!schedReady) {
|
||||
colocateBalance.suggestion = "check all fe are ready, and then wait some minutes for "
|
||||
+ "sheduler to migrate tablets";
|
||||
colocateBalance.status = DiagnoseStatus.WARNING;
|
||||
} else if (schedRecent) {
|
||||
colocateBalance.suggestion = "tablet is still scheduling, run 'show proc \"/cluster_balance\"'";
|
||||
colocateBalance.status = DiagnoseStatus.WARNING;
|
||||
}
|
||||
}
|
||||
|
||||
return colocateBalance;
|
||||
}
|
||||
|
||||
private DiagnoseItem diagnoseHistorySched(List<TabletSchedCtx> historyTablets, boolean ignoreErrs) {
|
||||
DiagnoseItem historySched = new DiagnoseItem();
|
||||
historySched.name = "History Tablet Sched";
|
||||
historySched.status = DiagnoseStatus.OK;
|
||||
|
||||
if (!ignoreErrs) {
|
||||
long now = System.currentTimeMillis();
|
||||
List<TabletSchedCtx> failedTablets = historyTablets.stream()
|
||||
.filter(tablet -> tablet.getLastVisitedTime() >= now - 1800 * 1000L
|
||||
&& tablet.getSchedFailedCode() != SubCode.WAITING_SLOT
|
||||
&& tablet.getSchedFailedCode() != SubCode.WAITING_DECOMMISSION
|
||||
&& tablet.getSchedFailedCode() != SubCode.DIAGNOSE_IGNORE
|
||||
&& (tablet.getState() == TabletSchedCtx.State.CANCELLED
|
||||
|| tablet.getState() == TabletSchedCtx.State.UNEXPECTED))
|
||||
.sorted(Comparator.comparing(TabletSchedCtx::getLastVisitedTime).reversed())
|
||||
.limit(5).collect(Collectors.toList());
|
||||
if (!failedTablets.isEmpty()) {
|
||||
historySched.status = DiagnoseStatus.WARNING;
|
||||
historySched.content = String.format("tablet sched has failed: %s", failedTablets.stream()
|
||||
.map(tablet -> String.format("tablet %s error: %s", tablet.getTabletId(), tablet.getErrMsg()))
|
||||
.collect(Collectors.toList()));
|
||||
historySched.detailCmd = "show proc \"/cluster_balance/history_tablets\"";
|
||||
}
|
||||
}
|
||||
|
||||
return historySched;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,121 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.common.proc;
|
||||
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/*
|
||||
* show proc "/diagnose";
|
||||
*/
|
||||
public class DiagnoseProcDir implements ProcDirInterface {
|
||||
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>()
|
||||
.add("Item").add("ErrorNum").add("WarningNum").build();
|
||||
|
||||
enum DiagnoseStatus {
|
||||
OK,
|
||||
WARNING,
|
||||
ERROR,
|
||||
}
|
||||
|
||||
static class DiagnoseItem {
|
||||
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>()
|
||||
.add("Item").add("Status").add("Content").add("Detail Cmd").add("Suggestion").build();
|
||||
|
||||
public String name = "";
|
||||
public DiagnoseStatus status = DiagnoseStatus.OK;
|
||||
public String content = "";
|
||||
public String detailCmd = "";
|
||||
public String suggestion = "";
|
||||
|
||||
public List<String> toRow() {
|
||||
return Lists.newArrayList(name, status != null ? status.name().toUpperCase() : "", content,
|
||||
detailCmd, suggestion);
|
||||
}
|
||||
}
|
||||
|
||||
static class SubProcDir implements ProcDirInterface {
|
||||
public List<DiagnoseItem> getDiagnoseResult() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean register(String name, ProcNodeInterface node) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ProcNodeInterface lookup(String name) throws AnalysisException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ProcResult fetchResult() throws AnalysisException {
|
||||
List<List<String>> rows = getDiagnoseResult().stream().map(DiagnoseItem::toRow)
|
||||
.collect(Collectors.toList());
|
||||
return new BaseProcResult(DiagnoseItem.TITLE_NAMES, rows);
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, SubProcDir> subDiagnoses;
|
||||
|
||||
DiagnoseProcDir() {
|
||||
subDiagnoses = Maps.newHashMap();
|
||||
subDiagnoses.put("cluster_balance", new DiagnoseClusterBalanceProcDir());
|
||||
|
||||
// (TODO)
|
||||
//subDiagnoses.put("transactions", new DiagnoseTransactionsProcDir());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean register(String name, ProcNodeInterface node) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ProcNodeInterface lookup(String name) throws AnalysisException {
|
||||
return subDiagnoses.get(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ProcResult fetchResult() throws AnalysisException {
|
||||
List<List<String>> rows = subDiagnoses.entrySet().stream()
|
||||
.sorted(Comparator.comparing(it -> it.getKey()))
|
||||
.map(it -> {
|
||||
List<DiagnoseItem> items = it.getValue().getDiagnoseResult();
|
||||
long errNum = items.stream().filter(item -> item.status == DiagnoseStatus.ERROR).count();
|
||||
long warningNum = items.stream().filter(item -> item.status == DiagnoseStatus.WARNING).count();
|
||||
return Lists.newArrayList(it.getKey(), String.valueOf(errNum), String.valueOf(warningNum));
|
||||
})
|
||||
.collect(Collectors.toList());
|
||||
|
||||
long totalErrNum = rows.stream().mapToLong(row -> Long.valueOf(row.get(1))).sum();
|
||||
long totalWarningNum = rows.stream().mapToLong(row -> Long.valueOf(row.get(2))).sum();
|
||||
rows.add(Lists.newArrayList("Total", String.valueOf(totalErrNum), String.valueOf(totalWarningNum)));
|
||||
|
||||
return new BaseProcResult(TITLE_NAMES, rows);
|
||||
}
|
||||
}
|
||||
@ -57,6 +57,7 @@ public final class ProcService {
|
||||
root.register("stream_loads", new StreamLoadProcNode());
|
||||
root.register("colocation_group", new ColocationGroupProcDir());
|
||||
root.register("bdbje", new BDBJEProcDir());
|
||||
root.register("diagnose", new DiagnoseProcDir());
|
||||
}
|
||||
|
||||
// 通过指定的路径获得对应的PROC Node
|
||||
|
||||
Reference in New Issue
Block a user