Handle the situation when there are not enough backends for tablet repair (#1299)
If there are only 3 backends and the replication num is 3, then when one replica of a tablet goes bad there is no 4th backend to repair it on. So we need to delete the bad replica first to make room for a new replica.
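The condition this commit adds to Tablet.getHealthStatusWithPriority (see the Tablet.java hunk below) can be restated as a small standalone check. The sketch below is illustrative only; the class and method names (ForceRedundantCheck, needsForceRedundant) are hypothetical and not part of the patch.

// Hypothetical, self-contained restatement of the FORCE_REDUNDANT trigger condition.
public class ForceRedundantCheck {
    // Returns true when a replica must be deleted before repair can proceed:
    // 1. alive < replicationNum: some replica is missing or bad
    // 2. replicaCount >= availableBackendsNum: existing replicas occupy all available backends
    // 3. availableBackendsNum >= replicationNum: after deleting one, a backend is free for the new replica
    // 4. replicationNum > 1: never delete the only replica (that case needs human intervention)
    static boolean needsForceRedundant(int alive, int replicaCount,
                                       int replicationNum, int availableBackendsNum) {
        return alive < replicationNum
                && replicaCount >= availableBackendsNum
                && availableBackendsNum >= replicationNum
                && replicationNum > 1;
    }

    public static void main(String[] args) {
        // The scenario from the commit message: 3 backends, replication num 3, one bad replica.
        System.out.println(needsForceRedundant(2, 3, 3, 3)); // true -> FORCE_REDUNDANT
        // With a 4th backend available, a new replica can be cloned without deleting first.
        System.out.println(needsForceRedundant(2, 3, 3, 4)); // false -> normal repair
    }
}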
@@ -69,8 +69,12 @@
5. REDUNDANT

    Replica redundancy. The healthy replicas are all in the corresponding cluster, but their number is larger than the expected replication num, or there are extra unavailable replicas.

6. FORCE\_REDUNDANT

    This is a special state. It only occurs when the expected replication num is greater than or equal to the number of available backends and the tablet is in a replica-missing state. In this case, a replica must be deleted first to guarantee that an available backend exists for creating the new replica.

6. HEALTHY
7. HEALTHY

    Healthy tablet, i.e. none of conditions [1-5] are met.
@@ -107,6 +111,11 @@ TabletChecker, as a resident background process, periodically checks the status of all tablets
6. The replica is not in the correct cluster
7. The replica is on a BE node with high load

5. FORCE\_REDUNDANT

    Unlike REDUNDANT: although the tablet is missing a replica, there are no extra available backends left for creating a new one, so a replica must be deleted first to free up an available backend for the new replica.

    Replicas are deleted in the same order as for REDUNDANT.

### Scheduling priority

Tablets waiting to be scheduled in TabletScheduler are assigned different priorities according to their status. Tablets with higher priority are scheduled first. The following priorities currently exist.
@@ -114,6 +123,7 @@ Tablets waiting to be scheduled in TabletScheduler are assigned different priorities according to
1. VERY\_HIGH

    * REDUNDANT. Tablets with redundant replicas are handled first. Logically, redundancy is the least urgent condition, but it is the quickest to handle and immediately frees resources (such as disk space), so we handle it first.
    * FORCE\_REDUNDANT. Same as above.

2. HIGH
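The FORCE_REDUNDANT handling described above (delete a replica first, then repair) is implemented further down in this diff by handleRedundantReplica(tabletCtx, true) and deleteReplicaInternal(..., force). The following is only a condensed, hypothetical sketch of that flow; ReplicaMeta and BackendRpc are stand-in types rather than Doris classes, and the real deletion order covers more cases than the bad-replica check shown here.

import java.util.Iterator;
import java.util.List;

class ForceDeleteSketch {
    record ReplicaMeta(long backendId, long tabletId, int schemaHash, boolean bad) {}

    interface BackendRpc {
        // Stand-in for sending a DropReplicaTask to a backend.
        void dropReplica(long backendId, long tabletId, int schemaHash);
    }

    // Delete one replica to make room for repair. In the forced case the drop is also
    // pushed to the backend, because waiting for the next tablet report could let the
    // deleted replica be added back to FE metadata.
    static boolean deleteOneReplica(List<ReplicaMeta> replicas, boolean force, BackendRpc rpc) {
        Iterator<ReplicaMeta> it = replicas.iterator();
        while (it.hasNext()) {
            ReplicaMeta r = it.next();
            if (r.bad()) {          // bad replicas are among the first to be dropped
                it.remove();        // remove from the in-memory metadata
                if (force) {
                    rpc.dropReplica(r.backendId(), r.tabletId(), r.schemaHash());
                }
                return true;
            }
        }
        return false;
    }
}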
@@ -974,6 +974,7 @@ public class OlapTable extends Table {
}
public boolean isStable(SystemInfoService infoService, TabletScheduler tabletScheduler, String clusterName) {
int availableBackendsNum = infoService.getClusterBackendIds(clusterName, true).size();
for (Partition partition : idToPartition.values()) {
long visibleVersion = partition.getVisibleVersion();
long visibleVersionHash = partition.getVisibleVersionHash();
@@ -985,7 +986,8 @@ public class OlapTable extends Table {
}
Pair<TabletStatus, TabletSchedCtx.Priority> statusPair = tablet.getHealthStatusWithPriority(
infoService, clusterName, visibleVersion, visibleVersionHash, replicationNum);
infoService, clusterName, visibleVersion, visibleVersionHash, replicationNum,
availableBackendsNum);
if (statusPair.first != TabletStatus.HEALTHY) {
return false;
}
@@ -54,6 +54,8 @@ public class Tablet extends MetaObject implements Writable {
REPLICA_RELOCATING, // replica is healthy, but is under relocating (eg. BE is decommission)
REDUNDANT, // too much replicas
REPLICA_MISSING_IN_CLUSTER, // not enough healthy replicas in correct cluster
FORCE_REDUNDANT, // some replica is missing or bad, but there is no other backends for repair,
// at least one replica has to be deleted first to make room for new replica.
}
private long id;
@@ -371,13 +373,14 @@ public class Tablet extends MetaObject implements Writable {
*/
public Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriority(
SystemInfoService systemInfoService, String clusterName,
long visibleVersion, long visibleVersionHash, int replicationNum) {
long visibleVersion, long visibleVersionHash, int replicationNum,
int availableBackendsNum) {
int alive = 0;
int aliveAndVersionComplete = 0;
int stable = 0;
int availableInCluster = 0;
for (Replica replica : replicas) {
long backendId = replica.getBackendId();
Backend backend = systemInfoService.getBackend(backendId);
@@ -409,7 +412,18 @@ public class Tablet extends MetaObject implements Writable {
}
// 1. alive replicas are not enough
if (alive < (replicationNum / 2) + 1) {
if (alive < replicationNum && replicas.size() >= availableBackendsNum
&& availableBackendsNum >= replicationNum && replicationNum > 1) {
// there is no enough backend for us to create a new replica, so we have to delete an existing replica,
// so there can be available backend for us to create a new replica.
// And if there is only one replica, we will not handle it(maybe need human interference)
// condition explain:
// 1. alive < replicationNum: replica is missing or bad
// 2. replicas.size() >= availableBackendsNum: the existing replicas occupies all available backends
// 3. availableBackendsNum >= replicationNum: make sure after deleting, there will be a backend for new replica.
// 4. replicationNum > 1: if replication num is set to 1, do not delete any replica, for safety reason
return Pair.create(TabletStatus.FORCE_REDUNDANT, TabletSchedCtx.Priority.VERY_HIGH);
} else if (alive < (replicationNum / 2) + 1) {
return Pair.create(TabletStatus.REPLICA_MISSING, TabletSchedCtx.Priority.HIGH);
} else if (alive < replicationNum) {
return Pair.create(TabletStatus.REPLICA_MISSING, TabletSchedCtx.Priority.NORMAL);
@@ -151,12 +151,13 @@ public class TabletInvertedIndex {
if (needRecover(replica, tabletMeta.getOldSchemaHash(), backendTabletInfo)) {
LOG.warn("replica {} of tablet {} on backend {} need recovery. "
+ "replica in FE: {}, report version {}-{}, report schema hash: {},"
+ " is bad: {}",
+ " is bad: {}, is version missing: {}",
replica.getId(), tabletId, backendId, replica,
backendTabletInfo.getVersion(),
backendTabletInfo.getVersion_hash(),
backendTabletInfo.getSchema_hash(),
backendTabletInfo.isSetUsed() ? backendTabletInfo.isUsed() : "unknown");
backendTabletInfo.isSetUsed() ? backendTabletInfo.isUsed() : "unknown",
backendTabletInfo.isSetVersion_miss() ? backendTabletInfo.isVersion_miss() : "unset");
tabletRecoveryMap.put(tabletMeta.getDbId(), tabletId);
}
@@ -342,6 +343,17 @@ public class TabletInvertedIndex {
* because of some unrecoverable failure.
*/
private boolean needRecover(Replica replicaInFe, int schemaHashInFe, TTabletInfo backendTabletInfo) {
if (replicaInFe.getState() != ReplicaState.NORMAL) {
// only normal replica need recover
// case:
// the replica's state is CLONE, which means this a newly created replica in clone process.
// and an old out-of-date replica reports here, and this report should not mark this replica as
// 'need recovery'.
// Other state such as ROLLUP/SCHEMA_CHANGE, the replica behavior is unknown, so for safety reason,
// also not mark this replica as 'need recovery'.
return false;
}
if (backendTabletInfo.isSetUsed() && !backendTabletInfo.isUsed()) {
// tablet is bad
return true;
@@ -372,6 +384,7 @@ public class TabletInvertedIndex {
// even if backend version is less than fe's version, but if version_miss is false,
// which means this may be a stale report.
// so we only return true if version_miss is true.
LOG.info("replica version missing");
return true;
}
return false;
@@ -191,6 +191,7 @@ public class TabletChecker extends Daemon {
db.readLock();
try {
int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
for (Table table : db.getTables()) {
if (!table.needSchedule()) {
continue;
@@ -210,11 +211,12 @@ public class TabletChecker extends Daemon {
}
Pair<TabletStatus, TabletSchedCtx.Priority> statusWithPrio = tablet.getHealthStatusWithPriority(
infoService,
db.getClusterName(),
partition.getVisibleVersion(),
partition.getVisibleVersionHash(),
olapTbl.getPartitionInfo().getReplicationNum(partition.getId()));
infoService,
db.getClusterName(),
partition.getVisibleVersion(),
partition.getVisibleVersionHash(),
olapTbl.getPartitionInfo().getReplicationNum(partition.getId()),
availableBackendsNum);
if (statusWithPrio.first == TabletStatus.HEALTHY) {
// Only set last status check time when status is healthy.
@@ -752,9 +752,11 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
throw new SchedException(Status.UNRECOVERABLE, "tablet does not exist");
}
int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partitionId);
Pair<TabletStatus, TabletSchedCtx.Priority> pair = tablet.getHealthStatusWithPriority(
infoService, db.getClusterName(), visibleVersion, visibleVersionHash, replicationNum);
infoService, db.getClusterName(), visibleVersion, visibleVersionHash, replicationNum,
availableBackendsNum);
if (pair.first == TabletStatus.HEALTHY) {
throw new SchedException(Status.FINISHED, "tablet is healthy");
}
@@ -43,6 +43,7 @@ import org.apache.doris.task.AgentTask;
import org.apache.doris.task.AgentTaskExecutor;
import org.apache.doris.task.AgentTaskQueue;
import org.apache.doris.task.CloneTask;
import org.apache.doris.task.DropReplicaTask;
import org.apache.doris.thrift.TFinishTaskRequest;
import com.google.common.base.Preconditions;
@@ -471,11 +472,13 @@ public class TabletScheduler extends Daemon {
Tablet tablet = idx.getTablet(tabletCtx.getTabletId());
Preconditions.checkNotNull(tablet);
int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
statusPair = tablet.getHealthStatusWithPriority(
infoService, tabletCtx.getCluster(),
partition.getVisibleVersion(),
partition.getVisibleVersionHash(),
tbl.getPartitionInfo().getReplicationNum(partition.getId()));
tbl.getPartitionInfo().getReplicationNum(partition.getId()),
availableBackendsNum);
if (tabletCtx.getType() == TabletSchedCtx.Type.BALANCE && tableState != OlapTableState.NORMAL) {
// If table is under ALTER process, do not allow to do balance.
@@ -494,9 +497,9 @@ public class TabletScheduler extends Daemon {
throw new SchedException(Status.UNRECOVERABLE, "tablet is healthy");
} else if (statusPair.first != TabletStatus.HEALTHY
&& tabletCtx.getType() == TabletSchedCtx.Type.BALANCE) {
tabletCtx.releaseResource(this);
// we select an unhealthy tablet to do balance, which is not right.
// so here we change it to a REPAIR task, and also reset its priority
tabletCtx.releaseResource(this);
tabletCtx.setType(TabletSchedCtx.Type.REPAIR);
tabletCtx.setOrigPriority(statusPair.second);
tabletCtx.setLastSchedTime(currentTime);
@@ -531,7 +534,10 @@ public class TabletScheduler extends Daemon {
handleReplicaRelocating(tabletCtx, batchTask);
break;
case REDUNDANT:
handleRedundantReplica(tabletCtx);
handleRedundantReplica(tabletCtx, false);
break;
case FORCE_REDUNDANT:
handleRedundantReplica(tabletCtx, true);
break;
case REPLICA_MISSING_IN_CLUSTER:
handleReplicaClusterMigration(tabletCtx, batchTask);
@@ -621,16 +627,16 @@ public class TabletScheduler extends Daemon {
* 7. replica not in right cluster
* 8. replica in higher load backend
*/
private void handleRedundantReplica(TabletSchedCtx tabletCtx) throws SchedException {
private void handleRedundantReplica(TabletSchedCtx tabletCtx, boolean force) throws SchedException {
stat.counterReplicaRedundantErr.incrementAndGet();
if (deleteBackendDropped(tabletCtx)
|| deleteBadReplica(tabletCtx)
|| deleteBackendUnavailable(tabletCtx)
|| deleteCloneReplica(tabletCtx)
|| deleteReplicaWithFailedVersion(tabletCtx)
|| deleteReplicaWithLowerVersion(tabletCtx)
|| deleteReplicaNotInCluster(tabletCtx)
|| deleteReplicaOnHighLoadBackend(tabletCtx)) {
if (deleteBackendDropped(tabletCtx, force)
|| deleteBadReplica(tabletCtx, force)
|| deleteBackendUnavailable(tabletCtx, force)
|| deleteCloneReplica(tabletCtx, force)
|| deleteReplicaWithFailedVersion(tabletCtx, force)
|| deleteReplicaWithLowerVersion(tabletCtx, force)
|| deleteReplicaNotInCluster(tabletCtx, force)
|| deleteReplicaOnHighLoadBackend(tabletCtx, force)) {
// if we delete at least one redundant replica, we still throw a SchedException with status FINISHED
// to remove this tablet from the pendingTablets(consider it as finished)
throw new SchedException(Status.FINISHED, "redundant replica is deleted");
@@ -638,28 +644,28 @@ public class TabletScheduler extends Daemon {
throw new SchedException(Status.SCHEDULE_FAILED, "unable to delete any redundant replicas");
}
private boolean deleteBackendDropped(TabletSchedCtx tabletCtx) {
private boolean deleteBackendDropped(TabletSchedCtx tabletCtx, boolean force) {
for (Replica replica : tabletCtx.getReplicas()) {
long beId = replica.getBackendId();
if (infoService.getBackend(beId) == null) {
deleteReplicaInternal(tabletCtx, replica, "backend dropped");
deleteReplicaInternal(tabletCtx, replica, "backend dropped", force);
return true;
}
}
return false;
}
private boolean deleteBadReplica(TabletSchedCtx tabletCtx) {
private boolean deleteBadReplica(TabletSchedCtx tabletCtx, boolean force) {
for (Replica replica : tabletCtx.getReplicas()) {
if (replica.isBad()) {
deleteReplicaInternal(tabletCtx, replica, "replica is bad");
deleteReplicaInternal(tabletCtx, replica, "replica is bad", force);
return true;
}
}
return false;
}
private boolean deleteBackendUnavailable(TabletSchedCtx tabletCtx) {
private boolean deleteBackendUnavailable(TabletSchedCtx tabletCtx, boolean force) {
for (Replica replica : tabletCtx.getReplicas()) {
Backend be = infoService.getBackend(replica.getBackendId());
if (be == null) {
@@ -667,44 +673,44 @@ public class TabletScheduler extends Daemon {
continue;
}
if (!be.isAvailable()) {
deleteReplicaInternal(tabletCtx, replica, "backend unavailable");
deleteReplicaInternal(tabletCtx, replica, "backend unavailable", force);
return true;
}
}
return false;
}
private boolean deleteCloneReplica(TabletSchedCtx tabletCtx) {
private boolean deleteCloneReplica(TabletSchedCtx tabletCtx, boolean force) {
for (Replica replica : tabletCtx.getReplicas()) {
if (replica.getState() == ReplicaState.CLONE) {
deleteReplicaInternal(tabletCtx, replica, "clone state");
deleteReplicaInternal(tabletCtx, replica, "clone state", force);
return true;
}
}
return false;
}
private boolean deleteReplicaWithFailedVersion(TabletSchedCtx tabletCtx) {
private boolean deleteReplicaWithFailedVersion(TabletSchedCtx tabletCtx, boolean force) {
for (Replica replica : tabletCtx.getReplicas()) {
if (replica.getLastFailedVersion() > 0) {
deleteReplicaInternal(tabletCtx, replica, "version incomplete");
deleteReplicaInternal(tabletCtx, replica, "version incomplete", force);
return true;
}
}
return false;
}
private boolean deleteReplicaWithLowerVersion(TabletSchedCtx tabletCtx) {
private boolean deleteReplicaWithLowerVersion(TabletSchedCtx tabletCtx, boolean force) {
for (Replica replica : tabletCtx.getReplicas()) {
if (!replica.checkVersionCatchUp(tabletCtx.getCommittedVersion(), tabletCtx.getCommittedVersionHash())) {
deleteReplicaInternal(tabletCtx, replica, "lower version");
deleteReplicaInternal(tabletCtx, replica, "lower version", force);
return true;
}
}
return false;
}
private boolean deleteReplicaNotInCluster(TabletSchedCtx tabletCtx) {
private boolean deleteReplicaNotInCluster(TabletSchedCtx tabletCtx, boolean force) {
for (Replica replica : tabletCtx.getReplicas()) {
Backend be = infoService.getBackend(replica.getBackendId());
if (be == null) {
@@ -712,14 +718,14 @@ public class TabletScheduler extends Daemon {
continue;
}
if (!be.getOwnerClusterName().equals(tabletCtx.getCluster())) {
deleteReplicaInternal(tabletCtx, replica, "not in cluster");
deleteReplicaInternal(tabletCtx, replica, "not in cluster", force);
return true;
}
}
return false;
}
private boolean deleteReplicaOnHighLoadBackend(TabletSchedCtx tabletCtx) {
private boolean deleteReplicaOnHighLoadBackend(TabletSchedCtx tabletCtx, boolean force) {
ClusterLoadStatistic statistic = statisticMap.get(tabletCtx.getCluster());
if (statistic == null) {
return false;
@@ -739,17 +745,26 @@ public class TabletScheduler extends Daemon {
}
if (chosenReplica != null) {
deleteReplicaInternal(tabletCtx, chosenReplica, "high load");
deleteReplicaInternal(tabletCtx, chosenReplica, "high load", force);
return true;
}
return false;
}
private void deleteReplicaInternal(TabletSchedCtx tabletCtx, Replica replica, String reason) {
private void deleteReplicaInternal(TabletSchedCtx tabletCtx, Replica replica, String reason, boolean force) {
// delete this replica from catalog.
// it will also delete replica from tablet inverted index.
tabletCtx.deleteReplica(replica);
if (force) {
// send the delete replica task.
// also this may not be necessary, but delete it will make things simpler.
// NOTICE: only delete the replica from meta may not work. sometimes we can depends on tablet report
// to delete these replicas, but in FORCE_REDUNDANT case, replica may be added to meta again in report
// process.
sendDeleteReplicaTask(replica.getBackendId(), tabletCtx.getTabletId(), tabletCtx.getSchemaHash());
}
// write edit log
ReplicaPersistInfo info = ReplicaPersistInfo.createForDelete(tabletCtx.getDbId(),
tabletCtx.getTblId(),
@@ -764,6 +779,14 @@ public class TabletScheduler extends Daemon {
tabletCtx.getTabletId(), replica.getBackendId(), reason);
}
private void sendDeleteReplicaTask(long backendId, long tabletId, int schemaHash) {
DropReplicaTask task = new DropReplicaTask(backendId, tabletId, schemaHash);
AgentBatchTask batchTask = new AgentBatchTask();
batchTask.addTask(task);
AgentTaskExecutor.submit(batchTask);
LOG.info("send delete replica task for tablet {} in backend {}", tabletId, backendId);
}
/*
* Cluster migration, which means the tablet has enough healthy replicas,
* but some replicas are not in right cluster.
@@ -97,6 +97,7 @@ public class StatisticProcDir implements ProcDirInterface {
}
++totalDbNum;
int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
db.readLock();
try {
int dbTableNum = 0;
@@ -125,7 +126,7 @@ public class StatisticProcDir implements ProcDirInterface {
Pair<TabletStatus, Priority> res = tablet.getHealthStatusWithPriority(
infoService, db.getClusterName(),
partition.getVisibleVersion(), partition.getVisibleVersionHash(),
replicationNum);
replicationNum, availableBackendsNum);
// here we treat REDUNDANT as HEALTHY, for user friendly.
if (res.first != TabletStatus.HEALTHY && res.first != TabletStatus.REDUNDANT) {
@@ -627,7 +627,8 @@ public class ReportHandler extends Daemon {
TTablet backendTablet = backendTablets.get(tabletId);
for (TTabletInfo backendTabletInfo : backendTablet.getTablet_infos()) {
boolean needDelete = false;
if (!foundTabletsWithValidSchema.contains(tabletId)) {
if (!foundTabletsWithValidSchema.contains(tabletId)
&& isBackendReplicaHealthy(backendTabletInfo)) {
// if this tablet is not in meta. try adding it.
// if add failed. delete this tablet from backend.
try {
@@ -669,6 +670,17 @@ public class ReportHandler extends Daemon {
LOG.info("add {} replica(s) to meta. backend[{}]", addToMetaCounter, backendId);
}
// replica is used and no version missing
private static boolean isBackendReplicaHealthy(TTabletInfo backendTabletInfo) {
if (backendTabletInfo.isSetUsed() && !backendTabletInfo.isUsed()) {
return false;
}
if (backendTabletInfo.isSetVersion_miss() && backendTabletInfo.isVersion_miss()) {
return false;
}
return true;
}
private static void handleMigration(ListMultimap<TStorageMedium, Long> tabletMetaMigrationMap,
long backendId) {
TabletInvertedIndex invertedIndex = Catalog.getCurrentInvertedIndex();
@@ -928,9 +940,10 @@ public class ReportHandler extends Daemon {
return;
}
int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
Pair<TabletStatus, TabletSchedCtx.Priority> status = tablet.getHealthStatusWithPriority(infoService,
db.getClusterName(), visibleVersion, visibleVersionHash,
replicationNum);
replicationNum, availableBackendsNum);
if (status.first == TabletStatus.VERSION_INCOMPLETE || status.first == TabletStatus.REPLICA_MISSING) {
long lastFailedVersion = -1L;
@@ -674,20 +674,20 @@ public class SystemInfoService {
/**
* get cluster's backend id list
*
* @param name
* @param clusterName
* @return
*/
public List<Long> getClusterBackendIds(String name, boolean needAlive) {
public List<Long> getClusterBackendIds(String clusterName, boolean needAlive) {
final Map<Long, Backend> copiedBackends = Maps.newHashMap(idToBackendRef.get());
final List<Long> ret = new ArrayList<Long>();
if (Strings.isNullOrEmpty(name)) {
if (Strings.isNullOrEmpty(clusterName)) {
return null;
}
if (needAlive) {
for (Backend backend : copiedBackends.values()) {
if (name.equals(backend.getOwnerClusterName())) {
if (clusterName.equals(backend.getOwnerClusterName())) {
if (backend != null && backend.isAlive()) {
ret.add(backend.getId());
}
@@ -695,7 +695,7 @@ public class SystemInfoService {
}
} else {
for (Backend backend : copiedBackends.values()) {
if (name.equals(backend.getOwnerClusterName())) {
if (clusterName.equals(backend.getOwnerClusterName())) {
ret.add(backend.getId());
}
}