cherry pick from #39467
This commit is contained in:
@ -151,6 +151,10 @@ public class DiskInfo implements Writable {
|
||||
return pathHash != 0;
|
||||
}
|
||||
|
||||
/**
 * Reports whether this disk is currently online.
 *
 * @return {@code true} if {@code state} is {@code DiskState.ONLINE}, {@code false} otherwise
 */
public boolean isAlive() {
|
||||
return state == DiskState.ONLINE;
|
||||
}
|
||||
|
||||
/**
 * Checks whether this disk's storage medium is the given one.
 *
 * @param storageMedium the storage medium to compare against this disk's {@code storageMedium}
 * @return {@code true} if the two values are the same under {@code ==}, {@code false} otherwise
 */
public boolean isStorageMediumMatch(TStorageMedium storageMedium) {
|
||||
return this.storageMedium == storageMedium;
|
||||
}
|
||||
|
||||
@ -281,9 +281,11 @@ public class Tablet extends MetaObject implements Writable {
|
||||
}
|
||||
|
||||
// for query
|
||||
public List<Replica> getQueryableReplicas(long visibleVersion, boolean allowFailedVersion) {
|
||||
public List<Replica> getQueryableReplicas(long visibleVersion, Map<Long, Set<Long>> backendAlivePathHashs,
|
||||
boolean allowFailedVersion) {
|
||||
List<Replica> allQueryableReplica = Lists.newArrayListWithCapacity(replicas.size());
|
||||
List<Replica> auxiliaryReplica = Lists.newArrayListWithCapacity(replicas.size());
|
||||
List<Replica> deadPathReplica = Lists.newArrayList();
|
||||
for (Replica replica : replicas) {
|
||||
if (replica.isBad()) {
|
||||
continue;
|
||||
@ -294,21 +296,31 @@ public class Tablet extends MetaObject implements Writable {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!replica.checkVersionCatchUp(visibleVersion, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Set<Long> thisBeAlivePaths = backendAlivePathHashs.get(replica.getBackendId());
|
||||
ReplicaState state = replica.getState();
|
||||
if (state.canQuery()) {
|
||||
if (replica.checkVersionCatchUp(visibleVersion, false)) {
|
||||
allQueryableReplica.add(replica);
|
||||
}
|
||||
// If thisBeAlivePaths contains path hash 0, it means this BE has not reported its disk state yet;
|
||||
// should ignore this case.
|
||||
if (replica.getPathHash() != -1 && thisBeAlivePaths != null
|
||||
&& !thisBeAlivePaths.contains(replica.getPathHash())
|
||||
&& !thisBeAlivePaths.contains(0L)) {
|
||||
deadPathReplica.add(replica);
|
||||
} else if (state.canQuery()) {
|
||||
allQueryableReplica.add(replica);
|
||||
} else if (state == ReplicaState.DECOMMISSION) {
|
||||
if (replica.checkVersionCatchUp(visibleVersion, false)) {
|
||||
auxiliaryReplica.add(replica);
|
||||
}
|
||||
auxiliaryReplica.add(replica);
|
||||
}
|
||||
}
|
||||
|
||||
if (allQueryableReplica.isEmpty()) {
|
||||
allQueryableReplica = auxiliaryReplica;
|
||||
}
|
||||
if (allQueryableReplica.isEmpty()) {
|
||||
allQueryableReplica = deadPathReplica;
|
||||
}
|
||||
|
||||
if (Config.skip_compaction_slower_replica && allQueryableReplica.size() > 1) {
|
||||
long minVersionCount = allQueryableReplica.stream().mapToLong(Replica::getVisibleVersionCount)
|
||||
|
||||
@ -22,6 +22,7 @@ import org.apache.doris.catalog.BinlogConfig;
|
||||
import org.apache.doris.catalog.ColocateGroupSchema;
|
||||
import org.apache.doris.catalog.ColocateTableIndex;
|
||||
import org.apache.doris.catalog.Database;
|
||||
import org.apache.doris.catalog.DiskInfo;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.catalog.MaterializedIndex;
|
||||
import org.apache.doris.catalog.MaterializedIndex.IndexState;
|
||||
@ -808,6 +809,15 @@ public class ReportHandler extends Daemon {
|
||||
AgentBatchTask createReplicaBatchTask = new AgentBatchTask();
|
||||
TabletInvertedIndex invertedIndex = Env.getCurrentInvertedIndex();
|
||||
Map<Object, Object> objectPool = new HashMap<Object, Object>();
|
||||
Backend backend = Env.getCurrentSystemInfo().getBackend(backendId);
|
||||
Set<Long> backendHealthPathHashs;
|
||||
if (backend == null) {
|
||||
backendHealthPathHashs = Sets.newHashSet();
|
||||
} else {
|
||||
backendHealthPathHashs = backend.getDisks().values().stream()
|
||||
.filter(DiskInfo::isAlive)
|
||||
.map(DiskInfo::getPathHash).collect(Collectors.toSet());
|
||||
}
|
||||
for (Long dbId : tabletDeleteFromMeta.keySet()) {
|
||||
Database db = Env.getCurrentInternalCatalog().getDbNullable(dbId);
|
||||
if (db == null) {
|
||||
@ -863,7 +873,24 @@ public class ReportHandler extends Daemon {
|
||||
long currentBackendReportVersion = Env.getCurrentSystemInfo()
|
||||
.getBackendReportVersion(backendId);
|
||||
if (backendReportVersion < currentBackendReportVersion) {
|
||||
continue;
|
||||
|
||||
// if backendHealthPathHashs contains health path hash 0,
|
||||
// it means this backend has not reported its disk state yet,
|
||||
// should ignore this case.
|
||||
boolean thisReplicaOnBadDisk = replica.getPathHash() != -1L
|
||||
&& !backendHealthPathHashs.contains(replica.getPathHash())
|
||||
&& !backendHealthPathHashs.contains(0L);
|
||||
|
||||
boolean existsOtherHealthReplica = tablet.getReplicas().stream()
|
||||
.anyMatch(r -> r.getBackendId() != replica.getBackendId()
|
||||
&& r.getVersion() >= replica.getVersion()
|
||||
&& r.getLastFailedVersion() == -1L
|
||||
&& !r.isBad());
|
||||
|
||||
// If the replica is on a bad disk and other healthy replicas exist, still delete it.
|
||||
if (!(thisReplicaOnBadDisk && existsOtherHealthReplica)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
BinlogConfig binlogConfig = new BinlogConfig(olapTable.getBinlogConfig());
|
||||
|
||||
@ -40,6 +40,7 @@ import org.apache.doris.analysis.TupleId;
|
||||
import org.apache.doris.catalog.AggregateType;
|
||||
import org.apache.doris.catalog.ColocateTableIndex;
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.DiskInfo;
|
||||
import org.apache.doris.catalog.DistributionInfo;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.catalog.HashDistributionInfo;
|
||||
@ -732,7 +733,7 @@ public class OlapScanNode extends ScanNode {
|
||||
}
|
||||
|
||||
private void addScanRangeLocations(Partition partition,
|
||||
List<Tablet> tablets) throws UserException {
|
||||
List<Tablet> tablets, Map<Long, Set<Long>> backendAlivePathHashs) throws UserException {
|
||||
long visibleVersion = partition.getVisibleVersion();
|
||||
String visibleVersionStr = String.valueOf(visibleVersion);
|
||||
|
||||
@ -776,7 +777,8 @@ public class OlapScanNode extends ScanNode {
|
||||
paloRange.setTabletId(tabletId);
|
||||
|
||||
// random shuffle List && only collect one copy
|
||||
List<Replica> replicas = tablet.getQueryableReplicas(visibleVersion, skipMissingVersion);
|
||||
List<Replica> replicas = tablet.getQueryableReplicas(visibleVersion,
|
||||
backendAlivePathHashs, skipMissingVersion);
|
||||
if (replicas.isEmpty()) {
|
||||
if (ConnectContext.get().getSessionVariable().skipBadTablet) {
|
||||
continue;
|
||||
@ -1125,6 +1127,12 @@ public class OlapScanNode extends ScanNode {
|
||||
*/
|
||||
Preconditions.checkState(scanBackendIds.size() == 0);
|
||||
Preconditions.checkState(scanTabletIds.size() == 0);
|
||||
Map<Long, Set<Long>> backendAlivePathHashs = Maps.newHashMap();
|
||||
for (Backend backend : Env.getCurrentSystemInfo().getAllBackends()) {
|
||||
backendAlivePathHashs.put(backend.getId(), backend.getDisks().values().stream()
|
||||
.filter(DiskInfo::isAlive).map(DiskInfo::getPathHash).collect(Collectors.toSet()));
|
||||
}
|
||||
|
||||
for (Long partitionId : selectedPartitionIds) {
|
||||
final Partition partition = olapTable.getPartition(partitionId);
|
||||
final MaterializedIndex selectedTable = partition.getIndex(selectedIndexId);
|
||||
@ -1166,7 +1174,7 @@ public class OlapScanNode extends ScanNode {
|
||||
|
||||
totalTabletsNum += selectedTable.getTablets().size();
|
||||
selectedSplitNum += tablets.size();
|
||||
addScanRangeLocations(partition, tablets);
|
||||
addScanRangeLocations(partition, tablets, backendAlivePathHashs);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user