Fix a bug where not enough normal replicas could be found because the path hash was not set. (#1714)
The path hash of a replica in metadata should be set immediately after the replica is created. Also, we should not depend on the path hash to find replicas, because the path hash may be set with a delay.
This commit is contained in:
@ -953,7 +953,7 @@ OLAPStatus TabletManager::report_tablet_info(TTabletInfo* tablet_info) {
|
||||
}
|
||||
|
||||
_build_tablet_info(tablet, tablet_info);
|
||||
LOG(INFO) << "success to process report tablet info.";
|
||||
VLOG(10) << "success to process report tablet info.";
|
||||
return res;
|
||||
} // report_tablet_info
|
||||
|
||||
@ -987,13 +987,6 @@ OLAPStatus TabletManager::report_all_tablets_info(std::map<TTabletId, TTablet>*
|
||||
tablet_ptr->schema_hash(), tablet_ptr->tablet_uid(), &transaction_ids);
|
||||
tablet_info.__set_transaction_ids(transaction_ids);
|
||||
|
||||
if (_available_storage_medium_type_count > 1) {
|
||||
tablet_info.__set_storage_medium(tablet_ptr->data_dir()->storage_medium());
|
||||
}
|
||||
|
||||
tablet_info.__set_version_count(tablet_ptr->version_count());
|
||||
tablet_info.__set_path_hash(tablet_ptr->data_dir()->path_hash());
|
||||
|
||||
tablet.tablet_infos.push_back(tablet_info);
|
||||
}
|
||||
|
||||
@ -1175,6 +1168,11 @@ void TabletManager::_build_tablet_info(TabletSharedPtr tablet, TTabletInfo* tabl
|
||||
tablet_info->version = version.second;
|
||||
tablet_info->version_hash = v_hash;
|
||||
tablet_info->__set_partition_id(tablet->partition_id());
|
||||
if (_available_storage_medium_type_count > 1) {
|
||||
tablet_info->__set_storage_medium(tablet->data_dir()->storage_medium());
|
||||
}
|
||||
tablet_info->__set_version_count(tablet->version_count());
|
||||
tablet_info->__set_path_hash(tablet->data_dir()->path_hash());
|
||||
}
|
||||
|
||||
void TabletManager::_build_tablet_stat() {
|
||||
|
||||
@ -595,6 +595,10 @@ public class RollupJob extends AlterJob {
|
||||
// the version is not set now
|
||||
rollupReplica.updateVersionInfo(version, versionHash, dataSize, rowCount);
|
||||
|
||||
if (finishTabletInfo.isSetPath_hash()) {
|
||||
rollupReplica.setPathHash(finishTabletInfo.getPath_hash());
|
||||
}
|
||||
|
||||
setReplicaFinished(partitionId, rollupReplicaId);
|
||||
rollupReplica.setState(ReplicaState.NORMAL);
|
||||
|
||||
|
||||
@ -621,6 +621,9 @@ public class SchemaChangeJob extends AlterJob {
|
||||
long rowCount = finishTabletInfo.getRow_count();
|
||||
// do not need check version > replica.getVersion, because the new replica's version is first set by sc
|
||||
replica.updateVersionInfo(version, versionHash, dataSize, rowCount);
|
||||
if (finishTabletInfo.isSetPath_hash()) {
|
||||
replica.setPathHash(finishTabletInfo.getPath_hash());
|
||||
}
|
||||
} finally {
|
||||
db.writeUnlock();
|
||||
}
|
||||
|
||||
@ -67,8 +67,10 @@ import com.google.common.base.Joiner;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ArrayListMultimap;
|
||||
import com.google.common.collect.HashBasedTable;
|
||||
import com.google.common.collect.HashMultimap;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Multimap;
|
||||
import com.google.common.collect.Range;
|
||||
import com.google.common.collect.Table.Cell;
|
||||
|
||||
@ -755,7 +757,7 @@ public class RestoreJob extends AbstractJob {
|
||||
unfinishedSignatureToId.clear();
|
||||
taskProgress.clear();
|
||||
taskErrMsg.clear();
|
||||
Map<Long, Long> pathBeMap = Maps.newHashMap();
|
||||
Multimap<Long, Long> bePathsMap = HashMultimap.create();
|
||||
batchTask = new AgentBatchTask();
|
||||
db.readLock();
|
||||
try {
|
||||
@ -774,14 +776,14 @@ public class RestoreJob extends AbstractJob {
|
||||
true /* is restore task*/);
|
||||
batchTask.addTask(task);
|
||||
unfinishedSignatureToId.put(signature, tablet.getId());
|
||||
pathBeMap.put(replica.getPathHash(), replica.getBackendId());
|
||||
bePathsMap.put(replica.getBackendId(), replica.getPathHash());
|
||||
}
|
||||
} finally {
|
||||
db.readUnlock();
|
||||
}
|
||||
|
||||
// check disk capacity
|
||||
org.apache.doris.common.Status st = Catalog.getCurrentSystemInfo().checkExceedDiskCapacityLimit(pathBeMap, true);
|
||||
org.apache.doris.common.Status st = Catalog.getCurrentSystemInfo().checkExceedDiskCapacityLimit(bePathsMap, true);
|
||||
if (!st.ok()) {
|
||||
status = new Status(ErrCode.COMMON_ERROR, st.getErrorMsg());
|
||||
return;
|
||||
|
||||
@ -26,8 +26,9 @@ import org.apache.doris.common.io.Writable;
|
||||
import org.apache.doris.system.Backend;
|
||||
import org.apache.doris.system.SystemInfoService;
|
||||
|
||||
import com.google.common.collect.HashMultimap;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Multimap;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
@ -40,7 +41,6 @@ import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
@ -186,9 +186,9 @@ public class Tablet extends MetaObject implements Writable {
|
||||
return beIds;
|
||||
}
|
||||
|
||||
// return map of (path hash -> BE id) of normal replicas
|
||||
public Map<Long, Long> getNormalReplicaBackendPathMap() {
|
||||
Map<Long, Long> map = Maps.newHashMap();
|
||||
// return map of (BE id -> path hash) of normal replicas
|
||||
public Multimap<Long, Long> getNormalReplicaBackendPathMap() {
|
||||
Multimap<Long, Long> map = HashMultimap.create();
|
||||
SystemInfoService infoService = Catalog.getCurrentSystemInfo();
|
||||
for (Replica replica : replicas) {
|
||||
if (replica.isBad()) {
|
||||
@ -198,7 +198,7 @@ public class Tablet extends MetaObject implements Writable {
|
||||
ReplicaState state = replica.getState();
|
||||
if (infoService.checkBackendAlive(replica.getBackendId())
|
||||
&& (state == ReplicaState.NORMAL || state == ReplicaState.SCHEMA_CHANGE)) {
|
||||
map.put(replica.getPathHash(), replica.getBackendId());
|
||||
map.put(replica.getBackendId(), replica.getPathHash());
|
||||
}
|
||||
}
|
||||
return map;
|
||||
|
||||
@ -876,6 +876,9 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
|
||||
|
||||
replica.updateVersionInfo(reportedTablet.getVersion(), reportedTablet.getVersion_hash(),
|
||||
reportedTablet.getData_size(), reportedTablet.getRow_count());
|
||||
if (reportedTablet.isSetPath_hash()) {
|
||||
replica.setPathHash(reportedTablet.getPath_hash());
|
||||
}
|
||||
|
||||
if (this.type == Type.BALANCE) {
|
||||
long partitionVisibleVersion = partition.getVisibleVersion();
|
||||
|
||||
@ -55,8 +55,9 @@ import org.apache.doris.thrift.TUniqueId;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.HashMultimap;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Multimap;
|
||||
import com.google.common.collect.Range;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
@ -295,26 +296,27 @@ public class OlapTableSink extends DataSink {
|
||||
|
||||
private TOlapTableLocationParam createLocation(OlapTable table) throws UserException {
|
||||
TOlapTableLocationParam locationParam = new TOlapTableLocationParam();
|
||||
Map<Long, Long> allPathBeMap = Maps.newHashMap();
|
||||
// BE id -> path hash
|
||||
Multimap<Long, Long> allBePathsMap = HashMultimap.create();
|
||||
for (Partition partition : table.getPartitions()) {
|
||||
int quorum = table.getPartitionInfo().getReplicationNum(partition.getId()) / 2 + 1;
|
||||
for (MaterializedIndex index : partition.getMaterializedIndices()) {
|
||||
// we should ensure the replica backend is alive
|
||||
// otherwise, there will be a 'unknown node id, id=xxx' error for stream load
|
||||
for (Tablet tablet : index.getTablets()) {
|
||||
Map<Long, Long> pathBeMap = tablet.getNormalReplicaBackendPathMap();
|
||||
if (pathBeMap.size() < quorum) {
|
||||
throw new UserException("tablet " + tablet.getId() + " has few replicas: " + pathBeMap.size());
|
||||
Multimap<Long, Long> bePathsMap = tablet.getNormalReplicaBackendPathMap();
|
||||
if (bePathsMap.keySet().size() < quorum) {
|
||||
throw new UserException("tablet " + tablet.getId() + " has few replicas: " + bePathsMap.keySet().size());
|
||||
}
|
||||
locationParam.addToTablets(new TTabletLocation(tablet.getId(), Lists.newArrayList(pathBeMap.values())));
|
||||
allPathBeMap.putAll(pathBeMap);
|
||||
locationParam.addToTablets(new TTabletLocation(tablet.getId(), Lists.newArrayList(bePathsMap.keySet())));
|
||||
allBePathsMap.putAll(bePathsMap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check if disk capacity reach limit
|
||||
// this is for load process, so use high water mark to check
|
||||
Status st = Catalog.getCurrentSystemInfo().checkExceedDiskCapacityLimit(allPathBeMap, true);
|
||||
Status st = Catalog.getCurrentSystemInfo().checkExceedDiskCapacityLimit(allBePathsMap, true);
|
||||
if (!st.ok()) {
|
||||
throw new DdlException(st.getErrorMsg());
|
||||
}
|
||||
|
||||
@ -36,6 +36,7 @@ import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Iterators;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Multimap;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import org.apache.commons.validator.routines.InetAddressValidator;
|
||||
@ -1116,19 +1117,21 @@ public class SystemInfoService {
|
||||
|
||||
/*
|
||||
* Check if the specified disks' capacity has reached the limit.
|
||||
* pathBeMap is (path hash -> BE id)
|
||||
* bePathsMap is (BE id -> list of path hash)
|
||||
* If floodStage is true, it will check with the floodStage threshold.
|
||||
*
|
||||
* return Status.OK if not reach the limit
|
||||
*/
|
||||
public Status checkExceedDiskCapacityLimit(Map<Long, Long> pathBeMap, boolean floodStage) {
|
||||
LOG.debug("pathBeMap: {}", pathBeMap);
|
||||
public Status checkExceedDiskCapacityLimit(Multimap<Long, Long> bePathsMap, boolean floodStage) {
|
||||
LOG.debug("pathBeMap: {}", bePathsMap);
|
||||
ImmutableMap<Long, DiskInfo> pathHashToDiskInfo = pathHashToDishInfoRef.get();
|
||||
for (Long pathHash : pathBeMap.keySet()) {
|
||||
DiskInfo diskInfo = pathHashToDiskInfo.get(pathHash);
|
||||
if (diskInfo != null && diskInfo.exceedLimit(floodStage)) {
|
||||
return new Status(TStatusCode.CANCELLED,
|
||||
"disk " + pathHash + " on backend " + pathBeMap.get(pathHash) + " exceed limit usage");
|
||||
for (Long beId : bePathsMap.keySet()) {
|
||||
for (Long pathHash : bePathsMap.get(beId)) {
|
||||
DiskInfo diskInfo = pathHashToDiskInfo.get(pathHash);
|
||||
if (diskInfo != null && diskInfo.exceedLimit(floodStage)) {
|
||||
return new Status(TStatusCode.CANCELLED,
|
||||
"disk " + pathHash + " on backend " + beId + " exceed limit usage");
|
||||
}
|
||||
}
|
||||
}
|
||||
return Status.OK;
|
||||
|
||||
Reference in New Issue
Block a user