[improvement](transaction) make commit txn fail hint more understandable (#23227)

This commit is contained in:
yujun
2023-08-23 21:50:24 +08:00
committed by GitHub
parent 448b7755c6
commit 156f7b7699
3 changed files with 62 additions and 25 deletions

View File

@ -410,7 +410,8 @@ public class OlapTableSink extends DataSink {
Multimap<Long, Long> bePathsMap = tablet.getNormalReplicaBackendPathMap();
if (bePathsMap.keySet().size() < quorum) {
throw new UserException(InternalErrorCode.REPLICA_FEW_ERR,
"tablet " + tablet.getId() + " has few replicas: " + bePathsMap.keySet().size()
"tablet " + tablet.getId() + " alive replica num " + bePathsMap.keySet().size()
+ " < quorum replica num " + quorum
+ ", alive backends: [" + StringUtils.join(bePathsMap.keySet(), ",") + "]");
}

View File

@ -81,6 +81,7 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
@ -434,6 +435,7 @@ public class DatabaseTransactionMgr {
Set<Long> errorReplicaIds, Map<Long, Set<Long>> tableToPartition,
Set<Long> totalInvolvedBackends) throws UserException {
long transactionId = transactionState.getTransactionId();
Database db = env.getInternalCatalog().getDbOrMetaException(dbId);
// update transaction state extra if exists
@ -490,6 +492,33 @@ public class DatabaseTransactionMgr {
}
tabletToBackends.get(tabletId).add(tabletCommitInfos.get(i).getBackendId());
}
List<String> tabletSuccReplicas = Lists.newArrayList();
List<String> tabletWriteFailedReplicas = Lists.newArrayList();
List<String> tabletVersionFailedReplicas = Lists.newArrayList();
// Renders one replica as a bracketed, comma-separated "key=value" summary:
// replicaId, backendId, backendAlive, version and state are always present;
// lastFailedVersion / lastSuccessVersion / lastFailedTimestamp are added only
// when the replica's lastFailedVersion is >= 0.
Function<Replica, String> getReplicaInfo = replica -> {
    StringBuilder info = new StringBuilder("[replicaId=")
            .append(replica.getId())
            .append(", backendId=")
            .append(replica.getBackendId())
            .append(", backendAlive=")
            .append(Env.getCurrentSystemInfo().checkBackendAlive(replica.getBackendId()))
            .append(", version=")
            .append(replica.getVersion());
    if (replica.getLastFailedVersion() >= 0) {
        info.append(", lastFailedVersion=")
                .append(replica.getLastFailedVersion())
                .append(", lastSuccessVersion=")
                .append(replica.getLastSuccessVersion())
                .append(", lastFailedTimestamp=")
                .append(replica.getLastFailedTimestamp());
    }
    return info.append(", state=")
            .append(replica.getState().name())
            .append("]")
            .toString();
};
for (long tableId : tableToPartition.keySet()) {
OlapTable table = (OlapTable) db.getTableOrMetaException(tableId);
for (Partition partition : table.getAllPartitions()) {
@ -533,6 +562,9 @@ public class DatabaseTransactionMgr {
.getReplicaAllocation(partition.getId()).getTotalReplicaNum() / 2 + 1;
for (MaterializedIndex index : allIndices) {
for (Tablet tablet : index.getTablets()) {
tabletSuccReplicas.clear();
tabletWriteFailedReplicas.clear();
tabletVersionFailedReplicas.clear();
int successReplicaNum = 0;
long tabletId = tablet.getId();
Set<Long> tabletBackends = tablet.getBackendIds();
@ -558,11 +590,14 @@ public class DatabaseTransactionMgr {
// for example, a replica is in clone state
if (replica.getLastFailedVersion() < 0) {
++successReplicaNum;
tabletSuccReplicas.add(getReplicaInfo.apply(replica));
} else {
errorReplicaInfo += " replica [" + replica.getId() + "], lastFailedVersion ["
+ replica.getLastFailedVersion() + "]";
tabletVersionFailedReplicas.add(getReplicaInfo.apply(replica));
}
} else {
tabletWriteFailedReplicas.add(getReplicaInfo.apply(replica));
errorBackendIdsForTablet.add(tabletBackend);
errorReplicaIds.add(replica.getId());
// not remove rollup task here, because the commit maybe failed
@ -580,9 +615,29 @@ public class DatabaseTransactionMgr {
transactionState.getTransactionId(), tablet.getId(), successReplicaNum,
quorumReplicaNum, Joiner.on(",").join(errorBackendIdsForTablet),
errorReplicaInfo, commitBackends);
throw new TabletQuorumFailedException(transactionState.getTransactionId(), tablet.getId(),
successReplicaNum, quorumReplicaNum,
errorBackendIdsForTablet);
// Assemble a per-category breakdown of this tablet's replicas for the error message.
// Each non-empty category contributes "<count> replicas <category>: { <replica infos> }".
String replicasDetailMsg = "";
if (!tabletSuccReplicas.isEmpty()) {
    // Replicas that were counted into successReplicaNum.
    replicasDetailMsg += String.format("%s replicas final succ: { %s }; ",
            tabletSuccReplicas.size(), Joiner.on(", ").join(tabletSuccReplicas));
}
if (!tabletWriteFailedReplicas.isEmpty()) {
    // Replicas recorded as error replicas for this tablet.
    replicasDetailMsg += String.format("%s replicas write data failed: { %s }; ",
            tabletWriteFailedReplicas.size(),
            Joiner.on(", ").join(tabletWriteFailedReplicas));
}
if (!tabletVersionFailedReplicas.isEmpty()) {
    // Replicas whose lastFailedVersion is >= 0, so they were not counted as successful.
    replicasDetailMsg += String.format("%s replicas write data succ but miss previous "
                    + "version: { %s }.",
            tabletVersionFailedReplicas.size(),
            Joiner.on(", ").join(tabletVersionFailedReplicas));
}
// The two literal halves are joined with exactly one space: the first ends with
// "quorum " and the second starts with "replica" (previously the second half also
// started with a space, producing "quorum  replica num" in the message).
throw new TabletQuorumFailedException(transactionId, String.format(
        "Failed to commit txn %s, cause tablet %s succ replica num %s < quorum "
                + "replica num %s. table %s, partition %s, this tablet detail: %s",
        transactionId, tablet.getId(), successReplicaNum, quorumReplicaNum, tableId,
        partition.getId(), replicasDetailMsg));
}
}
}

View File

@ -17,27 +17,8 @@
package org.apache.doris.transaction;
import com.google.common.base.Joiner;
import com.google.common.collect.Sets;
import java.util.Set;
public class TabletQuorumFailedException extends TransactionException {
private static final String TABLET_QUORUM_FAILED_MSG = "Failed to commit txn %s. "
+ "Tablet [%s] success replica num %s is less than quorum "
+ "replica num %s while error backends %s";
private long tabletId;
private Set<Long> errorBackendIdsForTablet = Sets.newHashSet();
public TabletQuorumFailedException(long transactionId, long tabletId,
int successReplicaNum, int quorumReplicaNum,
Set<Long> errorBackendIdsForTablet) {
super(String.format(TABLET_QUORUM_FAILED_MSG, transactionId, tabletId,
successReplicaNum, quorumReplicaNum,
Joiner.on(",").join(errorBackendIdsForTablet)), transactionId);
this.tabletId = tabletId;
this.errorBackendIdsForTablet = errorBackendIdsForTablet;
/**
 * Creates the exception from an already-formatted message.
 *
 * @param transactionId id of the transaction, forwarded to the superclass constructor
 * @param message       complete error text, forwarded unchanged to the superclass constructor
 */
public TabletQuorumFailedException(long transactionId, String message) {
super(message, transactionId);
}
}