[improvement](replica) donot care last failed version in publish (#21001)

We just care 2 things:
1. If the replica acks right
2. If the replica catches up
This commit is contained in:
Yongqiang YANG
2023-06-20 15:57:54 +08:00
committed by GitHub
parent 0b1bbe4045
commit cb89af49e7

View File

@ -938,19 +938,14 @@ public class DatabaseTransactionMgr {
for (Tablet tablet : index.getTablets()) {
int healthReplicaNum = 0;
for (Replica replica : tablet.getReplicas()) {
if (replica.getLastFailedVersion() >= 0) {
LOG.info("publish version failed for transaction {} on tablet {},"
+ " on replica {} due to lastFailedVersion >= 0",
transactionState, tablet, replica);
continue;
}
if (!errorReplicaIds.contains(replica.getId())) {
if (replica.checkVersionCatchUp(partition.getVisibleVersion(), true)) {
++healthReplicaNum;
} else {
LOG.info("publish version failed for transaction {} on tablet {},"
LOG.info("publish version {} failed for transaction {} on tablet {},"
+ " on replica {} due to not catchup",
transactionState, tablet, replica);
partitionCommitInfo.getVersion(), transactionState, tablet,
replica);
}
} else if (replica.getVersion() >= partitionCommitInfo.getVersion()) {
// the replica's version is larger than or equal to current transaction
@ -959,15 +954,17 @@ public class DatabaseTransactionMgr {
errorReplicaIds.remove(replica.getId());
++healthReplicaNum;
} else {
LOG.info("publish version failed for transaction {} on tablet {},"
+ " on replica {} due to version hole", transactionState, tablet, replica);
LOG.info("publish version {} failed for transaction {} on tablet {},"
+ " on replica {} due to version hole or error",
partitionCommitInfo.getVersion(), transactionState, tablet, replica);
}
}
if (healthReplicaNum < quorumReplicaNum) {
LOG.info("publish version failed for transaction {} on tablet {},"
LOG.info("publish version {} failed for transaction {} on tablet {},"
+ " with only {} replicas less than quorum {}",
transactionState, tablet, healthReplicaNum, quorumReplicaNum);
partitionCommitInfo.getVersion(), transactionState, tablet, healthReplicaNum,
quorumReplicaNum);
String errMsg = String.format("publish on tablet %d failed."
+ " succeed replica num %d less than quorum %d."
+ " table: %d, partition: %d, publish version: %d",
@ -1637,10 +1634,7 @@ public class DatabaseTransactionMgr {
long newVersion = newCommitVersion;
long lastSuccessVersion = replica.getLastSuccessVersion();
if (!errorReplicaIds.contains(replica.getId())) {
if (replica.getLastFailedVersion() > 0) {
// if the replica is a failed replica, then not changing version
newVersion = replica.getVersion();
} else if (!replica.checkVersionCatchUp(partition.getVisibleVersion(), true)) {
if (!replica.checkVersionCatchUp(partition.getVisibleVersion(), true)) {
// this means the replica has error in the past, but we did not observe it
// during upgrade, one job maybe in quorum finished state, for example,
// A,B,C 3 replica A,B 's version is 10, C's version is 10 but C' 10 is abnormal