[improvement](replica) donot care last failed version in publish (#21001)
We just care 2 things: 1. If the replica acks right 2. If the replica catches up
This commit is contained in:
@ -938,19 +938,14 @@ public class DatabaseTransactionMgr {
|
||||
for (Tablet tablet : index.getTablets()) {
|
||||
int healthReplicaNum = 0;
|
||||
for (Replica replica : tablet.getReplicas()) {
|
||||
if (replica.getLastFailedVersion() >= 0) {
|
||||
LOG.info("publish version failed for transaction {} on tablet {},"
|
||||
+ " on replica {} due to lastFailedVersion >= 0",
|
||||
transactionState, tablet, replica);
|
||||
continue;
|
||||
}
|
||||
if (!errorReplicaIds.contains(replica.getId())) {
|
||||
if (replica.checkVersionCatchUp(partition.getVisibleVersion(), true)) {
|
||||
++healthReplicaNum;
|
||||
} else {
|
||||
LOG.info("publish version failed for transaction {} on tablet {},"
|
||||
LOG.info("publish version {} failed for transaction {} on tablet {},"
|
||||
+ " on replica {} due to not catchup",
|
||||
transactionState, tablet, replica);
|
||||
partitionCommitInfo.getVersion(), transactionState, tablet,
|
||||
replica);
|
||||
}
|
||||
} else if (replica.getVersion() >= partitionCommitInfo.getVersion()) {
|
||||
// the replica's version is larger than or equal to current transaction
|
||||
@ -959,15 +954,17 @@ public class DatabaseTransactionMgr {
|
||||
errorReplicaIds.remove(replica.getId());
|
||||
++healthReplicaNum;
|
||||
} else {
|
||||
LOG.info("publish version failed for transaction {} on tablet {},"
|
||||
+ " on replica {} due to version hole", transactionState, tablet, replica);
|
||||
LOG.info("publish version {} failed for transaction {} on tablet {},"
|
||||
+ " on replica {} due to version hole or error",
|
||||
partitionCommitInfo.getVersion(), transactionState, tablet, replica);
|
||||
}
|
||||
}
|
||||
|
||||
if (healthReplicaNum < quorumReplicaNum) {
|
||||
LOG.info("publish version failed for transaction {} on tablet {},"
|
||||
LOG.info("publish version {} failed for transaction {} on tablet {},"
|
||||
+ " with only {} replicas less than quorum {}",
|
||||
transactionState, tablet, healthReplicaNum, quorumReplicaNum);
|
||||
partitionCommitInfo.getVersion(), transactionState, tablet, healthReplicaNum,
|
||||
quorumReplicaNum);
|
||||
String errMsg = String.format("publish on tablet %d failed."
|
||||
+ " succeed replica num %d less than quorum %d."
|
||||
+ " table: %d, partition: %d, publish version: %d",
|
||||
@ -1637,10 +1634,7 @@ public class DatabaseTransactionMgr {
|
||||
long newVersion = newCommitVersion;
|
||||
long lastSuccessVersion = replica.getLastSuccessVersion();
|
||||
if (!errorReplicaIds.contains(replica.getId())) {
|
||||
if (replica.getLastFailedVersion() > 0) {
|
||||
// if the replica is a failed replica, then not changing version
|
||||
newVersion = replica.getVersion();
|
||||
} else if (!replica.checkVersionCatchUp(partition.getVisibleVersion(), true)) {
|
||||
if (!replica.checkVersionCatchUp(partition.getVisibleVersion(), true)) {
|
||||
// this means the replica has error in the past, but we did not observe it
|
||||
// during upgrade, one job maybe in quorum finished state, for example,
|
||||
// A,B,C 3 replica A,B 's version is 10, C's version is 10 but C' 10 is abnormal
|
||||
|
||||
Reference in New Issue
Block a user