diff --git a/store/tikv/2pc.go b/store/tikv/2pc.go index 94bcdefed5..57cde6db56 100644 --- a/store/tikv/2pc.go +++ b/store/tikv/2pc.go @@ -317,6 +317,15 @@ func (c *twoPhaseCommitter) commitSingleBatch(bo *Backoffer, batch batchKeys) er }, } + // If we fail to receive response for the request that commits primary key, it will be undetermined whether this + // transaction has been successfully committed. + // Under this circumstance, we cannot declare the commit is complete (may lead to data loss), nor can we throw + // an error (may lead to the duplicated key error when upper level restarts the transaction). Currently the best + // workaround seems to be an infinite retry until server recovers and returns a success or failure response. + if bytes.Compare(batch.keys[0], c.primary()) == 0 { + bo = NewBackoffer(commitPrimaryMaxBackoff, bo.ctx) + } + resp, err := c.store.SendKVReq(bo, req, batch.region, readTimeoutShort) if err != nil { return errors.Trace(err) diff --git a/store/tikv/backoff.go b/store/tikv/backoff.go index 4944481a2c..d5b3eef44d 100644 --- a/store/tikv/backoff.go +++ b/store/tikv/backoff.go @@ -119,6 +119,7 @@ const ( getMaxBackoff = 10000 prewriteMaxBackoff = 10000 commitMaxBackoff = 10000 + commitPrimaryMaxBackoff = -1 cleanupMaxBackoff = 10000 gcMaxBackoff = 100000 gcResolveLockMaxBackoff = 100000 @@ -169,7 +170,7 @@ func (b *Backoffer) Backoff(typ backoffType, err error) error { log.Warnf("%v, retry later(totalSleep %dms, maxSleep %dms)", err, b.totalSleep, b.maxSleep) b.errors = append(b.errors, err) - if b.totalSleep >= b.maxSleep { + if b.maxSleep > 0 && b.totalSleep >= b.maxSleep { errMsg := fmt.Sprintf("backoffer.maxSleep %dms is exceeded, errors:", b.maxSleep) for i, err := range b.errors { // Print only last 3 errors for non-DEBUG log levels.