From 60065dfbee0a37dbad3380f6a389b970e581d64e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B1=B1=E5=B2=9A?= <36239017+YuJuncen@users.noreply.github.com> Date: Fri, 20 Aug 2021 22:00:01 +0800 Subject: [PATCH] restore: fix failed to retry grpc errors (#27423) --- br/pkg/restore/backoff.go | 5 +++-- br/pkg/restore/import.go | 4 ++++ br/tests/br_full/run.sh | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/br/pkg/restore/backoff.go b/br/pkg/restore/backoff.go index 1f39ff78d1..80e8c91edb 100644 --- a/br/pkg/restore/backoff.go +++ b/br/pkg/restore/backoff.go @@ -56,7 +56,8 @@ func (bo *importerBackoffer) NextBackoff(err error) time.Duration { bo.delayTime = 2 * bo.delayTime bo.attempt-- } else { - switch errors.Cause(err) { // nolint:errorlint + e := errors.Cause(err) + switch e { // nolint:errorlint case berrors.ErrKVEpochNotMatch, berrors.ErrKVDownloadFailed, berrors.ErrKVIngestFailed: bo.delayTime = 2 * bo.delayTime bo.attempt-- @@ -65,7 +66,7 @@ func (bo *importerBackoffer) NextBackoff(err error) time.Duration { bo.delayTime = 0 bo.attempt = 0 default: - switch status.Code(err) { + switch status.Code(e) { case codes.Unavailable, codes.Aborted: bo.delayTime = 2 * bo.delayTime bo.attempt-- diff --git a/br/pkg/restore/import.go b/br/pkg/restore/import.go index 9cd7f0a2fa..83ec6c5957 100644 --- a/br/pkg/restore/import.go +++ b/br/pkg/restore/import.go @@ -317,6 +317,10 @@ func (importer *FileImporter) Import( log.Debug("failpoint restore-storage-error injected.", zap.String("msg", msg)) e = errors.Annotate(e, msg) }) + failpoint.Inject("restore-gRPC-error", func(_ failpoint.Value) { + log.Warn("the connection to TiKV has been cut by a neko, meow :3") + e = status.Error(codes.Unavailable, "the connection to TiKV has been cut by a neko, meow :3") + }) if e != nil { remainFiles = remainFiles[i:] return errors.Trace(e) diff --git a/br/tests/br_full/run.sh b/br/tests/br_full/run.sh index 4fab796863..0d15794788 100755 --- a/br/tests/br_full/run.sh +++ b/br/tests/br_full/run.sh @@ -70,7 +70,7 @@ for ct in limit lz4 zstd; do # restore full echo "restore with $ct backup start..." - export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/restore/restore-storage-error=1*return(\"connection refused\")" + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/restore/restore-storage-error=1*return(\"connection refused\");github.com/pingcap/tidb/br/pkg/restore/restore-gRPC-error=1*return(true)" run_br restore full -s "local://$TEST_DIR/$DB-$ct" --pd $PD_ADDR --ratelimit 1024 export GO_FAILPOINTS=""