From c12bf3fa024d88db3c7d3bbbc1bcf66d345fd0db Mon Sep 17 00:00:00 2001 From: ris <79858083+RidRisR@users.noreply.github.com> Date: Wed, 19 Jun 2024 16:56:47 +0800 Subject: [PATCH] br: fix backoffer can't handle multierrs (#54084) close pingcap/tidb#54053 --- br/pkg/restore/log_client/BUILD.bazel | 2 +- .../restore/log_client/import_retry_test.go | 22 +++++++++++++++++++ br/pkg/utils/backoff.go | 9 +++++--- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/br/pkg/restore/log_client/BUILD.bazel b/br/pkg/restore/log_client/BUILD.bazel index f6c932a04a..e297db4829 100644 --- a/br/pkg/restore/log_client/BUILD.bazel +++ b/br/pkg/restore/log_client/BUILD.bazel @@ -82,7 +82,7 @@ go_test( ], embed = [":log_client"], flaky = True, - shard_count = 38, + shard_count = 39, deps = [ "//br/pkg/errors", "//br/pkg/gluetidb", diff --git a/br/pkg/restore/log_client/import_retry_test.go b/br/pkg/restore/log_client/import_retry_test.go index 4234f6fd63..5c47f1f3ac 100644 --- a/br/pkg/restore/log_client/import_retry_test.go +++ b/br/pkg/restore/log_client/import_retry_test.go @@ -609,3 +609,25 @@ func TestPaginateScanLeader(t *testing.T) { }) assertRegions(t, collectedRegions, "", "aay", "bba") } + +func TestRetryRecognizeErrCode(t *testing.T) { + waitTime := 1 * time.Millisecond + maxWaitTime := 16 * time.Millisecond + ctx := context.Background() + inner := 0 + outer := 0 + utils.WithRetry(ctx, func() error { + e := utils.WithRetry(ctx, func() error { + inner++ + e := status.Error(codes.Unavailable, "the connection to TiKV has been cut by a neko, meow :3") + if e != nil { + return errors.Trace(e) + } + return nil + }, utils.NewBackoffer(10, waitTime, maxWaitTime, utils.NewErrorContext("download sst", 3))) + outer++ + return errors.Trace(e) + }, utils.NewBackoffer(10, waitTime, maxWaitTime, utils.NewErrorContext("import sst", 3))) + require.Equal(t, 10, outer) + require.Equal(t, 100, inner) +} diff --git a/br/pkg/utils/backoff.go b/br/pkg/utils/backoff.go index f669b07a51..d1f82db6fe 100644 --- a/br/pkg/utils/backoff.go +++ b/br/pkg/utils/backoff.go @@ -14,6 +14,7 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/log" berrors "github.com/pingcap/tidb/br/pkg/errors" + "go.uber.org/multierr" "go.uber.org/zap" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -169,12 +170,14 @@ func NewBackupSSTBackoffer() Backoffer { func (bo *importerBackoffer) NextBackoff(err error) time.Duration { // we don't care storeID here. - res := bo.errContext.HandleErrorMsg(err.Error(), 0) + errs := multierr.Errors(err) + lastErr := errs[len(errs)-1] + res := bo.errContext.HandleErrorMsg(lastErr.Error(), 0) if res.Strategy == RetryStrategy { bo.delayTime = 2 * bo.delayTime bo.attempt-- } else { - e := errors.Cause(err) + e := errors.Cause(lastErr) switch e { // nolint:errorlint case berrors.ErrKVEpochNotMatch, berrors.ErrKVDownloadFailed, berrors.ErrKVIngestFailed, berrors.ErrPDLeaderNotFound: bo.delayTime = 2 * bo.delayTime @@ -189,7 +192,7 @@ func (bo *importerBackoffer) NextBackoff(err error) time.Duration { bo.delayTime = 2 * bo.delayTime bo.attempt-- case codes.Canceled: - if isGRPCCancel(err) { + if isGRPCCancel(lastErr) { bo.delayTime = 2 * bo.delayTime bo.attempt-- } else {