pdutil/backend: enlarge max retry time and fix nested retriable error (#48210)

close pingcap/tidb#46950
This commit is contained in:
D3Hunter
2023-11-02 15:41:08 +08:00
committed by GitHub
parent 44cbc91a63
commit 9c92e06573
3 changed files with 9 additions and 2 deletions

View File

@ -105,7 +105,9 @@ func isSingleRetryableError(err error) bool {
if nerr.Timeout() {
return true
}
if syscallErr, ok := goerrors.Unwrap(err).(*os.SyscallError); ok {
// the error might be nested, such as *url.Error -> *net.OpError -> *os.SyscallError
var syscallErr *os.SyscallError
if goerrors.As(nerr, &syscallErr) {
return syscallErr.Err == syscall.ECONNREFUSED || syscallErr.Err == syscall.ECONNRESET
}
return false

View File

@ -19,6 +19,7 @@ import (
"fmt"
"io"
"net"
"net/url"
"testing"
"github.com/go-sql-driver/mysql"
@ -66,6 +67,9 @@ func TestIsRetryableError(t *testing.T) {
_, err := net.Dial("tcp", "localhost:65533")
require.Error(t, err)
require.True(t, IsRetryableError(err))
// wrap *net.OpError inside *url.Error
urlErr := &url.Error{Op: "post", Err: err}
require.True(t, IsRetryableError(urlErr))
// MySQL Errors
require.False(t, IsRetryableError(&mysql.MySQLError{}))

View File

@ -41,7 +41,7 @@ const (
maxMsgSize = int(128 * units.MiB) // pd.ScanRegion may return a large response
pauseTimeout = 5 * time.Minute
// max number of retries for a PD request when the connection fails
pdRequestRetryTime = 10
pdRequestRetryTime = 120
// set max-pending-peer-count to a large value to avoid scatter region failed.
maxPendingPeerUnlimited uint64 = math.MaxInt32
)
@ -157,6 +157,7 @@ func pdRequestWithCode(
resp *http.Response
)
count := 0
// the total retry duration: 120*1 = 2min
for {
req, err = http.NewRequestWithContext(ctx, method, reqURL, body)
if err != nil {