318 lines
9.2 KiB
Go
318 lines
9.2 KiB
Go
// Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0.
|
|
package split
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/pingcap/failpoint"
|
|
"github.com/pingcap/kvproto/pkg/metapb"
|
|
"github.com/pingcap/kvproto/pkg/pdpb"
|
|
berrors "github.com/pingcap/tidb/br/pkg/errors"
|
|
"github.com/pingcap/tidb/br/pkg/utils"
|
|
"github.com/stretchr/testify/require"
|
|
pd "github.com/tikv/pd/client"
|
|
"google.golang.org/grpc/codes"
|
|
"google.golang.org/grpc/status"
|
|
)
|
|
|
|
func TestScanRegionBackOfferWithSuccess(t *testing.T) {
|
|
var counter int
|
|
bo := NewWaitRegionOnlineBackoffer()
|
|
|
|
err := utils.WithRetry(context.Background(), func() error {
|
|
defer func() {
|
|
counter++
|
|
}()
|
|
|
|
if counter == 3 {
|
|
return nil
|
|
}
|
|
return berrors.ErrPDBatchScanRegion
|
|
}, bo)
|
|
require.NoError(t, err)
|
|
require.Equal(t, counter, 4)
|
|
}
|
|
|
|
func TestScanRegionBackOfferWithFail(t *testing.T) {
|
|
_ = failpoint.Enable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff", "return(true)")
|
|
defer func() {
|
|
_ = failpoint.Disable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff")
|
|
}()
|
|
|
|
var counter int
|
|
bo := NewWaitRegionOnlineBackoffer()
|
|
|
|
err := utils.WithRetry(context.Background(), func() error {
|
|
defer func() {
|
|
counter++
|
|
}()
|
|
return berrors.ErrPDBatchScanRegion
|
|
}, bo)
|
|
require.Error(t, err)
|
|
require.Equal(t, counter, WaitRegionOnlineAttemptTimes)
|
|
}
|
|
|
|
func TestScanRegionBackOfferWithStopRetry(t *testing.T) {
|
|
_ = failpoint.Enable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff", "return(true)")
|
|
defer func() {
|
|
_ = failpoint.Disable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff")
|
|
}()
|
|
|
|
var counter int
|
|
bo := NewWaitRegionOnlineBackoffer()
|
|
|
|
err := utils.WithRetry(context.Background(), func() error {
|
|
defer func() {
|
|
counter++
|
|
}()
|
|
|
|
if counter < 5 {
|
|
return berrors.ErrPDBatchScanRegion
|
|
}
|
|
return berrors.ErrKVUnknown
|
|
}, bo)
|
|
require.Error(t, err)
|
|
require.Equal(t, counter, 6)
|
|
}
|
|
|
|
type mockScatterFailedPDClient struct {
|
|
pd.Client
|
|
failed map[uint64]int
|
|
failedBefore int
|
|
}
|
|
|
|
func (c *mockScatterFailedPDClient) ScatterRegion(ctx context.Context, regionID uint64) error {
|
|
if c.failed == nil {
|
|
c.failed = make(map[uint64]int)
|
|
}
|
|
c.failed[regionID]++
|
|
if c.failed[regionID] > c.failedBefore {
|
|
return nil
|
|
}
|
|
return status.Errorf(codes.Unknown, "region %d is not fully replicated", regionID)
|
|
}
|
|
|
|
type recordCntBackoffer struct {
|
|
already int
|
|
}
|
|
|
|
func (b *recordCntBackoffer) NextBackoff(error) time.Duration {
|
|
b.already++
|
|
return 0
|
|
}
|
|
|
|
func (b *recordCntBackoffer) Attempt() int {
|
|
return 100
|
|
}
|
|
|
|
func TestScatterSequentiallyRetryCnt(t *testing.T) {
|
|
client := pdClient{
|
|
needScatterVal: true,
|
|
client: &mockScatterFailedPDClient{failedBefore: 7},
|
|
}
|
|
client.needScatterInit.Do(func() {})
|
|
|
|
ctx := context.Background()
|
|
regions := []*RegionInfo{
|
|
{
|
|
Region: &metapb.Region{
|
|
Id: 1,
|
|
},
|
|
},
|
|
{
|
|
Region: &metapb.Region{
|
|
Id: 2,
|
|
},
|
|
},
|
|
}
|
|
backoffer := &recordCntBackoffer{}
|
|
client.scatterRegionsSequentially(
|
|
ctx,
|
|
regions,
|
|
backoffer,
|
|
)
|
|
require.Equal(t, 7, backoffer.already)
|
|
}
|
|
|
|
type mockOldPDClient struct {
|
|
pd.Client
|
|
|
|
scattered map[uint64]int
|
|
getOperatorResps map[uint64][]*pdpb.GetOperatorResponse
|
|
}
|
|
|
|
func (c *mockOldPDClient) ScatterRegion(_ context.Context, regionID uint64) error {
|
|
if c.scattered == nil {
|
|
c.scattered = make(map[uint64]int)
|
|
}
|
|
c.scattered[regionID]++
|
|
return nil
|
|
}
|
|
|
|
func (c *mockOldPDClient) ScatterRegions(context.Context, []uint64, ...pd.RegionsOption) (*pdpb.ScatterRegionResponse, error) {
|
|
return nil, status.Error(codes.Unimplemented, "Ah, yep")
|
|
}
|
|
|
|
func (c *mockOldPDClient) GetOperator(_ context.Context, regionID uint64) (*pdpb.GetOperatorResponse, error) {
|
|
ret := c.getOperatorResps[regionID][0]
|
|
c.getOperatorResps[regionID] = c.getOperatorResps[regionID][1:]
|
|
return ret, nil
|
|
}
|
|
|
|
func TestScatterBackwardCompatibility(t *testing.T) {
|
|
client := pdClient{
|
|
needScatterVal: true,
|
|
client: &mockOldPDClient{},
|
|
}
|
|
client.needScatterInit.Do(func() {})
|
|
|
|
ctx := context.Background()
|
|
regions := []*RegionInfo{
|
|
{
|
|
Region: &metapb.Region{
|
|
Id: 1,
|
|
},
|
|
},
|
|
{
|
|
Region: &metapb.Region{
|
|
Id: 2,
|
|
},
|
|
},
|
|
}
|
|
err := client.ScatterRegions(ctx, regions)
|
|
require.NoError(t, err)
|
|
require.Equal(t, map[uint64]int{1: 1, 2: 1}, client.client.(*mockOldPDClient).scattered)
|
|
}
|
|
|
|
func TestWaitForScatterRegions(t *testing.T) {
|
|
mockPDCli := &mockOldPDClient{}
|
|
client := pdClient{
|
|
needScatterVal: true,
|
|
client: mockPDCli,
|
|
}
|
|
client.needScatterInit.Do(func() {})
|
|
regionCnt := 6
|
|
checkGetOperatorRespsDrained := func() {
|
|
for i := 1; i <= regionCnt; i++ {
|
|
require.Len(t, mockPDCli.getOperatorResps[uint64(i)], 0)
|
|
}
|
|
}
|
|
checkNoRetry := func() {
|
|
for i := 1; i <= regionCnt; i++ {
|
|
require.Equal(t, 0, mockPDCli.scattered[uint64(i)])
|
|
}
|
|
}
|
|
|
|
ctx := context.Background()
|
|
regions := make([]*RegionInfo, 0, regionCnt)
|
|
for i := 1; i <= regionCnt; i++ {
|
|
regions = append(regions, &RegionInfo{
|
|
Region: &metapb.Region{
|
|
Id: uint64(i),
|
|
},
|
|
})
|
|
}
|
|
|
|
mockPDCli.scattered = make(map[uint64]int)
|
|
mockPDCli.getOperatorResps = make(map[uint64][]*pdpb.GetOperatorResponse)
|
|
mockPDCli.getOperatorResps[1] = []*pdpb.GetOperatorResponse{
|
|
{Header: &pdpb.ResponseHeader{Error: &pdpb.Error{Type: pdpb.ErrorType_REGION_NOT_FOUND}}},
|
|
}
|
|
mockPDCli.getOperatorResps[2] = []*pdpb.GetOperatorResponse{
|
|
{Desc: []byte("not-scatter-region")},
|
|
}
|
|
mockPDCli.getOperatorResps[3] = []*pdpb.GetOperatorResponse{
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_SUCCESS},
|
|
}
|
|
mockPDCli.getOperatorResps[4] = []*pdpb.GetOperatorResponse{
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_RUNNING},
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_TIMEOUT},
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_SUCCESS},
|
|
}
|
|
mockPDCli.getOperatorResps[5] = []*pdpb.GetOperatorResponse{
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_CANCEL},
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_CANCEL},
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_CANCEL},
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_RUNNING},
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_RUNNING},
|
|
{Desc: []byte("not-scatter-region")},
|
|
}
|
|
// should trigger a retry
|
|
mockPDCli.getOperatorResps[6] = []*pdpb.GetOperatorResponse{
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_REPLACE},
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_SUCCESS},
|
|
}
|
|
|
|
left, err := client.WaitRegionsScattered(ctx, regions)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, left)
|
|
for i := 1; i <= 3; i++ {
|
|
require.Equal(t, 0, mockPDCli.scattered[uint64(i)])
|
|
}
|
|
// OperatorStatus_TIMEOUT should trigger rescatter once
|
|
require.Equal(t, 1, mockPDCli.scattered[uint64(4)])
|
|
// 3 * OperatorStatus_CANCEL should trigger 3 * rescatter
|
|
require.Equal(t, 3, mockPDCli.scattered[uint64(5)])
|
|
// OperatorStatus_REPLACE should trigger rescatter once
|
|
require.Equal(t, 1, mockPDCli.scattered[uint64(6)])
|
|
checkGetOperatorRespsDrained()
|
|
|
|
// test non-retryable error
|
|
|
|
mockPDCli.scattered = make(map[uint64]int)
|
|
mockPDCli.getOperatorResps = make(map[uint64][]*pdpb.GetOperatorResponse)
|
|
mockPDCli.getOperatorResps[1] = []*pdpb.GetOperatorResponse{
|
|
{Header: &pdpb.ResponseHeader{Error: &pdpb.Error{Type: pdpb.ErrorType_REGION_NOT_FOUND}}},
|
|
}
|
|
mockPDCli.getOperatorResps[2] = []*pdpb.GetOperatorResponse{
|
|
{Desc: []byte("not-scatter-region")},
|
|
}
|
|
// mimic non-retryable error
|
|
mockPDCli.getOperatorResps[3] = []*pdpb.GetOperatorResponse{
|
|
{Header: &pdpb.ResponseHeader{Error: &pdpb.Error{Type: pdpb.ErrorType_DATA_COMPACTED}}},
|
|
}
|
|
left, err = client.WaitRegionsScattered(ctx, regions)
|
|
require.ErrorContains(t, err, "get operator error: DATA_COMPACTED")
|
|
require.Equal(t, 4, left) // region 3,4,5,6 is not scattered
|
|
checkGetOperatorRespsDrained()
|
|
checkNoRetry()
|
|
|
|
// test backoff is timed-out
|
|
|
|
backup := WaitRegionOnlineAttemptTimes
|
|
WaitRegionOnlineAttemptTimes = 2
|
|
t.Cleanup(func() {
|
|
WaitRegionOnlineAttemptTimes = backup
|
|
})
|
|
|
|
mockPDCli.scattered = make(map[uint64]int)
|
|
mockPDCli.getOperatorResps = make(map[uint64][]*pdpb.GetOperatorResponse)
|
|
mockPDCli.getOperatorResps[1] = []*pdpb.GetOperatorResponse{
|
|
{Header: &pdpb.ResponseHeader{Error: &pdpb.Error{Type: pdpb.ErrorType_REGION_NOT_FOUND}}},
|
|
}
|
|
mockPDCli.getOperatorResps[2] = []*pdpb.GetOperatorResponse{
|
|
{Desc: []byte("not-scatter-region")},
|
|
}
|
|
mockPDCli.getOperatorResps[3] = []*pdpb.GetOperatorResponse{
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_SUCCESS},
|
|
}
|
|
mockPDCli.getOperatorResps[4] = []*pdpb.GetOperatorResponse{
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_RUNNING},
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_RUNNING}, // first retry
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_RUNNING}, // second retry
|
|
}
|
|
mockPDCli.getOperatorResps[5] = []*pdpb.GetOperatorResponse{
|
|
{Desc: []byte("not-scatter-region")},
|
|
}
|
|
mockPDCli.getOperatorResps[6] = []*pdpb.GetOperatorResponse{
|
|
{Desc: []byte("scatter-region"), Status: pdpb.OperatorStatus_SUCCESS},
|
|
}
|
|
left, err = client.WaitRegionsScattered(ctx, regions)
|
|
require.ErrorContains(t, err, "wait for scatter region timeout, print the first unfinished region: id:4")
|
|
require.Equal(t, 1, left)
|
|
checkGetOperatorRespsDrained()
|
|
checkNoRetry()
|
|
}
|