Files
tidb/br/pkg/restore/split/split_test.go

318 lines
9.2 KiB
Go

// Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0.
package split
import (
"context"
"testing"
"time"
"github.com/pingcap/failpoint"
"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pingcap/kvproto/pkg/pdpb"
berrors "github.com/pingcap/tidb/br/pkg/errors"
"github.com/pingcap/tidb/br/pkg/utils"
"github.com/stretchr/testify/require"
pd "github.com/tikv/pd/client"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// TestScanRegionBackOfferWithSuccess expects utils.WithRetry, driven by the
// backoffer from NewWaitRegionOnlineBackoffer, to keep invoking the callback
// while it returns berrors.ErrPDBatchScanRegion and to report success as soon
// as the callback returns nil.
func TestScanRegionBackOfferWithSuccess(t *testing.T) {
	var counter int
	bo := NewWaitRegionOnlineBackoffer()

	err := utils.WithRetry(context.Background(), func() error {
		// Count every invocation, whatever it returns.
		defer func() {
			counter++
		}()

		// Invocations 0..2 fail with a scan-region error; the fourth succeeds.
		if counter == 3 {
			return nil
		}
		return berrors.ErrPDBatchScanRegion
	}, bo)

	require.NoError(t, err)
	// testify takes (expected, actual); keeping that order makes a failure
	// report "expected: 4" rather than blaming the constant.
	require.Equal(t, 4, counter)
}
func TestScanRegionBackOfferWithFail(t *testing.T) {
_ = failpoint.Enable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff", "return(true)")
defer func() {
_ = failpoint.Disable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff")
}()
var counter int
bo := NewWaitRegionOnlineBackoffer()
err := utils.WithRetry(context.Background(), func() error {
defer func() {
counter++
}()
return berrors.ErrPDBatchScanRegion
}, bo)
require.Error(t, err)
require.Equal(t, counter, WaitRegionOnlineAttemptTimes)
}
func TestScanRegionBackOfferWithStopRetry(t *testing.T) {
_ = failpoint.Enable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff", "return(true)")
defer func() {
_ = failpoint.Disable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff")
}()
var counter int
bo := NewWaitRegionOnlineBackoffer()
err := utils.WithRetry(context.Background(), func() error {
defer func() {
counter++
}()
if counter < 5 {
return berrors.ErrPDBatchScanRegion
}
return berrors.ErrKVUnknown
}, bo)
require.Error(t, err)
require.Equal(t, counter, 6)
}
// mockScatterFailedPDClient is a pd.Client stub whose ScatterRegion rejects the
// first failedBefore calls made for each region and accepts every later call.
type mockScatterFailedPDClient struct {
	pd.Client

	// failed counts the ScatterRegion calls seen so far, keyed by region ID.
	// It is allocated lazily on the first call.
	failed map[uint64]int
	// failedBefore is the number of calls per region that return an error
	// before calls for that region start succeeding.
	failedBefore int
}

// ScatterRegion records one more call for regionID. It returns a codes.Unknown
// gRPC status until the region has been requested more than failedBefore
// times, and nil from then on.
func (c *mockScatterFailedPDClient) ScatterRegion(ctx context.Context, regionID uint64) error {
	if c.failed == nil {
		c.failed = map[uint64]int{}
	}

	attempts := c.failed[regionID] + 1
	c.failed[regionID] = attempts

	if attempts <= c.failedBefore {
		return status.Errorf(codes.Unknown, "region %d is not fully replicated", regionID)
	}
	return nil
}
// recordCntBackoffer is a test backoffer that never waits and only counts how
// often a backoff was requested.
type recordCntBackoffer struct {
	// already is the number of NextBackoff calls made so far.
	already int
}

// NextBackoff bumps the call counter and asks for no delay; the error that
// triggered the backoff is ignored.
func (r *recordCntBackoffer) NextBackoff(_ error) time.Duration {
	r.already += 1
	return time.Duration(0)
}

// Attempt always reports 100 and does not depend on how many backoffs were
// recorded.
func (r *recordCntBackoffer) Attempt() int {
	const attempts = 100
	return attempts
}
func TestScatterSequentiallyRetryCnt(t *testing.T) {
client := pdClient{
needScatterVal: true,
client: &mockScatterFailedPDClient{failedBefore: 7},
}
client.needScatterInit.Do(func() {})
ctx := context.Background()
regions := []*RegionInfo{
{
Region: &metapb.Region{
Id: 1,
},
},
{
Region: &metapb.Region{
Id: 2,
},
},
}
backoffer := &recordCntBackoffer{}
client.scatterRegionsSequentially(
ctx,
regions,
backoffer,
)
require.Equal(t, 7, backoffer.already)
}
// mockOldPDClient is a pd.Client stub: the batch ScatterRegions call reports
// codes.Unimplemented, single-region ScatterRegion calls succeed and are
// counted, and GetOperator replays replies scripted by the test.
// NOTE(review): judging by the name, this stands in for a PD server that
// predates batch scatter — confirm.
type mockOldPDClient struct {
	pd.Client

	// scattered counts ScatterRegion calls per region ID; allocated lazily.
	scattered map[uint64]int
	// getOperatorResps holds, per region ID, the replies GetOperator hands
	// out in order, one per call.
	getOperatorResps map[uint64][]*pdpb.GetOperatorResponse
}

// ScatterRegion counts the call for regionID and always succeeds.
func (c *mockOldPDClient) ScatterRegion(_ context.Context, regionID uint64) error {
	if c.scattered == nil {
		c.scattered = map[uint64]int{}
	}
	c.scattered[regionID] += 1
	return nil
}

// ScatterRegions always fails with a codes.Unimplemented gRPC status.
func (c *mockOldPDClient) ScatterRegions(context.Context, []uint64, ...pd.RegionsOption) (*pdpb.ScatterRegionResponse, error) {
	return nil, status.Error(codes.Unimplemented, "Ah, yep")
}

// GetOperator pops and returns the next scripted reply for regionID. It panics
// with an index-out-of-range error when no reply is left for that region.
func (c *mockOldPDClient) GetOperator(_ context.Context, regionID uint64) (*pdpb.GetOperatorResponse, error) {
	queue := c.getOperatorResps[regionID]
	head, rest := queue[0], queue[1:]
	c.getOperatorResps[regionID] = rest
	return head, nil
}
func TestScatterBackwardCompatibility(t *testing.T) {
client := pdClient{
needScatterVal: true,
client: &mockOldPDClient{},
}
client.needScatterInit.Do(func() {})
ctx := context.Background()
regions := []*RegionInfo{
{
Region: &metapb.Region{
Id: 1,
},
},
{
Region: &metapb.Region{
Id: 2,
},
},
}
err := client.ScatterRegions(ctx, regions)
require.NoError(t, err)
require.Equal(t, map[uint64]int{1: 1, 2: 1}, client.client.(*mockOldPDClient).scattered)
}
// TestWaitForScatterRegions drives pdClient.WaitRegionsScattered over six
// regions against a PD stub whose GetOperator replies are scripted per region.
// Three scenarios run back to back:
//
//  1. every region eventually finishes, with some operator statuses expected
//     to trigger a re-scatter;
//  2. a DATA_COMPACTED header error is expected to abort the wait;
//  3. with WaitRegionOnlineAttemptTimes lowered to 2, a region whose operator
//     keeps RUNNING is expected to produce a timeout error.
func TestWaitForScatterRegions(t *testing.T) {
	const regionCnt = 6

	pdMock := &mockOldPDClient{}
	cli := pdClient{
		needScatterVal: true,
		client:         pdMock,
	}
	// Consume the initializer with a no-op.
	// NOTE(review): presumably this keeps the client from probing PD and
	// overwriting needScatterVal — confirm against pdClient.
	cli.needScatterInit.Do(func() {})

	ctx := context.Background()
	regions := make([]*RegionInfo, 0, regionCnt)
	for id := uint64(1); id <= regionCnt; id++ {
		regions = append(regions, &RegionInfo{Region: &metapb.Region{Id: id}})
	}

	// Constructors for the scripted GetOperator replies.
	headerErr := func(errType pdpb.ErrorType) *pdpb.GetOperatorResponse {
		return &pdpb.GetOperatorResponse{
			Header: &pdpb.ResponseHeader{Error: &pdpb.Error{Type: errType}},
		}
	}
	scatterOp := func(st pdpb.OperatorStatus) *pdpb.GetOperatorResponse {
		return &pdpb.GetOperatorResponse{Desc: []byte("scatter-region"), Status: st}
	}
	otherOp := func() *pdpb.GetOperatorResponse {
		return &pdpb.GetOperatorResponse{Desc: []byte("not-scatter-region")}
	}

	// script clears the scatter counters and installs a fresh reply script.
	script := func(replies map[uint64][]*pdpb.GetOperatorResponse) {
		pdMock.scattered = make(map[uint64]int)
		pdMock.getOperatorResps = replies
	}
	// assertRepliesDrained checks that every scripted reply was consumed.
	assertRepliesDrained := func() {
		for id := uint64(1); id <= regionCnt; id++ {
			require.Len(t, pdMock.getOperatorResps[id], 0)
		}
	}
	// assertNoRescatter checks that no region was scattered again.
	assertNoRescatter := func() {
		for id := uint64(1); id <= regionCnt; id++ {
			require.Equal(t, 0, pdMock.scattered[id])
		}
	}

	// Scenario 1: all regions end up scattered.
	script(map[uint64][]*pdpb.GetOperatorResponse{
		1: {headerErr(pdpb.ErrorType_REGION_NOT_FOUND)},
		2: {otherOp()},
		3: {scatterOp(pdpb.OperatorStatus_SUCCESS)},
		4: {
			scatterOp(pdpb.OperatorStatus_RUNNING),
			scatterOp(pdpb.OperatorStatus_TIMEOUT),
			scatterOp(pdpb.OperatorStatus_SUCCESS),
		},
		5: {
			scatterOp(pdpb.OperatorStatus_CANCEL),
			scatterOp(pdpb.OperatorStatus_CANCEL),
			scatterOp(pdpb.OperatorStatus_CANCEL),
			scatterOp(pdpb.OperatorStatus_RUNNING),
			scatterOp(pdpb.OperatorStatus_RUNNING),
			otherOp(),
		},
		// should trigger a retry
		6: {
			scatterOp(pdpb.OperatorStatus_REPLACE),
			scatterOp(pdpb.OperatorStatus_SUCCESS),
		},
	})

	left, err := cli.WaitRegionsScattered(ctx, regions)
	require.NoError(t, err)
	require.Equal(t, 0, left)
	for id := uint64(1); id <= 3; id++ {
		require.Equal(t, 0, pdMock.scattered[id])
	}
	// OperatorStatus_TIMEOUT should trigger rescatter once
	require.Equal(t, 1, pdMock.scattered[uint64(4)])
	// 3 * OperatorStatus_CANCEL should trigger 3 * rescatter
	require.Equal(t, 3, pdMock.scattered[uint64(5)])
	// OperatorStatus_REPLACE should trigger rescatter once
	require.Equal(t, 1, pdMock.scattered[uint64(6)])
	assertRepliesDrained()

	// Scenario 2: test non-retryable error.
	script(map[uint64][]*pdpb.GetOperatorResponse{
		1: {headerErr(pdpb.ErrorType_REGION_NOT_FOUND)},
		2: {otherOp()},
		// mimic non-retryable error
		3: {headerErr(pdpb.ErrorType_DATA_COMPACTED)},
	})

	left, err = cli.WaitRegionsScattered(ctx, regions)
	require.ErrorContains(t, err, "get operator error: DATA_COMPACTED")
	require.Equal(t, 4, left) // region 3,4,5,6 is not scattered
	assertRepliesDrained()
	assertNoRescatter()

	// Scenario 3: test backoff is timed-out. The package-level attempt limit
	// is lowered for this scenario and restored when the test ends.
	backup := WaitRegionOnlineAttemptTimes
	WaitRegionOnlineAttemptTimes = 2
	t.Cleanup(func() {
		WaitRegionOnlineAttemptTimes = backup
	})

	script(map[uint64][]*pdpb.GetOperatorResponse{
		1: {headerErr(pdpb.ErrorType_REGION_NOT_FOUND)},
		2: {otherOp()},
		3: {scatterOp(pdpb.OperatorStatus_SUCCESS)},
		4: {
			scatterOp(pdpb.OperatorStatus_RUNNING),
			scatterOp(pdpb.OperatorStatus_RUNNING), // first retry
			scatterOp(pdpb.OperatorStatus_RUNNING), // second retry
		},
		5: {otherOp()},
		6: {scatterOp(pdpb.OperatorStatus_SUCCESS)},
	})

	left, err = cli.WaitRegionsScattered(ctx, regions)
	require.ErrorContains(t, err, "wait for scatter region timeout, print the first unfinished region: id:4")
	require.Equal(t, 1, left)
	assertRepliesDrained()
	assertNoRescatter()
}