// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package restore

import (
	"bytes"
	"context"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"path"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/pingcap/errors"
	"github.com/pingcap/failpoint"
	"github.com/pingcap/kvproto/pkg/errorpb"
	"github.com/pingcap/kvproto/pkg/kvrpcpb"
	"github.com/pingcap/kvproto/pkg/metapb"
	"github.com/pingcap/kvproto/pkg/pdpb"
	"github.com/pingcap/kvproto/pkg/tikvpb"
	"github.com/pingcap/log"
	"github.com/pingcap/tidb/br/pkg/conn"
	berrors "github.com/pingcap/tidb/br/pkg/errors"
	"github.com/pingcap/tidb/br/pkg/httputil"
	"github.com/pingcap/tidb/br/pkg/logutil"
	pd "github.com/tikv/pd/client"
	"github.com/tikv/pd/server/config"
	"github.com/tikv/pd/server/schedule/placement"
	"go.uber.org/multierr"
	"go.uber.org/zap"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/status"
)

const (
	splitRegionMaxRetryTime = 4
)

// SplitClient is an external client used by RegionSplitter.
type SplitClient interface {
	// GetStore gets a store by a store id.
	GetStore(ctx context.Context, storeID uint64) (*metapb.Store, error)
	// GetRegion gets a region which includes a specified key.
	GetRegion(ctx context.Context, key []byte) (*RegionInfo, error)
	// GetRegionByID gets a region by a region id.
	GetRegionByID(ctx context.Context, regionID uint64) (*RegionInfo, error)
	// SplitRegion splits a region from a key; if the key is not included in the region, it returns nil.
	// Note: the key should not be encoded.
	SplitRegion(ctx context.Context, regionInfo *RegionInfo, key []byte) (*RegionInfo, error)
	// BatchSplitRegions splits a region from a batch of keys.
	// Note: the keys should not be encoded.
	BatchSplitRegions(ctx context.Context, regionInfo *RegionInfo, keys [][]byte) ([]*RegionInfo, error)
	// BatchSplitRegionsWithOrigin splits a region from a batch of keys and returns the original region and the newly split regions.
	BatchSplitRegionsWithOrigin(ctx context.Context, regionInfo *RegionInfo, keys [][]byte) (*RegionInfo, []*RegionInfo, error)
	// ScatterRegion scatters a specified region.
	ScatterRegion(ctx context.Context, regionInfo *RegionInfo) error
	// ScatterRegions scatters regions in a batch.
	ScatterRegions(ctx context.Context, regionInfo []*RegionInfo) error
	// GetOperator gets the status of the operator of the specified region.
	GetOperator(ctx context.Context, regionID uint64) (*pdpb.GetOperatorResponse, error)
	// ScanRegions gets a list of regions, starting from the region that contains key.
	// Limit restricts the maximum number of regions returned.
	ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*RegionInfo, error)
	// GetPlacementRule loads a placement rule from PD.
	GetPlacementRule(ctx context.Context, groupID, ruleID string) (placement.Rule, error)
	// SetPlacementRule inserts or updates a placement rule into PD.
	SetPlacementRule(ctx context.Context, rule placement.Rule) error
	// DeletePlacementRule removes a placement rule from PD.
	DeletePlacementRule(ctx context.Context, groupID, ruleID string) error
	// SetStoresLabel adds or updates the specified label of the given stores. If labelValue
	// is empty, it clears the label.
	SetStoresLabel(ctx context.Context, stores []uint64, labelKey, labelValue string) error
}
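
// splitAndScatterSketch is a minimal, illustrative sketch of how a SplitClient
// is typically driven: split one region on a batch of raw keys, then scatter
// the resulting regions. It is not referenced by the restore flow and exists
// only as an example; real callers also retry, back off, and wait for the
// scatter operators to finish.
func splitAndScatterSketch(ctx context.Context, cli SplitClient, region *RegionInfo, keys [][]byte) error {
	// BatchSplitRegions returns only the newly created regions.
	newRegions, err := cli.BatchSplitRegions(ctx, region, keys)
	if err != nil {
		return err
	}
	// Scatter the new regions so the subsequent ingest load spreads across stores.
	return cli.ScatterRegions(ctx, newRegions)
}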

// pdClient is a wrapper of the PD client that can be used by RegionSplitter.
type pdClient struct {
	mu         sync.Mutex
	client     pd.Client
	tlsConf    *tls.Config
	storeCache map[uint64]*metapb.Store

	// FIXME when config changed during the lifetime of pdClient,
	// this may mislead the scatter.
	needScatterVal  bool
	needScatterInit sync.Once
}

// NewSplitClient returns a client used by RegionSplitter.
func NewSplitClient(client pd.Client, tlsConf *tls.Config) SplitClient {
	cli := &pdClient{
		client:     client,
		tlsConf:    tlsConf,
		storeCache: make(map[uint64]*metapb.Store),
	}
	return cli
}

func (c *pdClient) needScatter(ctx context.Context) bool {
	c.needScatterInit.Do(func() {
		var err error
		c.needScatterVal, err = c.checkNeedScatter(ctx)
		if err != nil {
			log.Warn("failed to check whether need to scatter, use permissive strategy: always scatter", logutil.ShortError(err))
			c.needScatterVal = true
		}
		if !c.needScatterVal {
			log.Info("skipping scatter because the replica number isn't less than store count.")
		}
	})
	return c.needScatterVal
}

// ScatterRegions scatters regions in a batch.
func (c *pdClient) ScatterRegions(ctx context.Context, regionInfo []*RegionInfo) error {
	c.mu.Lock()
	defer c.mu.Unlock()
	regionsID := make([]uint64, 0, len(regionInfo))
	for _, v := range regionInfo {
		regionsID = append(regionsID, v.Region.Id)
	}
	resp, err := c.client.ScatterRegions(ctx, regionsID)
	if err != nil {
		return err
	}
	if pbErr := resp.GetHeader().GetError(); pbErr.GetType() != pdpb.ErrorType_OK {
		return errors.Annotatef(berrors.ErrPDInvalidResponse, "pd returns error during batch scattering: %s", pbErr)
	}
	return nil
}

func (c *pdClient) GetStore(ctx context.Context, storeID uint64) (*metapb.Store, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	store, ok := c.storeCache[storeID]
	if ok {
		return store, nil
	}
	store, err := c.client.GetStore(ctx, storeID)
	if err != nil {
		return nil, errors.Trace(err)
	}
	c.storeCache[storeID] = store
	return store, nil
}

func (c *pdClient) GetRegion(ctx context.Context, key []byte) (*RegionInfo, error) {
	region, err := c.client.GetRegion(ctx, key)
	if err != nil {
		return nil, errors.Trace(err)
	}
	if region == nil {
		return nil, nil
	}
	return &RegionInfo{
		Region: region.Meta,
		Leader: region.Leader,
	}, nil
}

func (c *pdClient) GetRegionByID(ctx context.Context, regionID uint64) (*RegionInfo, error) {
	region, err := c.client.GetRegionByID(ctx, regionID)
	if err != nil {
		return nil, errors.Trace(err)
	}
	if region == nil {
		return nil, nil
	}
	return &RegionInfo{
		Region:       region.Meta,
		Leader:       region.Leader,
		PendingPeers: region.PendingPeers,
		DownPeers:    region.DownPeers,
	}, nil
}

func (c *pdClient) SplitRegion(ctx context.Context, regionInfo *RegionInfo, key []byte) (*RegionInfo, error) {
	var peer *metapb.Peer
	if regionInfo.Leader != nil {
		peer = regionInfo.Leader
	} else {
		if len(regionInfo.Region.Peers) == 0 {
			return nil, errors.Annotate(berrors.ErrRestoreNoPeer, "region does not have peer")
		}
		peer = regionInfo.Region.Peers[0]
	}
	storeID := peer.GetStoreId()
	store, err := c.GetStore(ctx, storeID)
	if err != nil {
		return nil, errors.Trace(err)
	}
	conn, err := grpc.Dial(store.GetAddress(), grpc.WithInsecure())
	if err != nil {
		return nil, errors.Trace(err)
	}
	defer conn.Close()

	client := tikvpb.NewTikvClient(conn)
	resp, err := client.SplitRegion(ctx, &kvrpcpb.SplitRegionRequest{
		Context: &kvrpcpb.Context{
			RegionId:    regionInfo.Region.Id,
			RegionEpoch: regionInfo.Region.RegionEpoch,
			Peer:        peer,
		},
		SplitKey: key,
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	if resp.RegionError != nil {
		log.Error("fail to split region",
			logutil.Region(regionInfo.Region),
			logutil.Key("key", key),
			zap.Stringer("regionErr", resp.RegionError))
		return nil, errors.Annotatef(berrors.ErrRestoreSplitFailed, "err=%v", resp.RegionError)
	}

	// BUG: Left is deprecated; it may be nil even if the split succeeded!
	// Assume the new region is the left one.
	newRegion := resp.GetLeft()
	if newRegion == nil {
		regions := resp.GetRegions()
		for _, r := range regions {
			if bytes.Equal(r.GetStartKey(), regionInfo.Region.GetStartKey()) {
				newRegion = r
				break
			}
		}
	}
	if newRegion == nil {
		return nil, errors.Annotate(berrors.ErrRestoreSplitFailed, "new region is nil")
	}
	var leader *metapb.Peer
	// Assume the leaders will be at the same store.
	if regionInfo.Leader != nil {
		for _, p := range newRegion.GetPeers() {
			if p.GetStoreId() == regionInfo.Leader.GetStoreId() {
				leader = p
				break
			}
		}
	}
	return &RegionInfo{
		Region: newRegion,
		Leader: leader,
	}, nil
}

func splitRegionWithFailpoint(
	ctx context.Context,
	regionInfo *RegionInfo,
	peer *metapb.Peer,
	client tikvpb.TikvClient,
	keys [][]byte,
) (*kvrpcpb.SplitRegionResponse, error) {
	failpoint.Inject("not-leader-error", func(injectNewLeader failpoint.Value) {
		log.Debug("failpoint not-leader-error injected.")
		resp := &kvrpcpb.SplitRegionResponse{
			RegionError: &errorpb.Error{
				NotLeader: &errorpb.NotLeader{
					RegionId: regionInfo.Region.Id,
				},
			},
		}
		if injectNewLeader.(bool) {
			resp.RegionError.NotLeader.Leader = regionInfo.Leader
		}
		failpoint.Return(resp, nil)
	})
	failpoint.Inject("somewhat-retryable-error", func() {
		log.Debug("failpoint somewhat-retryable-error injected.")
		failpoint.Return(&kvrpcpb.SplitRegionResponse{
			RegionError: &errorpb.Error{
				ServerIsBusy: &errorpb.ServerIsBusy{},
			},
		}, nil)
	})
	return client.SplitRegion(ctx, &kvrpcpb.SplitRegionRequest{
		Context: &kvrpcpb.Context{
			RegionId:    regionInfo.Region.Id,
			RegionEpoch: regionInfo.Region.RegionEpoch,
			Peer:        peer,
		},
		SplitKeys: keys,
	})
}

func (c *pdClient) sendSplitRegionRequest(
	ctx context.Context, regionInfo *RegionInfo, keys [][]byte,
) (*kvrpcpb.SplitRegionResponse, error) {
	var splitErrors error
	for i := 0; i < splitRegionMaxRetryTime; i++ {
		var peer *metapb.Peer
		// scanRegions may return an empty Leader in https://github.com/tikv/pd/blob/v4.0.8/server/grpc_service.go#L524
		// so we also need to check Leader.Id != 0.
		if regionInfo.Leader != nil && regionInfo.Leader.Id != 0 {
			peer = regionInfo.Leader
		} else {
			if len(regionInfo.Region.Peers) == 0 {
				return nil, multierr.Append(splitErrors,
					errors.Annotatef(berrors.ErrRestoreNoPeer, "region[%d] doesn't have any peer", regionInfo.Region.GetId()))
			}
			peer = regionInfo.Region.Peers[0]
		}
		storeID := peer.GetStoreId()
		store, err := c.GetStore(ctx, storeID)
		if err != nil {
			return nil, multierr.Append(splitErrors, err)
		}
		opt := grpc.WithInsecure()
		if c.tlsConf != nil {
			opt = grpc.WithTransportCredentials(credentials.NewTLS(c.tlsConf))
		}
		conn, err := grpc.Dial(store.GetAddress(), opt)
		if err != nil {
			return nil, multierr.Append(splitErrors, err)
		}
		defer conn.Close()
		client := tikvpb.NewTikvClient(conn)
		resp, err := splitRegionWithFailpoint(ctx, regionInfo, peer, client, keys)
		if err != nil {
			return nil, multierr.Append(splitErrors, err)
		}
		if resp.RegionError != nil {
			log.Warn("fail to split region",
				logutil.Region(regionInfo.Region),
				zap.Stringer("regionErr", resp.RegionError))
			splitErrors = multierr.Append(splitErrors,
				errors.Annotatef(berrors.ErrRestoreSplitFailed, "split region failed: err=%v", resp.RegionError))
			if nl := resp.RegionError.NotLeader; nl != nil {
				if leader := nl.GetLeader(); leader != nil {
					regionInfo.Leader = leader
				} else {
					newRegionInfo, findLeaderErr := c.GetRegionByID(ctx, nl.RegionId)
					if findLeaderErr != nil {
						return nil, multierr.Append(splitErrors, findLeaderErr)
					}
					if !checkRegionEpoch(newRegionInfo, regionInfo) {
						return nil, multierr.Append(splitErrors, berrors.ErrKVEpochNotMatch)
					}
					log.Info("find new leader", zap.Uint64("new leader", newRegionInfo.Leader.Id))
					regionInfo = newRegionInfo
				}
				log.Info("split region meet not leader error, retrying",
					zap.Int("retry times", i),
					zap.Uint64("regionID", regionInfo.Region.Id),
					zap.Any("new leader", regionInfo.Leader),
				)
				continue
			}
			// TODO: we don't handle RegionNotMatch and RegionNotFound here,
			// because I think we don't have enough information to retry.
			// But maybe we can handle them here by some information the error itself provides.
			if resp.RegionError.ServerIsBusy != nil ||
				resp.RegionError.StaleCommand != nil {
				log.Warn("a error occurs on split region",
					zap.Int("retry times", i),
					zap.Uint64("regionID", regionInfo.Region.Id),
					zap.String("error", resp.RegionError.Message),
					zap.Any("error verbose", resp.RegionError),
				)
				continue
			}
			return nil, errors.Trace(splitErrors)
		}
		return resp, nil
	}
	return nil, errors.Trace(splitErrors)
}

func (c *pdClient) BatchSplitRegionsWithOrigin(
	ctx context.Context, regionInfo *RegionInfo, keys [][]byte,
) (*RegionInfo, []*RegionInfo, error) {
	resp, err := c.sendSplitRegionRequest(ctx, regionInfo, keys)
	if err != nil {
		return nil, nil, errors.Trace(err)
	}

	regions := resp.GetRegions()
	newRegionInfos := make([]*RegionInfo, 0, len(regions))
	var originRegion *RegionInfo
	for _, region := range regions {
		var leader *metapb.Peer

		// Assume the leaders will be at the same store.
		if regionInfo.Leader != nil {
			for _, p := range region.GetPeers() {
				if p.GetStoreId() == regionInfo.Leader.GetStoreId() {
					leader = p
					break
				}
			}
		}
		// original region
		if region.GetId() == regionInfo.Region.GetId() {
			originRegion = &RegionInfo{
				Region: region,
				Leader: leader,
			}
			continue
		}
		newRegionInfos = append(newRegionInfos, &RegionInfo{
			Region: region,
			Leader: leader,
		})
	}
	return originRegion, newRegionInfos, nil
}

func (c *pdClient) BatchSplitRegions(
	ctx context.Context, regionInfo *RegionInfo, keys [][]byte,
) ([]*RegionInfo, error) {
	_, newRegions, err := c.BatchSplitRegionsWithOrigin(ctx, regionInfo, keys)
	return newRegions, err
}

func (c *pdClient) getStoreCount(ctx context.Context) (int, error) {
	stores, err := conn.GetAllTiKVStores(ctx, c.client, conn.SkipTiFlash)
	if err != nil {
		return 0, err
	}
	return len(stores), err
}

func (c *pdClient) getMaxReplica(ctx context.Context) (int, error) {
	api := c.getPDAPIAddr()
	configAPI := api + "/pd/api/v1/config"
	req, err := http.NewRequestWithContext(ctx, "GET", configAPI, nil)
	if err != nil {
		return 0, errors.Trace(err)
	}
	res, err := httputil.NewClient(c.tlsConf).Do(req)
	if err != nil {
		return 0, errors.Trace(err)
	}
	defer func() {
		if err = res.Body.Close(); err != nil {
			log.Error("Response fail to close", zap.Error(err))
		}
	}()
	var conf config.Config
	if err := json.NewDecoder(res.Body).Decode(&conf); err != nil {
		return 0, errors.Trace(err)
	}
	return int(conf.Replication.MaxReplicas), nil
}

func (c *pdClient) checkNeedScatter(ctx context.Context) (bool, error) {
	storeCount, err := c.getStoreCount(ctx)
	if err != nil {
		return false, err
	}
	maxReplica, err := c.getMaxReplica(ctx)
	if err != nil {
		return false, err
	}
	log.Info("checking whether need to scatter", zap.Int("store", storeCount), zap.Int("max-replica", maxReplica))
	// Skipping scatter may lead to leader imbalance;
	// currently, we skip scatter only when:
	// 1. max-replica > store-count (probably a misconfigured or playground cluster)
	// 2. store-count == 1 (scattering would be meaningless)
	// We could also omit scatter when `max-replica == store-count` by creating a BalanceLeader operator here;
	// however, there is no evidence that transferring leaders is much faster than scattering empty regions.
	return storeCount >= maxReplica && storeCount > 1, nil
}
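
// For illustration only: with 5 TiKV stores and max-replicas = 3,
// checkNeedScatter reports true and regions are scattered after splitting;
// with a single store, or with max-replicas larger than the store count, it
// reports false and ScatterRegion below becomes a no-op.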

func (c *pdClient) ScatterRegion(ctx context.Context, regionInfo *RegionInfo) error {
	if !c.needScatter(ctx) {
		return nil
	}
	return c.client.ScatterRegion(ctx, regionInfo.Region.GetId())
}

func (c *pdClient) GetOperator(ctx context.Context, regionID uint64) (*pdpb.GetOperatorResponse, error) {
	return c.client.GetOperator(ctx, regionID)
}

func (c *pdClient) ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*RegionInfo, error) {
	regions, err := c.client.ScanRegions(ctx, key, endKey, limit)
	if err != nil {
		return nil, errors.Trace(err)
	}
	regionInfos := make([]*RegionInfo, 0, len(regions))
	for _, region := range regions {
		regionInfos = append(regionInfos, &RegionInfo{
			Region: region.Meta,
			Leader: region.Leader,
		})
	}
	return regionInfos, nil
}

func (c *pdClient) GetPlacementRule(ctx context.Context, groupID, ruleID string) (placement.Rule, error) {
	var rule placement.Rule
	addr := c.getPDAPIAddr()
	if addr == "" {
		return rule, errors.Annotate(berrors.ErrRestoreSplitFailed, "failed to add stores labels: no leader")
	}
	req, err := http.NewRequestWithContext(ctx, "GET", addr+path.Join("/pd/api/v1/config/rule", groupID, ruleID), nil)
	if err != nil {
		return rule, errors.Trace(err)
	}
	res, err := httputil.NewClient(c.tlsConf).Do(req)
	if err != nil {
		return rule, errors.Trace(err)
	}
	defer func() {
		if err = res.Body.Close(); err != nil {
			log.Error("Response fail to close", zap.Error(err))
		}
	}()
	b, err := io.ReadAll(res.Body)
	if err != nil {
		return rule, errors.Trace(err)
	}
	err = json.Unmarshal(b, &rule)
	if err != nil {
		return rule, errors.Trace(err)
	}
	return rule, nil
}

func (c *pdClient) SetPlacementRule(ctx context.Context, rule placement.Rule) error {
	addr := c.getPDAPIAddr()
	if addr == "" {
		return errors.Annotate(berrors.ErrPDLeaderNotFound, "failed to add stores labels")
	}
	m, _ := json.Marshal(rule)
	req, err := http.NewRequestWithContext(ctx, "POST", addr+path.Join("/pd/api/v1/config/rule"), bytes.NewReader(m))
	if err != nil {
		return errors.Trace(err)
	}
	res, err := httputil.NewClient(c.tlsConf).Do(req)
	if err != nil {
		return errors.Trace(err)
	}
	return errors.Trace(res.Body.Close())
}
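
// For illustration only (field names follow PD's placement rule API and are
// not defined in this file): SetPlacementRule marshals the rule to JSON and
// posts it, so a typical payload looks roughly like
//
//	{"group_id": "pd", "id": "default", "start_key": "", "end_key": "", "role": "voter", "count": 3}
//
// GetPlacementRule reads the same JSON shape back from
// /pd/api/v1/config/rule/{groupID}/{ruleID}.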

func (c *pdClient) DeletePlacementRule(ctx context.Context, groupID, ruleID string) error {
	addr := c.getPDAPIAddr()
	if addr == "" {
		return errors.Annotate(berrors.ErrPDLeaderNotFound, "failed to add stores labels")
	}
	req, err := http.NewRequestWithContext(ctx, "DELETE", addr+path.Join("/pd/api/v1/config/rule", groupID, ruleID), nil)
	if err != nil {
		return errors.Trace(err)
	}
	res, err := httputil.NewClient(c.tlsConf).Do(req)
	if err != nil {
		return errors.Trace(err)
	}
	return errors.Trace(res.Body.Close())
}

func (c *pdClient) SetStoresLabel(
	ctx context.Context, stores []uint64, labelKey, labelValue string,
) error {
	b := []byte(fmt.Sprintf(`{"%s": "%s"}`, labelKey, labelValue))
	addr := c.getPDAPIAddr()
	if addr == "" {
		return errors.Annotate(berrors.ErrPDLeaderNotFound, "failed to add stores labels")
	}
	httpCli := httputil.NewClient(c.tlsConf)
	for _, id := range stores {
		req, err := http.NewRequestWithContext(
			ctx, "POST",
			addr+path.Join("/pd/api/v1/store", strconv.FormatUint(id, 10), "label"),
			bytes.NewReader(b),
		)
		if err != nil {
			return errors.Trace(err)
		}
		res, err := httpCli.Do(req)
		if err != nil {
			return errors.Trace(err)
		}
		err = res.Body.Close()
		if err != nil {
			return errors.Trace(err)
		}
	}
	return nil
}
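
// For illustration only: a call such as
// SetStoresLabel(ctx, []uint64{1, 4}, "exclusive", "restore") issues, for each
// store ID, a request of the form
//
//	POST {pd-leader-addr}/pd/api/v1/store/1/label
//	{"exclusive": "restore"}
//
// as can be read off the request construction above.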

func (c *pdClient) getPDAPIAddr() string {
	addr := c.client.GetLeaderAddr()
	if addr != "" && !strings.HasPrefix(addr, "http") {
		addr = "http://" + addr
	}
	return strings.TrimRight(addr, "/")
}

func checkRegionEpoch(_new, _old *RegionInfo) bool {
	return _new.Region.GetId() == _old.Region.GetId() &&
		_new.Region.GetRegionEpoch().GetVersion() == _old.Region.GetRegionEpoch().GetVersion() &&
		_new.Region.GetRegionEpoch().GetConfVer() == _old.Region.GetRegionEpoch().GetConfVer()
}

// exponentialBackoffer trivially retries any error it meets.
// It's useful when the caller has already handled the errors but
// only wants a more semantic backoff implementation.
type exponentialBackoffer struct {
	attempt     int
	baseBackoff time.Duration
}

func (b *exponentialBackoffer) exponentialBackoff() time.Duration {
	bo := b.baseBackoff
	b.attempt--
	if b.attempt == 0 {
		return 0
	}
	b.baseBackoff *= 2
	return bo
}

func pdErrorCanRetry(err error) bool {
	// There are 3 types of reasons that PD would reject a `scatter` request:
	// (1) region %d has no leader
	// (2) region %d is hot
	// (3) region %d is not fully replicated
	//
	// (2) shouldn't happen in a recently split region.
	// (1) and (3) might happen, and should be retried.
	grpcErr := status.Convert(err)
	if grpcErr == nil {
		return false
	}
	return strings.Contains(grpcErr.Message(), "is not fully replicated") ||
		strings.Contains(grpcErr.Message(), "has no leader")
}

// NextBackoff returns a duration to wait before retrying again.
func (b *exponentialBackoffer) NextBackoff(error) time.Duration {
	// Trivially exponential back off, because we have handled the error at the upper level.
	return b.exponentialBackoff()
}

// Attempt returns the remaining attempt count.
func (b *exponentialBackoffer) Attempt() int {
	return b.attempt
}
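
// backoffSketch is a small, illustrative example (not referenced by the
// restore flow) of how exponentialBackoffer behaves: with attempt = 3 and
// baseBackoff = time.Second, successive NextBackoff calls yield 1s and 2s, and
// the third call returns 0, signalling that the attempts are exhausted. A
// caller that has already decided an error is retryable (for example via
// pdErrorCanRetry) would sleep for each returned duration.
func backoffSketch(retryableErr error) []time.Duration {
	bo := &exponentialBackoffer{attempt: 3, baseBackoff: time.Second}
	waits := make([]time.Duration, 0, bo.Attempt())
	for bo.Attempt() > 0 {
		waits = append(waits, bo.NextBackoff(retryableErr))
	}
	return waits
}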