store/tikv: always switch to a different peer when meets no-leader (#22449)

Author: Lei Zhao
Date: 2021-03-26 19:05:23 +08:00
Committed by: GitHub
Parent: fe250b9efb
Commit: 2df2ca06d2
5 changed files with 116 additions and 22 deletions


@@ -47,6 +47,12 @@ func (s *testPessimisticSuite) newAsyncCommitTestKitWithInit(c *C) *testkit.Test
return tk
}
func (s *testPessimisticSuite) new1PCTestKitWithInit(c *C) *testkit.TestKit {
tk := testkit.NewTestKitWithInit(c, s.store)
tk.Se.GetSessionVars().Enable1PC = true
return tk
}
type testPessimisticSuite struct {
testSessionSuiteBase
}
@@ -2122,9 +2128,9 @@ func (s *testPessimisticSuite) Test1PCWithSchemaChange(c *C) {
conf.TiKVClient.AsyncCommit.AllowedClockDrift = 0
})
tk := s.newAsyncCommitTestKitWithInit(c)
tk2 := s.newAsyncCommitTestKitWithInit(c)
tk3 := s.newAsyncCommitTestKitWithInit(c)
tk := s.new1PCTestKitWithInit(c)
tk2 := s.new1PCTestKitWithInit(c)
tk3 := s.new1PCTestKitWithInit(c)
tk.MustExec("drop table if exists tk")
tk.MustExec("create table tk (c1 int primary key, c2 int)")
@@ -2160,7 +2166,7 @@ func (s *testPessimisticSuite) Test1PCWithSchemaChange(c *C) {
time.Sleep(200 * time.Millisecond)
tk2.MustExec("alter table tk add index k2(c2)")
}()
c.Assert(failpoint.Enable("github.com/pingcap/tidb/store/tikv/beforePrewrite", "1*sleep(1000)"), IsNil)
c.Assert(failpoint.Enable("github.com/pingcap/tidb/store/tikv/beforePrewrite", "1*sleep(1200)"), IsNil)
tk.MustExec("commit")
c.Assert(failpoint.Disable("github.com/pingcap/tidb/store/tikv/beforePrewrite"), IsNil)
tk3.MustExec("admin check table tk")


@@ -58,15 +58,38 @@ var RegionCacheTTLSec int64 = 600
const (
updated int32 = iota // region is updated and no need to reload.
needSync // need sync new region info.
)
// InvalidReason is the reason why a cached region is invalidated.
// The region cache may apply different strategies for different reasons.
// For example, when a cached region is invalidated due to no leader, the region
// cache will always switch to a different peer.
type InvalidReason int32
const (
// Ok indicates the cached region is valid
Ok InvalidReason = iota
// NoLeader indicates it's invalidated due to no leader
NoLeader
// RegionNotFound indicates it's invalidated due to region not found in the store
RegionNotFound
// EpochNotMatch indicates it's invalidated due to epoch not match
EpochNotMatch
// StoreNotFound indicates it's invalidated due to store not found in PD
StoreNotFound
// Other indicates it's invalidated due to other reasons, e.g., the store
// is removed from the cluster, or requests to the store fail.
Other
)
// Region represents a kv region
type Region struct {
meta *metapb.Region // raw region meta from PD immutable after init
store unsafe.Pointer // point to region store info, see RegionStore
syncFlag int32 // region need be sync in next turn
lastAccess int64 // last region access time, see checkRegionCacheTTL
invalidReason InvalidReason // the reason why the region is invalidated
}
// AccessIndex represents the index for the accessIndex array
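For reference, a minimal self-contained sketch of the new enum and the atomic store/load pattern the diff builds around it. The type and constant names mirror the diff; the stripped-down region struct and the main function are illustrative only.

package main

import (
	"fmt"
	"sync/atomic"
)

// InvalidReason mirrors the int32-backed enum added in this commit; the int32
// base type lets sync/atomic read and write it without extra locking.
type InvalidReason int32

const (
	Ok InvalidReason = iota
	NoLeader
	RegionNotFound
	EpochNotMatch
	StoreNotFound
	Other
)

// region is a stripped-down stand-in for the cached Region type.
type region struct {
	invalidReason InvalidReason
}

// invalidate records why the region was dropped. Casting the field to *int32
// is what allows the typed enum to be stored atomically.
func (r *region) invalidate(reason InvalidReason) {
	atomic.StoreInt32((*int32)(&r.invalidReason), int32(reason))
}

func main() {
	r := &region{}
	r.invalidate(NoLeader)
	// A later reader loads the reason atomically and can branch on it, e.g. to
	// pick a different peer when the last invalidation was caused by no leader.
	if InvalidReason(atomic.LoadInt32((*int32)(&r.invalidReason))) == NoLeader {
		fmt.Println("next load should start from a different peer")
	}
}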
@@ -203,7 +226,7 @@ func (r *Region) compareAndSwapStore(oldStore, newStore *RegionStore) bool {
func (r *Region) checkRegionCacheTTL(ts int64) bool {
// Only consider using a percentage on this failpoint, for example, "2%return"
failpoint.Inject("invalidateRegionCache", func() {
r.invalidate()
r.invalidate(Other)
})
for {
lastAccess := atomic.LoadInt64(&r.lastAccess)
@@ -217,8 +240,9 @@ func (r *Region) checkRegionCacheTTL(ts int64) bool {
}
// invalidate invalidates a region; the next access will get a null result.
func (r *Region) invalidate() {
func (r *Region) invalidate(reason InvalidReason) {
metrics.RegionCacheCounterWithInvalidateRegionFromCacheOK.Inc()
atomic.StoreInt32((*int32)(&r.invalidReason), int32(reason))
atomic.StoreInt64(&r.lastAccess, invalidatedLastAccessTime)
}
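As context for the hunk above, a simplified model of how invalidation interacts with the TTL check: invalidate stores a sentinel into lastAccess so the next checkRegionCacheTTL-style check fails and the region gets reloaded. The sentinel value, helper names, and the single-shot check below are simplifications, not the real implementation (which retries with compare-and-swap).

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

const (
	regionCacheTTLSec         = 600 // mirrors RegionCacheTTLSec in the diff context
	invalidatedLastAccessTime = -1  // illustrative sentinel; the real constant differs
)

// region is a stand-in holding only the last-access timestamp.
type region struct{ lastAccess int64 }

// invalidate stores the sentinel so every later TTL check fails.
func (r *region) invalidate() {
	atomic.StoreInt64(&r.lastAccess, invalidatedLastAccessTime)
}

// checkTTL reports whether the cached region is still usable at time ts and,
// if so, refreshes the access time.
func (r *region) checkTTL(ts int64) bool {
	last := atomic.LoadInt64(&r.lastAccess)
	if last == invalidatedLastAccessTime || ts-last > regionCacheTTLSec {
		return false // caller reloads the region from PD
	}
	atomic.StoreInt64(&r.lastAccess, ts)
	return true
}

func main() {
	now := time.Now().Unix()
	r := &region{lastAccess: now}
	fmt.Println(r.checkTTL(now)) // true
	r.invalidate()
	fmt.Println(r.checkTTL(now)) // false
}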
@@ -456,13 +480,13 @@ func (c *RegionCache) GetTiKVRPCContext(bo *Backoffer, id RegionVerID, replicaRe
})
if store == nil || len(addr) == 0 {
// Store not found, region must be out of date.
cachedRegion.invalidate()
cachedRegion.invalidate(StoreNotFound)
return nil, nil
}
storeFailEpoch := atomic.LoadUint32(&store.epoch)
if storeFailEpoch != regionStore.storeEpochs[storeIdx] {
cachedRegion.invalidate()
cachedRegion.invalidate(Other)
logutil.BgLogger().Info("invalidate current region, because others failed on same store",
zap.Uint64("region", id.GetID()),
zap.String("store", store.addr))
@@ -529,7 +553,7 @@ func (c *RegionCache) GetTiFlashRPCContext(bo *Backoffer, id RegionVerID) (*RPCC
return nil, err
}
if len(addr) == 0 {
cachedRegion.invalidate()
cachedRegion.invalidate(StoreNotFound)
return nil, nil
}
if store.getResolveState() == needCheck {
@@ -540,7 +564,7 @@ func (c *RegionCache) GetTiFlashRPCContext(bo *Backoffer, id RegionVerID) (*RPCC
peer := cachedRegion.meta.Peers[storeIdx]
storeFailEpoch := atomic.LoadUint32(&store.epoch)
if storeFailEpoch != regionStore.storeEpochs[storeIdx] {
cachedRegion.invalidate()
cachedRegion.invalidate(Other)
logutil.BgLogger().Info("invalidate current region, because others failed on same store",
zap.Uint64("region", id.GetID()),
zap.String("store", store.addr))
@@ -559,7 +583,7 @@ func (c *RegionCache) GetTiFlashRPCContext(bo *Backoffer, id RegionVerID) (*RPCC
}, nil
}
cachedRegion.invalidate()
cachedRegion.invalidate(Other)
return nil, nil
}
@@ -901,11 +925,16 @@ func (c *RegionCache) BatchLoadRegionsFromKey(bo *Backoffer, startKey []byte, co
// InvalidateCachedRegion removes a cached Region.
func (c *RegionCache) InvalidateCachedRegion(id RegionVerID) {
c.InvalidateCachedRegionWithReason(id, Other)
}
// InvalidateCachedRegionWithReason removes a cached Region with the reason why it's invalidated.
func (c *RegionCache) InvalidateCachedRegionWithReason(id RegionVerID, reason InvalidReason) {
cachedRegion := c.getCachedRegionWithRLock(id)
if cachedRegion == nil {
return
}
cachedRegion.invalidate()
cachedRegion.invalidate(reason)
}
// UpdateLeader update some region cache with newer leader info.
@@ -932,7 +961,7 @@ func (c *RegionCache) UpdateLeader(regionID RegionVerID, leaderStoreID uint64, c
zap.Uint64("regionID", regionID.GetID()),
zap.Int("currIdx", int(currentPeerIdx)),
zap.Uint64("leaderStoreID", leaderStoreID))
r.invalidate()
r.invalidate(StoreNotFound)
} else {
logutil.BgLogger().Info("switch region leader to specific leader due to kv return NotLeader",
zap.Uint64("regionID", regionID.GetID()),
@@ -942,13 +971,25 @@ func (c *RegionCache) UpdateLeader(regionID RegionVerID, leaderStoreID uint64, c
}
// insertRegionToCache tries to insert the Region to cache.
// It should be protected by c.mu.Lock().
func (c *RegionCache) insertRegionToCache(cachedRegion *Region) {
old := c.mu.sorted.ReplaceOrInsert(newBtreeItem(cachedRegion))
if old != nil {
store := cachedRegion.getStore()
oldRegion := old.(*btreeItem).cachedRegion
oldRegionStore := oldRegion.getStore()
// Joint consensus is enabled in v5.0, which makes it possible for a leader to step down as a learner during a conf change.
// If hibernate region is also enabled, the region can then go without a leader for a long time, and the leader info
// in PD stays stale until requests are sent to followers or the hibernate timeout fires.
// To handle this, always try a different peer when the old cached region was invalidated with the no-leader reason.
// There is a small chance that the peer which reported no-leader has since become the leader, in which case TiDB has to retry once.
if InvalidReason(atomic.LoadInt32((*int32)(&oldRegion.invalidReason))) == NoLeader {
store.workTiKVIdx = (oldRegionStore.workTiKVIdx + 1) % AccessIndex(store.accessStoreNum(TiKVOnly))
}
// Don't refresh the TiFlash work idx for the region. Otherwise, it would always go to an invalid store that
// is in the middle of transferring its regions away.
atomic.StoreInt32(&cachedRegion.getStore().workTiFlashIdx, atomic.LoadInt32(&old.(*btreeItem).cachedRegion.getStore().workTiFlashIdx))
delete(c.mu.regions, old.(*btreeItem).cachedRegion.VerID())
store.workTiFlashIdx = atomic.LoadInt32(&oldRegionStore.workTiFlashIdx)
delete(c.mu.regions, oldRegion.VerID())
}
c.mu.regions[cachedRegion.VerID()] = cachedRegion
}
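The heart of the change is the modular rotation of the work TiKV index when the replaced cache entry was invalidated for lack of a leader. A minimal sketch of that decision follows; the helper name and plain int indices are stand-ins for the real AccessIndex/accessStoreNum machinery in the hunk above.

package main

import "fmt"

// Invalidation reasons, mirroring the two values that matter for this decision.
const (
	reasonOther    = 0
	reasonNoLeader = 1
)

// pickWorkTiKVIdx chooses which peer the freshly loaded region should use
// first. If the old cached region was dropped because no leader was found,
// rotate to the next TiKV peer so the retry does not hit the same store;
// otherwise keep the previous working index.
func pickWorkTiKVIdx(oldIdx, oldReason, tikvStoreNum int) int {
	if oldReason == reasonNoLeader {
		return (oldIdx + 1) % tikvStoreNum
	}
	return oldIdx
}

func main() {
	// Three TiKV peers, last attempt on index 2: a no-leader invalidation
	// wraps the next attempt around to index 0; any other reason stays on 2.
	fmt.Println(pickWorkTiKVIdx(2, reasonNoLeader, 3)) // 0
	fmt.Println(pickWorkTiKVIdx(2, reasonOther, 3))    // 2
}

As the comment above notes, the rotation can occasionally move off a peer that has just won the election, costing a single extra retry, which the change accepts as the cheaper trade-off.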
@@ -1364,7 +1405,7 @@ func (c *RegionCache) OnRegionEpochNotMatch(bo *Backoffer, ctx *RPCContext, curr
if needInvalidateOld {
cachedRegion, ok := c.mu.regions[ctx.Region]
if ok {
cachedRegion.invalidate()
cachedRegion.invalidate(EpochNotMatch)
}
}
return nil


@@ -1392,6 +1392,25 @@ func (s *testRegionCacheSuite) TestContainsByEnd(c *C) {
c.Assert(createSampleRegion([]byte{10}, []byte{20}).ContainsByEnd([]byte{30}), IsFalse)
}
func (s *testRegionCacheSuite) TestSwitchPeerWhenNoLeader(c *C) {
var prevCtx *RPCContext
for i := 0; i <= len(s.cluster.GetAllStores()); i++ {
loc, err := s.cache.LocateKey(s.bo, []byte("a"))
c.Assert(err, IsNil)
ctx, err := s.cache.GetTiKVRPCContext(s.bo, loc.Region, kv.ReplicaReadLeader, 0)
c.Assert(err, IsNil)
if prevCtx == nil {
c.Assert(i, Equals, 0)
} else {
c.Assert(ctx.AccessIdx, Not(Equals), prevCtx.AccessIdx)
c.Assert(ctx.Peer, Not(DeepEquals), prevCtx.Peer)
}
s.cache.InvalidateCachedRegionWithReason(loc.Region, NoLeader)
c.Assert(s.cache.getCachedRegionWithRLock(loc.Region).invalidReason, Equals, NoLeader)
prevCtx = ctx
}
}
func BenchmarkOnRequestFail(b *testing.B) {
/*
This benchmark simulates many concurrent requests calling the OnSendRequestFail method


@@ -655,7 +655,7 @@ func (s *RegionRequestSender) onRegionError(bo *Backoffer, ctx *RPCContext, seed
// the Raft group is in an election, but it's possible that the peer is
// isolated and removed from the Raft group. So it's necessary to reload
// the region from PD.
s.regionCache.InvalidateCachedRegion(ctx.Region)
s.regionCache.InvalidateCachedRegionWithReason(ctx.Region, NoLeader)
if err = bo.Backoff(BoRegionMiss, errors.Errorf("not leader: %v, ctx: %v", notLeader, ctx)); err != nil {
return false, errors.Trace(err)
}
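A hedged sketch of the retry decision this hunk touches: with a leader hint the cache switches to the named store, and without one the region is invalidated with the no-leader reason and the request backs off before retrying. The interface and types below are simplified stand-ins, not the real tikv client API.

package main

import "fmt"

// notLeader is a stand-in for the NotLeader region error payload.
type notLeader struct {
	newLeaderStoreID uint64 // 0 means the peer does not know the current leader
}

// cache is a stand-in exposing only the two operations this decision needs.
type cache interface {
	UpdateLeader(storeID uint64)
	InvalidateWithReason(reason string)
}

// onNotLeader returns true when the caller should retry the request.
func onNotLeader(c cache, e notLeader, backoff func() error) (bool, error) {
	if e.newLeaderStoreID != 0 {
		// The error names the new leader: switch to it and retry immediately.
		c.UpdateLeader(e.newLeaderStoreID)
		return true, nil
	}
	// No leader hint: the Raft group may be mid-election, or this peer may have
	// been isolated and removed from the group. Drop the cache entry, recording
	// the no-leader reason so the next load starts from a different peer, and
	// back off before retrying.
	c.InvalidateWithReason("no-leader")
	if err := backoff(); err != nil {
		return false, err
	}
	return true, nil
}

type fakeCache struct{ reason string }

func (f *fakeCache) UpdateLeader(uint64)           {}
func (f *fakeCache) InvalidateWithReason(r string) { f.reason = r }

func main() {
	c := &fakeCache{}
	retry, err := onNotLeader(c, notLeader{}, func() error { return nil })
	fmt.Println(retry, err, c.reason) // true <nil> no-leader
}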


@@ -572,6 +572,34 @@ func (s *testRegionRequestToSingleStoreSuite) TestOnMaxTimestampNotSyncedError(c
}()
}
func (s *testRegionRequestToThreeStoresSuite) TestSwitchPeerWhenNoLeader(c *C) {
var leaderAddr string
s.regionRequestSender.client = &fnClient{func(ctx context.Context, addr string, req *tikvrpc.Request, timeout time.Duration) (response *tikvrpc.Response, err error) {
if leaderAddr == "" {
leaderAddr = addr
}
// Returns OK when the sender switches to a different peer.
if leaderAddr != addr {
return &tikvrpc.Response{Resp: &kvrpcpb.RawPutResponse{}}, nil
}
return &tikvrpc.Response{Resp: &kvrpcpb.RawPutResponse{
RegionError: &errorpb.Error{NotLeader: &errorpb.NotLeader{}},
}}, nil
}}
req := tikvrpc.NewRequest(tikvrpc.CmdRawPut, &kvrpcpb.RawPutRequest{
Key: []byte("key"),
Value: []byte("value"),
})
bo := NewBackofferWithVars(context.Background(), 5, nil)
loc, err := s.cache.LocateKey(s.bo, []byte("key"))
c.Assert(err, IsNil)
resp, err := s.regionRequestSender.SendReq(bo, req, loc.Region, time.Second)
c.Assert(err, IsNil)
c.Assert(resp, NotNil)
}
func (s *testRegionRequestToThreeStoresSuite) loadAndGetLeaderStore(c *C) (*Store, string) {
region, err := s.regionRequestSender.regionCache.findRegionByKey(s.bo, []byte("a"), false)
c.Assert(err, IsNil)