Files
tidb/pkg/ddl/serverstate/syncer.go

211 lines
6.8 KiB
Go

// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package serverstate
import (
"context"
"encoding/json"
"fmt"
"time"
"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/ddl/logutil"
"github.com/pingcap/tidb/pkg/ddl/util"
"github.com/pingcap/tidb/pkg/metrics"
tidbutil "github.com/pingcap/tidb/pkg/util"
"go.etcd.io/etcd/api/v3/mvccpb"
clientv3 "go.etcd.io/etcd/client/v3"
"go.etcd.io/etcd/client/v3/concurrency"
atomicutil "go.uber.org/atomic"
"go.uber.org/zap"
)
const (
// keyOpDefaultRetryCnt is the retry count passed to the etcd get/put helpers used on the state key.
keyOpDefaultRetryCnt = 3
// keyOpDefaultTimeout is the default timeout applied to a single etcd get attempt.
keyOpDefaultTimeout = 1 * time.Second
// statePrompt is the tag placed in the log prefix of the etcd session created by the state syncer.
statePrompt = "global-state-syncer"
// StateUpgrading represents that the cluster global state is upgrading. It is exported for testing.
StateUpgrading = "upgrading"
// StateNormalRunning represents that the cluster global state is normal running. It is exported for testing.
// It is the empty string, so a zero-value StateInfo is in this state.
StateNormalRunning = ""
)
// Syncer is used to synchronize server state.
// Currently there are only 2 states: running/upgrading, and it is only used for the
// 'smooth upgrade' feature.
type Syncer interface {
// Init prepares the syncer for use. The etcd-based implementation in this file
// creates an etcd session, loads the current global state and starts watching
// the global state path.
Init(ctx context.Context) error
// UpdateGlobalState writes the given state to the global state path on etcd.
// NOTE(review): retry behavior is delegated to the underlying put helper — confirm its exact semantics there.
UpdateGlobalState(ctx context.Context, stateInfo *StateInfo) error
// GetGlobalState gets the global state from etcd.
GetGlobalState(ctx context.Context) (*StateInfo, error)
// IsUpgradingState returns whether the cluster state is upgrading.
IsUpgradingState() bool
// WatchChan returns the watch channel of the underlying global state watcher.
WatchChan() clientv3.WatchChan
// Rewatch asks the underlying watcher to watch the global state path again.
// NOTE(review): presumably called after the watch channel is closed or broken — confirm against callers.
Rewatch(ctx context.Context)
}
// StateInfo is the tidb cluster state.
// It will not be updated while the tidb cluster is upgrading.
// It is encoded as JSON with a single "state" field.
type StateInfo struct {
// State is the cluster state string, e.g. StateUpgrading or StateNormalRunning.
State string `json:"state"`
}
// NewStateInfo creates a StateInfo holding the given state.
func NewStateInfo(state string) *StateInfo {
	info := new(StateInfo)
	info.State = state
	return info
}
// Marshal encodes the `StateInfo` as JSON bytes.
// On failure it returns a nil slice and the traced encoding error.
func (info *StateInfo) Marshal() ([]byte, error) {
	encoded, err := json.Marshal(info)
	if err == nil {
		return encoded, nil
	}
	return nil, errors.Trace(err)
}
// Unmarshal decodes JSON bytes into the receiver `StateInfo`.
// The decoding error, if any, is returned as-is.
func (info *StateInfo) Unmarshal(v []byte) error {
	err := json.Unmarshal(v, info)
	return err
}
// etcdSyncer is a Syncer implementation based on etcd.
type etcdSyncer struct {
// etcdPath is the etcd key under which the marshaled global StateInfo is stored.
etcdPath string
// prompt is the tag used in the log prefix of the session created by Init.
prompt string
// etcdCli is the etcd client used for reading, writing and watching etcdPath.
etcdCli *clientv3.Client
// session is the etcd session created by Init.
session *concurrency.Session
// clusterState caches the state most recently loaded by GetGlobalState;
// IsUpgradingState answers from this cache.
clusterState *atomicutil.Pointer[StateInfo]
// globalStateWatcher watches etcdPath; it is started by Init and re-armed by Rewatch.
globalStateWatcher util.Watcher
}
// NewEtcdSyncer builds an etcd-backed Syncer for the state stored at etcdPath.
// The cached cluster state starts out as StateNormalRunning until Init or
// GetGlobalState loads the real value.
func NewEtcdSyncer(etcdCli *clientv3.Client, etcdPath string) Syncer {
	initialState := NewStateInfo(StateNormalRunning)
	syncer := &etcdSyncer{
		etcdPath:           etcdPath,
		prompt:             statePrompt,
		etcdCli:            etcdCli,
		clusterState:       atomicutil.NewPointer(initialState),
		globalStateWatcher: util.NewWatcher(),
	}
	return syncer
}
// Init implements Syncer.Init interface.
//
// It creates the etcd session, loads and caches the current global state, and
// then starts watching the state path. The time spent and the outcome are
// reported to the deploy-syncer histogram on every exit path.
func (s *etcdSyncer) Init(ctx context.Context) error {
	var err error
	begin := time.Now()
	defer func() {
		// err is read here, after the function body has set its final value.
		metrics.DeploySyncerHistogram.
			WithLabelValues(metrics.StateSyncerInit, metrics.RetLabel(err)).
			Observe(time.Since(begin).Seconds())
	}()

	sessionLogPrefix := fmt.Sprintf("[%s] %s", s.prompt, s.etcdPath)
	if s.session, err = tidbutil.NewSession(ctx, sessionLogPrefix, s.etcdCli, tidbutil.NewSessionDefaultRetryCnt, util.SessionTTL); err != nil {
		return errors.Trace(err)
	}

	var current *StateInfo
	if current, err = s.GetGlobalState(ctx); err != nil {
		return errors.Trace(err)
	}
	s.clusterState.Store(current)

	s.globalStateWatcher.Watch(ctx, s.etcdCli, s.etcdPath)
	return nil
}
// WatchChan implements Syncer.WatchChan interface.
// It hands back the channel exposed by the global state watcher.
func (s *etcdSyncer) WatchChan() clientv3.WatchChan {
	ch := s.globalStateWatcher.WatchChan()
	return ch
}
// Rewatch implements Syncer.Rewatch interface.
// It delegates to the global state watcher, using the syncer's own etcd
// client and state path.
func (s *etcdSyncer) Rewatch(ctx context.Context) {
	cli, path := s.etcdCli, s.etcdPath
	s.globalStateWatcher.Rewatch(ctx, cli, path)
}
// IsUpgradingState implements Syncer.IsUpgradingState interface.
// The answer comes from the cached cluster state; no etcd request is made.
func (s *etcdSyncer) IsUpgradingState() bool {
	cached := s.clusterState.Load()
	return cached.State == StateUpgrading
}
// getKeyValue reads key from etcd, making at most retryCnt attempts.
//
// Each attempt is bounded by timeout. Between attempts it waits 200ms, but the
// wait is abandoned as soon as ctx is done so that cancellation is not delayed
// by the back-off; no wait is performed after the final attempt.
//
// It returns:
//   - the key-values on success, or (nil, nil) when the key does not exist;
//   - the traced ctx error when ctx is done before an attempt starts;
//   - the traced error of the last attempt when every attempt fails.
//
// Note that a non-positive retryCnt performs no attempt and yields (nil, nil).
func (*etcdSyncer) getKeyValue(ctx context.Context, etcdCli *clientv3.Client, key string, retryCnt int, timeout time.Duration, opts ...clientv3.OpOption) ([]*mvccpb.KeyValue, error) {
	const retryInterval = 200 * time.Millisecond

	var err error
	for i := 0; i < retryCnt; i++ {
		if i > 0 {
			// Back off before retrying; wake up early if ctx is done instead of
			// sleeping through the cancellation.
			timer := time.NewTimer(retryInterval)
			select {
			case <-ctx.Done():
				timer.Stop()
			case <-timer.C:
			}
		}
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, errors.Trace(ctxErr)
		}

		childCtx, cancel := context.WithTimeout(ctx, timeout)
		resp, getErr := etcdCli.Get(childCtx, key, opts...)
		cancel()
		if getErr != nil {
			// Remember the failure so it can be reported if no attempt succeeds.
			err = getErr
			logutil.DDLLogger().Info("get key failed", zap.String("key", key), zap.Error(err))
			continue
		}
		if len(resp.Kvs) == 0 {
			return nil, nil
		}
		return resp.Kvs, nil
	}
	return nil, errors.Trace(err)
}
// GetGlobalState implements Syncer.GetGlobalState interface.
//
// It reads the value stored at s.etcdPath and decodes it into a StateInfo.
// When the key does not exist, a zero-value StateInfo is returned, whose State
// equals StateNormalRunning. On success the result is also cached in
// s.clusterState, which IsUpgradingState reads.
//
// An error is returned when the etcd read fails, when more than one key-value
// is returned, or when the stored value cannot be decoded.
//
// The latency is reported on every exit path, with the result label derived
// from the returned error.
func (s *etcdSyncer) GetGlobalState(ctx context.Context) (state *StateInfo, err error) {
	startTime := time.Now()
	defer func() {
		// err is the named result, so failures are labeled as such rather
		// than being silently left out of the histogram.
		metrics.OwnerHandleSyncerHistogram.WithLabelValues(metrics.UpdateGlobalState, metrics.RetLabel(err)).Observe(time.Since(startTime).Seconds())
	}()

	kvs, err := s.getKeyValue(ctx, s.etcdCli, s.etcdPath, keyOpDefaultRetryCnt, keyOpDefaultTimeout)
	if err != nil {
		return nil, errors.Trace(err)
	}

	switch len(kvs) {
	case 0:
		// No key stored: keep the zero value, i.e. the normal running state.
		state = &StateInfo{}
	case 1:
		state = &StateInfo{}
		if err = state.Unmarshal(kvs[0].Value); err != nil {
			logutil.DDLLogger().Warn("get global state failed", zap.String("key", s.etcdPath), zap.ByteString("value", kvs[0].Value), zap.Error(err))
			return nil, errors.Trace(err)
		}
	default:
		return nil, errors.Errorf("get key value count:%d wrong", len(kvs))
	}

	s.clusterState.Store(state)
	return state, nil
}
// UpdateGlobalState implements Syncer.UpdateGlobalState interface.
//
// The state is marshaled and written to s.etcdPath. A marshal failure is
// returned immediately without touching etcd or the metrics; otherwise the
// latency and outcome of the put are recorded and its traced error returned.
func (s *etcdSyncer) UpdateGlobalState(ctx context.Context, stateInfo *StateInfo) error {
	begin := time.Now()

	payload, marshalErr := stateInfo.Marshal()
	if marshalErr != nil {
		return marshalErr
	}

	putErr := util.PutKVToEtcd(ctx, s.etcdCli, keyOpDefaultRetryCnt, s.etcdPath, string(payload))
	metrics.OwnerHandleSyncerHistogram.
		WithLabelValues(metrics.UpdateGlobalState, metrics.RetLabel(putErr)).
		Observe(time.Since(begin).Seconds())
	return errors.Trace(putErr)
}