155 lines
5.0 KiB
Go
155 lines
5.0 KiB
Go
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
package session
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
"github.com/pingcap/failpoint"
|
|
"github.com/pingcap/log"
|
|
"github.com/pingcap/tidb/pkg/ddl"
|
|
"github.com/pingcap/tidb/pkg/ddl/serverstate"
|
|
"github.com/pingcap/tidb/pkg/domain"
|
|
dist_store "github.com/pingcap/tidb/pkg/dxf/framework/storage"
|
|
"github.com/pingcap/tidb/pkg/kv"
|
|
"github.com/pingcap/tidb/pkg/owner"
|
|
"github.com/pingcap/tidb/pkg/sessionctx"
|
|
"github.com/pingcap/tidb/pkg/util/logutil"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// isContextDone reports whether ctx has already been cancelled or has passed
// its deadline. It never blocks: the Done channel is polled once.
func isContextDone(ctx context.Context) bool {
	select {
	case <-ctx.Done():
		return true
	default:
		// Done is not closed (or is nil, e.g. for context.Background()).
		return false
	}
}
|
|
|
|
// SyncUpgradeState syncs upgrade state to etcd.
|
|
func SyncUpgradeState(s sessionctx.Context, timeout time.Duration) error {
|
|
ctx, cancelFunc := context.WithTimeout(context.Background(), timeout)
|
|
defer cancelFunc()
|
|
dom := domain.GetDomain(s)
|
|
err := dom.DDL().StateSyncer().UpdateGlobalState(ctx, serverstate.NewStateInfo(serverstate.StateUpgrading))
|
|
logger := logutil.BgLogger().With(zap.String("category", "upgrading"))
|
|
if err != nil {
|
|
logger.Error("update global state failed", zap.String("state", serverstate.StateUpgrading), zap.Error(err))
|
|
return err
|
|
}
|
|
|
|
interval := 200 * time.Millisecond
|
|
for i := 0; ; i++ {
|
|
if isContextDone(ctx) {
|
|
logger.Error("get owner op failed", zap.Duration("timeout", timeout), zap.Error(err))
|
|
return ctx.Err()
|
|
}
|
|
|
|
var op owner.OpType
|
|
childCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
|
|
op, err = owner.GetOwnerOpValue(childCtx, dom.GetEtcdClient(), ddl.DDLOwnerKey)
|
|
cancel()
|
|
if err == nil && op.IsSyncedUpgradingState() {
|
|
break
|
|
}
|
|
if i%10 == 0 {
|
|
logger.Warn("get owner op failed", zap.Stringer("op", op), zap.Error(err))
|
|
}
|
|
time.Sleep(interval)
|
|
}
|
|
|
|
logger.Info("update global state to upgrading", zap.String("state", serverstate.StateUpgrading))
|
|
return nil
|
|
}
|
|
|
|
// SyncNormalRunning syncs normal state to etcd.
|
|
func SyncNormalRunning(s sessionctx.Context) error {
|
|
bgCtx := context.Background()
|
|
failpoint.Inject("mockResumeAllJobsFailed", func(val failpoint.Value) {
|
|
if val.(bool) {
|
|
dom := domain.GetDomain(s)
|
|
//nolint: errcheck
|
|
dom.DDL().StateSyncer().UpdateGlobalState(bgCtx, serverstate.NewStateInfo(serverstate.StateNormalRunning))
|
|
failpoint.Return(nil)
|
|
}
|
|
})
|
|
|
|
logger := logutil.BgLogger().With(zap.String("category", "upgrading"))
|
|
jobErrs, err := ddl.ResumeAllJobsBySystem(s)
|
|
if err != nil {
|
|
logger.Warn("resume all paused jobs failed", zap.Error(err))
|
|
}
|
|
for _, e := range jobErrs {
|
|
logger.Warn("resume the job failed", zap.Error(e))
|
|
}
|
|
|
|
if mgr, _ := dist_store.GetTaskManager(); mgr != nil {
|
|
ctx := kv.WithInternalSourceType(bgCtx, kv.InternalDistTask)
|
|
err := mgr.AdjustTaskOverflowConcurrency(ctx, s)
|
|
if err != nil {
|
|
log.Warn("cannot adjust task overflow concurrency", zap.Error(err))
|
|
}
|
|
}
|
|
|
|
ctx, cancelFunc := context.WithTimeout(bgCtx, 3*time.Second)
|
|
defer cancelFunc()
|
|
dom := domain.GetDomain(s)
|
|
err = dom.DDL().StateSyncer().UpdateGlobalState(ctx, serverstate.NewStateInfo(serverstate.StateNormalRunning))
|
|
if err != nil {
|
|
logger.Error("update global state to normal failed", zap.Error(err))
|
|
return err
|
|
}
|
|
logger.Info("update global state to normal running finished")
|
|
return nil
|
|
}
|
|
|
|
// IsUpgradingClusterState checks whether the global state is upgrading.
|
|
func IsUpgradingClusterState(s sessionctx.Context) (bool, error) {
|
|
dom := domain.GetDomain(s)
|
|
ctx, cancelFunc := context.WithTimeout(context.Background(), 3*time.Second)
|
|
defer cancelFunc()
|
|
stateInfo, err := dom.DDL().StateSyncer().GetGlobalState(ctx)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
return stateInfo.State == serverstate.StateUpgrading, nil
|
|
}
|
|
|
|
func isUpgradingClusterStateWithRetry(s sessionctx.Context, oldVer, newVer int64, timeout time.Duration) {
|
|
now := time.Now()
|
|
interval := 200 * time.Millisecond
|
|
logger := logutil.BgLogger().With(zap.String("category", "upgrading"))
|
|
for i := 0; ; i++ {
|
|
isUpgrading, err := IsUpgradingClusterState(s)
|
|
if err == nil {
|
|
logger.Info("get global state", zap.Int64("old version", oldVer), zap.Int64("latest version", newVer), zap.Bool("is upgrading state", isUpgrading))
|
|
return
|
|
}
|
|
|
|
if time.Since(now) >= timeout {
|
|
logger.Error("get global state failed", zap.Int64("old version", oldVer), zap.Int64("latest version", newVer), zap.Error(err))
|
|
return
|
|
}
|
|
if i%25 == 0 {
|
|
logger.Warn("get global state failed", zap.Int64("old version", oldVer), zap.Int64("latest version", newVer), zap.Error(err))
|
|
}
|
|
time.Sleep(interval)
|
|
}
|
|
}
|