Files
tidb/br/pkg/task/operator/cmd.go

104 lines
2.7 KiB
Go

// Copyright 2023 PingCAP, Inc. Licensed under Apache-2.0.
package operator
import (
"context"
"crypto/tls"
"strings"
"time"
"github.com/pingcap/log"
"github.com/pingcap/tidb/br/pkg/logutil"
"github.com/pingcap/tidb/br/pkg/pdutil"
"github.com/pingcap/tidb/br/pkg/task"
"github.com/pingcap/tidb/br/pkg/utils"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
)
func dialPD(ctx context.Context, cfg *task.Config) (*pdutil.PdController, error) {
pdAddrs := strings.Join(cfg.PD, ",")
var tc *tls.Config
if cfg.TLS.IsEnabled() {
var err error
tc, err = cfg.TLS.ToTLSConfig()
if err != nil {
return nil, err
}
}
mgr, err := pdutil.NewPdController(ctx, pdAddrs, tc, cfg.TLS.ToPDSecurityOption())
if err != nil {
return nil, err
}
return mgr, nil
}
func cleanUpWith(f func(ctx context.Context)) {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
f(ctx)
}
// PauseGCAndScheduler blocks the current goroutine and pause the GC safepoint and remove the scheduler by the config.
// This function will block until the context being canceled.
func PauseGCAndScheduler(ctx context.Context, cfg *PauseGcConfig) error {
mgr, err := dialPD(ctx, &cfg.Config)
if err != nil {
return err
}
eg, ectx := errgroup.WithContext(ctx)
eg.Go(func() error { return pauseGCKeeper(ectx, cfg, mgr) })
eg.Go(func() error { return pauseSchedulerKeeper(ectx, mgr) })
return eg.Wait()
}
func pauseGCKeeper(ctx context.Context, cfg *PauseGcConfig, ctl *pdutil.PdController) error {
// Note: should we remove the service safepoint as soon as this exits?
sp := utils.BRServiceSafePoint{
ID: utils.MakeSafePointID(),
TTL: int64(cfg.TTL.Seconds()),
BackupTS: cfg.SafePoint,
}
if sp.BackupTS == 0 {
rts, err := ctl.GetMinResolvedTS(ctx)
if err != nil {
return err
}
log.Info("No service safepoint provided, using the minimal resolved TS.", zap.Uint64("min-resolved-ts", rts))
sp.BackupTS = rts
}
err := utils.StartServiceSafePointKeeper(ctx, ctl.GetPDClient(), sp)
if err != nil {
return err
}
log.Info("GC is paused.", zap.Object("safepoint", sp))
// Note: in fact we can directly return here.
// But the name `keeper` implies once the function exits,
// the GC should be resume, so let's block here.
<-ctx.Done()
return nil
}
func pauseSchedulerKeeper(ctx context.Context, ctl *pdutil.PdController) error {
undo, err := ctl.RemoveAllPDSchedulers(ctx)
if undo != nil {
defer cleanUpWith(func(ctx context.Context) {
if err := undo(ctx); err != nil {
log.Warn("failed to restore pd scheduler.", logutil.ShortError(err))
}
})
}
if err != nil {
return err
}
log.Info("Schedulers are paused.")
// Wait until the context canceled.
// So we can properly do the clean up work.
<-ctx.Done()
return nil
}