// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package checkpoint

import (
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"math/rand"
	"strings"
	"sync"
	"time"

	"github.com/pingcap/errors"
	"github.com/pingcap/failpoint"
	backuppb "github.com/pingcap/kvproto/pkg/brpb"
	"github.com/pingcap/log"
	"github.com/pingcap/tidb/br/pkg/logutil"
	"github.com/pingcap/tidb/br/pkg/metautil"
	"github.com/pingcap/tidb/br/pkg/rtree"
	"github.com/pingcap/tidb/br/pkg/storage"
	"github.com/pingcap/tidb/br/pkg/summary"
	"github.com/pingcap/tidb/br/pkg/utils"
	"github.com/tikv/client-go/v2/oracle"
	"go.uber.org/zap"
	"golang.org/x/sync/errgroup"
)
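
// CheckpointDir is the sub directory, under the task's storage, that holds the
// checkpoint related files (data, checksum and lock files).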
const CheckpointDir = "/checkpoints"
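
// flushPosition records where the checkpoint runner writes its files: the
// directory for checkpoint data, the directory for checkpoint checksums, and
// the path of the checkpoint lock file.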
type flushPosition struct {
	CheckpointDataDir     string
	CheckpointChecksumDir string
	CheckpointLockPath    string
}

const MaxChecksumTotalCost float64 = 60.0

const defaultTickDurationForFlush = 30 * time.Second

const defaultTckDurationForChecksum = 5 * time.Second

const defaultTickDurationForLock = 4 * time.Minute

const lockTimeToLive = 5 * time.Minute
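
// KeyType constrains the group key of a checkpoint message. It is either a
// backup key (~BackupKeyType) or a restore key (~RestoreKeyType), which are
// defined elsewhere in this package.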
type KeyType interface {
	~BackupKeyType | ~RestoreKeyType
}
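
// RangeType wraps an rtree.Range so that a processed range can be stored in a
// checkpoint group: its IdentKey (the range's start key) satisfies the
// ValueType interface below.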
type RangeType struct {
	*rtree.Range
}

func (r RangeType) IdentKey() []byte {
	return r.StartKey
}

type ValueType interface {
	IdentKey() []byte
}
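
// CheckpointMessage is what callers hand to the checkpoint runner: a batch of
// processed values that all belong to the same group key.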
type CheckpointMessage[K KeyType, V ValueType] struct {
	// start-key of the origin range
	GroupKey K

	Group []V
}

// A Checkpoint Range File is like this:
//
//   CheckpointData
// +----------------+          RangeGroupData                      RangeGroup
// | DureTime       |     +--------------------------+ encrypted  +--------------------+
// | RangeGroupData-+---> | RangeGroupsEncriptedData-+----------> | GroupKey/TableID   |
// | RangeGroupData |     | Checksum                 |            | Range              |
// | ...            |     | CipherIv                 |            | ...                |
// | RangeGroupData |     | Size                     |            | Range              |
// +----------------+     +--------------------------+            +--------------------+
//
// For restore, since there is no group key, there is only one RangeGroupData
// with multiple ranges in the CheckpointData.

type RangeGroup[K KeyType, V ValueType] struct {
	GroupKey K   `json:"group-key"`
	Group    []V `json:"groups"`
}
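
// RangeGroupData is the persisted form of one RangeGroup: the JSON-encoded
// group encrypted with the backup cipher, plus the SHA-256 checksum of the
// plaintext, the cipher IV, and the plaintext size, so that the data can be
// verified and decrypted when the checkpoint is reloaded.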
type RangeGroupData struct {
	RangeGroupsEncriptedData []byte
	Checksum                 []byte
	CipherIv                 []byte

	Size int
}

type CheckpointData struct {
	DureTime        time.Duration     `json:"dure-time"`
	RangeGroupMetas []*RangeGroupData `json:"range-group-metas"`
}

// A Checkpoint Checksum File is like this:
//
//  ChecksumInfo        ChecksumItems        ChecksumItem
// +------------+      +--------------+     +--------------+
// | Content----+----> | ChecksumItem-+---> | TableID      |
// | Checksum   |      | ChecksumItem |     | Crc64xor     |
// | DureTime   |      | ...          |     | TotalKvs     |
// +------------+      | ChecksumItem |     | TotalBytes   |
//                     +--------------+     +--------------+

type ChecksumItem struct {
	TableID    int64  `json:"table-id"`
	Crc64xor   uint64 `json:"crc64-xor"`
	TotalKvs   uint64 `json:"total-kvs"`
	TotalBytes uint64 `json:"total-bytes"`
}

type ChecksumItems struct {
	Items []*ChecksumItem `json:"checksum-items"`
}

type ChecksumInfo struct {
	Content  []byte        `json:"content"`
	Checksum []byte        `json:"checksum"`
	DureTime time.Duration `json:"dure-time"`
}
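
// GlobalTimer supplies a global (physical, logical) timestamp. The checkpoint
// runner uses it, with retry, to build the lock id and to decide whether the
// checkpoint lock has expired.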
type GlobalTimer interface {
	GetTS(context.Context) (int64, int64, error)
}
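
// CheckpointRunner buffers the ranges and table checksums that have already
// been processed, grouped by key, and periodically persists them to the
// external storage so that an interrupted backup/restore task can skip them
// on retry.
//
// A rough usage sketch, using only identifiers declared in this file (the real
// wiring, including the concrete key/value types and constructors, lives
// elsewhere in this package):
//
//	runner := newCheckpointRunner[K, V](ctx, store, cipher, timer, pos, marshaler)
//	runner.startCheckpointMainLoop(ctx,
//		defaultTickDurationForFlush, defaultTckDurationForChecksum, defaultTickDurationForLock)
//	_ = runner.Append(ctx, &CheckpointMessage[K, V]{GroupKey: key, Group: values})
//	_ = runner.FlushChecksum(ctx, tableID, crc64xor, totalKvs, totalBytes)
//	runner.WaitForFinish(ctx, true)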
type CheckpointRunner[K KeyType, V ValueType] struct {
	flushPosition
	lockId uint64

	meta     map[K]*RangeGroup[K, V]
	checksum ChecksumItems

	valueMarshaler func(*RangeGroup[K, V]) ([]byte, error)

	storage storage.ExternalStorage
	cipher  *backuppb.CipherInfo
	timer   GlobalTimer

	appendCh       chan *CheckpointMessage[K, V]
	checksumCh     chan *ChecksumItem
	doneCh         chan bool
	metaCh         chan map[K]*RangeGroup[K, V]
	checksumMetaCh chan ChecksumItems
	lockCh         chan struct{}
	errCh          chan error
	err            error
	errLock        sync.RWMutex

	wg sync.WaitGroup
}
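
// newCheckpointRunner builds a runner that persists checkpoint data to the
// given external storage, encrypting it with cipher and using vm to marshal
// each range group. The returned runner stays idle until its main loop is
// started.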
func newCheckpointRunner[K KeyType, V ValueType](
	ctx context.Context,
	storage storage.ExternalStorage,
	cipher *backuppb.CipherInfo,
	timer GlobalTimer,
	f flushPosition,
	vm func(*RangeGroup[K, V]) ([]byte, error),
) *CheckpointRunner[K, V] {
	return &CheckpointRunner[K, V]{
		flushPosition: f,

		meta:     make(map[K]*RangeGroup[K, V]),
		checksum: ChecksumItems{Items: make([]*ChecksumItem, 0)},

		valueMarshaler: vm,

		storage: storage,
		cipher:  cipher,
		timer:   timer,

		appendCh:       make(chan *CheckpointMessage[K, V]),
		checksumCh:     make(chan *ChecksumItem),
		doneCh:         make(chan bool, 1),
		metaCh:         make(chan map[K]*RangeGroup[K, V]),
		checksumMetaCh: make(chan ChecksumItems),
		lockCh:         make(chan struct{}),
		errCh:          make(chan error, 1),
		err:            nil,
	}
}
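
// FlushChecksum sends a table's checksum result (crc64xor, total kvs and total
// bytes) to the checkpoint runner; it is buffered in memory and written out by
// the next checksum flush. It is a convenience wrapper around FlushChecksumItem.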
func (r *CheckpointRunner[K, V]) FlushChecksum(
	ctx context.Context,
	tableID int64,
	crc64xor uint64,
	totalKvs uint64,
	totalBytes uint64,
) error {
	checksumItem := &ChecksumItem{
		TableID:    tableID,
		Crc64xor:   crc64xor,
		TotalKvs:   totalKvs,
		TotalBytes: totalBytes,
	}
	return r.FlushChecksumItem(ctx, checksumItem)
}

func (r *CheckpointRunner[K, V]) FlushChecksumItem(
	ctx context.Context,
	checksumItem *ChecksumItem,
) error {
	select {
	case <-ctx.Done():
		return errors.Annotatef(ctx.Err(), "failed to append checkpoint checksum item")
	case err, ok := <-r.errCh:
		if !ok {
			r.errLock.RLock()
			err = r.err
			r.errLock.RUnlock()
			return errors.Annotate(err, "[checkpoint] Checksum: failed to append checkpoint checksum item")
		}
		return err
	case r.checksumCh <- checksumItem:
		return nil
	}
}
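
// Append sends the processed ranges of one group to the checkpoint runner's
// main loop, where they are merged into the in-memory meta until the next
// flush. It returns immediately with the stored error if the runner has
// already failed.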
func (r *CheckpointRunner[K, V]) Append(
	ctx context.Context,
	message *CheckpointMessage[K, V],
) error {
	select {
	case <-ctx.Done():
		return errors.Annotatef(ctx.Err(), "failed to append checkpoint message")
	case err, ok := <-r.errCh:
		if !ok {
			r.errLock.RLock()
			err = r.err
			r.errLock.RUnlock()
			return errors.Annotate(err, "[checkpoint] Append: failed to append checkpoint message")
		}
		return err
	case r.appendCh <- message:
		return nil
	}
}

// Note: cannot be called in parallel with the `Append` function.
func (r *CheckpointRunner[K, V]) WaitForFinish(ctx context.Context, flush bool) {
	if r.doneCh != nil {
		select {
		case r.doneCh <- flush:

		default:
			log.Warn("not the first time closing the checkpoint runner", zap.String("category", "checkpoint"))
		}
	}
	// wait for the range flusher to exit
	r.wg.Wait()
	// remove the checkpoint lock
	if r.lockId > 0 {
		err := r.storage.DeleteFile(ctx, r.CheckpointLockPath)
		if err != nil {
			log.Warn("failed to remove the checkpoint lock", zap.Error(err))
		}
	}
}

// Send the checksum to the flush goroutine, and reset the CheckpointRunner's checksum
func (r *CheckpointRunner[K, V]) flushChecksum(ctx context.Context, errCh chan error) error {
	checksum := ChecksumItems{
		Items: r.checksum.Items,
	}
	r.checksum.Items = make([]*ChecksumItem, 0)
	// do flush
	select {
	case <-ctx.Done():
		return ctx.Err()
	case err := <-errCh:
		return err
	case r.checksumMetaCh <- checksum:
	}
	return nil
}

// Send the meta to the flush goroutine, and reset the CheckpointRunner's meta
func (r *CheckpointRunner[K, V]) flushMeta(ctx context.Context, errCh chan error) error {
	meta := r.meta
	r.meta = make(map[K]*RangeGroup[K, V])
	// do flush
	select {
	case <-ctx.Done():
		return ctx.Err()
	case err := <-errCh:
		return err
	case r.metaCh <- meta:
	}
	return nil
}

func (r *CheckpointRunner[K, V]) setLock(ctx context.Context, errCh chan error) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case err := <-errCh:
		return err
	case r.lockCh <- struct{}{}:
	}
	return nil
}

// start a goroutine to flush the meta, which is sent from the checkpoint main loop, to the external storage
func (r *CheckpointRunner[K, V]) startCheckpointFlushLoop(ctx context.Context, wg *sync.WaitGroup) chan error {
	errCh := make(chan error, 1)
	wg.Add(1)
	flushWorker := func(ctx context.Context, errCh chan error) {
		defer wg.Done()
		for {
			select {
			case <-ctx.Done():
				if err := ctx.Err(); err != nil {
					errCh <- err
				}
				return
			case meta, ok := <-r.metaCh:
				if !ok {
					log.Info("stop checkpoint flush worker")
					return
				}
				if err := r.doFlush(ctx, meta); err != nil {
					errCh <- errors.Annotate(err, "failed to flush checkpoint data.")
					return
				}
			case checksums, ok := <-r.checksumMetaCh:
				if !ok {
					log.Info("stop checkpoint flush worker")
					return
				}
				if err := r.doChecksumFlush(ctx, checksums); err != nil {
					errCh <- errors.Annotate(err, "failed to flush checkpoint checksum.")
					return
				}
			case _, ok := <-r.lockCh:
				if !ok {
					log.Info("stop checkpoint flush worker")
					return
				}
				if err := r.updateLock(ctx); err != nil {
					errCh <- errors.Annotate(err, "failed to update checkpoint lock.")
					return
				}
			}
		}
	}

	go flushWorker(ctx, errCh)
	return errCh
}
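
// sendError records the first error of the runner: it is published on errCh
// (which is then closed), stored under errLock so that later callers of
// Append/FlushChecksum can read it, and logged. Subsequent errors are only
// logged.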
func (r *CheckpointRunner[K, V]) sendError(err error) {
	select {
	case r.errCh <- err:
		log.Error("send the error", zap.String("category", "checkpoint"), zap.Error(err))
		r.errLock.Lock()
		r.err = err
		r.errLock.Unlock()
		close(r.errCh)
	default:
		log.Error("errCh is blocked", logutil.ShortError(err))
	}
}
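
// startCheckpointMainLoop starts the checkpoint main loop in a background
// goroutine. The loop buffers appended ranges and checksum items, hands them
// to the flush worker on their respective tickers, refreshes the checkpoint
// lock on the optional lock ticker, and shuts everything down once
// WaitForFinish signals doneCh.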
func (r *CheckpointRunner[K, V]) startCheckpointMainLoop(
	ctx context.Context,
	tickDurationForFlush,
	tickDurationForChecksum,
	tickDurationForLock time.Duration,
) {
	failpoint.Inject("checkpoint-more-quickly-flush", func(_ failpoint.Value) {
		tickDurationForChecksum = 1 * time.Second
		tickDurationForFlush = 3 * time.Second
		if tickDurationForLock > 0 {
			tickDurationForLock = 1 * time.Second
		}
		log.Info("adjust the tick duration for flush or lock",
			zap.Duration("flush", tickDurationForFlush),
			zap.Duration("checksum", tickDurationForChecksum),
			zap.Duration("lock", tickDurationForLock),
		)
	})
	r.wg.Add(1)
	checkpointLoop := func(ctx context.Context) {
		defer r.wg.Done()
		cctx, cancel := context.WithCancel(ctx)
		defer cancel()
		var wg sync.WaitGroup
		errCh := r.startCheckpointFlushLoop(cctx, &wg)
		flushTicker := time.NewTicker(tickDurationForFlush)
		defer flushTicker.Stop()
		checksumTicker := time.NewTicker(tickDurationForChecksum)
		defer checksumTicker.Stop()
		// register time ticker, the lock ticker is optional
		lockTicker := dispatcherTicker(tickDurationForLock)
		defer lockTicker.Stop()
		for {
			select {
			case <-ctx.Done():
				if err := ctx.Err(); err != nil {
					r.sendError(err)
				}
				return
			case <-lockTicker.Ch():
				if err := r.setLock(ctx, errCh); err != nil {
					r.sendError(err)
					return
				}
			case <-checksumTicker.C:
				if err := r.flushChecksum(ctx, errCh); err != nil {
					r.sendError(err)
					return
				}
			case <-flushTicker.C:
				if err := r.flushMeta(ctx, errCh); err != nil {
					r.sendError(err)
					return
				}
			case msg := <-r.appendCh:
				groups, exist := r.meta[msg.GroupKey]
				if !exist {
					groups = &RangeGroup[K, V]{
						GroupKey: msg.GroupKey,
						Group:    make([]V, 0),
					}
					r.meta[msg.GroupKey] = groups
				}
				groups.Group = append(groups.Group, msg.Group...)
			case msg := <-r.checksumCh:
				r.checksum.Items = append(r.checksum.Items, msg)
			case flush := <-r.doneCh:
				log.Info("stop checkpoint runner")
				if flush {
					// NOTE: the exit step, don't send error any more.
					if err := r.flushMeta(ctx, errCh); err != nil {
						log.Error("failed to flush checkpoint meta", zap.Error(err))
					} else if err := r.flushChecksum(ctx, errCh); err != nil {
						log.Error("failed to flush checkpoint checksum", zap.Error(err))
					}
				}
				// close the channels to the flush worker
				// and wait for it to consume all the metas
				close(r.metaCh)
				close(r.checksumMetaCh)
				close(r.lockCh)
				wg.Wait()
				return
			case err := <-errCh:
				// pass flush worker's error back
				r.sendError(err)
				return
			}
		}
	}

	go checkpointLoop(ctx)
}

// flush the checksum to the external storage
func (r *CheckpointRunner[K, V]) doChecksumFlush(ctx context.Context, checksumItems ChecksumItems) error {
	if len(checksumItems.Items) == 0 {
		return nil
	}
	content, err := json.Marshal(checksumItems)
	if err != nil {
		return errors.Trace(err)
	}

	checksum := sha256.Sum256(content)
	checksumInfo := &ChecksumInfo{
		Content:  content,
		Checksum: checksum[:],
		DureTime: summary.NowDureTime(),
	}

	data, err := json.Marshal(checksumInfo)
	if err != nil {
		return errors.Trace(err)
	}

	fname := fmt.Sprintf("%s/t%d_and__.cpt", r.CheckpointChecksumDir, checksumItems.Items[0].TableID)
	if err = r.storage.WriteFile(ctx, fname, data); err != nil {
		return errors.Annotatef(err, "failed to write file %s for checkpoint checksum", fname)
	}

	failpoint.Inject("failed-after-checkpoint-flushes-checksum", func(_ failpoint.Value) {
		failpoint.Return(errors.Errorf("failpoint: failed after checkpoint flushes checksum"))
	})
	return nil
}

// flush the meta to the external storage
func (r *CheckpointRunner[K, V]) doFlush(ctx context.Context, meta map[K]*RangeGroup[K, V]) error {
	if len(meta) == 0 {
		return nil
	}

	checkpointData := &CheckpointData{
		DureTime:        summary.NowDureTime(),
		RangeGroupMetas: make([]*RangeGroupData, 0, len(meta)),
	}

	var fname []byte = nil

	for _, group := range meta {
		if len(group.Group) == 0 {
			continue
		}

		// use the first item's group-key and sub-range-key as the filename
		if len(fname) == 0 {
			fname = append([]byte(fmt.Sprint(group.GroupKey, '.', '.')), group.Group[0].IdentKey()...)
		}

		// Flush the metaFile to storage
		content, err := r.valueMarshaler(group)
		if err != nil {
			return errors.Trace(err)
		}

		encryptBuff, iv, err := metautil.Encrypt(content, r.cipher)
		if err != nil {
			return errors.Trace(err)
		}

		checksum := sha256.Sum256(content)

		checkpointData.RangeGroupMetas = append(checkpointData.RangeGroupMetas, &RangeGroupData{
			RangeGroupsEncriptedData: encryptBuff,
			Checksum:                 checksum[:],
			Size:                     len(content),
			CipherIv:                 iv,
		})
	}

	if len(checkpointData.RangeGroupMetas) > 0 {
		data, err := json.Marshal(checkpointData)
		if err != nil {
			return errors.Trace(err)
		}

		checksum := sha256.Sum256(fname)
		checksumEncoded := base64.URLEncoding.EncodeToString(checksum[:])
		path := fmt.Sprintf("%s/%s_%d.cpt", r.CheckpointDataDir, checksumEncoded, rand.Uint64())
		if err := r.storage.WriteFile(ctx, path, data); err != nil {
			return errors.Trace(err)
		}
	}

	failpoint.Inject("failed-after-checkpoint-flushes", func(_ failpoint.Value) {
		failpoint.Return(errors.Errorf("failpoint: failed after checkpoint flushes"))
	})
	return nil
}
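
// CheckpointLock is the JSON content of the checkpoint lock file: the id of
// the BR process holding the lock (a TSO) and the physical time, in
// milliseconds, at which the lock expires.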
type CheckpointLock struct {
	LockId   uint64 `json:"lock-id"`
	ExpireAt int64  `json:"expire-at"`
}

// get ts with retry
func (r *CheckpointRunner[K, V]) getTS(ctx context.Context) (int64, int64, error) {
	var (
		p     int64 = 0
		l     int64 = 0
		retry int   = 0
	)
	errRetry := utils.WithRetry(ctx, func() error {
		var err error
		p, l, err = r.timer.GetTS(ctx)
		if err != nil {
			retry++
			log.Info("failed to get ts", zap.Int("retry", retry), zap.Error(err))
			return err
		}

		return nil
	}, utils.NewPDReqBackoffer())

	return p, l, errors.Trace(errRetry)
}

// flush the lock to the external storage
func (r *CheckpointRunner[K, V]) flushLock(ctx context.Context, p int64) error {
	lock := &CheckpointLock{
		LockId:   r.lockId,
		ExpireAt: p + lockTimeToLive.Milliseconds(),
	}
	log.Info("start to flush the checkpoint lock", zap.Int64("lock-at", p),
		zap.Int64("expire-at", lock.ExpireAt))
	data, err := json.Marshal(lock)
	if err != nil {
		return errors.Trace(err)
	}

	err = r.storage.WriteFile(ctx, r.CheckpointLockPath, data)
	return errors.Trace(err)
}

// check whether this lock belongs to this BR
func (r *CheckpointRunner[K, V]) checkLockFile(ctx context.Context, now int64) error {
	data, err := r.storage.ReadFile(ctx, r.CheckpointLockPath)
	if err != nil {
		return errors.Trace(err)
	}
	lock := &CheckpointLock{}
	err = json.Unmarshal(data, lock)
	if err != nil {
		return errors.Trace(err)
	}
	if lock.ExpireAt <= now {
		if lock.LockId > r.lockId {
return errors.Errorf("There are another BR(%d) running after but setting lock before this one(%d). "+
|
|
"Please check whether the BR is running. If not, you can retry.", lock.LockId, r.lockId)
|
|
		}
		if lock.LockId == r.lockId {
			log.Warn("The lock has expired.",
				zap.Int64("expire-at(ms)", lock.ExpireAt), zap.Int64("now(ms)", now))
		}
	} else if lock.LockId != r.lockId {
		return errors.Errorf("The existing lock will expire in %d seconds. "+
			"There may be another BR(%d) running. If not, you can wait for the lock to expire, "+
			"or delete the file `%s%s` manually.",
			(lock.ExpireAt-now)/1000, lock.LockId, strings.TrimRight(r.storage.URI(), "/"), r.CheckpointLockPath)
	}

	return nil
}

// generate a new lock and flush the lock to the external storage
func (r *CheckpointRunner[K, V]) updateLock(ctx context.Context) error {
	p, _, err := r.getTS(ctx)
	if err != nil {
		return errors.Trace(err)
	}
	if err = r.checkLockFile(ctx, p); err != nil {
		return errors.Trace(err)
	}
	if err = r.flushLock(ctx, p); err != nil {
		return errors.Trace(err)
	}

	failpoint.Inject("failed-after-checkpoint-updates-lock", func(_ failpoint.Value) {
		failpoint.Return(errors.Errorf("failpoint: failed after checkpoint updates lock"))
	})

	return nil
}

// Attempt to initialize the lock. The backup needs to stop when there is an unexpired lock held by another BR.
func (r *CheckpointRunner[K, V]) initialLock(ctx context.Context) error {
	p, l, err := r.getTS(ctx)
	if err != nil {
		return errors.Trace(err)
	}
	r.lockId = oracle.ComposeTS(p, l)
	exist, err := r.storage.FileExists(ctx, r.CheckpointLockPath)
	if err != nil {
		return errors.Trace(err)
	}
	if exist {
		if err := r.checkLockFile(ctx, p); err != nil {
			return errors.Trace(err)
		}
	}
	if err = r.flushLock(ctx, p); err != nil {
		return errors.Trace(err)
	}

	// wait for 3 seconds to check whether the lock file is overwritten by another BR
	time.Sleep(3 * time.Second)
	err = r.checkLockFile(ctx, p)
	return errors.Trace(err)
}

// walk through the checkpoint range files, retrieve the metadata of the backed-up/restored ranges,
// and return the total time cost of the past executions
func walkCheckpointFile[K KeyType, V ValueType](
	ctx context.Context,
	s storage.ExternalStorage,
	cipher *backuppb.CipherInfo,
	subDir string,
	fn func(groupKey K, value V),
) (time.Duration, error) {
	// records the total time cost in the past executions
	var pastDureTime time.Duration = 0
	err := s.WalkDir(ctx, &storage.WalkOption{SubDir: subDir}, func(path string, size int64) error {
		if strings.HasSuffix(path, ".cpt") {
			content, err := s.ReadFile(ctx, path)
			if err != nil {
				return errors.Trace(err)
			}

			checkpointData := &CheckpointData{}
			if err = json.Unmarshal(content, checkpointData); err != nil {
				log.Error("failed to unmarshal the checkpoint data info, skip it", zap.Error(err))
				return nil
			}

			if checkpointData.DureTime > pastDureTime {
				pastDureTime = checkpointData.DureTime
			}
			for _, meta := range checkpointData.RangeGroupMetas {
				decryptContent, err := metautil.Decrypt(meta.RangeGroupsEncriptedData, cipher, meta.CipherIv)
				if err != nil {
					return errors.Trace(err)
				}

				checksum := sha256.Sum256(decryptContent)
				if !bytes.Equal(meta.Checksum, checksum[:]) {
					log.Error("checkpoint checksum info's checksum mismatch, skip it",
						zap.ByteString("expect", meta.Checksum),
						zap.ByteString("got", checksum[:]),
					)
					continue
				}

				group := &RangeGroup[K, V]{}
				if err = json.Unmarshal(decryptContent, group); err != nil {
					return errors.Trace(err)
				}

				for _, g := range group.Group {
					fn(group.GroupKey, g)
				}
			}
		}
		return nil
	})

	return pastDureTime, errors.Trace(err)
}

// load checkpoint meta data from external storage and unmarshal back
func loadCheckpointMeta[T any](ctx context.Context, s storage.ExternalStorage, path string, m *T) error {
	data, err := s.ReadFile(ctx, path)
	if err != nil {
		return errors.Trace(err)
	}

	err = json.Unmarshal(data, m)
	return errors.Trace(err)
}

// walk through the checkpoint checksum files and retrieve the checksum information of the tables that have already been calculated
func loadCheckpointChecksum(
	ctx context.Context,
	s storage.ExternalStorage,
	subDir string,
) (map[int64]*ChecksumItem, time.Duration, error) {
	var pastDureTime time.Duration = 0
	checkpointChecksum := make(map[int64]*ChecksumItem)
	err := s.WalkDir(ctx, &storage.WalkOption{SubDir: subDir}, func(path string, size int64) error {
		data, err := s.ReadFile(ctx, path)
		if err != nil {
			return errors.Trace(err)
		}
		info := &ChecksumInfo{}
		err = json.Unmarshal(data, info)
		if err != nil {
			log.Error("failed to unmarshal the checkpoint checksum info, skip it", zap.Error(err))
			return nil
		}

		checksum := sha256.Sum256(info.Content)
		if !bytes.Equal(info.Checksum, checksum[:]) {
			log.Error("checkpoint checksum info's checksum mismatch, skip it",
				zap.ByteString("expect", info.Checksum),
				zap.ByteString("got", checksum[:]),
			)
			return nil
		}

		if info.DureTime > pastDureTime {
			pastDureTime = info.DureTime
		}

		items := &ChecksumItems{}
		err = json.Unmarshal(info.Content, items)
		if err != nil {
			return errors.Trace(err)
		}

		for _, c := range items.Items {
			checkpointChecksum[c.TableID] = c
		}
		return nil
	})
	return checkpointChecksum, pastDureTime, errors.Trace(err)
}
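
// saveCheckpointMetadata marshals the checkpoint metadata and writes it to the
// given path in the external storage.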
func saveCheckpointMetadata[T any](ctx context.Context, s storage.ExternalStorage, meta *T, path string) error {
	data, err := json.Marshal(meta)
	if err != nil {
		return errors.Trace(err)
	}

	err = s.WriteFile(ctx, path, data)
	return errors.Trace(err)
}
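
// removeCheckpointData removes all checkpoint files (data, meta and lock
// files) under the given sub directory, deleting them concurrently with a
// small worker pool and giving up once too many deletions have failed.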
func removeCheckpointData(ctx context.Context, s storage.ExternalStorage, subDir string) error {
	var (
		// Generate one file every 30 seconds, so there are only 1200 files in 10 hours.
		removedFileNames = make([]string, 0, 1200)

		removeCnt  int   = 0
		removeSize int64 = 0
	)
	err := s.WalkDir(ctx, &storage.WalkOption{SubDir: subDir}, func(path string, size int64) error {
		if !strings.HasSuffix(path, ".cpt") && !strings.HasSuffix(path, ".meta") && !strings.HasSuffix(path, ".lock") {
			return nil
		}
		removedFileNames = append(removedFileNames, path)
		removeCnt += 1
		removeSize += size
		return nil
	})
	if err != nil {
		return errors.Trace(err)
	}
	log.Info("start to remove checkpoint data",
		zap.String("checkpoint task", subDir),
		zap.Int("remove-count", removeCnt),
		zap.Int64("remove-size", removeSize),
	)

	maxFailedFilesNum := 16
	failedFilesCount := struct {
		lock  sync.Mutex
		count int
	}{
		count: 0,
	}
	pool := utils.NewWorkerPool(4, "checkpoint remove worker")
	eg, gCtx := errgroup.WithContext(ctx)
	for _, filename := range removedFileNames {
		name := filename
		pool.ApplyOnErrorGroup(eg, func() error {
			if err := s.DeleteFile(gCtx, name); err != nil {
				log.Warn("failed to remove the file", zap.String("filename", name), zap.Error(err))
				failedFilesCount.lock.Lock()
				failedFilesCount.count += 1
				if failedFilesCount.count >= maxFailedFilesNum {
					failedFilesCount.lock.Unlock()
					return errors.Annotate(err, "failed to delete too many files")
				}
				failedFilesCount.lock.Unlock()
			}
			return nil
		})
	}
	if err := eg.Wait(); err != nil {
		return errors.Trace(err)
	}
	log.Info("all the checkpoint data has been removed", zap.String("checkpoint task", subDir))
	return nil
}