Files
tidb/br/pkg/restore/misc.go

392 lines
14 KiB
Go

// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package restore
import (
"bytes"
"context"
"crypto/sha256"
"encoding/base64"
"encoding/binary"
"fmt"
"path"
"strconv"
"strings"
"github.com/gogo/protobuf/proto"
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/log"
berrors "github.com/pingcap/tidb/br/pkg/errors"
"github.com/pingcap/tidb/br/pkg/logutil"
"github.com/pingcap/tidb/br/pkg/storage"
"github.com/pingcap/tidb/br/pkg/utils"
"github.com/pingcap/tidb/pkg/domain"
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/meta"
"github.com/pingcap/tidb/pkg/meta/metadef"
"github.com/pingcap/tidb/pkg/meta/model"
"github.com/pingcap/tidb/pkg/parser/ast"
tidbutil "github.com/pingcap/tidb/pkg/util"
"github.com/tikv/client-go/v2/oracle"
pd "github.com/tikv/pd/client"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
)
// Granularity is a deprecated parameter. The values declared for it in this
// file are FineGrained and CoarseGrained.
type Granularity string
// Values of the deprecated Granularity parameter.
const (
// FineGrained is the "fine-grained" value of Granularity.
FineGrained Granularity = "fine-grained"
// CoarseGrained is the "coarse-grained" value of Granularity.
CoarseGrained Granularity = "coarse-grained"
)
// logRestoreTableIDBlocklistFilePrefix is the storage sub-directory of the log restore table IDs
// blocklist files: filename() places every blocklist file name under it, and
// fastWalkLogRestoreTableIDsBlocklistFile walks it to list the existing files.
const logRestoreTableIDBlocklistFilePrefix = "v1/log_restore_tables_blocklists"
// LogRestoreTableIDsBlocklistFile is the content of one log restore table IDs blocklist file.
// It is marshaled with protobuf according to the field tags below. The field numbers in the
// tags identify the fields in the encoded data, so they must stay stable even though they do
// not follow the declaration order.
type LogRestoreTableIDsBlocklistFile struct {
// RestoreCommitTs records the timestamp taken after the PITR restore is done. Only a later PITR restore from the
// log backup of the cluster whose BackupTS is not less than it can ignore the table IDs blocklist recorded in the file.
RestoreCommitTs uint64 `protobuf:"varint,1,opt,name=restore_commit_ts,proto3"`
// SnapshotBackupTs records the BackupTS of the PITR restore. Any PITR restore from the log backup of the cluster
// whose restoredTS is less than it can ignore the table IDs blocklist recorded in the file.
SnapshotBackupTs uint64 `protobuf:"varint,2,opt,name=snapshot_backup_ts,proto3"`
// RewriteTs records the rewritten timestamp of the meta kvs in this PITR restore.
RewriteTs uint64 `protobuf:"varint,6,opt,name=rewrite_ts,proto3"`
// TableIds records the blocklisted table IDs of the cluster running the log backup task.
TableIds []int64 `protobuf:"varint,3,rep,packed,name=table_ids,proto3"`
// DbIds records the blocklisted database IDs of the cluster running the log backup task.
DbIds []int64 `protobuf:"varint,5,rep,packed,name=db_ids,proto3"`
// Checksum records the checksum of the other fields. It is computed by
// checksumLogRestoreTableIDsBlocklistFile and verified when the file is unmarshaled.
Checksum []byte `protobuf:"bytes,4,opt,name=checksum,proto3"`
}
// Reset overwrites m with the zero value of LogRestoreTableIDsBlocklistFile.
func (m *LogRestoreTableIDsBlocklistFile) Reset() {
	var empty LogRestoreTableIDsBlocklistFile
	*m = empty
}

// String renders m with proto.CompactTextString.
func (m *LogRestoreTableIDsBlocklistFile) String() string {
	text := proto.CompactTextString(m)
	return text
}

// ProtoMessage is intentionally empty. Together with Reset and String it lets
// the type be passed to proto.Marshal and proto.Unmarshal.
func (m *LogRestoreTableIDsBlocklistFile) ProtoMessage() {
}
// filename returns the storage path of this blocklist file: the blocklist
// directory prefix followed by "R<RestoreCommitTs>_S<SnapshotBackupTs>.meta",
// where both timestamps are printed as zero-padded, 16-digit, upper-case
// hexadecimal numbers.
func (m *LogRestoreTableIDsBlocklistFile) filename() string {
	var (
		dir      = logRestoreTableIDBlocklistFilePrefix
		commitTs = m.RestoreCommitTs
		backupTs = m.SnapshotBackupTs
	)
	return fmt.Sprintf("%s/R%016X_S%016X.meta", dir, commitTs, backupTs)
}
// parseLogRestoreTableIDsBlocklistFileName extracts the timestamps encoded in the name of a
// log restore table IDs blocklist file.
//
// Only the base name of filename is inspected. It is expected to follow the layout produced by
// (*LogRestoreTableIDsBlocklistFile).filename: 'R', 16 hexadecimal digits of the restore commit
// ts, "_S", 16 hexadecimal digits of the snapshot backup ts, and the ".meta" suffix.
//
// parsed is false, and both timestamps are 0, when the name does not follow that layout. A
// warning is logged when one of the timestamp fields is not a valid hexadecimal number.
//
// Names that are too short to hold both timestamp fields are rejected before any indexing, so
// an unrelated file in the blocklist directory (for example "README.meta") cannot trigger an
// out-of-range panic.
func parseLogRestoreTableIDsBlocklistFileName(filename string) (restoreCommitTs, snapshotBackupTs uint64, parsed bool) {
	const (
		// tsHexLen is the number of digits "%016X" prints for a uint64.
		tsHexLen = 16
		// The restore commit ts starts right after the leading 'R'.
		restoreTsBegin = 1
		restoreTsEnd   = restoreTsBegin + tsHexLen
		// The snapshot backup ts starts right after the "_S" separator.
		snapshotTsBegin = restoreTsEnd + len("_S")
		snapshotTsEnd   = snapshotTsBegin + tsHexLen
	)

	filename = path.Base(filename)
	if !strings.HasSuffix(filename, ".meta") {
		return 0, 0, false
	}
	// Every index and slice expression below stays within [0, snapshotTsEnd).
	if len(filename) < snapshotTsEnd {
		return 0, 0, false
	}
	if filename[0] != 'R' {
		return 0, 0, false
	}
	ts, err := strconv.ParseUint(filename[restoreTsBegin:restoreTsEnd], 16, 64)
	if err != nil {
		log.Warn("failed to parse log restore table IDs blocklist file name", zap.String("filename", filename), zap.Error(err))
		return 0, 0, false
	}
	restoreCommitTs = ts
	if filename[restoreTsEnd:snapshotTsBegin] != "_S" {
		return 0, 0, false
	}
	ts, err = strconv.ParseUint(filename[snapshotTsBegin:snapshotTsEnd], 16, 64)
	if err != nil {
		log.Warn("failed to parse log restore table IDs blocklist file name", zap.String("filename", filename), zap.Error(err))
		return 0, 0, false
	}
	snapshotBackupTs = ts
	return restoreCommitTs, snapshotBackupTs, true
}
// checksumLogRestoreTableIDsBlocklistFile returns the SHA-256 digest of every field except
// Checksum. The fields are fed to the hash as 8-byte little-endian words in this order:
// RestoreCommitTs, SnapshotBackupTs, RewriteTs, each element of TableIds, each element of DbIds.
func (m *LogRestoreTableIDsBlocklistFile) checksumLogRestoreTableIDsBlocklistFile() []byte {
	digest := sha256.New()
	var word [8]byte
	// feed hashes v as one 8-byte little-endian word.
	feed := func(v uint64) {
		binary.LittleEndian.PutUint64(word[:], v)
		digest.Write(word[:])
	}

	feed(m.RestoreCommitTs)
	feed(m.SnapshotBackupTs)
	feed(m.RewriteTs)
	for i := range m.TableIds {
		feed(uint64(m.TableIds[i]))
	}
	for i := range m.DbIds {
		feed(uint64(m.DbIds[i]))
	}
	return digest.Sum(nil)
}
// setChecksumLogRestoreTableIDsBlocklistFile computes the checksum of the current field values
// and stores it in m.Checksum. Call it after all other fields have been set.
func (m *LogRestoreTableIDsBlocklistFile) setChecksumLogRestoreTableIDsBlocklistFile() {
	sum := m.checksumLogRestoreTableIDsBlocklistFile()
	m.Checksum = sum
}
// MarshalLogRestoreTableIDsBlocklistFile builds a blocklist file from the given fields, fills in
// its checksum and marshals it. It returns the file name and the marshaled data.
func MarshalLogRestoreTableIDsBlocklistFile(restoreCommitTs, snapshotBackupTs, rewriteTs uint64, tableIds, dbIds []int64) (string, []byte, error) {
	file := LogRestoreTableIDsBlocklistFile{
		RestoreCommitTs:  restoreCommitTs,
		SnapshotBackupTs: snapshotBackupTs,
		RewriteTs:        rewriteTs,
		TableIds:         tableIds,
		DbIds:            dbIds,
	}
	file.setChecksumLogRestoreTableIDsBlocklistFile()

	payload, err := proto.Marshal(&file)
	if err != nil {
		return "", nil, errors.Trace(err)
	}
	return file.filename(), payload, nil
}
// unmarshalLogRestoreTableIDsBlocklistFile decodes data into a blocklist file and verifies it:
// the checksum recomputed from the decoded fields must equal the recorded Checksum field.
// A decoding failure or a checksum mismatch is returned as an error.
func unmarshalLogRestoreTableIDsBlocklistFile(data []byte) (*LogRestoreTableIDsBlocklistFile, error) {
	var file LogRestoreTableIDsBlocklistFile
	if err := proto.Unmarshal(data, &file); err != nil {
		return nil, errors.Trace(err)
	}

	calculated := file.checksumLogRestoreTableIDsBlocklistFile()
	if bytes.Equal(calculated, file.Checksum) {
		return &file, nil
	}
	return nil, errors.Errorf(
		"checksum mismatch (calculated checksum is %s but the recorded checksum is %s), the log restore table IDs blocklist file may be corrupted",
		base64.StdEncoding.EncodeToString(calculated),
		base64.StdEncoding.EncodeToString(file.Checksum),
	)
}
// fastWalkLogRestoreTableIDsBlocklistFile runs executionFn on the blocklist files stored in s
// that filterOutFn does not filter out.
//
// It works in two phases:
//  1. The blocklist directory is walked and the file names are collected. A file whose name can
//     be parsed and is filtered out by filterOutFn is dropped here without being read. A file
//     whose name cannot be parsed is kept and judged by its content in the next phase.
//  2. The collected files are read and unmarshaled through a worker pool created with size 8.
//     filterOutFn is applied again to the timestamps recorded inside the file, and executionFn
//     is called for the files that remain.
//
// executionFn is invoked from the worker pool, so it may be called for several files at once.
// The first error from reading, unmarshaling or executionFn is returned; once the error group's
// context is done, no further files are scheduled.
func fastWalkLogRestoreTableIDsBlocklistFile(
	ctx context.Context,
	s storage.ExternalStorage,
	filterOutFn func(restoreCommitTs, snapshotBackupTs uint64) bool,
	executionFn func(ctx context.Context, filename string, restoreCommitTs, rewriteTs uint64, tableIds, dbIds []int64) error,
) error {
	candidates := make([]string, 0)
	collect := func(filePath string, _ int64) error {
		commitTs, backupTs, ok := parseLogRestoreTableIDsBlocklistFileName(filePath)
		if ok && filterOutFn(commitTs, backupTs) {
			return nil
		}
		candidates = append(candidates, filePath)
		return nil
	}
	walkOpt := &storage.WalkOption{SubDir: logRestoreTableIDBlocklistFilePrefix}
	if err := s.WalkDir(ctx, walkOpt, collect); err != nil {
		return errors.Trace(err)
	}

	pool := tidbutil.NewWorkerPool(8, "walk dir log restore table IDs blocklist files")
	eg, ectx := errgroup.WithContext(ctx)
	for i := range candidates {
		if ectx.Err() != nil {
			break
		}
		name := candidates[i]
		pool.ApplyOnErrorGroup(eg, func() error {
			content, err := s.ReadFile(ectx, name)
			if err != nil {
				return errors.Trace(err)
			}
			file, err := unmarshalLogRestoreTableIDsBlocklistFile(content)
			if err != nil {
				return errors.Trace(err)
			}
			// The name-based check above is only a shortcut; the recorded timestamps decide.
			if filterOutFn(file.RestoreCommitTs, file.SnapshotBackupTs) {
				return nil
			}
			return errors.Trace(executionFn(ectx, name, file.RestoreCommitTs, file.RewriteTs, file.TableIds, file.DbIds))
		})
	}
	return errors.Trace(eg.Wait())
}
// CheckTableTrackerContainsTableIDsFromBlocklistFiles checks whether the PITR id tracker contains
// any table or database ID recorded in the blocklist files that apply to this restore.
//
// A blocklist file is skipped when startTs >= its restore commit ts or restoredTs <= its snapshot
// backup ts. For every remaining file:
//   - an error is returned if tracker contains one of its table IDs (as a table or a partition)
//     or one of its database IDs; tableNameByTableId / dbNameByDbId supply the name for the message;
//   - otherwise checkTableIdLost / checkDBIdlost are called for each ID and a warning is logged
//     for the IDs they report as lost;
//   - finally cleanError is called with the file's rewrite ts.
//
// The per-file checks run inside fastWalkLogRestoreTableIDsBlocklistFile, which may process
// several files at once, so the callbacks can be invoked concurrently.
//
// NOTE(review): the field comment on SnapshotBackupTs says a restore whose restoredTS is "less
// than" it can ignore the file, while the filter here also skips the file when they are equal;
// confirm which one is intended.
func CheckTableTrackerContainsTableIDsFromBlocklistFiles(
	ctx context.Context,
	s storage.ExternalStorage,
	tracker *utils.PiTRIdTracker,
	startTs, restoredTs uint64,
	tableNameByTableId func(tableId int64) string,
	dbNameByDbId func(dbId int64) string,
	checkTableIdLost func(tableId int64) bool,
	checkDBIdlost func(dbId int64) bool,
	cleanError func(rewriteTs uint64),
) error {
	// canIgnore reports whether a blocklist file does not apply to this restore.
	canIgnore := func(restoreCommitTs, snapshotBackupTs uint64) bool {
		return startTs >= restoreCommitTs || restoredTs <= snapshotBackupTs
	}

	// verify checks the IDs of one applicable blocklist file against the tracker.
	verify := func(_ context.Context, _ string, restoreCommitTs, rewriteTs uint64, tableIds, dbIds []int64) error {
		for _, id := range tableIds {
			if tracker.ContainsTableId(id) || tracker.ContainsPartitionId(id) {
				return errors.Errorf(
					"cannot restore the table(Id=%d, name=%s at %d) because it is log restored(at %d) before snapshot backup(at %d). "+
						"Please respecify the filter that does not contain the table or replace with a newer snapshot backup.",
					id, tableNameByTableId(id), restoredTs, restoreCommitTs, startTs)
			}
			// the meta kv may not be backed by log restore
			if checkTableIdLost(id) {
				log.Warn("the table is lost in the log backup storage, so that it can not be restored.", zap.Int64("table id", id))
			}
		}
		for _, id := range dbIds {
			if tracker.ContainsDB(id) {
				return errors.Errorf(
					"cannot restore the database(Id=%d, name %s at %d) because it is log restored(at %d) before snapshot backup(at %d). "+
						"Please respecify the filter that does not contain the database or replace with a newer snapshot backup.",
					id, dbNameByDbId(id), restoredTs, restoreCommitTs, startTs)
			}
			// the meta kv may not be backed by log restore
			if checkDBIdlost(id) {
				log.Warn("the database is lost in the log backup storage, so that it can not be restored.", zap.Int64("database id", id))
			}
		}
		cleanError(rewriteTs)
		return nil
	}

	return errors.Trace(fastWalkLogRestoreTableIDsBlocklistFile(ctx, s, canIgnore, verify))
}
// TruncateLogRestoreTableIDsBlocklistFiles deletes from s the blocklist files whose restore
// commit ts is not larger than untilTs. Files with a larger restore commit ts are kept.
func TruncateLogRestoreTableIDsBlocklistFiles(
	ctx context.Context,
	s storage.ExternalStorage,
	untilTs uint64,
) error {
	// keep reports whether a file is newer than the truncation point and must survive.
	keep := func(restoreCommitTs, _ uint64) bool {
		return restoreCommitTs > untilTs
	}
	remove := func(ctx context.Context, filename string, _, _ uint64, _, _ []int64) error {
		return s.DeleteFile(ctx, filename)
	}
	return errors.Trace(fastWalkLogRestoreTableIDsBlocklistFile(ctx, s, keep, remove))
}
// UniqueTableName pairs a database name with a table name.
type UniqueTableName struct {
// DB is the database name.
DB string
// Table is the table name.
Table string
}
// TransferBoolToValue converts a boolean switch into its textual form:
// "ON" when enable is true and "OFF" otherwise.
func TransferBoolToValue(enable bool) string {
	value := "OFF"
	if enable {
		value = "ON"
	}
	return value
}
// GetTableSchema looks up the table dbName.tableName in the information schema currently held
// by dom and returns its table info. The lookup error, if any, is returned traced.
func GetTableSchema(
	dom *domain.Domain,
	dbName ast.CIStr,
	tableName ast.CIStr,
) (*model.TableInfo, error) {
	tbl, err := dom.InfoSchema().TableByName(context.Background(), dbName, tableName)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return tbl.Meta(), nil
}
// maxUserTablesNum is the maximum number of user databases/tables listed in the error returned
// by AssertUserDBsEmpty; a "..." entry is appended once the limit is reached.
const maxUserTablesNum = 10

// AssertUserDBsEmpty checks that the cluster holds no user databases or tables.
//
// Memory and system databases are ignored, and so is an empty database named "test". Any other
// database counts: a database without tables is reported as "<db>.", and every table as
// "<db>.<table>". When something is found, berrors.ErrRestoreNotFreshCluster is returned,
// annotated with the first maxUserTablesNum findings. A failure to list the tables of a
// database is returned as an annotated error.
func AssertUserDBsEmpty(dom *domain.Domain) error {
	schemas := dom.InfoSchema().AllSchemas()
	reader := meta.NewReader(dom.Store().GetSnapshot(kv.MaxVersion))

	found := make([]string, 0, maxUserTablesNum+1)
	// record adds one finding and reports whether the list is full, in which case "..." is
	// appended instead and scanning should stop.
	record := func(dbName, tableName string) (full bool) {
		if len(found) < maxUserTablesNum {
			found = append(found, fmt.Sprintf("%s.%s", dbName, tableName))
			return false
		}
		found = append(found, "...")
		return true
	}

	// scan visits every database and returns as soon as the list of findings is full.
	scan := func() error {
		for _, db := range schemas {
			lowerName := db.Name.L
			if metadef.IsMemOrSysDB(lowerName) {
				continue
			}
			tables, err := reader.ListSimpleTables(db.ID)
			if err != nil {
				return errors.Annotatef(err, "failed to iterator tables of database[id=%d]", db.ID)
			}
			if len(tables) == 0 {
				// tidb create test db on fresh cluster
				// if it's empty we don't take it as user db
				if lowerName != "test" && record(db.Name.O, "") {
					return nil
				}
				continue
			}
			for _, table := range tables {
				if record(db.Name.O, table.Name.O) {
					return nil
				}
			}
		}
		return nil
	}
	if err := scan(); err != nil {
		return err
	}

	if len(found) == 0 {
		return nil
	}
	return errors.Annotate(berrors.ErrRestoreNotFreshCluster,
		"user db/tables: "+strings.Join(found, ", "))
}
// GetTS asks PD for a new timestamp and returns its physical and logical parts composed into
// a single uint64 with oracle.ComposeTS. On failure it returns 0 and the traced error.
func GetTS(ctx context.Context, pdClient pd.Client) (uint64, error) {
	physical, logical, err := pdClient.GetTS(ctx)
	if err != nil {
		return 0, errors.Trace(err)
	}
	return oracle.ComposeTS(physical, logical), nil
}
// GetTSWithRetry gets a new timestamp from PD, retrying GetTS through utils.WithRetry with the
// backoff strategy returned by utils.NewAggressivePDBackoffStrategy.
//
// Every failed attempt is logged as a warning, and the error finally returned by utils.WithRetry
// is logged as an error before it is returned traced. The returned timestamp is the value of the
// most recent GetTS call, so it is only meaningful when the returned error is nil.
func GetTSWithRetry(ctx context.Context, pdClient pd.Client) (uint64, error) {
var (
startTS uint64
getTSErr error
// retry counts the attempts made so far; it is read by the failpoint and the warning log.
retry uint
)
err := utils.WithRetry(ctx, func() error {
startTS, getTSErr = GetTS(ctx, pdClient)
// When the "get-ts-error" failpoint is enabled with a true value, the result of the first
// three attempts (retry is still 0, 1 or 2 here) is replaced by a "not leader" error,
// presumably so that tests can exercise the retry path.
failpoint.Inject("get-ts-error", func(val failpoint.Value) {
if val.(bool) && retry < 3 {
getTSErr = errors.Errorf("rpc error: code = Unknown desc = [PD:tso:ErrGenerateTimestamp]generate timestamp failed, requested pd is not leader of cluster")
}
})
// Incremented after the failpoint check and before logging, so the log shows a 1-based count.
retry++
if getTSErr != nil {
log.Warn("failed to get TS, retry it", zap.Uint("retry time", retry), logutil.ShortError(getTSErr))
}
return getTSErr
}, utils.NewAggressivePDBackoffStrategy())
if err != nil {
log.Error("failed to get TS", zap.Error(err))
}
return startTS, errors.Trace(err)
}
// HasRestoreIDColumn reports whether the mysql.tidb_pitr_id_map table has a column named
// "restore_id" (compared against the lower-cased column name). It returns false when the
// table schema cannot be loaded.
func HasRestoreIDColumn(dom *domain.Domain) bool {
	tblInfo, err := GetTableSchema(dom, ast.NewCIStr("mysql"), ast.NewCIStr("tidb_pitr_id_map"))
	if err != nil {
		return false
	}
	for i := range tblInfo.Columns {
		if tblInfo.Columns[i].Name.L == "restore_id" {
			return true
		}
	}
	return false
}