diff --git a/br/cmd/br/BUILD.bazel b/br/cmd/br/BUILD.bazel index 33d3b6cef5..d2e1669e0c 100644 --- a/br/cmd/br/BUILD.bazel +++ b/br/cmd/br/BUILD.bazel @@ -21,8 +21,9 @@ go_library( "//br/pkg/logutil", "//br/pkg/metautil", "//br/pkg/mock/mockid", - "//br/pkg/restore", + "//br/pkg/restore/utils", "//br/pkg/rtree", + "//br/pkg/stream", "//br/pkg/streamhelper/config", "//br/pkg/summary", "//br/pkg/task", diff --git a/br/cmd/br/debug.go b/br/cmd/br/debug.go index 064572f2dd..27e55e276b 100644 --- a/br/cmd/br/debug.go +++ b/br/cmd/br/debug.go @@ -21,8 +21,9 @@ import ( "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/metautil" "github.com/pingcap/tidb/br/pkg/mock/mockid" - "github.com/pingcap/tidb/br/pkg/restore" + restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/rtree" + "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/task" "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/br/pkg/version/build" @@ -207,7 +208,7 @@ func newBackupMetaValidateCommand() *cobra.Command { for offset := uint64(0); offset < tableIDOffset; offset++ { _, _ = tableIDAllocator.Alloc() // Ignore error } - rewriteRules := &restore.RewriteRules{ + rewriteRules := &restoreutils.RewriteRules{ Data: make([]*import_sstpb.RewriteRule, 0), } tableIDMap := make(map[int64]int64) @@ -245,13 +246,13 @@ func newBackupMetaValidateCommand() *cobra.Command { } } - rules := restore.GetRewriteRules(newTable, table.Info, 0, true) + rules := restoreutils.GetRewriteRules(newTable, table.Info, 0, true) rewriteRules.Data = append(rewriteRules.Data, rules.Data...) tableIDMap[table.Info.ID] = int64(tableID) } // Validate rewrite rules for _, file := range files { - err = restore.ValidateFileRewriteRule(file, rewriteRules) + err = restoreutils.ValidateFileRewriteRule(file, rewriteRules) if err != nil { return errors.Trace(err) } @@ -447,8 +448,8 @@ func searchStreamBackupCommand() *cobra.Command { if err != nil { return errors.Trace(err) } - comparator := restore.NewStartWithComparator() - bs := restore.NewStreamBackupSearch(s, comparator, keyBytes) + comparator := stream.NewStartWithComparator() + bs := stream.NewStreamBackupSearch(s, comparator, keyBytes) bs.SetStartTS(startTs) bs.SetEndTs(endTs) diff --git a/br/pkg/restore/BUILD.bazel b/br/pkg/restore/BUILD.bazel index 283ec4a525..cf4baffc33 100644 --- a/br/pkg/restore/BUILD.bazel +++ b/br/pkg/restore/BUILD.bazel @@ -5,19 +5,9 @@ go_library( srcs = [ "batcher.go", "client.go", - "data.go", "db.go", - "import.go", - "import_retry.go", - "log_client.go", "logutil.go", - "merge.go", "pipeline_items.go", - "range.go", - "rawkv_client.go", - "search.go", - "split.go", - "stream_metas.go", "systable_restore.go", "util.go", ], @@ -26,7 +16,6 @@ go_library( deps = [ "//br/pkg/checkpoint", "//br/pkg/checksum", - "//br/pkg/common", "//br/pkg/conn", "//br/pkg/conn/util", "//br/pkg/errors", @@ -34,17 +23,20 @@ go_library( "//br/pkg/logutil", "//br/pkg/metautil", "//br/pkg/pdutil", + "//br/pkg/restore/file_importer", "//br/pkg/restore/ingestrec", + "//br/pkg/restore/log_restore", "//br/pkg/restore/prealloc_table_id", + "//br/pkg/restore/rawkv", "//br/pkg/restore/split", "//br/pkg/restore/tiflashrec", + "//br/pkg/restore/utils", "//br/pkg/rtree", "//br/pkg/storage", "//br/pkg/stream", "//br/pkg/summary", "//br/pkg/utils", "//br/pkg/utils/iter", - "//br/pkg/utils/storewatch", "//br/pkg/version", "//pkg/bindinfo", "//pkg/ddl", @@ -63,41 +55,26 @@ go_library( "//pkg/util/codec", "//pkg/util/collate", 
"//pkg/util/engine", - "//pkg/util/hack", - "//pkg/util/mathutil", "//pkg/util/redact", "//pkg/util/table-filter", - "@com_github_emirpasic_gods//maps/treemap", "@com_github_fatih_color//:color", "@com_github_go_sql_driver_mysql//:mysql", - "@com_github_google_uuid//:uuid", "@com_github_opentracing_opentracing_go//:opentracing-go", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", "@com_github_pingcap_kvproto//pkg/brpb", - "@com_github_pingcap_kvproto//pkg/errorpb", "@com_github_pingcap_kvproto//pkg/import_sstpb", - "@com_github_pingcap_kvproto//pkg/kvrpcpb", "@com_github_pingcap_kvproto//pkg/metapb", - "@com_github_pingcap_kvproto//pkg/recoverdatapb", "@com_github_pingcap_log//:log", - "@com_github_tikv_client_go_v2//config", - "@com_github_tikv_client_go_v2//kv", "@com_github_tikv_client_go_v2//oracle", - "@com_github_tikv_client_go_v2//rawkv", - "@com_github_tikv_client_go_v2//tikv", - "@com_github_tikv_client_go_v2//txnkv/rangetask", "@com_github_tikv_client_go_v2//util", "@com_github_tikv_pd_client//:client", "@com_github_tikv_pd_client//http", "@org_golang_google_grpc//:grpc", "@org_golang_google_grpc//backoff", - "@org_golang_google_grpc//codes", "@org_golang_google_grpc//credentials", "@org_golang_google_grpc//credentials/insecure", "@org_golang_google_grpc//keepalive", - "@org_golang_google_grpc//status", - "@org_golang_x_exp//maps", "@org_golang_x_sync//errgroup", "@org_uber_go_multierr//:multierr", "@org_uber_go_zap//:zap", @@ -111,79 +88,56 @@ go_test( srcs = [ "batcher_test.go", "client_test.go", - "data_test.go", "db_test.go", - "import_retry_test.go", - "log_client_test.go", "main_test.go", - "merge_test.go", - "range_test.go", - "rawkv_client_test.go", - "search_test.go", - "split_test.go", - "stream_metas_test.go", "util_test.go", ], embed = [":restore"], flaky = True, race = "off", - shard_count = 50, + shard_count = 47, deps = [ "//br/pkg/backup", - "//br/pkg/conn", "//br/pkg/errors", "//br/pkg/glue", "//br/pkg/gluetidb", "//br/pkg/logutil", "//br/pkg/metautil", "//br/pkg/mock", - "//br/pkg/pdutil", + "//br/pkg/restore/file_importer", + "//br/pkg/restore/log_restore", "//br/pkg/restore/split", "//br/pkg/restore/tiflashrec", + "//br/pkg/restore/utils", "//br/pkg/rtree", "//br/pkg/storage", "//br/pkg/stream", "//br/pkg/utils", "//br/pkg/utils/iter", "//pkg/infoschema", - "//pkg/kv", "//pkg/meta/autoid", "//pkg/parser/model", "//pkg/parser/mysql", "//pkg/parser/types", "//pkg/session", - "//pkg/sessionctx/stmtctx", - "//pkg/store/pdtypes", "//pkg/tablecodec", "//pkg/testkit", "//pkg/testkit/testsetup", - "//pkg/types", - "//pkg/util/codec", - "//pkg/util/intest", "//pkg/util/table-filter", - "@com_github_fsouza_fake_gcs_server//fakestorage", "@com_github_golang_protobuf//proto", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", "@com_github_pingcap_kvproto//pkg/brpb", "@com_github_pingcap_kvproto//pkg/encryptionpb", - "@com_github_pingcap_kvproto//pkg/errorpb", "@com_github_pingcap_kvproto//pkg/import_sstpb", "@com_github_pingcap_kvproto//pkg/metapb", - "@com_github_pingcap_kvproto//pkg/pdpb", - "@com_github_pingcap_kvproto//pkg/recoverdatapb", "@com_github_pingcap_log//:log", "@com_github_stretchr_testify//assert", "@com_github_stretchr_testify//require", "@com_github_tikv_client_go_v2//oracle", - "@com_github_tikv_client_go_v2//rawkv", - "@com_github_tikv_client_go_v2//testutils", "@com_github_tikv_pd_client//:client", - "@org_golang_google_grpc//codes", "@org_golang_google_grpc//keepalive", - 
"@org_golang_google_grpc//status", "@org_uber_go_goleak//:goleak", "@org_uber_go_zap//:zap", - "@org_uber_go_zap//zapcore", ], ) diff --git a/br/pkg/restore/batcher.go b/br/pkg/restore/batcher.go index 033ef13f45..cf7c41a915 100644 --- a/br/pkg/restore/batcher.go +++ b/br/pkg/restore/batcher.go @@ -12,6 +12,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/glue" + "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/rtree" "github.com/pingcap/tidb/br/pkg/summary" "go.uber.org/zap" @@ -227,7 +228,7 @@ type DrainResult struct { BlankTablesAfterSend []CreatedTable // RewriteRules are the rewrite rules for the tables. // the key is the table id after rewritten. - RewriteRulesMap map[int64]*RewriteRules + RewriteRulesMap map[int64]*utils.RewriteRules Ranges []rtree.Range // Record which part of ranges belongs to the table TableEndOffsetInRanges []int @@ -245,7 +246,7 @@ func (result DrainResult) Files() []TableIDWithFiles { for _, rg := range ranges { files = append(files, rg.Files...) } - var rules *RewriteRules + var rules *utils.RewriteRules if r, ok := result.RewriteRulesMap[tableID]; ok { rules = r } @@ -266,7 +267,7 @@ func newDrainResult() DrainResult { return DrainResult{ TablesToSend: make([]CreatedTable, 0), BlankTablesAfterSend: make([]CreatedTable, 0), - RewriteRulesMap: EmptyRewriteRulesMap(), + RewriteRulesMap: utils.EmptyRewriteRulesMap(), Ranges: make([]rtree.Range, 0), TableEndOffsetInRanges: make([]int, 0), } diff --git a/br/pkg/restore/batcher_test.go b/br/pkg/restore/batcher_test.go index dad4634bec..e1ef4ab8b6 100644 --- a/br/pkg/restore/batcher_test.go +++ b/br/pkg/restore/batcher_test.go @@ -14,6 +14,7 @@ import ( "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/metautil" "github.com/pingcap/tidb/br/pkg/restore" + "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/rtree" "github.com/pingcap/tidb/pkg/parser/model" "github.com/stretchr/testify/require" @@ -23,7 +24,7 @@ import ( type drySender struct { mu *sync.Mutex - rewriteRules *restore.RewriteRules + rewriteRules *utils.RewriteRules ranges []rtree.Range nBatch int @@ -60,7 +61,7 @@ func (sender *drySender) Ranges() []rtree.Range { func newDrySender() *drySender { return &drySender{ - rewriteRules: restore.EmptyRewriteRule(), + rewriteRules: utils.EmptyRewriteRule(), ranges: []rtree.Range{}, mu: new(sync.Mutex), } @@ -161,7 +162,7 @@ func fakeTableWithRange(id int64, rngs []rtree.Range) restore.TableWithRange { } tblWithRng := restore.TableWithRange{ CreatedTable: restore.CreatedTable{ - RewriteRule: restore.EmptyRewriteRule(), + RewriteRule: utils.EmptyRewriteRule(), Table: tbl.Info, OldTable: tbl, }, @@ -170,8 +171,8 @@ func fakeTableWithRange(id int64, rngs []rtree.Range) restore.TableWithRange { return tblWithRng } -func fakeRewriteRules(oldPrefix string, newPrefix string) *restore.RewriteRules { - return &restore.RewriteRules{ +func fakeRewriteRules(oldPrefix string, newPrefix string) *utils.RewriteRules { + return &utils.RewriteRules{ Data: []*import_sstpb.RewriteRule{ { OldKeyPrefix: []byte(oldPrefix), @@ -299,7 +300,7 @@ func TestRewriteRules(t *testing.T) { fakeRange("can", "cao"), fakeRange("cap", "caq"), }, } - rewriteRules := []*restore.RewriteRules{ + rewriteRules := []*utils.RewriteRules{ fakeRewriteRules("a", "ada"), fakeRewriteRules("b", "bob"), fakeRewriteRules("c", "cpp"), diff --git a/br/pkg/restore/client.go b/br/pkg/restore/client.go index 3b1c4c709e..770fe28eba 100644 
--- a/br/pkg/restore/client.go +++ b/br/pkg/restore/client.go @@ -34,10 +34,14 @@ import ( "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/metautil" "github.com/pingcap/tidb/br/pkg/pdutil" + fileimporter "github.com/pingcap/tidb/br/pkg/restore/file_importer" "github.com/pingcap/tidb/br/pkg/restore/ingestrec" + logrestore "github.com/pingcap/tidb/br/pkg/restore/log_restore" tidalloc "github.com/pingcap/tidb/br/pkg/restore/prealloc_table_id" + "github.com/pingcap/tidb/br/pkg/restore/rawkv" "github.com/pingcap/tidb/br/pkg/restore/split" "github.com/pingcap/tidb/br/pkg/restore/tiflashrec" + restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/rtree" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/stream" @@ -105,8 +109,8 @@ type Client struct { pdClient pd.Client pdHTTPClient pdhttp.Client toolClient split.SplitClient - fileImporter FileImporter - rawKVClient *RawKVBatchClient + fileImporter fileimporter.FileImporter + rawKVClient *rawkv.RawKVBatchClient workerPool *tidbutil.WorkerPool tlsConf *tls.Config keepaliveConf keepalive.ClientParameters @@ -170,7 +174,7 @@ type Client struct { // clusterID is the cluster id from down-stream cluster. clusterID uint64 - *logFileManager + *logrestore.LogFileManager // storage for log restore storage storage.ExternalStorage @@ -198,7 +202,7 @@ type Client struct { preallocedTableIDs *tidalloc.PreallocIDs // the rewrite mode of the downloaded SST files in TiKV. - rewriteMode RewriteMode + rewriteMode fileimporter.RewriteMode // checkpoint information for snapshot restore checkpointRunner *checkpoint.CheckpointRunner[checkpoint.RestoreKeyType, checkpoint.RestoreValueType] @@ -229,6 +233,21 @@ func NewRestoreClient( } } +// makeDBPool makes a session pool with specficated size by sessionFactory. +func makeDBPool(size uint, dbFactory func() (*DB, error)) ([]*DB, error) { + dbPool := make([]*DB, 0, size) + for i := uint(0); i < size; i++ { + db, e := dbFactory() + if e != nil { + return dbPool, e + } + if db != nil { + dbPool = append(dbPool, db) + } + } + return dbPool, nil +} + // Init create db connection and domain for storage. func (rc *Client) Init(g glue.Glue, store kv.Storage) error { // setDB must happen after set PolicyMode. @@ -494,11 +513,11 @@ func (rc *Client) GetBatchDdlSize() uint { return rc.batchDdlSize } -func (rc *Client) SetRewriteMode(mode RewriteMode) { +func (rc *Client) SetRewriteMode(mode fileimporter.RewriteMode) { rc.rewriteMode = mode } -func (rc *Client) GetRewriteMode() RewriteMode { +func (rc *Client) GetRewriteMode() fileimporter.RewriteMode { return rc.rewriteMode } @@ -561,11 +580,11 @@ func (rc *Client) InitClients(ctx context.Context, backend *backuppb.StorageBack splitClientOpts = append(splitClientOpts, split.WithRawKV()) } metaClient := split.NewClient(rc.pdClient, rc.pdHTTPClient, rc.tlsConf, maxSplitKeysOnce, rc.GetStoreCount()+1, splitClientOpts...) 
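
The `makeDBPool` helper factored out above just calls the factory `size` times and keeps the non-nil sessions. A minimal sketch of how a caller inside the package might use it (the factory body, pool size, and cleanup are illustrative placeholders, not part of this diff):

```go
// Hypothetical caller of makeDBPool; the factory body is a placeholder for
// whatever creates a *DB session (via glue) in the real code.
dbFactory := func() (*DB, error) {
	return nil, nil // placeholder
}
dbPool, err := makeDBPool(4, dbFactory) // pool size 4 chosen arbitrarily
if err != nil {
	return errors.Trace(err)
}
defer func() {
	for _, db := range dbPool {
		db.Close() // release the underlying session
	}
}()
```
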
- importCli := NewImportClient(metaClient, rc.tlsConf, rc.keepaliveConf) - rc.fileImporter = NewFileImporter(metaClient, importCli, backend, isRawKvMode, isTxnKvMode, stores, rc.rewriteMode, concurrencyPerStore, useTokenBucket) + importCli := fileimporter.NewImportClient(metaClient, rc.tlsConf, rc.keepaliveConf) + rc.fileImporter = fileimporter.NewFileImporter(metaClient, importCli, backend, isRawKvMode, isTxnKvMode, stores, rc.rewriteMode, concurrencyPerStore, useTokenBucket) } -func (rc *Client) SetRawKVClient(c *RawKVBatchClient) { +func (rc *Client) SetRawKVClient(c *rawkv.RawKVBatchClient) { rc.rawKVClient = c } @@ -912,8 +931,8 @@ func (rc *Client) CreateTables( dom *domain.Domain, tables []*metautil.Table, newTS uint64, -) (*RewriteRules, []*model.TableInfo, error) { - rewriteRules := &RewriteRules{ +) (*restoreutils.RewriteRules, []*model.TableInfo, error) { + rewriteRules := &restoreutils.RewriteRules{ Data: make([]*import_sstpb.RewriteRule, 0), } newTables := make([]*model.TableInfo, 0, len(tables)) @@ -971,7 +990,7 @@ func (rc *Client) createTables( table.Info.IsCommonHandle, newTableInfo.IsCommonHandle) } - rules := GetRewriteRules(newTableInfo, table.Info, newTS, true) + rules := restoreutils.GetRewriteRules(newTableInfo, table.Info, newTS, true) ct := CreatedTable{ RewriteRule: rules, Table: newTableInfo, @@ -1009,7 +1028,7 @@ func (rc *Client) createTable( table.Info.IsCommonHandle, newTableInfo.IsCommonHandle) } - rules := GetRewriteRules(newTableInfo, table.Info, newTS, true) + rules := restoreutils.GetRewriteRules(newTableInfo, table.Info, newTS, true) et := CreatedTable{ RewriteRule: rules, Table: newTableInfo, @@ -1318,11 +1337,11 @@ func (rc *Client) ExecDDLs(ctx context.Context, ddlJobs []*model.Job) error { } // Mock the call of setSpeedLimit function -func MockCallSetSpeedLimit(ctx context.Context, fakeImportClient ImporterClient, rc *Client, concurrency uint) error { +func MockCallSetSpeedLimit(ctx context.Context, fakeImportClient fileimporter.ImporterClient, rc *Client, concurrency uint) error { rc.SetRateLimit(42) rc.SetConcurrency(concurrency) rc.hasSpeedLimited = false - rc.fileImporter = NewFileImporter(nil, fakeImportClient, nil, false, false, nil, rc.rewriteMode, 128, false) + rc.fileImporter = fileimporter.NewFileImporter(nil, fakeImportClient, nil, false, false, nil, rc.rewriteMode, 128, false) return rc.setSpeedLimit(ctx, rc.rateLimit) } @@ -1351,7 +1370,7 @@ func (rc *Client) setSpeedLimit(ctx context.Context, rateLimit uint64) error { finalStore := store rc.workerPool.ApplyOnErrorGroup(eg, func() error { - err := rc.fileImporter.setDownloadSpeedLimit(ectx, finalStore.GetId(), rateLimit) + err := rc.fileImporter.SetDownloadSpeedLimit(ectx, finalStore.GetId(), rateLimit) if err != nil { return errors.Trace(err) } @@ -1420,7 +1439,7 @@ func (rc *Client) SplitRanges(ctx context.Context, return SplitRanges(ctx, rc, ranges, updateCh, isRawKv) } -func (rc *Client) WrapLogFilesIterWithSplitHelper(logIter LogIter, rules map[int64]*RewriteRules, g glue.Glue, store kv.Storage) (LogIter, error) { +func (rc *Client) WrapLogFilesIterWithSplitHelper(logIter logrestore.LogIter, rules map[int64]*restoreutils.RewriteRules, g glue.Glue, store kv.Storage) (logrestore.LogIter, error) { se, err := g.CreateSession(store) if err != nil { return nil, errors.Trace(err) @@ -1429,11 +1448,11 @@ func (rc *Client) WrapLogFilesIterWithSplitHelper(logIter LogIter, rules map[int splitSize, splitKeys := utils.GetRegionSplitInfo(execCtx) log.Info("get split threshold from tikv config", 
zap.Uint64("split-size", splitSize), zap.Int64("split-keys", splitKeys)) client := split.NewClient(rc.GetPDClient(), rc.pdHTTPClient, rc.GetTLSConfig(), maxSplitKeysOnce, 3) - return NewLogFilesIterWithSplitHelper(logIter, rules, client, splitSize, splitKeys), nil + return logrestore.NewLogFilesIterWithSplitHelper(logIter, rules, client, splitSize, splitKeys), nil } -func (rc *Client) generateKvFilesSkipMap(ctx context.Context, downstreamIdset map[int64]struct{}, taskName string) (*LogFilesSkipMap, error) { - skipMap := NewLogFilesSkipMap() +func (rc *Client) generateKvFilesSkipMap(ctx context.Context, downstreamIdset map[int64]struct{}, taskName string) (*restoreutils.LogFilesSkipMap, error) { + skipMap := restoreutils.NewLogFilesSkipMap() t, err := checkpoint.WalkCheckpointFileForRestore(ctx, rc.storage, rc.cipher, taskName, func(groupKey checkpoint.LogRestoreKeyType, off checkpoint.LogRestoreValueMarshaled) { for tableID, foffs := range off.Foffs { // filter out the checkpoint data of dropped table @@ -1453,17 +1472,17 @@ func (rc *Client) generateKvFilesSkipMap(ctx context.Context, downstreamIdset ma func (rc *Client) WrapLogFilesIterWithCheckpoint( ctx context.Context, - logIter LogIter, + logIter logrestore.LogIter, downstreamIdset map[int64]struct{}, taskName string, updateStats func(kvCount, size uint64), onProgress func(), -) (LogIter, error) { +) (logrestore.LogIter, error) { skipMap, err := rc.generateKvFilesSkipMap(ctx, downstreamIdset, taskName) if err != nil { return nil, errors.Trace(err) } - return iter.FilterOut(logIter, func(d *LogDataFileInfo) bool { + return iter.FilterOut(logIter, func(d *logrestore.LogDataFileInfo) bool { if skipMap.NeedSkip(d.MetaDataGroupName, d.OffsetInMetaGroup, d.OffsetInMergedGroup) { onProgress() updateStats(uint64(d.NumberOfEntries), d.Length) @@ -1525,7 +1544,7 @@ LOOPFORTABLE: break LOOPFORTABLE } restoreFn := func() error { - filesGroups := getGroupFiles(filesReplica, rc.fileImporter.supportMultiIngest) + filesGroups := getGroupFiles(filesReplica, rc.fileImporter.SupportMultiIngest) for _, filesGroup := range filesGroups { if importErr := func(fs []*backuppb.File) (err error) { fileStart := time.Now() @@ -1554,12 +1573,7 @@ LOOPFORTABLE: return nil } if rc.granularity == string(CoarseGrained) { - rc.fileImporter.cond.L.Lock() - for rc.fileImporter.ShouldBlock() { - // wait for download worker notified - rc.fileImporter.cond.Wait() - } - rc.fileImporter.cond.L.Unlock() + rc.fileImporter.WaitUntilUnblock() rc.workerPool.ApplyOnErrorGroup(eg, restoreFn) } else { // if we are not use coarse granularity which means @@ -1597,7 +1611,7 @@ func (rc *Client) WaitForFilesRestored(ctx context.Context, files []*backuppb.Fi log.Info("import sst files done", logutil.Files(files)) updateCh.Inc() }() - return rc.fileImporter.ImportSSTFiles(ectx, []*backuppb.File{fileReplica}, EmptyRewriteRule(), rc.cipher, rc.backupMeta.ApiVersion) + return rc.fileImporter.ImportSSTFiles(ectx, []*backuppb.File{fileReplica}, restoreutils.EmptyRewriteRule(), rc.cipher, rc.backupMeta.ApiVersion) }) } if err := eg.Wait(); err != nil { @@ -1914,7 +1928,7 @@ func (rc *Client) GoUpdateMetaAndLoadStats( zap.Int64("new id", tbl.Table.ID), ) start := time.Now() - rewriteIDMap := getTableIDMap(tbl.Table, tbl.OldTable.Info) + rewriteIDMap := restoreutils.GetTableIDMap(tbl.Table, tbl.OldTable.Info) if statsErr = metautil.RestoreStats(ctx, s, cipher, rc.statsHandler, tbl.Table, oldTable.StatsFileIndexes, rewriteIDMap); statsErr != nil { log.Error("analyze table failed", zap.Any("table", 
oldTable.StatsFileIndexes), zap.Error(statsErr)) } @@ -2023,7 +2037,7 @@ func (rc *Client) FailpointDoChecksumForLogRestore( kvClient kv.Client, pdClient pd.Client, idrules map[int64]int64, - rewriteRules map[int64]*RewriteRules, + rewriteRules map[int64]*restoreutils.RewriteRules, ) (finalErr error) { startTS, err := rc.GetTSWithRetry(ctx) if err != nil { @@ -2422,7 +2436,7 @@ func (rc *Client) PreCheckTableClusterIndex( } func (rc *Client) InstallLogFileManager(ctx context.Context, startTS, restoreTS uint64, metadataDownloadBatchSize uint) error { - init := LogFileManagerInit{ + init := logrestore.LogFileManagerInit{ StartTS: startTS, RestoreTS: restoreTS, Storage: rc.storage, @@ -2430,7 +2444,7 @@ func (rc *Client) InstallLogFileManager(ctx context.Context, startTS, restoreTS MetadataDownloadBatchSize: metadataDownloadBatchSize, } var err error - rc.logFileManager, err = CreateLogFileManager(ctx, init) + rc.LogFileManager, err = logrestore.CreateLogFileManager(ctx, init) if err != nil { return err } @@ -2502,9 +2516,9 @@ type FilesInRegion struct { writeSize uint64 writeKVCount int64 - defaultFiles []*LogDataFileInfo - writeFiles []*LogDataFileInfo - deleteFiles []*LogDataFileInfo + defaultFiles []*logrestore.LogDataFileInfo + writeFiles []*logrestore.LogDataFileInfo + deleteFiles []*logrestore.LogDataFileInfo } type FilesInTable struct { @@ -2513,15 +2527,15 @@ type FilesInTable struct { func ApplyKVFilesWithBatchMethod( ctx context.Context, - logIter LogIter, + logIter logrestore.LogIter, batchCount int, batchSize uint64, - applyFunc func(files []*LogDataFileInfo, kvCount int64, size uint64), + applyFunc func(files []*logrestore.LogDataFileInfo, kvCount int64, size uint64), applyWg *sync.WaitGroup, ) error { var ( tableMapFiles = make(map[int64]*FilesInTable) - tmpFiles = make([]*LogDataFileInfo, 0, batchCount) + tmpFiles = make([]*logrestore.LogDataFileInfo, 0, batchCount) tmpSize uint64 = 0 tmpKVCount int64 = 0 ) @@ -2532,7 +2546,7 @@ func ApplyKVFilesWithBatchMethod( f := r.Item if f.GetType() == backuppb.FileType_Put && f.GetLength() >= batchSize { - applyFunc([]*LogDataFileInfo{f}, f.GetNumberOfEntries(), f.GetLength()) + applyFunc([]*logrestore.LogDataFileInfo{f}, f.GetNumberOfEntries(), f.GetLength()) continue } @@ -2551,13 +2565,13 @@ func ApplyKVFilesWithBatchMethod( if f.GetType() == backuppb.FileType_Delete { if fs.defaultFiles == nil { - fs.deleteFiles = make([]*LogDataFileInfo, 0) + fs.deleteFiles = make([]*logrestore.LogDataFileInfo, 0) } fs.deleteFiles = append(fs.deleteFiles, f) } else { if f.GetCf() == stream.DefaultCF { if fs.defaultFiles == nil { - fs.defaultFiles = make([]*LogDataFileInfo, 0, batchCount) + fs.defaultFiles = make([]*logrestore.LogDataFileInfo, 0, batchCount) } fs.defaultFiles = append(fs.defaultFiles, f) fs.defaultSize += f.Length @@ -2570,7 +2584,7 @@ func ApplyKVFilesWithBatchMethod( } } else { if fs.writeFiles == nil { - fs.writeFiles = make([]*LogDataFileInfo, 0, batchCount) + fs.writeFiles = make([]*logrestore.LogDataFileInfo, 0, batchCount) } fs.writeFiles = append(fs.writeFiles, f) fs.writeSize += f.GetLength() @@ -2606,14 +2620,14 @@ func ApplyKVFilesWithBatchMethod( if len(tmpFiles) >= batchCount || tmpSize >= batchSize { applyFunc(tmpFiles, tmpKVCount, tmpSize) - tmpFiles = make([]*LogDataFileInfo, 0, batchCount) + tmpFiles = make([]*logrestore.LogDataFileInfo, 0, batchCount) tmpSize = 0 tmpKVCount = 0 } } if len(tmpFiles) > 0 { applyFunc(tmpFiles, tmpKVCount, tmpSize) - tmpFiles = make([]*LogDataFileInfo, 0, batchCount) + tmpFiles = 
make([]*logrestore.LogDataFileInfo, 0, batchCount) tmpSize = 0 tmpKVCount = 0 } @@ -2625,11 +2639,11 @@ func ApplyKVFilesWithBatchMethod( func ApplyKVFilesWithSingelMethod( ctx context.Context, - files LogIter, - applyFunc func(file []*LogDataFileInfo, kvCount int64, size uint64), + files logrestore.LogIter, + applyFunc func(file []*logrestore.LogDataFileInfo, kvCount int64, size uint64), applyWg *sync.WaitGroup, ) error { - deleteKVFiles := make([]*LogDataFileInfo, 0) + deleteKVFiles := make([]*logrestore.LogDataFileInfo, 0) for r := files.TryNext(ctx); !r.Finished; r = files.TryNext(ctx) { if r.Err != nil { @@ -2641,14 +2655,14 @@ func ApplyKVFilesWithSingelMethod( deleteKVFiles = append(deleteKVFiles, f) continue } - applyFunc([]*LogDataFileInfo{f}, f.GetNumberOfEntries(), f.GetLength()) + applyFunc([]*logrestore.LogDataFileInfo{f}, f.GetNumberOfEntries(), f.GetLength()) } applyWg.Wait() log.Info("restore delete files", zap.Int("count", len(deleteKVFiles))) for _, file := range deleteKVFiles { f := file - applyFunc([]*LogDataFileInfo{f}, f.GetNumberOfEntries(), f.GetLength()) + applyFunc([]*logrestore.LogDataFileInfo{f}, f.GetNumberOfEntries(), f.GetLength()) } return nil @@ -2656,9 +2670,9 @@ func ApplyKVFilesWithSingelMethod( func (rc *Client) RestoreKVFiles( ctx context.Context, - rules map[int64]*RewriteRules, + rules map[int64]*restoreutils.RewriteRules, idrules map[int64]int64, - logIter LogIter, + logIter logrestore.LogIter, runner *checkpoint.CheckpointRunner[checkpoint.LogRestoreKeyType, checkpoint.LogRestoreValueType], pitrBatchCount uint32, pitrBatchSize uint32, @@ -2688,7 +2702,7 @@ func (rc *Client) RestoreKVFiles( var applyWg sync.WaitGroup eg, ectx := errgroup.WithContext(ctx) - applyFunc := func(files []*LogDataFileInfo, kvCount int64, size uint64) { + applyFunc := func(files []*logrestore.LogDataFileInfo, kvCount int64, size uint64) { if len(files) == 0 { return } @@ -2735,7 +2749,7 @@ func (rc *Client) RestoreKVFiles( } }() - return rc.fileImporter.ImportKVFiles(ectx, files, rule, rc.shiftStartTS, rc.startTS, rc.restoreTS, supportBatch) + return rc.fileImporter.ImportKVFiles(ectx, files, rule, rc.ShiftStartTS, rc.StartTS, rc.RestoreTS, supportBatch) }) } } @@ -2880,7 +2894,7 @@ func (rc *Client) InitSchemasReplaceForDDL( if !cfg.IsNewTask { log.Info("try to load pitr id maps") needConstructIdMap = false - dbMaps, err = rc.initSchemasMap(ctx, rc.GetClusterID(ctx), rc.restoreTS) + dbMaps, err = rc.initSchemasMap(ctx, rc.GetClusterID(ctx), rc.RestoreTS) if err != nil { return nil, errors.Trace(err) } @@ -2889,9 +2903,9 @@ func (rc *Client) InitSchemasReplaceForDDL( // a new task, but without full snapshot restore, tries to load // schemas map whose `restore-ts`` is the task's `start-ts`. 
if len(dbMaps) <= 0 && !cfg.HasFullRestore { - log.Info("try to load pitr id maps of the previous task", zap.Uint64("start-ts", rc.startTS)) + log.Info("try to load pitr id maps of the previous task", zap.Uint64("start-ts", rc.StartTS)) needConstructIdMap = true - dbMaps, err = rc.initSchemasMap(ctx, rc.GetClusterID(ctx), rc.startTS) + dbMaps, err = rc.initSchemasMap(ctx, rc.GetClusterID(ctx), rc.StartTS) if err != nil { return nil, errors.Trace(err) } @@ -2935,8 +2949,8 @@ func (rc *Client) InitSchemasReplaceForDDL( dbReplace.TableMap[t.Info.ID] = &stream.TableReplace{ Name: newTableInfo.Name.O, TableID: newTableInfo.ID, - PartitionMap: getPartitionIDMap(newTableInfo, t.Info), - IndexMap: getIndexIDMap(newTableInfo, t.Info), + PartitionMap: restoreutils.GetPartitionIDMap(newTableInfo, t.Info), + IndexMap: restoreutils.GetIndexIDMap(newTableInfo, t.Info), } } } else { @@ -3088,7 +3102,7 @@ func (rc *Client) constructIDMap( } for _, entry := range entries { - if _, err := sr.RewriteKvEntry(&entry.e, f.GetCf()); err != nil { + if _, err := sr.RewriteKvEntry(&entry.E, f.GetCf()); err != nil { return errors.Trace(err) } } @@ -3107,12 +3121,12 @@ func (rc *Client) RestoreMetaKVFilesWithBatchMethod( ctx context.Context, files []*backuppb.DataFileInfo, schemasReplace *stream.SchemasReplace, - kvEntries []*KvEntryWithTS, + kvEntries []*logrestore.KvEntryWithTS, filterTS uint64, updateStats func(kvCount uint64, size uint64), progressInc func(), cf string, - ) ([]*KvEntryWithTS, error), + ) ([]*logrestore.KvEntryWithTS, error), ) error { // the average size of each KV is 2560 Bytes // kvEntries is kvs left by the previous batch @@ -3126,8 +3140,8 @@ func (rc *Client) RestoreMetaKVFilesWithBatchMethod( defaultIdx int = 0 writeIdx int = 0 - defaultKvEntries = make([]*KvEntryWithTS, 0) - writeKvEntries = make([]*KvEntryWithTS, 0) + defaultKvEntries = make([]*logrestore.KvEntryWithTS, 0) + writeKvEntries = make([]*logrestore.KvEntryWithTS, 0) ) // Set restoreKV to SchemaReplace. schemasReplace.SetRestoreKVStatus() @@ -3186,31 +3200,25 @@ func (rc *Client) RestoreMetaKVFilesWithBatchMethod( return nil } -// the kv entry with ts, the ts is decoded from entry. -type KvEntryWithTS struct { - e kv.Entry - ts uint64 -} - func (rc *Client) RestoreBatchMetaKVFiles( ctx context.Context, files []*backuppb.DataFileInfo, schemasReplace *stream.SchemasReplace, - kvEntries []*KvEntryWithTS, + kvEntries []*logrestore.KvEntryWithTS, filterTS uint64, updateStats func(kvCount uint64, size uint64), progressInc func(), cf string, -) ([]*KvEntryWithTS, error) { - nextKvEntries := make([]*KvEntryWithTS, 0) - curKvEntries := make([]*KvEntryWithTS, 0) +) ([]*logrestore.KvEntryWithTS, error) { + nextKvEntries := make([]*logrestore.KvEntryWithTS, 0) + curKvEntries := make([]*logrestore.KvEntryWithTS, 0) if len(files) == 0 && len(kvEntries) == 0 { return nextKvEntries, nil } // filter the kv from kvEntries again. for _, kv := range kvEntries { - if kv.ts < filterTS { + if kv.Ts < filterTS { curKvEntries = append(curKvEntries, kv) } else { nextKvEntries = append(nextKvEntries, kv) @@ -3229,8 +3237,8 @@ func (rc *Client) RestoreBatchMetaKVFiles( } // sort these entries. - slices.SortFunc(curKvEntries, func(i, j *KvEntryWithTS) int { - return cmp.Compare(i.ts, j.ts) + slices.SortFunc(curKvEntries, func(i, j *logrestore.KvEntryWithTS) int { + return cmp.Compare(i.Ts, j.Ts) }) // restore these entries with rawPut() method. 
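
Since `KvEntryWithTS` now lives in `log_restore` with exported fields (`E`, `Ts`), the filter-and-sort step in `RestoreBatchMetaKVFiles` above boils down to the following sketch (`buffered` and `filterTS` are illustrative names only):

```go
// Entries below filterTS are replayed in this batch, the rest are carried over;
// the current batch is then ordered by commit TS, as the hunk above does.
cur := make([]*logrestore.KvEntryWithTS, 0, len(buffered))
next := make([]*logrestore.KvEntryWithTS, 0)
for _, e := range buffered {
	if e.Ts < filterTS {
		cur = append(cur, e)
	} else {
		next = append(next, e)
	}
}
slices.SortFunc(cur, func(a, b *logrestore.KvEntryWithTS) int {
	return cmp.Compare(a.Ts, b.Ts)
})
```
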
@@ -3251,7 +3259,7 @@ func (rc *Client) RestoreBatchMetaKVFiles( func (rc *Client) restoreMetaKvEntries( ctx context.Context, sr *stream.SchemasReplace, - entries []*KvEntryWithTS, + entries []*logrestore.KvEntryWithTS, columnFamily string, ) (uint64, uint64, error) { var ( @@ -3262,24 +3270,24 @@ func (rc *Client) restoreMetaKvEntries( rc.rawKVClient.SetColumnFamily(columnFamily) for _, entry := range entries { - log.Debug("before rewrte entry", zap.Uint64("key-ts", entry.ts), zap.Int("key-len", len(entry.e.Key)), - zap.Int("value-len", len(entry.e.Value)), zap.ByteString("key", entry.e.Key)) + log.Debug("before rewrte entry", zap.Uint64("key-ts", entry.Ts), zap.Int("key-len", len(entry.E.Key)), + zap.Int("value-len", len(entry.E.Value)), zap.ByteString("key", entry.E.Key)) - newEntry, err := sr.RewriteKvEntry(&entry.e, columnFamily) + newEntry, err := sr.RewriteKvEntry(&entry.E, columnFamily) if err != nil { - log.Error("rewrite txn entry failed", zap.Int("klen", len(entry.e.Key)), - logutil.Key("txn-key", entry.e.Key)) + log.Error("rewrite txn entry failed", zap.Int("klen", len(entry.E.Key)), + logutil.Key("txn-key", entry.E.Key)) return 0, 0, errors.Trace(err) } else if newEntry == nil { continue } log.Debug("after rewrite entry", zap.Int("new-key-len", len(newEntry.Key)), - zap.Int("new-value-len", len(entry.e.Value)), zap.ByteString("new-key", newEntry.Key)) + zap.Int("new-value-len", len(entry.E.Value)), zap.ByteString("new-key", newEntry.Key)) failpoint.Inject("failed-to-restore-metakv", func(_ failpoint.Value) { failpoint.Return(0, 0, errors.Errorf("failpoint: failed to restore metakv")) }) - if err := rc.rawKVClient.Put(ctx, newEntry.Key, newEntry.Value, entry.ts); err != nil { + if err := rc.rawKVClient.Put(ctx, newEntry.Key, newEntry.Value, entry.Ts); err != nil { return 0, 0, errors.Trace(err) } // for failpoint, we need to flush the cache in rawKVClient every time @@ -3645,7 +3653,7 @@ func (rc *Client) SaveIDMap( ) error { idMaps := sr.TidySchemaMaps() clusterID := rc.GetClusterID(ctx) - metaFileName := metautil.PitrIDMapsFilename(clusterID, rc.restoreTS) + metaFileName := metautil.PitrIDMapsFilename(clusterID, rc.RestoreTS) metaWriter := metautil.NewMetaWriter(rc.storage, metautil.MetaFileSize, false, metaFileName, nil) metaWriter.Update(func(m *backuppb.BackupMeta) { // save log startTS to backupmeta file @@ -3664,8 +3672,8 @@ func (rc *Client) SaveIDMap( log.Info("save checkpoint task info with InLogRestoreAndIdMapPersist status") if err := checkpoint.SaveCheckpointTaskInfoForLogRestore(ctx, rc.storage, &checkpoint.CheckpointTaskInfoForLogRestore{ Progress: checkpoint.InLogRestoreAndIdMapPersist, - StartTS: rc.startTS, - RestoreTS: rc.restoreTS, + StartTS: rc.StartTS, + RestoreTS: rc.RestoreTS, RewriteTS: rc.currentTS, TiFlashItems: items, }, rc.GetClusterID(ctx)); err != nil { @@ -3761,14 +3769,14 @@ func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, storage // RangeFilterFromIngestRecorder rewrites the table id of items in the ingestRecorder // TODO: need to implement the range filter out feature -func (rc *Client) RangeFilterFromIngestRecorder(recorder *ingestrec.IngestRecorder, rewriteRules map[int64]*RewriteRules) error { +func (rc *Client) RangeFilterFromIngestRecorder(recorder *ingestrec.IngestRecorder, rewriteRules map[int64]*restoreutils.RewriteRules) error { err := recorder.RewriteTableID(func(tableID int64) (int64, bool, error) { rewriteRule, exists := rewriteRules[tableID] if !exists { // since the table's files will be skipped restoring, 
here also skips. return 0, true, nil } - newTableID := GetRewriteTableID(tableID, rewriteRule) + newTableID := restoreutils.GetRewriteTableID(tableID, rewriteRule) if newTableID == 0 { return 0, false, errors.Errorf("newTableID is 0, tableID: %d", tableID) } diff --git a/br/pkg/restore/client_test.go b/br/pkg/restore/client_test.go index 247e7a9764..b7497c23e4 100644 --- a/br/pkg/restore/client_test.go +++ b/br/pkg/restore/client_test.go @@ -23,6 +23,8 @@ import ( "github.com/pingcap/tidb/br/pkg/metautil" "github.com/pingcap/tidb/br/pkg/mock" "github.com/pingcap/tidb/br/pkg/restore" + fileimporter "github.com/pingcap/tidb/br/pkg/restore/file_importer" + logrestore "github.com/pingcap/tidb/br/pkg/restore/log_restore" "github.com/pingcap/tidb/br/pkg/restore/tiflashrec" "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/utils" @@ -497,7 +499,7 @@ func TestPreCheckTableTiFlashReplicas(t *testing.T) { // Mock ImporterClient interface type FakeImporterClient struct { - restore.ImporterClient + fileimporter.ImporterClient } // Record the stores that have communicated @@ -765,7 +767,7 @@ func TestRestoreBatchMetaKVFiles(t *testing.T) { client := restore.MockClient(nil) files := []*backuppb.DataFileInfo{} // test empty files and entries - next, err := client.RestoreBatchMetaKVFiles(context.Background(), files[0:], nil, make([]*restore.KvEntryWithTS, 0), math.MaxUint64, nil, nil, "") + next, err := client.RestoreBatchMetaKVFiles(context.Background(), files[0:], nil, make([]*logrestore.KvEntryWithTS, 0), math.MaxUint64, nil, nil, "") require.NoError(t, err) require.Equal(t, 0, len(next)) } @@ -788,12 +790,12 @@ func TestRestoreMetaKVFilesWithBatchMethod1(t *testing.T) { ctx context.Context, files []*backuppb.DataFileInfo, schemasReplace *stream.SchemasReplace, - entries []*restore.KvEntryWithTS, + entries []*logrestore.KvEntryWithTS, filterTS uint64, updateStats func(kvCount uint64, size uint64), progressInc func(), cf string, - ) ([]*restore.KvEntryWithTS, error) { + ) ([]*logrestore.KvEntryWithTS, error) { require.Equal(t, 0, len(entries)) require.Equal(t, 0, len(files)) batchCount++ @@ -828,12 +830,12 @@ func TestRestoreMetaKVFilesWithBatchMethod2_default_empty(t *testing.T) { ctx context.Context, files []*backuppb.DataFileInfo, schemasReplace *stream.SchemasReplace, - entries []*restore.KvEntryWithTS, + entries []*logrestore.KvEntryWithTS, filterTS uint64, updateStats func(kvCount uint64, size uint64), progressInc func(), cf string, - ) ([]*restore.KvEntryWithTS, error) { + ) ([]*logrestore.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { require.Equal(t, stream.DefaultCF, cf) batchCount++ @@ -875,12 +877,12 @@ func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_1(t *testing.T) { ctx context.Context, files []*backuppb.DataFileInfo, schemasReplace *stream.SchemasReplace, - entries []*restore.KvEntryWithTS, + entries []*logrestore.KvEntryWithTS, filterTS uint64, updateStats func(kvCount uint64, size uint64), progressInc func(), cf string, - ) ([]*restore.KvEntryWithTS, error) { + ) ([]*logrestore.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { require.Equal(t, stream.WriteCF, cf) batchCount++ @@ -930,12 +932,12 @@ func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { ctx context.Context, files []*backuppb.DataFileInfo, schemasReplace *stream.SchemasReplace, - entries []*restore.KvEntryWithTS, + entries []*logrestore.KvEntryWithTS, filterTS uint64, updateStats func(kvCount uint64, size uint64), progressInc func(), cf 
string, - ) ([]*restore.KvEntryWithTS, error) { + ) ([]*logrestore.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { // write - write require.Equal(t, stream.WriteCF, cf) @@ -997,12 +999,12 @@ func TestRestoreMetaKVFilesWithBatchMethod_with_entries(t *testing.T) { ctx context.Context, files []*backuppb.DataFileInfo, schemasReplace *stream.SchemasReplace, - entries []*restore.KvEntryWithTS, + entries []*logrestore.KvEntryWithTS, filterTS uint64, updateStats func(kvCount uint64, size uint64), progressInc func(), cf string, - ) ([]*restore.KvEntryWithTS, error) { + ) ([]*logrestore.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { // write - write require.Equal(t, stream.WriteCF, cf) @@ -1105,17 +1107,17 @@ func TestRestoreMetaKVFilesWithBatchMethod3(t *testing.T) { ctx context.Context, fs []*backuppb.DataFileInfo, schemasReplace *stream.SchemasReplace, - entries []*restore.KvEntryWithTS, + entries []*logrestore.KvEntryWithTS, filterTS uint64, updateStats func(kvCount uint64, size uint64), progressInc func(), cf string, - ) ([]*restore.KvEntryWithTS, error) { + ) ([]*logrestore.KvEntryWithTS, error) { result[batchCount] = fs t.Log(filterTS) resultKV[batchCount] = len(entries) batchCount++ - return make([]*restore.KvEntryWithTS, batchCount), nil + return make([]*logrestore.KvEntryWithTS, batchCount), nil }, ) require.Nil(t, err) @@ -1192,12 +1194,12 @@ func TestRestoreMetaKVFilesWithBatchMethod4(t *testing.T) { ctx context.Context, fs []*backuppb.DataFileInfo, schemasReplace *stream.SchemasReplace, - entries []*restore.KvEntryWithTS, + entries []*logrestore.KvEntryWithTS, filterTS uint64, updateStats func(kvCount uint64, size uint64), progressInc func(), cf string, - ) ([]*restore.KvEntryWithTS, error) { + ) ([]*logrestore.KvEntryWithTS, error) { result[batchCount] = fs batchCount++ return nil, nil @@ -1273,12 +1275,12 @@ func TestRestoreMetaKVFilesWithBatchMethod5(t *testing.T) { ctx context.Context, fs []*backuppb.DataFileInfo, schemasReplace *stream.SchemasReplace, - entries []*restore.KvEntryWithTS, + entries []*logrestore.KvEntryWithTS, filterTS uint64, updateStats func(kvCount uint64, size uint64), progressInc func(), cf string, - ) ([]*restore.KvEntryWithTS, error) { + ) ([]*logrestore.KvEntryWithTS, error) { result[batchCount] = fs batchCount++ return nil, nil @@ -1371,17 +1373,17 @@ func TestRestoreMetaKVFilesWithBatchMethod6(t *testing.T) { ctx context.Context, fs []*backuppb.DataFileInfo, schemasReplace *stream.SchemasReplace, - entries []*restore.KvEntryWithTS, + entries []*logrestore.KvEntryWithTS, filterTS uint64, updateStats func(kvCount uint64, size uint64), progressInc func(), cf string, - ) ([]*restore.KvEntryWithTS, error) { + ) ([]*logrestore.KvEntryWithTS, error) { result[batchCount] = fs t.Log(filterTS) resultKV[batchCount] = len(entries) batchCount++ - return make([]*restore.KvEntryWithTS, batchCount), nil + return make([]*logrestore.KvEntryWithTS, batchCount), nil }, ) require.Nil(t, err) @@ -1443,9 +1445,9 @@ func TestSortMetaKVFiles(t *testing.T) { require.Equal(t, files[4].Path, "f5") } -func toLogDataFileInfoIter(logIter iter.TryNextor[*backuppb.DataFileInfo]) restore.LogIter { - return iter.Map(logIter, func(d *backuppb.DataFileInfo) *restore.LogDataFileInfo { - return &restore.LogDataFileInfo{ +func toLogDataFileInfoIter(logIter iter.TryNextor[*backuppb.DataFileInfo]) logrestore.LogIter { + return iter.Map(logIter, func(d *backuppb.DataFileInfo) *logrestore.LogDataFileInfo { + return &logrestore.LogDataFileInfo{ DataFileInfo: d, } 
}) @@ -1481,7 +1483,7 @@ func TestApplyKVFilesWithSingelMethod(t *testing.T) { } var applyWg sync.WaitGroup applyFunc := func( - files []*restore.LogDataFileInfo, + files []*logrestore.LogDataFileInfo, kvCount int64, size uint64, ) { @@ -1553,7 +1555,7 @@ func TestApplyKVFilesWithBatchMethod1(t *testing.T) { } var applyWg sync.WaitGroup applyFunc := func( - files []*restore.LogDataFileInfo, + files []*logrestore.LogDataFileInfo, kvCount int64, size uint64, ) { @@ -1643,7 +1645,7 @@ func TestApplyKVFilesWithBatchMethod2(t *testing.T) { } var applyWg sync.WaitGroup applyFunc := func( - files []*restore.LogDataFileInfo, + files []*logrestore.LogDataFileInfo, kvCount int64, size uint64, ) { @@ -1727,7 +1729,7 @@ func TestApplyKVFilesWithBatchMethod3(t *testing.T) { } var applyWg sync.WaitGroup applyFunc := func( - files []*restore.LogDataFileInfo, + files []*logrestore.LogDataFileInfo, kvCount int64, size uint64, ) { @@ -1809,7 +1811,7 @@ func TestApplyKVFilesWithBatchMethod4(t *testing.T) { } var applyWg sync.WaitGroup applyFunc := func( - files []*restore.LogDataFileInfo, + files []*logrestore.LogDataFileInfo, kvCount int64, size uint64, ) { @@ -1887,7 +1889,7 @@ func TestApplyKVFilesWithBatchMethod5(t *testing.T) { } var applyWg sync.WaitGroup applyFunc := func( - files []*restore.LogDataFileInfo, + files []*logrestore.LogDataFileInfo, kvCount int64, size uint64, ) { diff --git a/br/pkg/restore/data/BUILD.bazel b/br/pkg/restore/data/BUILD.bazel new file mode 100644 index 0000000000..76e082ee4c --- /dev/null +++ b/br/pkg/restore/data/BUILD.bazel @@ -0,0 +1,57 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "data", + srcs = [ + "data.go", + "key.go", + "recover.go", + ], + importpath = "github.com/pingcap/tidb/br/pkg/restore/data", + visibility = ["//visibility:public"], + deps = [ + "//br/pkg/common", + "//br/pkg/conn", + "//br/pkg/errors", + "//br/pkg/glue", + "//br/pkg/logutil", + "//br/pkg/utils", + "//br/pkg/utils/storewatch", + "//pkg/ddl", + "//pkg/util", + "@com_github_emirpasic_gods//maps/treemap", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_kvproto//pkg/metapb", + "@com_github_pingcap_kvproto//pkg/recoverdatapb", + "@com_github_pingcap_log//:log", + "@com_github_tikv_client_go_v2//kv", + "@com_github_tikv_client_go_v2//tikv", + "@com_github_tikv_client_go_v2//txnkv/rangetask", + "@org_golang_google_grpc//:grpc", + "@org_golang_google_grpc//backoff", + "@org_golang_x_sync//errgroup", + "@org_uber_go_zap//:zap", + ], +) + +go_test( + name = "data_test", + timeout = "short", + srcs = [ + "data_test.go", + "key_test.go", + ], + flaky = True, + shard_count = 6, + deps = [ + ":data", + "//br/pkg/conn", + "//br/pkg/gluetidb", + "//br/pkg/pdutil", + "@com_github_pingcap_kvproto//pkg/metapb", + "@com_github_pingcap_kvproto//pkg/recoverdatapb", + "@com_github_stretchr_testify//require", + "@com_github_tikv_client_go_v2//testutils", + "@com_github_tikv_pd_client//:client", + ], +) diff --git a/br/pkg/restore/data.go b/br/pkg/restore/data/data.go similarity index 99% rename from br/pkg/restore/data.go rename to br/pkg/restore/data/data.go index 849905c1ca..342ac20e09 100644 --- a/br/pkg/restore/data.go +++ b/br/pkg/restore/data/data.go @@ -1,5 +1,5 @@ // Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. 
-package restore +package data import ( "context" @@ -27,6 +27,8 @@ import ( "google.golang.org/grpc/backoff" ) +const gRPCBackOffMaxDelay = 3 * time.Second + type RecoveryStage int const ( diff --git a/br/pkg/restore/data_test.go b/br/pkg/restore/data/data_test.go similarity index 93% rename from br/pkg/restore/data_test.go rename to br/pkg/restore/data/data_test.go index 47c0668417..7c3a5a3242 100644 --- a/br/pkg/restore/data_test.go +++ b/br/pkg/restore/data/data_test.go @@ -1,5 +1,5 @@ // Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. -package restore_test +package data_test import ( "context" @@ -10,7 +10,7 @@ import ( "github.com/pingcap/tidb/br/pkg/conn" "github.com/pingcap/tidb/br/pkg/gluetidb" "github.com/pingcap/tidb/br/pkg/pdutil" - "github.com/pingcap/tidb/br/pkg/restore" + "github.com/pingcap/tidb/br/pkg/restore/data" "github.com/stretchr/testify/require" "github.com/tikv/client-go/v2/testutils" pd "github.com/tikv/pd/client" @@ -27,7 +27,7 @@ type testData struct { cancel context.CancelFunc mockPDClient pd.Client - mockRecovery restore.Recovery + mockRecovery data.Recovery } func newRegionMeta( @@ -55,19 +55,19 @@ func newRegionMeta( } func (t *testData) generateRegionMeta() { - storeMeta0 := restore.NewStoreMeta(1) + storeMeta0 := data.NewStoreMeta(1) storeMeta0.RegionMetas = append(storeMeta0.RegionMetas, newRegionMeta(11, 24, 8, 5, 4, 1, false, []byte(""), []byte("b"))) storeMeta0.RegionMetas = append(storeMeta0.RegionMetas, newRegionMeta(12, 34, 5, 6, 5, 1, false, []byte("b"), []byte("c"))) storeMeta0.RegionMetas = append(storeMeta0.RegionMetas, newRegionMeta(13, 44, 1200, 7, 6, 1, false, []byte("c"), []byte(""))) t.mockRecovery.StoreMetas[0] = storeMeta0 - storeMeta1 := restore.NewStoreMeta(2) + storeMeta1 := data.NewStoreMeta(2) storeMeta1.RegionMetas = append(storeMeta1.RegionMetas, newRegionMeta(11, 25, 7, 6, 4, 1, false, []byte(""), []byte("b"))) storeMeta1.RegionMetas = append(storeMeta1.RegionMetas, newRegionMeta(12, 35, 5, 6, 5, 1, false, []byte("b"), []byte("c"))) storeMeta1.RegionMetas = append(storeMeta1.RegionMetas, newRegionMeta(13, 45, 1200, 6, 6, 1, false, []byte("c"), []byte(""))) t.mockRecovery.StoreMetas[1] = storeMeta1 - storeMeta2 := restore.NewStoreMeta(3) + storeMeta2 := data.NewStoreMeta(3) storeMeta2.RegionMetas = append(storeMeta2.RegionMetas, newRegionMeta(11, 26, 7, 5, 4, 1, false, []byte(""), []byte("b"))) storeMeta2.RegionMetas = append(storeMeta2.RegionMetas, newRegionMeta(12, 36, 5, 6, 6, 1, false, []byte("b"), []byte("c"))) storeMeta2.RegionMetas = append(storeMeta2.RegionMetas, newRegionMeta(13, maxAllocateId, 1200, 6, 6, 1, false, []byte("c"), []byte(""))) @@ -96,7 +96,7 @@ func createDataSuite(t *testing.T) *testData { fakeProgress := mockGlue.StartProgress(ctx, "Restore Data", int64(numOnlineStore*3), false) - var recovery = restore.NewRecovery(createStores(), mockMgr, fakeProgress, 64) + var recovery = data.NewRecovery(createStores(), mockMgr, fakeProgress, 64) tikvClient.Close() return &testData{ ctx: ctx, diff --git a/br/pkg/restore/data/key.go b/br/pkg/restore/data/key.go new file mode 100644 index 0000000000..b70930b853 --- /dev/null +++ b/br/pkg/restore/data/key.go @@ -0,0 +1,68 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package data + +func keyEq(a, b []byte) bool { + if len(a) != len(b) { + return false + } + for i := 0; i < len(a); i++ { + if a[i] != b[i] { + return false + } + } + return true +} + +func keyCmp(a, b []byte) int { + var length int + var chosen int + if len(a) < len(b) { + length = len(a) + chosen = -1 + } else if len(a) == len(b) { + length = len(a) + chosen = 0 + } else { + length = len(b) + chosen = 1 + } + for i := 0; i < length; i++ { + if a[i] < b[i] { + return -1 + } else if a[i] > b[i] { + return 1 + } + } + return chosen +} + +func keyCmpInterface(a, b any) int { + return keyCmp(a.([]byte), b.([]byte)) +} + +func PrefixStartKey(key []byte) []byte { + var sk = make([]byte, 0, len(key)+1) + sk = append(sk, 'z') + sk = append(sk, key...) + return sk +} + +func PrefixEndKey(key []byte) []byte { + if len(key) == 0 { + return []byte{'z' + 1} + } + return PrefixStartKey(key) +} diff --git a/br/pkg/restore/data/key_test.go b/br/pkg/restore/data/key_test.go new file mode 100644 index 0000000000..2564bef34b --- /dev/null +++ b/br/pkg/restore/data/key_test.go @@ -0,0 +1,181 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
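
The key helpers exported by the new `data/key.go` above map user keys into TiKV's `'z'`-prefixed data-key space, with an empty end key standing for the exclusive upper bound `'z' + 1`. A tiny self-contained illustration (the `main` wrapper is mine; the behavior is read straight off the code above):

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/pingcap/tidb/br/pkg/restore/data"
)

func main() {
	sk := data.PrefixStartKey([]byte("a")) // "za"
	ek := data.PrefixEndKey([]byte(""))    // []byte{'z' + 1}: end of the data-key space
	fmt.Println(string(sk), bytes.Compare(sk, ek) < 0) // "za true"
}
```
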
+ +package data_test + +import ( + "testing" + + recover_data "github.com/pingcap/kvproto/pkg/recoverdatapb" + "github.com/pingcap/tidb/br/pkg/restore/data" + "github.com/stretchr/testify/require" +) + +func newPeerMeta( + regionId uint64, + peerId uint64, + storeId uint64, + startKey []byte, + endKey []byte, + lastLogTerm uint64, + lastIndex uint64, + commitIndex uint64, + version uint64, + tombstone bool, +) *data.RecoverRegion { + return &data.RecoverRegion{ + RegionMeta: &recover_data.RegionMeta{ + RegionId: regionId, + PeerId: peerId, + StartKey: startKey, + EndKey: endKey, + LastLogTerm: lastLogTerm, + LastIndex: lastIndex, + CommitIndex: commitIndex, + Version: version, + Tombstone: tombstone, + }, + StoreId: storeId, + } +} + +func newRecoverRegionInfo(r *data.RecoverRegion) *data.RecoverRegionInfo { + return &data.RecoverRegionInfo{ + RegionVersion: r.Version, + RegionId: r.RegionId, + StartKey: data.PrefixStartKey(r.StartKey), + EndKey: data.PrefixEndKey(r.EndKey), + TombStone: r.Tombstone, + } +} + +func TestSortRecoverRegions(t *testing.T) { + selectedPeer1 := newPeerMeta(9, 11, 2, []byte("aa"), nil, 2, 0, 0, 0, false) + selectedPeer2 := newPeerMeta(19, 22, 3, []byte("bbb"), nil, 2, 1, 0, 1, false) + selectedPeer3 := newPeerMeta(29, 30, 1, []byte("c"), nil, 2, 1, 1, 2, false) + regions := map[uint64][]*data.RecoverRegion{ + 9: { + // peer 11 should be selected because of log term + newPeerMeta(9, 10, 1, []byte("a"), nil, 1, 1, 1, 1, false), + selectedPeer1, + newPeerMeta(9, 12, 3, []byte("aaa"), nil, 0, 0, 0, 0, false), + }, + 19: { + // peer 22 should be selected because of log index + newPeerMeta(19, 20, 1, []byte("b"), nil, 1, 1, 1, 1, false), + newPeerMeta(19, 21, 2, []byte("bb"), nil, 2, 0, 0, 0, false), + selectedPeer2, + }, + 29: { + // peer 30 should be selected because of log index + selectedPeer3, + newPeerMeta(29, 31, 2, []byte("cc"), nil, 2, 0, 0, 0, false), + newPeerMeta(29, 32, 3, []byte("ccc"), nil, 2, 1, 0, 0, false), + }, + } + regionsInfos := data.SortRecoverRegions(regions) + expectRegionInfos := []*data.RecoverRegionInfo{ + newRecoverRegionInfo(selectedPeer3), + newRecoverRegionInfo(selectedPeer2), + newRecoverRegionInfo(selectedPeer1), + } + require.Equal(t, expectRegionInfos, regionsInfos) +} + +func TestCheckConsistencyAndValidPeer(t *testing.T) { + //key space is continuous + validPeer1 := newPeerMeta(9, 11, 2, []byte(""), []byte("bb"), 2, 0, 0, 0, false) + validPeer2 := newPeerMeta(19, 22, 3, []byte("bb"), []byte("cc"), 2, 1, 0, 1, false) + validPeer3 := newPeerMeta(29, 30, 1, []byte("cc"), []byte(""), 2, 1, 1, 2, false) + + validRegionInfos := []*data.RecoverRegionInfo{ + newRecoverRegionInfo(validPeer1), + newRecoverRegionInfo(validPeer2), + newRecoverRegionInfo(validPeer3), + } + + validPeer, err := data.CheckConsistencyAndValidPeer(validRegionInfos) + require.NoError(t, err) + require.Equal(t, 3, len(validPeer)) + var regions = make(map[uint64]struct{}, 3) + regions[9] = struct{}{} + regions[19] = struct{}{} + regions[29] = struct{}{} + + require.Equal(t, regions, validPeer) + + //key space is not continuous + invalidPeer1 := newPeerMeta(9, 11, 2, []byte("aa"), []byte("cc"), 2, 0, 0, 0, false) + invalidPeer2 := newPeerMeta(19, 22, 3, []byte("dd"), []byte("cc"), 2, 1, 0, 1, false) + invalidPeer3 := newPeerMeta(29, 30, 1, []byte("cc"), []byte("dd"), 2, 1, 1, 2, false) + + invalidRegionInfos := []*data.RecoverRegionInfo{ + newRecoverRegionInfo(invalidPeer1), + newRecoverRegionInfo(invalidPeer2), + newRecoverRegionInfo(invalidPeer3), + } + + _, err = 
data.CheckConsistencyAndValidPeer(invalidRegionInfos) + require.Error(t, err) + require.Regexp(t, ".*invalid restore range.*", err.Error()) +} + +func TestLeaderCandidates(t *testing.T) { + //key space is continuous + validPeer1 := newPeerMeta(9, 11, 2, []byte(""), []byte("bb"), 2, 1, 0, 0, false) + validPeer2 := newPeerMeta(19, 22, 3, []byte("bb"), []byte("cc"), 2, 1, 0, 1, false) + validPeer3 := newPeerMeta(29, 30, 1, []byte("cc"), []byte(""), 2, 1, 0, 2, false) + + peers := []*data.RecoverRegion{ + validPeer1, + validPeer2, + validPeer3, + } + + candidates, err := data.LeaderCandidates(peers) + require.NoError(t, err) + require.Equal(t, 3, len(candidates)) +} + +func TestSelectRegionLeader(t *testing.T) { + validPeer1 := newPeerMeta(9, 11, 2, []byte(""), []byte("bb"), 2, 1, 0, 0, false) + validPeer2 := newPeerMeta(19, 22, 3, []byte("bb"), []byte("cc"), 2, 1, 0, 1, false) + validPeer3 := newPeerMeta(29, 30, 1, []byte("cc"), []byte(""), 2, 1, 0, 2, false) + + peers := []*data.RecoverRegion{ + validPeer1, + validPeer2, + validPeer3, + } + // init store banlance score all is 0 + storeBalanceScore := make(map[uint64]int, len(peers)) + leader := data.SelectRegionLeader(storeBalanceScore, peers) + require.Equal(t, validPeer1, leader) + + // change store banlance store + storeBalanceScore[2] = 3 + storeBalanceScore[3] = 2 + storeBalanceScore[1] = 1 + leader = data.SelectRegionLeader(storeBalanceScore, peers) + require.Equal(t, validPeer3, leader) + + // one peer + peer := []*data.RecoverRegion{ + validPeer3, + } + // init store banlance score all is 0 + storeScore := make(map[uint64]int, len(peer)) + leader = data.SelectRegionLeader(storeScore, peer) + require.Equal(t, validPeer3, leader) +} diff --git a/br/pkg/restore/data/recover.go b/br/pkg/restore/data/recover.go new file mode 100644 index 0000000000..ddcf5f3adc --- /dev/null +++ b/br/pkg/restore/data/recover.go @@ -0,0 +1,161 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package data + +import ( + "sort" + + "github.com/emirpasic/gods/maps/treemap" + "github.com/pingcap/errors" + "github.com/pingcap/log" + berrors "github.com/pingcap/tidb/br/pkg/errors" + "go.uber.org/zap" +) + +type RecoverRegionInfo struct { + RegionId uint64 + RegionVersion uint64 + StartKey []byte + EndKey []byte + TombStone bool +} + +func SortRecoverRegions(regions map[uint64][]*RecoverRegion) []*RecoverRegionInfo { + // last log term -> last index -> commit index + cmps := []func(a, b *RecoverRegion) int{ + func(a, b *RecoverRegion) int { + return int(a.GetLastLogTerm() - b.GetLastLogTerm()) + }, + func(a, b *RecoverRegion) int { + return int(a.GetLastIndex() - b.GetLastIndex()) + }, + func(a, b *RecoverRegion) int { + return int(a.GetCommitIndex() - b.GetCommitIndex()) + }, + } + + // Sort region peer by last log term -> last index -> commit index, and collect all regions' version. 
+ var regionInfos = make([]*RecoverRegionInfo, 0, len(regions)) + for regionId, peers := range regions { + sort.Slice(peers, func(i, j int) bool { + for _, cmp := range cmps { + if v := cmp(peers[i], peers[j]); v != 0 { + return v > 0 + } + } + return false + }) + v := peers[0].Version + sk := PrefixStartKey(peers[0].StartKey) + ek := PrefixEndKey(peers[0].EndKey) + regionInfos = append(regionInfos, &RecoverRegionInfo{ + RegionId: regionId, + RegionVersion: v, + StartKey: sk, + EndKey: ek, + TombStone: peers[0].Tombstone, + }) + } + + sort.Slice(regionInfos, func(i, j int) bool { return regionInfos[i].RegionVersion > regionInfos[j].RegionVersion }) + return regionInfos +} + +func CheckConsistencyAndValidPeer(regionInfos []*RecoverRegionInfo) (map[uint64]struct{}, error) { + // split and merge in progressing during the backup, there may some overlap region, we have to handle it + // Resolve version conflicts. + var treeMap = treemap.NewWith(keyCmpInterface) + for _, p := range regionInfos { + var fk, fv any + fk, _ = treeMap.Ceiling(p.StartKey) + // keyspace overlap sk within ceiling - fk + if fk != nil && (keyEq(fk.([]byte), p.StartKey) || keyCmp(fk.([]byte), p.EndKey) < 0) { + continue + } + + // keyspace overlap sk within floor - fk.end_key + fk, fv = treeMap.Floor(p.StartKey) + if fk != nil && keyCmp(fv.(*RecoverRegionInfo).EndKey, p.StartKey) > 0 { + continue + } + treeMap.Put(p.StartKey, p) + } + + // After resolved, all validPeer regions shouldn't be tombstone. + // do some sanity check + var validPeers = make(map[uint64]struct{}, 0) + var iter = treeMap.Iterator() + var prevEndKey = PrefixStartKey([]byte{}) + var prevRegion uint64 = 0 + for iter.Next() { + v := iter.Value().(*RecoverRegionInfo) + if v.TombStone { + log.Error("validPeer shouldn't be tombstone", zap.Uint64("region id", v.RegionId)) + // TODO, some enhancement may need, a PoC or test may need for decision + return nil, errors.Annotatef(berrors.ErrRestoreInvalidPeer, + "Peer shouldn't be tombstone") + } + if !keyEq(prevEndKey, iter.Key().([]byte)) { + log.Error("regions are not adjacent", zap.Uint64("pre region", prevRegion), zap.Uint64("cur region", v.RegionId)) + // TODO, some enhancement may need, a PoC or test may need for decision + return nil, errors.Annotatef(berrors.ErrInvalidRange, + "invalid region range") + } + prevEndKey = v.EndKey + prevRegion = v.RegionId + validPeers[v.RegionId] = struct{}{} + } + return validPeers, nil +} + +// in cloud, since iops and bandwidth limitation, write operator in raft is slow, so raft state (logterm, lastlog, commitlog...) 
are the same among the peers +// LeaderCandidates select all peers can be select as a leader during the restore +func LeaderCandidates(peers []*RecoverRegion) ([]*RecoverRegion, error) { + if peers == nil { + return nil, errors.Annotatef(berrors.ErrRestoreRegionWithoutPeer, + "invalid region range") + } + candidates := make([]*RecoverRegion, 0, len(peers)) + // by default, the peers[0] to be assign as a leader, since peers already sorted by leader selection rule + leader := peers[0] + candidates = append(candidates, leader) + for _, peer := range peers[1:] { + // qualificated candidate is leader.logterm = candidate.logterm && leader.lastindex = candidate.lastindex && && leader.commitindex = candidate.commitindex + if peer.LastLogTerm == leader.LastLogTerm && peer.LastIndex == leader.LastIndex && peer.CommitIndex == leader.CommitIndex { + log.Debug("leader candidate", zap.Uint64("store id", peer.StoreId), zap.Uint64("region id", peer.RegionId), zap.Uint64("peer id", peer.PeerId)) + candidates = append(candidates, peer) + } + } + return candidates, nil +} + +// for region A, has candidate leader x, y, z +// peer x on store 1 with storeBalanceScore 3 +// peer y on store 3 with storeBalanceScore 2 +// peer z on store 4 with storeBalanceScore 1 +// result: peer z will be select as leader on store 4 +func SelectRegionLeader(storeBalanceScore map[uint64]int, peers []*RecoverRegion) *RecoverRegion { + // by default, the peers[0] to be assign as a leader + leader := peers[0] + minLeaderStore := storeBalanceScore[leader.StoreId] + for _, peer := range peers[1:] { + log.Debug("leader candidate", zap.Int("score", storeBalanceScore[peer.StoreId]), zap.Int("min-score", minLeaderStore), zap.Uint64("store id", peer.StoreId), zap.Uint64("region id", peer.RegionId), zap.Uint64("peer id", peer.PeerId)) + if storeBalanceScore[peer.StoreId] < minLeaderStore { + minLeaderStore = storeBalanceScore[peer.StoreId] + leader = peer + } + } + return leader +} diff --git a/br/pkg/restore/file_importer/BUILD.bazel b/br/pkg/restore/file_importer/BUILD.bazel new file mode 100644 index 0000000000..a9d6cc246a --- /dev/null +++ b/br/pkg/restore/file_importer/BUILD.bazel @@ -0,0 +1,77 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "file_importer", + srcs = [ + "import.go", + "import_retry.go", + ], + importpath = "github.com/pingcap/tidb/br/pkg/restore/file_importer", + visibility = ["//visibility:public"], + deps = [ + "//br/pkg/conn", + "//br/pkg/conn/util", + "//br/pkg/errors", + "//br/pkg/logutil", + "//br/pkg/restore/log_restore", + "//br/pkg/restore/split", + "//br/pkg/restore/utils", + "//br/pkg/stream", + "//br/pkg/summary", + "//br/pkg/utils", + "//pkg/kv", + "//pkg/util/codec", + "@com_github_google_uuid//:uuid", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", + "@com_github_pingcap_kvproto//pkg/brpb", + "@com_github_pingcap_kvproto//pkg/errorpb", + "@com_github_pingcap_kvproto//pkg/import_sstpb", + "@com_github_pingcap_kvproto//pkg/kvrpcpb", + "@com_github_pingcap_kvproto//pkg/metapb", + "@com_github_pingcap_log//:log", + "@com_github_tikv_client_go_v2//kv", + "@com_github_tikv_client_go_v2//util", + "@com_github_tikv_pd_client//:client", + "@org_golang_google_grpc//:grpc", + "@org_golang_google_grpc//backoff", + "@org_golang_google_grpc//codes", + "@org_golang_google_grpc//credentials", + "@org_golang_google_grpc//credentials/insecure", + "@org_golang_google_grpc//keepalive", + "@org_golang_google_grpc//status", + 
"@org_golang_x_exp//maps", + "@org_golang_x_sync//errgroup", + "@org_uber_go_multierr//:multierr", + "@org_uber_go_zap//:zap", + ], +) + +go_test( + name = "file_importer_test", + timeout = "short", + srcs = [ + "import_retry_test.go", + "import_test.go", + ], + flaky = True, + shard_count = 11, + deps = [ + ":file_importer", + "//br/pkg/restore/split", + "//br/pkg/restore/utils", + "//br/pkg/utils", + "//pkg/store/pdtypes", + "//pkg/util/codec", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", + "@com_github_pingcap_kvproto//pkg/brpb", + "@com_github_pingcap_kvproto//pkg/errorpb", + "@com_github_pingcap_kvproto//pkg/import_sstpb", + "@com_github_pingcap_kvproto//pkg/metapb", + "@com_github_pingcap_kvproto//pkg/pdpb", + "@com_github_stretchr_testify//require", + "@org_golang_google_grpc//codes", + "@org_golang_google_grpc//status", + ], +) diff --git a/br/pkg/restore/import.go b/br/pkg/restore/file_importer/import.go similarity index 89% rename from br/pkg/restore/import.go rename to br/pkg/restore/file_importer/import.go index 5000931a09..30320ee22e 100644 --- a/br/pkg/restore/import.go +++ b/br/pkg/restore/file_importer/import.go @@ -1,6 +1,6 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package file_importer import ( "bytes" @@ -25,7 +25,9 @@ import ( "github.com/pingcap/tidb/br/pkg/conn/util" berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/logutil" + logrestore "github.com/pingcap/tidb/br/pkg/restore/log_restore" "github.com/pingcap/tidb/br/pkg/restore/split" + restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" @@ -381,7 +383,7 @@ type FileImporter struct { kvMode KvMode rawStartKey []byte rawEndKey []byte - supportMultiIngest bool + SupportMultiIngest bool rewriteMode RewriteMode cacheKey string @@ -430,6 +432,15 @@ func NewFileImporter( } } +func (importer *FileImporter) WaitUntilUnblock() { + importer.cond.L.Lock() + for importer.ShouldBlock() { + // wait for download worker notified + importer.cond.Wait() + } + importer.cond.L.Unlock() +} + func (importer *FileImporter) ShouldBlock() bool { if importer != nil && importer.useTokenBucket { return importer.downloadTokensMap.ShouldBlock() || importer.ingestTokensMap.ShouldBlock() @@ -470,7 +481,7 @@ func (importer *FileImporter) CheckMultiIngestSupport(ctx context.Context, pdCli if err != nil { return errors.Trace(err) } - importer.supportMultiIngest = support + importer.SupportMultiIngest = support log.L().Info("multi ingest support", zap.Bool("support", support)) return nil } @@ -485,14 +496,14 @@ func (importer *FileImporter) SetRawRange(startKey, endKey []byte) error { return nil } -func getKeyRangeByMode(mode KvMode) func(f *backuppb.File, rules *RewriteRules) ([]byte, []byte, error) { +func getKeyRangeByMode(mode KvMode) func(f *backuppb.File, rules *restoreutils.RewriteRules) ([]byte, []byte, error) { switch mode { case Raw: - return func(f *backuppb.File, rules *RewriteRules) ([]byte, []byte, error) { + return func(f *backuppb.File, rules *restoreutils.RewriteRules) ([]byte, []byte, error) { return f.GetStartKey(), f.GetEndKey(), nil } case Txn: - return func(f *backuppb.File, rules *RewriteRules) ([]byte, []byte, error) { + return func(f *backuppb.File, rules *restoreutils.RewriteRules) ([]byte, []byte, error) { start, end := f.GetStartKey(), f.GetEndKey() if len(start) != 0 { start = 
codec.EncodeBytes([]byte{}, f.GetStartKey()) @@ -503,16 +514,18 @@ func getKeyRangeByMode(mode KvMode) func(f *backuppb.File, rules *RewriteRules) return start, end, nil } default: - return func(f *backuppb.File, rules *RewriteRules) ([]byte, []byte, error) { - return GetRewriteRawKeys(f, rules) + return func(f *backuppb.File, rules *restoreutils.RewriteRules) ([]byte, []byte, error) { + return restoreutils.GetRewriteRawKeys(f, rules) } } } +var GetKeyRangeByModeForTest = getKeyRangeByMode + // getKeyRangeForFiles gets the maximum range on files. func (importer *FileImporter) getKeyRangeForFiles( files []*backuppb.File, - rewriteRules *RewriteRules, + rewriteRules *restoreutils.RewriteRules, ) ([]byte, []byte, error) { var ( startKey, endKey []byte @@ -541,8 +554,8 @@ func (importer *FileImporter) getKeyRangeForFiles( // Import tries to import a file. func (importer *FileImporter) ImportKVFileForRegion( ctx context.Context, - files []*LogDataFileInfo, - rule *RewriteRules, + files []*logrestore.LogDataFileInfo, + rule *restoreutils.RewriteRules, shiftStartTS uint64, startTS uint64, restoreTS uint64, @@ -592,17 +605,17 @@ func (importer *FileImporter) ClearFiles(ctx context.Context, pdClient pd.Client } func FilterFilesByRegion( - files []*LogDataFileInfo, + files []*logrestore.LogDataFileInfo, ranges []kv.KeyRange, r *split.RegionInfo, -) ([]*LogDataFileInfo, error) { +) ([]*logrestore.LogDataFileInfo, error) { if len(files) != len(ranges) { return nil, errors.Annotatef(berrors.ErrInvalidArgument, "count of files no equals count of ranges, file-count:%v, ranges-count:%v", len(files), len(ranges)) } - output := make([]*LogDataFileInfo, 0, len(files)) + output := make([]*logrestore.LogDataFileInfo, 0, len(files)) if r != nil && r.Region != nil { for i, f := range files { if bytes.Compare(r.Region.StartKey, ranges[i].EndKey) <= 0 && @@ -620,8 +633,8 @@ func FilterFilesByRegion( // ImportKVFiles restores the kv events. 
func (importer *FileImporter) ImportKVFiles( ctx context.Context, - files []*LogDataFileInfo, - rule *RewriteRules, + files []*logrestore.LogDataFileInfo, + rule *restoreutils.RewriteRules, shiftStartTS uint64, startTS uint64, restoreTS uint64, @@ -641,7 +654,7 @@ func (importer *FileImporter) ImportKVFiles( log.Debug("import kv files", zap.Int("batch file count", len(files))) for i, f := range files { - ranges[i].StartKey, ranges[i].EndKey, err = GetRewriteEncodedKeys(f, rule) + ranges[i].StartKey, ranges[i].EndKey, err = restoreutils.GetRewriteEncodedKeys(f, rule) if err != nil { return errors.Trace(err) } @@ -678,7 +691,7 @@ func (importer *FileImporter) ImportKVFiles( func (importer *FileImporter) ImportSSTFiles( ctx context.Context, files []*backuppb.File, - rewriteRules *RewriteRules, + rewriteRules *restoreutils.RewriteRules, cipher *backuppb.CipherInfo, apiVersion kvrpcpb.APIVersion, ) error { @@ -763,7 +776,7 @@ func (importer *FileImporter) ImportSSTFiles( return nil } -func (importer *FileImporter) setDownloadSpeedLimit(ctx context.Context, storeID, rateLimit uint64) error { +func (importer *FileImporter) SetDownloadSpeedLimit(ctx context.Context, storeID, rateLimit uint64) error { req := &import_sstpb.SetDownloadSpeedLimitRequest{ SpeedLimit: rateLimit, } @@ -775,7 +788,7 @@ func (importer *FileImporter) download( ctx context.Context, regionInfo *split.RegionInfo, files []*backuppb.File, - rewriteRules *RewriteRules, + rewriteRules *restoreutils.RewriteRules, cipher *backuppb.CipherInfo, apiVersion kvrpcpb.APIVersion, ) ([]*import_sstpb.SSTMeta, error) { @@ -826,18 +839,97 @@ func (importer *FileImporter) download( return downloadMetas, errDownload } +// GetSSTMetaFromFile compares the keys in file, region and rewrite rules, then returns a sst conn. +// The range of the returned sst meta is [regionRule.NewKeyPrefix, append(regionRule.NewKeyPrefix, 0xff)]. +func GetSSTMetaFromFile( + id []byte, + file *backuppb.File, + region *metapb.Region, + regionRule *import_sstpb.RewriteRule, + rewriteMode RewriteMode, +) (meta *import_sstpb.SSTMeta, err error) { + r := *region + // If the rewrite mode is for keyspace, then the region bound should be decoded. + if rewriteMode == RewriteModeKeyspace { + if len(region.GetStartKey()) > 0 { + _, r.StartKey, err = codec.DecodeBytes(region.GetStartKey(), nil) + if err != nil { + return + } + } + if len(region.GetEndKey()) > 0 { + _, r.EndKey, err = codec.DecodeBytes(region.GetEndKey(), nil) + if err != nil { + return + } + } + } + + // Get the column family of the file by the file name. + var cfName string + if strings.Contains(file.GetName(), restoreutils.DefaultCFName) { + cfName = restoreutils.DefaultCFName + } else if strings.Contains(file.GetName(), restoreutils.WriteCFName) { + cfName = restoreutils.WriteCFName + } + // Find the overlapped part between the file and the region. + // Here we rewrites the keys to compare with the keys of the region. 
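A standalone sketch of the range clamping performed just below, fed with the values assumed by TestGetSSTMetaFromFile later in this patch (NewKeyPrefix "t2", region ["t2abc", "t3a")); it is simplified and illustrative, not the production code path.

package main

import (
	"bytes"
	"fmt"
)

func main() {
	newPrefix := []byte("t2")
	regionStart, regionEnd := []byte("t2abc"), []byte("t3a")

	// rangeStart = max(NewKeyPrefix, region.StartKey)
	start := newPrefix
	if bytes.Compare(start, regionStart) < 0 {
		start = regionStart
	}
	// rangeEnd = min(NewKeyPrefix + ten 0xff bytes, region.EndKey)
	end := append(append([]byte{}, newPrefix...), bytes.Repeat([]byte{0xff}, 10)...)
	if len(regionEnd) > 0 && bytes.Compare(end, regionEnd) > 0 {
		end = regionEnd
	}
	// Prints "t2abc" and "t2" followed by ten 0xff bytes, matching the test's expectations.
	fmt.Printf("%q %q\n", start, end)
}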
+ rangeStart := regionRule.GetNewKeyPrefix() + // rangeStart = max(rangeStart, region.StartKey) + if bytes.Compare(rangeStart, r.GetStartKey()) < 0 { + rangeStart = r.GetStartKey() + } + + // Append 10 * 0xff to make sure rangeEnd cover all file key + // If choose to regionRule.NewKeyPrefix + 1, it may cause WrongPrefix here + // https://github.com/tikv/tikv/blob/970a9bf2a9ea782a455ae579ad237aaf6cb1daec/ + // components/sst_importer/src/sst_importer.rs#L221 + suffix := []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} + rangeEnd := append(append([]byte{}, regionRule.GetNewKeyPrefix()...), suffix...) + // rangeEnd = min(rangeEnd, region.EndKey) + if len(r.GetEndKey()) > 0 && bytes.Compare(rangeEnd, r.GetEndKey()) > 0 { + rangeEnd = r.GetEndKey() + } + + if bytes.Compare(rangeStart, rangeEnd) > 0 { + log.Panic("range start exceed range end", + logutil.File(file), + logutil.Key("startKey", rangeStart), + logutil.Key("endKey", rangeEnd)) + } + + log.Debug("get sstMeta", + logutil.Region(region), + logutil.File(file), + logutil.Key("startKey", rangeStart), + logutil.Key("endKey", rangeEnd)) + + return &import_sstpb.SSTMeta{ + Uuid: id, + CfName: cfName, + Range: &import_sstpb.Range{ + Start: rangeStart, + End: rangeEnd, + }, + Length: file.GetSize_(), + RegionId: region.GetId(), + RegionEpoch: region.GetRegionEpoch(), + CipherIv: file.GetCipherIv(), + }, nil +} + func (importer *FileImporter) downloadSST( ctx context.Context, regionInfo *split.RegionInfo, file *backuppb.File, - rewriteRules *RewriteRules, + rewriteRules *restoreutils.RewriteRules, cipher *backuppb.CipherInfo, apiVersion kvrpcpb.APIVersion, ) (*import_sstpb.SSTMeta, error) { uid := uuid.New() id := uid[:] // Get the rewrite rule for the file. - fileRule := findMatchedRewriteRule(file, rewriteRules) + fileRule := restoreutils.FindMatchedRewriteRule(file, rewriteRules) if fileRule == nil { return nil, errors.Trace(berrors.ErrKVRewriteRuleNotFound) } @@ -852,8 +944,8 @@ func (importer *FileImporter) downloadSST( rule := *fileRule // for the legacy rewrite mode if importer.rewriteMode == RewriteModeLegacy { - rule.OldKeyPrefix = encodeKeyPrefix(fileRule.GetOldKeyPrefix()) - rule.NewKeyPrefix = encodeKeyPrefix(fileRule.GetNewKeyPrefix()) + rule.OldKeyPrefix = restoreutils.EncodeKeyPrefix(fileRule.GetOldKeyPrefix()) + rule.NewKeyPrefix = restoreutils.EncodeKeyPrefix(fileRule.GetNewKeyPrefix()) } sstMeta, err := GetSSTMetaFromFile(id, file, regionInfo.Region, &rule, importer.rewriteMode) @@ -917,8 +1009,8 @@ func (importer *FileImporter) downloadSST( } downloadResp := atomicResp.Load() - sstMeta.Range.Start = TruncateTS(downloadResp.Range.GetStart()) - sstMeta.Range.End = TruncateTS(downloadResp.Range.GetEnd()) + sstMeta.Range.Start = restoreutils.TruncateTS(downloadResp.Range.GetStart()) + sstMeta.Range.End = restoreutils.TruncateTS(downloadResp.Range.GetEnd()) sstMeta.ApiVersion = apiVersion return sstMeta, nil } @@ -1008,7 +1100,7 @@ func (importer *FileImporter) downloadV2( ctx context.Context, regionInfo *split.RegionInfo, files []*backuppb.File, - rewriteRules *RewriteRules, + rewriteRules *restoreutils.RewriteRules, cipher *backuppb.CipherInfo, apiVersion kvrpcpb.APIVersion, ) ([]*import_sstpb.SSTMeta, error) { @@ -1053,14 +1145,14 @@ func (importer *FileImporter) downloadV2( func (importer *FileImporter) buildDownloadRequest( file *backuppb.File, - rewriteRules *RewriteRules, + rewriteRules *restoreutils.RewriteRules, regionInfo *split.RegionInfo, cipher *backuppb.CipherInfo, ) (*import_sstpb.DownloadRequest, 
import_sstpb.SSTMeta, error) { uid := uuid.New() id := uid[:] // Get the rewrite rule for the file. - fileRule := findMatchedRewriteRule(file, rewriteRules) + fileRule := restoreutils.FindMatchedRewriteRule(file, rewriteRules) if fileRule == nil { return nil, import_sstpb.SSTMeta{}, errors.Trace(berrors.ErrKVRewriteRuleNotFound) } @@ -1075,8 +1167,8 @@ func (importer *FileImporter) buildDownloadRequest( rule := *fileRule // for the legacy rewrite mode if importer.rewriteMode == RewriteModeLegacy { - rule.OldKeyPrefix = encodeKeyPrefix(fileRule.GetOldKeyPrefix()) - rule.NewKeyPrefix = encodeKeyPrefix(fileRule.GetNewKeyPrefix()) + rule.OldKeyPrefix = restoreutils.EncodeKeyPrefix(fileRule.GetOldKeyPrefix()) + rule.NewKeyPrefix = restoreutils.EncodeKeyPrefix(fileRule.GetNewKeyPrefix()) } sstMeta, err := GetSSTMetaFromFile(id, file, regionInfo.Region, &rule, importer.rewriteMode) @@ -1107,7 +1199,7 @@ func (importer *FileImporter) downloadSSTV2( ctx context.Context, regionInfo *split.RegionInfo, files []*backuppb.File, - rewriteRules *RewriteRules, + rewriteRules *restoreutils.RewriteRules, cipher *backuppb.CipherInfo, apiVersion kvrpcpb.APIVersion, ) ([]*import_sstpb.SSTMeta, error) { @@ -1169,8 +1261,8 @@ func (importer *FileImporter) downloadSSTV2( return errors.Errorf("not found file %s for download sstMeta", file.Name) } sstMeta.Range = &import_sstpb.Range{ - Start: TruncateTS(resp.Range.GetStart()), - End: TruncateTS(resp.Range.GetEnd()), + Start: restoreutils.TruncateTS(resp.Range.GetStart()), + End: restoreutils.TruncateTS(resp.Range.GetEnd()), } resultMetasMap[file.Name] = &sstMeta mu.Unlock() @@ -1369,7 +1461,7 @@ func (importer *FileImporter) ingestSSTs( RequestSource: kvutil.BuildRequestSource(true, kv.InternalTxnBR, kvutil.ExplicitTypeBR), } - if !importer.supportMultiIngest { + if !importer.SupportMultiIngest { // TODO: not sure we need this check if len(sstMetas) != 1 { panic("do not support batch ingest") @@ -1394,8 +1486,8 @@ func (importer *FileImporter) ingestSSTs( func (importer *FileImporter) downloadAndApplyKVFile( ctx context.Context, - files []*LogDataFileInfo, - rules *RewriteRules, + files []*logrestore.LogDataFileInfo, + rules *restoreutils.RewriteRules, regionInfo *split.RegionInfo, shiftStartTS uint64, startTS uint64, @@ -1413,14 +1505,14 @@ func (importer *FileImporter) downloadAndApplyKVFile( for _, file := range files { // Get the rewrite rule for the file. - fileRule := findMatchedRewriteRule(file, rules) + fileRule := restoreutils.FindMatchedRewriteRule(file, rules) if fileRule == nil { return RPCResultFromError(errors.Annotatef(berrors.ErrKVRewriteRuleNotFound, "rewrite rule for file %+v not find (in %+v)", file, rules)) } rule := import_sstpb.RewriteRule{ - OldKeyPrefix: encodeKeyPrefix(fileRule.GetOldKeyPrefix()), - NewKeyPrefix: encodeKeyPrefix(fileRule.GetNewKeyPrefix()), + OldKeyPrefix: restoreutils.EncodeKeyPrefix(fileRule.GetOldKeyPrefix()), + NewKeyPrefix: restoreutils.EncodeKeyPrefix(fileRule.GetNewKeyPrefix()), } meta := &import_sstpb.KVMeta{ diff --git a/br/pkg/restore/import_retry.go b/br/pkg/restore/file_importer/import_retry.go similarity index 98% rename from br/pkg/restore/import_retry.go rename to br/pkg/restore/file_importer/import_retry.go index 3ce16f4816..b394ba372d 100644 --- a/br/pkg/restore/import_retry.go +++ b/br/pkg/restore/file_importer/import_retry.go @@ -1,6 +1,6 @@ // Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. 
-package restore +package file_importer import ( "context" @@ -15,6 +15,7 @@ import ( berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/restore/split" + restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/utils" "github.com/tikv/client-go/v2/kv" "go.uber.org/multierr" @@ -41,7 +42,7 @@ func OverRegionsInRange(start, end []byte, metaClient split.SplitClient, retrySt // but scanRegion will drop the TimeStamp and the end key is exclusive. // if we do not use PrefixNextKey. we might scan fewer regions than we expected. // and finally cause the data lost. - end = TruncateTS(end) + end = restoreutils.TruncateTS(end) end = kv.PrefixNextKey(end) return OverRegionsInRangeController{ diff --git a/br/pkg/restore/import_retry_test.go b/br/pkg/restore/file_importer/import_retry_test.go similarity index 66% rename from br/pkg/restore/import_retry_test.go rename to br/pkg/restore/file_importer/import_retry_test.go index 8e2a386b0e..1dfa55fb1f 100644 --- a/br/pkg/restore/import_retry_test.go +++ b/br/pkg/restore/file_importer/import_retry_test.go @@ -1,26 +1,27 @@ // Copyright 2021 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package file_importer_test import ( + "bytes" "context" "encoding/hex" "fmt" "os" "strconv" + "sync" "testing" "time" "github.com/pingcap/errors" "github.com/pingcap/failpoint" - backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/kvproto/pkg/errorpb" "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/kvproto/pkg/metapb" - berrors "github.com/pingcap/tidb/br/pkg/errors" + "github.com/pingcap/kvproto/pkg/pdpb" + fileimporter "github.com/pingcap/tidb/br/pkg/restore/file_importer" "github.com/pingcap/tidb/br/pkg/restore/split" "github.com/pingcap/tidb/br/pkg/utils" - "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/store/pdtypes" "github.com/pingcap/tidb/pkg/util/codec" "github.com/stretchr/testify/require" @@ -50,6 +51,138 @@ func assertRegions(t *testing.T, regions []*split.RegionInfo, keys ...string) { } } +type TestClient struct { + split.SplitClient + + mu sync.RWMutex + stores map[uint64]*metapb.Store + regions map[uint64]*split.RegionInfo + regionsInfo *pdtypes.RegionTree // For now it's only used in ScanRegions + nextRegionID uint64 + + scattered map[uint64]bool + InjectErr bool + InjectTimes int32 +} + +func NewTestClient( + stores map[uint64]*metapb.Store, + regions map[uint64]*split.RegionInfo, + nextRegionID uint64, +) *TestClient { + regionsInfo := &pdtypes.RegionTree{} + for _, regionInfo := range regions { + regionsInfo.SetRegion(pdtypes.NewRegionInfo(regionInfo.Region, regionInfo.Leader)) + } + return &TestClient{ + stores: stores, + regions: regions, + regionsInfo: regionsInfo, + nextRegionID: nextRegionID, + scattered: map[uint64]bool{}, + } +} + +func (c *TestClient) GetAllRegions() map[uint64]*split.RegionInfo { + c.mu.RLock() + defer c.mu.RUnlock() + return c.regions +} + +func (c *TestClient) GetStore(ctx context.Context, storeID uint64) (*metapb.Store, error) { + c.mu.RLock() + defer c.mu.RUnlock() + store, ok := c.stores[storeID] + if !ok { + return nil, errors.Errorf("store not found") + } + return store, nil +} + +func (c *TestClient) GetRegion(ctx context.Context, key []byte) (*split.RegionInfo, error) { + c.mu.RLock() + defer c.mu.RUnlock() + for _, region := range c.regions { + if bytes.Compare(key, region.Region.StartKey) >= 0 && + (len(region.Region.EndKey) == 0 || bytes.Compare(key, 
region.Region.EndKey) < 0) { + return region, nil + } + } + return nil, errors.Errorf("region not found: key=%s", string(key)) +} + +func (c *TestClient) GetRegionByID(ctx context.Context, regionID uint64) (*split.RegionInfo, error) { + c.mu.RLock() + defer c.mu.RUnlock() + region, ok := c.regions[regionID] + if !ok { + return nil, errors.Errorf("region not found: id=%d", regionID) + } + return region, nil +} + +func (c *TestClient) SplitWaitAndScatter(_ context.Context, _ *split.RegionInfo, keys [][]byte) ([]*split.RegionInfo, error) { + c.mu.Lock() + defer c.mu.Unlock() + newRegions := make([]*split.RegionInfo, 0) + for _, key := range keys { + var target *split.RegionInfo + splitKey := codec.EncodeBytes([]byte{}, key) + for _, region := range c.regions { + if region.ContainsInterior(splitKey) { + target = region + } + } + if target == nil { + continue + } + newRegion := &split.RegionInfo{ + Region: &metapb.Region{ + Peers: target.Region.Peers, + Id: c.nextRegionID, + StartKey: target.Region.StartKey, + EndKey: splitKey, + }, + } + c.regions[c.nextRegionID] = newRegion + c.nextRegionID++ + target.Region.StartKey = splitKey + c.regions[target.Region.Id] = target + newRegions = append(newRegions, newRegion) + } + return newRegions, nil +} + +func (c *TestClient) GetOperator(context.Context, uint64) (*pdpb.GetOperatorResponse, error) { + return &pdpb.GetOperatorResponse{ + Header: new(pdpb.ResponseHeader), + }, nil +} + +func (c *TestClient) ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*split.RegionInfo, error) { + if c.InjectErr && c.InjectTimes > 0 { + c.InjectTimes -= 1 + return nil, status.Error(codes.Unavailable, "not leader") + } + if len(key) != 0 && bytes.Equal(key, endKey) { + return nil, status.Error(codes.Internal, "key and endKey are the same") + } + + infos := c.regionsInfo.ScanRange(key, endKey, limit) + regions := make([]*split.RegionInfo, 0, len(infos)) + for _, info := range infos { + regions = append(regions, &split.RegionInfo{ + Region: info.Meta, + Leader: info.Leader, + }) + } + return regions, nil +} + +func (c *TestClient) WaitRegionsScattered(context.Context, []*split.RegionInfo) (int, error) { + return 0, nil +} + // region: [, aay), [aay, bba), [bba, bbh), [bbh, cca), [cca, ) func initTestClient(isRawKv bool) *TestClient { peers := make([]*metapb.Peer, 1) @@ -95,35 +228,35 @@ func TestScanSuccess(t *testing.T) { ctx := context.Background() // make exclusive to inclusive. 
- ctl := OverRegionsInRange([]byte("aa"), []byte("aay"), cli, &rs) + ctl := fileimporter.OverRegionsInRange([]byte("aa"), []byte("aay"), cli, &rs) collectedRegions := []*split.RegionInfo{} - ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) RPCResult { + ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) fileimporter.RPCResult { collectedRegions = append(collectedRegions, r) - return RPCResultOK() + return fileimporter.RPCResultOK() }) assertRegions(t, collectedRegions, "", "aay", "bba") - ctl = OverRegionsInRange([]byte("aaz"), []byte("bb"), cli, &rs) + ctl = fileimporter.OverRegionsInRange([]byte("aaz"), []byte("bb"), cli, &rs) collectedRegions = []*split.RegionInfo{} - ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) RPCResult { + ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) fileimporter.RPCResult { collectedRegions = append(collectedRegions, r) - return RPCResultOK() + return fileimporter.RPCResultOK() }) assertRegions(t, collectedRegions, "aay", "bba", "bbh", "cca") - ctl = OverRegionsInRange([]byte("aa"), []byte("cc"), cli, &rs) + ctl = fileimporter.OverRegionsInRange([]byte("aa"), []byte("cc"), cli, &rs) collectedRegions = []*split.RegionInfo{} - ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) RPCResult { + ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) fileimporter.RPCResult { collectedRegions = append(collectedRegions, r) - return RPCResultOK() + return fileimporter.RPCResultOK() }) assertRegions(t, collectedRegions, "", "aay", "bba", "bbh", "cca", "") - ctl = OverRegionsInRange([]byte("aa"), []byte(""), cli, &rs) + ctl = fileimporter.OverRegionsInRange([]byte("aa"), []byte(""), cli, &rs) collectedRegions = []*split.RegionInfo{} - ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) RPCResult { + ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) fileimporter.RPCResult { collectedRegions = append(collectedRegions, r) - return RPCResultOK() + return fileimporter.RPCResultOK() }) assertRegions(t, collectedRegions, "", "aay", "bba", "bbh", "cca", "") } @@ -132,7 +265,7 @@ func TestNotLeader(t *testing.T) { // region: [, aay), [aay, bba), [bba, bbh), [bbh, cca), [cca, ) cli := initTestClient(false) rs := utils.InitialRetryState(1, 0, 0) - ctl := OverRegionsInRange([]byte(""), []byte(""), cli, &rs) + ctl := fileimporter.OverRegionsInRange([]byte(""), []byte(""), cli, &rs) ctx := context.Background() notLeader := errorpb.Error{ @@ -146,17 +279,17 @@ func TestNotLeader(t *testing.T) { meetRegions := []*split.RegionInfo{} // record all regions we meet with id == 2. 
idEqualsTo2Regions := []*split.RegionInfo{} - err := ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) RPCResult { + err := ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) fileimporter.RPCResult { if r.Region.Id == 2 { idEqualsTo2Regions = append(idEqualsTo2Regions, r) } if r.Region.Id == 2 && (r.Leader == nil || r.Leader.Id != 42) { - return RPCResult{ + return fileimporter.RPCResult{ StoreError: ¬Leader, } } meetRegions = append(meetRegions, r) - return RPCResultOK() + return fileimporter.RPCResultOK() }) require.NoError(t, err) @@ -172,7 +305,7 @@ func TestServerIsBusy(t *testing.T) { // region: [, aay), [aay, bba), [bba, bbh), [bbh, cca), [cca, ) cli := initTestClient(false) rs := utils.InitialRetryState(2, 0, 0) - ctl := OverRegionsInRange([]byte(""), []byte(""), cli, &rs) + ctl := fileimporter.OverRegionsInRange([]byte(""), []byte(""), cli, &rs) ctx := context.Background() serverIsBusy := errorpb.Error{ @@ -186,16 +319,16 @@ func TestServerIsBusy(t *testing.T) { // record all regions we meet with id == 2. idEqualsTo2Regions := []*split.RegionInfo{} theFirstRun := true - err := ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) RPCResult { + err := ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) fileimporter.RPCResult { if theFirstRun && r.Region.Id == 2 { idEqualsTo2Regions = append(idEqualsTo2Regions, r) theFirstRun = false - return RPCResult{ + return fileimporter.RPCResult{ StoreError: &serverIsBusy, } } meetRegions = append(meetRegions, r) - return RPCResultOK() + return fileimporter.RPCResultOK() }) require.NoError(t, err) @@ -213,7 +346,7 @@ func TestServerIsBusyWithMemoryIsLimited(t *testing.T) { // region: [, aay), [aay, bba), [bba, bbh), [bbh, cca), [cca, ) cli := initTestClient(false) rs := utils.InitialRetryState(2, 0, 0) - ctl := OverRegionsInRange([]byte(""), []byte(""), cli, &rs) + ctl := fileimporter.OverRegionsInRange([]byte(""), []byte(""), cli, &rs) ctx := context.Background() serverIsBusy := errorpb.Error{ @@ -227,16 +360,16 @@ func TestServerIsBusyWithMemoryIsLimited(t *testing.T) { // record all regions we meet with id == 2. 
idEqualsTo2Regions := []*split.RegionInfo{} theFirstRun := true - err := ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) RPCResult { + err := ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) fileimporter.RPCResult { if theFirstRun && r.Region.Id == 2 { idEqualsTo2Regions = append(idEqualsTo2Regions, r) theFirstRun = false - return RPCResult{ + return fileimporter.RPCResult{ StoreError: &serverIsBusy, } } meetRegions = append(meetRegions, r) - return RPCResultOK() + return fileimporter.RPCResultOK() }) require.NoError(t, err) @@ -265,7 +398,7 @@ func TestEpochNotMatch(t *testing.T) { // region: [, aay), [aay, bba), [bba, bbh), [bbh, cca), [cca, ) cli := initTestClient(false) rs := utils.InitialRetryState(2, 0, 0) - ctl := OverRegionsInRange([]byte(""), []byte(""), cli, &rs) + ctl := fileimporter.OverRegionsInRange([]byte(""), []byte(""), cli, &rs) ctx := context.Background() printPDRegion("cli", cli.regionsInfo.Regions) @@ -299,18 +432,18 @@ func TestEpochNotMatch(t *testing.T) { firstRunRegions := []*split.RegionInfo{} secondRunRegions := []*split.RegionInfo{} isSecondRun := false - err = ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) RPCResult { + err = ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) fileimporter.RPCResult { if !isSecondRun && r.Region.Id == left.Region.Id { mergeRegion() isSecondRun = true - return RPCResultFromPBError(epochNotMatch) + return fileimporter.RPCResultFromPBError(epochNotMatch) } if isSecondRun { secondRunRegions = append(secondRunRegions, r) } else { firstRunRegions = append(firstRunRegions, r) } - return RPCResultOK() + return fileimporter.RPCResultOK() }) printRegion("first", firstRunRegions) printRegion("second", secondRunRegions) @@ -324,7 +457,7 @@ func TestRegionSplit(t *testing.T) { // region: [, aay), [aay, bba), [bba, bbh), [bbh, cca), [cca, ) cli := initTestClient(false) rs := utils.InitialRetryState(2, 0, 0) - ctl := OverRegionsInRange([]byte(""), []byte(""), cli, &rs) + ctl := fileimporter.OverRegionsInRange([]byte(""), []byte(""), cli, &rs) ctx := context.Background() printPDRegion("cli", cli.regionsInfo.Regions) @@ -377,18 +510,18 @@ func TestRegionSplit(t *testing.T) { firstRunRegions := []*split.RegionInfo{} secondRunRegions := []*split.RegionInfo{} isSecondRun := false - err = ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) RPCResult { + err = ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) fileimporter.RPCResult { if !isSecondRun && r.Region.Id == target.Region.Id { splitRegion() isSecondRun = true - return RPCResultFromPBError(epochNotMatch) + return fileimporter.RPCResultFromPBError(epochNotMatch) } if isSecondRun { secondRunRegions = append(secondRunRegions, r) } else { firstRunRegions = append(firstRunRegions, r) } - return RPCResultOK() + return fileimporter.RPCResultOK() }) printRegion("first", firstRunRegions) printRegion("second", secondRunRegions) @@ -402,7 +535,7 @@ func TestRetryBackoff(t *testing.T) { // region: [, aay), [aay, bba), [bba, bbh), [bbh, cca), [cca, ) cli := initTestClient(false) rs := utils.InitialRetryState(2, time.Millisecond, 10*time.Millisecond) - ctl := OverRegionsInRange([]byte(""), []byte(""), cli, &rs) + ctl := fileimporter.OverRegionsInRange([]byte(""), []byte(""), cli, &rs) ctx := context.Background() printPDRegion("cli", cli.regionsInfo.Regions) @@ -419,12 +552,12 @@ func TestRetryBackoff(t *testing.T) { }, }} isSecondRun := false - err = ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) RPCResult { + err = ctl.Run(ctx, 
func(ctx context.Context, r *split.RegionInfo) fileimporter.RPCResult { if !isSecondRun && r.Region.Id == left.Region.Id { isSecondRun = true - return RPCResultFromPBError(epochNotLeader) + return fileimporter.RPCResultFromPBError(epochNotLeader) } - return RPCResultOK() + return fileimporter.RPCResultOK() }) printPDRegion("cli", cli.regionsInfo.Regions) require.Equal(t, 1, rs.Attempt()) @@ -434,10 +567,10 @@ func TestRetryBackoff(t *testing.T) { } func TestWrappedError(t *testing.T) { - result := RPCResultFromError(errors.Trace(status.Error(codes.Unavailable, "the server is slacking. ><=·>"))) - require.Equal(t, result.StrategyForRetry(), StrategyFromThisRegion) - result = RPCResultFromError(errors.Trace(status.Error(codes.Unknown, "the server said something hard to understand"))) - require.Equal(t, result.StrategyForRetry(), StrategyGiveUp) + result := fileimporter.RPCResultFromError(errors.Trace(status.Error(codes.Unavailable, "the server is slacking. ><=·>"))) + require.Equal(t, result.StrategyForRetry(), fileimporter.StrategyFromThisRegion) + result = fileimporter.RPCResultFromError(errors.Trace(status.Error(codes.Unknown, "the server said something hard to understand"))) + require.Equal(t, result.StrategyForRetry(), fileimporter.StrategyGiveUp) } func envInt(name string, def int) int { @@ -453,186 +586,15 @@ func TestPaginateScanLeader(t *testing.T) { // region: [, aay), [aay, bba), [bba, bbh), [bbh, cca), [cca, ) cli := initTestClient(false) rs := utils.InitialRetryState(2, time.Millisecond, 10*time.Millisecond) - ctl := OverRegionsInRange([]byte("aa"), []byte("aaz"), cli, &rs) + ctl := fileimporter.OverRegionsInRange([]byte("aa"), []byte("aaz"), cli, &rs) ctx := context.Background() cli.InjectErr = true cli.InjectTimes = int32(envInt("PAGINATE_SCAN_LEADER_FAILURE_COUNT", 2)) collectedRegions := []*split.RegionInfo{} - ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) RPCResult { + ctl.Run(ctx, func(ctx context.Context, r *split.RegionInfo) fileimporter.RPCResult { collectedRegions = append(collectedRegions, r) - return RPCResultOK() + return fileimporter.RPCResultOK() }) assertRegions(t, collectedRegions, "", "aay", "bba") } - -func TestImportKVFiles(t *testing.T) { - var ( - importer = FileImporter{} - ctx = context.Background() - shiftStartTS uint64 = 100 - startTS uint64 = 200 - restoreTS uint64 = 300 - ) - - err := importer.ImportKVFiles( - ctx, - []*LogDataFileInfo{ - { - DataFileInfo: &backuppb.DataFileInfo{ - Path: "log3", - }, - }, - { - DataFileInfo: &backuppb.DataFileInfo{ - Path: "log1", - }, - }, - }, - nil, - shiftStartTS, - startTS, - restoreTS, - false, - ) - require.True(t, berrors.ErrInvalidArgument.Equal(err)) -} - -func TestFilterFilesByRegion(t *testing.T) { - files := []*LogDataFileInfo{ - { - DataFileInfo: &backuppb.DataFileInfo{ - Path: "log3", - }, - }, - { - DataFileInfo: &backuppb.DataFileInfo{ - Path: "log1", - }, - }, - } - ranges := []kv.KeyRange{ - { - StartKey: []byte("1111"), - EndKey: []byte("2222"), - }, { - StartKey: []byte("3333"), - EndKey: []byte("4444"), - }, - } - - testCases := []struct { - r split.RegionInfo - subfiles []*LogDataFileInfo - err error - }{ - { - r: split.RegionInfo{ - Region: &metapb.Region{ - StartKey: []byte("0000"), - EndKey: []byte("1110"), - }, - }, - subfiles: []*LogDataFileInfo{}, - err: nil, - }, - { - r: split.RegionInfo{ - Region: &metapb.Region{ - StartKey: []byte("0000"), - EndKey: []byte("1111"), - }, - }, - subfiles: []*LogDataFileInfo{ - files[0], - }, - err: nil, - }, - { - r: split.RegionInfo{ - 
Region: &metapb.Region{ - StartKey: []byte("0000"), - EndKey: []byte("2222"), - }, - }, - subfiles: []*LogDataFileInfo{ - files[0], - }, - err: nil, - }, - { - r: split.RegionInfo{ - Region: &metapb.Region{ - StartKey: []byte("2222"), - EndKey: []byte("3332"), - }, - }, - subfiles: []*LogDataFileInfo{ - files[0], - }, - err: nil, - }, - { - r: split.RegionInfo{ - Region: &metapb.Region{ - StartKey: []byte("2223"), - EndKey: []byte("3332"), - }, - }, - subfiles: []*LogDataFileInfo{}, - err: nil, - }, - { - r: split.RegionInfo{ - Region: &metapb.Region{ - StartKey: []byte("3332"), - EndKey: []byte("3333"), - }, - }, - subfiles: []*LogDataFileInfo{ - files[1], - }, - err: nil, - }, - { - r: split.RegionInfo{ - Region: &metapb.Region{ - StartKey: []byte("4444"), - EndKey: []byte("5555"), - }, - }, - subfiles: []*LogDataFileInfo{ - files[1], - }, - err: nil, - }, - { - r: split.RegionInfo{ - Region: &metapb.Region{ - StartKey: []byte("4444"), - EndKey: nil, - }, - }, - subfiles: []*LogDataFileInfo{ - files[1], - }, - err: nil, - }, - { - r: split.RegionInfo{ - Region: &metapb.Region{ - StartKey: []byte("0000"), - EndKey: nil, - }, - }, - subfiles: files, - err: nil, - }, - } - - for _, c := range testCases { - subfile, err := FilterFilesByRegion(files, ranges, &c.r) - require.Equal(t, err, c.err) - require.Equal(t, subfile, c.subfiles) - } -} diff --git a/br/pkg/restore/file_importer/import_test.go b/br/pkg/restore/file_importer/import_test.go new file mode 100644 index 0000000000..3d90711baa --- /dev/null +++ b/br/pkg/restore/file_importer/import_test.go @@ -0,0 +1,106 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package file_importer_test + +import ( + "testing" + + backuppb "github.com/pingcap/kvproto/pkg/brpb" + "github.com/pingcap/kvproto/pkg/import_sstpb" + "github.com/pingcap/kvproto/pkg/metapb" + fileimporter "github.com/pingcap/tidb/br/pkg/restore/file_importer" + restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" + "github.com/pingcap/tidb/pkg/util/codec" + "github.com/stretchr/testify/require" +) + +func TestGetKeyRangeByMode(t *testing.T) { + file := &backuppb.File{ + Name: "file_write.sst", + StartKey: []byte("t1a"), + EndKey: []byte("t1ccc"), + } + endFile := &backuppb.File{ + Name: "file_write.sst", + StartKey: []byte("t1a"), + EndKey: []byte(""), + } + rule := &restoreutils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{ + { + OldKeyPrefix: []byte("t1"), + NewKeyPrefix: []byte("t2"), + }, + }, + } + // raw kv + testRawFn := fileimporter.GetKeyRangeByModeForTest(fileimporter.Raw) + start, end, err := testRawFn(file, rule) + require.NoError(t, err) + require.Equal(t, []byte("t1a"), start) + require.Equal(t, []byte("t1ccc"), end) + + start, end, err = testRawFn(endFile, rule) + require.NoError(t, err) + require.Equal(t, []byte("t1a"), start) + require.Equal(t, []byte(""), end) + + // txn kv: the keys must be encoded. 
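// Note: in Txn mode the key range is only wrapped with codec.EncodeBytes
// (TiKV's memcomparable bytes encoding) and no rewrite rule is applied, so the
// assertions below expect the encoded originals "t1a"/"t1ccc"; the TiDB-mode
// case further down additionally applies the t1 -> t2 rewrite before encoding.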
+ testTxnFn := fileimporter.GetKeyRangeByModeForTest(fileimporter.Txn) + start, end, err = testTxnFn(file, rule) + require.NoError(t, err) + require.Equal(t, codec.EncodeBytes(nil, []byte("t1a")), start) + require.Equal(t, codec.EncodeBytes(nil, []byte("t1ccc")), end) + + start, end, err = testTxnFn(endFile, rule) + require.NoError(t, err) + require.Equal(t, codec.EncodeBytes(nil, []byte("t1a")), start) + require.Equal(t, []byte(""), end) + + // normal kv: the keys must be encoded. + testFn := fileimporter.GetKeyRangeByModeForTest(fileimporter.TiDB) + start, end, err = testFn(file, rule) + require.NoError(t, err) + require.Equal(t, codec.EncodeBytes(nil, []byte("t2a")), start) + require.Equal(t, codec.EncodeBytes(nil, []byte("t2ccc")), end) + + // TODO maybe fix later + // current restore does not support rewrite empty endkey. + // because backup guarantees that the end key is not empty. + // start, end, err = testFn(endFile, rule) + // require.NoError(t, err) + // require.Equal(t, codec.EncodeBytes(nil, []byte("t2a")), start) + // require.Equal(t, []byte(""), end) +} + +func TestGetSSTMetaFromFile(t *testing.T) { + file := &backuppb.File{ + Name: "file_write.sst", + StartKey: []byte("t1a"), + EndKey: []byte("t1ccc"), + } + rule := &import_sstpb.RewriteRule{ + OldKeyPrefix: []byte("t1"), + NewKeyPrefix: []byte("t2"), + } + region := &metapb.Region{ + StartKey: []byte("t2abc"), + EndKey: []byte("t3a"), + } + sstMeta, err := fileimporter.GetSSTMetaFromFile([]byte{}, file, region, rule, fileimporter.RewriteModeLegacy) + require.Nil(t, err) + require.Equal(t, "t2abc", string(sstMeta.GetRange().GetStart())) + require.Equal(t, "t2\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", string(sstMeta.GetRange().GetEnd())) +} diff --git a/br/pkg/restore/log_restore/BUILD.bazel b/br/pkg/restore/log_restore/BUILD.bazel new file mode 100644 index 0000000000..a720b5c129 --- /dev/null +++ b/br/pkg/restore/log_restore/BUILD.bazel @@ -0,0 +1,64 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "log_restore", + srcs = [ + "log_client.go", + "split.go", + ], + importpath = "github.com/pingcap/tidb/br/pkg/restore/log_restore", + visibility = ["//visibility:public"], + deps = [ + "//br/pkg/errors", + "//br/pkg/restore/split", + "//br/pkg/restore/utils", + "//br/pkg/rtree", + "//br/pkg/storage", + "//br/pkg/stream", + "//br/pkg/utils/iter", + "//pkg/kv", + "//pkg/tablecodec", + "//pkg/util", + "//pkg/util/codec", + "//pkg/util/redact", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_kvproto//pkg/brpb", + "@com_github_pingcap_log//:log", + "@org_golang_x_sync//errgroup", + "@org_uber_go_zap//:zap", + ], +) + +go_test( + name = "log_restore_test", + timeout = "short", + srcs = [ + "export_test.go", + "import_test.go", + "log_client_test.go", + "split_test.go", + ], + embed = [":log_restore"], + flaky = True, + shard_count = 9, + deps = [ + "//br/pkg/errors", + "//br/pkg/restore/file_importer", + "//br/pkg/restore/split", + "//br/pkg/restore/utils", + "//br/pkg/storage", + "//br/pkg/stream", + "//br/pkg/utils/iter", + "//pkg/kv", + "//pkg/tablecodec", + "//pkg/util/codec", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_kvproto//pkg/brpb", + "@com_github_pingcap_kvproto//pkg/import_sstpb", + "@com_github_pingcap_kvproto//pkg/metapb", + "@com_github_pingcap_log//:log", + "@com_github_stretchr_testify//require", + "@org_uber_go_zap//:zap", + "@org_uber_go_zap//zapcore", + ], +) diff --git a/br/pkg/restore/log_restore/export_test.go 
b/br/pkg/restore/log_restore/export_test.go new file mode 100644 index 0000000000..492230146c --- /dev/null +++ b/br/pkg/restore/log_restore/export_test.go @@ -0,0 +1,35 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package logrestore + +import ( + "context" + + "github.com/pingcap/errors" + "github.com/pingcap/tidb/br/pkg/utils/iter" +) + +// readStreamMetaByTS is used for streaming task. collect all meta file by TS, it is for test usage. +func (rc *LogFileManager) ReadStreamMeta(ctx context.Context) ([]Meta, error) { + metas, err := rc.streamingMeta(ctx) + if err != nil { + return nil, err + } + r := iter.CollectAll(ctx, metas) + if r.Err != nil { + return nil, errors.Trace(r.Err) + } + return r.Item, nil +} diff --git a/br/pkg/restore/log_restore/import_test.go b/br/pkg/restore/log_restore/import_test.go new file mode 100644 index 0000000000..7867660375 --- /dev/null +++ b/br/pkg/restore/log_restore/import_test.go @@ -0,0 +1,200 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
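The tests in this new file are the TestImportKVFiles and TestFilterFilesByRegion cases removed from import_retry_test.go earlier in the patch; they move here because LogDataFileInfo now lives in the log_restore package, and they exercise the importer through the exported file_importer API rather than package-internal symbols.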
+ +package logrestore_test + +import ( + "context" + "testing" + + backuppb "github.com/pingcap/kvproto/pkg/brpb" + "github.com/pingcap/kvproto/pkg/metapb" + berrors "github.com/pingcap/tidb/br/pkg/errors" + fileimporter "github.com/pingcap/tidb/br/pkg/restore/file_importer" + logrestore "github.com/pingcap/tidb/br/pkg/restore/log_restore" + "github.com/pingcap/tidb/br/pkg/restore/split" + "github.com/pingcap/tidb/pkg/kv" + "github.com/stretchr/testify/require" +) + +func TestImportKVFiles(t *testing.T) { + var ( + importer = fileimporter.FileImporter{} + ctx = context.Background() + shiftStartTS uint64 = 100 + startTS uint64 = 200 + restoreTS uint64 = 300 + ) + + err := importer.ImportKVFiles( + ctx, + []*logrestore.LogDataFileInfo{ + { + DataFileInfo: &backuppb.DataFileInfo{ + Path: "log3", + }, + }, + { + DataFileInfo: &backuppb.DataFileInfo{ + Path: "log1", + }, + }, + }, + nil, + shiftStartTS, + startTS, + restoreTS, + false, + ) + require.True(t, berrors.ErrInvalidArgument.Equal(err)) +} + +func TestFilterFilesByRegion(t *testing.T) { + files := []*logrestore.LogDataFileInfo{ + { + DataFileInfo: &backuppb.DataFileInfo{ + Path: "log3", + }, + }, + { + DataFileInfo: &backuppb.DataFileInfo{ + Path: "log1", + }, + }, + } + ranges := []kv.KeyRange{ + { + StartKey: []byte("1111"), + EndKey: []byte("2222"), + }, { + StartKey: []byte("3333"), + EndKey: []byte("4444"), + }, + } + + testCases := []struct { + r split.RegionInfo + subfiles []*logrestore.LogDataFileInfo + err error + }{ + { + r: split.RegionInfo{ + Region: &metapb.Region{ + StartKey: []byte("0000"), + EndKey: []byte("1110"), + }, + }, + subfiles: []*logrestore.LogDataFileInfo{}, + err: nil, + }, + { + r: split.RegionInfo{ + Region: &metapb.Region{ + StartKey: []byte("0000"), + EndKey: []byte("1111"), + }, + }, + subfiles: []*logrestore.LogDataFileInfo{ + files[0], + }, + err: nil, + }, + { + r: split.RegionInfo{ + Region: &metapb.Region{ + StartKey: []byte("0000"), + EndKey: []byte("2222"), + }, + }, + subfiles: []*logrestore.LogDataFileInfo{ + files[0], + }, + err: nil, + }, + { + r: split.RegionInfo{ + Region: &metapb.Region{ + StartKey: []byte("2222"), + EndKey: []byte("3332"), + }, + }, + subfiles: []*logrestore.LogDataFileInfo{ + files[0], + }, + err: nil, + }, + { + r: split.RegionInfo{ + Region: &metapb.Region{ + StartKey: []byte("2223"), + EndKey: []byte("3332"), + }, + }, + subfiles: []*logrestore.LogDataFileInfo{}, + err: nil, + }, + { + r: split.RegionInfo{ + Region: &metapb.Region{ + StartKey: []byte("3332"), + EndKey: []byte("3333"), + }, + }, + subfiles: []*logrestore.LogDataFileInfo{ + files[1], + }, + err: nil, + }, + { + r: split.RegionInfo{ + Region: &metapb.Region{ + StartKey: []byte("4444"), + EndKey: []byte("5555"), + }, + }, + subfiles: []*logrestore.LogDataFileInfo{ + files[1], + }, + err: nil, + }, + { + r: split.RegionInfo{ + Region: &metapb.Region{ + StartKey: []byte("4444"), + EndKey: nil, + }, + }, + subfiles: []*logrestore.LogDataFileInfo{ + files[1], + }, + err: nil, + }, + { + r: split.RegionInfo{ + Region: &metapb.Region{ + StartKey: []byte("0000"), + EndKey: nil, + }, + }, + subfiles: files, + err: nil, + }, + } + + for _, c := range testCases { + subfile, err := fileimporter.FilterFilesByRegion(files, ranges, &c.r) + require.Equal(t, err, c.err) + require.Equal(t, subfile, c.subfiles) + } +} diff --git a/br/pkg/restore/log_client.go b/br/pkg/restore/log_restore/log_client.go similarity index 81% rename from br/pkg/restore/log_client.go rename to br/pkg/restore/log_restore/log_client.go index 
f01b736a71..c64a24c00a 100644 --- a/br/pkg/restore/log_client.go +++ b/br/pkg/restore/log_restore/log_client.go @@ -1,6 +1,6 @@ // Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package logrestore import ( "bytes" @@ -18,6 +18,8 @@ import ( "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/pkg/kv" + "github.com/pingcap/tidb/pkg/util/codec" + "github.com/pingcap/tidb/pkg/util/redact" "go.uber.org/zap" ) @@ -43,19 +45,19 @@ type Meta = *backuppb.Metadata // Log is the metadata of one file recording KV sequences. type Log = *backuppb.DataFileInfo -// logFileManager is the manager for log files of a certain restoration, +// LogFileManager is the manager for log files of a certain restoration, // which supports read / filter from the log backup archive with static start TS / restore TS. -type logFileManager struct { +type LogFileManager struct { // startTS and restoreTS are used for kv file restore. // TiKV will filter the key space that don't belong to [startTS, restoreTS]. - startTS uint64 - restoreTS uint64 + StartTS uint64 + RestoreTS uint64 // If the commitTS of txn-entry belong to [startTS, restoreTS], // the startTS of txn-entry may be smaller than startTS. // We need maintain and restore more entries in default cf // (the startTS in these entries belong to [shiftStartTS, startTS]). - shiftStartTS uint64 + ShiftStartTS uint64 storage storage.ExternalStorage helper *stream.MetadataHelper @@ -79,10 +81,10 @@ type DDLMetaGroup struct { // CreateLogFileManager creates a log file manager using the specified config. // Generally the config cannot be changed during its lifetime. -func CreateLogFileManager(ctx context.Context, init LogFileManagerInit) (*logFileManager, error) { - fm := &logFileManager{ - startTS: init.StartTS, - restoreTS: init.RestoreTS, +func CreateLogFileManager(ctx context.Context, init LogFileManagerInit) (*LogFileManager, error) { + fm := &LogFileManager{ + StartTS: init.StartTS, + RestoreTS: init.RestoreTS, storage: init.Storage, helper: stream.NewMetadataHelper(), @@ -95,11 +97,11 @@ func CreateLogFileManager(ctx context.Context, init LogFileManagerInit) (*logFil return fm, nil } -func (rc *logFileManager) ShiftTS() uint64 { - return rc.shiftStartTS +func (rc *LogFileManager) ShiftTS() uint64 { + return rc.ShiftStartTS } -func (rc *logFileManager) loadShiftTS(ctx context.Context) error { +func (rc *LogFileManager) loadShiftTS(ctx context.Context) error { shiftTS := struct { sync.Mutex value uint64 @@ -113,7 +115,7 @@ func (rc *logFileManager) loadShiftTS(ctx context.Context) error { log.Info("read meta from storage and parse", zap.String("path", path), zap.Uint64("min-ts", m.MinTs), zap.Uint64("max-ts", m.MaxTs), zap.Int32("meta-version", int32(m.MetaVersion))) - ts, ok := UpdateShiftTS(m, rc.startTS, rc.restoreTS) + ts, ok := stream.UpdateShiftTS(m, rc.StartTS, rc.RestoreTS) shiftTS.Lock() if ok && (!shiftTS.exists || shiftTS.value > ts) { shiftTS.value = ts @@ -127,29 +129,29 @@ func (rc *logFileManager) loadShiftTS(ctx context.Context) error { return err } if !shiftTS.exists { - rc.shiftStartTS = rc.startTS + rc.ShiftStartTS = rc.StartTS return nil } - rc.shiftStartTS = shiftTS.value + rc.ShiftStartTS = shiftTS.value return nil } -func (rc *logFileManager) streamingMeta(ctx context.Context) (MetaIter, error) { - return rc.streamingMetaByTS(ctx, rc.restoreTS) +func (rc *LogFileManager) streamingMeta(ctx context.Context) (MetaIter, error) { + return rc.streamingMetaByTS(ctx, 
rc.RestoreTS) } -func (rc *logFileManager) streamingMetaByTS(ctx context.Context, restoreTS uint64) (MetaIter, error) { +func (rc *LogFileManager) streamingMetaByTS(ctx context.Context, restoreTS uint64) (MetaIter, error) { it, err := rc.createMetaIterOver(ctx, rc.storage) if err != nil { return nil, err } filtered := iter.FilterOut(it, func(metadata *backuppb.Metadata) bool { - return restoreTS < metadata.MinTs || metadata.MaxTs < rc.shiftStartTS + return restoreTS < metadata.MinTs || metadata.MaxTs < rc.ShiftStartTS }) return filtered, nil } -func (rc *logFileManager) createMetaIterOver(ctx context.Context, s storage.ExternalStorage) (MetaIter, error) { +func (rc *LogFileManager) createMetaIterOver(ctx context.Context, s storage.ExternalStorage) (MetaIter, error) { opt := &storage.WalkOption{SubDir: stream.GetStreamBackupMetaPrefix()} names := []string{} err := s.WalkDir(ctx, opt, func(path string, size int64) error { @@ -181,7 +183,7 @@ func (rc *logFileManager) createMetaIterOver(ctx context.Context, s storage.Exte return reader, nil } -func (rc *logFileManager) FilterDataFiles(ms MetaIter) LogIter { +func (rc *LogFileManager) FilterDataFiles(ms MetaIter) LogIter { return iter.FlatMap(ms, func(m *backuppb.Metadata) LogIter { return iter.FlatMap(iter.Enumerate(iter.FromSlice(m.FileGroups)), func(gi iter.Indexed[*backuppb.DataFileGroup]) LogIter { return iter.Map( @@ -210,13 +212,13 @@ func (rc *logFileManager) FilterDataFiles(ms MetaIter) LogIter { } // ShouldFilterOut checks whether a file should be filtered out via the current client. -func (rc *logFileManager) ShouldFilterOut(d *backuppb.DataFileInfo) bool { - return d.MinTs > rc.restoreTS || - (d.Cf == stream.WriteCF && d.MaxTs < rc.startTS) || - (d.Cf == stream.DefaultCF && d.MaxTs < rc.shiftStartTS) +func (rc *LogFileManager) ShouldFilterOut(d *backuppb.DataFileInfo) bool { + return d.MinTs > rc.RestoreTS || + (d.Cf == stream.WriteCF && d.MaxTs < rc.StartTS) || + (d.Cf == stream.DefaultCF && d.MaxTs < rc.ShiftStartTS) } -func (rc *logFileManager) collectDDLFilesAndPrepareCache( +func (rc *LogFileManager) collectDDLFilesAndPrepareCache( ctx context.Context, files MetaGroupIter, ) ([]Log, error) { @@ -237,7 +239,7 @@ func (rc *logFileManager) collectDDLFilesAndPrepareCache( // LoadDDLFilesAndCountDMLFiles loads all DDL files needs to be restored in the restoration. // At the same time, if the `counter` isn't nil, counting the DML file needs to be restored into `counter`. // This function returns all DDL files needing directly because we need sort all of them. -func (rc *logFileManager) LoadDDLFilesAndCountDMLFiles(ctx context.Context, counter *int) ([]Log, error) { +func (rc *LogFileManager) LoadDDLFilesAndCountDMLFiles(ctx context.Context, counter *int) ([]Log, error) { m, err := rc.streamingMeta(ctx) if err != nil { return nil, err @@ -260,7 +262,7 @@ func (rc *logFileManager) LoadDDLFilesAndCountDMLFiles(ctx context.Context, coun // LoadDMLFiles loads all DML files needs to be restored in the restoration. // This function returns a stream, because there are usually many DML files need to be restored. -func (rc *logFileManager) LoadDMLFiles(ctx context.Context) (LogIter, error) { +func (rc *LogFileManager) LoadDMLFiles(ctx context.Context) (LogIter, error) { m, err := rc.streamingMeta(ctx) if err != nil { return nil, err @@ -270,20 +272,7 @@ func (rc *logFileManager) LoadDMLFiles(ctx context.Context) (LogIter, error) { return mg, nil } -// readStreamMetaByTS is used for streaming task. 
collect all meta file by TS, it is for test usage. -func (rc *logFileManager) readStreamMeta(ctx context.Context) ([]Meta, error) { - metas, err := rc.streamingMeta(ctx) - if err != nil { - return nil, err - } - r := iter.CollectAll(ctx, metas) - if r.Err != nil { - return nil, errors.Trace(r.Err) - } - return r.Item, nil -} - -func (rc *logFileManager) FilterMetaFiles(ms MetaIter) MetaGroupIter { +func (rc *LogFileManager) FilterMetaFiles(ms MetaIter) MetaGroupIter { return iter.FlatMap(ms, func(m Meta) MetaGroupIter { return iter.Map(iter.FromSlice(m.FileGroups), func(g *backuppb.DataFileGroup) DDLMetaGroup { metas := iter.FilterOut(iter.FromSlice(g.DataFilesInfo), func(d Log) bool { @@ -302,8 +291,24 @@ func (rc *logFileManager) FilterMetaFiles(ms MetaIter) MetaGroupIter { }) } +// the kv entry with ts, the ts is decoded from entry. +type KvEntryWithTS struct { + E kv.Entry + Ts uint64 +} + +func getKeyTS(key []byte) (uint64, error) { + if len(key) < 8 { + return 0, errors.Annotatef(berrors.ErrInvalidArgument, + "the length of key is smaller than 8, key:%s", redact.Key(key)) + } + + _, ts, err := codec.DecodeUintDesc(key[len(key)-8:]) + return ts, err +} + // ReadAllEntries loads content of a log file, with filtering out no needed entries. -func (rc *logFileManager) ReadAllEntries( +func (rc *LogFileManager) ReadAllEntries( ctx context.Context, file Log, filterTS uint64, @@ -335,18 +340,18 @@ func (rc *logFileManager) ReadAllEntries( continue } - ts, err := GetKeyTS(txnEntry.Key) + ts, err := getKeyTS(txnEntry.Key) if err != nil { return nil, nil, errors.Trace(err) } // The commitTs in write CF need be limited on [startTs, restoreTs]. // We can restore more key-value in default CF. - if ts > rc.restoreTS { + if ts > rc.RestoreTS { continue - } else if file.Cf == stream.WriteCF && ts < rc.startTS { + } else if file.Cf == stream.WriteCF && ts < rc.StartTS { continue - } else if file.Cf == stream.DefaultCF && ts < rc.shiftStartTS { + } else if file.Cf == stream.DefaultCF && ts < rc.ShiftStartTS { continue } @@ -360,9 +365,9 @@ func (rc *logFileManager) ReadAllEntries( } if ts < filterTS { - kvEntries = append(kvEntries, &KvEntryWithTS{e: txnEntry, ts: ts}) + kvEntries = append(kvEntries, &KvEntryWithTS{E: txnEntry, Ts: ts}) } else { - nextKvEntries = append(nextKvEntries, &KvEntryWithTS{e: txnEntry, ts: ts}) + nextKvEntries = append(nextKvEntries, &KvEntryWithTS{E: txnEntry, Ts: ts}) } } diff --git a/br/pkg/restore/log_client_test.go b/br/pkg/restore/log_restore/log_client_test.go similarity index 92% rename from br/pkg/restore/log_client_test.go rename to br/pkg/restore/log_restore/log_client_test.go index 4aafce0985..14be787d69 100644 --- a/br/pkg/restore/log_client_test.go +++ b/br/pkg/restore/log_restore/log_client_test.go @@ -3,7 +3,7 @@ // NOTE: we need to create client with only `storage` field. // However adding a public API for that is weird, so this test uses the `restore` package instead of `restore_test`. // Maybe we should refactor these APIs when possible. 
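Taken together, the renamed fields spell out the filtering window the manager applies: write-CF files must overlap [StartTS, RestoreTS], while default-CF files are kept back to ShiftStartTS, because a transaction that commits inside the window may have written its default-CF entry at an earlier start TS; ReadAllEntries then applies the same bounds to each entry's commit TS, which getKeyTS decodes from the key's trailing 8 bytes. The following is a minimal, self-contained sketch of the ShouldFilterOut predicate, using a simplified stand-in struct (fileMeta) instead of the real backuppb.DataFileInfo; the stand-in and the sample timestamps are illustrative only.

package main

import "fmt"

// fileMeta is a simplified stand-in for backuppb.DataFileInfo, keeping only
// the fields the filtering predicate reads.
type fileMeta struct {
	Cf    string // column family: "write" or "default"
	MinTs uint64 // smallest commit ts covered by the file
	MaxTs uint64 // largest commit ts covered by the file
}

// shouldFilterOut mirrors LogFileManager.ShouldFilterOut from the diff:
// a file is skipped when it lies entirely above RestoreTS, or entirely below
// the lower bound of its column family (StartTS for the write CF,
// ShiftStartTS for the default CF).
func shouldFilterOut(d fileMeta, startTS, restoreTS, shiftStartTS uint64) bool {
	return d.MinTs > restoreTS ||
		(d.Cf == "write" && d.MaxTs < startTS) ||
		(d.Cf == "default" && d.MaxTs < shiftStartTS)
}

func main() {
	// Window [100, 200] with the default CF shifted down to 80, because a
	// transaction committing inside the window may have started before 100.
	const startTS, restoreTS, shiftStartTS = 100, 200, 80

	fmt.Println(shouldFilterOut(fileMeta{Cf: "write", MinTs: 90, MaxTs: 95}, startTS, restoreTS, shiftStartTS))   // true: write CF, older than StartTS
	fmt.Println(shouldFilterOut(fileMeta{Cf: "default", MinTs: 90, MaxTs: 95}, startTS, restoreTS, shiftStartTS)) // false: default CF reaches back to ShiftStartTS
	fmt.Println(shouldFilterOut(fileMeta{Cf: "write", MinTs: 210, MaxTs: 220}, startTS, restoreTS, shiftStartTS)) // true: newer than RestoreTS
}

The same three bounds reappear per entry inside ReadAllEntries, after the commit TS has been decoded from the key suffix.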
-package restore +package logrestore_test import ( "context" @@ -19,6 +19,7 @@ import ( "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" + logrestore "github.com/pingcap/tidb/br/pkg/restore/log_restore" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/utils/iter" @@ -226,17 +227,17 @@ func testReadMetaBetweenTSWithVersion(t *testing.T, m metaMaker) { os.RemoveAll(temp) } }() - init := LogFileManagerInit{ + init := logrestore.LogFileManagerInit{ StartTS: c.startTS, RestoreTS: c.endTS, Storage: loc, MetadataDownloadBatchSize: 32, } - cli, err := CreateLogFileManager(ctx, init) + cli, err := logrestore.CreateLogFileManager(ctx, init) req.Equal(cli.ShiftTS(), c.expectedShiftTS) req.NoError(err) - metas, err := cli.readStreamMeta(ctx) + metas, err := cli.ReadStreamMeta(ctx) req.NoError(err) actualStoreIDs := make([]int64, 0, len(metas)) for _, meta := range metas { @@ -301,13 +302,14 @@ func testReadFromMetadataWithVersion(t *testing.T, m metaMaker) { } }() - meta := new(StreamMetadataSet) + meta := new(stream.StreamMetadataSet) meta.Helper = stream.NewMetadataHelper() meta.MetadataDownloadBatchSize = 128 - meta.LoadUntilAndCalculateShiftTS(ctx, loc, c.untilTS) + _, err := meta.LoadUntilAndCalculateShiftTS(ctx, loc, c.untilTS) + require.NoError(t, err) var metas []*backuppb.Metadata - for path := range meta.metadataInfos { + for path := range meta.TEST_GetMetadataInfos() { data, err := loc.ReadFile(ctx, path) require.NoError(t, err) @@ -459,7 +461,7 @@ func testFileManagerWithMeta(t *testing.T, m metaMaker) { } }() ctx := context.Background() - fm, err := CreateLogFileManager(ctx, LogFileManagerInit{ + fm, err := logrestore.CreateLogFileManager(ctx, logrestore.LogFileManagerInit{ StartTS: start, RestoreTS: end, Storage: loc, @@ -476,7 +478,7 @@ func testFileManagerWithMeta(t *testing.T, m metaMaker) { ctx, iter.Map( datas, - func(d *LogDataFileInfo) *backuppb.DataFileInfo { + func(d *logrestore.LogDataFileInfo) *backuppb.DataFileInfo { return d.DataFileInfo }, ), @@ -509,10 +511,23 @@ func TestFileManger(t *testing.T) { func TestFilterDataFiles(t *testing.T) { req := require.New(t) ctx := context.Background() - fm := logFileManager{ - startTS: 0, - restoreTS: 10, - } + loc, temp := (&mockMetaBuilder{ + metas: nil, + }).b(true) + defer func() { + t.Log("temp dir", temp) + if !t.Failed() { + os.RemoveAll(temp) + } + }() + fm, err := logrestore.CreateLogFileManager(ctx, logrestore.LogFileManagerInit{ + StartTS: 0, + RestoreTS: 10, + Storage: loc, + + MetadataDownloadBatchSize: 32, + }) + req.NoError(err) metas := []*backuppb.Metadata{ m2(wr(1, 1, 1), wr(2, 2, 2), wr(3, 3, 3), wr(4, 4, 4)), m2(wr(1, 1, 1), wr(2, 2, 2), wr(3, 3, 3), wr(4, 4, 4), wr(5, 5, 5)), @@ -520,7 +535,7 @@ func TestFilterDataFiles(t *testing.T) { } metaIter := iter.FromSlice(metas) files := iter.CollectAll(ctx, fm.FilterDataFiles(metaIter)).Item - check := func(file *LogDataFileInfo, metaKey string, goff, foff int) { + check := func(file *logrestore.LogDataFileInfo, metaKey string, goff, foff int) { req.Equal(file.MetaDataGroupName, metaKey) req.Equal(file.OffsetInMetaGroup, goff) req.Equal(file.OffsetInMergedGroup, foff) diff --git a/br/pkg/restore/split.go b/br/pkg/restore/log_restore/split.go similarity index 59% rename from br/pkg/restore/split.go rename to br/pkg/restore/log_restore/split.go index 456c3f0cc4..4e0cdb3d8c 100644 --- a/br/pkg/restore/split.go +++ b/br/pkg/restore/log_restore/split.go @@ -1,25 
+1,32 @@ -// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. -package restore +package logrestore import ( "bytes" "context" "sort" - "strings" "sync" "time" - "github.com/opentracing/opentracing-go" "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" - sst "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/log" - berrors "github.com/pingcap/tidb/br/pkg/errors" - "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/restore/split" + "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/rtree" - "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/util" @@ -28,163 +35,10 @@ import ( "golang.org/x/sync/errgroup" ) -type Granularity string - -const ( - FineGrained Granularity = "fine-grained" - CoarseGrained Granularity = "coarse-grained" - maxSplitKeysOnce = 10240 -) - -// RegionSplitter is a executor of region split by rules. -type RegionSplitter struct { - client split.SplitClient -} - -// NewRegionSplitter returns a new RegionSplitter. -func NewRegionSplitter(client split.SplitClient) *RegionSplitter { - return &RegionSplitter{ - client: client, - } -} - -// OnSplitFunc is called before split a range. -type OnSplitFunc func(key [][]byte) - -// ExecuteSplit executes regions split and make sure new splitted regions are balance. -// It will split regions by the rewrite rules, -// then it will split regions by the end key of each range. -// tableRules includes the prefix of a table, since some ranges may have -// a prefix with record sequence or index sequence. -// note: all ranges and rewrite rules must have raw key. -func (rs *RegionSplitter) ExecuteSplit( - ctx context.Context, - ranges []rtree.Range, -) error { - if len(ranges) == 0 { - log.Info("skip split regions, no range") - return nil - } - - if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { - span1 := span.Tracer().StartSpan("RegionSplitter.Split", opentracing.ChildOf(span.Context())) - defer span1.Finish() - ctx = opentracing.ContextWithSpan(ctx, span1) - } - - // Sort the range for getting the min and max key of the ranges - // TODO: this sort may not needed if we sort tables after creatation outside. 
- sortedRanges, errSplit := SortRanges(ranges) - if errSplit != nil { - return errors.Trace(errSplit) - } - if len(sortedRanges) == 0 { - log.Info("skip split regions after sorted, no range") - return nil - } - sortedKeys := make([][]byte, 0, len(sortedRanges)) - totalRangeSize := uint64(0) - for _, r := range sortedRanges { - sortedKeys = append(sortedKeys, r.EndKey) - totalRangeSize += r.Size - } - // the range size must be greater than 0 here - return rs.executeSplitByRanges(ctx, sortedKeys) -} - -func (rs *RegionSplitter) executeSplitByRanges( - ctx context.Context, - sortedKeys [][]byte, -) error { - startTime := time.Now() - // Choose the rough region split keys, - // each splited region contains 128 regions to be splitted. - const regionIndexStep = 128 - - roughSortedSplitKeys := make([][]byte, 0, len(sortedKeys)/regionIndexStep+1) - for curRegionIndex := regionIndexStep; curRegionIndex < len(sortedKeys); curRegionIndex += regionIndexStep { - roughSortedSplitKeys = append(roughSortedSplitKeys, sortedKeys[curRegionIndex]) - } - if len(roughSortedSplitKeys) > 0 { - if err := rs.executeSplitByKeys(ctx, roughSortedSplitKeys); err != nil { - return errors.Trace(err) - } - } - log.Info("finish spliting regions roughly", zap.Duration("take", time.Since(startTime))) - - // Then send split requests to each TiKV. - if err := rs.executeSplitByKeys(ctx, sortedKeys); err != nil { - return errors.Trace(err) - } - - log.Info("finish spliting and scattering regions", zap.Duration("take", time.Since(startTime))) - return nil -} - -// executeSplitByKeys will split regions by **sorted** keys with following steps. -// 1. locate regions with correspond keys. -// 2. split these regions with correspond keys. -// 3. make sure new split regions are balanced. -func (rs *RegionSplitter) executeSplitByKeys( - ctx context.Context, - sortedKeys [][]byte, -) error { - startTime := time.Now() - scatterRegions, err := rs.client.SplitKeysAndScatter(ctx, sortedKeys) - if err != nil { - return errors.Trace(err) - } - if len(scatterRegions) > 0 { - log.Info("finish splitting and scattering regions. and starts to wait", zap.Int("regions", len(scatterRegions)), - zap.Duration("take", time.Since(startTime))) - rs.waitRegionsScattered(ctx, scatterRegions, split.ScatterWaitUpperInterval) - } else { - log.Info("finish splitting regions.", zap.Duration("take", time.Since(startTime))) - } - return nil -} - -// waitRegionsScattered try to wait mutilple regions scatterd in 3 minutes. -// this could timeout, but if many regions scatterd the restore could continue -// so we don't wait long time here. 
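The removed RegionSplitter code above (relocated elsewhere in this refactor) splits in two passes: a first pass on every 128th sorted end key to carve coarse regions, a second pass on the full key list to refine them, and then a bounded wait for the new regions to scatter. Below is a small illustrative sketch of the key-thinning step; the step of 128 comes from the removed regionIndexStep constant, while roughSplitKeys itself is an invented helper name.

package main

import "fmt"

// roughSplitKeys mirrors the key-thinning step in the removed
// executeSplitByRanges: pick every step-th key from the sorted key list so
// the first round of splitting produces coarse regions, each of which is
// later refined by a second round that submits the full key set.
func roughSplitKeys(sortedKeys [][]byte, step int) [][]byte {
	rough := make([][]byte, 0, len(sortedKeys)/step+1)
	for i := step; i < len(sortedKeys); i += step {
		rough = append(rough, sortedKeys[i])
	}
	return rough
}

func main() {
	keys := make([][]byte, 0, 300)
	for i := 0; i < 300; i++ {
		keys = append(keys, []byte(fmt.Sprintf("key-%04d", i)))
	}
	// With the removed code's regionIndexStep of 128 this picks keys 128 and 256.
	for _, k := range roughSplitKeys(keys, 128) {
		fmt.Println(string(k))
	}
}

Per the removed comments, each coarse region is expected to contain roughly 128 of the final split points, so the second, full-key pass spreads its split requests across more regions.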
-func (rs *RegionSplitter) waitRegionsScattered(ctx context.Context, scatterRegions []*split.RegionInfo, timeout time.Duration) { - log.Info("start to wait for scattering regions", zap.Int("regions", len(scatterRegions))) - startTime := time.Now() - leftCnt := rs.WaitForScatterRegionsTimeout(ctx, scatterRegions, timeout) - if leftCnt == 0 { - log.Info("waiting for scattering regions done", - zap.Int("regions", len(scatterRegions)), - zap.Duration("take", time.Since(startTime))) - } else { - log.Warn("waiting for scattering regions timeout", - zap.Int("not scattered Count", leftCnt), - zap.Int("regions", len(scatterRegions)), - zap.Duration("take", time.Since(startTime))) - } -} - -func (rs *RegionSplitter) WaitForScatterRegionsTimeout(ctx context.Context, regionInfos []*split.RegionInfo, timeout time.Duration) int { - ctx2, cancel := context.WithTimeout(ctx, timeout) - defer cancel() - leftRegions, _ := rs.client.WaitRegionsScattered(ctx2, regionInfos) - return leftRegions -} - -func replacePrefix(s []byte, rewriteRules *RewriteRules) ([]byte, *sst.RewriteRule) { - // We should search the dataRules firstly. - for _, rule := range rewriteRules.Data { - if bytes.HasPrefix(s, rule.GetOldKeyPrefix()) { - return append(append([]byte{}, rule.GetNewKeyPrefix()...), s[len(rule.GetOldKeyPrefix()):]...), rule - } - } - - return s, nil -} - type rewriteSplitter struct { rewriteKey []byte tableID int64 - rule *RewriteRules + rule *utils.RewriteRules splitter *split.SplitHelper } @@ -192,7 +46,7 @@ type splitHelperIterator struct { tableSplitters []*rewriteSplitter } -func (iter *splitHelperIterator) Traverse(fn func(v split.Valued, endKey []byte, rule *RewriteRules) bool) { +func (iter *splitHelperIterator) Traverse(fn func(v split.Valued, endKey []byte, rule *utils.RewriteRules) bool) { for _, entry := range iter.tableSplitters { endKey := codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(entry.tableID+1)) rule := entry.rule @@ -202,7 +56,7 @@ func (iter *splitHelperIterator) Traverse(fn func(v split.Valued, endKey []byte, } } -func NewSplitHelperIteratorForTest(helper *split.SplitHelper, tableID int64, rule *RewriteRules) *splitHelperIterator { +func NewSplitHelperIteratorForTest(helper *split.SplitHelper, tableID int64, rule *utils.RewriteRules) *splitHelperIterator { return &splitHelperIterator{ tableSplitters: []*rewriteSplitter{ { @@ -216,7 +70,7 @@ func NewSplitHelperIteratorForTest(helper *split.SplitHelper, tableID int64, rul type LogSplitHelper struct { tableSplitter map[int64]*split.SplitHelper - rules map[int64]*RewriteRules + rules map[int64]*utils.RewriteRules client split.SplitClient pool *util.WorkerPool eg *errgroup.Group @@ -226,7 +80,7 @@ type LogSplitHelper struct { splitThreSholdKeys int64 } -func NewLogSplitHelper(rules map[int64]*RewriteRules, client split.SplitClient, splitSize uint64, splitKeys int64) *LogSplitHelper { +func NewLogSplitHelper(rules map[int64]*utils.RewriteRules, client split.SplitClient, splitSize uint64, splitKeys int64) *LogSplitHelper { return &LogSplitHelper{ tableSplitter: make(map[int64]*split.SplitHelper), rules: rules, @@ -248,7 +102,7 @@ func (helper *LogSplitHelper) iterator() *splitHelperIterator { log.Info("skip splitting due to no table id matched", zap.Int64("tableID", tableID)) continue } - newTableID := GetRewriteTableID(tableID, rewriteRule) + newTableID := utils.GetRewriteTableID(tableID, rewriteRule) if newTableID == 0 { log.Warn("failed to get the rewrite table id", zap.Int64("tableID", tableID)) continue @@ -297,11 +151,11 @@ func 
(helper *LogSplitHelper) Merge(file *backuppb.DataFileInfo) { }) } -type splitFunc = func(context.Context, *RegionSplitter, uint64, int64, *split.RegionInfo, []split.Valued) error +type splitFunc = func(context.Context, *utils.RegionSplitter, uint64, int64, *split.RegionInfo, []split.Valued) error func (helper *LogSplitHelper) splitRegionByPoints( ctx context.Context, - regionSplitter *RegionSplitter, + regionSplitter *utils.RegionSplitter, initialLength uint64, initialNumber int64, region *split.RegionInfo, @@ -331,7 +185,7 @@ func (helper *LogSplitHelper) splitRegionByPoints( } helper.pool.ApplyOnErrorGroup(helper.eg, func() error { - newRegions, errSplit := regionSplitter.client.SplitWaitAndScatter(ctx, region, splitPoints) + newRegions, errSplit := regionSplitter.SplitWaitAndScatter(ctx, region, splitPoints) if errSplit != nil { log.Warn("failed to split the scaned region", zap.Error(errSplit)) _, startKey, _ := codec.DecodeBytes(region.Region.StartKey, nil) @@ -354,17 +208,6 @@ func (helper *LogSplitHelper) splitRegionByPoints( return nil } -// GetRewriteTableID gets rewrite table id by the rewrite rule and original table id -func GetRewriteTableID(tableID int64, rewriteRules *RewriteRules) int64 { - tableKey := tablecodec.GenTableRecordPrefix(tableID) - rule := matchOldPrefix(tableKey, rewriteRules) - if rule == nil { - return 0 - } - - return tablecodec.DecodeTableID(rule.GetNewKeyPrefix()) -} - // SplitPoint selects ranges overlapped with each region, and calls `splitF` to split the region func SplitPoint( ctx context.Context, @@ -374,7 +217,7 @@ func SplitPoint( ) (err error) { // common status var ( - regionSplitter *RegionSplitter = NewRegionSplitter(client) + regionSplitter *utils.RegionSplitter = utils.NewRegionSplitter(client) ) // region traverse status var ( @@ -401,7 +244,7 @@ func SplitPoint( regionOverCount uint64 = 0 ) - iter.Traverse(func(v split.Valued, endKey []byte, rule *RewriteRules) bool { + iter.Traverse(func(v split.Valued, endKey []byte, rule *utils.RewriteRules) bool { if v.Value.Number == 0 || v.Value.Size == 0 { return true } @@ -410,7 +253,7 @@ func SplitPoint( vEndKey []byte ) // use `vStartKey` and `vEndKey` to compare with region's key - vStartKey, vEndKey, err = GetRewriteEncodedKeys(v, rule) + vStartKey, vEndKey, err = utils.GetRewriteEncodedKeys(v, rule) if err != nil { return false } @@ -526,7 +369,7 @@ func (helper *LogSplitHelper) Split(ctx context.Context) error { } } - regionSplitter := NewRegionSplitter(helper.client) + regionSplitter := utils.NewRegionSplitter(helper.client) // It is too expensive to stop recovery and wait for a small number of regions // to complete scatter, so the maximum waiting time is reduced to 1 minute. _ = regionSplitter.WaitForScatterRegionsTimeout(ctx, scatterRegions, time.Minute) @@ -558,7 +401,7 @@ type LogFilesIterWithSplitHelper struct { const SplitFilesBufferSize = 4096 -func NewLogFilesIterWithSplitHelper(iter LogIter, rules map[int64]*RewriteRules, client split.SplitClient, splitSize uint64, splitKeys int64) LogIter { +func NewLogFilesIterWithSplitHelper(iter LogIter, rules map[int64]*utils.RewriteRules, client split.SplitClient, splitSize uint64, splitKeys int64) LogIter { return &LogFilesIterWithSplitHelper{ iter: iter, helper: NewLogSplitHelper(rules, client, splitSize, splitKeys), @@ -597,32 +440,3 @@ func (splitIter *LogFilesIterWithSplitHelper) TryNext(ctx context.Context) iter. 
splitIter.next += 1 return res } - -type splitBackoffer struct { - state utils.RetryState -} - -func newSplitBackoffer() *splitBackoffer { - return &splitBackoffer{ - state: utils.InitialRetryState(split.SplitRetryTimes, split.SplitRetryInterval, split.SplitMaxRetryInterval), - } -} - -func (bo *splitBackoffer) NextBackoff(err error) time.Duration { - switch { - case berrors.ErrPDBatchScanRegion.Equal(err): - log.Warn("inconsistent region info get.", logutil.ShortError(err)) - return time.Second - case strings.Contains(err.Error(), "no valid key"): - bo.state.GiveUp() - return 0 - case berrors.ErrInvalidRange.Equal(err): - bo.state.GiveUp() - return 0 - } - return bo.state.ExponentialBackoff() -} - -func (bo *splitBackoffer) Attempt() int { - return bo.state.Attempt() -} diff --git a/br/pkg/restore/log_restore/split_test.go b/br/pkg/restore/log_restore/split_test.go new file mode 100644 index 0000000000..27af9db297 --- /dev/null +++ b/br/pkg/restore/log_restore/split_test.go @@ -0,0 +1,239 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package logrestore + +import ( + "bytes" + "context" + "fmt" + "testing" + + backuppb "github.com/pingcap/kvproto/pkg/brpb" + "github.com/pingcap/kvproto/pkg/import_sstpb" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/tidb/br/pkg/restore/split" + "github.com/pingcap/tidb/br/pkg/restore/utils" + "github.com/pingcap/tidb/br/pkg/utils/iter" + "github.com/pingcap/tidb/pkg/tablecodec" + "github.com/pingcap/tidb/pkg/util/codec" + "github.com/stretchr/testify/require" +) + +type fakeSplitClient struct { + split.SplitClient + regions []*split.RegionInfo +} + +func newFakeSplitClient() *fakeSplitClient { + return &fakeSplitClient{ + regions: make([]*split.RegionInfo, 0), + } +} + +func (f *fakeSplitClient) AppendRegion(startKey, endKey []byte) { + f.regions = append(f.regions, &split.RegionInfo{ + Region: &metapb.Region{ + StartKey: startKey, + EndKey: endKey, + }, + }) +} + +func (f *fakeSplitClient) ScanRegions(ctx context.Context, startKey, endKey []byte, limit int) ([]*split.RegionInfo, error) { + result := make([]*split.RegionInfo, 0) + count := 0 + for _, rng := range f.regions { + if bytes.Compare(rng.Region.StartKey, endKey) <= 0 && bytes.Compare(rng.Region.EndKey, startKey) > 0 { + result = append(result, rng) + count++ + } + if count >= limit { + break + } + } + return result, nil +} + +func (f *fakeSplitClient) WaitRegionsScattered(context.Context, []*split.RegionInfo) (int, error) { + return 0, nil +} + +func keyWithTablePrefix(tableID int64, key string) []byte { + rawKey := append(tablecodec.GenTableRecordPrefix(tableID), []byte(key)...) 
+ return codec.EncodeBytes([]byte{}, rawKey) +} + +func TestSplitPoint(t *testing.T) { + ctx := context.Background() + var oldTableID int64 = 50 + var tableID int64 = 100 + rewriteRules := &utils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{ + { + OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), + NewKeyPrefix: tablecodec.EncodeTablePrefix(tableID), + }, + }, + } + + // range: b c d e g i + // +---+ +---+ +---------+ + // +-------------+----------+---------+ + // region: a f h j + splitHelper := split.NewSplitHelper() + splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "b"), EndKey: keyWithTablePrefix(oldTableID, "c")}, Value: split.Value{Size: 100, Number: 100}}) + splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "d"), EndKey: keyWithTablePrefix(oldTableID, "e")}, Value: split.Value{Size: 200, Number: 200}}) + splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "g"), EndKey: keyWithTablePrefix(oldTableID, "i")}, Value: split.Value{Size: 300, Number: 300}}) + client := newFakeSplitClient() + client.AppendRegion(keyWithTablePrefix(tableID, "a"), keyWithTablePrefix(tableID, "f")) + client.AppendRegion(keyWithTablePrefix(tableID, "f"), keyWithTablePrefix(tableID, "h")) + client.AppendRegion(keyWithTablePrefix(tableID, "h"), keyWithTablePrefix(tableID, "j")) + client.AppendRegion(keyWithTablePrefix(tableID, "j"), keyWithTablePrefix(tableID+1, "a")) + + iter := NewSplitHelperIteratorForTest(splitHelper, tableID, rewriteRules) + err := SplitPoint(ctx, iter, client, func(ctx context.Context, rs *utils.RegionSplitter, u uint64, o int64, ri *split.RegionInfo, v []split.Valued) error { + require.Equal(t, u, uint64(0)) + require.Equal(t, o, int64(0)) + require.Equal(t, ri.Region.StartKey, keyWithTablePrefix(tableID, "a")) + require.Equal(t, ri.Region.EndKey, keyWithTablePrefix(tableID, "f")) + require.EqualValues(t, v[0].Key.StartKey, keyWithTablePrefix(tableID, "b")) + require.EqualValues(t, v[0].Key.EndKey, keyWithTablePrefix(tableID, "c")) + require.EqualValues(t, v[1].Key.StartKey, keyWithTablePrefix(tableID, "d")) + require.EqualValues(t, v[1].Key.EndKey, keyWithTablePrefix(tableID, "e")) + require.Equal(t, len(v), 2) + return nil + }) + require.NoError(t, err) +} + +func getCharFromNumber(prefix string, i int) string { + c := '1' + (i % 10) + b := '1' + (i%100)/10 + a := '1' + i/100 + return fmt.Sprintf("%s%c%c%c", prefix, a, b, c) +} + +func TestSplitPoint2(t *testing.T) { + ctx := context.Background() + var oldTableID int64 = 50 + var tableID int64 = 100 + rewriteRules := &utils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{ + { + OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), + NewKeyPrefix: tablecodec.EncodeTablePrefix(tableID), + }, + }, + } + + // range: b c d e f i j k l n + // +---+ +---+ +-----------------+ +----+ +--------+ + // +---------------+--+.....+----+------------+---------+ + // region: a g >128 h m o + splitHelper := split.NewSplitHelper() + splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "b"), EndKey: keyWithTablePrefix(oldTableID, "c")}, Value: split.Value{Size: 100, Number: 100}}) + splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "d"), EndKey: keyWithTablePrefix(oldTableID, "e")}, Value: split.Value{Size: 200, Number: 200}}) + splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "f"), EndKey: keyWithTablePrefix(oldTableID, 
"i")}, Value: split.Value{Size: 300, Number: 300}}) + splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "j"), EndKey: keyWithTablePrefix(oldTableID, "k")}, Value: split.Value{Size: 200, Number: 200}}) + splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "l"), EndKey: keyWithTablePrefix(oldTableID, "n")}, Value: split.Value{Size: 200, Number: 200}}) + client := newFakeSplitClient() + client.AppendRegion(keyWithTablePrefix(tableID, "a"), keyWithTablePrefix(tableID, "g")) + client.AppendRegion(keyWithTablePrefix(tableID, "g"), keyWithTablePrefix(tableID, getCharFromNumber("g", 0))) + for i := 0; i < 256; i++ { + client.AppendRegion(keyWithTablePrefix(tableID, getCharFromNumber("g", i)), keyWithTablePrefix(tableID, getCharFromNumber("g", i+1))) + } + client.AppendRegion(keyWithTablePrefix(tableID, getCharFromNumber("g", 256)), keyWithTablePrefix(tableID, "h")) + client.AppendRegion(keyWithTablePrefix(tableID, "h"), keyWithTablePrefix(tableID, "m")) + client.AppendRegion(keyWithTablePrefix(tableID, "m"), keyWithTablePrefix(tableID, "o")) + client.AppendRegion(keyWithTablePrefix(tableID, "o"), keyWithTablePrefix(tableID+1, "a")) + + firstSplit := true + iter := NewSplitHelperIteratorForTest(splitHelper, tableID, rewriteRules) + err := SplitPoint(ctx, iter, client, func(ctx context.Context, rs *utils.RegionSplitter, u uint64, o int64, ri *split.RegionInfo, v []split.Valued) error { + if firstSplit { + require.Equal(t, u, uint64(0)) + require.Equal(t, o, int64(0)) + require.Equal(t, ri.Region.StartKey, keyWithTablePrefix(tableID, "a")) + require.Equal(t, ri.Region.EndKey, keyWithTablePrefix(tableID, "g")) + require.EqualValues(t, v[0].Key.StartKey, keyWithTablePrefix(tableID, "b")) + require.EqualValues(t, v[0].Key.EndKey, keyWithTablePrefix(tableID, "c")) + require.EqualValues(t, v[1].Key.StartKey, keyWithTablePrefix(tableID, "d")) + require.EqualValues(t, v[1].Key.EndKey, keyWithTablePrefix(tableID, "e")) + require.EqualValues(t, v[2].Key.StartKey, keyWithTablePrefix(tableID, "f")) + require.EqualValues(t, v[2].Key.EndKey, keyWithTablePrefix(tableID, "g")) + require.Equal(t, v[2].Value.Size, uint64(1)) + require.Equal(t, v[2].Value.Number, int64(1)) + require.Equal(t, len(v), 3) + firstSplit = false + } else { + require.Equal(t, u, uint64(1)) + require.Equal(t, o, int64(1)) + require.Equal(t, ri.Region.StartKey, keyWithTablePrefix(tableID, "h")) + require.Equal(t, ri.Region.EndKey, keyWithTablePrefix(tableID, "m")) + require.EqualValues(t, v[0].Key.StartKey, keyWithTablePrefix(tableID, "j")) + require.EqualValues(t, v[0].Key.EndKey, keyWithTablePrefix(tableID, "k")) + require.EqualValues(t, v[1].Key.StartKey, keyWithTablePrefix(tableID, "l")) + require.EqualValues(t, v[1].Key.EndKey, keyWithTablePrefix(tableID, "m")) + require.Equal(t, v[1].Value.Size, uint64(100)) + require.Equal(t, v[1].Value.Number, int64(100)) + require.Equal(t, len(v), 2) + } + return nil + }) + require.NoError(t, err) +} + +type mockLogIter struct { + next int +} + +func (m *mockLogIter) TryNext(ctx context.Context) iter.IterResult[*LogDataFileInfo] { + if m.next > 10000 { + return iter.Done[*LogDataFileInfo]() + } + m.next += 1 + return iter.Emit(&LogDataFileInfo{ + DataFileInfo: &backuppb.DataFileInfo{ + StartKey: []byte(fmt.Sprintf("a%d", m.next)), + EndKey: []byte("b"), + Length: 1024, // 1 KB + }, + }) +} + +func TestLogFilesIterWithSplitHelper(t *testing.T) { + var tableID int64 = 76 + var oldTableID int64 = 80 + rewriteRules := 
&utils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{ + { + OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), + NewKeyPrefix: tablecodec.EncodeTablePrefix(tableID), + }, + }, + } + rewriteRulesMap := map[int64]*utils.RewriteRules{ + oldTableID: rewriteRules, + } + mockIter := &mockLogIter{} + ctx := context.Background() + logIter := NewLogFilesIterWithSplitHelper(mockIter, rewriteRulesMap, newFakeSplitClient(), 144*1024*1024, 1440000) + next := 0 + for r := logIter.TryNext(ctx); !r.Finished; r = logIter.TryNext(ctx) { + require.NoError(t, r.Err) + next += 1 + require.Equal(t, []byte(fmt.Sprintf("a%d", next)), r.Item.StartKey) + } +} diff --git a/br/pkg/restore/pipeline_items.go b/br/pkg/restore/pipeline_items.go index 3933b9f408..58c7b9b100 100644 --- a/br/pkg/restore/pipeline_items.go +++ b/br/pkg/restore/pipeline_items.go @@ -13,6 +13,7 @@ import ( "github.com/pingcap/tidb/br/pkg/glue" "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/metautil" + "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/rtree" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/pkg/parser/model" @@ -148,7 +149,7 @@ func splitPrepareWork(ctx context.Context, client *Client, tables []*model.Table // CreatedTable is a table created on restore process, // but not yet filled with data. type CreatedTable struct { - RewriteRule *RewriteRules + RewriteRule *utils.RewriteRules Table *model.TableInfo OldTable *metautil.Table } @@ -172,7 +173,7 @@ type TableIDWithFiles struct { // RewriteRules is the rewrite rules for the specify table. // because these rules belongs to the *one table*. // we can hold them here. - RewriteRules *RewriteRules + RewriteRules *utils.RewriteRules } // Exhaust drains all remaining errors in the channel, into a slice of errors. diff --git a/br/pkg/restore/range.go b/br/pkg/restore/range.go deleted file mode 100644 index 14aed01ae8..0000000000 --- a/br/pkg/restore/range.go +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. - -package restore - -import ( - "github.com/pingcap/errors" - "github.com/pingcap/kvproto/pkg/import_sstpb" - "github.com/pingcap/log" - berrors "github.com/pingcap/tidb/br/pkg/errors" - "github.com/pingcap/tidb/br/pkg/logutil" - "github.com/pingcap/tidb/br/pkg/rtree" -) - -// Range record start and end key for localStoreDir.DB -// so we can write it to tikv in streaming -type Range struct { - Start []byte - End []byte -} - -// SortRanges checks if the range overlapped and sort them. -func SortRanges(ranges []rtree.Range) ([]rtree.Range, error) { - rangeTree := rtree.NewRangeTree() - for _, rg := range ranges { - if out := rangeTree.InsertRange(rg); out != nil { - log.Error("insert ranges overlapped", - logutil.Key("startKeyOut", out.StartKey), - logutil.Key("endKeyOut", out.EndKey), - logutil.Key("startKeyIn", rg.StartKey), - logutil.Key("endKeyIn", rg.EndKey)) - return nil, errors.Annotatef(berrors.ErrInvalidRange, "ranges overlapped") - } - } - sortedRanges := rangeTree.GetSortedRanges() - return sortedRanges, nil -} - -// RewriteRules contains rules for rewriting keys of tables. -type RewriteRules struct { - Data []*import_sstpb.RewriteRule - OldKeyspace []byte - NewKeyspace []byte -} - -// Append append its argument to this rewrite rules. -func (r *RewriteRules) Append(other RewriteRules) { - r.Data = append(r.Data, other.Data...) -} - -// EmptyRewriteRule make a map of new, empty rewrite rules. 
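The deleted range.go above carried the RewriteRules type, whose data rules map an old key prefix to a new one; the removed replacePrefix helper earlier in this diff applies the first matching rule by splicing prefixes. Below is a self-contained sketch of that prefix swap, using a trimmed-down rewriteRule stand-in and made-up string prefixes rather than real TiDB key encodings.

package main

import (
	"bytes"
	"fmt"
)

// rewriteRule is a trimmed-down stand-in for import_sstpb.RewriteRule: it
// only carries the old and new key prefixes used by the data rules.
type rewriteRule struct {
	OldKeyPrefix []byte
	NewKeyPrefix []byte
}

// replacePrefix mirrors the removed helper of the same name: find the first
// data rule whose old prefix matches the key and splice in the new prefix,
// leaving the key untouched (and returning nil) when no rule applies.
func replacePrefix(key []byte, rules []rewriteRule) ([]byte, *rewriteRule) {
	for i := range rules {
		if bytes.HasPrefix(key, rules[i].OldKeyPrefix) {
			rewritten := append(append([]byte{}, rules[i].NewKeyPrefix...), key[len(rules[i].OldKeyPrefix):]...)
			return rewritten, &rules[i]
		}
	}
	return key, nil
}

func main() {
	rules := []rewriteRule{{OldKeyPrefix: []byte("t_old_"), NewKeyPrefix: []byte("t_new_")}}
	out, rule := replacePrefix([]byte("t_old_row42"), rules)
	fmt.Printf("%s matched=%v\n", out, rule != nil) // t_new_row42 matched=true
	out, rule = replacePrefix([]byte("t_other_row42"), rules)
	fmt.Printf("%s matched=%v\n", out, rule != nil) // t_other_row42 matched=false
}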
-func EmptyRewriteRulesMap() map[int64]*RewriteRules { - return make(map[int64]*RewriteRules) -} - -// EmptyRewriteRule make a new, empty rewrite rule. -func EmptyRewriteRule() *RewriteRules { - return &RewriteRules{ - Data: []*import_sstpb.RewriteRule{}, - } -} diff --git a/br/pkg/restore/range_test.go b/br/pkg/restore/range_test.go deleted file mode 100644 index 2c84e2b7f0..0000000000 --- a/br/pkg/restore/range_test.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. - -package restore - -import ( - "testing" - - "github.com/pingcap/kvproto/pkg/import_sstpb" - "github.com/pingcap/tidb/br/pkg/rtree" - "github.com/pingcap/tidb/pkg/tablecodec" - "github.com/stretchr/testify/require" -) - -func rangeEquals(t *testing.T, obtained, expected []rtree.Range) { - require.Equal(t, len(expected), len(obtained)) - for i := range obtained { - require.Equal(t, expected[i].StartKey, obtained[i].StartKey) - require.Equal(t, expected[i].EndKey, obtained[i].EndKey) - } -} - -func TestSortRange(t *testing.T) { - dataRules := []*import_sstpb.RewriteRule{ - {OldKeyPrefix: tablecodec.GenTableRecordPrefix(1), NewKeyPrefix: tablecodec.GenTableRecordPrefix(4)}, - {OldKeyPrefix: tablecodec.GenTableRecordPrefix(2), NewKeyPrefix: tablecodec.GenTableRecordPrefix(5)}, - } - rewriteRules := &RewriteRules{ - Data: dataRules, - } - ranges1 := []rtree.Range{ - { - StartKey: append(tablecodec.GenTableRecordPrefix(1), []byte("aaa")...), - EndKey: append(tablecodec.GenTableRecordPrefix(1), []byte("bbb")...), Files: nil, - }, - } - for i, rg := range ranges1 { - tmp, _ := RewriteRange(&rg, rewriteRules) - ranges1[i] = *tmp - } - rs1, err := SortRanges(ranges1) - require.NoErrorf(t, err, "sort range1 failed: %v", err) - rangeEquals(t, rs1, []rtree.Range{ - { - StartKey: append(tablecodec.GenTableRecordPrefix(4), []byte("aaa")...), - EndKey: append(tablecodec.GenTableRecordPrefix(4), []byte("bbb")...), Files: nil, - }, - }) - - ranges2 := []rtree.Range{ - { - StartKey: append(tablecodec.GenTableRecordPrefix(1), []byte("aaa")...), - EndKey: append(tablecodec.GenTableRecordPrefix(2), []byte("bbb")...), Files: nil, - }, - } - for _, rg := range ranges2 { - _, err := RewriteRange(&rg, rewriteRules) - require.Error(t, err) - require.Regexp(t, "table id mismatch.*", err.Error()) - } - - ranges3 := initRanges() - rewriteRules1 := initRewriteRules() - for i, rg := range ranges3 { - tmp, _ := RewriteRange(&rg, rewriteRules1) - ranges3[i] = *tmp - } - rs3, err := SortRanges(ranges3) - require.NoErrorf(t, err, "sort range1 failed: %v", err) - rangeEquals(t, rs3, []rtree.Range{ - {StartKey: []byte("bbd"), EndKey: []byte("bbf"), Files: nil}, - {StartKey: []byte("bbf"), EndKey: []byte("bbj"), Files: nil}, - {StartKey: []byte("xxa"), EndKey: []byte("xxe"), Files: nil}, - {StartKey: []byte("xxe"), EndKey: []byte("xxz"), Files: nil}, - }) -} diff --git a/br/pkg/restore/rawkv/BUILD.bazel b/br/pkg/restore/rawkv/BUILD.bazel new file mode 100644 index 0000000000..cac9142aac --- /dev/null +++ b/br/pkg/restore/rawkv/BUILD.bazel @@ -0,0 +1,32 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "rawkv", + srcs = ["rawkv_client.go"], + importpath = "github.com/pingcap/tidb/br/pkg/restore/rawkv", + visibility = ["//visibility:public"], + deps = [ + "//br/pkg/restore/utils", + "//pkg/util/hack", + "@com_github_pingcap_errors//:errors", + "@com_github_tikv_client_go_v2//config", + "@com_github_tikv_client_go_v2//rawkv", + "@com_github_tikv_pd_client//:client", + ], +) + 
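The deleted range_test.go above exercised SortRanges, which sorts restore ranges and refuses overlapping ones. The real implementation delegates the overlap check to the rtree package; the sketch below is a simplified equivalent over half-open [Start, End) spans and ignores the empty-end-key-means-unbounded convention, so it is illustrative only.

package main

import (
	"bytes"
	"errors"
	"fmt"
	"sort"
)

// keyRange is a minimal stand-in for rtree.Range: a half-open key span
// [Start, End).
type keyRange struct {
	Start, End []byte
}

// sortRanges sorts ranges by start key and rejects any overlap, which is the
// contract of the removed SortRanges (the real code uses an interval tree
// instead of this pairwise scan).
func sortRanges(ranges []keyRange) ([]keyRange, error) {
	sorted := append([]keyRange(nil), ranges...)
	sort.Slice(sorted, func(i, j int) bool {
		return bytes.Compare(sorted[i].Start, sorted[j].Start) < 0
	})
	for i := 1; i < len(sorted); i++ {
		// The previous range must end at or before the next one starts.
		if bytes.Compare(sorted[i-1].End, sorted[i].Start) > 0 {
			return nil, errors.New("ranges overlapped")
		}
	}
	return sorted, nil
}

func main() {
	ok := []keyRange{{[]byte("ccd"), []byte("ccf")}, {[]byte("aaa"), []byte("aae")}}
	bad := []keyRange{{[]byte("aaa"), []byte("aaz")}, {[]byte("aae"), []byte("abc")}}
	if s, err := sortRanges(ok); err == nil {
		fmt.Printf("sorted: %s..%s, %s..%s\n", s[0].Start, s[0].End, s[1].Start, s[1].End)
	}
	_, err := sortRanges(bad)
	fmt.Println(err) // ranges overlapped
}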
+go_test( + name = "rawkv_test", + timeout = "short", + srcs = ["rawkv_client_test.go"], + flaky = True, + deps = [ + ":rawkv", + "//br/pkg/errors", + "//pkg/kv", + "//pkg/util/codec", + "@com_github_pingcap_errors//:errors", + "@com_github_stretchr_testify//require", + "@com_github_tikv_client_go_v2//rawkv", + ], +) diff --git a/br/pkg/restore/rawkv_client.go b/br/pkg/restore/rawkv/rawkv_client.go similarity index 97% rename from br/pkg/restore/rawkv_client.go rename to br/pkg/restore/rawkv/rawkv_client.go index 22f0a9a86d..e353aac231 100644 --- a/br/pkg/restore/rawkv_client.go +++ b/br/pkg/restore/rawkv/rawkv_client.go @@ -1,12 +1,13 @@ // Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package rawkv import ( "context" "time" "github.com/pingcap/errors" + "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/pkg/util/hack" "github.com/tikv/client-go/v2/config" "github.com/tikv/client-go/v2/rawkv" @@ -73,7 +74,7 @@ func (c *RawKVBatchClient) SetColumnFamily(columnFamily string) { // Put puts (key, value) into buffer justly, wait for batch write if the buffer is full. func (c *RawKVBatchClient) Put(ctx context.Context, key, value []byte, originTs uint64) error { - k := TruncateTS(key) + k := utils.TruncateTS(key) sk := hack.String(k) if v, ok := c.kvs[sk]; ok { if v.ts < originTs { diff --git a/br/pkg/restore/rawkv_client_test.go b/br/pkg/restore/rawkv/rawkv_client_test.go similarity index 94% rename from br/pkg/restore/rawkv_client_test.go rename to br/pkg/restore/rawkv/rawkv_client_test.go index 8ddc4bab90..79b12c6829 100644 --- a/br/pkg/restore/rawkv_client_test.go +++ b/br/pkg/restore/rawkv/rawkv_client_test.go @@ -1,6 +1,6 @@ // Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. -package restore_test +package rawkv_test import ( "bytes" @@ -10,7 +10,7 @@ import ( "github.com/pingcap/errors" berrors "github.com/pingcap/tidb/br/pkg/errors" - "github.com/pingcap/tidb/br/pkg/restore" + rawclient "github.com/pingcap/tidb/br/pkg/restore/rawkv" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/util/codec" "github.com/stretchr/testify/require" @@ -57,7 +57,7 @@ func (f *fakeRawkvClient) Close() error { func TestRawKVBatchClient(t *testing.T) { fakeRawkvClient := newFakeRawkvClient() batchCount := 3 - rawkvBatchClient := restore.NewRawKVBatchClient(fakeRawkvClient, batchCount) + rawkvBatchClient := rawclient.NewRawKVBatchClient(fakeRawkvClient, batchCount) defer rawkvBatchClient.Close() rawkvBatchClient.SetColumnFamily("default") @@ -93,7 +93,7 @@ func TestRawKVBatchClient(t *testing.T) { func TestRawKVBatchClientDuplicated(t *testing.T) { fakeRawkvClient := newFakeRawkvClient() batchCount := 3 - rawkvBatchClient := restore.NewRawKVBatchClient(fakeRawkvClient, batchCount) + rawkvBatchClient := rawclient.NewRawKVBatchClient(fakeRawkvClient, batchCount) defer rawkvBatchClient.Close() rawkvBatchClient.SetColumnFamily("default") diff --git a/br/pkg/restore/split_test.go b/br/pkg/restore/split_test.go deleted file mode 100644 index ee54ea291c..0000000000 --- a/br/pkg/restore/split_test.go +++ /dev/null @@ -1,713 +0,0 @@ -// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. 
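The relocated raw KV batch client above buffers writes keyed by the timestamp-stripped key and, per its doc comment, flushes once the buffer is full. Only part of Put's body is visible in the hunk, so the following is a rough, self-contained sketch of that buffering pattern rather than a copy of the real method; batchBuffer, the flush callback, and the hard-coded batch size are all invented for illustration (the real client flushes through tikv's rawkv client and tracks the column family separately).

package main

import "fmt"

type versioned struct {
	value []byte
	ts    uint64
}

// batchBuffer sketches the idea behind RawKVBatchClient.Put: deduplicate keys
// after their timestamp suffix has been stripped, keep only the newest
// version per key, and flush once the buffer reaches the batch size.
type batchBuffer struct {
	kvs       map[string]versioned
	batchSize int
	flush     func(map[string]versioned)
}

func (b *batchBuffer) put(truncatedKey, value []byte, originTS uint64) {
	sk := string(truncatedKey)
	if v, ok := b.kvs[sk]; !ok || v.ts < originTS {
		b.kvs[sk] = versioned{value: value, ts: originTS}
	}
	if len(b.kvs) >= b.batchSize {
		b.flush(b.kvs)
		b.kvs = make(map[string]versioned)
	}
}

func main() {
	b := &batchBuffer{
		kvs:       make(map[string]versioned),
		batchSize: 2,
		flush: func(kvs map[string]versioned) {
			fmt.Println("flushing", len(kvs), "keys")
		},
	}
	b.put([]byte("k1"), []byte("v1@5"), 5)
	b.put([]byte("k1"), []byte("v1@9"), 9) // same key, newer ts wins; still one buffered key
	b.put([]byte("k2"), []byte("v2@7"), 7) // reaches the batch size of 2, triggering a flush
}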
- -package restore - -import ( - "bytes" - "context" - "fmt" - "sync" - "testing" - "time" - - "github.com/pingcap/errors" - backuppb "github.com/pingcap/kvproto/pkg/brpb" - "github.com/pingcap/kvproto/pkg/import_sstpb" - "github.com/pingcap/kvproto/pkg/metapb" - "github.com/pingcap/kvproto/pkg/pdpb" - "github.com/pingcap/log" - berrors "github.com/pingcap/tidb/br/pkg/errors" - "github.com/pingcap/tidb/br/pkg/glue" - "github.com/pingcap/tidb/br/pkg/logutil" - "github.com/pingcap/tidb/br/pkg/restore/split" - "github.com/pingcap/tidb/br/pkg/rtree" - "github.com/pingcap/tidb/br/pkg/utils/iter" - "github.com/pingcap/tidb/pkg/parser/model" - "github.com/pingcap/tidb/pkg/store/pdtypes" - "github.com/pingcap/tidb/pkg/tablecodec" - "github.com/pingcap/tidb/pkg/util/codec" - "github.com/stretchr/testify/require" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" -) - -type TestClient struct { - split.SplitClient - - mu sync.RWMutex - stores map[uint64]*metapb.Store - regions map[uint64]*split.RegionInfo - regionsInfo *pdtypes.RegionTree // For now it's only used in ScanRegions - nextRegionID uint64 - - scattered map[uint64]bool - InjectErr bool - InjectTimes int32 -} - -func NewTestClient( - stores map[uint64]*metapb.Store, - regions map[uint64]*split.RegionInfo, - nextRegionID uint64, -) *TestClient { - regionsInfo := &pdtypes.RegionTree{} - for _, regionInfo := range regions { - regionsInfo.SetRegion(pdtypes.NewRegionInfo(regionInfo.Region, regionInfo.Leader)) - } - return &TestClient{ - stores: stores, - regions: regions, - regionsInfo: regionsInfo, - nextRegionID: nextRegionID, - scattered: map[uint64]bool{}, - } -} - -func (c *TestClient) GetAllRegions() map[uint64]*split.RegionInfo { - c.mu.RLock() - defer c.mu.RUnlock() - return c.regions -} - -func (c *TestClient) GetStore(ctx context.Context, storeID uint64) (*metapb.Store, error) { - c.mu.RLock() - defer c.mu.RUnlock() - store, ok := c.stores[storeID] - if !ok { - return nil, errors.Errorf("store not found") - } - return store, nil -} - -func (c *TestClient) GetRegion(ctx context.Context, key []byte) (*split.RegionInfo, error) { - c.mu.RLock() - defer c.mu.RUnlock() - for _, region := range c.regions { - if bytes.Compare(key, region.Region.StartKey) >= 0 && - (len(region.Region.EndKey) == 0 || bytes.Compare(key, region.Region.EndKey) < 0) { - return region, nil - } - } - return nil, errors.Errorf("region not found: key=%s", string(key)) -} - -func (c *TestClient) GetRegionByID(ctx context.Context, regionID uint64) (*split.RegionInfo, error) { - c.mu.RLock() - defer c.mu.RUnlock() - region, ok := c.regions[regionID] - if !ok { - return nil, errors.Errorf("region not found: id=%d", regionID) - } - return region, nil -} - -func (c *TestClient) SplitWaitAndScatter(_ context.Context, _ *split.RegionInfo, keys [][]byte) ([]*split.RegionInfo, error) { - c.mu.Lock() - defer c.mu.Unlock() - newRegions := make([]*split.RegionInfo, 0) - for _, key := range keys { - var target *split.RegionInfo - splitKey := codec.EncodeBytes([]byte{}, key) - for _, region := range c.regions { - if region.ContainsInterior(splitKey) { - target = region - } - } - if target == nil { - continue - } - newRegion := &split.RegionInfo{ - Region: &metapb.Region{ - Peers: target.Region.Peers, - Id: c.nextRegionID, - StartKey: target.Region.StartKey, - EndKey: splitKey, - }, - } - c.regions[c.nextRegionID] = newRegion - c.nextRegionID++ - target.Region.StartKey = splitKey - c.regions[target.Region.Id] = target - newRegions = append(newRegions, newRegion) - } - 
return newRegions, nil -} - -func (c *TestClient) GetOperator(context.Context, uint64) (*pdpb.GetOperatorResponse, error) { - return &pdpb.GetOperatorResponse{ - Header: new(pdpb.ResponseHeader), - }, nil -} - -func (c *TestClient) ScanRegions(ctx context.Context, key, endKey []byte, limit int) ([]*split.RegionInfo, error) { - if c.InjectErr && c.InjectTimes > 0 { - c.InjectTimes -= 1 - return nil, status.Error(codes.Unavailable, "not leader") - } - if len(key) != 0 && bytes.Equal(key, endKey) { - return nil, status.Error(codes.Internal, "key and endKey are the same") - } - - infos := c.regionsInfo.ScanRange(key, endKey, limit) - regions := make([]*split.RegionInfo, 0, len(infos)) - for _, info := range infos { - regions = append(regions, &split.RegionInfo{ - Region: info.Meta, - Leader: info.Leader, - }) - } - return regions, nil -} - -func (c *TestClient) WaitRegionsScattered(context.Context, []*split.RegionInfo) (int, error) { - return 0, nil -} - -func TestScanEmptyRegion(t *testing.T) { - mockPDCli := split.NewMockPDClientForSplit() - mockPDCli.SetRegions([][]byte{{}, {12}, {34}, {}}) - client := split.NewClient(mockPDCli, nil, nil, 100, 4) - ranges := initRanges() - // make ranges has only one - ranges = ranges[0:1] - regionSplitter := NewRegionSplitter(client) - - ctx := context.Background() - err := regionSplitter.ExecuteSplit(ctx, ranges) - // should not return error with only one range entry - require.NoError(t, err) -} - -// region: [, aay), [aay, bba), [bba, bbh), [bbh, cca), [cca, ) -// range: [aaa, aae), [aae, aaz), [ccd, ccf), [ccf, ccj) -// rewrite rules: aa -> xx, cc -> bb -// expected regions after split: -// -// [, aay), [aay, bba), [bba, bbf), [bbf, bbh), [bbh, bbj), -// [bbj, cca), [cca, xxe), [xxe, xxz), [xxz, ) -func TestSplitAndScatter(t *testing.T) { - rangeBoundaries := [][]byte{[]byte(""), []byte("aay"), []byte("bba"), []byte("bbh"), []byte("cca"), []byte("")} - encodeBytes(rangeBoundaries) - mockPDCli := split.NewMockPDClientForSplit() - mockPDCli.SetRegions(rangeBoundaries) - client := split.NewClient(mockPDCli, nil, nil, 100, 4) - regionSplitter := NewRegionSplitter(client) - ctx := context.Background() - - ranges := initRanges() - rules := initRewriteRules() - for i, rg := range ranges { - tmp, err := RewriteRange(&rg, rules) - require.NoError(t, err) - ranges[i] = *tmp - } - err := regionSplitter.ExecuteSplit(ctx, ranges) - require.NoError(t, err) - regions := mockPDCli.Regions.ScanRange(nil, nil, 100) - expected := [][]byte{[]byte(""), []byte("aay"), []byte("bba"), []byte("bbf"), []byte("bbh"), []byte("bbj"), []byte("cca"), []byte("xxe"), []byte("xxz"), []byte("")} - encodeBytes(expected) - require.Len(t, regions, len(expected)-1) - for i, region := range regions { - require.Equal(t, expected[i], region.Meta.StartKey) - require.Equal(t, expected[i+1], region.Meta.EndKey) - } -} - -func encodeBytes(keys [][]byte) { - for i := range keys { - if len(keys[i]) == 0 { - continue - } - keys[i] = codec.EncodeBytes(nil, keys[i]) - } -} - -func TestRawSplit(t *testing.T) { - // Fix issue #36490. 
- ranges := []rtree.Range{ - { - StartKey: []byte{0}, - EndKey: []byte{}, - }, - } - ctx := context.Background() - rangeBoundaries := [][]byte{[]byte(""), []byte("aay"), []byte("bba"), []byte("bbh"), []byte("cca"), []byte("")} - mockPDCli := split.NewMockPDClientForSplit() - mockPDCli.SetRegions(rangeBoundaries) - client := split.NewClient(mockPDCli, nil, nil, 100, 4, split.WithRawKV()) - - regionSplitter := NewRegionSplitter(client) - err := regionSplitter.ExecuteSplit(ctx, ranges) - require.NoError(t, err) - - regions := mockPDCli.Regions.ScanRange(nil, nil, 100) - require.Len(t, regions, len(rangeBoundaries)-1) - for i, region := range regions { - require.Equal(t, rangeBoundaries[i], region.Meta.StartKey) - require.Equal(t, rangeBoundaries[i+1], region.Meta.EndKey) - } -} - -// range: [aaa, aae), [aae, aaz), [ccd, ccf), [ccf, ccj) -func initRanges() []rtree.Range { - var ranges [4]rtree.Range - ranges[0] = rtree.Range{ - StartKey: []byte("aaa"), - EndKey: []byte("aae"), - } - ranges[1] = rtree.Range{ - StartKey: []byte("aae"), - EndKey: []byte("aaz"), - } - ranges[2] = rtree.Range{ - StartKey: []byte("ccd"), - EndKey: []byte("ccf"), - } - ranges[3] = rtree.Range{ - StartKey: []byte("ccf"), - EndKey: []byte("ccj"), - } - return ranges[:] -} - -func initRewriteRules() *RewriteRules { - var rules [2]*import_sstpb.RewriteRule - rules[0] = &import_sstpb.RewriteRule{ - OldKeyPrefix: []byte("aa"), - NewKeyPrefix: []byte("xx"), - } - rules[1] = &import_sstpb.RewriteRule{ - OldKeyPrefix: []byte("cc"), - NewKeyPrefix: []byte("bb"), - } - return &RewriteRules{ - Data: rules[:], - } -} - -type fakeRestorer struct { - mu sync.Mutex - - errorInSplit bool - splitRanges []rtree.Range - restoredFiles []*backuppb.File - tableIDIsInsequence bool -} - -func (f *fakeRestorer) SplitRanges(ctx context.Context, ranges []rtree.Range, updateCh glue.Progress, isRawKv bool) error { - f.mu.Lock() - defer f.mu.Unlock() - - if ctx.Err() != nil { - return ctx.Err() - } - f.splitRanges = append(f.splitRanges, ranges...) - if f.errorInSplit { - err := errors.Annotatef(berrors.ErrRestoreSplitFailed, - "the key space takes many efforts and finally get together, how dare you split them again... :<") - log.Error("error happens :3", logutil.ShortError(err)) - return err - } - return nil -} - -func (f *fakeRestorer) RestoreSSTFiles(ctx context.Context, tableIDWithFiles []TableIDWithFiles, updateCh glue.Progress) error { - f.mu.Lock() - defer f.mu.Unlock() - - if ctx.Err() != nil { - return ctx.Err() - } - for i, tableIDWithFile := range tableIDWithFiles { - if int64(i) != tableIDWithFile.TableID { - f.tableIDIsInsequence = false - } - f.restoredFiles = append(f.restoredFiles, tableIDWithFile.Files...) 
- } - err := errors.Annotatef(berrors.ErrRestoreWriteAndIngest, "the files to restore are taken by a hijacker, meow :3") - log.Error("error happens :3", logutil.ShortError(err)) - return err -} - -func fakeRanges(keys ...string) (r DrainResult) { - for i := range keys { - if i+1 == len(keys) { - return - } - r.Ranges = append(r.Ranges, rtree.Range{ - StartKey: []byte(keys[i]), - EndKey: []byte(keys[i+1]), - Files: []*backuppb.File{{Name: "fake.sst"}}, - }) - r.TableEndOffsetInRanges = append(r.TableEndOffsetInRanges, len(r.Ranges)) - r.TablesToSend = append(r.TablesToSend, CreatedTable{ - Table: &model.TableInfo{ - ID: int64(i), - }, - }) - } - return -} - -type errorInTimeSink struct { - ctx context.Context - errCh chan error - t *testing.T -} - -func (e errorInTimeSink) EmitTables(tables ...CreatedTable) {} - -func (e errorInTimeSink) EmitError(err error) { - e.errCh <- err -} - -func (e errorInTimeSink) Close() {} - -func (e errorInTimeSink) Wait() { - select { - case <-e.ctx.Done(): - e.t.Logf("The context is canceled but no error happen") - e.t.FailNow() - case <-e.errCh: - } -} - -func assertErrorEmitInTime(ctx context.Context, t *testing.T) errorInTimeSink { - errCh := make(chan error, 1) - return errorInTimeSink{ - ctx: ctx, - errCh: errCh, - t: t, - } -} - -func TestRestoreFailed(t *testing.T) { - ranges := []DrainResult{ - fakeRanges("aax", "abx", "abz"), - fakeRanges("abz", "bbz", "bcy"), - fakeRanges("bcy", "cad", "xxy"), - } - r := &fakeRestorer{ - tableIDIsInsequence: true, - } - sender, err := NewTiKVSender(context.TODO(), r, nil, 1, string(FineGrained)) - require.NoError(t, err) - dctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - sink := assertErrorEmitInTime(dctx, t) - sender.PutSink(sink) - for _, r := range ranges { - sender.RestoreBatch(r) - } - sink.Wait() - sink.Close() - sender.Close() - require.GreaterOrEqual(t, len(r.restoredFiles), 1) - require.True(t, r.tableIDIsInsequence) -} - -func TestSplitFailed(t *testing.T) { - ranges := []DrainResult{ - fakeRanges("aax", "abx", "abz"), - fakeRanges("abz", "bbz", "bcy"), - fakeRanges("bcy", "cad", "xxy"), - } - r := &fakeRestorer{errorInSplit: true, tableIDIsInsequence: true} - sender, err := NewTiKVSender(context.TODO(), r, nil, 1, string(FineGrained)) - require.NoError(t, err) - dctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - sink := assertErrorEmitInTime(dctx, t) - sender.PutSink(sink) - for _, r := range ranges { - sender.RestoreBatch(r) - } - sink.Wait() - sender.Close() - require.GreaterOrEqual(t, len(r.splitRanges), 2) - require.Len(t, r.restoredFiles, 0) - require.True(t, r.tableIDIsInsequence) -} - -func keyWithTablePrefix(tableID int64, key string) []byte { - rawKey := append(tablecodec.GenTableRecordPrefix(tableID), []byte(key)...) 
- return codec.EncodeBytes([]byte{}, rawKey) -} - -func TestSplitPoint(t *testing.T) { - ctx := context.Background() - var oldTableID int64 = 50 - var tableID int64 = 100 - rewriteRules := &RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), - NewKeyPrefix: tablecodec.EncodeTablePrefix(tableID), - }, - }, - } - - // range: b c d e g i - // +---+ +---+ +---------+ - // +-------------+----------+---------+ - // region: a f h j - splitHelper := split.NewSplitHelper() - splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "b"), EndKey: keyWithTablePrefix(oldTableID, "c")}, Value: split.Value{Size: 100, Number: 100}}) - splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "d"), EndKey: keyWithTablePrefix(oldTableID, "e")}, Value: split.Value{Size: 200, Number: 200}}) - splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "g"), EndKey: keyWithTablePrefix(oldTableID, "i")}, Value: split.Value{Size: 300, Number: 300}}) - client := newFakeSplitClient() - client.AppendRegion(keyWithTablePrefix(tableID, "a"), keyWithTablePrefix(tableID, "f")) - client.AppendRegion(keyWithTablePrefix(tableID, "f"), keyWithTablePrefix(tableID, "h")) - client.AppendRegion(keyWithTablePrefix(tableID, "h"), keyWithTablePrefix(tableID, "j")) - client.AppendRegion(keyWithTablePrefix(tableID, "j"), keyWithTablePrefix(tableID+1, "a")) - - iter := NewSplitHelperIteratorForTest(splitHelper, tableID, rewriteRules) - err := SplitPoint(ctx, iter, client, func(ctx context.Context, rs *RegionSplitter, u uint64, o int64, ri *split.RegionInfo, v []split.Valued) error { - require.Equal(t, u, uint64(0)) - require.Equal(t, o, int64(0)) - require.Equal(t, ri.Region.StartKey, keyWithTablePrefix(tableID, "a")) - require.Equal(t, ri.Region.EndKey, keyWithTablePrefix(tableID, "f")) - require.EqualValues(t, v[0].Key.StartKey, keyWithTablePrefix(tableID, "b")) - require.EqualValues(t, v[0].Key.EndKey, keyWithTablePrefix(tableID, "c")) - require.EqualValues(t, v[1].Key.StartKey, keyWithTablePrefix(tableID, "d")) - require.EqualValues(t, v[1].Key.EndKey, keyWithTablePrefix(tableID, "e")) - require.Equal(t, len(v), 2) - return nil - }) - require.NoError(t, err) -} - -func getCharFromNumber(prefix string, i int) string { - c := '1' + (i % 10) - b := '1' + (i%100)/10 - a := '1' + i/100 - return fmt.Sprintf("%s%c%c%c", prefix, a, b, c) -} - -func TestSplitPoint2(t *testing.T) { - ctx := context.Background() - var oldTableID int64 = 50 - var tableID int64 = 100 - rewriteRules := &RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), - NewKeyPrefix: tablecodec.EncodeTablePrefix(tableID), - }, - }, - } - - // range: b c d e f i j k l n - // +---+ +---+ +-----------------+ +----+ +--------+ - // +---------------+--+.....+----+------------+---------+ - // region: a g >128 h m o - splitHelper := split.NewSplitHelper() - splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "b"), EndKey: keyWithTablePrefix(oldTableID, "c")}, Value: split.Value{Size: 100, Number: 100}}) - splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "d"), EndKey: keyWithTablePrefix(oldTableID, "e")}, Value: split.Value{Size: 200, Number: 200}}) - splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "f"), EndKey: keyWithTablePrefix(oldTableID, "i")}, Value: 
split.Value{Size: 300, Number: 300}}) - splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "j"), EndKey: keyWithTablePrefix(oldTableID, "k")}, Value: split.Value{Size: 200, Number: 200}}) - splitHelper.Merge(split.Valued{Key: split.Span{StartKey: keyWithTablePrefix(oldTableID, "l"), EndKey: keyWithTablePrefix(oldTableID, "n")}, Value: split.Value{Size: 200, Number: 200}}) - client := newFakeSplitClient() - client.AppendRegion(keyWithTablePrefix(tableID, "a"), keyWithTablePrefix(tableID, "g")) - client.AppendRegion(keyWithTablePrefix(tableID, "g"), keyWithTablePrefix(tableID, getCharFromNumber("g", 0))) - for i := 0; i < 256; i++ { - client.AppendRegion(keyWithTablePrefix(tableID, getCharFromNumber("g", i)), keyWithTablePrefix(tableID, getCharFromNumber("g", i+1))) - } - client.AppendRegion(keyWithTablePrefix(tableID, getCharFromNumber("g", 256)), keyWithTablePrefix(tableID, "h")) - client.AppendRegion(keyWithTablePrefix(tableID, "h"), keyWithTablePrefix(tableID, "m")) - client.AppendRegion(keyWithTablePrefix(tableID, "m"), keyWithTablePrefix(tableID, "o")) - client.AppendRegion(keyWithTablePrefix(tableID, "o"), keyWithTablePrefix(tableID+1, "a")) - - firstSplit := true - iter := NewSplitHelperIteratorForTest(splitHelper, tableID, rewriteRules) - err := SplitPoint(ctx, iter, client, func(ctx context.Context, rs *RegionSplitter, u uint64, o int64, ri *split.RegionInfo, v []split.Valued) error { - if firstSplit { - require.Equal(t, u, uint64(0)) - require.Equal(t, o, int64(0)) - require.Equal(t, ri.Region.StartKey, keyWithTablePrefix(tableID, "a")) - require.Equal(t, ri.Region.EndKey, keyWithTablePrefix(tableID, "g")) - require.EqualValues(t, v[0].Key.StartKey, keyWithTablePrefix(tableID, "b")) - require.EqualValues(t, v[0].Key.EndKey, keyWithTablePrefix(tableID, "c")) - require.EqualValues(t, v[1].Key.StartKey, keyWithTablePrefix(tableID, "d")) - require.EqualValues(t, v[1].Key.EndKey, keyWithTablePrefix(tableID, "e")) - require.EqualValues(t, v[2].Key.StartKey, keyWithTablePrefix(tableID, "f")) - require.EqualValues(t, v[2].Key.EndKey, keyWithTablePrefix(tableID, "g")) - require.Equal(t, v[2].Value.Size, uint64(1)) - require.Equal(t, v[2].Value.Number, int64(1)) - require.Equal(t, len(v), 3) - firstSplit = false - } else { - require.Equal(t, u, uint64(1)) - require.Equal(t, o, int64(1)) - require.Equal(t, ri.Region.StartKey, keyWithTablePrefix(tableID, "h")) - require.Equal(t, ri.Region.EndKey, keyWithTablePrefix(tableID, "m")) - require.EqualValues(t, v[0].Key.StartKey, keyWithTablePrefix(tableID, "j")) - require.EqualValues(t, v[0].Key.EndKey, keyWithTablePrefix(tableID, "k")) - require.EqualValues(t, v[1].Key.StartKey, keyWithTablePrefix(tableID, "l")) - require.EqualValues(t, v[1].Key.EndKey, keyWithTablePrefix(tableID, "m")) - require.Equal(t, v[1].Value.Size, uint64(100)) - require.Equal(t, v[1].Value.Number, int64(100)) - require.Equal(t, len(v), 2) - } - return nil - }) - require.NoError(t, err) -} - -type fakeSplitClient struct { - split.SplitClient - regions []*split.RegionInfo -} - -func newFakeSplitClient() *fakeSplitClient { - return &fakeSplitClient{ - regions: make([]*split.RegionInfo, 0), - } -} - -func (f *fakeSplitClient) AppendRegion(startKey, endKey []byte) { - f.regions = append(f.regions, &split.RegionInfo{ - Region: &metapb.Region{ - StartKey: startKey, - EndKey: endKey, - }, - }) -} - -func (f *fakeSplitClient) ScanRegions(ctx context.Context, startKey, endKey []byte, limit int) ([]*split.RegionInfo, error) { - result := 
make([]*split.RegionInfo, 0) - count := 0 - for _, rng := range f.regions { - if bytes.Compare(rng.Region.StartKey, endKey) <= 0 && bytes.Compare(rng.Region.EndKey, startKey) > 0 { - result = append(result, rng) - count++ - } - if count >= limit { - break - } - } - return result, nil -} - -func (f *fakeSplitClient) WaitRegionsScattered(context.Context, []*split.RegionInfo) (int, error) { - return 0, nil -} - -func TestGetRewriteTableID(t *testing.T) { - var tableID int64 = 76 - var oldTableID int64 = 80 - { - rewriteRules := &RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), - NewKeyPrefix: tablecodec.EncodeTablePrefix(tableID), - }, - }, - } - - newTableID := GetRewriteTableID(oldTableID, rewriteRules) - require.Equal(t, tableID, newTableID) - } - - { - rewriteRules := &RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - OldKeyPrefix: tablecodec.GenTableRecordPrefix(oldTableID), - NewKeyPrefix: tablecodec.GenTableRecordPrefix(tableID), - }, - }, - } - - newTableID := GetRewriteTableID(oldTableID, rewriteRules) - require.Equal(t, tableID, newTableID) - } -} - -type mockLogIter struct { - next int -} - -func (m *mockLogIter) TryNext(ctx context.Context) iter.IterResult[*LogDataFileInfo] { - if m.next > 10000 { - return iter.Done[*LogDataFileInfo]() - } - m.next += 1 - return iter.Emit(&LogDataFileInfo{ - DataFileInfo: &backuppb.DataFileInfo{ - StartKey: []byte(fmt.Sprintf("a%d", m.next)), - EndKey: []byte("b"), - Length: 1024, // 1 KB - }, - }) -} - -func TestLogFilesIterWithSplitHelper(t *testing.T) { - var tableID int64 = 76 - var oldTableID int64 = 80 - rewriteRules := &RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), - NewKeyPrefix: tablecodec.EncodeTablePrefix(tableID), - }, - }, - } - rewriteRulesMap := map[int64]*RewriteRules{ - oldTableID: rewriteRules, - } - mockIter := &mockLogIter{} - ctx := context.Background() - logIter := NewLogFilesIterWithSplitHelper(mockIter, rewriteRulesMap, newFakeSplitClient(), 144*1024*1024, 1440000) - next := 0 - for r := logIter.TryNext(ctx); !r.Finished; r = logIter.TryNext(ctx) { - require.NoError(t, r.Err) - next += 1 - require.Equal(t, []byte(fmt.Sprintf("a%d", next)), r.Item.StartKey) - } -} - -func regionInfo(startKey, endKey string) *split.RegionInfo { - return &split.RegionInfo{ - Region: &metapb.Region{ - StartKey: []byte(startKey), - EndKey: []byte(endKey), - }, - } -} - -func TestSplitCheckPartRegionConsistency(t *testing.T) { - var ( - startKey []byte = []byte("a") - endKey []byte = []byte("f") - err error - ) - err = split.CheckPartRegionConsistency(startKey, endKey, nil) - require.Error(t, err) - err = split.CheckPartRegionConsistency(startKey, endKey, []*split.RegionInfo{ - regionInfo("b", "c"), - }) - require.Error(t, err) - err = split.CheckPartRegionConsistency(startKey, endKey, []*split.RegionInfo{ - regionInfo("a", "c"), - regionInfo("d", "e"), - }) - require.Error(t, err) - err = split.CheckPartRegionConsistency(startKey, endKey, []*split.RegionInfo{ - regionInfo("a", "c"), - regionInfo("c", "d"), - }) - require.NoError(t, err) - err = split.CheckPartRegionConsistency(startKey, endKey, []*split.RegionInfo{ - regionInfo("a", "c"), - regionInfo("c", "d"), - regionInfo("d", "f"), - }) - require.NoError(t, err) - err = split.CheckPartRegionConsistency(startKey, endKey, []*split.RegionInfo{ - regionInfo("a", "c"), - regionInfo("c", "z"), - }) - require.NoError(t, err) -} diff --git a/br/pkg/restore/util.go 
b/br/pkg/restore/util.go index 9d8855546a..99d6b00b27 100644 --- a/br/pkg/restore/util.go +++ b/br/pkg/restore/util.go @@ -3,327 +3,31 @@ package restore import ( - "bytes" "context" "fmt" - "regexp" - "sort" - "strings" - "github.com/emirpasic/gods/maps/treemap" _ "github.com/go-sql-driver/mysql" // mysql driver - "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" - "github.com/pingcap/kvproto/pkg/import_sstpb" - "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" - berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/glue" "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/restore/split" + restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/rtree" "github.com/pingcap/tidb/br/pkg/utils" - "github.com/pingcap/tidb/pkg/parser/model" - "github.com/pingcap/tidb/pkg/tablecodec" - "github.com/pingcap/tidb/pkg/util/codec" - "github.com/pingcap/tidb/pkg/util/redact" "go.uber.org/zap" "go.uber.org/zap/zapcore" ) -var ( - quoteRegexp = regexp.MustCompile("`(?:[^`]|``)*`") +type Granularity string + +const ( + FineGrained Granularity = "fine-grained" + CoarseGrained Granularity = "coarse-grained" + + maxSplitKeysOnce = 10240 ) -// AppliedFile has two types for now. -// 1. SST file used by full backup/restore. -// 2. KV file used by pitr restore. -type AppliedFile interface { - GetStartKey() []byte - GetEndKey() []byte -} - -// getPartitionIDMap creates a map maping old physical ID to new physical ID. -func getPartitionIDMap(newTable, oldTable *model.TableInfo) map[int64]int64 { - tableIDMap := make(map[int64]int64) - - if oldTable.Partition != nil && newTable.Partition != nil { - nameMapID := make(map[string]int64) - - for _, old := range oldTable.Partition.Definitions { - nameMapID[old.Name.L] = old.ID - } - for _, new := range newTable.Partition.Definitions { - if oldID, exist := nameMapID[new.Name.L]; exist { - tableIDMap[oldID] = new.ID - } - } - } - - return tableIDMap -} - -// getTableIDMap creates a map maping old tableID to new tableID. -func getTableIDMap(newTable, oldTable *model.TableInfo) map[int64]int64 { - tableIDMap := getPartitionIDMap(newTable, oldTable) - tableIDMap[oldTable.ID] = newTable.ID - return tableIDMap -} - -// getIndexIDMap creates a map maping old indexID to new indexID. -func getIndexIDMap(newTable, oldTable *model.TableInfo) map[int64]int64 { - indexIDMap := make(map[int64]int64) - for _, srcIndex := range oldTable.Indices { - for _, destIndex := range newTable.Indices { - if srcIndex.Name == destIndex.Name { - indexIDMap[srcIndex.ID] = destIndex.ID - } - } - } - - return indexIDMap -} - -// GetRewriteRules returns the rewrite rule of the new table and the old table. -// getDetailRule is used for normal backup & restore. -// if set to true, means we collect the rules like tXXX_r, tYYY_i. -// if set to false, means we only collect the rules contain table_id, tXXX, tYYY. 
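// [Editorial illustration, not part of the patch] The doc comment above describes two rule
// shapes. A minimal, hypothetical sketch of the prefixes each mode produces (the IDs 50 -> 100
// are made up); it only uses tablecodec helpers that already appear in this file:
package main

import (
	"fmt"

	"github.com/pingcap/tidb/pkg/tablecodec"
)

func main() {
	oldID, newID := int64(50), int64(100)

	// getDetailRule == true: one rule per record prefix plus one per matched index.
	fmt.Printf("record: %X -> %X\n",
		tablecodec.GenTableRecordPrefix(oldID), tablecodec.GenTableRecordPrefix(newID))
	fmt.Printf("index : %X -> %X\n",
		tablecodec.EncodeTableIndexPrefix(oldID, 1), tablecodec.EncodeTableIndexPrefix(newID, 1))

	// getDetailRule == false: a single whole-table prefix rule.
	fmt.Printf("table : %X -> %X\n",
		tablecodec.EncodeTablePrefix(oldID), tablecodec.EncodeTablePrefix(newID))
}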
-func GetRewriteRules( - newTable, oldTable *model.TableInfo, newTimeStamp uint64, getDetailRule bool, -) *RewriteRules { - tableIDs := getTableIDMap(newTable, oldTable) - indexIDs := getIndexIDMap(newTable, oldTable) - - dataRules := make([]*import_sstpb.RewriteRule, 0) - for oldTableID, newTableID := range tableIDs { - if getDetailRule { - dataRules = append(dataRules, &import_sstpb.RewriteRule{ - OldKeyPrefix: tablecodec.GenTableRecordPrefix(oldTableID), - NewKeyPrefix: tablecodec.GenTableRecordPrefix(newTableID), - NewTimestamp: newTimeStamp, - }) - for oldIndexID, newIndexID := range indexIDs { - dataRules = append(dataRules, &import_sstpb.RewriteRule{ - OldKeyPrefix: tablecodec.EncodeTableIndexPrefix(oldTableID, oldIndexID), - NewKeyPrefix: tablecodec.EncodeTableIndexPrefix(newTableID, newIndexID), - NewTimestamp: newTimeStamp, - }) - } - } else { - dataRules = append(dataRules, &import_sstpb.RewriteRule{ - OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), - NewKeyPrefix: tablecodec.EncodeTablePrefix(newTableID), - NewTimestamp: newTimeStamp, - }) - } - } - - return &RewriteRules{ - Data: dataRules, - } -} - -func GetRewriteRulesMap( - newTable, oldTable *model.TableInfo, newTimeStamp uint64, getDetailRule bool, -) map[int64]*RewriteRules { - rules := make(map[int64]*RewriteRules) - - tableIDs := getTableIDMap(newTable, oldTable) - indexIDs := getIndexIDMap(newTable, oldTable) - - for oldTableID, newTableID := range tableIDs { - dataRules := make([]*import_sstpb.RewriteRule, 0) - if getDetailRule { - dataRules = append(dataRules, &import_sstpb.RewriteRule{ - OldKeyPrefix: tablecodec.GenTableRecordPrefix(oldTableID), - NewKeyPrefix: tablecodec.GenTableRecordPrefix(newTableID), - NewTimestamp: newTimeStamp, - }) - for oldIndexID, newIndexID := range indexIDs { - dataRules = append(dataRules, &import_sstpb.RewriteRule{ - OldKeyPrefix: tablecodec.EncodeTableIndexPrefix(oldTableID, oldIndexID), - NewKeyPrefix: tablecodec.EncodeTableIndexPrefix(newTableID, newIndexID), - NewTimestamp: newTimeStamp, - }) - } - } else { - dataRules = append(dataRules, &import_sstpb.RewriteRule{ - OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), - NewKeyPrefix: tablecodec.EncodeTablePrefix(newTableID), - NewTimestamp: newTimeStamp, - }) - } - - rules[oldTableID] = &RewriteRules{ - Data: dataRules, - } - } - - return rules -} - -// GetRewriteRuleOfTable returns a rewrite rule from t_{oldID} to t_{newID}. -func GetRewriteRuleOfTable( - oldTableID, newTableID int64, - newTimeStamp uint64, - indexIDs map[int64]int64, - getDetailRule bool, -) *RewriteRules { - dataRules := make([]*import_sstpb.RewriteRule, 0) - - if getDetailRule { - dataRules = append(dataRules, &import_sstpb.RewriteRule{ - OldKeyPrefix: tablecodec.GenTableRecordPrefix(oldTableID), - NewKeyPrefix: tablecodec.GenTableRecordPrefix(newTableID), - NewTimestamp: newTimeStamp, - }) - for oldIndexID, newIndexID := range indexIDs { - dataRules = append(dataRules, &import_sstpb.RewriteRule{ - OldKeyPrefix: tablecodec.EncodeTableIndexPrefix(oldTableID, oldIndexID), - NewKeyPrefix: tablecodec.EncodeTableIndexPrefix(newTableID, newIndexID), - NewTimestamp: newTimeStamp, - }) - } - } else { - dataRules = append(dataRules, &import_sstpb.RewriteRule{ - OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), - NewKeyPrefix: tablecodec.EncodeTablePrefix(newTableID), - NewTimestamp: newTimeStamp, - }) - } - - return &RewriteRules{Data: dataRules} -} - -// GetSSTMetaFromFile compares the keys in file, region and rewrite rules, then returns a sst conn. 
-// The range of the returned sst meta is [regionRule.NewKeyPrefix, append(regionRule.NewKeyPrefix, 0xff)]. -func GetSSTMetaFromFile( - id []byte, - file *backuppb.File, - region *metapb.Region, - regionRule *import_sstpb.RewriteRule, - rewriteMode RewriteMode, -) (meta *import_sstpb.SSTMeta, err error) { - r := *region - // If the rewrite mode is for keyspace, then the region bound should be decoded. - if rewriteMode == RewriteModeKeyspace { - if len(region.GetStartKey()) > 0 { - _, r.StartKey, err = codec.DecodeBytes(region.GetStartKey(), nil) - if err != nil { - return - } - } - if len(region.GetEndKey()) > 0 { - _, r.EndKey, err = codec.DecodeBytes(region.GetEndKey(), nil) - if err != nil { - return - } - } - } - - // Get the column family of the file by the file name. - var cfName string - if strings.Contains(file.GetName(), defaultCFName) { - cfName = defaultCFName - } else if strings.Contains(file.GetName(), writeCFName) { - cfName = writeCFName - } - // Find the overlapped part between the file and the region. - // Here we rewrites the keys to compare with the keys of the region. - rangeStart := regionRule.GetNewKeyPrefix() - // rangeStart = max(rangeStart, region.StartKey) - if bytes.Compare(rangeStart, r.GetStartKey()) < 0 { - rangeStart = r.GetStartKey() - } - - // Append 10 * 0xff to make sure rangeEnd cover all file key - // If choose to regionRule.NewKeyPrefix + 1, it may cause WrongPrefix here - // https://github.com/tikv/tikv/blob/970a9bf2a9ea782a455ae579ad237aaf6cb1daec/ - // components/sst_importer/src/sst_importer.rs#L221 - suffix := []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} - rangeEnd := append(append([]byte{}, regionRule.GetNewKeyPrefix()...), suffix...) - // rangeEnd = min(rangeEnd, region.EndKey) - if len(r.GetEndKey()) > 0 && bytes.Compare(rangeEnd, r.GetEndKey()) > 0 { - rangeEnd = r.GetEndKey() - } - - if bytes.Compare(rangeStart, rangeEnd) > 0 { - log.Panic("range start exceed range end", - logutil.File(file), - logutil.Key("startKey", rangeStart), - logutil.Key("endKey", rangeEnd)) - } - - log.Debug("get sstMeta", - logutil.Region(region), - logutil.File(file), - logutil.Key("startKey", rangeStart), - logutil.Key("endKey", rangeEnd)) - - return &import_sstpb.SSTMeta{ - Uuid: id, - CfName: cfName, - Range: &import_sstpb.Range{ - Start: rangeStart, - End: rangeEnd, - }, - Length: file.GetSize_(), - RegionId: region.GetId(), - RegionEpoch: region.GetRegionEpoch(), - CipherIv: file.GetCipherIv(), - }, nil -} - -// makeDBPool makes a session pool with specficated size by sessionFactory. -func makeDBPool(size uint, dbFactory func() (*DB, error)) ([]*DB, error) { - dbPool := make([]*DB, 0, size) - for i := uint(0); i < size; i++ { - db, e := dbFactory() - if e != nil { - return dbPool, e - } - if db != nil { - dbPool = append(dbPool, db) - } - } - return dbPool, nil -} - -// EstimateRangeSize estimates the total range count by file. -func EstimateRangeSize(files []*backuppb.File) int { - result := 0 - for _, f := range files { - if strings.HasSuffix(f.GetName(), "_write.sst") { - result++ - } - } - return result -} - -// MapTableToFiles makes a map that mapping table ID to its backup files. -// aware that one file can and only can hold one table. 
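// [Editorial illustration, not part of the patch] MapTableToFiles groups backup files by the
// table ID decoded from their start key; a minimal sketch of that grouping with made-up file
// names, using the same DecodeTableID helper:
package main

import (
	"fmt"

	backuppb "github.com/pingcap/kvproto/pkg/brpb"
	"github.com/pingcap/tidb/pkg/tablecodec"
)

func main() {
	files := []*backuppb.File{
		{Name: "t1-1.sst", StartKey: tablecodec.EncodeTablePrefix(1), EndKey: tablecodec.EncodeTablePrefix(1)},
		{Name: "t1-2.sst", StartKey: tablecodec.EncodeTablePrefix(1), EndKey: tablecodec.EncodeTablePrefix(1)},
		{Name: "t2-1.sst", StartKey: tablecodec.EncodeTablePrefix(2), EndKey: tablecodec.EncodeTablePrefix(2)},
	}
	byTable := make(map[int64][]*backuppb.File)
	for _, f := range files {
		// The real function additionally panics when start and end keys decode to different
		// table IDs, or when the key is not a table key (DecodeTableID returns 0).
		id := tablecodec.DecodeTableID(f.GetStartKey())
		byTable[id] = append(byTable[id], f)
	}
	fmt.Println(len(byTable[1]), len(byTable[2])) // 2 1
}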
-func MapTableToFiles(files []*backuppb.File) map[int64][]*backuppb.File { - result := map[int64][]*backuppb.File{} - for _, file := range files { - tableID := tablecodec.DecodeTableID(file.GetStartKey()) - tableEndID := tablecodec.DecodeTableID(file.GetEndKey()) - if tableID != tableEndID { - log.Panic("key range spread between many files.", - zap.String("file name", file.Name), - logutil.Key("startKey", file.StartKey), - logutil.Key("endKey", file.EndKey)) - } - if tableID == 0 { - log.Panic("invalid table key of file", - zap.String("file name", file.Name), - logutil.Key("startKey", file.StartKey), - logutil.Key("endKey", file.EndKey)) - } - result[tableID] = append(result[tableID], file) - } - return result -} - // GoValidateFileRanges validate files by a stream of tables and yields // tables with range. func GoValidateFileRanges( @@ -359,14 +63,14 @@ func GoValidateFileRanges( } } for _, file := range files { - err := ValidateFileRewriteRule(file, t.RewriteRule) + err := restoreutils.ValidateFileRewriteRule(file, t.RewriteRule) if err != nil { errCh <- err return } } // Merge small ranges to reduce split and scatter regions. - ranges, stat, err := MergeAndRewriteFileRanges( + ranges, stat, err := restoreutils.MergeAndRewriteFileRanges( files, t.RewriteRule, splitSizeBytes, splitKeyCount) if err != nil { errCh <- err @@ -401,105 +105,6 @@ func GoValidateFileRanges( return outCh } -// ValidateFileRewriteRule uses rewrite rules to validate the ranges of a file. -func ValidateFileRewriteRule(file *backuppb.File, rewriteRules *RewriteRules) error { - // Check if the start key has a matched rewrite key - _, startRule := rewriteRawKey(file.GetStartKey(), rewriteRules) - if rewriteRules != nil && startRule == nil { - tableID := tablecodec.DecodeTableID(file.GetStartKey()) - log.Error( - "cannot find rewrite rule for file start key", - zap.Int64("tableID", tableID), - logutil.File(file), - ) - return errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule") - } - // Check if the end key has a matched rewrite key - _, endRule := rewriteRawKey(file.GetEndKey(), rewriteRules) - if rewriteRules != nil && endRule == nil { - tableID := tablecodec.DecodeTableID(file.GetEndKey()) - log.Error( - "cannot find rewrite rule for file end key", - zap.Int64("tableID", tableID), - logutil.File(file), - ) - return errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule") - } - // the rewrite rule of the start key and the end key should be equaled. - // i.e. there should only one rewrite rule for one file, a file should only be imported into one region. - if !bytes.Equal(startRule.GetNewKeyPrefix(), endRule.GetNewKeyPrefix()) { - startTableID := tablecodec.DecodeTableID(file.GetStartKey()) - endTableID := tablecodec.DecodeTableID(file.GetEndKey()) - log.Error( - "unexpected rewrite rules", - zap.Int64("startTableID", startTableID), - zap.Int64("endTableID", endTableID), - zap.Stringer("startRule", startRule), - zap.Stringer("endRule", endRule), - logutil.File(file), - ) - return errors.Annotatef(berrors.ErrRestoreInvalidRewrite, - "rewrite rule mismatch, the backup data may be dirty or from incompatible versions of BR, startKey rule: %X => %X, endKey rule: %X => %X", - startRule.OldKeyPrefix, startRule.NewKeyPrefix, endRule.OldKeyPrefix, endRule.NewKeyPrefix, - ) - } - return nil -} - -// Rewrites an encoded key and returns a encoded key. 
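// [Editorial illustration, not part of the patch] rewriteRawKey below is a plain prefix swap
// followed by memcomparable re-encoding; a standalone sketch with a made-up "t1" -> "t2" rule:
package main

import (
	"bytes"
	"fmt"

	"github.com/pingcap/tidb/pkg/util/codec"
)

func main() {
	oldPrefix, newPrefix := []byte("t1"), []byte("t2")
	rawKey := []byte("t1_r_0001") // hypothetical raw key

	if bytes.HasPrefix(rawKey, oldPrefix) {
		rewritten := bytes.Replace(rawKey, oldPrefix, newPrefix, 1)
		// Like rewriteRawKey, return the memcomparable-encoded form of the rewritten key.
		fmt.Printf("%X\n", codec.EncodeBytes(nil, rewritten))
	}
}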
-func rewriteEncodedKey(key []byte, rewriteRules *RewriteRules) ([]byte, *import_sstpb.RewriteRule) { - if rewriteRules == nil { - return key, nil - } - if len(key) > 0 { - _, rawKey, _ := codec.DecodeBytes(key, nil) - return rewriteRawKey(rawKey, rewriteRules) - } - return nil, nil -} - -// Rewrites a raw key with raw key rewrite rule and returns an encoded key. -func rewriteRawKey(key []byte, rewriteRules *RewriteRules) ([]byte, *import_sstpb.RewriteRule) { - if rewriteRules == nil { - return codec.EncodeBytes([]byte{}, key), nil - } - if len(key) > 0 { - rule := matchOldPrefix(key, rewriteRules) - ret := bytes.Replace(key, rule.GetOldKeyPrefix(), rule.GetNewKeyPrefix(), 1) - return codec.EncodeBytes([]byte{}, ret), rule - } - return nil, nil -} - -func matchOldPrefix(key []byte, rewriteRules *RewriteRules) *import_sstpb.RewriteRule { - for _, rule := range rewriteRules.Data { - if bytes.HasPrefix(key, rule.GetOldKeyPrefix()) { - return rule - } - } - return nil -} - -func GetKeyTS(key []byte) (uint64, error) { - if len(key) < 8 { - return 0, errors.Annotatef(berrors.ErrInvalidArgument, - "the length of key is smaller than 8, key:%s", redact.Key(key)) - } - - _, ts, err := codec.DecodeUintDesc(key[len(key)-8:]) - return ts, err -} - -func TruncateTS(key []byte) []byte { - if len(key) == 0 { - return nil - } - if len(key) < 8 { - return key - } - return key[:len(key)-8] -} - // SplitRanges splits region by // 1. data range after rewrite. // 2. rewrite rules. @@ -520,7 +125,7 @@ func SplitRanges( splitClientOpts = append(splitClientOpts, split.WithRawKV()) } - splitter := NewRegionSplitter(split.NewClient( + splitter := restoreutils.NewRegionSplitter(split.NewClient( client.GetPDClient(), client.pdHTTPClient, client.GetTLSConfig(), @@ -532,81 +137,6 @@ func SplitRanges( return splitter.ExecuteSplit(ctx, ranges) } -func findMatchedRewriteRule(file AppliedFile, rules *RewriteRules) *import_sstpb.RewriteRule { - startID := tablecodec.DecodeTableID(file.GetStartKey()) - endID := tablecodec.DecodeTableID(file.GetEndKey()) - if startID != endID { - return nil - } - _, rule := rewriteRawKey(file.GetStartKey(), rules) - if rule == nil { - // fall back to encoded key - _, rule = rewriteEncodedKey(file.GetStartKey(), rules) - } - return rule -} - -// GetRewriteRawKeys rewrites rules to the raw key. 
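// [Editorial illustration, not part of the patch] GetRewriteRawKeys matches the file keys
// against the rules as raw keys, while GetRewriteEncodedKeys strips the memcomparable encoding
// first; a sketch of why an already-encoded key no longer matches a table prefix directly
// (table ID 100 is made up):
package main

import (
	"bytes"
	"fmt"

	"github.com/pingcap/tidb/pkg/tablecodec"
	"github.com/pingcap/tidb/pkg/util/codec"
)

func main() {
	prefix := tablecodec.EncodeTablePrefix(100)    // rule prefix t{100}
	rawKey := tablecodec.GenTableRecordPrefix(100) // raw record prefix of table 100
	encoded := codec.EncodeBytes(nil, rawKey)      // the encoded form carried by some file metas

	fmt.Println(bytes.HasPrefix(rawKey, prefix))  // true: the raw rewrite applies
	fmt.Println(bytes.HasPrefix(encoded, prefix)) // false: encoding breaks prefix matching
	_, decoded, err := codec.DecodeBytes(encoded, nil)
	fmt.Println(err == nil, bytes.HasPrefix(decoded, prefix)) // true true after decoding
}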
-func GetRewriteRawKeys(file AppliedFile, rewriteRules *RewriteRules) (startKey, endKey []byte, err error) { - startID := tablecodec.DecodeTableID(file.GetStartKey()) - endID := tablecodec.DecodeTableID(file.GetEndKey()) - var rule *import_sstpb.RewriteRule - if startID == endID { - startKey, rule = rewriteRawKey(file.GetStartKey(), rewriteRules) - if rewriteRules != nil && rule == nil { - err = errors.Annotatef(berrors.ErrRestoreInvalidRewrite, "cannot find raw rewrite rule for start key, startKey: %s", redact.Key(file.GetStartKey())) - return - } - endKey, rule = rewriteRawKey(file.GetEndKey(), rewriteRules) - if rewriteRules != nil && rule == nil { - err = errors.Annotatef(berrors.ErrRestoreInvalidRewrite, "cannot find raw rewrite rule for end key, endKey: %s", redact.Key(file.GetEndKey())) - return - } - } else { - log.Error("table ids dont matched", - zap.Int64("startID", startID), - zap.Int64("endID", endID), - logutil.Key("startKey", startKey), - logutil.Key("endKey", endKey)) - err = errors.Annotate(berrors.ErrRestoreInvalidRewrite, "invalid table id") - } - return -} - -// GetRewriteRawKeys rewrites rules to the encoded key -func GetRewriteEncodedKeys(file AppliedFile, rewriteRules *RewriteRules) (startKey, endKey []byte, err error) { - startID := tablecodec.DecodeTableID(file.GetStartKey()) - endID := tablecodec.DecodeTableID(file.GetEndKey()) - var rule *import_sstpb.RewriteRule - if startID == endID { - startKey, rule = rewriteEncodedKey(file.GetStartKey(), rewriteRules) - if rewriteRules != nil && rule == nil { - err = errors.Annotatef(berrors.ErrRestoreInvalidRewrite, "cannot find encode rewrite rule for start key, startKey: %s", redact.Key(file.GetStartKey())) - return - } - endKey, rule = rewriteEncodedKey(file.GetEndKey(), rewriteRules) - if rewriteRules != nil && rule == nil { - err = errors.Annotatef(berrors.ErrRestoreInvalidRewrite, "cannot find encode rewrite rule for end key, endKey: %s", redact.Key(file.GetEndKey())) - return - } - } else { - log.Error("table ids dont matched", - zap.Int64("startID", startID), - zap.Int64("endID", endID), - logutil.Key("startKey", startKey), - logutil.Key("endKey", endKey)) - err = errors.Annotate(berrors.ErrRestoreInvalidRewrite, "invalid table id") - } - return -} - -func encodeKeyPrefix(key []byte) []byte { - encodedPrefix := make([]byte, 0) - ungroupedLen := len(key) % 8 - encodedPrefix = append(encodedPrefix, codec.EncodeBytes([]byte{}, key[:len(key)-ungroupedLen])...) - return append(encodedPrefix[:len(encodedPrefix)-9], key[len(key)-ungroupedLen:]...) -} - // ZapTables make zap field of table for debuging, including table names. func ZapTables(tables []CreatedTable) zapcore.Field { return logutil.AbbreviatedArray("tables", tables, func(input any) []string { @@ -620,281 +150,3 @@ func ZapTables(tables []CreatedTable) zapcore.Field { return names }) } - -// ParseQuoteName parse the quote `db`.`table` name, and split it. 
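// [Editorial illustration, not part of the patch] ParseQuoteName below splits a quoted
// `db`.`table` pair and un-escapes doubled backquotes; a standalone sketch of the same parsing,
// assuming well-formed input with exactly two quoted parts:
package main

import (
	"fmt"
	"regexp"
	"strings"
)

var quoted = regexp.MustCompile("`(?:[^`]|``)*`")

func main() {
	name := "`a``b`.`c`"
	parts := quoted.FindAllString(name, -1) // ["`a``b`", "`c`"]
	unquote := func(s string) string {
		s = strings.TrimSuffix(strings.TrimPrefix(s, "`"), "`")
		return strings.ReplaceAll(s, "``", "`")
	}
	fmt.Println(unquote(parts[0]), unquote(parts[1])) // a`b c
}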
-func ParseQuoteName(name string) (db, table string) { - names := quoteRegexp.FindAllStringSubmatch(name, -1) - if len(names) != 2 { - log.Panic("failed to parse schema name", - zap.String("origin name", name), - zap.Any("parsed names", names)) - } - db = names[0][0] - table = names[1][0] - db = strings.ReplaceAll(unQuoteName(db), "``", "`") - table = strings.ReplaceAll(unQuoteName(table), "``", "`") - return db, table -} - -func unQuoteName(name string) string { - name = strings.TrimPrefix(name, "`") - return strings.TrimSuffix(name, "`") -} - -func PrefixStartKey(key []byte) []byte { - var sk = make([]byte, 0, len(key)+1) - sk = append(sk, 'z') - sk = append(sk, key...) - return sk -} - -func PrefixEndKey(key []byte) []byte { - if len(key) == 0 { - return []byte{'z' + 1} - } - return PrefixStartKey(key) -} - -func keyEq(a, b []byte) bool { - if len(a) != len(b) { - return false - } - for i := 0; i < len(a); i++ { - if a[i] != b[i] { - return false - } - } - return true -} - -func keyCmp(a, b []byte) int { - var length int - var chosen int - if len(a) < len(b) { - length = len(a) - chosen = -1 - } else if len(a) == len(b) { - length = len(a) - chosen = 0 - } else { - length = len(b) - chosen = 1 - } - for i := 0; i < length; i++ { - if a[i] < b[i] { - return -1 - } else if a[i] > b[i] { - return 1 - } - } - return chosen -} - -func keyCmpInterface(a, b any) int { - return keyCmp(a.([]byte), b.([]byte)) -} - -type RecoverRegionInfo struct { - RegionId uint64 - RegionVersion uint64 - StartKey []byte - EndKey []byte - TombStone bool -} - -func SortRecoverRegions(regions map[uint64][]*RecoverRegion) []*RecoverRegionInfo { - // last log term -> last index -> commit index - cmps := []func(a, b *RecoverRegion) int{ - func(a, b *RecoverRegion) int { - return int(a.GetLastLogTerm() - b.GetLastLogTerm()) - }, - func(a, b *RecoverRegion) int { - return int(a.GetLastIndex() - b.GetLastIndex()) - }, - func(a, b *RecoverRegion) int { - return int(a.GetCommitIndex() - b.GetCommitIndex()) - }, - } - - // Sort region peer by last log term -> last index -> commit index, and collect all regions' version. - var regionInfos = make([]*RecoverRegionInfo, 0, len(regions)) - for regionId, peers := range regions { - sort.Slice(peers, func(i, j int) bool { - for _, cmp := range cmps { - if v := cmp(peers[i], peers[j]); v != 0 { - return v > 0 - } - } - return false - }) - v := peers[0].Version - sk := PrefixStartKey(peers[0].StartKey) - ek := PrefixEndKey(peers[0].EndKey) - regionInfos = append(regionInfos, &RecoverRegionInfo{ - RegionId: regionId, - RegionVersion: v, - StartKey: sk, - EndKey: ek, - TombStone: peers[0].Tombstone, - }) - } - - sort.Slice(regionInfos, func(i, j int) bool { return regionInfos[i].RegionVersion > regionInfos[j].RegionVersion }) - return regionInfos -} - -func CheckConsistencyAndValidPeer(regionInfos []*RecoverRegionInfo) (map[uint64]struct{}, error) { - // split and merge in progressing during the backup, there may some overlap region, we have to handle it - // Resolve version conflicts. 
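// [Editorial illustration, not part of the patch] After overlaps are resolved, the check below
// requires the surviving regions to tile the keyspace with no gaps: each region must start
// exactly where the previous one ended. A simplified sketch of that invariant, ignoring the
// 'z' prefix and the treemap used by the real code:
package main

import (
	"bytes"
	"fmt"
	"sort"
)

type span struct{ start, end []byte }

func tiles(regions []span) bool {
	sort.Slice(regions, func(i, j int) bool {
		return bytes.Compare(regions[i].start, regions[j].start) < 0
	})
	prevEnd := []byte("")
	for _, r := range regions {
		if !bytes.Equal(prevEnd, r.start) {
			return false // gap or overlap between adjacent regions
		}
		prevEnd = r.end
	}
	return true
}

func main() {
	fmt.Println(tiles([]span{{[]byte(""), []byte("bb")}, {[]byte("bb"), []byte("cc")}})) // true
	fmt.Println(tiles([]span{{[]byte(""), []byte("bb")}, {[]byte("cc"), []byte("dd")}})) // false
}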
- var treeMap = treemap.NewWith(keyCmpInterface) - for _, p := range regionInfos { - var fk, fv any - fk, _ = treeMap.Ceiling(p.StartKey) - // keyspace overlap sk within ceiling - fk - if fk != nil && (keyEq(fk.([]byte), p.StartKey) || keyCmp(fk.([]byte), p.EndKey) < 0) { - continue - } - - // keyspace overlap sk within floor - fk.end_key - fk, fv = treeMap.Floor(p.StartKey) - if fk != nil && keyCmp(fv.(*RecoverRegionInfo).EndKey, p.StartKey) > 0 { - continue - } - treeMap.Put(p.StartKey, p) - } - - // After resolved, all validPeer regions shouldn't be tombstone. - // do some sanity check - var validPeers = make(map[uint64]struct{}, 0) - var iter = treeMap.Iterator() - var prevEndKey = PrefixStartKey([]byte{}) - var prevRegion uint64 = 0 - for iter.Next() { - v := iter.Value().(*RecoverRegionInfo) - if v.TombStone { - log.Error("validPeer shouldn't be tombstone", zap.Uint64("region id", v.RegionId)) - // TODO, some enhancement may need, a PoC or test may need for decision - return nil, errors.Annotatef(berrors.ErrRestoreInvalidPeer, - "Peer shouldn't be tombstone") - } - if !keyEq(prevEndKey, iter.Key().([]byte)) { - log.Error("regions are not adjacent", zap.Uint64("pre region", prevRegion), zap.Uint64("cur region", v.RegionId)) - // TODO, some enhancement may need, a PoC or test may need for decision - return nil, errors.Annotatef(berrors.ErrInvalidRange, - "invalid region range") - } - prevEndKey = v.EndKey - prevRegion = v.RegionId - validPeers[v.RegionId] = struct{}{} - } - return validPeers, nil -} - -// in cloud, since iops and bandwidth limitation, write operator in raft is slow, so raft state (logterm, lastlog, commitlog...) are the same among the peers -// LeaderCandidates select all peers can be select as a leader during the restore -func LeaderCandidates(peers []*RecoverRegion) ([]*RecoverRegion, error) { - if peers == nil { - return nil, errors.Annotatef(berrors.ErrRestoreRegionWithoutPeer, - "invalid region range") - } - candidates := make([]*RecoverRegion, 0, len(peers)) - // by default, the peers[0] to be assign as a leader, since peers already sorted by leader selection rule - leader := peers[0] - candidates = append(candidates, leader) - for _, peer := range peers[1:] { - // qualificated candidate is leader.logterm = candidate.logterm && leader.lastindex = candidate.lastindex && && leader.commitindex = candidate.commitindex - if peer.LastLogTerm == leader.LastLogTerm && peer.LastIndex == leader.LastIndex && peer.CommitIndex == leader.CommitIndex { - log.Debug("leader candidate", zap.Uint64("store id", peer.StoreId), zap.Uint64("region id", peer.RegionId), zap.Uint64("peer id", peer.PeerId)) - candidates = append(candidates, peer) - } - } - return candidates, nil -} - -// for region A, has candidate leader x, y, z -// peer x on store 1 with storeBalanceScore 3 -// peer y on store 3 with storeBalanceScore 2 -// peer z on store 4 with storeBalanceScore 1 -// result: peer z will be select as leader on store 4 -func SelectRegionLeader(storeBalanceScore map[uint64]int, peers []*RecoverRegion) *RecoverRegion { - // by default, the peers[0] to be assign as a leader - leader := peers[0] - minLeaderStore := storeBalanceScore[leader.StoreId] - for _, peer := range peers[1:] { - log.Debug("leader candidate", zap.Int("score", storeBalanceScore[peer.StoreId]), zap.Int("min-score", minLeaderStore), zap.Uint64("store id", peer.StoreId), zap.Uint64("region id", peer.RegionId), zap.Uint64("peer id", peer.PeerId)) - if storeBalanceScore[peer.StoreId] < minLeaderStore { - minLeaderStore = 
storeBalanceScore[peer.StoreId] - leader = peer - } - } - return leader -} - -// each 64 items constitute a bitmap unit -type bitMap map[int]uint64 - -func newBitMap() bitMap { - return make(map[int]uint64) -} - -func (m bitMap) pos(off int) (blockIndex int, bitOffset uint64) { - return off >> 6, uint64(1) << (off & 63) -} - -func (m bitMap) Set(off int) { - blockIndex, bitOffset := m.pos(off) - m[blockIndex] |= bitOffset -} - -func (m bitMap) Hit(off int) bool { - blockIndex, bitOffset := m.pos(off) - return (m[blockIndex] & bitOffset) > 0 -} - -type fileMap struct { - // group index -> bitmap of kv files - pos map[int]bitMap -} - -func newFileMap() fileMap { - return fileMap{ - pos: make(map[int]bitMap), - } -} - -type LogFilesSkipMap struct { - // metadata group key -> group map - skipMap map[string]fileMap -} - -func NewLogFilesSkipMap() *LogFilesSkipMap { - return &LogFilesSkipMap{ - skipMap: make(map[string]fileMap), - } -} - -func (m *LogFilesSkipMap) Insert(metaKey string, groupOff, fileOff int) { - mp, exists := m.skipMap[metaKey] - if !exists { - mp = newFileMap() - m.skipMap[metaKey] = mp - } - gp, exists := mp.pos[groupOff] - if !exists { - gp = newBitMap() - mp.pos[groupOff] = gp - } - gp.Set(fileOff) -} - -func (m *LogFilesSkipMap) NeedSkip(metaKey string, groupOff, fileOff int) bool { - mp, exists := m.skipMap[metaKey] - if !exists { - return false - } - gp, exists := mp.pos[groupOff] - if !exists { - return false - } - return gp.Hit(fileOff) -} diff --git a/br/pkg/restore/util_test.go b/br/pkg/restore/util_test.go index adcaed80a1..1716bc61cc 100644 --- a/br/pkg/restore/util_test.go +++ b/br/pkg/restore/util_test.go @@ -3,515 +3,207 @@ package restore import ( - "fmt" - "math/rand" + "context" + "sync" "testing" + "time" + "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" - "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/kvproto/pkg/metapb" - recover_data "github.com/pingcap/kvproto/pkg/recoverdatapb" - "github.com/pingcap/tidb/pkg/tablecodec" - "github.com/pingcap/tidb/pkg/util/codec" + "github.com/pingcap/log" + berrors "github.com/pingcap/tidb/br/pkg/errors" + "github.com/pingcap/tidb/br/pkg/glue" + "github.com/pingcap/tidb/br/pkg/logutil" + "github.com/pingcap/tidb/br/pkg/restore/split" + "github.com/pingcap/tidb/br/pkg/rtree" + "github.com/pingcap/tidb/pkg/parser/model" "github.com/stretchr/testify/require" ) -func TestGetKeyRangeByMode(t *testing.T) { - file := &backuppb.File{ - Name: "file_write.sst", - StartKey: []byte("t1a"), - EndKey: []byte("t1ccc"), +type fakeRestorer struct { + mu sync.Mutex + errorInSplit bool + splitRanges []rtree.Range + restoredFiles []*backuppb.File + tableIDIsInsequence bool +} + +func (f *fakeRestorer) SplitRanges(ctx context.Context, ranges []rtree.Range, updateCh glue.Progress, isRawKv bool) error { + f.mu.Lock() + defer f.mu.Unlock() + + if ctx.Err() != nil { + return ctx.Err() } - endFile := &backuppb.File{ - Name: "file_write.sst", - StartKey: []byte("t1a"), - EndKey: []byte(""), + f.splitRanges = append(f.splitRanges, ranges...) + if f.errorInSplit { + err := errors.Annotatef(berrors.ErrRestoreSplitFailed, + "the key space takes many efforts and finally get together, how dare you split them again... 
:<") + log.Error("error happens :3", logutil.ShortError(err)) + return err } - rule := &RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - OldKeyPrefix: []byte("t1"), - NewKeyPrefix: []byte("t2"), + return nil +} + +func (f *fakeRestorer) RestoreSSTFiles(ctx context.Context, tableIDWithFiles []TableIDWithFiles, updateCh glue.Progress) error { + f.mu.Lock() + defer f.mu.Unlock() + + if ctx.Err() != nil { + return ctx.Err() + } + for i, tableIDWithFile := range tableIDWithFiles { + if int64(i) != tableIDWithFile.TableID { + f.tableIDIsInsequence = false + } + f.restoredFiles = append(f.restoredFiles, tableIDWithFile.Files...) + } + err := errors.Annotatef(berrors.ErrRestoreWriteAndIngest, "the files to restore are taken by a hijacker, meow :3") + log.Error("error happens :3", logutil.ShortError(err)) + return err +} + +func fakeRanges(keys ...string) (r DrainResult) { + for i := range keys { + if i+1 == len(keys) { + return + } + r.Ranges = append(r.Ranges, rtree.Range{ + StartKey: []byte(keys[i]), + EndKey: []byte(keys[i+1]), + Files: []*backuppb.File{{Name: "fake.sst"}}, + }) + r.TableEndOffsetInRanges = append(r.TableEndOffsetInRanges, len(r.Ranges)) + r.TablesToSend = append(r.TablesToSend, CreatedTable{ + Table: &model.TableInfo{ + ID: int64(i), }, - }, + }) } - // raw kv - testRawFn := getKeyRangeByMode(Raw) - start, end, err := testRawFn(file, rule) - require.NoError(t, err) - require.Equal(t, []byte("t1a"), start) - require.Equal(t, []byte("t1ccc"), end) - - start, end, err = testRawFn(endFile, rule) - require.NoError(t, err) - require.Equal(t, []byte("t1a"), start) - require.Equal(t, []byte(""), end) - - // txn kv: the keys must be encoded. - testTxnFn := getKeyRangeByMode(Txn) - start, end, err = testTxnFn(file, rule) - require.NoError(t, err) - require.Equal(t, codec.EncodeBytes(nil, []byte("t1a")), start) - require.Equal(t, codec.EncodeBytes(nil, []byte("t1ccc")), end) - - start, end, err = testTxnFn(endFile, rule) - require.NoError(t, err) - require.Equal(t, codec.EncodeBytes(nil, []byte("t1a")), start) - require.Equal(t, []byte(""), end) - - // normal kv: the keys must be encoded. - testFn := getKeyRangeByMode(TiDB) - start, end, err = testFn(file, rule) - require.NoError(t, err) - require.Equal(t, codec.EncodeBytes(nil, []byte("t2a")), start) - require.Equal(t, codec.EncodeBytes(nil, []byte("t2ccc")), end) - - // TODO maybe fix later - // current restore does not support rewrite empty endkey. - // because backup guarantees that the end key is not empty. 
- // start, end, err = testFn(endFile, rule) - // require.NoError(t, err) - // require.Equal(t, codec.EncodeBytes(nil, []byte("t2a")), start) - // require.Equal(t, []byte(""), end) + return } -func TestParseQuoteName(t *testing.T) { - schema, table := ParseQuoteName("`a`.`b`") - require.Equal(t, "a", schema) - require.Equal(t, "b", table) - - schema, table = ParseQuoteName("`a``b`.``````") - require.Equal(t, "a`b", schema) - require.Equal(t, "``", table) - - schema, table = ParseQuoteName("`.`.`.`") - require.Equal(t, ".", schema) - require.Equal(t, ".", table) - - schema, table = ParseQuoteName("`.``.`.`.`") - require.Equal(t, ".`.", schema) - require.Equal(t, ".", table) +type errorInTimeSink struct { + ctx context.Context + errCh chan error + t *testing.T } -func TestGetSSTMetaFromFile(t *testing.T) { - file := &backuppb.File{ - Name: "file_write.sst", - StartKey: []byte("t1a"), - EndKey: []byte("t1ccc"), - } - rule := &import_sstpb.RewriteRule{ - OldKeyPrefix: []byte("t1"), - NewKeyPrefix: []byte("t2"), - } - region := &metapb.Region{ - StartKey: []byte("t2abc"), - EndKey: []byte("t3a"), - } - sstMeta, err := GetSSTMetaFromFile([]byte{}, file, region, rule, RewriteModeLegacy) - require.Nil(t, err) - require.Equal(t, "t2abc", string(sstMeta.GetRange().GetStart())) - require.Equal(t, "t2\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", string(sstMeta.GetRange().GetEnd())) +func (e errorInTimeSink) EmitTables(tables ...CreatedTable) {} + +func (e errorInTimeSink) EmitError(err error) { + e.errCh <- err } -func TestMapTableToFiles(t *testing.T) { - filesOfTable1 := []*backuppb.File{ - { - Name: "table1-1.sst", - StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(1), - }, - { - Name: "table1-2.sst", - StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(1), - }, - { - Name: "table1-3.sst", - StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(1), - }, - } - filesOfTable2 := []*backuppb.File{ - { - Name: "table2-1.sst", - StartKey: tablecodec.EncodeTablePrefix(2), - EndKey: tablecodec.EncodeTablePrefix(2), - }, - { - Name: "table2-2.sst", - StartKey: tablecodec.EncodeTablePrefix(2), - EndKey: tablecodec.EncodeTablePrefix(2), - }, - } +func (e errorInTimeSink) Close() {} - result := MapTableToFiles(append(filesOfTable2, filesOfTable1...)) - - require.Equal(t, filesOfTable1, result[1]) - require.Equal(t, filesOfTable2, result[2]) -} - -func TestValidateFileRewriteRule(t *testing.T) { - rules := &RewriteRules{ - Data: []*import_sstpb.RewriteRule{{ - OldKeyPrefix: []byte(tablecodec.EncodeTablePrefix(1)), - NewKeyPrefix: []byte(tablecodec.EncodeTablePrefix(2)), - }}, - } - - // Empty start/end key is not allowed. - err := ValidateFileRewriteRule( - &backuppb.File{ - Name: "file_write.sst", - StartKey: []byte(""), - EndKey: []byte(""), - }, - rules, - ) - require.Error(t, err) - require.Regexp(t, ".*cannot find rewrite rule.*", err.Error()) - - // Range is not overlap, no rule found. - err = ValidateFileRewriteRule( - &backuppb.File{ - Name: "file_write.sst", - StartKey: tablecodec.EncodeTablePrefix(0), - EndKey: tablecodec.EncodeTablePrefix(1), - }, - rules, - ) - require.Error(t, err) - require.Regexp(t, ".*cannot find rewrite rule.*", err.Error()) - - // No rule for end key. 
- err = ValidateFileRewriteRule( - &backuppb.File{ - Name: "file_write.sst", - StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(2), - }, - rules, - ) - require.Error(t, err) - require.Regexp(t, ".*cannot find rewrite rule.*", err.Error()) - - // Add a rule for end key. - rules.Data = append(rules.Data, &import_sstpb.RewriteRule{ - OldKeyPrefix: tablecodec.EncodeTablePrefix(2), - NewKeyPrefix: tablecodec.EncodeTablePrefix(3), - }) - err = ValidateFileRewriteRule( - &backuppb.File{ - Name: "file_write.sst", - StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(2), - }, - rules, - ) - require.Error(t, err) - require.Regexp(t, ".*rewrite rule mismatch.*", err.Error()) - - // Add a bad rule for end key, after rewrite start key > end key. - rules.Data = append(rules.Data[:1], &import_sstpb.RewriteRule{ - OldKeyPrefix: tablecodec.EncodeTablePrefix(2), - NewKeyPrefix: tablecodec.EncodeTablePrefix(1), - }) - err = ValidateFileRewriteRule( - &backuppb.File{ - Name: "file_write.sst", - StartKey: tablecodec.EncodeTablePrefix(1), - EndKey: tablecodec.EncodeTablePrefix(2), - }, - rules, - ) - require.Error(t, err) - require.Regexp(t, ".*rewrite rule mismatch.*", err.Error()) -} - -func TestRewriteFileKeys(t *testing.T) { - rewriteRules := RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - NewKeyPrefix: tablecodec.GenTablePrefix(2), - OldKeyPrefix: tablecodec.GenTablePrefix(1), - }, - { - NewKeyPrefix: tablecodec.GenTablePrefix(511), - OldKeyPrefix: tablecodec.GenTablePrefix(767), - }, - }, - } - rawKeyFile := backuppb.File{ - Name: "backup.sst", - StartKey: tablecodec.GenTableRecordPrefix(1), - EndKey: tablecodec.GenTableRecordPrefix(1).PrefixNext(), - } - start, end, err := GetRewriteRawKeys(&rawKeyFile, &rewriteRules) - require.NoError(t, err) - _, end, err = codec.DecodeBytes(end, nil) - require.NoError(t, err) - _, start, err = codec.DecodeBytes(start, nil) - require.NoError(t, err) - require.Equal(t, []byte(tablecodec.GenTableRecordPrefix(2)), start) - require.Equal(t, []byte(tablecodec.GenTableRecordPrefix(2).PrefixNext()), end) - - encodeKeyFile := backuppb.DataFileInfo{ - Path: "bakcup.log", - StartKey: codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(1)), - EndKey: codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(1).PrefixNext()), - } - start, end, err = GetRewriteEncodedKeys(&encodeKeyFile, &rewriteRules) - require.NoError(t, err) - require.Equal(t, codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(2)), start) - require.Equal(t, codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(2).PrefixNext()), end) - - // test for table id 767 - encodeKeyFile767 := backuppb.DataFileInfo{ - Path: "bakcup.log", - StartKey: codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(767)), - EndKey: codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(767).PrefixNext()), - } - // use raw rewrite should no error but not equal - start, end, err = GetRewriteRawKeys(&encodeKeyFile767, &rewriteRules) - require.NoError(t, err) - require.NotEqual(t, codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(511)), start) - require.NotEqual(t, codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(511).PrefixNext()), end) - // use encode rewrite should no error and equal - start, end, err = GetRewriteEncodedKeys(&encodeKeyFile767, &rewriteRules) - require.NoError(t, err) - require.Equal(t, codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(511)), start) - require.Equal(t, codec.EncodeBytes(nil, 
tablecodec.GenTableRecordPrefix(511).PrefixNext()), end) -} - -func newPeerMeta( - regionId uint64, - peerId uint64, - storeId uint64, - startKey []byte, - endKey []byte, - lastLogTerm uint64, - lastIndex uint64, - commitIndex uint64, - version uint64, - tombstone bool, -) *RecoverRegion { - return &RecoverRegion{ - &recover_data.RegionMeta{ - RegionId: regionId, - PeerId: peerId, - StartKey: startKey, - EndKey: endKey, - LastLogTerm: lastLogTerm, - LastIndex: lastIndex, - CommitIndex: commitIndex, - Version: version, - Tombstone: tombstone, - }, - storeId, +func (e errorInTimeSink) Wait() { + select { + case <-e.ctx.Done(): + e.t.Logf("The context is canceled but no error happen") + e.t.FailNow() + case <-e.errCh: } } -func newRecoverRegionInfo(r *RecoverRegion) *RecoverRegionInfo { - return &RecoverRegionInfo{ - RegionVersion: r.Version, - RegionId: r.RegionId, - StartKey: PrefixStartKey(r.StartKey), - EndKey: PrefixEndKey(r.EndKey), - TombStone: r.Tombstone, +func assertErrorEmitInTime(ctx context.Context, t *testing.T) errorInTimeSink { + errCh := make(chan error, 1) + return errorInTimeSink{ + ctx: ctx, + errCh: errCh, + t: t, } } -func TestSortRecoverRegions(t *testing.T) { - selectedPeer1 := newPeerMeta(9, 11, 2, []byte("aa"), nil, 2, 0, 0, 0, false) - selectedPeer2 := newPeerMeta(19, 22, 3, []byte("bbb"), nil, 2, 1, 0, 1, false) - selectedPeer3 := newPeerMeta(29, 30, 1, []byte("c"), nil, 2, 1, 1, 2, false) - regions := map[uint64][]*RecoverRegion{ - 9: { - // peer 11 should be selected because of log term - newPeerMeta(9, 10, 1, []byte("a"), nil, 1, 1, 1, 1, false), - selectedPeer1, - newPeerMeta(9, 12, 3, []byte("aaa"), nil, 0, 0, 0, 0, false), - }, - 19: { - // peer 22 should be selected because of log index - newPeerMeta(19, 20, 1, []byte("b"), nil, 1, 1, 1, 1, false), - newPeerMeta(19, 21, 2, []byte("bb"), nil, 2, 0, 0, 0, false), - selectedPeer2, - }, - 29: { - // peer 30 should be selected because of log index - selectedPeer3, - newPeerMeta(29, 31, 2, []byte("cc"), nil, 2, 0, 0, 0, false), - newPeerMeta(29, 32, 3, []byte("ccc"), nil, 2, 1, 0, 0, false), - }, +func TestRestoreFailed(t *testing.T) { + ranges := []DrainResult{ + fakeRanges("aax", "abx", "abz"), + fakeRanges("abz", "bbz", "bcy"), + fakeRanges("bcy", "cad", "xxy"), } - regionsInfos := SortRecoverRegions(regions) - expectRegionInfos := []*RecoverRegionInfo{ - newRecoverRegionInfo(selectedPeer3), - newRecoverRegionInfo(selectedPeer2), - newRecoverRegionInfo(selectedPeer1), + r := &fakeRestorer{ + tableIDIsInsequence: true, } - require.Equal(t, expectRegionInfos, regionsInfos) -} - -func TestCheckConsistencyAndValidPeer(t *testing.T) { - //key space is continuous - validPeer1 := newPeerMeta(9, 11, 2, []byte(""), []byte("bb"), 2, 0, 0, 0, false) - validPeer2 := newPeerMeta(19, 22, 3, []byte("bb"), []byte("cc"), 2, 1, 0, 1, false) - validPeer3 := newPeerMeta(29, 30, 1, []byte("cc"), []byte(""), 2, 1, 1, 2, false) - - validRegionInfos := []*RecoverRegionInfo{ - newRecoverRegionInfo(validPeer1), - newRecoverRegionInfo(validPeer2), - newRecoverRegionInfo(validPeer3), - } - - validPeer, err := CheckConsistencyAndValidPeer(validRegionInfos) + sender, err := NewTiKVSender(context.TODO(), r, nil, 1, string(FineGrained)) require.NoError(t, err) - require.Equal(t, 3, len(validPeer)) - var regions = make(map[uint64]struct{}, 3) - regions[9] = struct{}{} - regions[19] = struct{}{} - regions[29] = struct{}{} - - require.Equal(t, regions, validPeer) - - //key space is not continuous - invalidPeer1 := newPeerMeta(9, 11, 2, 
[]byte("aa"), []byte("cc"), 2, 0, 0, 0, false) - invalidPeer2 := newPeerMeta(19, 22, 3, []byte("dd"), []byte("cc"), 2, 1, 0, 1, false) - invalidPeer3 := newPeerMeta(29, 30, 1, []byte("cc"), []byte("dd"), 2, 1, 1, 2, false) - - invalidRegionInfos := []*RecoverRegionInfo{ - newRecoverRegionInfo(invalidPeer1), - newRecoverRegionInfo(invalidPeer2), - newRecoverRegionInfo(invalidPeer3), + dctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + sink := assertErrorEmitInTime(dctx, t) + sender.PutSink(sink) + for _, r := range ranges { + sender.RestoreBatch(r) } - - _, err = CheckConsistencyAndValidPeer(invalidRegionInfos) - require.Error(t, err) - require.Regexp(t, ".*invalid restore range.*", err.Error()) + sink.Wait() + sink.Close() + sender.Close() + require.GreaterOrEqual(t, len(r.restoredFiles), 1) + require.True(t, r.tableIDIsInsequence) } -func TestLeaderCandidates(t *testing.T) { - //key space is continuous - validPeer1 := newPeerMeta(9, 11, 2, []byte(""), []byte("bb"), 2, 1, 0, 0, false) - validPeer2 := newPeerMeta(19, 22, 3, []byte("bb"), []byte("cc"), 2, 1, 0, 1, false) - validPeer3 := newPeerMeta(29, 30, 1, []byte("cc"), []byte(""), 2, 1, 0, 2, false) - - peers := []*RecoverRegion{ - validPeer1, - validPeer2, - validPeer3, +func TestSplitFailed(t *testing.T) { + ranges := []DrainResult{ + fakeRanges("aax", "abx", "abz"), + fakeRanges("abz", "bbz", "bcy"), + fakeRanges("bcy", "cad", "xxy"), } - - candidates, err := LeaderCandidates(peers) + r := &fakeRestorer{errorInSplit: true, tableIDIsInsequence: true} + sender, err := NewTiKVSender(context.TODO(), r, nil, 1, string(FineGrained)) require.NoError(t, err) - require.Equal(t, 3, len(candidates)) + dctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + sink := assertErrorEmitInTime(dctx, t) + sender.PutSink(sink) + for _, r := range ranges { + sender.RestoreBatch(r) + } + sink.Wait() + sender.Close() + require.GreaterOrEqual(t, len(r.splitRanges), 2) + require.Len(t, r.restoredFiles, 0) + require.True(t, r.tableIDIsInsequence) } -func TestSelectRegionLeader(t *testing.T) { - validPeer1 := newPeerMeta(9, 11, 2, []byte(""), []byte("bb"), 2, 1, 0, 0, false) - validPeer2 := newPeerMeta(19, 22, 3, []byte("bb"), []byte("cc"), 2, 1, 0, 1, false) - validPeer3 := newPeerMeta(29, 30, 1, []byte("cc"), []byte(""), 2, 1, 0, 2, false) - - peers := []*RecoverRegion{ - validPeer1, - validPeer2, - validPeer3, +func regionInfo(startKey, endKey string) *split.RegionInfo { + return &split.RegionInfo{ + Region: &metapb.Region{ + StartKey: []byte(startKey), + EndKey: []byte(endKey), + }, } - // init store banlance score all is 0 - storeBalanceScore := make(map[uint64]int, len(peers)) - leader := SelectRegionLeader(storeBalanceScore, peers) - require.Equal(t, validPeer1, leader) - - // change store banlance store - storeBalanceScore[2] = 3 - storeBalanceScore[3] = 2 - storeBalanceScore[1] = 1 - leader = SelectRegionLeader(storeBalanceScore, peers) - require.Equal(t, validPeer3, leader) - - // one peer - peer := []*RecoverRegion{ - validPeer3, - } - // init store banlance score all is 0 - storeScore := make(map[uint64]int, len(peer)) - leader = SelectRegionLeader(storeScore, peer) - require.Equal(t, validPeer3, leader) } -func TestLogFilesSkipMap(t *testing.T) { +func TestSplitCheckPartRegionConsistency(t *testing.T) { var ( - metaNum = 2 - groupNum = 4 - fileNum = 1000 - - ratio = 0.1 + startKey []byte = []byte("a") + endKey []byte = []byte("f") + err error ) - - for ratio < 1 { - 
skipmap := NewLogFilesSkipMap() - nativemap := make(map[string]map[int]map[int]struct{}) - count := 0 - for i := 0; i < int(ratio*float64(metaNum*groupNum*fileNum)); i++ { - metaKey := fmt.Sprint(rand.Intn(metaNum)) - groupOff := rand.Intn(groupNum) - fileOff := rand.Intn(fileNum) - - mp, exists := nativemap[metaKey] - if !exists { - mp = make(map[int]map[int]struct{}) - nativemap[metaKey] = mp - } - gp, exists := mp[groupOff] - if !exists { - gp = make(map[int]struct{}) - mp[groupOff] = gp - } - if _, exists := gp[fileOff]; !exists { - gp[fileOff] = struct{}{} - skipmap.Insert(metaKey, groupOff, fileOff) - count += 1 - } - } - - ncount := 0 - for metaKey, mp := range nativemap { - for groupOff, gp := range mp { - for fileOff := range gp { - require.True(t, skipmap.NeedSkip(metaKey, groupOff, fileOff)) - ncount++ - } - } - } - - require.Equal(t, count, ncount) - - continueFunc := func(metaKey string, groupi, filei int) bool { - mp, exists := nativemap[metaKey] - if !exists { - return false - } - gp, exists := mp[groupi] - if !exists { - return false - } - _, exists = gp[filei] - return exists - } - - for metai := 0; metai < metaNum; metai++ { - metaKey := fmt.Sprint(metai) - for groupi := 0; groupi < groupNum; groupi++ { - for filei := 0; filei < fileNum; filei++ { - if continueFunc(metaKey, groupi, filei) { - continue - } - require.False(t, skipmap.NeedSkip(metaKey, groupi, filei)) - } - } - } - - ratio = ratio * 2 - } + err = split.CheckPartRegionConsistency(startKey, endKey, nil) + require.Error(t, err) + err = split.CheckPartRegionConsistency(startKey, endKey, []*split.RegionInfo{ + regionInfo("b", "c"), + }) + require.Error(t, err) + err = split.CheckPartRegionConsistency(startKey, endKey, []*split.RegionInfo{ + regionInfo("a", "c"), + regionInfo("d", "e"), + }) + require.Error(t, err) + err = split.CheckPartRegionConsistency(startKey, endKey, []*split.RegionInfo{ + regionInfo("a", "c"), + regionInfo("c", "d"), + }) + require.NoError(t, err) + err = split.CheckPartRegionConsistency(startKey, endKey, []*split.RegionInfo{ + regionInfo("a", "c"), + regionInfo("c", "d"), + regionInfo("d", "f"), + }) + require.NoError(t, err) + err = split.CheckPartRegionConsistency(startKey, endKey, []*split.RegionInfo{ + regionInfo("a", "c"), + regionInfo("c", "z"), + }) + require.NoError(t, err) } diff --git a/br/pkg/restore/utils/BUILD.bazel b/br/pkg/restore/utils/BUILD.bazel new file mode 100644 index 0000000000..002654bf00 --- /dev/null +++ b/br/pkg/restore/utils/BUILD.bazel @@ -0,0 +1,60 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "utils", + srcs = [ + "id.go", + "key.go", + "log_file_map.go", + "merge.go", + "rewrite_rule.go", + "split.go", + "value.go", + ], + importpath = "github.com/pingcap/tidb/br/pkg/restore/utils", + visibility = ["//visibility:public"], + deps = [ + "//br/pkg/errors", + "//br/pkg/logutil", + "//br/pkg/restore/split", + "//br/pkg/rtree", + "//pkg/parser/model", + "//pkg/tablecodec", + "//pkg/util/codec", + "//pkg/util/redact", + "@com_github_opentracing_opentracing_go//:opentracing-go", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_kvproto//pkg/brpb", + "@com_github_pingcap_kvproto//pkg/import_sstpb", + "@com_github_pingcap_log//:log", + "@org_uber_go_zap//:zap", + ], +) + +go_test( + name = "utils_test", + timeout = "short", + srcs = [ + "log_file_map_test.go", + "merge_test.go", + "rewrite_rule_test.go", + "split_test.go", + ], + flaky = True, + shard_count = 12, + deps = [ + ":utils", + "//br/pkg/conn", + 
"//br/pkg/errors", + "//br/pkg/restore/split", + "//br/pkg/rtree", + "//pkg/sessionctx/stmtctx", + "//pkg/tablecodec", + "//pkg/types", + "//pkg/util/codec", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_kvproto//pkg/brpb", + "@com_github_pingcap_kvproto//pkg/import_sstpb", + "@com_github_stretchr_testify//require", + ], +) diff --git a/br/pkg/restore/utils/id.go b/br/pkg/restore/utils/id.go new file mode 100644 index 0000000000..02b72ef1c7 --- /dev/null +++ b/br/pkg/restore/utils/id.go @@ -0,0 +1,58 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import "github.com/pingcap/tidb/pkg/parser/model" + +// GetPartitionIDMap creates a map maping old physical ID to new physical ID. +func GetPartitionIDMap(newTable, oldTable *model.TableInfo) map[int64]int64 { + tableIDMap := make(map[int64]int64) + + if oldTable.Partition != nil && newTable.Partition != nil { + nameMapID := make(map[string]int64) + + for _, old := range oldTable.Partition.Definitions { + nameMapID[old.Name.L] = old.ID + } + for _, new := range newTable.Partition.Definitions { + if oldID, exist := nameMapID[new.Name.L]; exist { + tableIDMap[oldID] = new.ID + } + } + } + + return tableIDMap +} + +// GetTableIDMap creates a map maping old tableID to new tableID. +func GetTableIDMap(newTable, oldTable *model.TableInfo) map[int64]int64 { + tableIDMap := GetPartitionIDMap(newTable, oldTable) + tableIDMap[oldTable.ID] = newTable.ID + return tableIDMap +} + +// GetIndexIDMap creates a map maping old indexID to new indexID. +func GetIndexIDMap(newTable, oldTable *model.TableInfo) map[int64]int64 { + indexIDMap := make(map[int64]int64) + for _, srcIndex := range oldTable.Indices { + for _, destIndex := range newTable.Indices { + if srcIndex.Name == destIndex.Name { + indexIDMap[srcIndex.ID] = destIndex.ID + } + } + } + + return indexIDMap +} diff --git a/br/pkg/restore/utils/key.go b/br/pkg/restore/utils/key.go new file mode 100644 index 0000000000..8a4ad05b0d --- /dev/null +++ b/br/pkg/restore/utils/key.go @@ -0,0 +1,34 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package utils + +import "github.com/pingcap/tidb/pkg/util/codec" + +func TruncateTS(key []byte) []byte { + if len(key) == 0 { + return nil + } + if len(key) < 8 { + return key + } + return key[:len(key)-8] +} + +func EncodeKeyPrefix(key []byte) []byte { + encodedPrefix := make([]byte, 0) + ungroupedLen := len(key) % 8 + encodedPrefix = append(encodedPrefix, codec.EncodeBytes([]byte{}, key[:len(key)-ungroupedLen])...) + return append(encodedPrefix[:len(encodedPrefix)-9], key[len(key)-ungroupedLen:]...) +} diff --git a/br/pkg/restore/utils/log_file_map.go b/br/pkg/restore/utils/log_file_map.go new file mode 100644 index 0000000000..026b66d617 --- /dev/null +++ b/br/pkg/restore/utils/log_file_map.go @@ -0,0 +1,84 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +// each 64 items constitute a bitmap unit +type bitMap map[int]uint64 + +func newBitMap() bitMap { + return make(map[int]uint64) +} + +func (m bitMap) pos(off int) (blockIndex int, bitOffset uint64) { + return off >> 6, uint64(1) << (off & 63) +} + +func (m bitMap) Set(off int) { + blockIndex, bitOffset := m.pos(off) + m[blockIndex] |= bitOffset +} + +func (m bitMap) Hit(off int) bool { + blockIndex, bitOffset := m.pos(off) + return (m[blockIndex] & bitOffset) > 0 +} + +type fileMap struct { + // group index -> bitmap of kv files + pos map[int]bitMap +} + +func newFileMap() fileMap { + return fileMap{ + pos: make(map[int]bitMap), + } +} + +type LogFilesSkipMap struct { + // metadata group key -> group map + skipMap map[string]fileMap +} + +func NewLogFilesSkipMap() *LogFilesSkipMap { + return &LogFilesSkipMap{ + skipMap: make(map[string]fileMap), + } +} + +func (m *LogFilesSkipMap) Insert(metaKey string, groupOff, fileOff int) { + mp, exists := m.skipMap[metaKey] + if !exists { + mp = newFileMap() + m.skipMap[metaKey] = mp + } + gp, exists := mp.pos[groupOff] + if !exists { + gp = newBitMap() + mp.pos[groupOff] = gp + } + gp.Set(fileOff) +} + +func (m *LogFilesSkipMap) NeedSkip(metaKey string, groupOff, fileOff int) bool { + mp, exists := m.skipMap[metaKey] + if !exists { + return false + } + gp, exists := mp.pos[groupOff] + if !exists { + return false + } + return gp.Hit(fileOff) +} diff --git a/br/pkg/restore/utils/log_file_map_test.go b/br/pkg/restore/utils/log_file_map_test.go new file mode 100644 index 0000000000..9e43febf49 --- /dev/null +++ b/br/pkg/restore/utils/log_file_map_test.go @@ -0,0 +1,100 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package utils_test + +import ( + "fmt" + "math/rand" + "testing" + + "github.com/pingcap/tidb/br/pkg/restore/utils" + "github.com/stretchr/testify/require" +) + +func TestLogFilesSkipMap(t *testing.T) { + var ( + metaNum = 2 + groupNum = 4 + fileNum = 1000 + + ratio = 0.1 + ) + + for ratio < 1 { + skipmap := utils.NewLogFilesSkipMap() + nativemap := make(map[string]map[int]map[int]struct{}) + count := 0 + for i := 0; i < int(ratio*float64(metaNum*groupNum*fileNum)); i++ { + metaKey := fmt.Sprint(rand.Intn(metaNum)) + groupOff := rand.Intn(groupNum) + fileOff := rand.Intn(fileNum) + + mp, exists := nativemap[metaKey] + if !exists { + mp = make(map[int]map[int]struct{}) + nativemap[metaKey] = mp + } + gp, exists := mp[groupOff] + if !exists { + gp = make(map[int]struct{}) + mp[groupOff] = gp + } + if _, exists := gp[fileOff]; !exists { + gp[fileOff] = struct{}{} + skipmap.Insert(metaKey, groupOff, fileOff) + count += 1 + } + } + + ncount := 0 + for metaKey, mp := range nativemap { + for groupOff, gp := range mp { + for fileOff := range gp { + require.True(t, skipmap.NeedSkip(metaKey, groupOff, fileOff)) + ncount++ + } + } + } + + require.Equal(t, count, ncount) + + continueFunc := func(metaKey string, groupi, filei int) bool { + mp, exists := nativemap[metaKey] + if !exists { + return false + } + gp, exists := mp[groupi] + if !exists { + return false + } + _, exists = gp[filei] + return exists + } + + for metai := 0; metai < metaNum; metai++ { + metaKey := fmt.Sprint(metai) + for groupi := 0; groupi < groupNum; groupi++ { + for filei := 0; filei < fileNum; filei++ { + if continueFunc(metaKey, groupi, filei) { + continue + } + require.False(t, skipmap.NeedSkip(metaKey, groupi, filei)) + } + } + } + + ratio = ratio * 2 + } +} diff --git a/br/pkg/restore/merge.go b/br/pkg/restore/utils/merge.go similarity index 68% rename from br/pkg/restore/merge.go rename to br/pkg/restore/utils/merge.go index 77ecd74bcf..837eacd993 100644 --- a/br/pkg/restore/merge.go +++ b/br/pkg/restore/utils/merge.go @@ -1,24 +1,14 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package utils import ( "strings" "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" - "github.com/pingcap/kvproto/pkg/import_sstpb" - "github.com/pingcap/log" berrors "github.com/pingcap/tidb/br/pkg/errors" - "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/rtree" - "github.com/pingcap/tidb/pkg/tablecodec" - "go.uber.org/zap" -) - -const ( - writeCFName = "write" - defaultCFName = "default" ) // MergeRangesStat holds statistics for the MergeRanges. @@ -59,9 +49,9 @@ func MergeAndRewriteFileRanges( filesMap[string(file.StartKey)] = append(filesMap[string(file.StartKey)], file) // We skips all default cf files because we don't range overlap. 
- if file.Cf == writeCFName || strings.Contains(file.GetName(), writeCFName) { + if file.Cf == WriteCFName || strings.Contains(file.GetName(), WriteCFName) { writeCFFile++ - } else if file.Cf == defaultCFName || strings.Contains(file.GetName(), defaultCFName) { + } else if file.Cf == DefaultCFName || strings.Contains(file.GetName(), DefaultCFName) { defaultCFFile++ } totalBytes += file.TotalBytes @@ -123,40 +113,3 @@ func MergeAndRewriteFileRanges( MergedRegionBytesAvg: int(mergedRegionBytesAvg), }, nil } - -func RewriteRange(rg *rtree.Range, rewriteRules *RewriteRules) (*rtree.Range, error) { - if rewriteRules == nil { - return rg, nil - } - startID := tablecodec.DecodeTableID(rg.StartKey) - endID := tablecodec.DecodeTableID(rg.EndKey) - var rule *import_sstpb.RewriteRule - if startID != endID { - log.Warn("table id does not match", - logutil.Key("startKey", rg.StartKey), - logutil.Key("endKey", rg.EndKey), - zap.Int64("startID", startID), - zap.Int64("endID", endID)) - return nil, errors.Annotate(berrors.ErrRestoreTableIDMismatch, "table id mismatch") - } - rg.StartKey, rule = replacePrefix(rg.StartKey, rewriteRules) - if rule == nil { - log.Warn("cannot find rewrite rule", logutil.Key("key", rg.StartKey)) - } else { - log.Debug( - "rewrite start key", - logutil.Key("key", rg.StartKey), logutil.RewriteRule(rule)) - } - oldKey := rg.EndKey - rg.EndKey, rule = replacePrefix(rg.EndKey, rewriteRules) - if rule == nil { - log.Warn("cannot find rewrite rule", logutil.Key("key", rg.EndKey)) - } else { - log.Debug( - "rewrite end key", - logutil.Key("origin-key", oldKey), - logutil.Key("key", rg.EndKey), - logutil.RewriteRule(rule)) - } - return rg, nil -} diff --git a/br/pkg/restore/merge_test.go b/br/pkg/restore/utils/merge_test.go similarity index 65% rename from br/pkg/restore/merge_test.go rename to br/pkg/restore/utils/merge_test.go index a9c185070e..2d696b88fe 100644 --- a/br/pkg/restore/merge_test.go +++ b/br/pkg/restore/utils/merge_test.go @@ -1,6 +1,6 @@ // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. -package restore_test +package utils_test import ( "bytes" @@ -12,11 +12,9 @@ import ( "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" - "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/tidb/br/pkg/conn" berrors "github.com/pingcap/tidb/br/pkg/errors" - "github.com/pingcap/tidb/br/pkg/restore" - "github.com/pingcap/tidb/br/pkg/rtree" + "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/pkg/sessionctx/stmtctx" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/types" @@ -93,7 +91,7 @@ func TestMergeRanges(t *testing.T) { type Case struct { files [][5]int // tableID, indexID num, bytes, kv merged []int // length of each merged range - stat restore.MergeRangesStat + stat utils.MergeRangesStat } splitSizeBytes := int(conn.DefaultMergeRegionSizeBytes) splitKeyCount := int(conn.DefaultMergeRegionKeyCount) @@ -102,54 +100,54 @@ func TestMergeRanges(t *testing.T) { { files: [][5]int{}, merged: []int{}, - stat: restore.MergeRangesStat{TotalRegions: 0, MergedRegions: 0}, + stat: utils.MergeRangesStat{TotalRegions: 0, MergedRegions: 0}, }, // Do not merge big range. 
{ files: [][5]int{{1, 0, 1, splitSizeBytes, 1}, {1, 0, 1, 1, 1}}, merged: []int{1, 1}, - stat: restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, + stat: utils.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, }, { files: [][5]int{{1, 0, 1, 1, 1}, {1, 0, 1, splitSizeBytes, 1}}, merged: []int{1, 1}, - stat: restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, + stat: utils.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, }, { files: [][5]int{{1, 0, 1, 1, splitKeyCount}, {1, 0, 1, 1, 1}}, merged: []int{1, 1}, - stat: restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, + stat: utils.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, }, { files: [][5]int{{1, 0, 1, 1, 1}, {1, 0, 1, 1, splitKeyCount}}, merged: []int{1, 1}, - stat: restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, + stat: utils.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, }, // 3 -> 1 { files: [][5]int{{1, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {1, 0, 1, 1, 1}}, merged: []int{3}, - stat: restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 1}, + stat: utils.MergeRangesStat{TotalRegions: 3, MergedRegions: 1}, }, // 3 -> 2, size: [split*1/3, split*1/3, split*1/2] -> [split*2/3, split*1/2] { files: [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 2, 1}}, merged: []int{2, 1}, - stat: restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 2}, + stat: utils.MergeRangesStat{TotalRegions: 3, MergedRegions: 2}, }, // 4 -> 2, size: [split*1/3, split*1/3, split*1/2, 1] -> [split*2/3, split*1/2 +1] { files: [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 2, 1}, {1, 0, 1, 1, 1}}, merged: []int{2, 2}, - stat: restore.MergeRangesStat{TotalRegions: 4, MergedRegions: 2}, + stat: utils.MergeRangesStat{TotalRegions: 4, MergedRegions: 2}, }, // 5 -> 3, size: [split*1/3, split*1/3, split, split*1/2, 1] -> [split*2/3, split, split*1/2 +1] { files: [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes, 1}, {1, 0, 1, splitSizeBytes / 2, 1}, {1, 0, 1, 1, 1}}, merged: []int{2, 1, 2}, - stat: restore.MergeRangesStat{TotalRegions: 5, MergedRegions: 3}, + stat: utils.MergeRangesStat{TotalRegions: 5, MergedRegions: 3}, }, // Do not merge ranges from different tables @@ -157,13 +155,13 @@ func TestMergeRanges(t *testing.T) { { files: [][5]int{{1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}}, merged: []int{1, 1}, - stat: restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, + stat: utils.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, }, // 3 -> 2, size: [1@split*1/3, 2@split*1/3, 2@split*1/2] -> [1@split*1/3, 2@split*5/6] { files: [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {2, 0, 1, splitSizeBytes / 3, 1}, {2, 0, 1, splitSizeBytes / 2, 1}}, merged: []int{1, 2}, - stat: restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 2}, + stat: utils.MergeRangesStat{TotalRegions: 3, MergedRegions: 2}, }, // Do not merge ranges from different indexes. @@ -171,28 +169,28 @@ func TestMergeRanges(t *testing.T) { { files: [][5]int{{1, 1, 1, 1, 1}, {1, 2, 1, 1, 1}}, merged: []int{1, 1}, - stat: restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, + stat: utils.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, }, // Index ID out of order. 
// 2 -> 2, size: [1, 1] -> [1, 1], index ID: [2, 1] -> [1, 2] { files: [][5]int{{1, 2, 1, 1, 1}, {1, 1, 1, 1, 1}}, merged: []int{1, 1}, - stat: restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, + stat: utils.MergeRangesStat{TotalRegions: 2, MergedRegions: 2}, }, // 3 -> 3, size: [1, 1, 1] -> [1, 1, 1] // (table ID, index ID): [(1, 0), (2, 1), (2, 2)] -> [(1, 0), (2, 1), (2, 2)] { files: [][5]int{{1, 0, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 2, 1, 1, 1}}, merged: []int{1, 1, 1}, - stat: restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 3}, + stat: utils.MergeRangesStat{TotalRegions: 3, MergedRegions: 3}, }, // 4 -> 3, size: [1, 1, 1, 1] -> [1, 1, 2] // (table ID, index ID): [(1, 0), (2, 1), (2, 0), (2, 0)] -> [(1, 0), (2, 1), (2, 0)] { files: [][5]int{{1, 0, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 0, 1, 1, 1}, {2, 0, 1, 1, 1}}, merged: []int{1, 1, 2}, - stat: restore.MergeRangesStat{TotalRegions: 4, MergedRegions: 3}, + stat: utils.MergeRangesStat{TotalRegions: 4, MergedRegions: 3}, }, // Merge the same table ID and index ID. // 4 -> 3, size: [1, 1, 1, 1] -> [1, 2, 1] @@ -200,7 +198,7 @@ func TestMergeRanges(t *testing.T) { { files: [][5]int{{1, 0, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 0, 1, 1, 1}}, merged: []int{1, 2, 1}, - stat: restore.MergeRangesStat{TotalRegions: 4, MergedRegions: 3}, + stat: utils.MergeRangesStat{TotalRegions: 4, MergedRegions: 3}, }, } @@ -210,7 +208,7 @@ func TestMergeRanges(t *testing.T) { for _, f := range cs.files { files = append(files, fb.build(f[0], f[1], f[2], f[3], f[4])...) } - rngs, stat, err := restore.MergeAndRewriteFileRanges(files, nil, conn.DefaultMergeRegionSizeBytes, conn.DefaultMergeRegionKeyCount) + rngs, stat, err := utils.MergeAndRewriteFileRanges(files, nil, conn.DefaultMergeRegionSizeBytes, conn.DefaultMergeRegionKeyCount) require.NoErrorf(t, err, "%+v", cs) require.Equalf(t, cs.stat.TotalRegions, stat.TotalRegions, "%+v", cs) require.Equalf(t, cs.stat.MergedRegions, stat.MergedRegions, "%+v", cs) @@ -232,7 +230,7 @@ func TestMergeRawKVRanges(t *testing.T) { files = append(files, fb.build(1, 0, 2, 1, 1)...) // RawKV does not have write cf files = files[1:] - _, stat, err := restore.MergeAndRewriteFileRanges( + _, stat, err := utils.MergeAndRewriteFileRanges( files, nil, conn.DefaultMergeRegionSizeBytes, conn.DefaultMergeRegionKeyCount) require.NoError(t, err) require.Equal(t, 1, stat.TotalRegions) @@ -245,7 +243,7 @@ func TestInvalidRanges(t *testing.T) { files = append(files, fb.build(1, 0, 1, 1, 1)...) 
files[0].Name = "invalid.sst" files[0].Cf = "invalid" - _, _, err := restore.MergeAndRewriteFileRanges( + _, _, err := utils.MergeAndRewriteFileRanges( files, nil, conn.DefaultMergeRegionSizeBytes, conn.DefaultMergeRegionKeyCount) require.Error(t, err) require.Equal(t, berrors.ErrRestoreInvalidBackup, errors.Cause(err)) @@ -267,7 +265,7 @@ func benchmarkMergeRanges(b *testing.B, filesCount int) { } var err error for i := 0; i < b.N; i++ { - _, _, err = restore.MergeAndRewriteFileRanges(files, nil, conn.DefaultMergeRegionSizeBytes, conn.DefaultMergeRegionKeyCount) + _, _, err = utils.MergeAndRewriteFileRanges(files, nil, conn.DefaultMergeRegionSizeBytes, conn.DefaultMergeRegionKeyCount) if err != nil { b.Error(err) } @@ -293,91 +291,3 @@ func BenchmarkMergeRanges50k(b *testing.B) { func BenchmarkMergeRanges100k(b *testing.B) { benchmarkMergeRanges(b, 100000) } -func TestRewriteRange(t *testing.T) { - // Define test cases - cases := []struct { - rg *rtree.Range - rewriteRules *restore.RewriteRules - expectedRange *rtree.Range - expectedError error - }{ - // Test case 1: No rewrite rules - { - rg: &rtree.Range{ - StartKey: []byte("startKey"), - EndKey: []byte("endKey"), - }, - rewriteRules: nil, - expectedRange: &rtree.Range{StartKey: []byte("startKey"), EndKey: []byte("endKey")}, - expectedError: nil, - }, - // Test case 2: Rewrite rule found for both start key and end key - { - rg: &rtree.Range{ - StartKey: append(tablecodec.GenTableIndexPrefix(1), []byte("startKey")...), - EndKey: append(tablecodec.GenTableIndexPrefix(1), []byte("endKey")...), - }, - rewriteRules: &restore.RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - OldKeyPrefix: tablecodec.GenTableIndexPrefix(1), - NewKeyPrefix: tablecodec.GenTableIndexPrefix(2), - }, - }, - }, - expectedRange: &rtree.Range{ - StartKey: append(tablecodec.GenTableIndexPrefix(2), []byte("startKey")...), - EndKey: append(tablecodec.GenTableIndexPrefix(2), []byte("endKey")...), - }, - expectedError: nil, - }, - // Test case 3: Rewrite rule found for end key - { - rg: &rtree.Range{ - StartKey: append(tablecodec.GenTableIndexPrefix(1), []byte("startKey")...), - EndKey: append(tablecodec.GenTableIndexPrefix(1), []byte("endKey")...), - }, - rewriteRules: &restore.RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - OldKeyPrefix: append(tablecodec.GenTableIndexPrefix(1), []byte("endKey")...), - NewKeyPrefix: append(tablecodec.GenTableIndexPrefix(2), []byte("newEndKey")...), - }, - }, - }, - expectedRange: &rtree.Range{ - StartKey: append(tablecodec.GenTableIndexPrefix(1), []byte("startKey")...), - EndKey: append(tablecodec.GenTableIndexPrefix(2), []byte("newEndKey")...), - }, - expectedError: nil, - }, - // Test case 4: Table ID mismatch - { - rg: &rtree.Range{ - StartKey: []byte("t1_startKey"), - EndKey: []byte("t2_endKey"), - }, - rewriteRules: &restore.RewriteRules{ - Data: []*import_sstpb.RewriteRule{ - { - OldKeyPrefix: []byte("t1_startKey"), - NewKeyPrefix: []byte("t2_newStartKey"), - }, - }, - }, - expectedRange: nil, - expectedError: errors.Annotate(berrors.ErrRestoreTableIDMismatch, "table id mismatch"), - }, - } - - // Run test cases - for _, tc := range cases { - actualRange, actualError := restore.RewriteRange(tc.rg, tc.rewriteRules) - if tc.expectedError != nil { - require.EqualError(t, tc.expectedError, actualError.Error()) - } else { - require.NoError(t, actualError) - } - require.Equal(t, tc.expectedRange, actualRange) - } -} diff --git a/br/pkg/restore/utils/rewrite_rule.go b/br/pkg/restore/utils/rewrite_rule.go new file mode 
100644 index 0000000000..5d9878fd3f --- /dev/null +++ b/br/pkg/restore/utils/rewrite_rule.go @@ -0,0 +1,381 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "bytes" + + "github.com/pingcap/errors" + backuppb "github.com/pingcap/kvproto/pkg/brpb" + "github.com/pingcap/kvproto/pkg/import_sstpb" + "github.com/pingcap/log" + berrors "github.com/pingcap/tidb/br/pkg/errors" + "github.com/pingcap/tidb/br/pkg/logutil" + "github.com/pingcap/tidb/br/pkg/rtree" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/tablecodec" + "github.com/pingcap/tidb/pkg/util/codec" + "github.com/pingcap/tidb/pkg/util/redact" + "go.uber.org/zap" +) + +// AppliedFile has two types for now. +// 1. SST file used by full backup/restore. +// 2. KV file used by pitr restore. +type AppliedFile interface { + GetStartKey() []byte + GetEndKey() []byte +} + +// RewriteRules contains rules for rewriting keys of tables. +type RewriteRules struct { + Data []*import_sstpb.RewriteRule + OldKeyspace []byte + NewKeyspace []byte +} + +// Append append its argument to this rewrite rules. +func (r *RewriteRules) Append(other RewriteRules) { + r.Data = append(r.Data, other.Data...) +} + +// EmptyRewriteRule make a map of new, empty rewrite rules. +func EmptyRewriteRulesMap() map[int64]*RewriteRules { + return make(map[int64]*RewriteRules) +} + +// EmptyRewriteRule make a new, empty rewrite rule. +func EmptyRewriteRule() *RewriteRules { + return &RewriteRules{ + Data: []*import_sstpb.RewriteRule{}, + } +} + +// GetRewriteRules returns the rewrite rule of the new table and the old table. +// getDetailRule is used for normal backup & restore. +// if set to true, means we collect the rules like tXXX_r, tYYY_i. +// if set to false, means we only collect the rules contain table_id, tXXX, tYYY. 
+func GetRewriteRules( + newTable, oldTable *model.TableInfo, newTimeStamp uint64, getDetailRule bool, +) *RewriteRules { + tableIDs := GetTableIDMap(newTable, oldTable) + indexIDs := GetIndexIDMap(newTable, oldTable) + + dataRules := make([]*import_sstpb.RewriteRule, 0) + for oldTableID, newTableID := range tableIDs { + if getDetailRule { + dataRules = append(dataRules, &import_sstpb.RewriteRule{ + OldKeyPrefix: tablecodec.GenTableRecordPrefix(oldTableID), + NewKeyPrefix: tablecodec.GenTableRecordPrefix(newTableID), + NewTimestamp: newTimeStamp, + }) + for oldIndexID, newIndexID := range indexIDs { + dataRules = append(dataRules, &import_sstpb.RewriteRule{ + OldKeyPrefix: tablecodec.EncodeTableIndexPrefix(oldTableID, oldIndexID), + NewKeyPrefix: tablecodec.EncodeTableIndexPrefix(newTableID, newIndexID), + NewTimestamp: newTimeStamp, + }) + } + } else { + dataRules = append(dataRules, &import_sstpb.RewriteRule{ + OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), + NewKeyPrefix: tablecodec.EncodeTablePrefix(newTableID), + NewTimestamp: newTimeStamp, + }) + } + } + + return &RewriteRules{ + Data: dataRules, + } +} + +func GetRewriteRulesMap( + newTable, oldTable *model.TableInfo, newTimeStamp uint64, getDetailRule bool, +) map[int64]*RewriteRules { + rules := make(map[int64]*RewriteRules) + + tableIDs := GetTableIDMap(newTable, oldTable) + indexIDs := GetIndexIDMap(newTable, oldTable) + + for oldTableID, newTableID := range tableIDs { + dataRules := make([]*import_sstpb.RewriteRule, 0) + if getDetailRule { + dataRules = append(dataRules, &import_sstpb.RewriteRule{ + OldKeyPrefix: tablecodec.GenTableRecordPrefix(oldTableID), + NewKeyPrefix: tablecodec.GenTableRecordPrefix(newTableID), + NewTimestamp: newTimeStamp, + }) + for oldIndexID, newIndexID := range indexIDs { + dataRules = append(dataRules, &import_sstpb.RewriteRule{ + OldKeyPrefix: tablecodec.EncodeTableIndexPrefix(oldTableID, oldIndexID), + NewKeyPrefix: tablecodec.EncodeTableIndexPrefix(newTableID, newIndexID), + NewTimestamp: newTimeStamp, + }) + } + } else { + dataRules = append(dataRules, &import_sstpb.RewriteRule{ + OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), + NewKeyPrefix: tablecodec.EncodeTablePrefix(newTableID), + NewTimestamp: newTimeStamp, + }) + } + + rules[oldTableID] = &RewriteRules{ + Data: dataRules, + } + } + + return rules +} + +// GetRewriteRuleOfTable returns a rewrite rule from t_{oldID} to t_{newID}. +func GetRewriteRuleOfTable( + oldTableID, newTableID int64, + newTimeStamp uint64, + indexIDs map[int64]int64, + getDetailRule bool, +) *RewriteRules { + dataRules := make([]*import_sstpb.RewriteRule, 0) + + if getDetailRule { + dataRules = append(dataRules, &import_sstpb.RewriteRule{ + OldKeyPrefix: tablecodec.GenTableRecordPrefix(oldTableID), + NewKeyPrefix: tablecodec.GenTableRecordPrefix(newTableID), + NewTimestamp: newTimeStamp, + }) + for oldIndexID, newIndexID := range indexIDs { + dataRules = append(dataRules, &import_sstpb.RewriteRule{ + OldKeyPrefix: tablecodec.EncodeTableIndexPrefix(oldTableID, oldIndexID), + NewKeyPrefix: tablecodec.EncodeTableIndexPrefix(newTableID, newIndexID), + NewTimestamp: newTimeStamp, + }) + } + } else { + dataRules = append(dataRules, &import_sstpb.RewriteRule{ + OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), + NewKeyPrefix: tablecodec.EncodeTablePrefix(newTableID), + NewTimestamp: newTimeStamp, + }) + } + + return &RewriteRules{Data: dataRules} +} + +// ValidateFileRewriteRule uses rewrite rules to validate the ranges of a file. 
+func ValidateFileRewriteRule(file *backuppb.File, rewriteRules *RewriteRules) error { + // Check if the start key has a matched rewrite key + _, startRule := rewriteRawKey(file.GetStartKey(), rewriteRules) + if rewriteRules != nil && startRule == nil { + tableID := tablecodec.DecodeTableID(file.GetStartKey()) + log.Error( + "cannot find rewrite rule for file start key", + zap.Int64("tableID", tableID), + logutil.File(file), + ) + return errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule") + } + // Check if the end key has a matched rewrite key + _, endRule := rewriteRawKey(file.GetEndKey(), rewriteRules) + if rewriteRules != nil && endRule == nil { + tableID := tablecodec.DecodeTableID(file.GetEndKey()) + log.Error( + "cannot find rewrite rule for file end key", + zap.Int64("tableID", tableID), + logutil.File(file), + ) + return errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule") + } + // the rewrite rule of the start key and the end key should be equaled. + // i.e. there should only one rewrite rule for one file, a file should only be imported into one region. + if !bytes.Equal(startRule.GetNewKeyPrefix(), endRule.GetNewKeyPrefix()) { + startTableID := tablecodec.DecodeTableID(file.GetStartKey()) + endTableID := tablecodec.DecodeTableID(file.GetEndKey()) + log.Error( + "unexpected rewrite rules", + zap.Int64("startTableID", startTableID), + zap.Int64("endTableID", endTableID), + zap.Stringer("startRule", startRule), + zap.Stringer("endRule", endRule), + logutil.File(file), + ) + return errors.Annotatef(berrors.ErrRestoreInvalidRewrite, + "rewrite rule mismatch, the backup data may be dirty or from incompatible versions of BR, startKey rule: %X => %X, endKey rule: %X => %X", + startRule.OldKeyPrefix, startRule.NewKeyPrefix, endRule.OldKeyPrefix, endRule.NewKeyPrefix, + ) + } + return nil +} + +// Rewrites an encoded key and returns a encoded key. +func rewriteEncodedKey(key []byte, rewriteRules *RewriteRules) ([]byte, *import_sstpb.RewriteRule) { + if rewriteRules == nil { + return key, nil + } + if len(key) > 0 { + _, rawKey, _ := codec.DecodeBytes(key, nil) + return rewriteRawKey(rawKey, rewriteRules) + } + return nil, nil +} + +// Rewrites a raw key with raw key rewrite rule and returns an encoded key. 
+func rewriteRawKey(key []byte, rewriteRules *RewriteRules) ([]byte, *import_sstpb.RewriteRule) { + if rewriteRules == nil { + return codec.EncodeBytes([]byte{}, key), nil + } + if len(key) > 0 { + rule := matchOldPrefix(key, rewriteRules) + ret := bytes.Replace(key, rule.GetOldKeyPrefix(), rule.GetNewKeyPrefix(), 1) + return codec.EncodeBytes([]byte{}, ret), rule + } + return nil, nil +} + +func matchOldPrefix(key []byte, rewriteRules *RewriteRules) *import_sstpb.RewriteRule { + for _, rule := range rewriteRules.Data { + if bytes.HasPrefix(key, rule.GetOldKeyPrefix()) { + return rule + } + } + return nil +} + +// GetRewriteTableID gets rewrite table id by the rewrite rule and original table id +func GetRewriteTableID(tableID int64, rewriteRules *RewriteRules) int64 { + tableKey := tablecodec.GenTableRecordPrefix(tableID) + rule := matchOldPrefix(tableKey, rewriteRules) + if rule == nil { + return 0 + } + + return tablecodec.DecodeTableID(rule.GetNewKeyPrefix()) +} + +func FindMatchedRewriteRule(file AppliedFile, rules *RewriteRules) *import_sstpb.RewriteRule { + startID := tablecodec.DecodeTableID(file.GetStartKey()) + endID := tablecodec.DecodeTableID(file.GetEndKey()) + if startID != endID { + return nil + } + _, rule := rewriteRawKey(file.GetStartKey(), rules) + if rule == nil { + // fall back to encoded key + _, rule = rewriteEncodedKey(file.GetStartKey(), rules) + } + return rule +} + +// GetRewriteRawKeys rewrites rules to the raw key. +func GetRewriteRawKeys(file AppliedFile, rewriteRules *RewriteRules) (startKey, endKey []byte, err error) { + startID := tablecodec.DecodeTableID(file.GetStartKey()) + endID := tablecodec.DecodeTableID(file.GetEndKey()) + var rule *import_sstpb.RewriteRule + if startID == endID { + startKey, rule = rewriteRawKey(file.GetStartKey(), rewriteRules) + if rewriteRules != nil && rule == nil { + err = errors.Annotatef(berrors.ErrRestoreInvalidRewrite, "cannot find raw rewrite rule for start key, startKey: %s", redact.Key(file.GetStartKey())) + return + } + endKey, rule = rewriteRawKey(file.GetEndKey(), rewriteRules) + if rewriteRules != nil && rule == nil { + err = errors.Annotatef(berrors.ErrRestoreInvalidRewrite, "cannot find raw rewrite rule for end key, endKey: %s", redact.Key(file.GetEndKey())) + return + } + } else { + log.Error("table ids dont matched", + zap.Int64("startID", startID), + zap.Int64("endID", endID), + logutil.Key("startKey", startKey), + logutil.Key("endKey", endKey)) + err = errors.Annotate(berrors.ErrRestoreInvalidRewrite, "invalid table id") + } + return +} + +// GetRewriteRawKeys rewrites rules to the encoded key +func GetRewriteEncodedKeys(file AppliedFile, rewriteRules *RewriteRules) (startKey, endKey []byte, err error) { + startID := tablecodec.DecodeTableID(file.GetStartKey()) + endID := tablecodec.DecodeTableID(file.GetEndKey()) + var rule *import_sstpb.RewriteRule + if startID == endID { + startKey, rule = rewriteEncodedKey(file.GetStartKey(), rewriteRules) + if rewriteRules != nil && rule == nil { + err = errors.Annotatef(berrors.ErrRestoreInvalidRewrite, "cannot find encode rewrite rule for start key, startKey: %s", redact.Key(file.GetStartKey())) + return + } + endKey, rule = rewriteEncodedKey(file.GetEndKey(), rewriteRules) + if rewriteRules != nil && rule == nil { + err = errors.Annotatef(berrors.ErrRestoreInvalidRewrite, "cannot find encode rewrite rule for end key, endKey: %s", redact.Key(file.GetEndKey())) + return + } + } else { + log.Error("table ids dont matched", + zap.Int64("startID", startID), + 
zap.Int64("endID", endID), + logutil.Key("startKey", startKey), + logutil.Key("endKey", endKey)) + err = errors.Annotate(berrors.ErrRestoreInvalidRewrite, "invalid table id") + } + return +} + +func replacePrefix(s []byte, rewriteRules *RewriteRules) ([]byte, *import_sstpb.RewriteRule) { + // We should search the dataRules firstly. + for _, rule := range rewriteRules.Data { + if bytes.HasPrefix(s, rule.GetOldKeyPrefix()) { + return append(append([]byte{}, rule.GetNewKeyPrefix()...), s[len(rule.GetOldKeyPrefix()):]...), rule + } + } + + return s, nil +} + +func RewriteRange(rg *rtree.Range, rewriteRules *RewriteRules) (*rtree.Range, error) { + if rewriteRules == nil { + return rg, nil + } + startID := tablecodec.DecodeTableID(rg.StartKey) + endID := tablecodec.DecodeTableID(rg.EndKey) + var rule *import_sstpb.RewriteRule + if startID != endID { + log.Warn("table id does not match", + logutil.Key("startKey", rg.StartKey), + logutil.Key("endKey", rg.EndKey), + zap.Int64("startID", startID), + zap.Int64("endID", endID)) + return nil, errors.Annotate(berrors.ErrRestoreTableIDMismatch, "table id mismatch") + } + rg.StartKey, rule = replacePrefix(rg.StartKey, rewriteRules) + if rule == nil { + log.Warn("cannot find rewrite rule", logutil.Key("key", rg.StartKey)) + } else { + log.Debug( + "rewrite start key", + logutil.Key("key", rg.StartKey), logutil.RewriteRule(rule)) + } + oldKey := rg.EndKey + rg.EndKey, rule = replacePrefix(rg.EndKey, rewriteRules) + if rule == nil { + log.Warn("cannot find rewrite rule", logutil.Key("key", rg.EndKey)) + } else { + log.Debug( + "rewrite end key", + logutil.Key("origin-key", oldKey), + logutil.Key("key", rg.EndKey), + logutil.RewriteRule(rule)) + } + return rg, nil +} diff --git a/br/pkg/restore/utils/rewrite_rule_test.go b/br/pkg/restore/utils/rewrite_rule_test.go new file mode 100644 index 0000000000..03ac302cf4 --- /dev/null +++ b/br/pkg/restore/utils/rewrite_rule_test.go @@ -0,0 +1,360 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils_test + +import ( + "testing" + + "github.com/pingcap/errors" + backuppb "github.com/pingcap/kvproto/pkg/brpb" + "github.com/pingcap/kvproto/pkg/import_sstpb" + berrors "github.com/pingcap/tidb/br/pkg/errors" + "github.com/pingcap/tidb/br/pkg/restore/utils" + "github.com/pingcap/tidb/br/pkg/rtree" + "github.com/pingcap/tidb/pkg/tablecodec" + "github.com/pingcap/tidb/pkg/util/codec" + "github.com/stretchr/testify/require" +) + +func TestValidateFileRewriteRule(t *testing.T) { + rules := &utils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{{ + OldKeyPrefix: []byte(tablecodec.EncodeTablePrefix(1)), + NewKeyPrefix: []byte(tablecodec.EncodeTablePrefix(2)), + }}, + } + + // Empty start/end key is not allowed. 
+ err := utils.ValidateFileRewriteRule( + &backuppb.File{ + Name: "file_write.sst", + StartKey: []byte(""), + EndKey: []byte(""), + }, + rules, + ) + require.Error(t, err) + require.Regexp(t, ".*cannot find rewrite rule.*", err.Error()) + + // Range is not overlap, no rule found. + err = utils.ValidateFileRewriteRule( + &backuppb.File{ + Name: "file_write.sst", + StartKey: tablecodec.EncodeTablePrefix(0), + EndKey: tablecodec.EncodeTablePrefix(1), + }, + rules, + ) + require.Error(t, err) + require.Regexp(t, ".*cannot find rewrite rule.*", err.Error()) + + // No rule for end key. + err = utils.ValidateFileRewriteRule( + &backuppb.File{ + Name: "file_write.sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(2), + }, + rules, + ) + require.Error(t, err) + require.Regexp(t, ".*cannot find rewrite rule.*", err.Error()) + + // Add a rule for end key. + rules.Data = append(rules.Data, &import_sstpb.RewriteRule{ + OldKeyPrefix: tablecodec.EncodeTablePrefix(2), + NewKeyPrefix: tablecodec.EncodeTablePrefix(3), + }) + err = utils.ValidateFileRewriteRule( + &backuppb.File{ + Name: "file_write.sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(2), + }, + rules, + ) + require.Error(t, err) + require.Regexp(t, ".*rewrite rule mismatch.*", err.Error()) + + // Add a bad rule for end key, after rewrite start key > end key. + rules.Data = append(rules.Data[:1], &import_sstpb.RewriteRule{ + OldKeyPrefix: tablecodec.EncodeTablePrefix(2), + NewKeyPrefix: tablecodec.EncodeTablePrefix(1), + }) + err = utils.ValidateFileRewriteRule( + &backuppb.File{ + Name: "file_write.sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(2), + }, + rules, + ) + require.Error(t, err) + require.Regexp(t, ".*rewrite rule mismatch.*", err.Error()) +} + +func TestRewriteFileKeys(t *testing.T) { + rewriteRules := utils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{ + { + NewKeyPrefix: tablecodec.GenTablePrefix(2), + OldKeyPrefix: tablecodec.GenTablePrefix(1), + }, + { + NewKeyPrefix: tablecodec.GenTablePrefix(511), + OldKeyPrefix: tablecodec.GenTablePrefix(767), + }, + }, + } + rawKeyFile := backuppb.File{ + Name: "backup.sst", + StartKey: tablecodec.GenTableRecordPrefix(1), + EndKey: tablecodec.GenTableRecordPrefix(1).PrefixNext(), + } + start, end, err := utils.GetRewriteRawKeys(&rawKeyFile, &rewriteRules) + require.NoError(t, err) + _, end, err = codec.DecodeBytes(end, nil) + require.NoError(t, err) + _, start, err = codec.DecodeBytes(start, nil) + require.NoError(t, err) + require.Equal(t, []byte(tablecodec.GenTableRecordPrefix(2)), start) + require.Equal(t, []byte(tablecodec.GenTableRecordPrefix(2).PrefixNext()), end) + + encodeKeyFile := backuppb.DataFileInfo{ + Path: "bakcup.log", + StartKey: codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(1)), + EndKey: codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(1).PrefixNext()), + } + start, end, err = utils.GetRewriteEncodedKeys(&encodeKeyFile, &rewriteRules) + require.NoError(t, err) + require.Equal(t, codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(2)), start) + require.Equal(t, codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(2).PrefixNext()), end) + + // test for table id 767 + encodeKeyFile767 := backuppb.DataFileInfo{ + Path: "bakcup.log", + StartKey: codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(767)), + EndKey: codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(767).PrefixNext()), + } + // use raw rewrite should no error 
but not equal + start, end, err = utils.GetRewriteRawKeys(&encodeKeyFile767, &rewriteRules) + require.NoError(t, err) + require.NotEqual(t, codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(511)), start) + require.NotEqual(t, codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(511).PrefixNext()), end) + // use encode rewrite should no error and equal + start, end, err = utils.GetRewriteEncodedKeys(&encodeKeyFile767, &rewriteRules) + require.NoError(t, err) + require.Equal(t, codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(511)), start) + require.Equal(t, codec.EncodeBytes(nil, tablecodec.GenTableRecordPrefix(511).PrefixNext()), end) +} + +func TestRewriteRange(t *testing.T) { + // Define test cases + cases := []struct { + rg *rtree.Range + rewriteRules *utils.RewriteRules + expectedRange *rtree.Range + expectedError error + }{ + // Test case 1: No rewrite rules + { + rg: &rtree.Range{ + StartKey: []byte("startKey"), + EndKey: []byte("endKey"), + }, + rewriteRules: nil, + expectedRange: &rtree.Range{StartKey: []byte("startKey"), EndKey: []byte("endKey")}, + expectedError: nil, + }, + // Test case 2: Rewrite rule found for both start key and end key + { + rg: &rtree.Range{ + StartKey: append(tablecodec.GenTableIndexPrefix(1), []byte("startKey")...), + EndKey: append(tablecodec.GenTableIndexPrefix(1), []byte("endKey")...), + }, + rewriteRules: &utils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{ + { + OldKeyPrefix: tablecodec.GenTableIndexPrefix(1), + NewKeyPrefix: tablecodec.GenTableIndexPrefix(2), + }, + }, + }, + expectedRange: &rtree.Range{ + StartKey: append(tablecodec.GenTableIndexPrefix(2), []byte("startKey")...), + EndKey: append(tablecodec.GenTableIndexPrefix(2), []byte("endKey")...), + }, + expectedError: nil, + }, + // Test case 3: Rewrite rule found for end key + { + rg: &rtree.Range{ + StartKey: append(tablecodec.GenTableIndexPrefix(1), []byte("startKey")...), + EndKey: append(tablecodec.GenTableIndexPrefix(1), []byte("endKey")...), + }, + rewriteRules: &utils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{ + { + OldKeyPrefix: append(tablecodec.GenTableIndexPrefix(1), []byte("endKey")...), + NewKeyPrefix: append(tablecodec.GenTableIndexPrefix(2), []byte("newEndKey")...), + }, + }, + }, + expectedRange: &rtree.Range{ + StartKey: append(tablecodec.GenTableIndexPrefix(1), []byte("startKey")...), + EndKey: append(tablecodec.GenTableIndexPrefix(2), []byte("newEndKey")...), + }, + expectedError: nil, + }, + // Test case 4: Table ID mismatch + { + rg: &rtree.Range{ + StartKey: []byte("t1_startKey"), + EndKey: []byte("t2_endKey"), + }, + rewriteRules: &utils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{ + { + OldKeyPrefix: []byte("t1_startKey"), + NewKeyPrefix: []byte("t2_newStartKey"), + }, + }, + }, + expectedRange: nil, + expectedError: errors.Annotate(berrors.ErrRestoreTableIDMismatch, "table id mismatch"), + }, + } + + // Run test cases + for _, tc := range cases { + actualRange, actualError := utils.RewriteRange(tc.rg, tc.rewriteRules) + if tc.expectedError != nil { + require.EqualError(t, tc.expectedError, actualError.Error()) + } else { + require.NoError(t, actualError) + } + require.Equal(t, tc.expectedRange, actualRange) + } +} + +func TestGetRewriteTableID(t *testing.T) { + var tableID int64 = 76 + var oldTableID int64 = 80 + { + rewriteRules := &utils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{ + { + OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID), + NewKeyPrefix: tablecodec.EncodeTablePrefix(tableID), + }, + }, + } + + newTableID := 
utils.GetRewriteTableID(oldTableID, rewriteRules) + require.Equal(t, tableID, newTableID) + } + + { + rewriteRules := &utils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{ + { + OldKeyPrefix: tablecodec.GenTableRecordPrefix(oldTableID), + NewKeyPrefix: tablecodec.GenTableRecordPrefix(tableID), + }, + }, + } + + newTableID := utils.GetRewriteTableID(oldTableID, rewriteRules) + require.Equal(t, tableID, newTableID) + } +} + +func rangeEquals(t *testing.T, obtained, expected []rtree.Range) { + require.Equal(t, len(expected), len(obtained)) + for i := range obtained { + require.Equal(t, expected[i].StartKey, obtained[i].StartKey) + require.Equal(t, expected[i].EndKey, obtained[i].EndKey) + } +} + +func TestSortRange(t *testing.T) { + dataRules := []*import_sstpb.RewriteRule{ + {OldKeyPrefix: tablecodec.GenTableRecordPrefix(1), NewKeyPrefix: tablecodec.GenTableRecordPrefix(4)}, + {OldKeyPrefix: tablecodec.GenTableRecordPrefix(2), NewKeyPrefix: tablecodec.GenTableRecordPrefix(5)}, + } + rewriteRules := &utils.RewriteRules{ + Data: dataRules, + } + ranges1 := []rtree.Range{ + { + StartKey: append(tablecodec.GenTableRecordPrefix(1), []byte("aaa")...), + EndKey: append(tablecodec.GenTableRecordPrefix(1), []byte("bbb")...), Files: nil, + }, + } + for i, rg := range ranges1 { + tmp, _ := utils.RewriteRange(&rg, rewriteRules) + ranges1[i] = *tmp + } + rs1, err := utils.SortRanges(ranges1) + require.NoErrorf(t, err, "sort range1 failed: %v", err) + rangeEquals(t, rs1, []rtree.Range{ + { + StartKey: append(tablecodec.GenTableRecordPrefix(4), []byte("aaa")...), + EndKey: append(tablecodec.GenTableRecordPrefix(4), []byte("bbb")...), Files: nil, + }, + }) + + ranges2 := []rtree.Range{ + { + StartKey: append(tablecodec.GenTableRecordPrefix(1), []byte("aaa")...), + EndKey: append(tablecodec.GenTableRecordPrefix(2), []byte("bbb")...), Files: nil, + }, + } + for _, rg := range ranges2 { + _, err := utils.RewriteRange(&rg, rewriteRules) + require.Error(t, err) + require.Regexp(t, "table id mismatch.*", err.Error()) + } + + ranges3 := []rtree.Range{ + {StartKey: []byte("aaa"), EndKey: []byte("aae")}, + {StartKey: []byte("aae"), EndKey: []byte("aaz")}, + {StartKey: []byte("ccd"), EndKey: []byte("ccf")}, + {StartKey: []byte("ccf"), EndKey: []byte("ccj")}, + } + rewriteRules1 := &utils.RewriteRules{ + Data: []*import_sstpb.RewriteRule{ + { + OldKeyPrefix: []byte("aa"), + NewKeyPrefix: []byte("xx"), + }, { + OldKeyPrefix: []byte("cc"), + NewKeyPrefix: []byte("bb"), + }, + }, + } + for i, rg := range ranges3 { + tmp, _ := utils.RewriteRange(&rg, rewriteRules1) + ranges3[i] = *tmp + } + rs3, err := utils.SortRanges(ranges3) + require.NoErrorf(t, err, "sort range1 failed: %v", err) + rangeEquals(t, rs3, []rtree.Range{ + {StartKey: []byte("bbd"), EndKey: []byte("bbf"), Files: nil}, + {StartKey: []byte("bbf"), EndKey: []byte("bbj"), Files: nil}, + {StartKey: []byte("xxa"), EndKey: []byte("xxe"), Files: nil}, + {StartKey: []byte("xxe"), EndKey: []byte("xxz"), Files: nil}, + }) +} diff --git a/br/pkg/restore/utils/split.go b/br/pkg/restore/utils/split.go new file mode 100644 index 0000000000..82ff17acc2 --- /dev/null +++ b/br/pkg/restore/utils/split.go @@ -0,0 +1,170 @@ +// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. 
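Before the body of split.go, a minimal sketch of the rewrite-rule flow that rewrite_rule.go and the tests above exercise: build the rules for one table, rewrite a key range, and read the new table ID back out of the rules. The table and index IDs and the main wrapper are invented for illustration.

package main

import (
	"fmt"

	restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils"
	"github.com/pingcap/tidb/br/pkg/rtree"
	"github.com/pingcap/tidb/pkg/tablecodec"
)

func main() {
	// Hypothetical IDs: table 100 is restored as table 200, index 1 becomes index 3.
	rules := restoreutils.GetRewriteRuleOfTable(100, 200, 0, map[int64]int64{1: 3}, true)

	// Rewrite a record-key range of the old table into the new table's key space.
	rg := &rtree.Range{
		StartKey: append(tablecodec.GenTableRecordPrefix(100), []byte("a")...),
		EndKey:   append(tablecodec.GenTableRecordPrefix(100), []byte("b")...),
	}
	rewritten, err := restoreutils.RewriteRange(rg, rules)
	if err != nil {
		panic(err)
	}
	// Both keys now carry the record prefix of table 200.
	fmt.Printf("start=%X end=%X\n", rewritten.StartKey, rewritten.EndKey)

	// The mapped table ID can also be recovered from the rules themselves.
	fmt.Println(restoreutils.GetRewriteTableID(100, rules)) // 200
}

Passing getDetailRule as true produces per-record and per-index prefix rules, which the full restore path wants; passing false collapses them into whole-table prefixes.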
+ +package utils + +import ( + "context" + "time" + + "github.com/opentracing/opentracing-go" + "github.com/pingcap/errors" + "github.com/pingcap/log" + berrors "github.com/pingcap/tidb/br/pkg/errors" + "github.com/pingcap/tidb/br/pkg/logutil" + "github.com/pingcap/tidb/br/pkg/restore/split" + "github.com/pingcap/tidb/br/pkg/rtree" + "go.uber.org/zap" +) + +// RegionSplitter is a executor of region split by rules. +type RegionSplitter struct { + client split.SplitClient +} + +// NewRegionSplitter returns a new RegionSplitter. +func NewRegionSplitter(client split.SplitClient) *RegionSplitter { + return &RegionSplitter{ + client: client, + } +} + +// SplitWaitAndScatter expose the function `SplitWaitAndScatter` of split client. +func (rs *RegionSplitter) SplitWaitAndScatter(ctx context.Context, region *split.RegionInfo, keys [][]byte) ([]*split.RegionInfo, error) { + return rs.client.SplitWaitAndScatter(ctx, region, keys) +} + +// ExecuteSplit executes regions split and make sure new splitted regions are balance. +// It will split regions by the rewrite rules, +// then it will split regions by the end key of each range. +// tableRules includes the prefix of a table, since some ranges may have +// a prefix with record sequence or index sequence. +// note: all ranges and rewrite rules must have raw key. +func (rs *RegionSplitter) ExecuteSplit( + ctx context.Context, + ranges []rtree.Range, +) error { + if len(ranges) == 0 { + log.Info("skip split regions, no range") + return nil + } + + if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil { + span1 := span.Tracer().StartSpan("RegionSplitter.Split", opentracing.ChildOf(span.Context())) + defer span1.Finish() + ctx = opentracing.ContextWithSpan(ctx, span1) + } + + // Sort the range for getting the min and max key of the ranges + // TODO: this sort may not needed if we sort tables after creatation outside. + sortedRanges, errSplit := SortRanges(ranges) + if errSplit != nil { + return errors.Trace(errSplit) + } + if len(sortedRanges) == 0 { + log.Info("skip split regions after sorted, no range") + return nil + } + sortedKeys := make([][]byte, 0, len(sortedRanges)) + totalRangeSize := uint64(0) + for _, r := range sortedRanges { + sortedKeys = append(sortedKeys, r.EndKey) + totalRangeSize += r.Size + } + // the range size must be greater than 0 here + return rs.executeSplitByRanges(ctx, sortedKeys) +} + +func (rs *RegionSplitter) executeSplitByRanges( + ctx context.Context, + sortedKeys [][]byte, +) error { + startTime := time.Now() + // Choose the rough region split keys, + // each splited region contains 128 regions to be splitted. + const regionIndexStep = 128 + + roughSortedSplitKeys := make([][]byte, 0, len(sortedKeys)/regionIndexStep+1) + for curRegionIndex := regionIndexStep; curRegionIndex < len(sortedKeys); curRegionIndex += regionIndexStep { + roughSortedSplitKeys = append(roughSortedSplitKeys, sortedKeys[curRegionIndex]) + } + if len(roughSortedSplitKeys) > 0 { + if err := rs.executeSplitByKeys(ctx, roughSortedSplitKeys); err != nil { + return errors.Trace(err) + } + } + log.Info("finish spliting regions roughly", zap.Duration("take", time.Since(startTime))) + + // Then send split requests to each TiKV. + if err := rs.executeSplitByKeys(ctx, sortedKeys); err != nil { + return errors.Trace(err) + } + + log.Info("finish spliting and scattering regions", zap.Duration("take", time.Since(startTime))) + return nil +} + +// executeSplitByKeys will split regions by **sorted** keys with following steps. +// 1. 
locate regions with correspond keys. +// 2. split these regions with correspond keys. +// 3. make sure new split regions are balanced. +func (rs *RegionSplitter) executeSplitByKeys( + ctx context.Context, + sortedKeys [][]byte, +) error { + startTime := time.Now() + scatterRegions, err := rs.client.SplitKeysAndScatter(ctx, sortedKeys) + if err != nil { + return errors.Trace(err) + } + if len(scatterRegions) > 0 { + log.Info("finish splitting and scattering regions. and starts to wait", zap.Int("regions", len(scatterRegions)), + zap.Duration("take", time.Since(startTime))) + rs.waitRegionsScattered(ctx, scatterRegions, split.ScatterWaitUpperInterval) + } else { + log.Info("finish splitting regions.", zap.Duration("take", time.Since(startTime))) + } + return nil +} + +// waitRegionsScattered try to wait mutilple regions scatterd in 3 minutes. +// this could timeout, but if many regions scatterd the restore could continue +// so we don't wait long time here. +func (rs *RegionSplitter) waitRegionsScattered(ctx context.Context, scatterRegions []*split.RegionInfo, timeout time.Duration) { + log.Info("start to wait for scattering regions", zap.Int("regions", len(scatterRegions))) + startTime := time.Now() + leftCnt := rs.WaitForScatterRegionsTimeout(ctx, scatterRegions, timeout) + if leftCnt == 0 { + log.Info("waiting for scattering regions done", + zap.Int("regions", len(scatterRegions)), + zap.Duration("take", time.Since(startTime))) + } else { + log.Warn("waiting for scattering regions timeout", + zap.Int("not scattered Count", leftCnt), + zap.Int("regions", len(scatterRegions)), + zap.Duration("take", time.Since(startTime))) + } +} + +func (rs *RegionSplitter) WaitForScatterRegionsTimeout(ctx context.Context, regionInfos []*split.RegionInfo, timeout time.Duration) int { + ctx2, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + leftRegions, _ := rs.client.WaitRegionsScattered(ctx2, regionInfos) + return leftRegions +} + +// SortRanges checks if the range overlapped and sort them. +func SortRanges(ranges []rtree.Range) ([]rtree.Range, error) { + rangeTree := rtree.NewRangeTree() + for _, rg := range ranges { + if out := rangeTree.InsertRange(rg); out != nil { + log.Error("insert ranges overlapped", + logutil.Key("startKeyOut", out.StartKey), + logutil.Key("endKeyOut", out.EndKey), + logutil.Key("startKeyIn", rg.StartKey), + logutil.Key("endKeyIn", rg.EndKey)) + return nil, errors.Annotatef(berrors.ErrInvalidRange, "ranges overlapped") + } + } + sortedRanges := rangeTree.GetSortedRanges() + return sortedRanges, nil +} diff --git a/br/pkg/restore/utils/split_test.go b/br/pkg/restore/utils/split_test.go new file mode 100644 index 0000000000..3d6e4c44c5 --- /dev/null +++ b/br/pkg/restore/utils/split_test.go @@ -0,0 +1,137 @@ +// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. 
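Before the split tests, a short illustration of the SortRanges guard that ExecuteSplit relies on: non-overlapping ranges come back sorted, while overlapping ranges fail with ErrInvalidRange before any split request is sent. The keys are invented and no PD client is involved.

package main

import (
	"fmt"

	restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils"
	"github.com/pingcap/tidb/br/pkg/rtree"
)

func main() {
	// Unordered but non-overlapping ranges sort cleanly.
	ok := []rtree.Range{
		{StartKey: []byte("b"), EndKey: []byte("c")},
		{StartKey: []byte("a"), EndKey: []byte("b")},
	}
	sorted, err := restoreutils.SortRanges(ok)
	fmt.Println(len(sorted), err) // 2 <nil>

	// Overlapping ranges are rejected.
	bad := []rtree.Range{
		{StartKey: []byte("a"), EndKey: []byte("c")},
		{StartKey: []byte("b"), EndKey: []byte("d")},
	}
	_, err = restoreutils.SortRanges(bad)
	fmt.Println(err != nil) // true
}

ExecuteSplit then takes the end key of every sorted range as a split key, splitting roughly on every 128th key first and only afterwards on the full key set.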
+ +package utils_test + +import ( + "context" + "testing" + + "github.com/pingcap/kvproto/pkg/import_sstpb" + "github.com/pingcap/tidb/br/pkg/restore/split" + "github.com/pingcap/tidb/br/pkg/restore/utils" + "github.com/pingcap/tidb/br/pkg/rtree" + "github.com/pingcap/tidb/pkg/util/codec" + "github.com/stretchr/testify/require" +) + +func TestScanEmptyRegion(t *testing.T) { + mockPDCli := split.NewMockPDClientForSplit() + mockPDCli.SetRegions([][]byte{{}, {12}, {34}, {}}) + client := split.NewClient(mockPDCli, nil, nil, 100, 4) + ranges := initRanges() + // make ranges has only one + ranges = ranges[0:1] + regionSplitter := utils.NewRegionSplitter(client) + + ctx := context.Background() + err := regionSplitter.ExecuteSplit(ctx, ranges) + // should not return error with only one range entry + require.NoError(t, err) +} + +// region: [, aay), [aay, bba), [bba, bbh), [bbh, cca), [cca, ) +// range: [aaa, aae), [aae, aaz), [ccd, ccf), [ccf, ccj) +// rewrite rules: aa -> xx, cc -> bb +// expected regions after split: +// +// [, aay), [aay, bba), [bba, bbf), [bbf, bbh), [bbh, bbj), +// [bbj, cca), [cca, xxe), [xxe, xxz), [xxz, ) +func TestSplitAndScatter(t *testing.T) { + rangeBoundaries := [][]byte{[]byte(""), []byte("aay"), []byte("bba"), []byte("bbh"), []byte("cca"), []byte("")} + encodeBytes(rangeBoundaries) + mockPDCli := split.NewMockPDClientForSplit() + mockPDCli.SetRegions(rangeBoundaries) + client := split.NewClient(mockPDCli, nil, nil, 100, 4) + regionSplitter := utils.NewRegionSplitter(client) + ctx := context.Background() + + ranges := initRanges() + rules := initRewriteRules() + for i, rg := range ranges { + tmp, err := utils.RewriteRange(&rg, rules) + require.NoError(t, err) + ranges[i] = *tmp + } + err := regionSplitter.ExecuteSplit(ctx, ranges) + require.NoError(t, err) + regions := mockPDCli.Regions.ScanRange(nil, nil, 100) + expected := [][]byte{[]byte(""), []byte("aay"), []byte("bba"), []byte("bbf"), []byte("bbh"), []byte("bbj"), []byte("cca"), []byte("xxe"), []byte("xxz"), []byte("")} + encodeBytes(expected) + require.Len(t, regions, len(expected)-1) + for i, region := range regions { + require.Equal(t, expected[i], region.Meta.StartKey) + require.Equal(t, expected[i+1], region.Meta.EndKey) + } +} + +func encodeBytes(keys [][]byte) { + for i := range keys { + if len(keys[i]) == 0 { + continue + } + keys[i] = codec.EncodeBytes(nil, keys[i]) + } +} + +func TestRawSplit(t *testing.T) { + // Fix issue #36490. 
+ ranges := []rtree.Range{ + { + StartKey: []byte{0}, + EndKey: []byte{}, + }, + } + ctx := context.Background() + rangeBoundaries := [][]byte{[]byte(""), []byte("aay"), []byte("bba"), []byte("bbh"), []byte("cca"), []byte("")} + mockPDCli := split.NewMockPDClientForSplit() + mockPDCli.SetRegions(rangeBoundaries) + client := split.NewClient(mockPDCli, nil, nil, 100, 4, split.WithRawKV()) + + regionSplitter := utils.NewRegionSplitter(client) + err := regionSplitter.ExecuteSplit(ctx, ranges) + require.NoError(t, err) + + regions := mockPDCli.Regions.ScanRange(nil, nil, 100) + require.Len(t, regions, len(rangeBoundaries)-1) + for i, region := range regions { + require.Equal(t, rangeBoundaries[i], region.Meta.StartKey) + require.Equal(t, rangeBoundaries[i+1], region.Meta.EndKey) + } +} + +// range: [aaa, aae), [aae, aaz), [ccd, ccf), [ccf, ccj) +func initRanges() []rtree.Range { + var ranges [4]rtree.Range + ranges[0] = rtree.Range{ + StartKey: []byte("aaa"), + EndKey: []byte("aae"), + } + ranges[1] = rtree.Range{ + StartKey: []byte("aae"), + EndKey: []byte("aaz"), + } + ranges[2] = rtree.Range{ + StartKey: []byte("ccd"), + EndKey: []byte("ccf"), + } + ranges[3] = rtree.Range{ + StartKey: []byte("ccf"), + EndKey: []byte("ccj"), + } + return ranges[:] +} + +func initRewriteRules() *utils.RewriteRules { + var rules [2]*import_sstpb.RewriteRule + rules[0] = &import_sstpb.RewriteRule{ + OldKeyPrefix: []byte("aa"), + NewKeyPrefix: []byte("xx"), + } + rules[1] = &import_sstpb.RewriteRule{ + OldKeyPrefix: []byte("cc"), + NewKeyPrefix: []byte("bb"), + } + return &utils.RewriteRules{ + Data: rules[:], + } +} diff --git a/br/pkg/restore/utils/value.go b/br/pkg/restore/utils/value.go new file mode 100644 index 0000000000..c70c702a7b --- /dev/null +++ b/br/pkg/restore/utils/value.go @@ -0,0 +1,20 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package utils + +const ( + WriteCFName = "write" + DefaultCFName = "default" +) diff --git a/br/pkg/stream/BUILD.bazel b/br/pkg/stream/BUILD.bazel index 7d34757c16..87f21f8010 100644 --- a/br/pkg/stream/BUILD.bazel +++ b/br/pkg/stream/BUILD.bazel @@ -6,6 +6,8 @@ go_library( "decode_kv.go", "meta_kv.go", "rewrite_meta_rawkv.go", + "search.go", + "stream_metas.go", "stream_mgr.go", "stream_status.go", "util.go", @@ -28,6 +30,7 @@ go_library( "//pkg/tablecodec", "//pkg/util", "//pkg/util/codec", + "//pkg/util/mathutil", "//pkg/util/table-filter", "@com_github_fatih_color//:color", "@com_github_klauspost_compress//zstd", @@ -49,12 +52,14 @@ go_test( "decode_kv_test.go", "meta_kv_test.go", "rewrite_meta_rawkv_test.go", + "search_test.go", + "stream_metas_test.go", "stream_misc_test.go", "util_test.go", ], embed = [":stream"], flaky = True, - shard_count = 26, + shard_count = 38, deps = [ "//br/pkg/storage", "//br/pkg/streamhelper", @@ -66,9 +71,14 @@ go_test( "//pkg/tablecodec", "//pkg/types", "//pkg/util/codec", + "//pkg/util/intest", "//pkg/util/table-filter", + "@com_github_fsouza_fake_gcs_server//fakestorage", + "@com_github_pingcap_errors//:errors", "@com_github_pingcap_kvproto//pkg/brpb", + "@com_github_pingcap_log//:log", "@com_github_stretchr_testify//require", "@com_github_tikv_client_go_v2//oracle", + "@org_uber_go_zap//:zap", ], ) diff --git a/br/pkg/restore/search.go b/br/pkg/stream/search.go similarity index 90% rename from br/pkg/restore/search.go rename to br/pkg/stream/search.go index 64632935fd..f788d335ab 100644 --- a/br/pkg/restore/search.go +++ b/br/pkg/stream/search.go @@ -1,6 +1,6 @@ // Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package stream import ( "bytes" @@ -16,13 +16,17 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/storage" - "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/codec" "go.uber.org/zap" "golang.org/x/sync/errgroup" ) +const ( + writeCFName = "write" + defaultCFName = "default" +) + // Comparator is used for comparing the relationship of src and dst type Comparator interface { Compare(src, dst []byte) bool @@ -63,7 +67,11 @@ type StreamBackupSearch struct { } // NewStreamBackupSearch creates an instance of StreamBackupSearch -func NewStreamBackupSearch(storage storage.ExternalStorage, comparator Comparator, searchKey []byte) *StreamBackupSearch { +func NewStreamBackupSearch( + storage storage.ExternalStorage, + comparator Comparator, + searchKey []byte, +) *StreamBackupSearch { bs := &StreamBackupSearch{ storage: storage, comparator: comparator, @@ -84,11 +92,11 @@ func (s *StreamBackupSearch) SetEndTs(endTs uint64) { } func (s *StreamBackupSearch) readDataFiles(ctx context.Context, ch chan<- *backuppb.DataFileInfo) error { - opt := &storage.WalkOption{SubDir: stream.GetStreamBackupMetaPrefix()} + opt := &storage.WalkOption{SubDir: GetStreamBackupMetaPrefix()} pool := util.NewWorkerPool(64, "read backup meta") eg, egCtx := errgroup.WithContext(ctx) err := s.storage.WalkDir(egCtx, opt, func(path string, size int64) error { - if !strings.Contains(path, stream.GetStreamBackupMetaPrefix()) { + if !strings.Contains(path, GetStreamBackupMetaPrefix()) { return nil } @@ -118,7 +126,11 @@ func (s *StreamBackupSearch) readDataFiles(ctx context.Context, ch chan<- *backu return eg.Wait() } -func (s *StreamBackupSearch) resolveMetaData(ctx context.Context, metaData *backuppb.Metadata, ch chan<- 
*backuppb.DataFileInfo) { +func (s *StreamBackupSearch) resolveMetaData( + ctx context.Context, + metaData *backuppb.Metadata, + ch chan<- *backuppb.DataFileInfo, +) { for _, file := range metaData.Files { if file.IsMeta { continue @@ -197,7 +209,11 @@ func (s *StreamBackupSearch) Search(ctx context.Context) ([]*StreamKVInfo, error return entries, nil } -func (s *StreamBackupSearch) searchFromDataFile(ctx context.Context, dataFile *backuppb.DataFileInfo, ch chan<- *StreamKVInfo) error { +func (s *StreamBackupSearch) searchFromDataFile( + ctx context.Context, + dataFile *backuppb.DataFileInfo, + ch chan<- *StreamKVInfo, +) error { buff, err := s.storage.ReadFile(ctx, dataFile.Path) if err != nil { return errors.Annotatef(err, "read data file error, file: %s", dataFile.Path) @@ -207,7 +223,7 @@ func (s *StreamBackupSearch) searchFromDataFile(ctx context.Context, dataFile *b return errors.Annotatef(err, "validate checksum failed, file: %s", dataFile.Path) } - iter := stream.NewEventIterator(buff) + iter := NewEventIterator(buff) for iter.Valid() { iter.Next() if err := iter.GetError(); err != nil { @@ -231,7 +247,7 @@ func (s *StreamBackupSearch) searchFromDataFile(ctx context.Context, dataFile *b } if dataFile.Cf == writeCFName { - rawWriteCFValue := new(stream.RawWriteCFValue) + rawWriteCFValue := new(RawWriteCFValue) if err := rawWriteCFValue.ParseFrom(v); err != nil { return errors.Annotatef(err, "parse raw write cf value error, file: %s", dataFile.Path) } @@ -278,7 +294,8 @@ func (s *StreamBackupSearch) mergeCFEntries(defaultCFEntries, writeCFEntries map keyBytes, err := hex.DecodeString(entry.Key) if err != nil { - log.Warn("hex decode key failed", zap.String("key", entry.Key), zap.String("encode-key", entry.EncodedKey), zap.Error(err)) + log.Warn("hex decode key failed", + zap.String("key", entry.Key), zap.String("encode-key", entry.EncodedKey), zap.Error(err)) continue } diff --git a/br/pkg/restore/search_test.go b/br/pkg/stream/search_test.go similarity index 94% rename from br/pkg/restore/search_test.go rename to br/pkg/stream/search_test.go index 7fed62cdf8..6759b37dd6 100644 --- a/br/pkg/restore/search_test.go +++ b/br/pkg/stream/search_test.go @@ -1,6 +1,6 @@ // Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. 
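Before the search tests, a rough sketch of driving the relocated StreamBackupSearch end to end. It assumes a local copy of a log backup and the package's prefix comparator constructor NewStartWithComparator, which is not shown in this hunk; the path, the search prefix, and the timestamps are invented.

package main

import (
	"context"
	"fmt"

	"github.com/pingcap/tidb/br/pkg/storage"
	"github.com/pingcap/tidb/br/pkg/stream"
)

func main() {
	ctx := context.Background()

	// A local copy of the log backup; any ExternalStorage implementation works here.
	s, err := storage.NewLocalStorage("/tmp/log-backup")
	if err != nil {
		panic(err)
	}

	// Look for keys that start with the given raw prefix inside a TS window.
	comparator := stream.NewStartWithComparator()
	search := stream.NewStreamBackupSearch(s, comparator, []byte("searched-key-prefix"))
	search.SetStartTS(0)
	search.SetEndTs(447000000000000000) // invented TSO upper bound

	kvs, err := search.Search(ctx)
	if err != nil {
		panic(err)
	}
	for _, kv := range kvs {
		fmt.Printf("%+v\n", kv)
	}
}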
-package restore +package stream import ( "bytes" @@ -13,7 +13,6 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/tidb/br/pkg/storage" - "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/pkg/util/codec" "github.com/stretchr/testify/require" ) @@ -114,7 +113,7 @@ func fakeDataFile(t *testing.T, s storage.ExternalStorage) (defaultCFDataFile, w ctx := context.Background() defaultCFBuf := bytes.NewBuffer([]byte{}) for _, defaultCF := range defaultCFs { - defaultCFBuf.Write(stream.EncodeKVEntry(encodeKey(defaultCF.key, defaultCF.startTs), []byte(defaultCF.val))) + defaultCFBuf.Write(EncodeKVEntry(encodeKey(defaultCF.key, defaultCF.startTs), []byte(defaultCF.val))) } err := s.WriteFile(ctx, defaultCFFile, defaultCFBuf.Bytes()) @@ -128,7 +127,7 @@ func fakeDataFile(t *testing.T, s storage.ExternalStorage) (defaultCFDataFile, w writeCFBuf := bytes.NewBuffer([]byte{}) for _, writeCF := range writeCFs { - writeCFBuf.Write(stream.EncodeKVEntry(encodeKey(writeCF.key, writeCF.commitTS), encodeShortValue(writeCF.val, writeCF.startTs))) + writeCFBuf.Write(EncodeKVEntry(encodeKey(writeCF.key, writeCF.commitTS), encodeShortValue(writeCF.val, writeCF.startTs))) } err = s.WriteFile(ctx, writeCFFile, writeCFBuf.Bytes()) diff --git a/br/pkg/restore/stream_metas.go b/br/pkg/stream/stream_metas.go similarity index 90% rename from br/pkg/restore/stream_metas.go rename to br/pkg/stream/stream_metas.go index 032bd61584..c86921cc15 100644 --- a/br/pkg/restore/stream_metas.go +++ b/br/pkg/stream/stream_metas.go @@ -1,6 +1,6 @@ // Copyright 2021 PingCAP, Inc. Licensed under Apache-2.0. -package restore +package stream import ( "context" @@ -14,7 +14,6 @@ import ( berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/storage" - "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/mathutil" "go.uber.org/zap" @@ -33,7 +32,7 @@ type StreamMetadataSet struct { MetadataDownloadBatchSize uint // a parser of metadata - Helper *stream.MetadataHelper + Helper *MetadataHelper // for test BeforeDoWriteBack func(path string, replaced *backuppb.Metadata) (skip bool) @@ -52,9 +51,18 @@ type MetadataInfo struct { FileGroupInfos []*FileGroupInfo } -// LoadUntilAndCalculateShiftTS loads the metadata until the specified timestamp and calculate the shift-until-ts by the way. -// This would record all metadata files that *may* contain data from transaction committed before that TS. -func (ms *StreamMetadataSet) LoadUntilAndCalculateShiftTS(ctx context.Context, s storage.ExternalStorage, until uint64) (uint64, error) { +func (ms *StreamMetadataSet) TEST_GetMetadataInfos() map[string]*MetadataInfo { + return ms.metadataInfos +} + +// LoadUntilAndCalculateShiftTS loads the metadata until the specified timestamp and calculates +// the shift-until-ts along the way. This would record all metadata files that *may* contain data +// from transactions committed before that TS.
+func (ms *StreamMetadataSet) LoadUntilAndCalculateShiftTS( + ctx context.Context, + s storage.ExternalStorage, + until uint64, +) (uint64, error) { metadataMap := struct { sync.Mutex metas map[string]*MetadataInfo @@ -63,13 +71,14 @@ func (ms *StreamMetadataSet) LoadUntilAndCalculateShiftTS(ctx context.Context, s metadataMap.metas = make(map[string]*MetadataInfo) // `shiftUntilTS` must be less than `until` metadataMap.shiftUntilTS = until - err := stream.FastUnmarshalMetaData(ctx, s, ms.MetadataDownloadBatchSize, func(path string, raw []byte) error { + err := FastUnmarshalMetaData(ctx, s, ms.MetadataDownloadBatchSize, func(path string, raw []byte) error { m, err := ms.Helper.ParseToMetadataHard(raw) if err != nil { return err } // If the meta file contains only files with ts grater than `until`, when the file is from - // `Default`: it should be kept, because its corresponding `write` must has commit ts grater than it, which should not be considered. + // `Default`: it should be kept, because its corresponding `write` must have commit ts greater + // than it, which should not be considered. // `Write`: it should trivially not be considered. if m.MinTs <= until { // record these meta-information for statistics and filtering @@ -150,7 +159,12 @@ func (ms *StreamMetadataSet) IterateFilesFullyBefore(before uint64, f func(d *Fi // RemoveDataFilesAndUpdateMetadataInBatch concurrently remove datafilegroups and update metadata. // Only one metadata is processed in each thread, including deleting its datafilegroup and updating it. // Returns the not deleted datafilegroups. -func (ms *StreamMetadataSet) RemoveDataFilesAndUpdateMetadataInBatch(ctx context.Context, from uint64, storage storage.ExternalStorage, updateFn func(num int64)) ([]string, error) { +func (ms *StreamMetadataSet) RemoveDataFilesAndUpdateMetadataInBatch( + ctx context.Context, + from uint64, + storage storage.ExternalStorage, + updateFn func(num int64), +) ([]string, error) { var notDeleted struct { item []string sync.Mutex @@ -204,7 +218,13 @@ func (ms *StreamMetadataSet) RemoveDataFilesAndUpdateMetadataInBatch(ctx context } // removeDataFilesAndUpdateMetadata removes some datafilegroups of the metadata, if their max-ts is less than `from` -func (ms *StreamMetadataSet) removeDataFilesAndUpdateMetadata(ctx context.Context, storage storage.ExternalStorage, from uint64, meta *backuppb.Metadata, metaPath string) (num int64, notDeleted []string, err error) { +func (ms *StreamMetadataSet) removeDataFilesAndUpdateMetadata( + ctx context.Context, + storage storage.ExternalStorage, + from uint64, + meta *backuppb.Metadata, + metaPath string, +) (num int64, notDeleted []string, err error) { removed := make([]*backuppb.DataFileGroup, 0) remainedDataFiles := make([]*backuppb.DataFileGroup, 0) notDeleted = make([]string, 0) @@ -262,7 +282,12 @@ func (ms *StreamMetadataSet) removeDataFilesAndUpdateMetadata(ctx context.Contex return num, notDeleted, nil } -func (ms *StreamMetadataSet) doWriteBackForFile(ctx context.Context, s storage.ExternalStorage, path string, meta *backuppb.Metadata) error { +func (ms *StreamMetadataSet) doWriteBackForFile( + ctx context.Context, + s storage.ExternalStorage, + path string, + meta *backuppb.Metadata, +) error { // If the metadata file contains no data file, remove it due to it is meanless.
if len(meta.FileGroups) == 0 { if err := s.DeleteFile(ctx, path); err != nil { @@ -341,7 +366,7 @@ func UpdateShiftTS(m *backuppb.Metadata, startTS uint64, restoreTS uint64) (uint for _, ds := range m.FileGroups { for _, d := range ds.DataFilesInfo { - if d.Cf == stream.DefaultCF || d.MinBeginTsInDefaultCf == 0 { + if d.Cf == DefaultCF || d.MinBeginTsInDefaultCf == 0 { continue } if d.MinTs > restoreTS || d.MaxTs < startTS { diff --git a/br/pkg/restore/stream_metas_test.go b/br/pkg/stream/stream_metas_test.go similarity index 79% rename from br/pkg/restore/stream_metas_test.go rename to br/pkg/stream/stream_metas_test.go index f1ee34feb6..2545f2fd90 100644 --- a/br/pkg/restore/stream_metas_test.go +++ b/br/pkg/stream/stream_metas_test.go @@ -1,6 +1,6 @@ // Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. -package restore_test +package stream import ( "context" @@ -16,9 +16,7 @@ import ( "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" - "github.com/pingcap/tidb/br/pkg/restore" "github.com/pingcap/tidb/br/pkg/storage" - "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/pkg/util/intest" "github.com/stretchr/testify/require" "go.uber.org/zap" @@ -99,7 +97,7 @@ func fakeStreamBackup(s storage.ExternalStorage) error { if err != nil { panic("failed to marshal test meta") } - name := fmt.Sprintf("%s/%04d.meta", stream.GetStreamBackupMetaPrefix(), i) + name := fmt.Sprintf("%s/%04d.meta", GetStreamBackupMetaPrefix(), i) if err = s.WriteFile(ctx, name, bs); err != nil { return errors.Trace(err) } @@ -127,7 +125,7 @@ func fakeStreamBackupV2(s storage.ExternalStorage) error { if err != nil { panic("failed to marshal test meta") } - name := fmt.Sprintf("%s/%04d.meta", stream.GetStreamBackupMetaPrefix(), i) + name := fmt.Sprintf("%s/%04d.meta", GetStreamBackupMetaPrefix(), i) if err = s.WriteFile(ctx, name, bs); err != nil { return errors.Trace(err) } @@ -140,7 +138,7 @@ func fakeStreamBackupV2(s storage.ExternalStorage) error { func TestTruncateLog(t *testing.T) { ctx := context.Background() tmpdir := t.TempDir() - backupMetaDir := filepath.Join(tmpdir, stream.GetStreamBackupMetaPrefix()) + backupMetaDir := filepath.Join(tmpdir, GetStreamBackupMetaPrefix()) _, err := storage.NewLocalStorage(backupMetaDir) require.NoError(t, err) @@ -149,14 +147,14 @@ func TestTruncateLog(t *testing.T) { require.NoError(t, fakeStreamBackup(l)) - s := restore.StreamMetadataSet{ - Helper: stream.NewMetadataHelper(), + s := StreamMetadataSet{ + Helper: NewMetadataHelper(), MetadataDownloadBatchSize: 128, } require.NoError(t, s.LoadFrom(ctx, l)) - fs := []*restore.FileGroupInfo{} - s.IterateFilesFullyBefore(17, func(d *restore.FileGroupInfo) (shouldBreak bool) { + fs := []*FileGroupInfo{} + s.IterateFilesFullyBefore(17, func(d *FileGroupInfo) (shouldBreak bool) { fs = append(fs, d) require.Less(t, d.MaxTS, uint64(17)) return false @@ -196,13 +194,13 @@ func TestTruncateLog(t *testing.T) { require.Equal(t, total, int64(15)) require.NoError(t, s.LoadFrom(ctx, l)) - s.IterateFilesFullyBefore(17, func(d *restore.FileGroupInfo) (shouldBreak bool) { + s.IterateFilesFullyBefore(17, func(d *FileGroupInfo) (shouldBreak bool) { t.Errorf("some of log files still not truncated, it is %#v", d) return true }) err = l.WalkDir(ctx, &storage.WalkOption{ - SubDir: stream.GetStreamBackupMetaPrefix(), + SubDir: GetStreamBackupMetaPrefix(), }, func(s string, i int64) error { require.NotContains(t, removedMetaFiles, s) return nil @@ -213,7 +211,7 @@ func TestTruncateLog(t 
*testing.T) { func TestTruncateLogV2(t *testing.T) { ctx := context.Background() tmpdir := t.TempDir() - backupMetaDir := filepath.Join(tmpdir, stream.GetStreamBackupMetaPrefix()) + backupMetaDir := filepath.Join(tmpdir, GetStreamBackupMetaPrefix()) _, err := storage.NewLocalStorage(backupMetaDir) require.NoError(t, err) @@ -222,14 +220,14 @@ func TestTruncateLogV2(t *testing.T) { require.NoError(t, fakeStreamBackupV2(l)) - s := restore.StreamMetadataSet{ - Helper: stream.NewMetadataHelper(), + s := StreamMetadataSet{ + Helper: NewMetadataHelper(), MetadataDownloadBatchSize: 128, } require.NoError(t, s.LoadFrom(ctx, l)) - fs := []*restore.FileGroupInfo{} - s.IterateFilesFullyBefore(17, func(d *restore.FileGroupInfo) (shouldBreak bool) { + fs := []*FileGroupInfo{} + s.IterateFilesFullyBefore(17, func(d *FileGroupInfo) (shouldBreak bool) { fs = append(fs, d) require.Less(t, d.MaxTS, uint64(17)) return false @@ -269,13 +267,13 @@ func TestTruncateLogV2(t *testing.T) { require.Equal(t, total, int64(15)) require.NoError(t, s.LoadFrom(ctx, l)) - s.IterateFilesFullyBefore(17, func(d *restore.FileGroupInfo) (shouldBreak bool) { + s.IterateFilesFullyBefore(17, func(d *FileGroupInfo) (shouldBreak bool) { t.Errorf("some of log files still not truncated, it is %#v", d) return true }) err = l.WalkDir(ctx, &storage.WalkOption{ - SubDir: stream.GetStreamBackupMetaPrefix(), + SubDir: GetStreamBackupMetaPrefix(), }, func(s string, i int64) error { require.NotContains(t, removedMetaFiles, s) return nil @@ -288,15 +286,15 @@ func TestTruncateSafepoint(t *testing.T) { l, err := storage.NewLocalStorage(t.TempDir()) require.NoError(t, err) - ts, err := restore.GetTSFromFile(ctx, l, restore.TruncateSafePointFileName) + ts, err := GetTSFromFile(ctx, l, TruncateSafePointFileName) require.NoError(t, err) require.Equal(t, int(ts), 0) for i := 0; i < 100; i++ { n := rand.Uint64() - require.NoError(t, restore.SetTSToFile(ctx, l, n, restore.TruncateSafePointFileName)) + require.NoError(t, SetTSToFile(ctx, l, n, TruncateSafePointFileName)) - ts, err = restore.GetTSFromFile(ctx, l, restore.TruncateSafePointFileName) + ts, err = GetTSFromFile(ctx, l, TruncateSafePointFileName) require.NoError(t, err) require.Equal(t, ts, n, "failed at %d round: truncate safepoint mismatch", i) } @@ -329,21 +327,21 @@ func TestTruncateSafepointForGCS(t *testing.T) { require.NoError(t, err) require.NoError(t, err) - ts, err := restore.GetTSFromFile(ctx, l, restore.TruncateSafePointFileName) + ts, err := GetTSFromFile(ctx, l, TruncateSafePointFileName) require.NoError(t, err) require.Equal(t, int(ts), 0) for i := 0; i < 100; i++ { n := rand.Uint64() - require.NoError(t, restore.SetTSToFile(ctx, l, n, restore.TruncateSafePointFileName)) + require.NoError(t, SetTSToFile(ctx, l, n, TruncateSafePointFileName)) - ts, err = restore.GetTSFromFile(ctx, l, restore.TruncateSafePointFileName) + ts, err = GetTSFromFile(ctx, l, TruncateSafePointFileName) require.NoError(t, err) require.Equal(t, ts, n, "failed at %d round: truncate safepoint mismatch", i) } } -func fakeMetaDatas(t *testing.T, helper *stream.MetadataHelper, cf string) []*backuppb.Metadata { +func fakeMetaDatas(t *testing.T, helper *MetadataHelper, cf string) []*backuppb.Metadata { ms := []*backuppb.Metadata{ { StoreId: 1, @@ -397,7 +395,7 @@ func fakeMetaDatas(t *testing.T, helper *stream.MetadataHelper, cf string) []*ba return m2s } -func fakeMetaDataV2s(t *testing.T, helper *stream.MetadataHelper, cf string) []*backuppb.Metadata { +func fakeMetaDataV2s(t *testing.T, helper 
*MetadataHelper, cf string) []*backuppb.Metadata { ms := []*backuppb.Metadata{ { StoreId: 1, @@ -482,43 +480,43 @@ func fakeMetaDataV2s(t *testing.T, helper *stream.MetadataHelper, cf string) []* } func ff(minTS, maxTS uint64) *backuppb.DataFileGroup { - return f(0, minTS, maxTS, stream.DefaultCF, 0) + return f(0, minTS, maxTS, DefaultCF, 0) } func TestReplaceMetadataTs(t *testing.T) { m := &backuppb.Metadata{} - restore.ReplaceMetadata(m, []*backuppb.DataFileGroup{ + ReplaceMetadata(m, []*backuppb.DataFileGroup{ ff(1, 3), ff(4, 5), }) require.Equal(t, m.MinTs, uint64(1)) require.Equal(t, m.MaxTs, uint64(5)) - restore.ReplaceMetadata(m, []*backuppb.DataFileGroup{ + ReplaceMetadata(m, []*backuppb.DataFileGroup{ ff(1, 4), ff(3, 5), }) require.Equal(t, m.MinTs, uint64(1)) require.Equal(t, m.MaxTs, uint64(5)) - restore.ReplaceMetadata(m, []*backuppb.DataFileGroup{ + ReplaceMetadata(m, []*backuppb.DataFileGroup{ ff(1, 6), ff(0, 5), }) require.Equal(t, m.MinTs, uint64(0)) require.Equal(t, m.MaxTs, uint64(6)) - restore.ReplaceMetadata(m, []*backuppb.DataFileGroup{ + ReplaceMetadata(m, []*backuppb.DataFileGroup{ ff(1, 3), }) require.Equal(t, m.MinTs, uint64(1)) require.Equal(t, m.MaxTs, uint64(3)) - restore.ReplaceMetadata(m, []*backuppb.DataFileGroup{}) + ReplaceMetadata(m, []*backuppb.DataFileGroup{}) require.Equal(t, m.MinTs, uint64(0)) require.Equal(t, m.MaxTs, uint64(0)) - restore.ReplaceMetadata(m, []*backuppb.DataFileGroup{ + ReplaceMetadata(m, []*backuppb.DataFileGroup{ ff(1, 3), ff(2, 4), ff(0, 2), @@ -596,7 +594,7 @@ func cleanFiles(ctx context.Context, s storage.ExternalStorage) error { } func metaName(storeId int64) string { - return fmt.Sprintf("%s/%04d.meta", stream.GetStreamBackupMetaPrefix(), storeId) + return fmt.Sprintf("%s/%04d.meta", GetStreamBackupMetaPrefix(), storeId) } func logName(storeId int64, minTS, maxTS uint64) string { @@ -608,7 +606,7 @@ func generateFiles(ctx context.Context, s storage.ExternalStorage, metas []*back if err := cleanFiles(ctx, s); err != nil { return err } - fname := path.Join(tmpDir, stream.GetStreamBackupMetaPrefix()) + fname := path.Join(tmpDir, GetStreamBackupMetaPrefix()) os.MkdirAll(fname, 0777) for _, meta := range metas { data, err := meta.Marshal() @@ -692,28 +690,28 @@ func TestTruncate1(t *testing.T) { // ↓ ↓ // filegroup 10-----d-----20 metas: []*backuppb.Metadata{ - m_1(1, 10, 20, stream.DefaultCF, 0), + m_1(1, 10, 20, DefaultCF, 0), }, testParams: []*testParam{ { until: []uint64{5}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 20, stream.DefaultCF, 0), + m_1(1, 10, 20, DefaultCF, 0), }, }, { until: []uint64{10}, shiftUntilTS: 10, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 20, stream.DefaultCF, 0), + m_1(1, 10, 20, DefaultCF, 0), }, }, { until: []uint64{15}, shiftUntilTS: 15, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 20, stream.DefaultCF, 0), + m_1(1, 10, 20, DefaultCF, 0), }, }, { until: []uint64{20}, shiftUntilTS: 20, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 20, stream.DefaultCF, 0), + m_1(1, 10, 20, DefaultCF, 0), }, }, { until: []uint64{25}, @@ -727,18 +725,18 @@ func TestTruncate1(t *testing.T) { // ↓ ↓ // filegroup 5-d--10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 7, 10, 15, 20}, shiftUntilTS: 5, restMetadata: 
[]*backuppb.Metadata{ - m_1(1, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 20, WriteCF, 5), }, }, { until: []uint64{25}, @@ -753,21 +751,21 @@ func TestTruncate1(t *testing.T) { // filegroup 5--d-8 ↓ ↓ // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 5, 8, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 8, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 8, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 8, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 9, 10, 15, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 8, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 8, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{25}, @@ -783,21 +781,21 @@ func TestTruncate1(t *testing.T) { // filegroup 5--d---10 ↓ // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 5, 10, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 10, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 10, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 10, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 9, 10, 15, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 10, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 10, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{25}, @@ -813,21 +811,21 @@ func TestTruncate1(t *testing.T) { // filegroup 5--d----↓-12 ↓ // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 5, 12, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 12, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 12, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 12, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 9, 10, 15, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 12, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 12, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{25}, @@ -843,21 +841,21 @@ func TestTruncate1(t *testing.T) { // filegroup 5--d----↓-----------20 // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 5, 20, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 20, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 20, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 20, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 10, 15, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 20, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 20, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{25}, @@ -873,31 +871,31 @@ func TestTruncate1(t *testing.T) { // filegroup 5--d----↓-----------↓--22 // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 5, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 22, DefaultCF, 0), + m_1(2, 10, 20, 
WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 22, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 10, 15, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 5, 22, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{21}, shiftUntilTS: 21, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 22, stream.DefaultCF, 0), + m_1(1, 5, 22, DefaultCF, 0), }, }, { until: []uint64{22}, shiftUntilTS: 22, restMetadata: []*backuppb.Metadata{ - m_1(1, 5, 22, stream.DefaultCF, 0), + m_1(1, 5, 22, DefaultCF, 0), }, }, { until: []uint64{25}, @@ -913,21 +911,21 @@ func TestTruncate1(t *testing.T) { // filegroup 10-d-14 ↓ // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 10, 14, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 14, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 14, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 14, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 10, 12, 14, 18, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 14, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 14, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{25}, @@ -943,21 +941,21 @@ func TestTruncate1(t *testing.T) { // filegroup 10----d------20 // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 10, 20, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 20, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 20, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 20, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 10, 14, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 20, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 20, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{25}, @@ -973,31 +971,31 @@ func TestTruncate1(t *testing.T) { // filegroup 10----d-------↓--22 // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 10, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 22, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 22, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 10, 14, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 10, 22, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{21}, shiftUntilTS: 21, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 22, stream.DefaultCF, 0), + m_1(1, 10, 22, DefaultCF, 0), }, }, { until: []uint64{22}, shiftUntilTS: 22, restMetadata: []*backuppb.Metadata{ - m_1(1, 10, 22, stream.DefaultCF, 0), + m_1(1, 10, 22, DefaultCF, 0), }, }, { until: []uint64{25}, @@ -1013,21 +1011,21 @@ func 
TestTruncate1(t *testing.T) { // filegroup ↓ 12--d--18 ↓ // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 12, 18, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 12, 18, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 12, 18, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 12, 18, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 10, 11, 12, 15, 18, 19, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 12, 18, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 12, 18, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{25}, @@ -1043,21 +1041,21 @@ func TestTruncate1(t *testing.T) { // filegroup ↓ 14--d-20 // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 14, 20, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 14, 20, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 14, 20, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 14, 20, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 10, 14, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 14, 20, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 14, 20, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{25}, @@ -1073,31 +1071,31 @@ func TestTruncate1(t *testing.T) { // filegroup ↓ 14-d--↓--22 // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 14, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 14, 22, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 14, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 14, 22, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 10, 14, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 14, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 14, 22, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{21}, shiftUntilTS: 21, restMetadata: []*backuppb.Metadata{ - m_1(1, 14, 22, stream.DefaultCF, 0), + m_1(1, 14, 22, DefaultCF, 0), }, }, { until: []uint64{22}, shiftUntilTS: 22, restMetadata: []*backuppb.Metadata{ - m_1(1, 14, 22, stream.DefaultCF, 0), + m_1(1, 14, 22, DefaultCF, 0), }, }, { until: []uint64{25}, @@ -1113,31 +1111,31 @@ func TestTruncate1(t *testing.T) { // filegroup ↓ 20--22 // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 20, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 20, 22, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 20, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 20, 22, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 10, 14, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 20, 22, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 20, 22, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{21}, shiftUntilTS: 21, restMetadata: []*backuppb.Metadata{ - m_1(1, 20, 22, stream.DefaultCF, 
0), + m_1(1, 20, 22, DefaultCF, 0), }, }, { until: []uint64{22}, shiftUntilTS: 22, restMetadata: []*backuppb.Metadata{ - m_1(1, 20, 22, stream.DefaultCF, 0), + m_1(1, 20, 22, DefaultCF, 0), }, }, { until: []uint64{25}, @@ -1153,31 +1151,31 @@ func TestTruncate1(t *testing.T) { // filegroup ↓ ↓ 21-d-24 // filegroup 5--d---10-----w-----20 metas: []*backuppb.Metadata{ - m_1(1, 21, 24, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 21, 24, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, testParams: []*testParam{ { until: []uint64{3}, shiftUntilTS: 3, restMetadata: []*backuppb.Metadata{ - m_1(1, 21, 24, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 21, 24, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{5, 8, 10, 14, 20}, shiftUntilTS: 5, restMetadata: []*backuppb.Metadata{ - m_1(1, 21, 24, stream.DefaultCF, 0), - m_1(2, 10, 20, stream.WriteCF, 5), + m_1(1, 21, 24, DefaultCF, 0), + m_1(2, 10, 20, WriteCF, 5), }, }, { until: []uint64{21}, shiftUntilTS: 21, restMetadata: []*backuppb.Metadata{ - m_1(1, 21, 24, stream.DefaultCF, 0), + m_1(1, 21, 24, DefaultCF, 0), }, }, { until: []uint64{22}, shiftUntilTS: 22, restMetadata: []*backuppb.Metadata{ - m_1(1, 21, 24, stream.DefaultCF, 0), + m_1(1, 21, 24, DefaultCF, 0), }, }, { until: []uint64{25}, @@ -1191,8 +1189,8 @@ func TestTruncate1(t *testing.T) { for j, ts := range cs.testParams { for _, until := range ts.until { t.Logf("case %d, param %d, until %d", i, j, until) - metas := restore.StreamMetadataSet{ - Helper: stream.NewMetadataHelper(), + metas := StreamMetadataSet{ + Helper: NewMetadataHelper(), MetadataDownloadBatchSize: 128, } err := generateFiles(ctx, s, cs.metas, tmpDir) @@ -1248,8 +1246,8 @@ func TestTruncate2(t *testing.T) { // filegroup 8----d--15-w-20 metas: []*backuppb.Metadata{ m_2(1, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 8, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 8, ), }, testParams: []*testParam2{ @@ -1257,16 +1255,16 @@ func TestTruncate2(t *testing.T) { until: []uint64{5}, shiftUntilTS: returnV(5), restMetadata: []*backuppb.Metadata{ m_2(1, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 8, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 8, ), }, }, { until: []uint64{8, 9, 10, 12, 13, 14, 15, 18, 20}, shiftUntilTS: returnV(8), restMetadata: []*backuppb.Metadata{ m_2(1, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 8, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 8, ), }, }, { @@ -1282,29 +1280,29 @@ func TestTruncate2(t *testing.T) { // filegroup 3 6 10-d-13 ↓ ↓ // filegroup 1-----------d--15-w-20 metas: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 1, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 1, ), }, testParams: []*testParam2{ { until: []uint64{0}, shiftUntilTS: returnV(0), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 8, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 8, ), }, }, { until: []uint64{1, 2, 3, 4, 6, 9, 10, 12, 13, 14, 15, 18, 20}, shiftUntilTS: returnV(1), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 8, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 8, ), }, }, { @@ -1320,29 +1318,29 @@ func TestTruncate2(t *testing.T) { // filegroup 3 6 10-d-13 ↓ ↓ // filegroup 
3----------d--15-w-20 metas: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 3, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 3, ), }, testParams: []*testParam2{ { until: []uint64{2}, shiftUntilTS: returnV(2), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 3, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 3, ), }, }, { until: []uint64{3, 4, 6, 9, 10, 12, 13, 14, 15, 18, 20}, shiftUntilTS: returnV(3), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 3, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 3, ), }, }, { @@ -1358,29 +1356,29 @@ func TestTruncate2(t *testing.T) { // filegroup 3 7 10-d-13 ↓ ↓ // filegroup 5--------d--15-w-20 metas: []*backuppb.Metadata{ - m_1(1, 3, 7, stream.DefaultCF, 0), + m_1(1, 3, 7, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 5, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 5, ), }, testParams: []*testParam2{ { until: []uint64{2, 3, 4}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 7, stream.DefaultCF, 0), + m_1(1, 3, 7, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 5, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 5, ), }, }, { until: []uint64{5, 6, 7, 9, 10, 12, 13, 14, 15, 18, 20}, shiftUntilTS: returnV(5), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 7, stream.DefaultCF, 0), + m_1(1, 3, 7, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 5, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 5, ), }, }, { @@ -1396,29 +1394,29 @@ func TestTruncate2(t *testing.T) { // filegroup 3 7 10-d-13 ↓ ↓ // filegroup 7------d--15-w-20 metas: []*backuppb.Metadata{ - m_1(1, 3, 7, stream.DefaultCF, 0), + m_1(1, 3, 7, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 7, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 7, ), }, testParams: []*testParam2{ { until: []uint64{2, 3, 4, 6, 7}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 7, stream.DefaultCF, 0), + m_1(1, 3, 7, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 7, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 7, ), }, }, { until: []uint64{9, 10, 12, 13, 14, 15, 18, 20}, shiftUntilTS: returnV(7), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 7, stream.DefaultCF, 0), + m_1(1, 3, 7, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 7, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 7, ), }, }, { @@ -1434,36 +1432,36 @@ func TestTruncate2(t *testing.T) { // filegroup 3-d-6 10-d-13 ↓ ↓ // filegroup 8----d--15-w-20 metas: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 8, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 8, ), }, testParams: []*testParam2{ { until: []uint64{2, 3, 4, 6}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 8, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 8, ), }, }, { until: []uint64{7}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 8, 
+ 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 8, ), }, }, { until: []uint64{8, 9, 10, 12, 13, 14, 15, 18, 20}, shiftUntilTS: returnV(8), restMetadata: []*backuppb.Metadata{ m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 8, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 8, ), }, }, { @@ -1479,36 +1477,36 @@ func TestTruncate2(t *testing.T) { // filegroup 3-d-6 10-d-13 ↓ ↓ // filegroup 10--d--15-w-20 metas: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 10, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 10, ), }, testParams: []*testParam2{ { until: []uint64{2, 3, 4, 6}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 10, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 10, ), }, }, { until: []uint64{7, 8, 9}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 10, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 10, ), }, }, { until: []uint64{10, 12, 13, 14, 15, 18, 20}, shiftUntilTS: returnV(10), restMetadata: []*backuppb.Metadata{ m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 10, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 10, ), }, }, { @@ -1524,36 +1522,36 @@ func TestTruncate2(t *testing.T) { // filegroup 3-d-6 9-d-13 ↓ ↓ // filegroup 11-d-15-w-20 metas: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 9, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 11, + 9, 13, DefaultCF, 0, + 15, 20, WriteCF, 11, ), }, testParams: []*testParam2{ { until: []uint64{2, 3, 4, 6}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 9, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 11, + 9, 13, DefaultCF, 0, + 15, 20, WriteCF, 11, ), }, }, { until: []uint64{7, 8, 9, 10}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ m_2(2, - 9, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 11, + 9, 13, DefaultCF, 0, + 15, 20, WriteCF, 11, ), }, }, { until: []uint64{11, 12, 13, 14, 15, 18, 20}, shiftUntilTS: returnV(11), restMetadata: []*backuppb.Metadata{ m_2(2, - 9, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 11, + 9, 13, DefaultCF, 0, + 15, 20, WriteCF, 11, ), }, }, { @@ -1569,36 +1567,36 @@ func TestTruncate2(t *testing.T) { // filegroup 3-d-6 10-d-13 ↓ ↓ // filegroup 13d15-w-20 metas: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 13, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 13, ), }, testParams: []*testParam2{ { until: []uint64{2, 3, 4, 6}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 13, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 13, ), }, }, { until: []uint64{7, 8, 9, 10, 12}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 13, + 10, 13, DefaultCF, 0, + 15, 20, WriteCF, 13, ), }, }, { until: []uint64{13, 14, 15, 18, 20}, shiftUntilTS: returnV(13), restMetadata: []*backuppb.Metadata{ m_2(2, - 10, 13, stream.DefaultCF, 0, - 15, 20, stream.WriteCF, 13, + 10, 13, DefaultCF, 0, + 15, 
20, WriteCF, 13, ), }, }, { @@ -1614,39 +1612,39 @@ func TestTruncate2(t *testing.T) { // filegroup 3-d-6 10-d-12 ↓ ↓ // filegroup 14d16-w-20 metas: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 12, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 10, 12, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, testParams: []*testParam2{ { until: []uint64{2, 3, 4, 6}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 12, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 10, 12, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, }, { until: []uint64{7, 8, 9, 10, 11, 12}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ m_2(2, - 10, 12, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 10, 12, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, }, { until: []uint64{13}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ - m_1(2, 16, 20, stream.WriteCF, 14), + m_1(2, 16, 20, WriteCF, 14), }, }, { until: []uint64{14, 15, 18, 20}, shiftUntilTS: returnV(14), restMetadata: []*backuppb.Metadata{ - m_1(2, 16, 20, stream.WriteCF, 14), + m_1(2, 16, 20, WriteCF, 14), }, }, { until: []uint64{25}, @@ -1661,39 +1659,39 @@ func TestTruncate2(t *testing.T) { // filegroup 3-d-6 10-d-12 ↓ ↓ // filegroup 14d16-w-20 metas: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 12, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 10, 12, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, testParams: []*testParam2{ { until: []uint64{2, 3, 4, 6}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ - m_1(1, 3, 6, stream.DefaultCF, 0), + m_1(1, 3, 6, DefaultCF, 0), m_2(2, - 10, 12, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 10, 12, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, }, { until: []uint64{7, 8, 9, 10, 11, 12}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ m_2(2, - 10, 12, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 10, 12, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, }, { until: []uint64{13}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ - m_1(2, 16, 20, stream.WriteCF, 14), + m_1(2, 16, 20, WriteCF, 14), }, }, { until: []uint64{14, 15, 18, 20}, shiftUntilTS: returnV(14), restMetadata: []*backuppb.Metadata{ - m_1(2, 16, 20, stream.WriteCF, 14), + m_1(2, 16, 20, WriteCF, 14), }, }, { until: []uint64{25}, @@ -1707,8 +1705,8 @@ func TestTruncate2(t *testing.T) { for j, ts := range cs.testParams { for _, until := range ts.until { t.Logf("case %d, param %d, until %d", i, j, until) - metas := restore.StreamMetadataSet{ - Helper: stream.NewMetadataHelper(), + metas := StreamMetadataSet{ + Helper: NewMetadataHelper(), MetadataDownloadBatchSize: 128, } err := generateFiles(ctx, s, cs.metas, tmpDir) @@ -1748,12 +1746,12 @@ func TestTruncate3(t *testing.T) { // filegroup 5----d--------15--w--20 metas: []*backuppb.Metadata{ m_2(1, - 3, 7, stream.DefaultCF, 0, - 5, 10, stream.DefaultCF, 0, + 3, 7, DefaultCF, 0, + 5, 10, DefaultCF, 0, ), m_2(2, - 12, 18, stream.WriteCF, 3, - 15, 20, stream.WriteCF, 5, + 12, 18, WriteCF, 3, + 15, 20, WriteCF, 5, ), }, testParams: []*testParam2{ @@ -1761,36 +1759,36 @@ func TestTruncate3(t *testing.T) { until: []uint64{2}, shiftUntilTS: returnV(2), restMetadata: []*backuppb.Metadata{ m_2(1, - 3, 7, stream.DefaultCF, 0, - 5, 10, stream.DefaultCF, 0, + 3, 7, DefaultCF, 0, + 5, 10, DefaultCF, 0, ), m_2(2, - 12, 18, stream.WriteCF, 3, 
- 15, 20, stream.WriteCF, 5, + 12, 18, WriteCF, 3, + 15, 20, WriteCF, 5, ), }, }, { until: []uint64{3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 18}, shiftUntilTS: returnV(3), restMetadata: []*backuppb.Metadata{ m_2(1, - 3, 7, stream.DefaultCF, 0, - 5, 10, stream.DefaultCF, 0, + 3, 7, DefaultCF, 0, + 5, 10, DefaultCF, 0, ), m_2(2, - 12, 18, stream.WriteCF, 3, - 15, 20, stream.WriteCF, 5, + 12, 18, WriteCF, 3, + 15, 20, WriteCF, 5, ), }, }, { until: []uint64{19, 20}, shiftUntilTS: returnV(5), restMetadata: []*backuppb.Metadata{ m_2(1, - 3, 7, stream.DefaultCF, 0, - 5, 10, stream.DefaultCF, 0, + 3, 7, DefaultCF, 0, + 5, 10, DefaultCF, 0, ), m_2(2, - 12, 18, stream.WriteCF, 3, - 15, 20, stream.WriteCF, 5, + 12, 18, WriteCF, 3, + 15, 20, WriteCF, 5, ), }, }, { @@ -1809,12 +1807,12 @@ func TestTruncate3(t *testing.T) { // filegroup 8---d----15--w--20 metas: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 4, 10, stream.DefaultCF, 0, + 2, 6, DefaultCF, 0, + 4, 10, DefaultCF, 0, ), m_2(2, - 12, 18, stream.WriteCF, 2, - 15, 20, stream.WriteCF, 8, + 12, 18, WriteCF, 2, + 15, 20, WriteCF, 8, ), }, testParams: []*testParam2{ @@ -1822,35 +1820,35 @@ func TestTruncate3(t *testing.T) { until: []uint64{1}, shiftUntilTS: returnV(1), restMetadata: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 4, 10, stream.DefaultCF, 0, + 2, 6, DefaultCF, 0, + 4, 10, DefaultCF, 0, ), m_2(2, - 12, 18, stream.WriteCF, 2, - 15, 20, stream.WriteCF, 8, + 12, 18, WriteCF, 2, + 15, 20, WriteCF, 8, ), }, }, { until: []uint64{2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 18}, shiftUntilTS: returnV(2), restMetadata: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 4, 10, stream.DefaultCF, 0, + 2, 6, DefaultCF, 0, + 4, 10, DefaultCF, 0, ), m_2(2, - 12, 18, stream.WriteCF, 2, - 15, 20, stream.WriteCF, 8, + 12, 18, WriteCF, 2, + 15, 20, WriteCF, 8, ), }, }, { until: []uint64{19, 20}, shiftUntilTS: returnV(8), restMetadata: []*backuppb.Metadata{ m_1(1, - 4, 10, stream.DefaultCF, 0, + 4, 10, DefaultCF, 0, ), m_2(2, - 12, 18, stream.WriteCF, 2, - 15, 20, stream.WriteCF, 8, + 12, 18, WriteCF, 2, + 15, 20, WriteCF, 8, ), }, }, { @@ -1869,12 +1867,12 @@ func TestTruncate3(t *testing.T) { // filegroup 12---d--16--w--20 metas: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 4, 10, stream.DefaultCF, 0, + 2, 6, DefaultCF, 0, + 4, 10, DefaultCF, 0, ), m_2(2, - 14, 18, stream.WriteCF, 2, - 16, 20, stream.WriteCF, 12, + 14, 18, WriteCF, 2, + 16, 20, WriteCF, 12, ), }, testParams: []*testParam2{ @@ -1882,32 +1880,32 @@ func TestTruncate3(t *testing.T) { until: []uint64{1}, shiftUntilTS: returnV(1), restMetadata: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 4, 10, stream.DefaultCF, 0, + 2, 6, DefaultCF, 0, + 4, 10, DefaultCF, 0, ), m_2(2, - 14, 18, stream.WriteCF, 2, - 16, 20, stream.WriteCF, 12, + 14, 18, WriteCF, 2, + 16, 20, WriteCF, 12, ), }, }, { until: []uint64{2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 18}, shiftUntilTS: returnV(2), restMetadata: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 4, 10, stream.DefaultCF, 0, + 2, 6, DefaultCF, 0, + 4, 10, DefaultCF, 0, ), m_2(2, - 14, 18, stream.WriteCF, 2, - 16, 20, stream.WriteCF, 12, + 14, 18, WriteCF, 2, + 16, 20, WriteCF, 12, ), }, }, { until: []uint64{19, 20}, shiftUntilTS: returnV(12), restMetadata: []*backuppb.Metadata{ m_2(2, - 14, 18, stream.WriteCF, 2, - 16, 20, stream.WriteCF, 8, + 14, 18, WriteCF, 2, + 16, 20, WriteCF, 8, ), }, }, { @@ -1926,12 +1924,12 @@ func TestTruncate3(t *testing.T) { // 
filegroup 14-d--16-w--20 metas: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 8, 10, stream.WriteCF, 4, + 2, 6, DefaultCF, 0, + 8, 10, WriteCF, 4, ), m_2(2, - 14, 18, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 14, 18, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, testParams: []*testParam2{ @@ -1939,52 +1937,52 @@ func TestTruncate3(t *testing.T) { until: []uint64{1}, shiftUntilTS: returnV(1), restMetadata: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 8, 10, stream.WriteCF, 4, + 2, 6, DefaultCF, 0, + 8, 10, WriteCF, 4, ), m_2(2, - 14, 18, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 14, 18, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, }, { until: []uint64{2, 3}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 8, 10, stream.WriteCF, 4, + 2, 6, DefaultCF, 0, + 8, 10, WriteCF, 4, ), m_2(2, - 14, 18, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 14, 18, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, }, { until: []uint64{4, 5, 6, 7, 8, 9, 10}, shiftUntilTS: returnV(4), restMetadata: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 8, 10, stream.WriteCF, 4, + 2, 6, DefaultCF, 0, + 8, 10, WriteCF, 4, ), m_2(2, - 14, 18, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 14, 18, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, }, { until: []uint64{12}, shiftUntilTS: returnV(12), restMetadata: []*backuppb.Metadata{ m_2(2, - 14, 18, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 14, 18, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, }, { until: []uint64{14, 15, 16, 17, 18, 19, 20}, shiftUntilTS: returnV(14), restMetadata: []*backuppb.Metadata{ m_2(2, - 14, 18, stream.DefaultCF, 0, - 16, 20, stream.WriteCF, 14, + 14, 18, DefaultCF, 0, + 16, 20, WriteCF, 14, ), }, }, { @@ -2004,15 +2002,15 @@ func TestTruncate3(t *testing.T) { // filegroup 20---d-24-w-26 metas: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 8, 10, stream.DefaultCF, 0, + 2, 6, DefaultCF, 0, + 8, 10, DefaultCF, 0, ), m_2(2, - 14, 18, stream.WriteCF, 9, - 16, 22, stream.DefaultCF, 0, + 14, 18, WriteCF, 9, + 16, 22, DefaultCF, 0, ), m_1(3, - 24, 26, stream.WriteCF, 20, + 24, 26, WriteCF, 20, ), }, testParams: []*testParam2{ @@ -2020,63 +2018,63 @@ func TestTruncate3(t *testing.T) { until: []uint64{1, 2, 3, 6}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ m_2(1, - 2, 6, stream.DefaultCF, 0, - 8, 10, stream.DefaultCF, 0, + 2, 6, DefaultCF, 0, + 8, 10, DefaultCF, 0, ), m_2(2, - 14, 18, stream.WriteCF, 9, - 16, 22, stream.DefaultCF, 0, + 14, 18, WriteCF, 9, + 16, 22, DefaultCF, 0, ), m_1(3, - 24, 26, stream.WriteCF, 20, + 24, 26, WriteCF, 20, ), }, }, { until: []uint64{7, 8}, shiftUntilTS: returnSelf(), restMetadata: []*backuppb.Metadata{ m_1(1, - 8, 10, stream.DefaultCF, 0, + 8, 10, DefaultCF, 0, ), m_2(2, - 14, 18, stream.WriteCF, 9, - 16, 22, stream.DefaultCF, 0, + 14, 18, WriteCF, 9, + 16, 22, DefaultCF, 0, ), m_1(3, - 24, 26, stream.WriteCF, 20, + 24, 26, WriteCF, 20, ), }, }, { until: []uint64{9, 10, 11, 14, 15, 16, 17, 18}, shiftUntilTS: returnV(9), restMetadata: []*backuppb.Metadata{ m_1(1, - 8, 10, stream.DefaultCF, 0, + 8, 10, DefaultCF, 0, ), m_2(2, - 14, 18, stream.WriteCF, 9, - 16, 22, stream.DefaultCF, 0, + 14, 18, WriteCF, 9, + 16, 22, DefaultCF, 0, ), m_1(3, - 24, 26, stream.WriteCF, 20, + 24, 26, WriteCF, 20, ), }, }, { until: []uint64{19}, shiftUntilTS: returnV(19), restMetadata: []*backuppb.Metadata{ m_1(2, - 16, 22, stream.DefaultCF, 0, + 16, 22, DefaultCF, 0, ), m_1(3, - 24, 26, 
stream.WriteCF, 20, + 24, 26, WriteCF, 20, ), }, }, { until: []uint64{20, 21, 22, 23, 24, 25, 26}, shiftUntilTS: returnV(20), restMetadata: []*backuppb.Metadata{ m_1(2, - 16, 22, stream.DefaultCF, 0, + 16, 22, DefaultCF, 0, ), m_1(3, - 24, 26, stream.WriteCF, 20, + 24, 26, WriteCF, 20, ), }, }, { @@ -2091,8 +2089,8 @@ func TestTruncate3(t *testing.T) { for j, ts := range cs.testParams { for _, until := range ts.until { t.Logf("case %d, param %d, until %d", i, j, until) - metas := restore.StreamMetadataSet{ - Helper: stream.NewMetadataHelper(), + metas := StreamMetadataSet{ + Helper: NewMetadataHelper(), MetadataDownloadBatchSize: 128, } err := generateFiles(ctx, s, cs.metas, tmpDir) @@ -2162,7 +2160,7 @@ func mf(id int64, filess [][]*backuppb.DataFileInfo) *backuppb.Metadata { StoreId: id, MetaVersion: backuppb.MetaVersion_V2, } - restore.ReplaceMetadata(m, filegroups) + ReplaceMetadata(m, filegroups) return m } @@ -2187,9 +2185,9 @@ func TestCalculateShiftTS(t *testing.T) { metas: []*backuppb.Metadata{ mf(1, [][]*backuppb.DataFileInfo{ { - fi(10, 20, stream.DefaultCF, 0), - fi(15, 30, stream.WriteCF, 8), - fi(25, 35, stream.WriteCF, 11), + fi(10, 20, DefaultCF, 0), + fi(15, 30, WriteCF, 8), + fi(25, 35, WriteCF, 11), }, }), }, @@ -2219,9 +2217,9 @@ func TestCalculateShiftTS(t *testing.T) { metas: []*backuppb.Metadata{ mf(1, [][]*backuppb.DataFileInfo{ { - fi(65, 70, stream.WriteCF, 55), - fi(50, 60, stream.DefaultCF, 0), - fi(80, 85, stream.WriteCF, 72), + fi(65, 70, WriteCF, 55), + fi(50, 60, DefaultCF, 0), + fi(80, 85, WriteCF, 72), }, }), }, @@ -2254,16 +2252,16 @@ func TestCalculateShiftTS(t *testing.T) { metas: []*backuppb.Metadata{ mf(1, [][]*backuppb.DataFileInfo{ { - fi(10, 20, stream.DefaultCF, 0), - fi(15, 30, stream.WriteCF, 8), - fi(25, 35, stream.WriteCF, 11), + fi(10, 20, DefaultCF, 0), + fi(15, 30, WriteCF, 8), + fi(25, 35, WriteCF, 11), }, }), mf(2, [][]*backuppb.DataFileInfo{ { - fi(65, 70, stream.WriteCF, 55), - fi(50, 60, stream.DefaultCF, 0), - fi(80, 85, stream.WriteCF, 72), + fi(65, 70, WriteCF, 55), + fi(50, 60, DefaultCF, 0), + fi(80, 85, WriteCF, 72), }, }), }, @@ -2304,8 +2302,8 @@ func TestCalculateShiftTS(t *testing.T) { for j, ts := range cs.testParams { for _, until := range ts.until { t.Logf("case %d, param %d, until %d", i, j, until) - metas := restore.StreamMetadataSet{ - Helper: stream.NewMetadataHelper(), + metas := StreamMetadataSet{ + Helper: NewMetadataHelper(), MetadataDownloadBatchSize: 128, } err := generateFiles(ctx, s, cs.metas, tmpDir) diff --git a/br/pkg/task/BUILD.bazel b/br/pkg/task/BUILD.bazel index a1b1b7690b..130256094c 100644 --- a/br/pkg/task/BUILD.bazel +++ b/br/pkg/task/BUILD.bazel @@ -34,7 +34,11 @@ go_library( "//br/pkg/metautil", "//br/pkg/pdutil", "//br/pkg/restore", + "//br/pkg/restore/data", + "//br/pkg/restore/file_importer", + "//br/pkg/restore/rawkv", "//br/pkg/restore/tiflashrec", + "//br/pkg/restore/utils", "//br/pkg/rtree", "//br/pkg/storage", "//br/pkg/stream", @@ -51,6 +55,7 @@ go_library( "//pkg/sessionctx/stmtctx", "//pkg/sessionctx/variable", "//pkg/statistics/handle", + "//pkg/tablecodec", "//pkg/types", "//pkg/util", "//pkg/util/cdcutil", @@ -99,7 +104,7 @@ go_test( ], embed = [":task"], flaky = True, - shard_count = 22, + shard_count = 23, deps = [ "//br/pkg/config", "//br/pkg/conn", diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index c0230a00d2..bebc5fc458 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -25,11 +25,13 @@ import ( "github.com/pingcap/tidb/br/pkg/metautil" 
"github.com/pingcap/tidb/br/pkg/pdutil" "github.com/pingcap/tidb/br/pkg/restore" + fileimporter "github.com/pingcap/tidb/br/pkg/restore/file_importer" "github.com/pingcap/tidb/br/pkg/restore/tiflashrec" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/br/pkg/version" "github.com/pingcap/tidb/pkg/config" + "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/mathutil" "github.com/spf13/cobra" @@ -535,9 +537,9 @@ func configureRestoreClient(ctx context.Context, client *restore.Client, cfg *Re err := restore.CheckKeyspaceBREnable(ctx, client.GetPDClient()) if err != nil { log.Warn("Keyspace BR is not supported in this cluster, fallback to legacy restore", zap.Error(err)) - client.SetRewriteMode(restore.RewriteModeLegacy) + client.SetRewriteMode(fileimporter.RewriteModeLegacy) } else { - client.SetRewriteMode(restore.RewriteModeKeyspace) + client.SetRewriteMode(fileimporter.RewriteModeKeyspace) } err = client.LoadRestoreStores(ctx) @@ -976,7 +978,7 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf // If the API V2 data occurs in the restore process, the cluster must // support the keyspace rewrite mode. - if (len(oldKeyspace) > 0 || len(newKeyspace) > 0) && client.GetRewriteMode() == restore.RewriteModeLegacy { + if (len(oldKeyspace) > 0 || len(newKeyspace) > 0) && client.GetRewriteMode() == fileimporter.RewriteModeLegacy { return errors.Annotate(berrors.ErrRestoreModeMismatch, "cluster only supports legacy rewrite mode") } @@ -1006,13 +1008,13 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf // Block on creating tables before restore starts. since create table is no longer a heavy operation any more. tableStream = GoBlockCreateTablesPipeline(ctx, maxRestoreBatchSizeLimit, tableStream) - tableFileMap := restore.MapTableToFiles(files) + tableFileMap := MapTableToFiles(files) log.Debug("mapped table to files", zap.Any("result map", tableFileMap)) rangeStream := restore.GoValidateFileRanges( ctx, tableStream, tableFileMap, kvConfigs.MergeRegionSize.Value, kvConfigs.MergeRegionKeyCount.Value, errCh) - rangeSize := restore.EstimateRangeSize(files) + rangeSize := EstimateRangeSize(files) summary.CollectInt("restore ranges", rangeSize) log.Info("range and file prepared", zap.Int("file count", len(files)), zap.Int("range count", rangeSize)) @@ -1124,6 +1126,41 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf return nil } +// EstimateRangeSize estimates the total range count by file. +func EstimateRangeSize(files []*backuppb.File) int { + result := 0 + for _, f := range files { + if strings.HasSuffix(f.GetName(), "_write.sst") { + result++ + } + } + return result +} + +// MapTableToFiles makes a map that mapping table ID to its backup files. +// aware that one file can and only can hold one table. 
+func MapTableToFiles(files []*backuppb.File) map[int64][]*backuppb.File { + result := map[int64][]*backuppb.File{} + for _, file := range files { + tableID := tablecodec.DecodeTableID(file.GetStartKey()) + tableEndID := tablecodec.DecodeTableID(file.GetEndKey()) + if tableID != tableEndID { + log.Panic("key range spread between many files.", + zap.String("file name", file.Name), + logutil.Key("startKey", file.StartKey), + logutil.Key("endKey", file.EndKey)) + } + if tableID == 0 { + log.Panic("invalid table key of file", + zap.String("file name", file.Name), + logutil.Key("startKey", file.StartKey), + logutil.Key("endKey", file.EndKey)) + } + result[tableID] = append(result[tableID], file) + } + return result +} + // dropToBlackhole drop all incoming tables into black hole, // i.e. don't execute checksum, just increase the process anyhow. func dropToBlackhole( diff --git a/br/pkg/task/restore_data.go b/br/pkg/task/restore_data.go index ff19f19eca..8e992de1f9 100644 --- a/br/pkg/task/restore_data.go +++ b/br/pkg/task/restore_data.go @@ -15,6 +15,7 @@ import ( "github.com/pingcap/tidb/br/pkg/conn/util" "github.com/pingcap/tidb/br/pkg/glue" "github.com/pingcap/tidb/br/pkg/restore" + "github.com/pingcap/tidb/br/pkg/restore/data" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" @@ -142,7 +143,7 @@ func RunResolveKvData(c context.Context, g glue.Glue, cmdName string, cfg *Resto // restore tikv data from a snapshot volume var totalRegions int - totalRegions, err = restore.RecoverData(ctx, resolveTS, allStores, mgr, progress, restoreTS, cfg.Concurrency) + totalRegions, err = data.RecoverData(ctx, resolveTS, allStores, mgr, progress, restoreTS, cfg.Concurrency) if err != nil { return errors.Trace(err) } diff --git a/br/pkg/task/restore_raw.go b/br/pkg/task/restore_raw.go index 50360d4b4e..bfb2d61239 100644 --- a/br/pkg/task/restore_raw.go +++ b/br/pkg/task/restore_raw.go @@ -14,6 +14,7 @@ import ( "github.com/pingcap/tidb/br/pkg/httputil" "github.com/pingcap/tidb/br/pkg/metautil" "github.com/pingcap/tidb/br/pkg/restore" + restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/summary" "github.com/spf13/cobra" "github.com/spf13/pflag" @@ -131,7 +132,7 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR } summary.CollectInt("restore files", len(files)) - ranges, _, err := restore.MergeAndRewriteFileRanges( + ranges, _, err := restoreutils.MergeAndRewriteFileRanges( files, nil, kvConfigs.MergeRegionSize.Value, kvConfigs.MergeRegionKeyCount.Value) if err != nil { return errors.Trace(err) diff --git a/br/pkg/task/restore_test.go b/br/pkg/task/restore_test.go index 490f0d1546..5eeedbbd1c 100644 --- a/br/pkg/task/restore_test.go +++ b/br/pkg/task/restore_test.go @@ -272,3 +272,40 @@ func mockBackupMeta(mockSchemas []*backuppb.Schema, mockFiles []*backuppb.File) Schemas: mockSchemas, } } + +func TestMapTableToFiles(t *testing.T) { + filesOfTable1 := []*backuppb.File{ + { + Name: "table1-1.sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(1), + }, + { + Name: "table1-2.sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(1), + }, + { + Name: "table1-3.sst", + StartKey: tablecodec.EncodeTablePrefix(1), + EndKey: tablecodec.EncodeTablePrefix(1), + }, + } + filesOfTable2 := []*backuppb.File{ + { + Name: "table2-1.sst", + StartKey: tablecodec.EncodeTablePrefix(2), + EndKey: 
diff --git a/br/pkg/task/restore_test.go b/br/pkg/task/restore_test.go
index 490f0d1546..5eeedbbd1c 100644
--- a/br/pkg/task/restore_test.go
+++ b/br/pkg/task/restore_test.go
@@ -272,3 +272,40 @@ func mockBackupMeta(mockSchemas []*backuppb.Schema, mockFiles []*backuppb.File)
 		Schemas: mockSchemas,
 	}
 }
+
+func TestMapTableToFiles(t *testing.T) {
+	filesOfTable1 := []*backuppb.File{
+		{
+			Name:     "table1-1.sst",
+			StartKey: tablecodec.EncodeTablePrefix(1),
+			EndKey:   tablecodec.EncodeTablePrefix(1),
+		},
+		{
+			Name:     "table1-2.sst",
+			StartKey: tablecodec.EncodeTablePrefix(1),
+			EndKey:   tablecodec.EncodeTablePrefix(1),
+		},
+		{
+			Name:     "table1-3.sst",
+			StartKey: tablecodec.EncodeTablePrefix(1),
+			EndKey:   tablecodec.EncodeTablePrefix(1),
+		},
+	}
+	filesOfTable2 := []*backuppb.File{
+		{
+			Name:     "table2-1.sst",
+			StartKey: tablecodec.EncodeTablePrefix(2),
+			EndKey:   tablecodec.EncodeTablePrefix(2),
+		},
+		{
+			Name:     "table2-2.sst",
+			StartKey: tablecodec.EncodeTablePrefix(2),
+			EndKey:   tablecodec.EncodeTablePrefix(2),
+		},
+	}
+
+	result := MapTableToFiles(append(filesOfTable2, filesOfTable1...))
+
+	require.Equal(t, filesOfTable1, result[1])
+	require.Equal(t, filesOfTable2, result[2])
+}
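TestMapTableToFiles above only exercises the grouping happy path. Going by the guards in MapTableToFiles, a file whose start and end keys decode to different table IDs (or to table ID 0) terminates the process via log.Panic. The snippet below is a hypothetical illustration of such an input, not an additional test in this patch:

	// Hypothetical: the start key belongs to table 1, the end key to table 2.
	badFile := &backuppb.File{
		Name:     "spans-two-tables.sst",
		StartKey: tablecodec.EncodeTablePrefix(1),
		EndKey:   tablecodec.EncodeTablePrefix(2),
	}
	// tablecodec.DecodeTableID yields 1 for StartKey and 2 for EndKey,
	// so MapTableToFiles([]*backuppb.File{badFile}) hits the log.Panic branch.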
", glue.WithTimeCost()) - metas := restore.StreamMetadataSet{ + metas := stream.StreamMetadataSet{ MetadataDownloadBatchSize: cfg.MetadataDownloadBatchSize, Helper: stream.NewMetadataHelper(), DryRun: cfg.DryRun, @@ -1038,7 +1040,7 @@ func RunStreamTruncate(c context.Context, g glue.Glue, cmdName string, cfg *Stre totalSize uint64 = 0 ) - metas.IterateFilesFullyBefore(shiftUntilTS, func(d *restore.FileGroupInfo) (shouldBreak bool) { + metas.IterateFilesFullyBefore(shiftUntilTS, func(d *stream.FileGroupInfo) (shouldBreak bool) { fileCount++ totalSize += d.Length kvCount += d.KVCount @@ -1053,8 +1055,8 @@ func RunStreamTruncate(c context.Context, g glue.Glue, cmdName string, cfg *Stre } if cfg.Until > sp && !cfg.DryRun { - if err := restore.SetTSToFile( - ctx, extStorage, cfg.Until, restore.TruncateSafePointFileName); err != nil { + if err := stream.SetTSToFile( + ctx, extStorage, cfg.Until, stream.TruncateSafePointFileName); err != nil { return err } } @@ -1409,7 +1411,7 @@ func restoreStream( idrules := make(map[int64]int64) downstreamIdset := make(map[int64]struct{}) for upstreamId, rule := range rewriteRules { - downstreamId := restore.GetRewriteTableID(upstreamId, rule) + downstreamId := restoreutils.GetRewriteTableID(upstreamId, rule) idrules[upstreamId] = downstreamId downstreamIdset[downstreamId] = struct{}{} } @@ -1606,7 +1608,7 @@ func getLogRangeWithStorage( // truncateTS: get log truncate ts from TruncateSafePointFileName. // If truncateTS equals 0, which represents the stream log has never been truncated. - truncateTS, err := restore.GetTSFromFile(ctx, s, restore.TruncateSafePointFileName) + truncateTS, err := stream.GetTSFromFile(ctx, s, stream.TruncateSafePointFileName) if err != nil { return backupLogInfo{}, errors.Trace(err) } @@ -1687,8 +1689,8 @@ func parseFullBackupTablesStorage( }, nil } -func initRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restore.RewriteRules { - rules := make(map[int64]*restore.RewriteRules) +func initRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restoreutils.RewriteRules { + rules := make(map[int64]*restoreutils.RewriteRules) filter := schemasReplace.TableFilter for _, dbReplace := range schemasReplace.DbMap { @@ -1705,7 +1707,7 @@ func initRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restore. log.Info("add rewrite rule", zap.String("tableName", dbReplace.Name+"."+tableReplace.Name), zap.Int64("oldID", oldTableID), zap.Int64("newID", tableReplace.TableID)) - rules[oldTableID] = restore.GetRewriteRuleOfTable( + rules[oldTableID] = restoreutils.GetRewriteRuleOfTable( oldTableID, tableReplace.TableID, 0, tableReplace.IndexMap, false) } @@ -1714,7 +1716,7 @@ func initRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restore. 
log.Info("add rewrite rule", zap.String("tableName", dbReplace.Name+"."+tableReplace.Name), zap.Int64("oldID", oldID), zap.Int64("newID", newID)) - rules[oldID] = restore.GetRewriteRuleOfTable(oldID, newID, 0, tableReplace.IndexMap, false) + rules[oldID] = restoreutils.GetRewriteRuleOfTable(oldID, newID, 0, tableReplace.IndexMap, false) } } } @@ -1726,18 +1728,18 @@ func newRawBatchClient( ctx context.Context, pdAddrs []string, tlsConfig TLSConfig, -) (*restore.RawKVBatchClient, error) { +) (*rawkv.RawKVBatchClient, error) { security := config.Security{ ClusterSSLCA: tlsConfig.CA, ClusterSSLCert: tlsConfig.Cert, ClusterSSLKey: tlsConfig.Key, } - rawkvClient, err := restore.NewRawkvClient(ctx, pdAddrs, security) + rawkvClient, err := rawkv.NewRawkvClient(ctx, pdAddrs, security) if err != nil { return nil, errors.Trace(err) } - return restore.NewRawKVBatchClient(rawkvClient, rawKVBatchCount), nil + return rawkv.NewRawKVBatchClient(rawkvClient, rawKVBatchCount), nil } // ShiftTS gets a smaller shiftTS than startTS.