Files
tidb/br/pkg/stream/client.go
2022-05-12 17:08:35 +08:00

304 lines
9.7 KiB
Go

// Copyright 2021 PingCAP, Inc. Licensed under Apache-2.0.
package stream
import (
"bytes"
"context"
"encoding/binary"
"strconv"
"strings"
"github.com/gogo/protobuf/proto"
"github.com/pingcap/errors"
"github.com/pingcap/log"
"go.uber.org/zap"
backuppb "github.com/pingcap/kvproto/pkg/brpb"
berrors "github.com/pingcap/tidb/br/pkg/errors"
"github.com/pingcap/tidb/br/pkg/redact"
"github.com/pingcap/tidb/kv"
clientv3 "go.etcd.io/etcd/client/v3"
)
// MetaDataClient is the client for operations over metadata.
//
// It embeds *clientv3.Client, so the fields and methods of the etcd client
// (for example KV and Get) are reachable directly on a MetaDataClient.
type MetaDataClient struct {
*clientv3.Client
}
// NewMetaDataClient wraps the given etcd client into a MetaDataClient.
func NewMetaDataClient(c *clientv3.Client) *MetaDataClient {
	return &MetaDataClient{Client: c}
}
// PutTask puts a task into the metadata storage.
//
// The marshaled task info, one entry per range of the task and, when
// task.Pausing is set, the pause marker are all written in one transaction.
func (c *MetaDataClient) PutTask(ctx context.Context, task TaskInfo) error {
	payload, err := task.PBInfo.Marshal()
	if err != nil {
		return errors.Annotatef(err, "failed to marshal task %s", task.PBInfo.Name)
	}
	name := task.PBInfo.Name

	// Reserve room for the task entry, every range, and the optional pause marker.
	ops := make([]clientv3.Op, 0, 2+len(task.Ranges))
	ops = append(ops, clientv3.OpPut(TaskOf(name), string(payload)))
	for _, rg := range task.Ranges {
		// Each range is stored as start key (in the etcd key) -> end key (as the value).
		putRange := clientv3.OpPut(RangeKeyOf(name, rg.StartKey), string(rg.EndKey))
		ops = append(ops, putRange)
		log.Debug("range info",
			zap.String("task-name", name),
			zap.String("start-key", redact.Key(rg.StartKey)),
			zap.String("end-key", redact.Key(rg.EndKey)),
		)
	}
	if task.Pausing {
		ops = append(ops, clientv3.OpPut(Pause(name), ""))
	}

	if _, err := c.KV.Txn(ctx).Then(ops...).Commit(); err != nil {
		return errors.Annotatef(err, "failed to commit the change for task %s", name)
	}
	return nil
}
// DeleteTask deletes a task, along with its metadata.
//
// The task entry, its ranges, its checkpoints, its pause marker and its last
// errors are removed within a single transaction.
func (c *MetaDataClient) DeleteTask(ctx context.Context, taskName string) error {
	cleanup := []clientv3.Op{
		clientv3.OpDelete(TaskOf(taskName)),
		clientv3.OpDelete(RangesOf(taskName), clientv3.WithPrefix()),
		clientv3.OpDelete(CheckPointsOf(taskName), clientv3.WithPrefix()),
		clientv3.OpDelete(Pause(taskName)),
		clientv3.OpDelete(LastErrorPrefixOf(taskName), clientv3.WithPrefix()),
	}
	if _, err := c.KV.Txn(ctx).Then(cleanup...).Commit(); err != nil {
		return errors.Annotatef(err, "failed to delete task itself %s", taskName)
	}
	return nil
}
// PauseTask marks the task as paused by writing an empty value under its pause key.
func (c *MetaDataClient) PauseTask(ctx context.Context, taskName string) error {
	pauseKey := Pause(taskName)
	if _, err := c.KV.Put(ctx, pauseKey, ""); err != nil {
		return errors.Annotatef(err, "failed to pause task %s", taskName)
	}
	return nil
}
// ResumeTask resumes the task by deleting its pause key.
func (c *MetaDataClient) ResumeTask(ctx context.Context, taskName string) error {
	pauseKey := Pause(taskName)
	if _, err := c.KV.Delete(ctx, pauseKey); err != nil {
		return errors.Annotatef(err, "failed to resume task %s", taskName)
	}
	return nil
}
// CleanLastErrorOfTask deletes every key under the last-error prefix of the task.
func (c *MetaDataClient) CleanLastErrorOfTask(ctx context.Context, taskName string) error {
	errPrefix := LastErrorPrefixOf(taskName)
	if _, err := c.KV.Delete(ctx, errPrefix, clientv3.WithPrefix()); err != nil {
		return errors.Annotatef(err, "failed to clean last error of task %s", taskName)
	}
	return nil
}
// GetTask gets the basic task handle from the metadata storage.
//
// It returns an error annotating berrors.ErrPiTRTaskNotFound when no entry
// exists for taskName, and an annotated error when the request fails or the
// stored value cannot be decoded as a task info.
func (c *MetaDataClient) GetTask(ctx context.Context, taskName string) (*Task, error) {
	resp, err := c.Get(ctx, TaskOf(taskName))
	switch {
	case err != nil:
		return nil, errors.Annotatef(err, "failed to fetch task %s", taskName)
	case len(resp.Kvs) == 0:
		return nil, errors.Annotatef(berrors.ErrPiTRTaskNotFound, "no such task %s", taskName)
	}

	// Decode straight into the handle that is handed back to the caller.
	handle := &Task{cli: c}
	if err := proto.Unmarshal(resp.Kvs[0].Value, &handle.Info); err != nil {
		return nil, errors.Annotatef(err, "invalid binary presentation of task info (name = %s)", taskName)
	}
	return handle, nil
}
// GetTaskWithPauseStatus fetches the task handle together with its pause status.
//
// The task entry and the pause key are read in one transaction. The returned
// bool is true when the pause key exists. An error annotating
// berrors.ErrPiTRTaskNotFound is returned when the task entry is absent.
func (c *MetaDataClient) GetTaskWithPauseStatus(ctx context.Context, taskName string) (*Task, bool, error) {
	txnResp, err := c.KV.Txn(ctx).
		Then(
			clientv3.OpGet(TaskOf(taskName)),
			clientv3.OpGet(Pause(taskName)),
		).Commit()
	if err != nil {
		return nil, false, errors.Annotatef(err, "failed to fetch task %s", taskName)
	}

	// Responses follow the order of the ops: [0] is the task entry, [1] the pause key.
	results := txnResp.Responses
	if len(results) == 0 {
		return nil, false, errors.Annotatef(berrors.ErrPiTRTaskNotFound, "no such task %s", taskName)
	}
	taskKVs := results[0].GetResponseRange().Kvs
	if len(taskKVs) == 0 {
		return nil, false, errors.Annotatef(berrors.ErrPiTRTaskNotFound, "no such task %s", taskName)
	}

	handle := &Task{cli: c}
	if err := proto.Unmarshal(taskKVs[0].Value, &handle.Info); err != nil {
		return nil, false, errors.Annotatef(err, "invalid binary presentation of task info (name = %s)", taskName)
	}

	isPaused := len(results) > 1 && len(results[1].GetResponseRange().Kvs) > 0
	return handle, isPaused, nil
}
// GetAllTasks gets all of the tasks from the metadata storage.
//
// It returns a nil slice (and a nil error) when no task is stored.
func (c *MetaDataClient) GetAllTasks(ctx context.Context) ([]Task, error) {
	entries, err := scanEtcdPrefix(c.Client, PrefixOfTask()).AllPages(ctx, 1)
	if err != nil {
		return nil, errors.Trace(err)
	}
	if len(entries) == 0 {
		return nil, nil
	}

	tasks := make([]Task, len(entries))
	for i := range entries {
		current := &tasks[i]
		if err := proto.Unmarshal(entries[i].Value, &current.Info); err != nil {
			return nil, errors.Trace(err)
		}
		current.cli = c
	}
	return tasks, nil
}
// GetTaskCount gets the count of tasks from the metadata storage.
//
// It issues one count-only range request over the task prefix, so only the
// number of matching keys is requested instead of fetching every task entry
// just to count them. On failure it returns 0 and the traced error.
func (c *MetaDataClient) GetTaskCount(ctx context.Context) (int, error) {
	resp, err := c.KV.Get(ctx, PrefixOfTask(), clientv3.WithPrefix(), clientv3.WithCountOnly())
	if err != nil {
		return 0, errors.Trace(err)
	}
	return int(resp.Count), nil
}
// Task represents a remote "task" object returned by a query of tasks.
// It is associated with the client that created it, hence it is able to
// fetch remote fields like `ranges`.
type Task struct {
// cli is the metadata client used to issue follow-up requests for this task.
cli *MetaDataClient
// Info is the task info decoded from the metadata storage.
Info backuppb.StreamBackupTaskInfo
}
// Pause pauses this task; it is a shorthand for `metaCli.PauseTask` called
// with the name of the task.
func (t *Task) Pause(ctx context.Context) error {
	client := t.cli
	return client.PauseTask(ctx, t.Info.Name)
}
// Resume resumes this task; it is a shorthand for `metaCli.ResumeTask` called
// with the name of the task.
func (t *Task) Resume(ctx context.Context) error {
	client := t.cli
	return client.ResumeTask(ctx, t.Info.Name)
}
// IsPaused reports whether the pause key of this task exists.
// Only the key count is requested from the storage.
func (t *Task) IsPaused(ctx context.Context) (bool, error) {
	pauseKey := Pause(t.Info.Name)
	countResp, err := t.cli.KV.Get(ctx, pauseKey, clientv3.WithCountOnly())
	if err != nil {
		return false, errors.Annotatef(err, "failed to fetch the status of task %s", t.Info.Name)
	}
	paused := countResp.Count > 0
	return paused, nil
}
// Ranges tries to fetch the ranges of this task from the metadata storage.
//
// Every entry scanned under the range prefix of the task becomes one
// kv.KeyRange: the part of the etcd key after the prefix is the start key and
// the etcd value is the end key. The result is non-nil (possibly empty) on
// success.
func (t *Task) Ranges(ctx context.Context) (Ranges, error) {
	prefix := RangesOf(t.Info.Name)
	kvs, err := scanEtcdPrefix(t.cli.Client, prefix).AllPages(ctx, 64)
	if err != nil {
		return nil, errors.Annotatef(err, "failed to fetch ranges of task %s", t.Info.Name)
	}

	// Allocate only after the scan succeeded, sized to the known entry count.
	ranges := make(Ranges, 0, len(kvs))
	for _, kvp := range kvs {
		// The scan is restricted to prefix, so every key must start with it.
		startKey := kvp.Key[len(prefix):]
		ranges = append(ranges, kv.KeyRange{StartKey: startKey, EndKey: kvp.Value})
	}
	return ranges, nil
}
// NextBackupTSList lists the next backup ts of each store.
//
// The returned map is keyed by store ID. Both the store ID (the part of the
// key after the checkpoint prefix) and the ts (the value) must be exactly
// 8 bytes and are decoded as big-endian uint64; otherwise an error annotating
// berrors.ErrPiTRMalformedMetadata is returned.
func (t *Task) NextBackupTSList(ctx context.Context) (map[uint64]uint64, error) {
	prefix := CheckPointsOf(t.Info.Name)
	entries, err := scanEtcdPrefix(t.cli.Client, prefix).AllPages(ctx, 1024)
	if err != nil {
		return nil, errors.Annotatef(err, "failed to get checkpoints of %s", t.Info.Name)
	}

	prefixBytes := []byte(prefix)
	tsByStore := map[uint64]uint64{}
	for _, entry := range entries {
		if n := len(entry.Value); n != 8 {
			return nil, errors.Annotatef(berrors.ErrPiTRMalformedMetadata,
				"the next backup ts isn't 64bits (it is %d bytes, value = %s)",
				n,
				redact.Key(entry.Value))
		}
		rawStoreID := bytes.TrimPrefix(entry.Key, prefixBytes)
		if n := len(rawStoreID); n != 8 {
			return nil, errors.Annotatef(berrors.ErrPiTRMalformedMetadata,
				"the store id isn't 64bits (it is %d bytes, value = %s)",
				n,
				redact.Key(rawStoreID))
		}
		tsByStore[binary.BigEndian.Uint64(rawStoreID)] = binary.BigEndian.Uint64(entry.Value)
	}
	return tsByStore, nil
}
// MinNextBackupTS reads the next backup ts stored under the checkpoint key of
// the given store for this task.
//
// It returns 0 with a nil error when the lookup does not yield exactly one
// entry, and an error annotating berrors.ErrPiTRMalformedMetadata when the
// stored value is not 8 bytes long. The value is decoded as a big-endian uint64.
func (t *Task) MinNextBackupTS(ctx context.Context, store uint64) (uint64, error) {
	resp, err := t.cli.KV.Get(ctx, CheckPointOf(t.Info.Name, store))
	if err != nil {
		return 0, errors.Annotatef(err, "failed to get checkpoints of %s", t.Info.Name)
	}
	if resp.Count != 1 {
		return 0, nil
	}

	raw := resp.Kvs[0].Value
	if size := len(raw); size != 8 {
		return 0, errors.Annotatef(berrors.ErrPiTRMalformedMetadata,
			"the next backup ts of store %d isn't 64bits (it is %d bytes, value = %s)",
			store,
			size,
			redact.Key(raw))
	}
	return binary.BigEndian.Uint64(raw), nil
}
// Step forwards the progress (next_backup_ts) of a store by overwriting the
// checkpoint of that store with ts.
// The task should be done by TiKV. This function should only be used for test cases.
func (t *Task) Step(ctx context.Context, store uint64, ts uint64) error {
	checkpointKey := CheckPointOf(t.Info.Name, store)
	encodedTS := string(encodeUint64(ts))
	if _, err := t.cli.KV.Put(ctx, checkpointKey, encodedTS); err != nil {
		return errors.Annotatef(err, "failed forward the progress of %s to %d", t.Info.Name, ts)
	}
	return nil
}
// LastError fetches the last error recorded for this task, keyed by store ID.
//
// The store ID is parsed as a base-10 integer from the part of each key that
// follows the last-error prefix of the task, and each value is decoded as a
// backuppb.StreamBackupError. An entry that fails to parse aborts the call
// with an annotated error.
func (t *Task) LastError(ctx context.Context) (map[uint64]backuppb.StreamBackupError, error) {
	prefix := LastErrorPrefixOf(t.Info.Name)
	resp, err := t.cli.Get(ctx, prefix, clientv3.WithPrefix())
	if err != nil {
		return nil, errors.Annotatef(err, "failed to get the last error for task %s", t.Info.GetName())
	}

	errByStore := map[uint64]backuppb.StreamBackupError{}
	for _, entry := range resp.Kvs {
		rawStoreID := strings.TrimPrefix(string(entry.Key), prefix)
		storeID, parseErr := strconv.ParseUint(rawStoreID, 10, 64)
		if parseErr != nil {
			return nil, errors.Annotatef(parseErr, "failed to parse the store ID string %s", rawStoreID)
		}

		var storeErr backuppb.StreamBackupError
		if decodeErr := proto.Unmarshal(entry.Value, &storeErr); decodeErr != nil {
			return nil, errors.Annotatef(decodeErr, "failed to parse wire encoding for store %d", storeID)
		}
		errByStore[storeID] = storeErr
	}
	return errByStore, nil
}