br: support reading a file concurrently from S3 (#46633)

ref pingcap/tidb#45719
This commit is contained in:
wjHuang
2023-09-12 10:25:42 +08:00
committed by GitHub
parent dbb493ff22
commit eab042aa48
13 changed files with 484 additions and 27 deletions

View File

@ -4043,6 +4043,19 @@ def go_deps():
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/jmespath/go-jmespath/internal/testify/com_github_jmespath_go_jmespath_internal_testify-v1.5.1.zip",
],
)
go_repository(
name = "com_github_johannesboyne_gofakes3",
build_file_proto_mode = "disable_global",
importpath = "github.com/johannesboyne/gofakes3",
sha256 = "b0ba2f7ee1765c24d88f2c5c3d478992f03d40c72531d3725696baa5fdad4a73",
strip_prefix = "github.com/johannesboyne/gofakes3@v0.0.0-20230506070712-04da935ef877",
urls = [
"http://bazel-cache.pingcap.net:8080/gomod/github.com/johannesboyne/gofakes3/com_github_johannesboyne_gofakes3-v0.0.0-20230506070712-04da935ef877.zip",
"http://ats.apps.svc/gomod/github.com/johannesboyne/gofakes3/com_github_johannesboyne_gofakes3-v0.0.0-20230506070712-04da935ef877.zip",
"https://cache.hawkingrei.com/gomod/github.com/johannesboyne/gofakes3/com_github_johannesboyne_gofakes3-v0.0.0-20230506070712-04da935ef877.zip",
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/johannesboyne/gofakes3/com_github_johannesboyne_gofakes3-v0.0.0-20230506070712-04da935ef877.zip",
],
)
go_repository(
name = "com_github_joho_sqltocsv",
build_file_proto_mode = "disable_global",
@ -6231,6 +6244,19 @@ def go_deps():
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/ryanuber/columnize/com_github_ryanuber_columnize-v2.1.0+incompatible.zip",
],
)
go_repository(
name = "com_github_ryszard_goskiplist",
build_file_proto_mode = "disable_global",
importpath = "github.com/ryszard/goskiplist",
sha256 = "12c65729fc31d5a9bf246eb387bd4c268d0d68bf33b913cccd81bebd47d6f80d",
strip_prefix = "github.com/ryszard/goskiplist@v0.0.0-20150312221310-2dfbae5fcf46",
urls = [
"http://bazel-cache.pingcap.net:8080/gomod/github.com/ryszard/goskiplist/com_github_ryszard_goskiplist-v0.0.0-20150312221310-2dfbae5fcf46.zip",
"http://ats.apps.svc/gomod/github.com/ryszard/goskiplist/com_github_ryszard_goskiplist-v0.0.0-20150312221310-2dfbae5fcf46.zip",
"https://cache.hawkingrei.com/gomod/github.com/ryszard/goskiplist/com_github_ryszard_goskiplist-v0.0.0-20150312221310-2dfbae5fcf46.zip",
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/ryszard/goskiplist/com_github_ryszard_goskiplist-v0.0.0-20150312221310-2dfbae5fcf46.zip",
],
)
go_repository(
name = "com_github_samuel_go_zookeeper",
build_file_proto_mode = "disable_global",
@ -6374,6 +6400,19 @@ def go_deps():
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/sergi/go-diff/com_github_sergi_go_diff-v1.1.0.zip",
],
)
go_repository(
name = "com_github_shabbyrobe_gocovmerge",
build_file_proto_mode = "disable_global",
importpath = "github.com/shabbyrobe/gocovmerge",
sha256 = "3c4cbe51a4350af0f4f042034e5b27470e7df81c842fb22d13cb73cdcba31b66",
strip_prefix = "github.com/shabbyrobe/gocovmerge@v0.0.0-20190829150210-3e036491d500",
urls = [
"http://bazel-cache.pingcap.net:8080/gomod/github.com/shabbyrobe/gocovmerge/com_github_shabbyrobe_gocovmerge-v0.0.0-20190829150210-3e036491d500.zip",
"http://ats.apps.svc/gomod/github.com/shabbyrobe/gocovmerge/com_github_shabbyrobe_gocovmerge-v0.0.0-20190829150210-3e036491d500.zip",
"https://cache.hawkingrei.com/gomod/github.com/shabbyrobe/gocovmerge/com_github_shabbyrobe_gocovmerge-v0.0.0-20190829150210-3e036491d500.zip",
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/shabbyrobe/gocovmerge/com_github_shabbyrobe_gocovmerge-v0.0.0-20190829150210-3e036491d500.zip",
],
)
go_repository(
name = "com_github_shazow_go_diff",
build_file_proto_mode = "disable_global",

View File

@ -5,6 +5,7 @@ go_library(
srcs = [
"byte_reader.go",
"codec.go",
"concurrent_reader.go",
"engine.go",
"file.go",
"iter.go",
@ -53,15 +54,22 @@ go_test(
],
embed = [":external"],
flaky = True,
shard_count = 31,
shard_count = 32,
deps = [
"//br/pkg/lightning/backend/kv",
"//br/pkg/lightning/common",
"//br/pkg/storage",
"//kv",
"//util/codec",
"@com_github_aws_aws_sdk_go//aws",
"@com_github_aws_aws_sdk_go//aws/credentials",
"@com_github_aws_aws_sdk_go//aws/session",
"@com_github_aws_aws_sdk_go//service/s3",
"@com_github_cockroachdb_pebble//:pebble",
"@com_github_johannesboyne_gofakes3//:gofakes3",
"@com_github_johannesboyne_gofakes3//backend/s3mem",
"@com_github_pingcap_errors//:errors",
"@com_github_pingcap_kvproto//pkg/brpb",
"@com_github_stretchr_testify//require",
"@org_golang_x_exp//rand",
"@org_uber_go_atomic//:atomic",

View File

@ -17,13 +17,18 @@ package external
import (
"context"
"io"
"sync/atomic"
"github.com/pingcap/errors"
"github.com/pingcap/tidb/br/pkg/storage"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/mathutil"
"go.uber.org/zap"
)
// ConcurrentReaderBufferSize is the buffer size for concurrent reader.
var ConcurrentReaderBufferSize = 4 * 1024 * 1024
// byteReader provides structured reading on a byte stream of external storage.
type byteReader struct {
ctx context.Context
@ -33,6 +38,12 @@ type byteReader struct {
bufOffset int
retPointers []*[]byte
useConcurrentReaderCurrent atomic.Bool
useConcurrentReader atomic.Bool
currFileOffset int64
conReader *singeFileReader
}
func openStoreReaderAndSeek(
@ -54,16 +65,52 @@ func openStoreReaderAndSeek(
// newByteReader wraps readNBytes functionality to storageReader. It will not
// close storageReader when meet error.
func newByteReader(ctx context.Context, storageReader storage.ReadSeekCloser, bufSize int) (*byteReader, error) {
func newByteReader(ctx context.Context, storageReader storage.ExternalFileReader, bufSize int, st storage.ExternalStorage, name string, defaultUseConcurrency bool) (*byteReader, error) {
conReader, err := newSingeFileReader(ctx, st, name, 8, ConcurrentReaderBufferSize)
if err != nil {
return nil, err
}
r := &byteReader{
ctx: ctx,
storageReader: storageReader,
buf: make([]byte, bufSize),
bufOffset: 0,
conReader: conReader,
}
r.switchReaderMode(defaultUseConcurrency)
return r, r.reload()
}
// switchReaderMode records which reader mode is wanted: the concurrent reader
// when useConcurrent is true, the normal reader otherwise. It only stores the
// request; reload compares it with the mode currently in use and performs the
// actual switch.
func (r *byteReader) switchReaderMode(useConcurrent bool) {
r.useConcurrentReader.Store(useConcurrent)
}
// switchToConcurrentReaderImpl moves the byteReader into concurrent mode.
// It fails if no concurrent reader was created for this byteReader. Otherwise
// it asks storageReader for its current position, makes the concurrent reader
// continue from that offset with an empty read cursor, marks concurrent mode
// as active and allocates a buffer of concurrency*readBufferSize bytes.
func (r *byteReader) switchToConcurrentReaderImpl() error {
	cr := r.conReader
	if cr == nil {
		return errors.New("can't use the concurrent mode because reader is not initialized correctly")
	}

	pos, err := r.storageReader.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}

	cr.bufferReadOffset = 0
	cr.currentFileOffset = pos
	r.currFileOffset = pos
	r.useConcurrentReaderCurrent.Store(true)
	cr.buffer = make([]byte, cr.concurrency*cr.readBufferSize)
	return nil
}
// switchToNormalReaderImpl moves the byteReader back to the normal reader.
// It seeks storageReader to the concurrent reader's current file offset and,
// only once that seek has succeeded, records the offset, releases the
// concurrent buffer and marks normal mode as active.
//
// If the seek fails the error is returned and the concurrent-mode state is
// left untouched, so the reader is never half-switched (mode flag cleared and
// buffer dropped while storageReader still points at a stale position).
func (r *byteReader) switchToNormalReaderImpl() error {
	offset := r.conReader.currentFileOffset
	if _, err := r.storageReader.Seek(offset, io.SeekStart); err != nil {
		return err
	}

	r.currFileOffset = offset
	r.conReader.buffer = nil
	r.useConcurrentReaderCurrent.Store(false)
	return nil
}
// readNBytes reads the next n bytes from the reader and returns a buffer slice containing those bytes.
// The returned slice (pointer) can not be used after r.reset. In the same interval of r.reset,
// byteReader guarantees that the returned slice (pointer) will point to the same content
@ -118,6 +165,9 @@ func (r *byteReader) cloneSlices() {
}
func (r *byteReader) next(n int) []byte {
if r.useConcurrentReaderCurrent.Load() {
return r.conReader.next(n)
}
end := mathutil.Min(r.bufOffset+n, len(r.buf))
ret := r.buf[r.bufOffset:end]
r.bufOffset += len(ret)
@ -125,6 +175,24 @@ func (r *byteReader) next(n int) []byte {
}
func (r *byteReader) reload() error {
to := r.useConcurrentReader.Load()
now := r.useConcurrentReaderCurrent.Load()
if to != now {
if to {
err := r.switchToConcurrentReaderImpl()
if err != nil {
return err
}
} else {
err := r.switchToNormalReaderImpl()
if err != nil {
return err
}
}
}
if to {
return r.conReader.reload()
}
nBytes, err := io.ReadFull(r.storageReader, r.buf[0:])
if err != nil {
switch err {

View File

@ -17,10 +17,19 @@ package external
import (
"context"
"io"
"net/http/httptest"
"testing"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/johannesboyne/gofakes3"
"github.com/johannesboyne/gofakes3/backend/s3mem"
"github.com/pingcap/errors"
backuppb "github.com/pingcap/kvproto/pkg/brpb"
"github.com/pingcap/tidb/br/pkg/storage"
"github.com/stretchr/testify/require"
"golang.org/x/exp/rand"
)
@ -50,8 +59,26 @@ func (s *mockExtStore) Close() error {
}
// TestByteReader runs the shared byteReader scenarios twice: first with the
// normal reader, then with the concurrent reader.
func TestByteReader(t *testing.T) {
	for _, useConcurrency := range []bool{false, true} {
		testByteReaderNormal(t, useConcurrency)
	}
}
func testByteReaderNormal(t *testing.T, useConcurrency bool) {
st, clean := NewS3WithBucketAndPrefix(t, "test", "testprefix")
defer clean()
// Prepare
err := st.WriteFile(context.Background(), "testfile", []byte("abcde"))
require.NoError(t, err)
newRsc := func() storage.ReadSeekCloser {
rsc, err := st.Open(context.Background(), "testfile")
require.NoError(t, err)
return rsc
}
// Test basic next() usage.
br, err := newByteReader(context.Background(), &mockExtStore{src: []byte("abcde")}, 3)
br, err := newByteReader(context.Background(), newRsc(), 3, st, "testfile", useConcurrency)
require.NoError(t, err)
x := br.next(1)
require.Equal(t, 1, len(x))
@ -63,7 +90,7 @@ func TestByteReader(t *testing.T) {
require.NoError(t, br.Close())
// Test basic readNBytes() usage.
br, err = newByteReader(context.Background(), &mockExtStore{src: []byte("abcde")}, 3)
br, err = newByteReader(context.Background(), newRsc(), 3, st, "testfile", useConcurrency)
require.NoError(t, err)
y, err := br.readNBytes(2)
require.NoError(t, err)
@ -73,7 +100,7 @@ func TestByteReader(t *testing.T) {
require.Equal(t, byte('b'), x[1])
require.NoError(t, br.Close())
br, err = newByteReader(context.Background(), &mockExtStore{src: []byte("abcde")}, 3)
br, err = newByteReader(context.Background(), newRsc(), 3, st, "testfile", useConcurrency)
require.NoError(t, err)
y, err = br.readNBytes(5) // Read all the data.
require.NoError(t, err)
@ -82,13 +109,16 @@ func TestByteReader(t *testing.T) {
require.Equal(t, byte('e'), x[4])
require.NoError(t, br.Close())
br, err = newByteReader(context.Background(), &mockExtStore{src: []byte("abcde")}, 3)
br, err = newByteReader(context.Background(), newRsc(), 3, st, "testfile", useConcurrency)
require.NoError(t, err)
_, err = br.readNBytes(7) // EOF
require.Error(t, err)
err = st.WriteFile(context.Background(), "testfile", []byte("abcdef"))
require.NoError(t, err)
ms := &mockExtStore{src: []byte("abcdef")}
br, err = newByteReader(context.Background(), ms, 2)
br, err = newByteReader(context.Background(), ms, 2, nil, "", false)
require.NoError(t, err)
y, err = br.readNBytes(3)
require.NoError(t, err)
@ -100,7 +130,7 @@ func TestByteReader(t *testing.T) {
require.NoError(t, br.Close())
ms = &mockExtStore{src: []byte("abcdef")}
br, err = newByteReader(context.Background(), ms, 2)
br, err = newByteReader(context.Background(), ms, 2, nil, "", false)
require.NoError(t, err)
y, err = br.readNBytes(2)
require.NoError(t, err)
@ -115,7 +145,7 @@ func TestByteReader(t *testing.T) {
func TestByteReaderClone(t *testing.T) {
ms := &mockExtStore{src: []byte("0123456789")}
br, err := newByteReader(context.Background(), ms, 4)
br, err := newByteReader(context.Background(), ms, 4, nil, "", false)
require.NoError(t, err)
y1, err := br.readNBytes(2)
require.NoError(t, err)
@ -135,7 +165,7 @@ func TestByteReaderClone(t *testing.T) {
require.NoError(t, br.Close())
ms = &mockExtStore{src: []byte("0123456789")}
br, err = newByteReader(context.Background(), ms, 4)
br, err = newByteReader(context.Background(), ms, 4, nil, "", false)
require.NoError(t, err)
y1, err = br.readNBytes(2)
require.NoError(t, err)
@ -158,7 +188,7 @@ func TestByteReaderClone(t *testing.T) {
func TestByteReaderAuxBuf(t *testing.T) {
ms := &mockExtStore{src: []byte("0123456789")}
br, err := newByteReader(context.Background(), ms, 1)
br, err := newByteReader(context.Background(), ms, 1, nil, "", false)
require.NoError(t, err)
y1, err := br.readNBytes(1)
require.NoError(t, err)
@ -178,6 +208,16 @@ func TestByteReaderAuxBuf(t *testing.T) {
}
// TestReset runs the reset scenario twice: first with the normal reader, then
// with the concurrent reader.
func TestReset(t *testing.T) {
	for _, useConcurrency := range []bool{false, true} {
		testReset(t, useConcurrency)
	}
}
func testReset(t *testing.T, useConcurrency bool) {
st, clean := NewS3WithBucketAndPrefix(t, "test", "testprefix")
defer func() {
clean()
}()
seed := time.Now().Unix()
rand.Seed(uint64(seed))
t.Logf("seed: %d", seed)
@ -185,9 +225,17 @@ func TestReset(t *testing.T) {
for i := range src {
src[i] = byte(i)
}
ms := &mockExtStore{src: src}
// Prepare
err := st.WriteFile(context.Background(), "testfile", src)
require.NoError(t, err)
newRsc := func() storage.ReadSeekCloser {
rsc, err := st.Open(context.Background(), "testfile")
require.NoError(t, err)
return rsc
}
bufSize := rand.Intn(256)
br, err := newByteReader(context.Background(), ms, bufSize)
br, err := newByteReader(context.Background(), newRsc(), bufSize, st, "testfile", useConcurrency)
require.NoError(t, err)
end := 0
toCheck := make([]*[]byte, 0, 10)
@ -219,8 +267,27 @@ func TestReset(t *testing.T) {
}
func TestUnexpectedEOF(t *testing.T) {
ms := &mockExtStore{src: []byte("0123456789")}
br, err := newByteReader(context.Background(), ms, 3)
st, clean := NewS3WithBucketAndPrefix(t, "test", "testprefix")
defer func() {
clean()
}()
// Prepare
err := st.WriteFile(context.Background(), "testfile", []byte("0123456789"))
require.NoError(t, err)
newRsc := func() storage.ReadSeekCloser {
rsc, err := st.Open(context.Background(), "testfile")
require.NoError(t, err)
return rsc
}
br, err := newByteReader(context.Background(), newRsc(), 3, st, "testfile", false)
require.NoError(t, err)
_, err = br.readNBytes(100)
require.ErrorIs(t, err, io.ErrUnexpectedEOF)
br, err = newByteReader(context.Background(), newRsc(), 3, st, "testfile", true)
require.NoError(t, err)
_, err = br.readNBytes(100)
require.ErrorIs(t, err, io.ErrUnexpectedEOF)
@ -228,6 +295,101 @@ func TestUnexpectedEOF(t *testing.T) {
func TestEmptyContent(t *testing.T) {
ms := &mockExtStore{src: []byte{}}
_, err := newByteReader(context.Background(), ms, 100)
_, err := newByteReader(context.Background(), ms, 100, nil, "", false)
require.Equal(t, io.EOF, err)
st, clean := NewS3WithBucketAndPrefix(t, "test", "testprefix")
defer clean()
// Prepare
err = st.WriteFile(context.Background(), "testfile", []byte(""))
require.NoError(t, err)
newRsc := func() storage.ReadSeekCloser {
rsc, err := st.Open(context.Background(), "testfile")
require.NoError(t, err)
return rsc
}
_, err = newByteReader(context.Background(), newRsc(), 100, st, "testfile", true)
require.Equal(t, io.EOF, err)
}
// TestSwitchMode reads a 1 MiB object in random-sized pieces while randomly
// toggling the requested reader mode, and checks that exactly fileSize bytes
// are returned in total.
func TestSwitchMode(t *testing.T) {
	st, clean := NewS3WithBucketAndPrefix(t, "test", "testprefix")
	defer clean()

	// Prepare
	fileSize := 1024 * 1024
	err := st.WriteFile(context.Background(), "testfile", make([]byte, fileSize))
	require.NoError(t, err)

	newRsc := func() storage.ReadSeekCloser {
		rsc, err := st.Open(context.Background(), "testfile")
		require.NoError(t, err)
		return rsc
	}

	// Use a tiny chunk size so the concurrent reader reloads many times.
	// ConcurrentReaderBufferSize is a package-level variable, so restore it to
	// avoid leaking the override into other tests.
	oldBufferSize := ConcurrentReaderBufferSize
	ConcurrentReaderBufferSize = 100
	defer func() {
		ConcurrentReaderBufferSize = oldBufferSize
	}()

	br, err := newByteReader(context.Background(), newRsc(), 100, st, "testfile", false)
	require.NoError(t, err)

	seed := time.Now().Unix()
	rand.Seed(uint64(seed))
	t.Logf("seed: %d", seed)

	totalCnt := 0
	modeUseCon := false
	for totalCnt < fileSize {
		// Flip the requested mode roughly once every five reads.
		if rand.Intn(5) == 0 {
			modeUseCon = !modeUseCon
			br.switchReaderMode(modeUseCon)
		}

		// Read between 1 and 99 bytes, clamped to what is left in the file.
		// totalCnt < fileSize here, so n is always at least 1.
		n := rand.Intn(100)
		if n == 0 {
			n = 1
		}
		if totalCnt+n > fileSize {
			n = fileSize - totalCnt
		}

		y, err := br.readNBytes(n)
		if err == io.EOF {
			break
		}
		require.NoError(t, err)
		totalCnt += len(*y)
	}
	require.Equal(t, fileSize, totalCnt)
	require.NoError(t, br.Close())
}
// NewS3WithBucketAndPrefix creates a new S3Storage for testing.
//
// It creates bucketName in an in-memory gofakes3 backend, serves that backend
// from an httptest server, and returns an S3Storage configured with
// bucketName and prefixName that talks to the server. The returned function
// shuts the test server down; callers should invoke it when done.
func NewS3WithBucketAndPrefix(t *testing.T, bucketName, prefixName string) (*storage.S3Storage, func()) {
	t.Helper()

	backend := s3mem.New()
	// Create the bucket the returned storage is configured with, rather than a
	// fixed name, so the helper works for any bucketName.
	err := backend.CreateBucket(bucketName)
	require.NoError(t, err)

	sess, err := session.NewSession()
	require.NoError(t, err)

	// Start the server only after every fallible step, so a failed assertion
	// above cannot leak a running server.
	faker := gofakes3.New(backend)
	ts := httptest.NewServer(faker.Server())

	config := aws.NewConfig().
		WithEndpoint(ts.URL).
		WithRegion("region").
		WithCredentials(credentials.NewStaticCredentials("dummy-access", "dummy-secret", "")).
		WithS3ForcePathStyle(true) // Removes need for subdomain

	svc := s3.New(sess, config)

	st := storage.NewS3StorageForTest(svc, &backuppb.S3{
		Region:       "region",
		Bucket:       bucketName,
		Prefix:       prefixName,
		Acl:          "acl",
		Sse:          "sse",
		StorageClass: "sc",
	})
	return st, ts.Close
}

View File

@ -0,0 +1,119 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package external
import (
"context"
"io"
"github.com/pingcap/tidb/br/pkg/lightning/log"
"github.com/pingcap/tidb/br/pkg/storage"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
)
// singeFileReader is a concurrent reader for a single file.
// Each reload fills buffer with up to concurrency*readBufferSize bytes of the
// file, fetched by `concurrency` goroutines reading adjacent ranges, and next
// hands the loaded bytes out in order.
type singeFileReader struct {
// ctx is passed to every range read and used to obtain the logger.
ctx context.Context
// concurrency is the number of goroutines started by each reload.
concurrency int
// readBufferSize is the maximum number of bytes one goroutine reads per reload.
readBufferSize int
// currentFileOffset is the file offset at which the next reload starts;
// reload advances it by the number of bytes it loaded.
currentFileOffset int64
// bufferReadOffset is the index in buffer of the next byte next will return.
bufferReadOffset int64
// bufferMaxOffset is the number of valid bytes in buffer after the last reload.
bufferMaxOffset int64
// maxFileOffset is the value returned by storage.GetMaxOffset at
// construction; reload treats it as the end of the file.
maxFileOffset int64
// name is the file name passed to the storage helpers.
name string
// storage is the external storage the file is read from.
storage storage.ExternalStorage
// buffer holds the data loaded by reload. It is nil after construction and
// must be allocated by the owner before reload is called.
buffer []byte
}
// newSingeFileReader creates a new singeFileReader for the file `name` in st.
//
// Only S3 storage is supported: for a nil storage or any other storage type it
// returns (nil, nil). Otherwise the file's max offset is fetched up front and
// an error from that lookup is returned as is. The reader starts at file
// offset 0 with no buffer allocated.
func newSingeFileReader(ctx context.Context, st storage.ExternalStorage, name string, concurrency int, readBufferSize int) (*singeFileReader, error) {
	// A nil interface value fails the assertion too, so this single check
	// covers both "no storage" and "not S3".
	if _, isS3 := st.(*storage.S3Storage); !isS3 {
		return nil, nil
	}

	fileEnd, err := storage.GetMaxOffset(ctx, st, name)
	if err != nil {
		return nil, err
	}

	reader := &singeFileReader{
		ctx:            ctx,
		storage:        st,
		name:           name,
		concurrency:    concurrency,
		readBufferSize: readBufferSize,
		maxFileOffset:  fileEnd,
	}
	return reader, nil
}
// reload refills the buffer with the next concurrency*readBufferSize bytes of
// the file (fewer at the end of the file) and resets the read cursor.
//
// The window starting at currentFileOffset is split into `concurrency`
// adjacent chunks of readBufferSize bytes; each chunk is fetched by its own
// goroutine directly into its slot of r.buffer. Chunks that lie entirely at or
// beyond maxFileOffset are skipped. r.buffer must already hold at least
// concurrency*readBufferSize bytes.
//
// It returns io.EOF when the whole file has already been loaded, or the first
// error reported by a chunk read; in both cases the reader state is unchanged.
func (r *singeFileReader) reload() error {
	if r.currentFileOffset >= r.maxFileOffset {
		return io.EOF
	}

	var eg errgroup.Group
	for i := 0; i < r.concurrency; i++ {
		i := i
		eg.Go(func() error {
			bufStart := i * r.readBufferSize
			startOffset := r.currentFileOffset + int64(bufStart)
			endOffset := min(startOffset+int64(r.readBufferSize), r.maxFileOffset)
			// Skip empty chunks as well as out-of-range ones: when the file
			// ends exactly on a chunk boundary startOffset == endOffset, and
			// a zero-length range read is at best a wasted request.
			if startOffset >= endOffset {
				return nil
			}
			dst := r.buffer[bufStart : bufStart+int(endOffset-startOffset)]
			_, err := storage.ReadDataInRange(r.ctx, r.storage, r.name, startOffset, dst)
			if err != nil {
				log.FromContext(r.ctx).Warn("read meet error", zap.Any("startOffset", startOffset), zap.Any("endOffset", endOffset), zap.Error(err))
				return err
			}
			return nil
		})
	}
	if err := eg.Wait(); err != nil {
		return err
	}

	// The number of bytes just loaded is a full window, or whatever was left
	// of the file if that is smaller.
	loaded := min(int64(r.readBufferSize*r.concurrency), r.maxFileOffset-r.currentFileOffset)
	r.bufferMaxOffset = loaded
	r.currentFileOffset += loaded
	r.bufferReadOffset = 0
	return nil
}
// next returns up to n bytes from the loaded buffer, starting at the read
// cursor, and advances the cursor past them. Fewer than n bytes (possibly
// none) are returned when the loaded data runs out. The returned slice aliases
// the internal buffer.
func (r *singeFileReader) next(n int) []byte {
	start := r.bufferReadOffset
	stop := start + int64(n)
	if stop > r.bufferMaxOffset {
		stop = r.bufferMaxOffset
	}
	chunk := r.buffer[start:stop]
	r.bufferReadOffset = start + int64(len(chunk))
	return chunk
}

View File

@ -40,7 +40,7 @@ func newKVReader(
if err != nil {
return nil, err
}
br, err := newByteReader(ctx, sr, bufSize)
br, err := newByteReader(ctx, sr, bufSize, store, name, false)
if err != nil {
br.Close()
return nil, err

View File

@ -30,7 +30,7 @@ func newStatsReader(ctx context.Context, store storage.ExternalStorage, name str
if err != nil {
return nil, err
}
br, err := newByteReader(ctx, sr, bufSize)
br, err := newByteReader(ctx, sr, bufSize, store, name, false)
if err != nil {
return nil, err
}

View File

@ -349,12 +349,12 @@ func (w *Writer) flushKVs(ctx context.Context, fromClose bool) (err error) {
return
}
if err1 != nil {
logger.Error("close data writer failed", zap.Error(err))
logger.Error("close data writer failed", zap.Error(err1))
err = err1
return
}
if err2 != nil {
logger.Error("close stat writer failed", zap.Error(err))
logger.Error("close stat writer failed", zap.Error(err2))
err = err2
return
}
@ -421,12 +421,12 @@ func (w *Writer) createStorageWriter(ctx context.Context) (
err error,
) {
dataPath := filepath.Join(w.filenamePrefix, strconv.Itoa(w.currentSeq))
dataWriter, err := w.store.Create(ctx, dataPath, nil)
dataWriter, err := w.store.Create(ctx, dataPath, &storage.WriterOption{Concurrency: 20})
if err != nil {
return "", "", nil, nil, err
}
statPath := filepath.Join(w.filenamePrefix+statSuffix, strconv.Itoa(w.currentSeq))
statsWriter, err := w.store.Create(ctx, statPath, nil)
statsWriter, err := w.store.Create(ctx, statPath, &storage.WriterOption{Concurrency: 20})
if err != nil {
return "", "", nil, nil, err
}

View File

@ -23,6 +23,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//br/pkg/errors",
"//br/pkg/lightning/log",
"//br/pkg/logutil",
"//sessionctx/variable",
"@com_github_aliyun_alibaba_cloud_sdk_go//sdk/auth/credentials",

View File

@ -37,6 +37,8 @@ import (
"go.uber.org/zap"
)
var hardcodedS3ChunkSize = 5 * 1024 * 1024
const (
s3EndpointOption = "s3.endpoint"
s3RegionOption = "s3.region"
@ -57,9 +59,7 @@ const (
// the maximum number of byte to read for seek.
maxSkipOffsetByRead = 1 << 16 // 64KB
// TODO make this configurable, 5 mb is a good minimum size but on low latency/high bandwidth network you can go a lot bigger
hardcodedS3ChunkSize = 5 * 1024 * 1024
defaultRegion = "us-east-1"
defaultRegion = "us-east-1"
// to check the cloud type by endpoint tag.
domainAliyun = "aliyuncs.com"
)
@ -70,6 +70,9 @@ var permissionCheckFn = map[Permission]func(*s3.S3, *backuppb.S3) error{
GetObject: getObject,
}
// WriteBufferSize is the size of the buffer used for writing. (64K may be a better choice)
var WriteBufferSize = 5 * 1024 * 1024
// S3Storage defines some standard operations for BR/Lightning on the S3 storage.
// It implements the `ExternalStorage` interface.
type S3Storage struct {
@ -946,7 +949,7 @@ func (rs *S3Storage) Create(ctx context.Context, name string, option *WriterOpti
} else {
up := s3manager.NewUploaderWithClient(rs.svc, func(u *s3manager.Uploader) {
u.Concurrency = option.Concurrency
u.BufferProvider = s3manager.NewBufferedReadSeekerWriteToPool(option.Concurrency * 8 * 1024 * 1024)
u.BufferProvider = s3manager.NewBufferedReadSeekerWriteToPool(option.Concurrency * hardcodedS3ChunkSize)
})
rd, wd := io.Pipe()
upParams := &s3manager.UploadInput{
@ -967,7 +970,7 @@ func (rs *S3Storage) Create(ctx context.Context, name string, option *WriterOpti
}()
uploader = s3Writer
}
uploaderWriter := newBufferedWriter(uploader, hardcodedS3ChunkSize, NoCompression)
uploaderWriter := newBufferedWriter(uploader, WriteBufferSize, NoCompression)
return uploaderWriter, nil
}

View File

@ -7,10 +7,14 @@ import (
"io"
"net/http"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/pingcap/errors"
backuppb "github.com/pingcap/kvproto/pkg/brpb"
berrors "github.com/pingcap/tidb/br/pkg/errors"
"github.com/pingcap/tidb/br/pkg/lightning/log"
"go.uber.org/zap"
)
// Permission represents the permission we need to check in create storage.
@ -216,3 +220,38 @@ func CloneDefaultHttpTransport() (*http.Transport, bool) {
transport, ok := http.DefaultTransport.(*http.Transport)
return transport.Clone(), ok
}
// GetMaxOffset returns the max offset of the file, i.e. the ContentLength
// reported by a HEAD request for the object Prefix+name in the storage's
// bucket.
//
// Only *S3Storage is supported; any other storage yields an error. Errors
// from the HEAD request are returned unchanged, and a response without a
// ContentLength is reported as an error instead of being dereferenced.
func GetMaxOffset(ctx context.Context, storage ExternalStorage, name string) (n int64, err error) {
	s3storage, ok := storage.(*S3Storage)
	if !ok {
		return 0, errors.New("only support s3 storage")
	}
	output, err := s3storage.svc.HeadObjectWithContext(ctx, &s3.HeadObjectInput{
		Bucket: aws.String(s3storage.options.Bucket),
		Key:    aws.String(s3storage.options.Prefix + name),
	})
	if err != nil {
		return 0, err
	}
	// ContentLength is a pointer in the SDK output; guard against nil so this
	// cannot panic.
	if output.ContentLength == nil {
		return 0, errors.Errorf("head object %s returned no content length", name)
	}
	return *output.ContentLength, nil
}
// ReadDataInRange reads data from storage in range [start, start+len(p)).
//
// Only *S3Storage is supported; any other storage yields an error. The bytes
// are read with io.ReadFull, so n < len(p) always comes with a non-nil error.
// An empty p returns (0, nil) without contacting the storage. A failure to
// close the underlying reader is logged as a warning and does not change the
// result.
func ReadDataInRange(ctx context.Context, storage ExternalStorage, name string, start int64, p []byte) (n int, err error) {
	s3storage, ok := storage.(*S3Storage)
	if !ok {
		return 0, errors.New("only support s3 storage")
	}
	// Nothing to read: io.ReadFull into an empty slice yields (0, nil) anyway,
	// so skip opening a remote reader for the empty range [start, start).
	if len(p) == 0 {
		return 0, nil
	}
	rd, _, err := s3storage.open(ctx, name, start, start+int64(len(p)))
	if err != nil {
		return 0, err
	}
	defer func() {
		if closeErr := rd.Close(); closeErr != nil {
			log.FromContext(ctx).Warn("failed to close reader", zap.Error(closeErr))
		}
	}()
	return io.ReadFull(rd, p)
}

3
go.mod
View File

@ -61,6 +61,7 @@ require (
github.com/jedib0t/go-pretty/v6 v6.2.2
github.com/jellydator/ttlcache/v3 v3.0.1
github.com/jingyugao/rowserrcheck v1.1.1
github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877
github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df
github.com/kisielk/errcheck v1.6.3
github.com/klauspost/compress v1.16.5
@ -139,7 +140,9 @@ require (
)
require (
github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 // indirect
github.com/segmentio/asm v1.2.0 // indirect
github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500 // indirect
k8s.io/utils v0.0.0-20230209194617-a36077c30491 // indirect
)

15
go.sum
View File

@ -133,6 +133,7 @@ github.com/ashanbrown/makezero v1.1.1 h1:iCQ87C0V0vSyO+M9E/FZYbu65auqH0lnsOkf5Fc
github.com/ashanbrown/makezero v1.1.1/go.mod h1:i1bJLCRSCHOcOa9Y6MyF2FTfMZMFdHvxKHxgO5Z1axI=
github.com/aws/aws-sdk-go v1.15.24/go.mod h1:mFuSZ37Z9YOHbQEwBWztmVzqXrEkub65tZoCYDt7FT0=
github.com/aws/aws-sdk-go v1.30.19/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
github.com/aws/aws-sdk-go v1.44.256/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/aws/aws-sdk-go v1.44.259 h1:7yDn1dcv4DZFMKpu+2exIH5O6ipNj9qXrKfdMUaIJwY=
github.com/aws/aws-sdk-go v1.44.259/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g=
@ -601,6 +602,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877 h1:O7syWuYGzre3s73s+NkgB8e0ZvsIVhT/zxNU7V1gHK8=
github.com/johannesboyne/gofakes3 v0.0.0-20230506070712-04da935ef877/go.mod h1:AxgWC4DDX54O2WDoQO1Ceabtn6IbktjU/7bigor+66g=
github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df h1:Zrb0IbuLOGHL7nrO2WrcuNWgDTlzFv3zY69QMx4ggQE=
github.com/joho/sqltocsv v0.0.0-20210428211105-a6d6801d59df/go.mod h1:mAVCUAYtW9NG31eB30umMSLKcDt6mCUWSjoSn5qBh0k=
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
@ -909,6 +912,8 @@ github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 h1:GHRpF1pTW19a8tTFrMLUcfWwyC0pnifVo2ClaLq+hP8=
github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46/go.mod h1:uAQ5PCi+MFsC7HjREoAz1BU+Mq60+05gifQSsHSDG/8=
github.com/samuel/go-zookeeper v0.0.0-20161028232340-1d7be4effb13/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E=
github.com/sasha-s/go-deadlock v0.0.0-20161201235124-341000892f3d/go.mod h1:StQn567HiB1fF2yJ44N9au7wOhrPS3iZqiDbRupzT10=
github.com/sasha-s/go-deadlock v0.2.0 h1:lMqc+fUb7RrFS3gQLtoQsJ7/6TV/pAIFvBsqX73DK8Y=
@ -919,6 +924,8 @@ github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg
github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys=
github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500 h1:WnNuhiq+FOY3jNj6JXFT+eLN3CQ/oPIsDPRanvwsmbI=
github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500/go.mod h1:+njLrG5wSeoG4Ds61rFgEzKvenR2UHbjMoDHsczxly0=
github.com/shirou/gopsutil/v3 v3.21.12/go.mod h1:BToYZVTlSVlfazpDDYFnsVZLaoRG+g8ufT6fPQLdJzA=
github.com/shirou/gopsutil/v3 v3.23.5 h1:5SgDCeQ0KW0S4N0znjeM/eFHXXOKyv2dVNgRq/c9P6Y=
github.com/shirou/gopsutil/v3 v3.23.5/go.mod h1:Ng3Maa27Q2KARVJ0SPZF5NdrQSC3XHKP8IIWrHgMeLY=
@ -955,6 +962,7 @@ github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasO
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
github.com/spf13/afero v1.2.1/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
@ -1065,6 +1073,7 @@ github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQ
github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw=
github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU=
go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4=
go.etcd.io/etcd/api/v3 v3.5.2 h1:tXok5yLlKyuQ/SXSjtqHc4uzNaMqZi2XsoSPr/LlJXI=
@ -1219,6 +1228,7 @@ golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.6.0/go.mod h1:4mET923SAdbXp2ki8ey+zGs1SLqsuM2Y0uvdZR/fUNI=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.11.0 h1:bUO06HqtnRcc/7l71XBe4WcqTZ+3AH1J59zWDDwLKgU=
golang.org/x/mod v0.11.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -1273,6 +1283,7 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug
golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14=
golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
@ -1379,6 +1390,7 @@ golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@ -1387,6 +1399,7 @@ golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuX
golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0=
golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
@ -1433,6 +1446,7 @@ golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgw
golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20190829051458-42f498d34c4d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
@ -1477,6 +1491,7 @@ golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4=
golang.org/x/tools v0.10.0 h1:tvDr/iQoUqNdohiYm0LmmKcBk+q86lb9EprIUFhHHGg=
golang.org/x/tools v0.10.0/go.mod h1:UJwyiVBsOA2uwvK/e5OY3GTpDUJriEd+/YlqAwLPmyM=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=