// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package external

import (
	"context"
	"fmt"
	"slices"
	"testing"

	"github.com/docker/go-units"
	"github.com/pingcap/tidb/br/pkg/storage"
	"github.com/pingcap/tidb/pkg/ingestor/engineapi"
	"github.com/pingcap/tidb/pkg/lightning/membuf"
	"github.com/stretchr/testify/require"
)

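// testGetFirstAndLastKey asserts that data.GetFirstAndLastKey returns the
// expected boundary keys for the half-open range [lowerBound, upperBound);
// a nil bound leaves the range unbounded on that side.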
func testGetFirstAndLastKey(
	t *testing.T,
	data engineapi.IngestData,
	lowerBound, upperBound []byte,
	expectedFirstKey, expectedLastKey []byte,
) {
	firstKey, lastKey, err := data.GetFirstAndLastKey(lowerBound, upperBound)
	require.NoError(t, err)
	require.Equal(t, expectedFirstKey, firstKey)
	require.Equal(t, expectedLastKey, lastKey)
}

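// testNewIter iterates data over [lowerBound, upperBound) and asserts that
// the produced KV pairs match expectedKVs exactly, and that the iterator
// reports no error and closes cleanly.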
func testNewIter(
	t *testing.T,
	data engineapi.IngestData,
	lowerBound, upperBound []byte,
	expectedKVs []kvPair,
) {
	ctx := context.Background()
	iter := data.NewIter(ctx, lowerBound, upperBound, nil)
	var kvs []kvPair
	for iter.First(); iter.Valid(); iter.Next() {
		require.NoError(t, iter.Error())
		kvs = append(kvs, kvPair{key: iter.Key(), value: iter.Value()})
	}
	require.NoError(t, iter.Error())
	require.NoError(t, iter.Close())
	require.Equal(t, expectedKVs, kvs)
}

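// TestMemoryIngestData checks GetTS, GetFirstAndLastKey, and NewIter on a
// MemoryIngestData, first with five distinct keys and then with duplicated
// keys mixed in.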
func TestMemoryIngestData(t *testing.T) {
	kvs := []kvPair{
		{key: []byte("key1"), value: []byte("value1")},
		{key: []byte("key2"), value: []byte("value2")},
		{key: []byte("key3"), value: []byte("value3")},
		{key: []byte("key4"), value: []byte("value4")},
		{key: []byte("key5"), value: []byte("value5")},
	}
	data := &MemoryIngestData{
		kvs: kvs,
		ts:  123,
	}

	require.EqualValues(t, 123, data.GetTS())
	testGetFirstAndLastKey(t, data, nil, nil, []byte("key1"), []byte("key5"))
	testGetFirstAndLastKey(t, data, []byte("key1"), []byte("key6"), []byte("key1"), []byte("key5"))
	testGetFirstAndLastKey(t, data, []byte("key2"), []byte("key5"), []byte("key2"), []byte("key4"))
	testGetFirstAndLastKey(t, data, []byte("key25"), []byte("key35"), []byte("key3"), []byte("key3"))
	testGetFirstAndLastKey(t, data, []byte("key25"), []byte("key26"), nil, nil)
	testGetFirstAndLastKey(t, data, []byte("key0"), []byte("key1"), nil, nil)
	testGetFirstAndLastKey(t, data, []byte("key6"), []byte("key9"), nil, nil)

	testNewIter(t, data, nil, nil, kvs)
	testNewIter(t, data, []byte("key1"), []byte("key6"), kvs)
	testNewIter(t, data, []byte("key2"), []byte("key5"), kvs[1:4])
	testNewIter(t, data, []byte("key25"), []byte("key35"), kvs[2:3])
	testNewIter(t, data, []byte("key25"), []byte("key26"), nil)
	testNewIter(t, data, []byte("key0"), []byte("key1"), nil)
	testNewIter(t, data, []byte("key6"), []byte("key9"), nil)

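	// Rebuild the data so that key2 and key4 each appear twice (with slightly
	// different values) and verify the boundary-key queries are unaffected.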
	data = &MemoryIngestData{
		ts: 234,
	}
	encodedKVs := make([]kvPair, 0, len(kvs)*2)
	duplicatedKVs := make([]kvPair, 0, len(kvs)*2)

	for i := range kvs {
		encodedKey := slices.Clone(kvs[i].key)
		encodedKVs = append(encodedKVs, kvPair{key: encodedKey, value: kvs[i].value})
		if i%2 == 0 {
			continue
		}

		// For odd indexes (key2 and key4), keep the original pair and add a
		// second pair with the same key whose value is extended by one extra
		// byte, so each duplicated key contributes two entries. duplicatedKVs
		// collects both copies of every duplicated pair; it is not asserted
		// in this test.
		duplicatedKVs = append(duplicatedKVs, kvPair{key: encodedKey, value: kvs[i].value})

		encodedKey = slices.Clone(kvs[i].key)
		newValues := make([]byte, len(kvs[i].value)+1)
		copy(newValues, kvs[i].value)
		newValues[len(kvs[i].value)] = 1
		encodedKVs = append(encodedKVs, kvPair{key: encodedKey, value: newValues})
		duplicatedKVs = append(duplicatedKVs, kvPair{key: encodedKey, value: newValues})
	}
	data.kvs = encodedKVs

	require.EqualValues(t, 234, data.GetTS())
	testGetFirstAndLastKey(t, data, nil, nil, []byte("key1"), []byte("key5"))
	testGetFirstAndLastKey(t, data, []byte("key1"), []byte("key6"), []byte("key1"), []byte("key5"))
	testGetFirstAndLastKey(t, data, []byte("key2"), []byte("key5"), []byte("key2"), []byte("key4"))
	testGetFirstAndLastKey(t, data, []byte("key25"), []byte("key35"), []byte("key3"), []byte("key3"))
	testGetFirstAndLastKey(t, data, []byte("key25"), []byte("key26"), nil, nil)
	testGetFirstAndLastKey(t, data, []byte("key0"), []byte("key1"), nil, nil)
	testGetFirstAndLastKey(t, data, []byte("key6"), []byte("key9"), nil, nil)
}

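// TestSplit pins down the contract of split: the input is divided into at
// most conc chunks of near-equal size, conc values of 0 or 1 yield a single
// chunk, conc larger than the input yields singletons, and an empty input
// yields nil. A hypothetical sketch consistent with these cases (the real
// implementation lives elsewhere in this package and may differ):
//
//	func split[T any](s []T, conc int) [][]T {
//		if len(s) == 0 {
//			return nil
//		}
//		if conc <= 1 {
//			return [][]T{s}
//		}
//		if conc > len(s) {
//			conc = len(s)
//		}
//		chunk := (len(s) + conc - 1) / conc // ceiling division
//		var res [][]T
//		for i := 0; i < len(s); i += chunk {
//			res = append(res, s[i:min(i+chunk, len(s))])
//		}
//		return res
//	}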
func TestSplit(t *testing.T) {
	cases := []struct {
		input    []int
		conc     int
		expected [][]int
	}{
		{
			input:    []int{1, 2, 3, 4, 5},
			conc:     1,
			expected: [][]int{{1, 2, 3, 4, 5}},
		},
		{
			input:    []int{1, 2, 3, 4, 5},
			conc:     2,
			expected: [][]int{{1, 2, 3}, {4, 5}},
		},
		{
			input:    []int{1, 2, 3, 4, 5},
			conc:     0,
			expected: [][]int{{1, 2, 3, 4, 5}},
		},
		{
			input:    []int{1, 2, 3, 4, 5},
			conc:     5,
			expected: [][]int{{1}, {2}, {3}, {4}, {5}},
		},
		{
			input:    []int{},
			conc:     5,
			expected: nil,
		},
		{
			input:    []int{1, 2, 3, 4, 5},
			conc:     100,
			expected: [][]int{{1}, {2}, {3}, {4}, {5}},
		},
	}

	for _, c := range cases {
		got := split(c.input, c.conc)
		require.Equal(t, c.expected, got)
	}
}

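// prepareKVFiles writes each slice of contents through an external Writer
// into store, producing exactly one data file and one stat file per slice,
// and returns the two file lists.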
func prepareKVFiles(t *testing.T, store storage.ExternalStorage, contents [][]kvPair) (dataFiles, statFiles []string) {
	ctx := context.Background()
	for i, c := range contents {
		var summary *WriterSummary
		// We want exactly one file per content slice, so set the memory and
		// block size limits large enough that the writer never rotates files.
		writer := NewWriterBuilder().SetPropKeysDistance(4).
			SetMemorySizeLimit(8*units.MiB).SetBlockSize(8*units.MiB).
			SetOnCloseFunc(func(s *WriterSummary) { summary = s }).
			Build(store, "/test", fmt.Sprintf("%d", i))
		for _, p := range c {
			require.NoError(t, writer.WriteRow(ctx, p.key, p.value, nil))
		}
		require.NoError(t, writer.Close(ctx))
		require.Len(t, summary.MultipleFilesStats, 1)
		require.Len(t, summary.MultipleFilesStats[0].Filenames, 1)
		require.Zero(t, summary.ConflictInfo.Count)
		require.Empty(t, summary.ConflictInfo.Files)
		dataFiles = append(dataFiles, summary.MultipleFilesStats[0].Filenames[0][0])
		statFiles = append(statFiles, summary.MultipleFilesStats[0].Filenames[0][1])
	}
	return
}

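// getAllDataFromDataAndRanges drains the iterator of dataAndRanges.Data and
// returns all KV pairs in iteration order.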
func getAllDataFromDataAndRanges(t *testing.T, dataAndRanges *engineapi.DataAndRanges) []kvPair {
	ctx := context.Background()
	iter := dataAndRanges.Data.NewIter(ctx, nil, nil, membuf.NewPool())
	var allKVs []kvPair
	for iter.First(); iter.Valid(); iter.Next() {
		allKVs = append(allKVs, kvPair{key: iter.Key(), value: iter.Value()})
	}
	require.NoError(t, iter.Error())
	require.NoError(t, iter.Close())
	return allKVs
}

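// TestEngineOnDup covers the OnDuplicateKey strategies of the external
// engine: Ignore and Error both fail the load when duplicates are present,
// while Record and Remove drop duplicated keys from the loaded data; Record
// additionally persists the dropped pairs as conflict files.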
func TestEngineOnDup(t *testing.T) {
	ctx := context.Background()
	contents := [][]kvPair{{
		{key: []byte{4}, value: []byte("bbb")},
		{key: []byte{4}, value: []byte("bbb")},
		{key: []byte{1}, value: []byte("aa")},
		{key: []byte{1}, value: []byte("aa")},
		{key: []byte{1}, value: []byte("aa")},
		{key: []byte{2}, value: []byte("vv")},
		{key: []byte{3}, value: []byte("sds")},
	}}

	// getEngineFn builds an external Engine over the given data/stat files
	// with fixed key boundaries; the remaining literals follow the positional
	// parameters of NewExternalEngine.
	getEngineFn := func(store storage.ExternalStorage, onDup engineapi.OnDuplicateKey, inDataFiles, inStatFiles []string) *Engine {
		return NewExternalEngine(
			ctx,
			store, inDataFiles, inStatFiles,
			[]byte{1}, []byte{5},
			[][]byte{{1}, {2}, {3}, {4}, {5}},
			[][]byte{{1}, {3}, {5}},
			10,
			123,
			456,
			789,
			true,
			16*units.GiB,
			onDup,
			"/",
		)
	}

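	// OnDuplicateKeyIgnore assumes the input contains no duplicates, so
	// loading data that does contain them is reported as an error.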
t.Run("on duplicate ignore", func(t *testing.T) {
|
|
onDup := engineapi.OnDuplicateKeyIgnore
|
|
store := storage.NewMemStorage()
|
|
dataFiles, statFiles := prepareKVFiles(t, store, contents)
|
|
extEngine := getEngineFn(store, onDup, dataFiles, statFiles)
|
|
loadDataCh := make(chan engineapi.DataAndRanges, 4)
|
|
require.ErrorContains(t, extEngine.LoadIngestData(ctx, loadDataCh), "duplicate key found")
|
|
t.Cleanup(func() {
|
|
require.NoError(t, extEngine.Close())
|
|
})
|
|
})
|
|
|
|
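	// OnDuplicateKeyError fails the load and names the first duplicated
	// pair in the error message.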
t.Run("on duplicate error", func(t *testing.T) {
|
|
onDup := engineapi.OnDuplicateKeyError
|
|
store := storage.NewMemStorage()
|
|
dataFiles, statFiles := prepareKVFiles(t, store, contents)
|
|
extEngine := getEngineFn(store, onDup, dataFiles, statFiles)
|
|
loadDataCh := make(chan engineapi.DataAndRanges, 4)
|
|
require.ErrorContains(t, extEngine.LoadIngestData(ctx, loadDataCh), "[Lightning:Restore:ErrFoundDuplicateKey]found duplicate key '\x01', value 'aa'")
|
|
t.Cleanup(func() {
|
|
require.NoError(t, extEngine.Close())
|
|
})
|
|
})
|
|
|
|
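	// Without duplicates, Record and Remove behave identically: every KV is
	// loaded and no conflict info is produced.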
t.Run("on duplicate record or remove, no duplicates", func(t *testing.T) {
|
|
for _, od := range []engineapi.OnDuplicateKey{engineapi.OnDuplicateKeyRecord, engineapi.OnDuplicateKeyRemove} {
|
|
store := storage.NewMemStorage()
|
|
dfiles, sfiles := prepareKVFiles(t, store, [][]kvPair{{
|
|
{key: []byte{4}, value: []byte("bbb")},
|
|
{key: []byte{1}, value: []byte("aa")},
|
|
{key: []byte{2}, value: []byte("vv")},
|
|
{key: []byte{3}, value: []byte("sds")},
|
|
}})
|
|
extEngine := getEngineFn(store, od, dfiles, sfiles)
|
|
loadDataCh := make(chan engineapi.DataAndRanges, 4)
|
|
require.NoError(t, extEngine.LoadIngestData(ctx, loadDataCh))
|
|
t.Cleanup(func() {
|
|
require.NoError(t, extEngine.Close())
|
|
})
|
|
require.Len(t, loadDataCh, 1)
|
|
dataAndRanges := <-loadDataCh
|
|
allKVs := getAllDataFromDataAndRanges(t, &dataAndRanges)
|
|
require.EqualValues(t, []kvPair{
|
|
{key: []byte{1}, value: []byte("aa")},
|
|
{key: []byte{2}, value: []byte("vv")},
|
|
{key: []byte{3}, value: []byte("sds")},
|
|
{key: []byte{4}, value: []byte("bbb")},
|
|
}, allKVs)
|
|
info := extEngine.ConflictInfo()
|
|
require.Zero(t, info.Count)
|
|
require.Empty(t, info.Files)
|
|
}
|
|
})
|
|
|
|
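	// With duplicates present, both strategies drop every copy of a
	// duplicated key from the loaded data; Record additionally writes all
	// dropped pairs to a conflict file.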
t.Run("on duplicate record or remove, partial duplicated", func(t *testing.T) {
|
|
contents2 := [][]kvPair{
|
|
{{key: []byte{1}, value: []byte("aa")}, {key: []byte{1}, value: []byte("aa")}},
|
|
{{key: []byte{1}, value: []byte("aa")}, {key: []byte{2}, value: []byte("vv")}, {key: []byte{3}, value: []byte("sds")}},
|
|
{{key: []byte{4}, value: []byte("bbb")}, {key: []byte{4}, value: []byte("bbb")}},
|
|
}
|
|
for _, cont := range [][][]kvPair{contents, contents2} {
|
|
for _, od := range []engineapi.OnDuplicateKey{engineapi.OnDuplicateKeyRecord, engineapi.OnDuplicateKeyRemove} {
|
|
store := storage.NewMemStorage()
|
|
dataFiles, statFiles := prepareKVFiles(t, store, cont)
|
|
extEngine := getEngineFn(store, od, dataFiles, statFiles)
|
|
loadDataCh := make(chan engineapi.DataAndRanges, 4)
|
|
require.NoError(t, extEngine.LoadIngestData(ctx, loadDataCh))
|
|
t.Cleanup(func() {
|
|
require.NoError(t, extEngine.Close())
|
|
})
|
|
require.Len(t, loadDataCh, 1)
|
|
dataAndRanges := <-loadDataCh
|
|
allKVs := getAllDataFromDataAndRanges(t, &dataAndRanges)
|
|
require.EqualValues(t, []kvPair{
|
|
{key: []byte{2}, value: []byte("vv")},
|
|
{key: []byte{3}, value: []byte("sds")},
|
|
}, allKVs)
|
|
info := extEngine.ConflictInfo()
|
|
if od == engineapi.OnDuplicateKeyRemove {
|
|
require.Zero(t, info.Count)
|
|
require.Empty(t, info.Files)
|
|
} else {
|
|
require.EqualValues(t, 5, info.Count)
|
|
require.Len(t, info.Files, 1)
|
|
dupPairs := readKVFile(t, store, info.Files[0])
|
|
require.EqualValues(t, []kvPair{
|
|
{key: []byte{1}, value: []byte("aa")},
|
|
{key: []byte{1}, value: []byte("aa")},
|
|
{key: []byte{1}, value: []byte("aa")},
|
|
{key: []byte{4}, value: []byte("bbb")},
|
|
{key: []byte{4}, value: []byte("bbb")},
|
|
}, dupPairs)
|
|
}
|
|
}
|
|
}
|
|
})
|
|
|
|
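	// When every pair shares one key, the loaded data is empty; Record
	// reports all four pairs as conflicts.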
t.Run("on duplicate record or remove, all duplicated", func(t *testing.T) {
|
|
for _, od := range []engineapi.OnDuplicateKey{engineapi.OnDuplicateKeyRecord, engineapi.OnDuplicateKeyRemove} {
|
|
store := storage.NewMemStorage()
|
|
dfiles, sfiles := prepareKVFiles(t, store, [][]kvPair{{
|
|
{key: []byte{1}, value: []byte("aaa")},
|
|
{key: []byte{1}, value: []byte("aaa")},
|
|
{key: []byte{1}, value: []byte("aaa")},
|
|
{key: []byte{1}, value: []byte("aaa")},
|
|
}})
|
|
extEngine := getEngineFn(store, od, dfiles, sfiles)
|
|
loadDataCh := make(chan engineapi.DataAndRanges, 4)
|
|
require.NoError(t, extEngine.LoadIngestData(ctx, loadDataCh))
|
|
t.Cleanup(func() {
|
|
require.NoError(t, extEngine.Close())
|
|
})
|
|
require.Len(t, loadDataCh, 1)
|
|
dataAndRanges := <-loadDataCh
|
|
allKVs := getAllDataFromDataAndRanges(t, &dataAndRanges)
|
|
require.Empty(t, allKVs)
|
|
info := extEngine.ConflictInfo()
|
|
if od == engineapi.OnDuplicateKeyRemove {
|
|
require.Zero(t, info.Count)
|
|
require.Empty(t, info.Files)
|
|
} else {
|
|
require.EqualValues(t, 4, info.Count)
|
|
require.Len(t, info.Files, 1)
|
|
dupPairs := readKVFile(t, store, info.Files[0])
|
|
require.EqualValues(t, []kvPair{
|
|
{key: []byte{1}, value: []byte("aaa")},
|
|
{key: []byte{1}, value: []byte("aaa")},
|
|
{key: []byte{1}, value: []byte("aaa")},
|
|
{key: []byte{1}, value: []byte("aaa")},
|
|
}, dupPairs)
|
|
}
|
|
}
|
|
})
|
|
}
|