tidb/pkg/lightning/backend/external/engine_test.go

// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package external

import (
	"context"
	"fmt"
	"slices"
	"testing"

	"github.com/docker/go-units"
	"github.com/pingcap/tidb/br/pkg/storage"
	"github.com/pingcap/tidb/pkg/ingestor/engineapi"
	"github.com/pingcap/tidb/pkg/lightning/membuf"
	"github.com/stretchr/testify/require"
)
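
// testGetFirstAndLastKey asserts that data.GetFirstAndLastKey returns the
// expected boundary keys for the given lowerBound/upperBound pair, or nils
// when no key falls inside the range.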
func testGetFirstAndLastKey(
	t *testing.T,
	data engineapi.IngestData,
	lowerBound, upperBound []byte,
	expectedFirstKey, expectedLastKey []byte,
) {
	firstKey, lastKey, err := data.GetFirstAndLastKey(lowerBound, upperBound)
	require.NoError(t, err)
	require.Equal(t, expectedFirstKey, firstKey)
	require.Equal(t, expectedLastKey, lastKey)
}
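
// testNewIter iterates data in [lowerBound, upperBound) and checks that the
// collected key/value pairs match expectedKVs.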
func testNewIter(
	t *testing.T,
	data engineapi.IngestData,
	lowerBound, upperBound []byte,
	expectedKVs []kvPair,
) {
	ctx := context.Background()
	iter := data.NewIter(ctx, lowerBound, upperBound, nil)
	var kvs []kvPair
	for iter.First(); iter.Valid(); iter.Next() {
		require.NoError(t, iter.Error())
		kvs = append(kvs, kvPair{key: iter.Key(), value: iter.Value()})
	}
	require.NoError(t, iter.Error())
	require.NoError(t, iter.Close())
	require.Equal(t, expectedKVs, kvs)
}
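
// TestMemoryIngestData exercises MemoryIngestData twice: first with unique
// keys, then with a data set where key2 and key4 appear with duplicated keys.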
func TestMemoryIngestData(t *testing.T) {
	kvs := []kvPair{
		{key: []byte("key1"), value: []byte("value1")},
		{key: []byte("key2"), value: []byte("value2")},
		{key: []byte("key3"), value: []byte("value3")},
		{key: []byte("key4"), value: []byte("value4")},
		{key: []byte("key5"), value: []byte("value5")},
	}
	data := &MemoryIngestData{
		kvs: kvs,
		ts:  123,
	}
	require.EqualValues(t, 123, data.GetTS())
	testGetFirstAndLastKey(t, data, nil, nil, []byte("key1"), []byte("key5"))
	testGetFirstAndLastKey(t, data, []byte("key1"), []byte("key6"), []byte("key1"), []byte("key5"))
	testGetFirstAndLastKey(t, data, []byte("key2"), []byte("key5"), []byte("key2"), []byte("key4"))
	testGetFirstAndLastKey(t, data, []byte("key25"), []byte("key35"), []byte("key3"), []byte("key3"))
	testGetFirstAndLastKey(t, data, []byte("key25"), []byte("key26"), nil, nil)
	testGetFirstAndLastKey(t, data, []byte("key0"), []byte("key1"), nil, nil)
	testGetFirstAndLastKey(t, data, []byte("key6"), []byte("key9"), nil, nil)

	testNewIter(t, data, nil, nil, kvs)
	testNewIter(t, data, []byte("key1"), []byte("key6"), kvs)
	testNewIter(t, data, []byte("key2"), []byte("key5"), kvs[1:4])
	testNewIter(t, data, []byte("key25"), []byte("key35"), kvs[2:3])
	testNewIter(t, data, []byte("key25"), []byte("key26"), nil)
	testNewIter(t, data, []byte("key0"), []byte("key1"), nil)
	testNewIter(t, data, []byte("key6"), []byte("key9"), nil)

	data = &MemoryIngestData{
		ts: 234,
	}
	encodedKVs := make([]kvPair, 0, len(kvs)*2)
	duplicatedKVs := make([]kvPair, 0, len(kvs)*2)
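	// Build a second data set: every key appears once, and key2/key4 each get
	// an extra entry with the same key but a value carrying one extra trailing byte.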
	for i := range kvs {
		encodedKey := slices.Clone(kvs[i].key)
		encodedKVs = append(encodedKVs, kvPair{key: encodedKey, value: kvs[i].value})
		if i%2 == 0 {
			continue
		}
		// for odd indices (key2, key4), duplicatedKVs collects both the original
		// pair and the variant whose value has the extra trailing byte
		duplicatedKVs = append(duplicatedKVs, kvPair{key: encodedKey, value: kvs[i].value})
		encodedKey = slices.Clone(kvs[i].key)
		newValues := make([]byte, len(kvs[i].value)+1)
		copy(newValues, kvs[i].value)
		newValues[len(kvs[i].value)] = 1
		encodedKVs = append(encodedKVs, kvPair{key: encodedKey, value: newValues})
		duplicatedKVs = append(duplicatedKVs, kvPair{key: encodedKey, value: newValues})
	}
	data.kvs = encodedKVs
	require.EqualValues(t, 234, data.GetTS())
	testGetFirstAndLastKey(t, data, nil, nil, []byte("key1"), []byte("key5"))
	testGetFirstAndLastKey(t, data, []byte("key1"), []byte("key6"), []byte("key1"), []byte("key5"))
	testGetFirstAndLastKey(t, data, []byte("key2"), []byte("key5"), []byte("key2"), []byte("key4"))
	testGetFirstAndLastKey(t, data, []byte("key25"), []byte("key35"), []byte("key3"), []byte("key3"))
	testGetFirstAndLastKey(t, data, []byte("key25"), []byte("key26"), nil, nil)
	testGetFirstAndLastKey(t, data, []byte("key0"), []byte("key1"), nil, nil)
	testGetFirstAndLastKey(t, data, []byte("key6"), []byte("key9"), nil, nil)
}
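
// TestSplit checks the split helper: the input is divided into at most conc
// nearly equal groups, a conc of 0 or 1 keeps the input as a single group,
// and an empty input yields nil.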
func TestSplit(t *testing.T) {
	cases := []struct {
		input    []int
		conc     int
		expected [][]int
	}{
		{
			input:    []int{1, 2, 3, 4, 5},
			conc:     1,
			expected: [][]int{{1, 2, 3, 4, 5}},
		},
		{
			input:    []int{1, 2, 3, 4, 5},
			conc:     2,
			expected: [][]int{{1, 2, 3}, {4, 5}},
		},
		{
			input:    []int{1, 2, 3, 4, 5},
			conc:     0,
			expected: [][]int{{1, 2, 3, 4, 5}},
		},
		{
			input:    []int{1, 2, 3, 4, 5},
			conc:     5,
			expected: [][]int{{1}, {2}, {3}, {4}, {5}},
		},
		{
			input:    []int{},
			conc:     5,
			expected: nil,
		},
		{
			input:    []int{1, 2, 3, 4, 5},
			conc:     100,
			expected: [][]int{{1}, {2}, {3}, {4}, {5}},
		},
	}
	for _, c := range cases {
		got := split(c.input, c.conc)
		require.Equal(t, c.expected, got)
	}
}
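
// prepareKVFiles writes each slice in contents through an external writer and
// returns the generated data files and stat files, one of each per slice.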
func prepareKVFiles(t *testing.T, store storage.ExternalStorage, contents [][]kvPair) (dataFiles, statFiles []string) {
	ctx := context.Background()
	for i, c := range contents {
		var summary *WriterSummary
		// we want exactly one file per content slice, so keep the memory and
		// block size limits larger than the data.
		writer := NewWriterBuilder().SetPropKeysDistance(4).
			SetMemorySizeLimit(8*units.MiB).SetBlockSize(8*units.MiB).
			SetOnCloseFunc(func(s *WriterSummary) { summary = s }).
			Build(store, "/test", fmt.Sprintf("%d", i))
		for _, p := range c {
			require.NoError(t, writer.WriteRow(ctx, p.key, p.value, nil))
		}
		require.NoError(t, writer.Close(ctx))
		require.Len(t, summary.MultipleFilesStats, 1)
		require.Len(t, summary.MultipleFilesStats[0].Filenames, 1)
		require.Zero(t, summary.ConflictInfo.Count)
		require.Empty(t, summary.ConflictInfo.Files)
		dataFiles = append(dataFiles, summary.MultipleFilesStats[0].Filenames[0][0])
		statFiles = append(statFiles, summary.MultipleFilesStats[0].Filenames[0][1])
	}
	return
}
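
// getAllDataFromDataAndRanges drains every key/value pair from the ingest
// data carried by dataAndRanges.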
func getAllDataFromDataAndRanges(t *testing.T, dataAndRanges *engineapi.DataAndRanges) []kvPair {
	ctx := context.Background()
	iter := dataAndRanges.Data.NewIter(ctx, nil, nil, membuf.NewPool())
	var allKVs []kvPair
	for iter.First(); iter.Valid(); iter.Next() {
		allKVs = append(allKVs, kvPair{key: iter.Key(), value: iter.Value()})
	}
	require.NoError(t, iter.Close())
	return allKVs
}
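
// TestEngineOnDup covers the four OnDuplicateKey policies of the external
// engine: Ignore and Error are expected to fail the load when the input
// contains duplicated keys, while Record and Remove drop the conflicting
// pairs from the loaded data, with Record additionally persisting them and
// reporting them through ConflictInfo.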
func TestEngineOnDup(t *testing.T) {
	ctx := context.Background()
	contents := [][]kvPair{{
		{key: []byte{4}, value: []byte("bbb")},
		{key: []byte{4}, value: []byte("bbb")},
		{key: []byte{1}, value: []byte("aa")},
		{key: []byte{1}, value: []byte("aa")},
		{key: []byte{1}, value: []byte("aa")},
		{key: []byte{2}, value: []byte("vv")},
		{key: []byte{3}, value: []byte("sds")},
	}}
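	// getEngineFn builds an external Engine over the given files with the
	// requested duplicate-key policy; the remaining positional arguments
	// (key range, split keys, sizes, timestamps, memory capacity and so on)
	// are fixed values whose exact meaning is not important for these tests.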
	getEngineFn := func(store storage.ExternalStorage, onDup engineapi.OnDuplicateKey, inDataFiles, inStatFiles []string) *Engine {
		return NewExternalEngine(
			ctx,
			store, inDataFiles, inStatFiles,
			[]byte{1}, []byte{5},
			[][]byte{{1}, {2}, {3}, {4}, {5}},
			[][]byte{{1}, {3}, {5}},
			10,
			123,
			456,
			789,
			true,
			16*units.GiB,
			onDup,
			"/",
		)
	}
t.Run("on duplicate ignore", func(t *testing.T) {
onDup := engineapi.OnDuplicateKeyIgnore
store := storage.NewMemStorage()
dataFiles, statFiles := prepareKVFiles(t, store, contents)
extEngine := getEngineFn(store, onDup, dataFiles, statFiles)
loadDataCh := make(chan engineapi.DataAndRanges, 4)
require.ErrorContains(t, extEngine.LoadIngestData(ctx, loadDataCh), "duplicate key found")
t.Cleanup(func() {
require.NoError(t, extEngine.Close())
})
})
t.Run("on duplicate error", func(t *testing.T) {
onDup := engineapi.OnDuplicateKeyError
store := storage.NewMemStorage()
dataFiles, statFiles := prepareKVFiles(t, store, contents)
extEngine := getEngineFn(store, onDup, dataFiles, statFiles)
loadDataCh := make(chan engineapi.DataAndRanges, 4)
require.ErrorContains(t, extEngine.LoadIngestData(ctx, loadDataCh), "[Lightning:Restore:ErrFoundDuplicateKey]found duplicate key '\x01', value 'aa'")
t.Cleanup(func() {
require.NoError(t, extEngine.Close())
})
})
t.Run("on duplicate record or remove, no duplicates", func(t *testing.T) {
for _, od := range []engineapi.OnDuplicateKey{engineapi.OnDuplicateKeyRecord, engineapi.OnDuplicateKeyRemove} {
store := storage.NewMemStorage()
dfiles, sfiles := prepareKVFiles(t, store, [][]kvPair{{
{key: []byte{4}, value: []byte("bbb")},
{key: []byte{1}, value: []byte("aa")},
{key: []byte{2}, value: []byte("vv")},
{key: []byte{3}, value: []byte("sds")},
}})
extEngine := getEngineFn(store, od, dfiles, sfiles)
loadDataCh := make(chan engineapi.DataAndRanges, 4)
require.NoError(t, extEngine.LoadIngestData(ctx, loadDataCh))
t.Cleanup(func() {
require.NoError(t, extEngine.Close())
})
require.Len(t, loadDataCh, 1)
dataAndRanges := <-loadDataCh
allKVs := getAllDataFromDataAndRanges(t, &dataAndRanges)
require.EqualValues(t, []kvPair{
{key: []byte{1}, value: []byte("aa")},
{key: []byte{2}, value: []byte("vv")},
{key: []byte{3}, value: []byte("sds")},
{key: []byte{4}, value: []byte("bbb")},
}, allKVs)
info := extEngine.ConflictInfo()
require.Zero(t, info.Count)
require.Empty(t, info.Files)
}
})
t.Run("on duplicate record or remove, partial duplicated", func(t *testing.T) {
contents2 := [][]kvPair{
{{key: []byte{1}, value: []byte("aa")}, {key: []byte{1}, value: []byte("aa")}},
{{key: []byte{1}, value: []byte("aa")}, {key: []byte{2}, value: []byte("vv")}, {key: []byte{3}, value: []byte("sds")}},
{{key: []byte{4}, value: []byte("bbb")}, {key: []byte{4}, value: []byte("bbb")}},
}
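		// contents2 spreads key 1 across the first two files, so duplicates must
		// be detected across file boundaries; contents (the single-file input
		// above) covers duplicates within one file.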
		for _, cont := range [][][]kvPair{contents, contents2} {
			for _, od := range []engineapi.OnDuplicateKey{engineapi.OnDuplicateKeyRecord, engineapi.OnDuplicateKeyRemove} {
				store := storage.NewMemStorage()
				dataFiles, statFiles := prepareKVFiles(t, store, cont)
				extEngine := getEngineFn(store, od, dataFiles, statFiles)
				loadDataCh := make(chan engineapi.DataAndRanges, 4)
				require.NoError(t, extEngine.LoadIngestData(ctx, loadDataCh))
				t.Cleanup(func() {
					require.NoError(t, extEngine.Close())
				})
				require.Len(t, loadDataCh, 1)
				dataAndRanges := <-loadDataCh
				allKVs := getAllDataFromDataAndRanges(t, &dataAndRanges)
				require.EqualValues(t, []kvPair{
					{key: []byte{2}, value: []byte("vv")},
					{key: []byte{3}, value: []byte("sds")},
				}, allKVs)
				info := extEngine.ConflictInfo()
				if od == engineapi.OnDuplicateKeyRemove {
					require.Zero(t, info.Count)
					require.Empty(t, info.Files)
				} else {
					require.EqualValues(t, 5, info.Count)
					require.Len(t, info.Files, 1)
					dupPairs := readKVFile(t, store, info.Files[0])
					require.EqualValues(t, []kvPair{
						{key: []byte{1}, value: []byte("aa")},
						{key: []byte{1}, value: []byte("aa")},
						{key: []byte{1}, value: []byte("aa")},
						{key: []byte{4}, value: []byte("bbb")},
						{key: []byte{4}, value: []byte("bbb")},
					}, dupPairs)
				}
			}
		}
	})
t.Run("on duplicate record or remove, all duplicated", func(t *testing.T) {
for _, od := range []engineapi.OnDuplicateKey{engineapi.OnDuplicateKeyRecord, engineapi.OnDuplicateKeyRemove} {
store := storage.NewMemStorage()
dfiles, sfiles := prepareKVFiles(t, store, [][]kvPair{{
{key: []byte{1}, value: []byte("aaa")},
{key: []byte{1}, value: []byte("aaa")},
{key: []byte{1}, value: []byte("aaa")},
{key: []byte{1}, value: []byte("aaa")},
}})
extEngine := getEngineFn(store, od, dfiles, sfiles)
loadDataCh := make(chan engineapi.DataAndRanges, 4)
require.NoError(t, extEngine.LoadIngestData(ctx, loadDataCh))
t.Cleanup(func() {
require.NoError(t, extEngine.Close())
})
require.Len(t, loadDataCh, 1)
dataAndRanges := <-loadDataCh
allKVs := getAllDataFromDataAndRanges(t, &dataAndRanges)
require.Empty(t, allKVs)
info := extEngine.ConflictInfo()
if od == engineapi.OnDuplicateKeyRemove {
require.Zero(t, info.Count)
require.Empty(t, info.Files)
} else {
require.EqualValues(t, 4, info.Count)
require.Len(t, info.Files, 1)
dupPairs := readKVFile(t, store, info.Files[0])
require.EqualValues(t, []kvPair{
{key: []byte{1}, value: []byte("aaa")},
{key: []byte{1}, value: []byte("aaa")},
{key: []byte{1}, value: []byte("aaa")},
{key: []byte{1}, value: []byte("aaa")},
}, dupPairs)
}
}
})
}