243 lines
7.0 KiB
Go
243 lines
7.0 KiB
Go
// Copyright 2021 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package local
|
|
|
|
import (
|
|
"bytes"
|
|
"math/rand"
|
|
"path/filepath"
|
|
"sort"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/cockroachdb/pebble"
|
|
"github.com/pingcap/tidb/br/pkg/lightning/common"
|
|
"github.com/pingcap/tidb/br/pkg/lightning/log"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestDupDetectIterator(t *testing.T) {
|
|
var pairs []common.KvPair
|
|
prevRowMax := int64(0)
|
|
// Unique pairs.
|
|
for i := 0; i < 20; i++ {
|
|
pairs = append(pairs, common.KvPair{
|
|
Key: randBytes(32),
|
|
Val: randBytes(128),
|
|
RowID: common.EncodeIntRowID(prevRowMax),
|
|
})
|
|
prevRowMax++
|
|
}
|
|
// Duplicate pairs which repeat the same key twice.
|
|
for i := 20; i < 40; i++ {
|
|
key := randBytes(32)
|
|
pairs = append(pairs, common.KvPair{
|
|
Key: key,
|
|
Val: randBytes(128),
|
|
RowID: common.EncodeIntRowID(prevRowMax),
|
|
})
|
|
prevRowMax++
|
|
pairs = append(pairs, common.KvPair{
|
|
Key: key,
|
|
Val: randBytes(128),
|
|
RowID: common.EncodeIntRowID(prevRowMax),
|
|
})
|
|
prevRowMax++
|
|
}
|
|
// Duplicate pairs which repeat the same key three times.
|
|
for i := 40; i < 50; i++ {
|
|
key := randBytes(32)
|
|
pairs = append(pairs, common.KvPair{
|
|
Key: key,
|
|
Val: randBytes(128),
|
|
RowID: common.EncodeIntRowID(prevRowMax),
|
|
})
|
|
prevRowMax++
|
|
pairs = append(pairs, common.KvPair{
|
|
Key: key,
|
|
Val: randBytes(128),
|
|
RowID: common.EncodeIntRowID(prevRowMax),
|
|
})
|
|
prevRowMax++
|
|
pairs = append(pairs, common.KvPair{
|
|
Key: key,
|
|
Val: randBytes(128),
|
|
RowID: common.EncodeIntRowID(prevRowMax),
|
|
})
|
|
prevRowMax++
|
|
}
|
|
|
|
// Find duplicates from the generated pairs.
|
|
var dupPairs []common.KvPair
|
|
sort.Slice(pairs, func(i, j int) bool {
|
|
return bytes.Compare(pairs[i].Key, pairs[j].Key) < 0
|
|
})
|
|
uniqueKeys := make([][]byte, 0)
|
|
for i := 0; i < len(pairs); {
|
|
j := i + 1
|
|
for j < len(pairs) && bytes.Equal(pairs[j-1].Key, pairs[j].Key) {
|
|
j++
|
|
}
|
|
uniqueKeys = append(uniqueKeys, pairs[i].Key)
|
|
if i+1 == j {
|
|
i++
|
|
continue
|
|
}
|
|
for k := i; k < j; k++ {
|
|
dupPairs = append(dupPairs, pairs[k])
|
|
}
|
|
i = j
|
|
}
|
|
|
|
keyAdapter := dupDetectKeyAdapter{}
|
|
|
|
// Write pairs to db after shuffling the pairs.
|
|
rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
|
|
rnd.Shuffle(len(pairs), func(i, j int) {
|
|
pairs[i], pairs[j] = pairs[j], pairs[i]
|
|
})
|
|
storeDir := t.TempDir()
|
|
db, err := pebble.Open(filepath.Join(storeDir, "kv"), &pebble.Options{})
|
|
require.NoError(t, err)
|
|
wb := db.NewBatch()
|
|
for _, p := range pairs {
|
|
key := keyAdapter.Encode(nil, p.Key, p.RowID)
|
|
require.NoError(t, wb.Set(key, p.Val, nil))
|
|
}
|
|
require.NoError(t, wb.Commit(pebble.Sync))
|
|
|
|
dupDB, err := pebble.Open(filepath.Join(storeDir, "duplicates"), &pebble.Options{})
|
|
require.NoError(t, err)
|
|
var iter Iter
|
|
iter = newDupDetectIter(db, keyAdapter, &pebble.IterOptions{}, dupDB, log.L(), DupDetectOpt{})
|
|
sort.Slice(pairs, func(i, j int) bool {
|
|
key1 := keyAdapter.Encode(nil, pairs[i].Key, pairs[i].RowID)
|
|
key2 := keyAdapter.Encode(nil, pairs[j].Key, pairs[j].RowID)
|
|
return bytes.Compare(key1, key2) < 0
|
|
})
|
|
|
|
// Verify first pair.
|
|
require.True(t, iter.First())
|
|
require.True(t, iter.Valid())
|
|
require.Equal(t, pairs[0].Key, iter.Key())
|
|
require.Equal(t, pairs[0].Val, iter.Value())
|
|
|
|
// Verify last pair.
|
|
require.True(t, iter.Last())
|
|
require.True(t, iter.Valid())
|
|
require.Equal(t, pairs[len(pairs)-1].Key, iter.Key())
|
|
require.Equal(t, pairs[len(pairs)-1].Val, iter.Value())
|
|
|
|
// Iterate all keys and check the count of unique keys.
|
|
for iter.First(); iter.Valid(); iter.Next() {
|
|
require.Equal(t, uniqueKeys[0], iter.Key())
|
|
uniqueKeys = uniqueKeys[1:]
|
|
}
|
|
require.NoError(t, iter.Error())
|
|
require.Equal(t, 0, len(uniqueKeys))
|
|
require.NoError(t, iter.Close())
|
|
require.NoError(t, db.Close())
|
|
|
|
// Check duplicates detected by dupDetectIter.
|
|
iter = newDupDBIter(dupDB, keyAdapter, &pebble.IterOptions{})
|
|
var detectedPairs []common.KvPair
|
|
for iter.First(); iter.Valid(); iter.Next() {
|
|
detectedPairs = append(detectedPairs, common.KvPair{
|
|
Key: append([]byte{}, iter.Key()...),
|
|
Val: append([]byte{}, iter.Value()...),
|
|
})
|
|
}
|
|
require.NoError(t, iter.Error())
|
|
require.NoError(t, iter.Close())
|
|
require.NoError(t, dupDB.Close())
|
|
require.Equal(t, len(dupPairs), len(detectedPairs))
|
|
|
|
sort.Slice(dupPairs, func(i, j int) bool {
|
|
keyCmp := bytes.Compare(dupPairs[i].Key, dupPairs[j].Key)
|
|
return keyCmp < 0 || keyCmp == 0 && bytes.Compare(dupPairs[i].Val, dupPairs[j].Val) < 0
|
|
})
|
|
sort.Slice(detectedPairs, func(i, j int) bool {
|
|
keyCmp := bytes.Compare(detectedPairs[i].Key, detectedPairs[j].Key)
|
|
return keyCmp < 0 || keyCmp == 0 && bytes.Compare(detectedPairs[i].Val, detectedPairs[j].Val) < 0
|
|
})
|
|
for i := 0; i < len(detectedPairs); i++ {
|
|
require.Equal(t, dupPairs[i].Key, detectedPairs[i].Key)
|
|
require.Equal(t, dupPairs[i].Val, detectedPairs[i].Val)
|
|
}
|
|
}
|
|
|
|
func TestDupDetectIterSeek(t *testing.T) {
|
|
pairs := []common.KvPair{
|
|
{
|
|
Key: []byte{1, 2, 3, 0},
|
|
Val: randBytes(128),
|
|
RowID: common.EncodeIntRowID(1),
|
|
},
|
|
{
|
|
Key: []byte{1, 2, 3, 1},
|
|
Val: randBytes(128),
|
|
RowID: common.EncodeIntRowID(2),
|
|
},
|
|
{
|
|
Key: []byte{1, 2, 3, 1},
|
|
Val: randBytes(128),
|
|
RowID: common.EncodeIntRowID(3),
|
|
},
|
|
{
|
|
Key: []byte{1, 2, 3, 2},
|
|
Val: randBytes(128),
|
|
RowID: common.EncodeIntRowID(4),
|
|
},
|
|
}
|
|
|
|
storeDir := t.TempDir()
|
|
db, err := pebble.Open(filepath.Join(storeDir, "kv"), &pebble.Options{})
|
|
require.NoError(t, err)
|
|
|
|
keyAdapter := dupDetectKeyAdapter{}
|
|
wb := db.NewBatch()
|
|
for _, p := range pairs {
|
|
key := keyAdapter.Encode(nil, p.Key, p.RowID)
|
|
require.NoError(t, wb.Set(key, p.Val, nil))
|
|
}
|
|
require.NoError(t, wb.Commit(pebble.Sync))
|
|
|
|
dupDB, err := pebble.Open(filepath.Join(storeDir, "duplicates"), &pebble.Options{})
|
|
require.NoError(t, err)
|
|
iter := newDupDetectIter(db, keyAdapter, &pebble.IterOptions{}, dupDB, log.L(), DupDetectOpt{})
|
|
|
|
require.True(t, iter.Seek([]byte{1, 2, 3, 1}))
|
|
require.Equal(t, pairs[1].Val, iter.Value())
|
|
require.True(t, iter.Next())
|
|
require.Equal(t, pairs[3].Val, iter.Value())
|
|
require.NoError(t, iter.Close())
|
|
require.NoError(t, db.Close())
|
|
require.NoError(t, dupDB.Close())
|
|
}
|
|
|
|
func TestKeyAdapterEncoding(t *testing.T) {
|
|
keyAdapter := dupDetectKeyAdapter{}
|
|
srcKey := []byte{1, 2, 3}
|
|
v := keyAdapter.Encode(nil, srcKey, common.EncodeIntRowID(1))
|
|
resKey, err := keyAdapter.Decode(nil, v)
|
|
require.NoError(t, err)
|
|
require.EqualValues(t, srcKey, resKey)
|
|
|
|
v = keyAdapter.Encode(nil, srcKey, []byte("mock_common_handle"))
|
|
resKey, err = keyAdapter.Decode(nil, v)
|
|
require.NoError(t, err)
|
|
require.EqualValues(t, srcKey, resKey)
|
|
}
|