// tidb/util/extsort/disk_sorter_test.go

// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package extsort

import (
"bytes"
"cmp"
"context"
"encoding/json"
"math/rand"
"slices"
"sync"
"testing"

"github.com/cockroachdb/pebble"
"github.com/cockroachdb/pebble/sstable"
"github.com/cockroachdb/pebble/vfs"
"github.com/pingcap/errors"
"github.com/stretchr/testify/require"
"go.uber.org/zap"
)
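
// TestDiskSorterCommon runs the shared sorter test suite against a
// DiskSorter backed by a temporary directory.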
func TestDiskSorterCommon(t *testing.T) {
sorter, err := OpenDiskSorter(t.TempDir(), &DiskSorterOptions{
WriterBufferSize: 32 * 1024,
CompactionThreshold: 4,
MaxCompactionDepth: 4,
})
require.NoError(t, err)
runCommonTest(t, sorter)
require.NoError(t, sorter.Close())
}
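
// TestDiskSorterCommonParallel runs the shared parallel test suite
// against a DiskSorter backed by a temporary directory.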
func TestDiskSorterCommonParallel(t *testing.T) {
sorter, err := OpenDiskSorter(t.TempDir(), &DiskSorterOptions{
WriterBufferSize: 32 * 1024,
CompactionThreshold: 4,
MaxCompactionDepth: 4,
})
require.NoError(t, err)
runCommonParallelTest(t, sorter)
require.NoError(t, sorter.Close())
}
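
// TestDiskSorterReopen checks that a DiskSorter can be closed and
// reopened between writes, that the sorted state survives a reopen,
// and that iteration still returns every key/value pair in order.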
func TestDiskSorterReopen(t *testing.T) {
dirname := t.TempDir()
sorter, err := OpenDiskSorter(dirname, &DiskSorterOptions{
WriterBufferSize: 32 * 1024,
CompactionThreshold: 4,
MaxCompactionDepth: 4,
})
require.NoError(t, err)
rng := rand.New(rand.NewSource(0))
kvs := genRandomKVs(rng, 2000, 256, 1024)
batch1, batch2 := kvs[:1000], kvs[1000:]
// Write the first batch.
w, err := sorter.NewWriter(context.Background())
require.NoError(t, err)
for _, kv := range batch1 {
require.NoError(t, w.Put(kv.key, kv.value))
}
require.NoError(t, w.Close())
// Reopen the sorter.
require.NoError(t, sorter.Close())
sorter, err = OpenDiskSorter(dirname, &DiskSorterOptions{
WriterBufferSize: 32 * 1024,
CompactionThreshold: 4,
MaxCompactionDepth: 4,
})
require.NoError(t, err)
require.False(t, sorter.IsSorted())
// Write the second batch.
w, err = sorter.NewWriter(context.Background())
require.NoError(t, err)
for _, kv := range batch2 {
require.NoError(t, w.Put(kv.key, kv.value))
}
require.NoError(t, w.Close())
slices.SortFunc(kvs, func(a, b keyValue) int {
return bytes.Compare(a.key, b.key)
})
verify := func() {
iter, err := sorter.NewIterator(context.Background())
require.NoError(t, err)
kvCnt := 0
for iter.First(); iter.Valid(); iter.Next() {
require.Equal(t, kvs[kvCnt].key, iter.UnsafeKey())
require.Equal(t, kvs[kvCnt].value, iter.UnsafeValue())
kvCnt++
}
require.Equal(t, len(kvs), kvCnt)
require.NoError(t, iter.Close())
}
// Sort and verify.
require.NoError(t, sorter.Sort(context.Background()))
verify()
require.True(t, sorter.IsSorted())
// Reopen the sorter again.
require.NoError(t, sorter.Close())
sorter, err = OpenDiskSorter(dirname, &DiskSorterOptions{
WriterBufferSize: 32 * 1024,
})
require.NoError(t, err)
require.True(t, sorter.IsSorted())
verify()
require.NoError(t, sorter.Close())
}
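
// TestKVStatsCollector verifies the histogram buckets emitted by the
// kvStats property collector for various bucket sizes.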
func TestKVStatsCollector(t *testing.T) {
kvs := []keyValue{
{[]byte("aa"), []byte("11")},
{[]byte("bb"), []byte("22")},
{[]byte("cc"), []byte("33")},
{[]byte("dd"), []byte("44")},
{[]byte("ee"), []byte("55")},
}
testCases := []struct {
bucketSize int
expected kvStats
}{
{
bucketSize: 0,
expected: kvStats{Histogram: []kvStatsBucket{
{4, []byte("aa")},
{4, []byte("bb")},
{4, []byte("cc")},
{4, []byte("dd")},
{4, []byte("ee")},
}},
},
{
bucketSize: 4,
expected: kvStats{Histogram: []kvStatsBucket{
{4, []byte("aa")},
{4, []byte("bb")},
{4, []byte("cc")},
{4, []byte("dd")},
{4, []byte("ee")},
}},
},
{
bucketSize: 7,
expected: kvStats{Histogram: []kvStatsBucket{
{8, []byte("bb")},
{8, []byte("dd")},
{4, []byte("ee")},
}},
},
{
bucketSize: 50,
expected: kvStats{Histogram: []kvStatsBucket{
{20, []byte("ee")},
}},
},
}
for _, tc := range testCases {
c := newKVStatsCollector(tc.bucketSize)
for _, kv := range kvs {
err := c.Add(sstable.InternalKey{UserKey: kv.key}, kv.value)
require.NoError(t, err)
}
userProps := make(map[string]string)
require.NoError(t, c.Finish(userProps))
require.Len(t, userProps, 1)
prop, ok := userProps[kvStatsPropKey]
require.True(t, ok)
var stats kvStats
require.NoError(t, json.Unmarshal([]byte(prop), &stats))
require.Equal(t, tc.expected, stats)
}
}
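
// TestMakeFilename checks the SST file path generated for a given
// directory and file number.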
func TestMakeFilename(t *testing.T) {
testCases := []struct {
dir string
fileNum int
expected string
}{
{
dir: "/tmp",
fileNum: 1,
expected: "/tmp/000001.sst",
},
{
dir: "/tmp",
fileNum: 123,
expected: "/tmp/000123.sst",
},
{
dir: "/tmp",
fileNum: 666666,
expected: "/tmp/666666.sst",
},
{
dir: "/tmp",
fileNum: 7777777,
expected: "/tmp/7777777.sst",
},
}
fs := vfs.NewMem()
for _, tc := range testCases {
require.Equal(t, tc.expected, makeFilename(fs, tc.dir, tc.fileNum))
}
}
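
// TestParseFilename checks that file numbers are parsed back out of
// SST file names and that other names are rejected.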
func TestParseFilename(t *testing.T) {
testCases := []struct {
filename string
expOK bool
expFileNum int
}{
{
filename: "/tmp/1.sst",
expOK: true,
expFileNum: 1,
},
{
filename: "/tmp/123.sst",
expOK: true,
expFileNum: 123,
},
{
filename: "/tmp/000001.sst",
expOK: true,
expFileNum: 1,
},
{
filename: "/tmp/000123.sst",
expOK: true,
expFileNum: 123,
},
{
filename: "/tmp/666666.sst",
expOK: true,
expFileNum: 666666,
},
{
filename: "/tmp/7777777.sst",
expOK: true,
expFileNum: 7777777,
},
{
filename: "/tmp/123.sst.tmp",
expOK: false,
},
{
filename: diskSorterSortedFile,
expOK: false,
},
}
fs := vfs.NewMem()
for _, tc := range testCases {
fileNum, ok := parseFilename(fs, tc.filename)
require.Equal(t, tc.expOK, ok)
if tc.expOK {
require.Equal(t, tc.expFileNum, fileNum)
}
}
}
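
// TestSSTWriter writes a small SST file and verifies the metadata
// reported by the writer's completion callback, including the key
// bounds and the kv stats histogram.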
func TestSSTWriter(t *testing.T) {
fs := vfs.Default
dirname := t.TempDir()
fileNum := 13
var fileMeta *fileMetadata
sw, err := newSSTWriter(fs, dirname, fileNum, 8, func(meta *fileMetadata) {
fileMeta = meta
})
require.NoError(t, err)
require.NoError(t, sw.Set([]byte("aa"), []byte("11")))
require.NoError(t, sw.Set([]byte("bb"), []byte("22")))
require.NoError(t, sw.Set([]byte("cc"), []byte("33")))
require.NoError(t, sw.Set([]byte("dd"), []byte("44")))
require.NoError(t, sw.Set([]byte("ee"), []byte("55")))
require.NoError(t, sw.Close())
require.FileExists(t, makeFilename(fs, dirname, fileNum))
list, err := fs.List(dirname)
require.NoError(t, err)
require.Len(t, list, 1)
require.NotNil(t, fileMeta)
require.Equal(t, fileNum, fileMeta.fileNum)
require.Equal(t, []byte("aa"), fileMeta.startKey)
require.Equal(t, []byte("ee\x00"), fileMeta.endKey)
require.Equal(t, []byte("ee"), fileMeta.lastKey)
require.Equal(t, kvStats{Histogram: []kvStatsBucket{
{8, []byte("bb")},
{8, []byte("dd")},
{4, []byte("ee")},
}}, fileMeta.kvStats)
}
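
// TestSSTWriterEmpty verifies the metadata produced when an SST
// writer is closed without any keys written.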
func TestSSTWriterEmpty(t *testing.T) {
fs := vfs.Default
dirname := t.TempDir()
fileNum := 13
var fileMeta *fileMetadata
sw, err := newSSTWriter(fs, dirname, fileNum, 8, func(meta *fileMetadata) {
fileMeta = meta
})
require.NoError(t, err)
require.NoError(t, sw.Close())
require.FileExists(t, makeFilename(fs, dirname, fileNum))
list, err := fs.List(dirname)
require.NoError(t, err)
require.Len(t, list, 1)
require.NotNil(t, fileMeta)
require.Equal(t, fileNum, fileMeta.fileNum)
require.Nil(t, fileMeta.startKey)
require.Equal(t, []byte{0}, fileMeta.endKey)
require.Nil(t, fileMeta.lastKey)
require.Equal(t, kvStats{}, fileMeta.kvStats)
}
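
// TestSSTWriterError verifies that writing keys out of order fails
// and that no file is left behind after the failed close.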
func TestSSTWriterError(t *testing.T) {
fs := vfs.Default
dirname := t.TempDir()
fileNum := 13
sw, err := newSSTWriter(fs, dirname, fileNum, 0, func(meta *fileMetadata) {})
require.NoError(t, err)
require.NoError(t, sw.Set([]byte("bb"), []byte("11")))
require.Error(t, sw.Set([]byte("aa"), []byte("22")))
require.Error(t, sw.Close())
list, err := fs.List(dirname)
require.NoError(t, err)
require.Empty(t, list)
}
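
// TestSSTReaderPool verifies the reference counting of pooled SST
// readers: a reader is shared while referenced, closed once its
// reference count drops to zero, and unref of a removed reader panics.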
func TestSSTReaderPool(t *testing.T) {
fs := vfs.Default
dirname := t.TempDir()
writeSSTFile(t, fs, dirname, 1, nil)
cache := pebble.NewCache(8 << 20)
defer cache.Unref()
pool := newSSTReaderPool(fs, dirname, cache)
r1, err := pool.get(1)
require.NoError(t, err)
r2, err := pool.get(1)
require.NoError(t, err)
require.True(t, r1 == r2, "should be the same reader")
require.NoError(t, pool.unref(1))
// reader is still referenced, so it should be valid.
iter, err := r1.NewIter(nil, nil)
require.NoError(t, err)
require.NoError(t, iter.Close())
require.NoError(t, pool.unref(1))
// reader is no longer referenced, so it should be closed.
iter, err = r1.NewIter(nil, nil)
require.Error(t, err)
// reader has been removed from the pool, unref should panic.
require.Panics(t, func() {
_ = pool.unref(1)
})
}
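
// TestSSTReaderPoolParallel exercises concurrent get/unref calls on
// the reader pool from multiple goroutines.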
func TestSSTReaderPoolParallel(t *testing.T) {
fs := vfs.Default
dirname := t.TempDir()
writeSSTFile(t, fs, dirname, 1, nil)
writeSSTFile(t, fs, dirname, 2, nil)
writeSSTFile(t, fs, dirname, 3, nil)
cache := pebble.NewCache(8 << 20)
defer cache.Unref()
pool := newSSTReaderPool(fs, dirname, cache)
var wg sync.WaitGroup
for i := 0; i < 16; i++ {
wg.Add(1)
go func(fileNum int) {
defer wg.Done()
for j := 0; j < 10000; j++ {
_, err := pool.get(fileNum)
require.NoError(t, err)
require.NoError(t, pool.unref(fileNum))
}
}(i%3 + 1)
}
wg.Wait()
}

// writeSSTFile writes an SST file with the given key/value pairs and
// returns the metadata reported by the writer's completion callback.
func writeSSTFile(
t *testing.T,
fs vfs.FS,
dirname string,
fileNum int,
kvs []keyValue,
) *fileMetadata {
var file *fileMetadata
sw, err := newSSTWriter(fs, dirname, fileNum, 8, func(meta *fileMetadata) {
file = meta
})
require.NoError(t, err)
for _, kv := range kvs {
require.NoError(t, sw.Set(kv.key, kv.value))
}
require.NoError(t, sw.Close())
return file
}
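
// TestSSTIter verifies Seek/First/Next/Last navigation on an sstIter
// backed by a pooled SST reader.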
func TestSSTIter(t *testing.T) {
fs := vfs.Default
dirname := t.TempDir()
writeSSTFile(t, fs, dirname, 1, []keyValue{
{[]byte("aa"), []byte("11")},
{[]byte("bb"), []byte("22")},
{[]byte("cc"), []byte("33")},
{[]byte("dd"), []byte("44")},
{[]byte("ee"), []byte("55")},
})
cache := pebble.NewCache(8 << 20)
defer cache.Unref()
pool := newSSTReaderPool(fs, dirname, cache)
reader, err := pool.get(1)
require.NoError(t, err)
rawIter, err := reader.NewIter(nil, nil)
require.NoError(t, err)
iter := &sstIter{
iter: rawIter,
onClose: func() error {
return pool.unref(1)
},
}
require.True(t, iter.Seek([]byte("bc")))
require.Equal(t, []byte("cc"), iter.UnsafeKey())
require.Equal(t, []byte("33"), iter.UnsafeValue())
require.True(t, iter.First())
require.Equal(t, []byte("aa"), iter.UnsafeKey())
require.Equal(t, []byte("11"), iter.UnsafeValue())
require.True(t, iter.Next())
require.Equal(t, []byte("bb"), iter.UnsafeKey())
require.Equal(t, []byte("22"), iter.UnsafeValue())
require.True(t, iter.Last())
require.Equal(t, []byte("ee"), iter.UnsafeKey())
require.Equal(t, []byte("55"), iter.UnsafeValue())
require.False(t, iter.Next())
require.False(t, iter.Valid())
require.NoError(t, iter.Close())
}
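
// TestMergingIter verifies that a merging iterator over overlapping
// SST files yields all keys in sorted order (duplicate keys across
// files collapsed into one entry), supports Seek, First, Last, and
// Next, and releases all pooled readers on Close.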
func TestMergingIter(t *testing.T) {
fs := vfs.Default
dirname := t.TempDir()
cache := pebble.NewCache(8 << 20)
defer cache.Unref()
readerPool := newSSTReaderPool(fs, dirname, cache)
openIter := func(file *fileMetadata) (Iterator, error) {
reader, err := readerPool.get(file.fileNum)
if err != nil {
return nil, errors.Trace(err)
}
iter, err := reader.NewIter(nil, nil)
if err != nil {
_ = readerPool.unref(file.fileNum)
return nil, errors.Trace(err)
}
return &sstIter{
iter: iter,
onClose: func() error {
return readerPool.unref(file.fileNum)
},
}, nil
}
files := []*fileMetadata{
writeSSTFile(t, fs, dirname, 1, []keyValue{
{[]byte("a0"), []byte("va0")},
{[]byte("a1"), []byte("va1")},
{[]byte("e0"), []byte("ve0")},
{[]byte("e1"), []byte("ve1")},
}),
writeSSTFile(t, fs, dirname, 2, []keyValue{
{[]byte("b0"), []byte("vb0")},
{[]byte("b1"), []byte("vb1")},
{[]byte("d0"), []byte("vd0")},
{[]byte("d1"), []byte("vd1")},
}),
writeSSTFile(t, fs, dirname, 3, []keyValue{
{[]byte("c0"), []byte("vc0")},
{[]byte("c1"), []byte("vc1")},
{[]byte("g0"), []byte("vg0")},
{[]byte("g1"), []byte("vg1")},
}),
writeSSTFile(t, fs, dirname, 4, []keyValue{
{[]byte("f0"), []byte("vf0")},
{[]byte("f1"), []byte("vf1")},
{[]byte("h1"), []byte("vh1")},
}),
writeSSTFile(t, fs, dirname, 5, []keyValue{
{[]byte("f0"), []byte("vf0")}, // duplicate key with file 4
{[]byte("f2"), []byte("vf2")},
{[]byte("h0"), []byte("vh0")},
}),
writeSSTFile(t, fs, dirname, 6, []keyValue{
{[]byte("i0"), []byte("vi0")},
{[]byte("i1"), []byte("vi1")},
{[]byte("j0"), []byte("vj0")},
{[]byte("j1"), []byte("vj1")},
}),
}
slices.SortFunc(files, func(a, b *fileMetadata) int {
return bytes.Compare(a.startKey, b.startKey)
})
iter := newMergingIter(files, openIter)
var allKVs []keyValue
for iter.First(); iter.Valid(); iter.Next() {
allKVs = append(allKVs, keyValue{
key: slices.Clone(iter.UnsafeKey()),
value: slices.Clone(iter.UnsafeValue()),
})
}
require.NoError(t, iter.Error())
require.Equal(t, []keyValue{
{[]byte("a0"), []byte("va0")},
{[]byte("a1"), []byte("va1")},
{[]byte("b0"), []byte("vb0")},
{[]byte("b1"), []byte("vb1")},
{[]byte("c0"), []byte("vc0")},
{[]byte("c1"), []byte("vc1")},
{[]byte("d0"), []byte("vd0")},
{[]byte("d1"), []byte("vd1")},
{[]byte("e0"), []byte("ve0")},
{[]byte("e1"), []byte("ve1")},
{[]byte("f0"), []byte("vf0")},
{[]byte("f1"), []byte("vf1")},
{[]byte("f2"), []byte("vf2")},
{[]byte("g0"), []byte("vg0")},
{[]byte("g1"), []byte("vg1")},
{[]byte("h0"), []byte("vh0")},
{[]byte("h1"), []byte("vh1")},
{[]byte("i0"), []byte("vi0")},
{[]byte("i1"), []byte("vi1")},
{[]byte("j0"), []byte("vj0")},
{[]byte("j1"), []byte("vj1")},
}, allKVs)
// Seek to the first key.
require.True(t, iter.Seek(nil))
require.Equal(t, []byte("a0"), iter.UnsafeKey())
require.Equal(t, []byte("va0"), iter.UnsafeValue())
// Seek to the key after the last key.
require.False(t, iter.Seek([]byte("k")))
require.NoError(t, iter.Error())
// Randomly seek to keys and check the result.
seekIndexes := make([]int, len(allKVs))
for i := range seekIndexes {
seekIndexes[i] = i
}
rand.Shuffle(len(seekIndexes), func(i, j int) {
seekIndexes[i], seekIndexes[j] = seekIndexes[j], seekIndexes[i]
})
for _, i := range seekIndexes {
require.True(t, iter.Seek(allKVs[i].key))
require.Equal(t, allKVs[i].key, iter.UnsafeKey())
require.Equal(t, allKVs[i].value, iter.UnsafeValue())
}
// Test Last.
require.True(t, iter.Last())
require.Equal(t, []byte("j1"), iter.UnsafeKey())
require.Equal(t, []byte("vj1"), iter.UnsafeValue())
require.False(t, iter.Next())
require.NoError(t, iter.Error())
// Then moving to First should be ok.
require.True(t, iter.First())
require.Equal(t, []byte("a0"), iter.UnsafeKey())
require.Equal(t, []byte("va0"), iter.UnsafeValue())
require.NoError(t, iter.Close())
// Check that no reader is leaked.
require.Zero(t, len(readerPool.mu.readers))
}
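
// TestPickCompactionFiles verifies which overlapping files are picked
// for compaction under different compaction thresholds.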
func TestPickCompactionFiles(t *testing.T) {
testCases := []struct {
allFiles []*fileMetadata
compactionThreshold int
expected []*fileMetadata
}{
{
// maxDepth: 1
// 1: a-b
// 2: b-c
// 3: c-d
allFiles: []*fileMetadata{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("b")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("c")},
{fileNum: 3, startKey: []byte("c"), endKey: []byte("d")},
},
compactionThreshold: 2,
expected: nil,
},
{
// maxDepth: 2
// 1: a-b
// 2: b--d
// 3: c--e
allFiles: []*fileMetadata{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("b")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("d")},
{fileNum: 3, startKey: []byte("c"), endKey: []byte("e")},
},
compactionThreshold: 2,
expected: []*fileMetadata{
{fileNum: 2, startKey: []byte("b"), endKey: []byte("d")},
{fileNum: 3, startKey: []byte("c"), endKey: []byte("e")},
},
},
{
// maxDepth: 3
// 1: a---c
// 2: b-------f
// 3: d------g
// 4: e--------i
// 5: h----j
allFiles: []*fileMetadata{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("c")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("f")},
{fileNum: 3, startKey: []byte("d"), endKey: []byte("g")},
{fileNum: 4, startKey: []byte("e"), endKey: []byte("i")},
{fileNum: 5, startKey: []byte("h"), endKey: []byte("j")},
},
compactionThreshold: 2,
expected: []*fileMetadata{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("c")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("f")},
{fileNum: 3, startKey: []byte("d"), endKey: []byte("g")},
{fileNum: 4, startKey: []byte("e"), endKey: []byte("i")},
{fileNum: 5, startKey: []byte("h"), endKey: []byte("j")},
},
},
{
// maxDepth: 3
// 1: a---c
// 2: b-------f
// 3: d------g
// 4: e--------i
// 5: h----j
allFiles: []*fileMetadata{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("c")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("f")},
{fileNum: 3, startKey: []byte("d"), endKey: []byte("g")},
{fileNum: 4, startKey: []byte("e"), endKey: []byte("i")},
{fileNum: 5, startKey: []byte("h"), endKey: []byte("j")},
},
compactionThreshold: 3,
expected: []*fileMetadata{
{fileNum: 2, startKey: []byte("b"), endKey: []byte("f")},
{fileNum: 3, startKey: []byte("d"), endKey: []byte("g")},
{fileNum: 4, startKey: []byte("e"), endKey: []byte("i")},
},
},
{
// maxDepth: 3
// 1: a---c
// 2: b-------f
// 3: d------g
// 4: e--------i
// 5: h----j
allFiles: []*fileMetadata{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("c")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("f")},
{fileNum: 3, startKey: []byte("d"), endKey: []byte("g")},
{fileNum: 4, startKey: []byte("e"), endKey: []byte("i")},
{fileNum: 5, startKey: []byte("h"), endKey: []byte("j")},
},
compactionThreshold: 4,
expected: nil,
},
}
for _, tc := range testCases {
actual := pickCompactionFiles(tc.allFiles, tc.compactionThreshold, zap.NewNop())
slices.SortFunc(actual, func(a, b *fileMetadata) int {
return cmp.Compare(a.fileNum, b.fileNum)
})
require.Equal(t, tc.expected, actual)
}
}
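
// TestSplitCompactionFiles verifies how overlapping files are split
// into independent compaction groups bounded by maxCompactionDepth.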
func TestSplitCompactionFiles(t *testing.T) {
testCases := []struct {
files []*fileMetadata
maxCompactionDepth int
expected [][]*fileMetadata
}{
{
// 1: a---c
// 2: b-------f
// 3: d------g
// 4: e--------i
// 5: h----j
files: []*fileMetadata{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("c")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("f")},
{fileNum: 3, startKey: []byte("d"), endKey: []byte("g")},
{fileNum: 4, startKey: []byte("e"), endKey: []byte("i")},
{fileNum: 5, startKey: []byte("h"), endKey: []byte("j")},
},
maxCompactionDepth: 5,
expected: [][]*fileMetadata{
{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("c")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("f")},
{fileNum: 3, startKey: []byte("d"), endKey: []byte("g")},
{fileNum: 4, startKey: []byte("e"), endKey: []byte("i")},
{fileNum: 5, startKey: []byte("h"), endKey: []byte("j")},
},
},
},
{
// 1: a---c
// 2: b-------f
// 3: d------g
// 4: e--------i
// 5: h----j
files: []*fileMetadata{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("c")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("f")},
{fileNum: 3, startKey: []byte("d"), endKey: []byte("g")},
{fileNum: 4, startKey: []byte("e"), endKey: []byte("i")},
{fileNum: 5, startKey: []byte("h"), endKey: []byte("j")},
},
maxCompactionDepth: 4,
expected: [][]*fileMetadata{
{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("c")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("f")},
{fileNum: 3, startKey: []byte("d"), endKey: []byte("g")},
},
{
{fileNum: 4, startKey: []byte("e"), endKey: []byte("i")},
{fileNum: 5, startKey: []byte("h"), endKey: []byte("j")},
},
},
},
{
// 1: a---c
// 2: b-------f
// 3: d-e
// 4: g---i
// 5: h----j
files: []*fileMetadata{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("c")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("f")},
{fileNum: 3, startKey: []byte("d"), endKey: []byte("e")},
{fileNum: 4, startKey: []byte("g"), endKey: []byte("i")},
{fileNum: 5, startKey: []byte("h"), endKey: []byte("j")},
},
maxCompactionDepth: 3,
expected: [][]*fileMetadata{
{
{fileNum: 1, startKey: []byte("a"), endKey: []byte("c")},
{fileNum: 2, startKey: []byte("b"), endKey: []byte("f")},
{fileNum: 3, startKey: []byte("d"), endKey: []byte("e")},
},
{
{fileNum: 4, startKey: []byte("g"), endKey: []byte("i")},
{fileNum: 5, startKey: []byte("h"), endKey: []byte("j")},
},
},
},
}
for _, tc := range testCases {
actual := splitCompactionFiles(tc.files, tc.maxCompactionDepth)
require.Equal(t, tc.expected, actual)
}
}
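
// TestBuildCompactions verifies how key ranges are split into
// compactions that respect maxCompactionSize, based on the per-file
// kv stats histograms.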
func TestBuildCompactions(t *testing.T) {
testCases := []struct {
files []*fileMetadata
maxCompactionSize int
expected []*compaction
}{
{
files: []*fileMetadata{
{
fileNum: 1,
startKey: []byte("a"),
endKey: []byte("c"),
},
{
fileNum: 2,
startKey: []byte("b"),
endKey: []byte("d"),
},
},
maxCompactionSize: 20,
expected: []*compaction{
{
startKey: []byte("a"),
endKey: []byte("d"),
overlapFiles: []*fileMetadata{
{fileNum: 1}, {fileNum: 2},
},
},
},
},
{
files: []*fileMetadata{
{
fileNum: 1,
startKey: []byte("a"),
endKey: []byte("e\x00"),
kvStats: kvStats{Histogram: []kvStatsBucket{
{Size: 20, UpperBound: []byte("b")},
{Size: 23, UpperBound: []byte("c")},
{Size: 21, UpperBound: []byte("d")},
{Size: 25, UpperBound: []byte("e")},
}},
},
},
maxCompactionSize: 20,
expected: []*compaction{
{
startKey: []byte("a"),
endKey: []byte("b"),
overlapFiles: []*fileMetadata{
{fileNum: 1},
},
},
{
startKey: []byte("b"),
endKey: []byte("c"),
overlapFiles: []*fileMetadata{
{fileNum: 1},
},
},
{
startKey: []byte("c"),
endKey: []byte("d"),
overlapFiles: []*fileMetadata{
{fileNum: 1},
},
},
{
startKey: []byte("d"),
endKey: []byte("e\x00"),
overlapFiles: []*fileMetadata{
{fileNum: 1},
},
},
},
},
{
files: []*fileMetadata{
{
fileNum: 1,
startKey: []byte("a"),
endKey: []byte("e\x00"),
kvStats: kvStats{Histogram: []kvStatsBucket{
{Size: 20, UpperBound: []byte("b")},
{Size: 23, UpperBound: []byte("c")},
{Size: 17, UpperBound: []byte("d")},
{Size: 25, UpperBound: []byte("e")},
}},
},
},
maxCompactionSize: 50,
expected: []*compaction{
{
startKey: []byte("a"),
endKey: []byte("d"),
overlapFiles: []*fileMetadata{
{fileNum: 1},
},
},
{
startKey: []byte("d"),
endKey: []byte("e\x00"),
overlapFiles: []*fileMetadata{
{fileNum: 1},
},
},
},
},
{
files: []*fileMetadata{
{
fileNum: 1,
startKey: []byte("a"),
endKey: []byte("e\x00"),
kvStats: kvStats{Histogram: []kvStatsBucket{
{Size: 20, UpperBound: []byte("b")},
{Size: 23, UpperBound: []byte("c")},
{Size: 17, UpperBound: []byte("d")},
{Size: 25, UpperBound: []byte("e")},
}},
},
{
fileNum: 2,
startKey: []byte("c"),
endKey: []byte("g\x00"),
kvStats: kvStats{Histogram: []kvStatsBucket{
{Size: 21, UpperBound: []byte("d")},
{Size: 22, UpperBound: []byte("f")},
{Size: 20, UpperBound: []byte("g")},
}},
},
},
maxCompactionSize: 50,
expected: []*compaction{
{
startKey: []byte("a"),
endKey: []byte("d"),
overlapFiles: []*fileMetadata{
{fileNum: 1}, {fileNum: 2},
},
},
{
startKey: []byte("d"),
endKey: []byte("g\x00"),
overlapFiles: []*fileMetadata{
{fileNum: 1}, {fileNum: 2},
},
},
},
},
}
for _, tc := range testCases {
actual := buildCompactions(tc.files, tc.maxCompactionSize)
require.Equal(t, len(tc.expected), len(actual))
for i := range tc.expected {
require.Equal(t, tc.expected[i].startKey, actual[i].startKey)
require.Equal(t, tc.expected[i].endKey, actual[i].endKey)
require.Equal(t, len(tc.expected[i].overlapFiles), len(actual[i].overlapFiles))
for j := range tc.expected[i].overlapFiles {
require.Equal(t, tc.expected[i].overlapFiles[j].fileNum, actual[i].overlapFiles[j].fileNum)
}
}
}
}