// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package extsort
|
|
|
|
import (
|
|
"context"
|
|
goerrors "errors"
|
|
"fmt"
|
|
"math"
|
|
"path/filepath"
|
|
"runtime"
|
|
"sync/atomic"
|
|
|
|
"github.com/cockroachdb/pebble"
|
|
"github.com/cockroachdb/pebble/sstable"
|
|
"github.com/pingcap/errors"
|
|
"golang.org/x/exp/slices"
|
|
)
|
|
|
|
// Lifecycle states kept in DiskSorter.state. They are ordered: a sorter
// starts in the writing state and Sort moves it forward.
const (
	// diskSorterStateWriting is the initial state; NewWriter only hands out
	// writers while the sorter is in this state.
	diskSorterStateWriting = iota
	// diskSorterStateSorting is set by Sort before the sort work begins.
	diskSorterStateSorting
	// diskSorterStateSorted is set by Sort once sorting has succeeded;
	// NewIterator requires this state.
	diskSorterStateSorted
)
|
|
|
|
// DiskSorter is an external sorter that sorts data on disk.
|
|
type DiskSorter struct {
|
|
db *pebble.DB
|
|
dbOpts *pebble.Options
|
|
|
|
opts *DiskSorterOptions
|
|
dbDir string // directory for the pebble database
|
|
tmpDir string // directory for temporary files
|
|
idGen *atomic.Int64 // id generator for sst files
|
|
|
|
state atomic.Int32
|
|
}
|
|
|
|
// DiskSorterOptions holds the optional parameters for DiskSorter.
type DiskSorterOptions struct {
	// Concurrency is the maximum number of goroutines that can be used to
	// sort data in parallel. It is passed to pebble as
	// MaxConcurrentCompactions.
	//
	// A value <= 0 selects the default, runtime.GOMAXPROCS(0).
	Concurrency int

	// WriterBufferSize is the size in bytes of the buffer allocated for each
	// writer. Larger buffer size can improve the write and sort performance,
	// and reduce the number of disk operations.
	//
	// A value <= 0 selects the default, 128MB.
	WriterBufferSize int
}

// ensureDefaults replaces every unset or invalid (non-positive) option with
// its default value. It modifies o in place.
func (o *DiskSorterOptions) ensureDefaults() {
	if o.Concurrency <= 0 {
		o.Concurrency = runtime.GOMAXPROCS(0)
	}
	// A negative size would make the buffer allocation in NewWriter panic,
	// so treat it the same as "not set".
	if o.WriterBufferSize <= 0 {
		o.WriterBufferSize = 128 << 20 // 128MB
	}
}
|
|
|
|
// OpenDiskSorter opens a DiskSorter with the given directory.
|
|
func OpenDiskSorter(dirname string, opts *DiskSorterOptions) (*DiskSorter, error) {
|
|
if opts == nil {
|
|
opts = &DiskSorterOptions{}
|
|
}
|
|
opts.ensureDefaults()
|
|
|
|
dbOpts := &pebble.Options{
|
|
MaxConcurrentCompactions: opts.Concurrency,
|
|
DisableWAL: true,
|
|
L0CompactionThreshold: math.MaxInt,
|
|
L0StopWritesThreshold: math.MaxInt,
|
|
}
|
|
dbOpts = dbOpts.EnsureDefaults()
|
|
|
|
dbDir := filepath.Join(dirname, "db")
|
|
tmpDir := filepath.Join(dirname, "tmp")
|
|
|
|
// Clean up the temporary directory.
|
|
if err := dbOpts.FS.RemoveAll(tmpDir); err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
if err := dbOpts.FS.MkdirAll(tmpDir, 0755); err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
|
|
db, err := pebble.Open(dbDir, dbOpts)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
return &DiskSorter{
|
|
db: db,
|
|
dbOpts: dbOpts,
|
|
opts: opts,
|
|
dbDir: dbDir,
|
|
tmpDir: tmpDir,
|
|
idGen: new(atomic.Int64),
|
|
}, nil
|
|
}
|
|
|
|
// NewWriter implements the ExternalSorter.NewWriter.
|
|
func (d *DiskSorter) NewWriter(_ context.Context) (Writer, error) {
|
|
if d.state.Load() > diskSorterStateWriting {
|
|
return nil, errors.Errorf("diskSorter started sorting, cannot write more data")
|
|
}
|
|
return &diskSorterWriter{
|
|
d: d,
|
|
buf: make([]byte, d.opts.WriterBufferSize),
|
|
}, nil
|
|
}
|
|
|
|
// Sort implements the ExternalSorter.Sort.
|
|
func (d *DiskSorter) Sort(ctx context.Context) error {
|
|
if d.state.Load() == diskSorterStateSorted {
|
|
return nil
|
|
}
|
|
d.state.Store(diskSorterStateSorting)
|
|
if err := d.doSort(ctx); err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
// TODO: Persist the state to disk.
|
|
d.state.Store(diskSorterStateSorted)
|
|
return nil
|
|
}
|
|
|
|
func (d *DiskSorter) doSort(_ context.Context) error {
|
|
iter := d.db.NewIter(nil)
|
|
if !iter.Last() {
|
|
// No keys or any error occurred.
|
|
_ = iter.Close()
|
|
return errors.Trace(iter.Error())
|
|
}
|
|
end := slices.Clone(iter.Key())
|
|
if err := iter.Close(); err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
|
|
// TODO: It seems compact doesn't run in parallel.
|
|
// We can use multiple compactions to speed up.
|
|
err := d.db.Compact(nil, end)
|
|
return errors.Trace(err)
|
|
}
|
|
|
|
// IsSorted implements the ExternalSorter.IsSorted.
|
|
func (d *DiskSorter) IsSorted() bool {
|
|
return d.state.Load() == diskSorterStateSorted
|
|
}
|
|
|
|
// NewIterator implements the ExternalSorter.NewIterator.
|
|
func (d *DiskSorter) NewIterator(_ context.Context) (Iterator, error) {
|
|
if d.state.Load() != diskSorterStateSorted {
|
|
return nil, errors.Errorf("diskSorter is not sorted")
|
|
}
|
|
return &diskSorterIterator{iter: d.db.NewIter(nil)}, nil
|
|
}
|
|
|
|
// Close implements the ExternalSorter.Close.
|
|
func (d *DiskSorter) Close() error {
|
|
return errors.Trace(d.db.Close())
|
|
}
|
|
|
|
// CloseAndCleanup implements the ExternalSorter.CloseAndCleanup.
|
|
func (d *DiskSorter) CloseAndCleanup() error {
|
|
if err := d.Close(); err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
fs := d.dbOpts.FS
|
|
err1 := fs.RemoveAll(d.dbDir)
|
|
err2 := fs.RemoveAll(d.tmpDir)
|
|
err := goerrors.Join(err1, err2)
|
|
return errors.Trace(err)
|
|
}
|
|
|
|
type diskSorterIterator struct{ iter *pebble.Iterator }
|
|
|
|
func (i *diskSorterIterator) Seek(key []byte) bool { return i.iter.SeekGE(key) }
|
|
func (i *diskSorterIterator) First() bool { return i.iter.First() }
|
|
func (i *diskSorterIterator) Next() bool { return i.iter.Next() }
|
|
func (i *diskSorterIterator) Last() bool { return i.iter.Last() }
|
|
func (i *diskSorterIterator) Valid() bool { return i.iter.Valid() }
|
|
func (i *diskSorterIterator) Error() error { return i.iter.Error() }
|
|
func (i *diskSorterIterator) UnsafeKey() []byte { return i.iter.Key() }
|
|
func (i *diskSorterIterator) UnsafeValue() []byte { return i.iter.Value() }
|
|
func (i *diskSorterIterator) Close() error { return i.iter.Close() }
|
|
|
|
// keyValue is a single entry buffered by diskSorterWriter. Both slices point
// into the writer's buffer; value is nil when the entry has an empty value.
type keyValue struct {
	key, value []byte
}
|
|
|
|
type diskSorterWriter struct {
|
|
d *DiskSorter
|
|
kvs []keyValue
|
|
buf []byte
|
|
off int
|
|
}
|
|
|
|
func (w *diskSorterWriter) Put(key, value []byte) error {
|
|
if w.off+len(key)+len(value) > len(w.buf) {
|
|
if err := w.flush(); err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
// The default buffer is too small, enlarge it to fit the key and value.
|
|
if w.off+len(key)+len(value) > len(w.buf) {
|
|
w.buf = make([]byte, w.off+len(key)+len(value))
|
|
}
|
|
}
|
|
|
|
var kv keyValue
|
|
kv.key = w.buf[w.off : w.off+len(key)]
|
|
w.off += len(key)
|
|
copy(kv.key, key)
|
|
if len(value) > 0 {
|
|
kv.value = w.buf[w.off : w.off+len(value)]
|
|
w.off += len(value)
|
|
copy(kv.value, value)
|
|
}
|
|
w.kvs = append(w.kvs, kv)
|
|
return nil
|
|
}
|
|
|
|
func (w *diskSorterWriter) Flush() error {
|
|
if len(w.kvs) == 0 {
|
|
return nil
|
|
}
|
|
return w.flush()
|
|
}
|
|
|
|
func (w *diskSorterWriter) flush() error {
|
|
db := w.d.db
|
|
fs := w.d.dbOpts.FS
|
|
comparer := w.d.dbOpts.Comparer
|
|
|
|
filename := fmt.Sprintf("%d.sst", w.d.idGen.Add(1))
|
|
sstPath := filepath.Join(w.d.tmpDir, filename)
|
|
|
|
defer func() {
|
|
if _, err := fs.Stat(sstPath); err == nil {
|
|
_ = fs.Remove(sstPath)
|
|
}
|
|
}()
|
|
|
|
f, err := fs.Create(sstPath)
|
|
if err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
|
|
slices.SortFunc(w.kvs, func(a, b keyValue) bool {
|
|
return comparer.Compare(a.key, b.key) < 0
|
|
})
|
|
|
|
sstWriter := sstable.NewWriter(f, sstable.WriterOptions{
|
|
Comparer: comparer,
|
|
})
|
|
for _, kv := range w.kvs {
|
|
if err := sstWriter.Set(kv.key, kv.value); err != nil {
|
|
_ = sstWriter.Close()
|
|
return errors.Trace(err)
|
|
}
|
|
}
|
|
if err := sstWriter.Close(); err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
|
|
if err := db.Ingest([]string{sstPath}); err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
|
|
w.kvs = w.kvs[:0]
|
|
w.off = 0
|
|
return nil
|
|
}
|
|
|
|
func (w *diskSorterWriter) Close() error {
|
|
return w.Flush()
|
|
}
|