365 lines
8.6 KiB
Go
365 lines
8.6 KiB
Go
// Copyright 2019 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package memdb
|
|
|
|
import (
|
|
"bytes"
|
|
"math"
|
|
"unsafe"
|
|
)
|
|
|
|
const (
|
|
maxHeight = 16
|
|
nodeHeaderSize = int(unsafe.Sizeof(nodeHeader{}))
|
|
)
|
|
|
|
// DB is an in-memory key/value database.
|
|
type DB struct {
|
|
height int
|
|
head nodeWithAddr
|
|
arena *arena
|
|
|
|
length int
|
|
size int
|
|
}
|
|
|
|
// New creates a new initialized in-memory key/value DB.
|
|
// The initBlockSize is the size of first block.
|
|
// This DB is append-only, deleting an entry would remove entry node but not
|
|
// reclaim KV buffer.
|
|
func New(initBlockSize int) *DB {
|
|
return &DB{
|
|
height: 1,
|
|
head: nodeWithAddr{node: new(node)},
|
|
arena: newArenaLocator(initBlockSize),
|
|
}
|
|
}
|
|
|
|
// Reset resets the DB to initial empty state.
|
|
// Release all blocks except the initial one.
|
|
func (db *DB) Reset() {
|
|
db.height = 1
|
|
db.head.node = new(node)
|
|
db.length = 0
|
|
db.size = 0
|
|
db.arena.reset()
|
|
}
|
|
|
|
// Get gets the value for the given key. It returns nil if the
|
|
// DB does not contain the key.
|
|
func (db *DB) Get(key []byte) []byte {
|
|
node, data, match := db.findGreaterEqual(key)
|
|
if !match {
|
|
return nil
|
|
}
|
|
return node.getValue(data)
|
|
}
|
|
|
|
// Put sets the value for the given key.
|
|
// It overwrites any previous value for that key.
|
|
func (db *DB) Put(key []byte, v []byte) bool {
|
|
arena := db.arena
|
|
lsHeight := db.height
|
|
var prev [maxHeight + 1]nodeWithAddr
|
|
var next [maxHeight + 1]nodeWithAddr
|
|
prev[lsHeight] = db.head
|
|
|
|
var exists bool
|
|
for i := lsHeight - 1; i >= 0; i-- {
|
|
// Use higher level to speed up for current level.
|
|
prev[i], next[i], exists = db.findSpliceForLevel(db.arena, key, prev[i+1], i)
|
|
}
|
|
|
|
var height int
|
|
if !exists {
|
|
height = db.randomHeight()
|
|
} else {
|
|
height = db.prepareOverwrite(next[:])
|
|
}
|
|
|
|
x, addr := db.newNode(arena, key, v, height)
|
|
if height > lsHeight {
|
|
db.height = height
|
|
}
|
|
|
|
// We always insert from the base level and up. After you add a node in base level, we cannot
|
|
// create a node in the level above because it would have discovered the node in the base level.
|
|
for i := 0; i < height; i++ {
|
|
x.nexts[i] = next[i].addr
|
|
if prev[i].node == nil {
|
|
prev[i] = db.head
|
|
}
|
|
prev[i].nexts[i] = addr
|
|
}
|
|
|
|
x.prev = prev[0].addr
|
|
if next[0].node != nil {
|
|
next[0].prev = addr
|
|
}
|
|
|
|
db.length++
|
|
db.size += len(key) + len(v)
|
|
return true
|
|
}
|
|
|
|
// The pointers in findSpliceForLevel may point to the node which going to be overwrite,
|
|
// prepareOverwrite update them to point to the next node, so we can link new node with the list correctly.
|
|
func (db *DB) prepareOverwrite(next []nodeWithAddr) int {
|
|
old := next[0]
|
|
|
|
// Update necessary states.
|
|
db.size -= int(old.valLen) + int(old.keyLen)
|
|
db.length--
|
|
|
|
height := int(old.height)
|
|
for i := 0; i < height; i++ {
|
|
if next[i].addr == old.addr {
|
|
next[i].addr = old.nexts[i]
|
|
if !next[i].addr.isNull() {
|
|
data := db.arena.getFrom(next[i].addr)
|
|
next[i].node = (*node)(unsafe.Pointer(&data[0]))
|
|
}
|
|
}
|
|
}
|
|
return height
|
|
}
|
|
|
|
// Delete deletes the value for the given key.
|
|
// It returns false if the DB does not contain the key.
|
|
func (db *DB) Delete(key []byte) bool {
|
|
listHeight := db.height
|
|
var prev [maxHeight + 1]nodeWithAddr
|
|
prev[listHeight] = db.head
|
|
|
|
var keyNode nodeWithAddr
|
|
var match bool
|
|
for i := listHeight - 1; i >= 0; i-- {
|
|
prev[i], keyNode, match = db.findSpliceForLevel(db.arena, key, prev[i+1], i)
|
|
}
|
|
if !match {
|
|
return false
|
|
}
|
|
|
|
for i := int(keyNode.height) - 1; i >= 0; i-- {
|
|
prev[i].nexts[i] = keyNode.nexts[i]
|
|
}
|
|
nextAddr := keyNode.nexts[0]
|
|
if !nextAddr.isNull() {
|
|
nextData := db.arena.getFrom(nextAddr)
|
|
next := (*node)(unsafe.Pointer(&nextData[0]))
|
|
next.prev = prev[0].addr
|
|
}
|
|
|
|
db.length--
|
|
db.size -= int(keyNode.keyLen) + int(keyNode.valLen)
|
|
return true
|
|
}
|
|
|
|
// Len returns the number of entries in the DB.
|
|
func (db *DB) Len() int {
|
|
return db.length
|
|
}
|
|
|
|
// Size returns sum of keys and values length. Note that deleted
|
|
// key/value will not be accounted for, but it will still consume
|
|
// the buffer, since the buffer is append only.
|
|
func (db *DB) Size() int {
|
|
return db.size
|
|
}
|
|
|
|
// findSpliceForLevel returns (outBefore, outAfter) with outBefore.key < key <= outAfter.key.
|
|
// The input "before" tells us where to start looking.
|
|
// If we found a node with the same key, then we return true.
|
|
func (db *DB) findSpliceForLevel(arena *arena, key []byte, before nodeWithAddr, level int) (nodeWithAddr, nodeWithAddr, bool) {
|
|
for {
|
|
// Assume before.key < key.
|
|
nextAddr := before.nexts[level]
|
|
if nextAddr.isNull() {
|
|
return before, nodeWithAddr{}, false
|
|
}
|
|
data := arena.getFrom(nextAddr)
|
|
next := nodeWithAddr{(*node)(unsafe.Pointer(&data[0])), nextAddr}
|
|
nextKey := next.getKey(data)
|
|
cmp := bytes.Compare(nextKey, key)
|
|
if cmp >= 0 {
|
|
// before.key < key < next.key. We are done for this level.
|
|
return before, next, cmp == 0
|
|
}
|
|
before = next // Keep moving right on this level.
|
|
}
|
|
}
|
|
|
|
func (db *DB) findGreaterEqual(key []byte) (*node, []byte, bool) {
|
|
prev := db.head.node
|
|
level := db.height - 1
|
|
|
|
for {
|
|
var nextData []byte
|
|
var next *node
|
|
addr := prev.nexts[level]
|
|
if !addr.isNull() {
|
|
arena := db.arena
|
|
nextData = arena.getFrom(addr)
|
|
next = (*node)(unsafe.Pointer(&nextData[0]))
|
|
|
|
nextKey := next.getKey(nextData)
|
|
cmp := bytes.Compare(nextKey, key)
|
|
if cmp < 0 {
|
|
// next key is still smaller, keep moving.
|
|
prev = next
|
|
continue
|
|
}
|
|
if cmp == 0 {
|
|
// prev.key < key == next.key.
|
|
return next, nextData, true
|
|
}
|
|
}
|
|
// next is greater than key or next is nil. go to the lower level.
|
|
if level > 0 {
|
|
level--
|
|
continue
|
|
}
|
|
return next, nextData, false
|
|
}
|
|
}
|
|
|
|
func (db *DB) findLess(key []byte, allowEqual bool) (*node, []byte, bool) {
|
|
var prevData []byte
|
|
prev := db.head.node
|
|
level := db.height - 1
|
|
|
|
for {
|
|
next, nextData := db.getNext(prev, level)
|
|
if next != nil {
|
|
cmp := bytes.Compare(key, next.getKey(nextData))
|
|
if cmp > 0 {
|
|
// prev.key < next.key < key. We can continue to move right.
|
|
prev = next
|
|
prevData = nextData
|
|
continue
|
|
}
|
|
if cmp == 0 && allowEqual {
|
|
// prev.key < key == next.key.
|
|
return next, nextData, true
|
|
}
|
|
}
|
|
// get closer to the key in the lower level.
|
|
if level > 0 {
|
|
level--
|
|
continue
|
|
}
|
|
break
|
|
}
|
|
|
|
// We are not going to return head.
|
|
if prev == db.head.node {
|
|
return nil, nil, false
|
|
}
|
|
return prev, prevData, false
|
|
}
|
|
|
|
// findLast returns the last element. If head (empty db), we return nil. All the find functions
|
|
// will NEVER return the head nodes.
|
|
func (db *DB) findLast() (*node, []byte) {
|
|
var nodeData []byte
|
|
node := db.head.node
|
|
level := db.height - 1
|
|
|
|
for {
|
|
next, nextData := db.getNext(node, level)
|
|
if next != nil {
|
|
node = next
|
|
nodeData = nextData
|
|
continue
|
|
}
|
|
if level == 0 {
|
|
if node == db.head.node {
|
|
return nil, nil
|
|
}
|
|
return node, nodeData
|
|
}
|
|
level--
|
|
}
|
|
}
|
|
|
|
func (db *DB) newNode(arena *arena, key []byte, v []byte, height int) (*node, arenaAddr) {
|
|
// The base level is already allocated in the node struct.
|
|
nodeSize := nodeHeaderSize + height*8 + 8 + len(key) + len(v)
|
|
addr, data := arena.alloc(nodeSize)
|
|
node := (*node)(unsafe.Pointer(&data[0]))
|
|
node.keyLen = uint16(len(key))
|
|
node.height = uint16(height)
|
|
node.valLen = uint32(len(v))
|
|
copy(data[node.nodeLen():], key)
|
|
copy(data[node.nodeLen()+int(node.keyLen):], v)
|
|
return node, addr
|
|
}
|
|
|
|
// fastRand returns a pseudo-random uint32. It has no Go body in this file:
// the go:linkname directive below binds it to runtime.fastrand, described
// by the original author as a fast thread-local random function.
//
//go:linkname fastRand runtime.fastrand
func fastRand() uint32
|
|
|
|
func (db *DB) randomHeight() int {
|
|
h := 1
|
|
for h < maxHeight && fastRand() < uint32(math.MaxUint32)/4 {
|
|
h++
|
|
}
|
|
return h
|
|
}
|
|
|
|
// nodeHeader is the fixed-size prefix of every node.
type nodeHeader struct {
	// height is the number of levels the node is linked into.
	height uint16
	// keyLen is the length of the key in bytes.
	keyLen uint16
	// valLen is the length of the value in bytes.
	valLen uint32
}
|
|
|
|
type node struct {
|
|
nodeHeader
|
|
|
|
// Addr of previous node at base level.
|
|
prev arenaAddr
|
|
// Height of the nexts.
|
|
nexts [maxHeight]arenaAddr
|
|
}
|
|
|
|
type nodeWithAddr struct {
|
|
*node
|
|
addr arenaAddr
|
|
}
|
|
|
|
func (n *node) nodeLen() int {
|
|
return int(n.height)*8 + 8 + nodeHeaderSize
|
|
}
|
|
|
|
func (n *node) getKey(buf []byte) []byte {
|
|
nodeLen := n.nodeLen()
|
|
return buf[nodeLen : nodeLen+int(n.keyLen)]
|
|
}
|
|
|
|
func (n *node) getValue(buf []byte) []byte {
|
|
nodeLenKeyLen := n.nodeLen() + int(n.keyLen)
|
|
return buf[nodeLenKeyLen : nodeLenKeyLen+int(n.valLen)]
|
|
}
|
|
|
|
func (db *DB) getNext(n *node, level int) (*node, []byte) {
|
|
addr := n.nexts[level]
|
|
if addr.isNull() {
|
|
return nil, nil
|
|
}
|
|
arena := db.arena
|
|
data := arena.getFrom(addr)
|
|
node := (*node)(unsafe.Pointer(&data[0]))
|
|
return node, data
|
|
}
|