Files
tidb/kv/memdb/memdb.go

365 lines
8.6 KiB
Go

// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package memdb
import (
"bytes"
"math"
"unsafe"
)
const (
// maxHeight is the largest number of levels a node may take part in. It
// bounds randomHeight and is the length of node.nexts.
maxHeight = 16
// nodeHeaderSize is the size in bytes of nodeHeader, as reported by
// unsafe.Sizeof. newNode and nodeLen use it to compute where a node's key
// starts inside its buffer.
nodeHeaderSize = int(unsafe.Sizeof(nodeHeader{}))
)
// DB is an in-memory key/value database. Entries are kept ordered by key
// (bytes.Compare) in a multi-level linked list whose nodes live in an arena.
type DB struct {
// height is the number of levels currently in use. It starts at 1 and is
// raised by Put when a taller node is inserted; searches start at level
// height-1.
height int
// head is the sentinel node every level starts from. It carries no key and
// is never returned by the find functions.
head nodeWithAddr
// arena provides the storage for all nodes other than head.
arena *arena
// length is the number of entries currently linked into the list.
length int
// size is the sum of key and value lengths of the entries currently linked
// into the list.
size int
}
// New returns an empty DB that is ready for use.
//
// initBlockSize is passed to newArenaLocator as the size of the first arena
// block. The DB is append-only: deleting an entry unlinks its node, but the
// buffer space that held the key and value is not reclaimed.
func New(initBlockSize int) *DB {
	db := new(DB)
	db.height = 1
	db.head = nodeWithAddr{node: new(node)}
	db.arena = newArenaLocator(initBlockSize)
	return db
}
// Reset puts the DB back into its initial empty state: a fresh head node,
// height 1, and zero length and size. The arena is reset last, which releases
// all blocks except the initial one.
func (db *DB) Reset() {
	db.length, db.size = 0, 0
	db.height = 1
	db.head.node = new(node)
	db.arena.reset()
}
// Get looks up key and returns its value, or nil when the DB does not contain
// the key. A key stored with an empty value yields an empty, non-nil slice.
//
// The result is sliced out of the buffer that holds the node rather than
// copied. NOTE(review): presumably callers must treat it as read-only.
func (db *DB) Get(key []byte) []byte {
	if found, buf, ok := db.findGreaterEqual(key); ok {
		return found.getValue(buf)
	}
	return nil
}
// Put stores v under key, replacing any value previously stored for that key.
// Both key and v are copied into a newly allocated node, so the caller may
// reuse the slices afterwards. Put always returns true.
//
// A new key gets a node of random height. An existing key gets a new node of
// the same height as the one it replaces; the old node is unlinked and left
// behind in the arena.
func (db *DB) Put(key []byte, v []byte) bool {
	mem := db.arena
	oldHeight := db.height

	// before[l] / after[l] are the neighbours of key on level l. The arrays
	// have one spare slot so that index oldHeight can seed the search with the
	// head node.
	var before, after [maxHeight + 1]nodeWithAddr
	before[oldHeight] = db.head

	found := false
	for lvl := oldHeight; lvl > 0; lvl-- {
		// The predecessor found one level up is a valid starting point for the
		// level below, which saves walking from the head again.
		before[lvl-1], after[lvl-1], found = db.findSpliceForLevel(mem, key, before[lvl], lvl-1)
	}

	var height int
	if found {
		// after[] currently points at the node being replaced; skip past it.
		height = db.prepareOverwrite(after[:])
	} else {
		height = db.randomHeight()
	}

	fresh, freshAddr := db.newNode(mem, key, v, height)
	if oldHeight < height {
		db.height = height
	}

	// Link the new node into every level it takes part in, bottom-up.
	for lvl := 0; lvl < height; lvl++ {
		fresh.nexts[lvl] = after[lvl].addr
		if before[lvl].node == nil {
			// Levels above the previous list height were never searched, so
			// their predecessor is the head.
			before[lvl] = db.head
		}
		before[lvl].nexts[lvl] = freshAddr
	}

	// Maintain the backward link that exists on the base level only.
	fresh.prev = before[0].addr
	if succ := after[0].node; succ != nil {
		succ.prev = freshAddr
	}

	db.length++
	db.size += len(key) + len(v)
	return true
}
// prepareOverwrite gets the splice computed by Put ready for replacing an
// existing entry.
//
// When the key already exists, the "next" entries returned by
// findSpliceForLevel point at the node that is about to be overwritten. For
// every level that node takes part in, prepareOverwrite moves next[i] forward
// to the node that follows it, so that the replacement node can be linked
// straight to the old node's successors. It also removes the old entry from
// the length and size accounting; Put adds the new entry back afterwards.
//
// next must hold the per-level successors with next[0] being the node to
// overwrite. The return value is the old node's height, which Put reuses for
// the replacement node.
func (db *DB) prepareOverwrite(next []nodeWithAddr) int {
	old := next[0]

	// Update necessary states.
	db.size -= int(old.keyLen) + int(old.valLen)
	db.length--

	height := int(old.height)
	for i := 0; i < height; i++ {
		if next[i].addr != old.addr {
			continue
		}
		succAddr := old.nexts[i]
		if succAddr.isNull() {
			// The old node was the last one on this level. Clear the node
			// pointer together with the address: Put tests next[0].node != nil
			// before patching the successor's prev link, and a stale pointer
			// would make it write into the node that has just been unlinked.
			next[i] = nodeWithAddr{addr: succAddr}
			continue
		}
		data := db.arena.getFrom(succAddr)
		next[i] = nodeWithAddr{node: (*node)(unsafe.Pointer(&data[0])), addr: succAddr}
	}
	return height
}
// Delete removes the entry stored under key. It returns false, and changes
// nothing, when the DB does not contain the key.
//
// The node is only unlinked from the list; its bytes stay in the arena.
func (db *DB) Delete(key []byte) bool {
	top := db.height

	// before[l] is the predecessor of key on level l; slot top seeds the
	// search with the head node.
	var before [maxHeight + 1]nodeWithAddr
	before[top] = db.head

	var (
		victim nodeWithAddr
		found  bool
	)
	for lvl := top; lvl > 0; lvl-- {
		before[lvl-1], victim, found = db.findSpliceForLevel(db.arena, key, before[lvl], lvl-1)
	}
	if !found {
		return false
	}

	// Bypass the node on every level it takes part in. Each write touches a
	// different nexts slot of a predecessor, so the level order is irrelevant.
	for lvl, n := 0, int(victim.height); lvl < n; lvl++ {
		before[lvl].nexts[lvl] = victim.nexts[lvl]
	}

	// Repair the base-level backward link of the following node, if any.
	if succAddr := victim.nexts[0]; !succAddr.isNull() {
		succData := db.arena.getFrom(succAddr)
		succ := (*node)(unsafe.Pointer(&succData[0]))
		succ.prev = before[0].addr
	}

	db.size -= int(victim.keyLen) + int(victim.valLen)
	db.length--
	return true
}
// Len returns the number of entries in the DB. Putting a new key increases it
// by one, deleting an existing key decreases it by one, and overwriting an
// existing key leaves it unchanged.
func (db *DB) Len() int {
return db.length
}
// Size returns the sum of the key and value lengths of the entries currently
// in the DB. Deleted or overwritten key/value pairs are not counted here, but
// they still occupy buffer space because the buffer is append-only.
func (db *DB) Size() int {
return db.size
}
// findSpliceForLevel walks a single level of the list and returns
// (outBefore, outAfter, match) such that outBefore.key < key <= outAfter.key.
//
// The search starts at before, which must be the head or a node whose key is
// smaller than key. When the level ends before a key >= key is reached,
// outAfter is the zero nodeWithAddr. match reports whether outAfter.key equals
// key.
func (db *DB) findSpliceForLevel(arena *arena, key []byte, before nodeWithAddr, level int) (nodeWithAddr, nodeWithAddr, bool) {
	cur := before
	for {
		succAddr := cur.nexts[level]
		if succAddr.isNull() {
			// Ran off the end of this level: everything on it is smaller.
			return cur, nodeWithAddr{}, false
		}

		buf := arena.getFrom(succAddr)
		succ := nodeWithAddr{node: (*node)(unsafe.Pointer(&buf[0])), addr: succAddr}
		if c := bytes.Compare(succ.getKey(buf), key); c >= 0 {
			// cur.key < key <= succ.key, so this level is done.
			return cur, succ, c == 0
		}

		// succ.key is still smaller than key; keep moving right.
		cur = succ
	}
}
// findGreaterEqual searches for the first node whose key is >= key.
//
// On an exact match it returns that node, its buffer and true; the match may
// be detected on any level. Otherwise it returns the first node with a greater
// key together with its buffer, or (nil, nil) when no such node exists, and
// false.
func (db *DB) findGreaterEqual(key []byte) (*node, []byte, bool) {
	cur := db.head.node
	level := db.height - 1
	for {
		succ, succData := db.getNext(cur, level)
		if succ != nil {
			switch c := bytes.Compare(succ.getKey(succData), key); {
			case c < 0:
				// Still smaller than key: advance along this level.
				cur = succ
				continue
			case c == 0:
				// cur.key < key == succ.key.
				return succ, succData, true
			}
		}

		// succ is greater than key or this level has ended.
		if level <= 0 {
			return succ, succData, false
		}
		level--
	}
}
// findLess searches for the last node whose key is smaller than key.
//
// When allowEqual is set and a node with exactly key is encountered, that node
// is returned with true instead. In every other case the boolean is false and
// the result is the closest smaller node with its buffer, or (nil, nil) when
// no smaller node exists. The head node is never returned.
func (db *DB) findLess(key []byte, allowEqual bool) (*node, []byte, bool) {
	var curData []byte
	cur := db.head.node
	level := db.height - 1
	for {
		succ, succData := db.getNext(cur, level)
		if succ != nil {
			c := bytes.Compare(key, succ.getKey(succData))
			switch {
			case c > 0:
				// cur.key < succ.key < key: keep moving right.
				cur, curData = succ, succData
				continue
			case c == 0 && allowEqual:
				// cur.key < key == succ.key.
				return succ, succData, true
			}
		}

		if level <= 0 {
			// Base level exhausted; cur is the answer unless it is the head.
			if cur == db.head.node {
				return nil, nil, false
			}
			return cur, curData, false
		}
		// Drop down a level to get closer to key.
		level--
	}
}
// findLast returns the last node of the list together with its buffer, or
// (nil, nil) when the DB is empty. Like all find functions it never returns
// the head node.
func (db *DB) findLast() (*node, []byte) {
	var curData []byte
	cur := db.head.node
	for level := db.height - 1; ; level-- {
		// Run to the end of the current level.
		for {
			succ, succData := db.getNext(cur, level)
			if succ == nil {
				break
			}
			cur, curData = succ, succData
		}

		if level == 0 {
			if cur == db.head.node {
				return nil, nil
			}
			return cur, curData
		}
	}
}
// newNode allocates a node of the given height from arena, fills in its header
// and copies key and v behind it. It returns the node and its arena address.
//
// The allocation is sized as: header, 8 bytes for the prev address, 8 bytes
// per level for the nexts entries, then the key and value bytes. The prev and
// nexts links are not initialised here; the caller sets them.
//
// NOTE(review): the lengths are stored through uint16/uint32 conversions with
// no range check, so a key longer than 65535 bytes would be recorded with a
// truncated length. Presumably callers bound key and value sizes; confirm.
func (db *DB) newNode(arena *arena, key []byte, v []byte, height int) (*node, arenaAddr) {
	total := nodeHeaderSize + height*8 + 8 + len(key) + len(v)
	addr, buf := arena.alloc(total)

	n := (*node)(unsafe.Pointer(&buf[0]))
	n.height = uint16(height)
	n.keyLen = uint16(len(key))
	n.valLen = uint32(len(v))

	keyOff := n.nodeLen()
	valOff := keyOff + int(n.keyLen)
	copy(buf[keyOff:], key)
	copy(buf[valOff:], v)
	return n, addr
}
// fastRand returns a pseudo-random uint32. The declaration has no body: the
// go:linkname directive below binds it to runtime.fastrand.
//
//go:linkname fastRand runtime.fastrand
func fastRand() uint32
// randomHeight picks the height for a new node. It starts at 1 and adds a
// level for as long as fastRand returns a value below MaxUint32/4, stopping at
// maxHeight. With a uniform fastRand each extra level is therefore taken with
// a probability of about one in four.
func (db *DB) randomHeight() int {
	const threshold = uint32(math.MaxUint32) / 4

	h := 1
	for ; h < maxHeight; h++ {
		if fastRand() >= threshold {
			break
		}
	}
	return h
}
// nodeHeader is the fixed-size prefix of every node. newNode fills it in, and
// nodeLen, getKey and getValue read it to locate the key and value bytes that
// follow the node inside its buffer.
type nodeHeader struct {
// height is the number of levels the node takes part in.
height uint16
// keyLen is the length of the key in bytes.
keyLen uint16
// valLen is the length of the value in bytes.
valLen uint32
}
// node is the in-buffer representation of a list entry. Nodes other than the
// head are not allocated as Go values: a *node is obtained by casting the
// start of an arena buffer, and the key and value bytes follow the links.
type node struct {
nodeHeader
// Addr of previous node at base level.
prev arenaAddr
// nexts[i] is the address of the following node at level i. For nodes made
// by newNode only the first height entries are backed by link storage
// (8 bytes per level are reserved); the bytes after them hold the key and
// value, so reading an entry at or above height would reinterpret key/value
// bytes. The head node comes from new(node) and has the full array.
// NOTE(review): this relies on arenaAddr being 8 bytes - confirm.
nexts [maxHeight]arenaAddr
}
// nodeWithAddr pairs a node pointer with the arena address it was resolved
// from, so that code holding the node can also store links that refer to it.
// The zero value (nil node) stands for "no node".
type nodeWithAddr struct {
*node
// addr is the arena address of the embedded node.
addr arenaAddr
}
// nodeLen returns the number of bytes taken by the node's header and links:
// the header, 8 bytes for prev and 8 bytes for each of the node's levels. It
// is the offset at which the key starts inside the node's buffer.
func (n *node) nodeLen() int {
	const addrSize = 8
	levels := int(n.height)
	return nodeHeaderSize + addrSize + levels*addrSize
}
// getKey returns the node's key as a sub-slice of buf, which must be the
// buffer the node was resolved from. No copy is made.
func (n *node) getKey(buf []byte) []byte {
	start := n.nodeLen()
	end := start + int(n.keyLen)
	return buf[start:end]
}
// getValue returns the node's value as a sub-slice of buf, which must be the
// buffer the node was resolved from. The value sits directly after the key;
// no copy is made.
func (n *node) getValue(buf []byte) []byte {
	start := n.nodeLen() + int(n.keyLen)
	end := start + int(n.valLen)
	return buf[start:end]
}
// getNext resolves the node that follows n on the given level. It returns the
// node together with the arena buffer it lives in, or (nil, nil) when n is the
// last node on that level.
func (db *DB) getNext(n *node, level int) (*node, []byte) {
	if addr := n.nexts[level]; !addr.isNull() {
		buf := db.arena.getFrom(addr)
		return (*node)(unsafe.Pointer(&buf[0])), buf
	}
	return nil, nil
}