Files
tidb/pkg/kv/kv.go

845 lines
29 KiB
Go

// Copyright 2015 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package kv
import (
"bytes"
"context"
"crypto/tls"
"slices"
"time"
"github.com/pingcap/errors"
deadlockpb "github.com/pingcap/kvproto/pkg/deadlock"
"github.com/pingcap/kvproto/pkg/kvrpcpb"
"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pingcap/tidb/pkg/config"
"github.com/pingcap/tidb/pkg/meta/model"
"github.com/pingcap/tidb/pkg/parser"
"github.com/pingcap/tidb/pkg/resourcegroup"
"github.com/pingcap/tidb/pkg/util/memory"
"github.com/pingcap/tidb/pkg/util/resourcegrouptag"
"github.com/pingcap/tidb/pkg/util/tiflash"
"github.com/pingcap/tidb/pkg/util/trxevents"
"github.com/pingcap/tipb/go-tipb"
tikvstore "github.com/tikv/client-go/v2/kv"
"github.com/tikv/client-go/v2/oracle"
"github.com/tikv/client-go/v2/tikv"
"github.com/tikv/client-go/v2/tikvrpc"
"github.com/tikv/client-go/v2/util"
pd "github.com/tikv/pd/client"
pdhttp "github.com/tikv/pd/client/http"
"go.uber.org/atomic"
)
// UnCommitIndexKVFlag uses to indicate the index key/value is no need to commit.
// This is used in the situation of the index key/value was unchanged when do update.
// Usage:
// 1. For non-unique index: normally, the index value is '0'.
// Change the value to '1' indicate the index key/value is no need to commit.
// 2. For unique index: normally, the index value is the record handle ID, 8 bytes.
// Append UnCommitIndexKVFlag to the value indicate the index key/value is no need to commit.
const UnCommitIndexKVFlag byte = '1'
// Those limits is enforced to make sure the transaction can be well handled by TiKV.
var (
// TxnEntrySizeLimit is limit of single entry size (len(key) + len(value)).
TxnEntrySizeLimit = atomic.NewUint64(config.DefTxnEntrySizeLimit)
// TxnTotalSizeLimit is limit of the sum of all entry size.
TxnTotalSizeLimit = atomic.NewUint64(config.DefTxnTotalSizeLimit)
)
// Getter is the interface for the Get method.
type Getter interface {
// Get gets the value for key k from kv store.
// If corresponding kv pair does not exist, it returns nil and ErrNotExist.
Get(ctx context.Context, k Key) ([]byte, error)
}
// Retriever is the interface wraps the basic Get and Seek methods.
type Retriever interface {
Getter
// Iter creates an Iterator positioned on the first entry that k <= entry's key.
// If such entry is not found, it returns an invalid Iterator with no error.
// It yields only keys that < upperBound. If upperBound is nil, it means the upperBound is unbounded.
// The Iterator must be Closed after use.
Iter(k Key, upperBound Key) (Iterator, error)
// IterReverse creates a reversed Iterator positioned on the first entry which key is less than k.
// The returned iterator will iterate from greater key to smaller key.
// If k is nil, the returned iterator will be positioned at the last key.
// It yields only keys that >= lowerBound. If lowerBound is nil, it means the lowerBound is unbounded.
IterReverse(k, lowerBound Key) (Iterator, error)
}
// EmptyIterator is an iterator without any entry
type EmptyIterator struct{}
// Valid returns true if the current iterator is valid.
func (*EmptyIterator) Valid() bool { return false }
// Key returns the current key. Always return nil for this iterator
func (*EmptyIterator) Key() Key { return nil }
// Value returns the current value. Always return nil for this iterator
func (*EmptyIterator) Value() []byte { return nil }
// Next goes the next position. Always return error for this iterator
func (*EmptyIterator) Next() error { return errors.New("iterator is invalid") }
// Close closes the iterator.
func (*EmptyIterator) Close() {}
// EmptyRetriever is a retriever without any entry
type EmptyRetriever struct{}
// Get gets the value for key k from kv store. Always return nil for this retriever
func (*EmptyRetriever) Get(_ context.Context, _ Key) ([]byte, error) {
return nil, ErrNotExist
}
// Iter creates an Iterator. Always return EmptyIterator for this retriever
func (*EmptyRetriever) Iter(_ Key, _ Key) (Iterator, error) { return &EmptyIterator{}, nil }
// IterReverse creates a reversed Iterator. Always return EmptyIterator for this retriever
func (*EmptyRetriever) IterReverse(_ Key, _ Key) (Iterator, error) {
return &EmptyIterator{}, nil
}
// Mutator is the interface wraps the basic Set and Delete methods.
type Mutator interface {
// Set sets the value for key k as v into kv store.
// v must NOT be nil or empty, otherwise it returns ErrCannotSetNilValue.
Set(k Key, v []byte) error
// Delete removes the entry for key k from kv store.
Delete(k Key) error
}
// StagingHandle is the reference of a staging buffer.
type StagingHandle int
var (
// InvalidStagingHandle is an invalid handler, MemBuffer will check handler to ensure safety.
InvalidStagingHandle StagingHandle = 0
// LastActiveStagingHandle is an special handler which always point to the last active staging buffer.
LastActiveStagingHandle StagingHandle = -1
)
// RetrieverMutator is the interface that groups Retriever and Mutator interfaces.
type RetrieverMutator interface {
Retriever
Mutator
}
// MemBuffer is an in-memory kv collection, can be used to buffer write operations.
type MemBuffer interface {
RetrieverMutator
// RLock locks the MemBuffer for shared read.
// In the most case, MemBuffer will only used by single goroutine,
// but it will be read by multiple goroutine when combined with executor.UnionScanExec.
// To avoid race introduced by executor.UnionScanExec, MemBuffer expose read lock for it.
RLock()
// RUnlock unlocks the MemBuffer.
RUnlock()
// GetFlags returns the latest flags associated with key.
GetFlags(Key) (KeyFlags, error)
// SetWithFlags put key-value into the last active staging buffer with the given KeyFlags.
SetWithFlags(Key, []byte, ...FlagsOp) error
// UpdateFlags updates the flags associated with key.
UpdateFlags(Key, ...FlagsOp)
// DeleteWithFlags delete key with the given KeyFlags
DeleteWithFlags(Key, ...FlagsOp) error
// Staging create a new staging buffer inside the MemBuffer.
// Subsequent writes will be temporarily stored in this new staging buffer.
// When you think all modifications looks good, you can call `Release` to public all of them to the upper level buffer.
Staging() StagingHandle
// Release publish all modifications in the latest staging buffer to upper level.
Release(StagingHandle)
// Cleanup cleanup the resources referenced by the StagingHandle.
// If the changes are not published by `Release`, they will be discarded.
Cleanup(StagingHandle)
// InspectStage used to inspect the value updates in the given stage.
InspectStage(StagingHandle, func(Key, KeyFlags, []byte))
// SnapshotGetter returns a Getter for a snapshot of MemBuffer.
SnapshotGetter() Getter
// SnapshotIter returns a Iterator for a snapshot of MemBuffer.
SnapshotIter(k, upperbound Key) Iterator
// SnapshotIterReverse returns a reverse Iterator for a snapshot of MemBuffer.
SnapshotIterReverse(k, lowerBound Key) Iterator
// Len returns the number of entries in the DB.
Len() int
// Size returns sum of keys and values length.
Size() int
// RemoveFromBuffer removes the entry from the buffer. It's used for testing.
RemoveFromBuffer(Key)
// GetLocal checks if the key exists in the buffer in local memory.
GetLocal(context.Context, []byte) ([]byte, error)
// BatchGet gets values from the memory buffer.
BatchGet(ctx context.Context, keys [][]byte) (map[string][]byte, error)
}
// FindKeysInStage returns all keys in the given stage that satisfies the given condition.
func FindKeysInStage(m MemBuffer, h StagingHandle, predicate func(Key, KeyFlags, []byte) bool) []Key {
result := make([]Key, 0)
m.InspectStage(h, func(k Key, f KeyFlags, v []byte) {
if predicate(k, f, v) {
result = append(result, k)
}
})
return result
}
// LockCtx contains information for LockKeys method.
type LockCtx = tikvstore.LockCtx
// Transaction defines the interface for operations inside a Transaction.
// This is not thread safe.
type Transaction interface {
RetrieverMutator
AssertionProto
FairLockingController
// Size returns sum of keys and values length.
Size() int
// Mem returns the memory consumption of the transaction.
Mem() uint64
// SetMemoryFootprintChangeHook sets the hook that will be called when the memory footprint changes.
SetMemoryFootprintChangeHook(func(uint64))
// MemHookSet returns whether the memory footprint change hook is set.
MemHookSet() bool
// Len returns the number of entries in the DB.
Len() int
// Commit commits the transaction operations to KV store.
Commit(context.Context) error
// Rollback undoes the transaction operations to KV store.
Rollback() error
// String implements fmt.Stringer interface.
String() string
// LockKeys tries to lock the entries with the keys in KV store.
// Will block until all keys are locked successfully or an error occurs.
LockKeys(ctx context.Context, lockCtx *LockCtx, keys ...Key) error
// LockKeysFunc tries to lock the entries with the keys in KV store.
// Will block until all keys are locked successfully or an error occurs.
// fn is called before LockKeys unlocks the keys.
LockKeysFunc(ctx context.Context, lockCtx *LockCtx, fn func(), keys ...Key) error
// SetOption sets an option with a value, when val is nil, uses the default
// value of this option.
SetOption(opt int, val any)
// GetOption returns the option
GetOption(opt int) any
// IsReadOnly checks if the transaction has only performed read operations.
IsReadOnly() bool
// StartTS returns the transaction start timestamp.
StartTS() uint64
// Valid returns if the transaction is valid.
// A transaction become invalid after commit or rollback.
Valid() bool
// GetMemBuffer return the MemBuffer binding to this transaction.
GetMemBuffer() MemBuffer
// GetSnapshot returns the Snapshot binding to this transaction.
GetSnapshot() Snapshot
// SetVars sets variables to the transaction.
SetVars(vars any)
// GetVars gets variables from the transaction.
GetVars() any
// BatchGet gets kv from the memory buffer of statement and transaction, and the kv storage.
// Do not use len(value) == 0 or value == nil to represent non-exist.
// If a key doesn't exist, there shouldn't be any corresponding entry in the result map.
BatchGet(ctx context.Context, keys []Key) (map[string][]byte, error)
IsPessimistic() bool
// CacheTableInfo caches the index name.
// PresumeKeyNotExists will use this to help decode error message.
CacheTableInfo(id int64, info *model.TableInfo)
// GetTableInfo returns the cached index name.
// If there is no such index already inserted through CacheIndexName, it will return UNKNOWN.
GetTableInfo(id int64) *model.TableInfo
// SetDiskFullOpt set allowed options of current operation in each TiKV disk usage level.
SetDiskFullOpt(level kvrpcpb.DiskFullOpt)
// ClearDiskFullOpt clear allowed flag
ClearDiskFullOpt()
// GetMemDBCheckpoint gets the transaction's memDB checkpoint.
GetMemDBCheckpoint() *tikv.MemDBCheckpoint
// RollbackMemDBToCheckpoint rollbacks the transaction's memDB to the specified checkpoint.
RollbackMemDBToCheckpoint(*tikv.MemDBCheckpoint)
// UpdateMemBufferFlags updates the flags of a node in the mem buffer.
UpdateMemBufferFlags(key []byte, flags ...FlagsOp)
// IsPipelined returns whether the transaction is used for pipelined DML.
IsPipelined() bool
// MayFlush flush the pipelined memdb if the keys or size exceeds threshold, no effect for standard DML.
MayFlush() error
}
// AssertionProto is an interface defined for the assertion protocol.
type AssertionProto interface {
// SetAssertion sets an assertion for an operation on the key.
// TODO: Use a special type instead of `FlagsOp`. Otherwise there's risk that the assertion flag is incorrectly used
// in other places like `MemBuffer.SetWithFlags`.
SetAssertion(key []byte, assertion ...FlagsOp) error
}
// FairLockingController is the interface that defines fair locking related operations.
type FairLockingController interface {
StartFairLocking() error
RetryFairLocking(ctx context.Context) error
CancelFairLocking(ctx context.Context) error
DoneFairLocking(ctx context.Context) error
IsInFairLockingMode() bool
}
// Client is used to send request to KV layer.
type Client interface {
// Send sends request to KV layer, returns a Response.
Send(ctx context.Context, req *Request, vars any, option *ClientSendOption) Response
// IsRequestTypeSupported checks if reqType and subType is supported.
IsRequestTypeSupported(reqType, subType int64) bool
}
// ClientSendOption wraps options during Client Send
type ClientSendOption struct {
SessionMemTracker *memory.Tracker
EnabledRateLimitAction bool
EventCb trxevents.EventCallback
EnableCollectExecutionInfo bool
TiFlashReplicaRead tiflash.ReplicaRead
AppendWarning func(warn error)
}
// ReqTypes.
const (
ReqTypeSelect = 101
ReqTypeIndex = 102
ReqTypeDAG = 103
ReqTypeAnalyze = 104
ReqTypeChecksum = 105
ReqSubTypeBasic = 0
ReqSubTypeDesc = 10000
ReqSubTypeGroupBy = 10001
ReqSubTypeTopN = 10002
ReqSubTypeSignature = 10003
ReqSubTypeAnalyzeIdx = 10004
ReqSubTypeAnalyzeCol = 10005
)
// StoreType represents the type of a store.
type StoreType uint8
const (
// TiKV means the type of a store is TiKV.
TiKV StoreType = iota
// TiFlash means the type of a store is TiFlash.
TiFlash
// TiDB means the type of a store is TiDB.
TiDB
// UnSpecified means the store type is unknown
UnSpecified = 255
)
// Name returns the name of store type.
func (t StoreType) Name() string {
if t == TiFlash {
return "tiflash"
} else if t == TiDB {
return "tidb"
} else if t == TiKV {
return "tikv"
}
return "unspecified"
}
// KeyRanges wrap the ranges for partitioned table cases.
// We might send ranges from different in the one request.
type KeyRanges struct {
ranges [][]KeyRange
rowCountHints [][]int
isPartitioned bool
}
// NewPartitionedKeyRanges constructs a new RequestRange for partitioned table.
func NewPartitionedKeyRanges(ranges [][]KeyRange) *KeyRanges {
return NewPartitionedKeyRangesWithHints(ranges, nil)
}
// NewNonPartitionedKeyRanges constructs a new RequestRange for a non-partitioned table.
func NewNonPartitionedKeyRanges(ranges []KeyRange) *KeyRanges {
return NewNonParitionedKeyRangesWithHint(ranges, nil)
}
// NewPartitionedKeyRangesWithHints constructs a new RequestRange for partitioned table with row count hint.
func NewPartitionedKeyRangesWithHints(ranges [][]KeyRange, hints [][]int) *KeyRanges {
return &KeyRanges{
ranges: ranges,
rowCountHints: hints,
isPartitioned: true,
}
}
// NewNonParitionedKeyRangesWithHint constructs a new RequestRange for a non partitioned table with rou count hint.
func NewNonParitionedKeyRangesWithHint(ranges []KeyRange, hints []int) *KeyRanges {
rr := &KeyRanges{
ranges: [][]KeyRange{ranges},
isPartitioned: false,
}
if hints != nil {
rr.rowCountHints = [][]int{hints}
}
return rr
}
// FirstPartitionRange returns the the result of first range.
// We may use some func to generate ranges for both partitioned table and non partitioned table.
// This method provides a way to fallback to non-partitioned ranges.
func (rr *KeyRanges) FirstPartitionRange() []KeyRange {
if len(rr.ranges) == 0 {
return []KeyRange{}
}
return rr.ranges[0]
}
// SetToNonPartitioned set the status to non-partitioned.
func (rr *KeyRanges) SetToNonPartitioned() error {
if len(rr.ranges) > 1 {
return errors.Errorf("you want to change the partitioned ranges to non-partitioned ranges")
}
rr.isPartitioned = false
return nil
}
// AppendSelfTo appends itself to another slice.
func (rr *KeyRanges) AppendSelfTo(ranges []KeyRange) []KeyRange {
for _, r := range rr.ranges {
ranges = append(ranges, r...)
}
return ranges
}
// SortByFunc sorts each partition's ranges.
// Since the ranges are sorted in most cases, we check it first.
func (rr *KeyRanges) SortByFunc(sortFunc func(i, j KeyRange) int) {
if !slices.IsSortedFunc(rr.ranges, func(i, j []KeyRange) int {
// A simple short-circuit since the empty range actually won't make anything wrong.
if len(i) == 0 || len(j) == 0 {
return -1
}
return sortFunc(i[0], j[0])
}) {
slices.SortFunc(rr.ranges, func(i, j []KeyRange) int {
if len(i) == 0 {
return -1
}
if len(j) == 0 {
return 1
}
return sortFunc(i[0], j[0])
})
}
for i := range rr.ranges {
if !slices.IsSortedFunc(rr.ranges[i], sortFunc) {
slices.SortFunc(rr.ranges[i], sortFunc)
}
}
}
// ForEachPartitionWithErr runs the func for each partition with an error check.
func (rr *KeyRanges) ForEachPartitionWithErr(theFunc func([]KeyRange, []int) error) (err error) {
for i := range rr.ranges {
var hints []int
if len(rr.rowCountHints) > i {
hints = rr.rowCountHints[i]
}
err = theFunc(rr.ranges[i], hints)
if err != nil {
return err
}
}
return nil
}
// ForEachPartition runs the func for each partition without error check.
func (rr *KeyRanges) ForEachPartition(theFunc func([]KeyRange)) {
for i := range rr.ranges {
theFunc(rr.ranges[i])
}
}
// PartitionNum returns how many partition is involved in the ranges.
func (rr *KeyRanges) PartitionNum() int {
return len(rr.ranges)
}
// IsFullySorted checks whether the ranges are sorted inside partition and each partition is also sorated.
func (rr *KeyRanges) IsFullySorted() bool {
sortedByPartition := slices.IsSortedFunc(rr.ranges, func(i, j []KeyRange) int {
// A simple short-circuit since the empty range actually won't make anything wrong.
if len(i) == 0 || len(j) == 0 {
return -1
}
return bytes.Compare(i[0].StartKey, j[0].StartKey)
})
if !sortedByPartition {
return false
}
for _, ranges := range rr.ranges {
if !slices.IsSortedFunc(ranges, func(i, j KeyRange) int {
return bytes.Compare(i.StartKey, j.StartKey)
}) {
return false
}
}
return true
}
// TotalRangeNum returns how many ranges there are.
func (rr *KeyRanges) TotalRangeNum() int {
ret := 0
for _, r := range rr.ranges {
ret += len(r)
}
return ret
}
// Request represents a kv request.
type Request struct {
// Tp is the request type.
Tp int64
StartTs uint64
Data []byte
// KeyRanges makes sure that the request is sent first by partition then by region.
// When the table is small, it's possible that multiple partitions are in the same region.
KeyRanges *KeyRanges
// For PartitionTableScan used by tiflash.
PartitionIDAndRanges []PartitionIDAndRanges
// Concurrency is 1, if it only sends the request to a single storage unit when
// ResponseIterator.Next is called. If concurrency is greater than 1, the request will be
// sent to multiple storage units concurrently.
Concurrency int
// IsolationLevel is the isolation level, default is SI.
IsolationLevel IsoLevel
// Priority is the priority of this KV request, its value may be PriorityNormal/PriorityLow/PriorityHigh.
Priority int
// memTracker is used to trace and control memory usage in co-processor layer.
MemTracker *memory.Tracker
// KeepOrder is true, if the response should be returned in order.
KeepOrder bool
// Desc is true, if the request is sent in descending order.
Desc bool
// NotFillCache makes this request do not touch the LRU cache of the underlying storage.
NotFillCache bool
// ReplicaRead is used for reading data from replicas, only follower is supported at this time.
ReplicaRead ReplicaReadType
// StoreType represents this request is sent to the which type of store.
StoreType StoreType
// Cacheable is true if the request can be cached. Currently only deterministic DAG requests can be cached.
Cacheable bool
// SchemaVer is for any schema-ful storage to validate schema correctness if necessary.
SchemaVar int64
// BatchCop indicates whether send batch coprocessor request to tiflash.
BatchCop bool
// TaskID is an unique ID for an execution of a statement
TaskID uint64
// TiDBServerID is the specified TiDB serverID to execute request. `0` means all TiDB instances.
TiDBServerID uint64
// TxnScope is the scope of the txn
TxnScope string
// ReadReplicaScope is the scope of the read replica.
ReadReplicaScope string
// IsStaleness indicates whether the request read staleness data
IsStaleness bool
// ClosestReplicaReadAdjuster used to adjust a copr request.
ClosestReplicaReadAdjuster CoprRequestAdjuster
// MatchStoreLabels indicates the labels the store should be matched
MatchStoreLabels []*metapb.StoreLabel
// ResourceGroupTagger indicates the kv request task group tagger.
ResourceGroupTagger *ResourceGroupTagBuilder
// Paging indicates whether the request is a paging request.
Paging struct {
Enable bool
// MinPagingSize is used when Paging is true.
MinPagingSize uint64
// MaxPagingSize is used when Paging is true.
MaxPagingSize uint64
}
// RequestSource indicates whether the request is an internal request.
RequestSource util.RequestSource
// StoreBatchSize indicates the batch size of coprocessor in the same store.
StoreBatchSize int
// ResourceGroupName is the name of the bind resource group.
ResourceGroupName string
// LimitSize indicates whether the request is scan and limit
LimitSize uint64
// StoreBusyThreshold is the threshold for the store to return ServerIsBusy
StoreBusyThreshold time.Duration
// TiKVClientReadTimeout is the timeout of kv read request
TiKVClientReadTimeout uint64
RunawayChecker resourcegroup.RunawayChecker
// ConnID stores the session connection id.
ConnID uint64
// ConnAlias stores the session connection alias.
ConnAlias string
}
// CoprRequestAdjuster is used to check and adjust a copr request according to specific rules.
// return true if the request is changed.
type CoprRequestAdjuster func(*Request, int) bool
// PartitionIDAndRanges used by PartitionTableScan in tiflash.
type PartitionIDAndRanges struct {
ID int64
KeyRanges []KeyRange
}
const (
// GlobalReplicaScope indicates the default replica scope for tidb to request
GlobalReplicaScope = oracle.GlobalTxnScope
)
// ResultSubset represents a result subset from a single storage unit.
// TODO: Find a better interface for ResultSubset that can reuse bytes.
type ResultSubset interface {
// GetData gets the data.
GetData() []byte
// GetStartKey gets the start key.
GetStartKey() Key
// MemSize returns how many bytes of memory this result use for tracing memory usage.
MemSize() int64
// RespTime returns the response time for the request.
RespTime() time.Duration
}
// Response represents the response returned from KV layer.
type Response interface {
// Next returns a resultSubset from a single storage unit.
// When full result set is returned, nil is returned.
Next(ctx context.Context) (resultSubset ResultSubset, err error)
// Close response.
Close() error
}
// Snapshot defines the interface for the snapshot fetched from KV store.
type Snapshot interface {
Retriever
// BatchGet gets a batch of values from snapshot.
BatchGet(ctx context.Context, keys []Key) (map[string][]byte, error)
// SetOption sets an option with a value, when val is nil, uses the default
// value of this option. Only ReplicaRead is supported for snapshot
SetOption(opt int, val any)
}
// SnapshotInterceptor is used to intercept snapshot's read operation
type SnapshotInterceptor interface {
// OnGet intercepts Get operation for Snapshot
OnGet(ctx context.Context, snap Snapshot, k Key) ([]byte, error)
// OnBatchGet intercepts BatchGet operation for Snapshot
OnBatchGet(ctx context.Context, snap Snapshot, keys []Key) (map[string][]byte, error)
// OnIter intercepts Iter operation for Snapshot
OnIter(snap Snapshot, k Key, upperBound Key) (Iterator, error)
// OnIterReverse intercepts IterReverse operation for Snapshot
OnIterReverse(snap Snapshot, k Key, lowerBound Key) (Iterator, error)
}
// BatchGetter is the interface for BatchGet.
type BatchGetter interface {
// BatchGet gets a batch of values.
BatchGet(ctx context.Context, keys []Key) (map[string][]byte, error)
}
// Driver is the interface that must be implemented by a KV storage.
type Driver interface {
// Open returns a new Storage.
// The path is the string for storage specific format.
Open(path string) (Storage, error)
}
// Storage defines the interface for storage.
// Isolation should be at least SI(SNAPSHOT ISOLATION)
type Storage interface {
// Begin a global transaction
Begin(opts ...tikv.TxnOption) (Transaction, error)
// GetSnapshot gets a snapshot that is able to read any data which data is <= ver.
// if ver is MaxVersion or > current max committed version, we will use current version for this snapshot.
GetSnapshot(ver Version) Snapshot
// GetClient gets a client instance.
GetClient() Client
// GetMPPClient gets a mpp client instance.
GetMPPClient() MPPClient
// Close store
Close() error
// UUID return a unique ID which represents a Storage.
UUID() string
// CurrentVersion returns current max committed version with the given txnScope (local or global).
CurrentVersion(txnScope string) (Version, error)
// GetOracle gets a timestamp oracle client.
GetOracle() oracle.Oracle
// SupportDeleteRange gets the storage support delete range or not.
SupportDeleteRange() (supported bool)
// Name gets the name of the storage engine
Name() string
// Describe returns of brief introduction of the storage
Describe() string
// ShowStatus returns the specified status of the storage
ShowStatus(ctx context.Context, key string) (any, error)
// GetMemCache return memory manager of the storage.
GetMemCache() MemManager
// GetMinSafeTS return the minimal SafeTS of the storage with given txnScope.
GetMinSafeTS(txnScope string) uint64
// GetLockWaits return all lock wait information
GetLockWaits() ([]*deadlockpb.WaitForEntry, error)
// GetCodec gets the codec of the storage.
GetCodec() tikv.Codec
}
// EtcdBackend is used for judging a storage is a real TiKV.
type EtcdBackend interface {
EtcdAddrs() ([]string, error)
TLSConfig() *tls.Config
StartGCWorker() error
}
// StorageWithPD is used to get pd client.
type StorageWithPD interface {
GetPDClient() pd.Client
GetPDHTTPClient() pdhttp.Client
}
// FnKeyCmp is the function for iterator the keys
type FnKeyCmp func(key Key) bool
// Iterator is the interface for a iterator on KV store.
type Iterator interface {
Valid() bool
Key() Key
Value() []byte
Next() error
Close()
}
// SplittableStore is the kv store which supports split regions.
type SplittableStore interface {
SplitRegions(ctx context.Context, splitKey [][]byte, scatter bool, tableID *int64) (regionID []uint64, err error)
WaitScatterRegionFinish(ctx context.Context, regionID uint64, backOff int) error
CheckRegionInScattering(regionID uint64) (bool, error)
}
// Priority value for transaction priority.
const (
PriorityNormal = iota
PriorityLow
PriorityHigh
)
// IsoLevel is the transaction's isolation level.
type IsoLevel int
const (
// SI stands for 'snapshot isolation'.
SI IsoLevel = iota
// RC stands for 'read committed'.
RC
// RCCheckTS stands for 'read consistency read with ts check'.
RCCheckTS
)
// ResourceGroupTagBuilder is used to build the resource group tag for a kv request.
type ResourceGroupTagBuilder struct {
sqlDigest *parser.Digest
planDigest *parser.Digest
accessKey []byte
}
// NewResourceGroupTagBuilder creates a new ResourceGroupTagBuilder.
func NewResourceGroupTagBuilder() *ResourceGroupTagBuilder {
return &ResourceGroupTagBuilder{}
}
// SetSQLDigest sets the sql digest for the request.
func (b *ResourceGroupTagBuilder) SetSQLDigest(digest *parser.Digest) *ResourceGroupTagBuilder {
b.sqlDigest = digest
return b
}
// SetPlanDigest sets the plan digest for the request.
func (b *ResourceGroupTagBuilder) SetPlanDigest(digest *parser.Digest) *ResourceGroupTagBuilder {
b.planDigest = digest
return b
}
// BuildProtoTagger sets the access key for the request.
func (b *ResourceGroupTagBuilder) BuildProtoTagger() tikvrpc.ResourceGroupTagger {
return func(req *tikvrpc.Request) {
b.Build(req)
}
}
// EncodeTagWithKey encodes the resource group tag, returns the encoded bytes.
func (b *ResourceGroupTagBuilder) EncodeTagWithKey(key []byte) []byte {
tag := &tipb.ResourceGroupTag{}
if b.sqlDigest != nil {
tag.SqlDigest = b.sqlDigest.Bytes()
}
if b.planDigest != nil {
tag.PlanDigest = b.planDigest.Bytes()
}
if len(key) > 0 {
tag.TableId = decodeTableID(key)
label := resourcegrouptag.GetResourceGroupLabelByKey(key)
tag.Label = &label
}
tagEncoded, err := tag.Marshal()
if err != nil {
return nil
}
return tagEncoded
}
// Build builds the resource group tag for the request.
func (b *ResourceGroupTagBuilder) Build(req *tikvrpc.Request) {
if req == nil {
return
}
if encodedBytes := b.EncodeTagWithKey(resourcegrouptag.GetFirstKeyFromRequest(req)); len(encodedBytes) > 0 {
req.ResourceGroupTag = encodedBytes
}
}
// DecodeTableIDFunc is used to decode table id from key.
var DecodeTableIDFunc func(Key) int64
// avoid import cycle, not import tablecodec in kv package.
func decodeTableID(key Key) int64 {
if DecodeTableIDFunc != nil {
return DecodeTableIDFunc(key)
}
return 0
}