Files
tidb/pkg/ingestor/engineapi/engine.go

127 lines
5.0 KiB
Go

// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package engineapi
import (
"context"
)
// Range contains a start key and an end key. The Range's key should not be
// encoded by duplicate detection.
type Range struct {
Start []byte
End []byte // end is always exclusive except import_sstpb.SSTMeta
}
// DataAndRanges is a pair of IngestData and list of Range. Each Range will
// become a regionJob, and the regionJob will read data from Data field.
type DataAndRanges struct {
Data IngestData
SortedRanges []Range
}
// Engine describes the common interface of local and external engine that
// local backend uses.
type Engine interface {
// ID is the identifier of an engine.
ID() string
// LoadIngestData sends DataAndRanges to outCh.
LoadIngestData(ctx context.Context, outCh chan<- DataAndRanges) error
// KVStatistics returns the total kv size and total kv count.
KVStatistics() (totalKVSize int64, totalKVCount int64)
// ImportedStatistics returns the imported kv size and imported kv count.
ImportedStatistics() (importedKVSize int64, importedKVCount int64)
// ConflictInfo returns the conflict information of the engine.
// TODO only external engine have this method, we should't make part of the
// Engine interface, but right now we have to consider backend pkg which need
// to consider the lightning tidb backend.
ConflictInfo() ConflictInfo
// GetKeyRange returns the key range [startKey, endKey) of the engine. If the
// duplicate detection is enabled, the keys in engine are encoded by duplicate
// detection but the returned keys should not be encoded.
GetKeyRange() (startKey []byte, endKey []byte, err error)
// GetRegionSplitKeys checks the KV distribution of the Engine and returns the
// keys that can be used as region split keys. If the duplicate detection is
// enabled, the keys stored in engine are encoded by duplicate detection but the
// returned keys should not be encoded.
//
// Currently, the start/end key of this import should also be included in the
// returned split keys.
GetRegionSplitKeys() ([][]byte, error)
Close() error
}
// ConflictInfo records the KV conflict information.
// to help describe how we do conflict resolution, we separate 'conflict KV' out
// from 'duplicate KV':
// - 'duplicate KV' means the KV pairs that have the same key, including keys
// come from PK/UK/non-UK.
// - 'conflict KV' means the KV pairs that have the same key, and they can cause
// conflict ROWS in the table, including keys come from PK/UK. non-UK keys may
// became duplicate when PK keys are duplicated, but we don't need to consider
// them when resolving conflicts.
type ConflictInfo struct {
// Count is the recorded count of conflict KV pairs, either PK or UK.
Count uint64 `json:"count,omitempty"`
// Files is the list of files that contain conflict KV pairs.
// it's in the same format as normal KV files.
Files []string `json:"files,omitempty"`
}
// Merge merges the other ConflictInfo into this one.
func (c *ConflictInfo) Merge(other *ConflictInfo) {
c.Count += other.Count
c.Files = append(c.Files, other.Files...)
}
// OnDuplicateKey is the action when a duplicate key is found during global sort.
// Note: lightning also have similar concept call OnDup, they have different semantic.
// we put it here to avoid import cycle.
type OnDuplicateKey int
const (
// OnDuplicateKeyIgnore means ignore the duplicate key.
// this is the current behavior, we will keep it before we fully switch to
// below 3 options.
OnDuplicateKeyIgnore OnDuplicateKey = iota
// OnDuplicateKeyRecord means record the duplicate keys to external store.
// depends on the step, we might only record when number of duplicates are larger
// than 2, since we only see the local sorted data, not the global sorted data,
// such as during encoding and merge sorting.
// we use this for PK and UK in import-into.
OnDuplicateKeyRecord
// OnDuplicateKeyRemove means remove the duplicate key silently.
// we use this action for non-unique secondary indexes in import-into.
OnDuplicateKeyRemove
// OnDuplicateKeyError return an error when a duplicate key is found.
// may use this for add unique index.
OnDuplicateKeyError
)
// String implements fmt.Stringer interface.
func (o OnDuplicateKey) String() string {
switch o {
case OnDuplicateKeyIgnore:
return "ignore"
case OnDuplicateKeyRecord:
return "record"
case OnDuplicateKeyRemove:
return "remove"
case OnDuplicateKeyError:
return "error"
}
return "unknown"
}