// Copyright 2023 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package engineapi import ( "context" ) // Range contains a start key and an end key. The Range's key should not be // encoded by duplicate detection. type Range struct { Start []byte End []byte // end is always exclusive except import_sstpb.SSTMeta } // DataAndRanges is a pair of IngestData and list of Range. Each Range will // become a regionJob, and the regionJob will read data from Data field. type DataAndRanges struct { Data IngestData SortedRanges []Range } // Engine describes the common interface of local and external engine that // local backend uses. type Engine interface { // ID is the identifier of an engine. ID() string // LoadIngestData sends DataAndRanges to outCh. LoadIngestData(ctx context.Context, outCh chan<- DataAndRanges) error // KVStatistics returns the total kv size and total kv count. KVStatistics() (totalKVSize int64, totalKVCount int64) // ImportedStatistics returns the imported kv size and imported kv count. ImportedStatistics() (importedKVSize int64, importedKVCount int64) // ConflictInfo returns the conflict information of the engine. // TODO only external engine have this method, we should't make part of the // Engine interface, but right now we have to consider backend pkg which need // to consider the lightning tidb backend. ConflictInfo() ConflictInfo // GetKeyRange returns the key range [startKey, endKey) of the engine. If the // duplicate detection is enabled, the keys in engine are encoded by duplicate // detection but the returned keys should not be encoded. GetKeyRange() (startKey []byte, endKey []byte, err error) // GetRegionSplitKeys checks the KV distribution of the Engine and returns the // keys that can be used as region split keys. If the duplicate detection is // enabled, the keys stored in engine are encoded by duplicate detection but the // returned keys should not be encoded. // // Currently, the start/end key of this import should also be included in the // returned split keys. GetRegionSplitKeys() ([][]byte, error) Close() error } // ConflictInfo records the KV conflict information. // to help describe how we do conflict resolution, we separate 'conflict KV' out // from 'duplicate KV': // - 'duplicate KV' means the KV pairs that have the same key, including keys // come from PK/UK/non-UK. // - 'conflict KV' means the KV pairs that have the same key, and they can cause // conflict ROWS in the table, including keys come from PK/UK. non-UK keys may // became duplicate when PK keys are duplicated, but we don't need to consider // them when resolving conflicts. type ConflictInfo struct { // Count is the recorded count of conflict KV pairs, either PK or UK. Count uint64 `json:"count,omitempty"` // Files is the list of files that contain conflict KV pairs. // it's in the same format as normal KV files. Files []string `json:"files,omitempty"` } // Merge merges the other ConflictInfo into this one. func (c *ConflictInfo) Merge(other *ConflictInfo) { c.Count += other.Count c.Files = append(c.Files, other.Files...) } // OnDuplicateKey is the action when a duplicate key is found during global sort. // Note: lightning also have similar concept call OnDup, they have different semantic. // we put it here to avoid import cycle. type OnDuplicateKey int const ( // OnDuplicateKeyIgnore means ignore the duplicate key. // this is the current behavior, we will keep it before we fully switch to // below 3 options. OnDuplicateKeyIgnore OnDuplicateKey = iota // OnDuplicateKeyRecord means record the duplicate keys to external store. // depends on the step, we might only record when number of duplicates are larger // than 2, since we only see the local sorted data, not the global sorted data, // such as during encoding and merge sorting. // we use this for PK and UK in import-into. OnDuplicateKeyRecord // OnDuplicateKeyRemove means remove the duplicate key silently. // we use this action for non-unique secondary indexes in import-into. OnDuplicateKeyRemove // OnDuplicateKeyError return an error when a duplicate key is found. // may use this for add unique index. OnDuplicateKeyError ) // String implements fmt.Stringer interface. func (o OnDuplicateKey) String() string { switch o { case OnDuplicateKeyIgnore: return "ignore" case OnDuplicateKeyRecord: return "record" case OnDuplicateKeyRemove: return "remove" case OnDuplicateKeyError: return "error" } return "unknown" }