// Source file: tidb/pkg/disttask/importinto/proto.go (188 lines, 6.1 KiB, Go).
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package importinto
import (
"fmt"
"sync"
"sync/atomic"
"github.com/pingcap/tidb/pkg/domain/infosync"
"github.com/pingcap/tidb/pkg/executor/importer"
"github.com/pingcap/tidb/pkg/lightning/backend"
"github.com/pingcap/tidb/pkg/lightning/backend/external"
"github.com/pingcap/tidb/pkg/lightning/mydump"
"github.com/pingcap/tidb/pkg/lightning/verification"
"github.com/pingcap/tidb/pkg/meta/autoid"
)
// TaskMeta is the task of IMPORT INTO.
// All the fields should be serializable.
type TaskMeta struct {
	// JobID is the IMPORT INTO job id, see mysql.tidb_import_jobs.
	JobID int64
	// Plan is the import plan for this job.
	Plan importer.Plan
	// Stmt is the statement text of the job.
	Stmt string
	// Result records the metrics of the whole task.
	Result Result
	// EligibleInstances lists the instances eligible to run this task; we run
	// on all instances if it's empty.
	// We only need this when running IMPORT INTO without the distributed
	// option now, i.e. running on the instance that initiated the IMPORT INTO.
	EligibleInstances []*infosync.ServerInfo
	// ChunkMap holds the file chunks to import. When importing from a server
	// file, we need to pass those files to the framework scheduler which might
	// run on another instance.
	// We use a map from engine ID to chunks since we need to support
	// split_file for CSV, so chunks are split into engines before being passed
	// to the scheduler.
	ChunkMap map[int32][]Chunk
}
// ImportStepMeta is the meta of the import step.
// The scheduler splits the task into subtasks (FileInfos -> Chunks).
// All the fields should be serializable.
type ImportStepMeta struct {
	// ID is the engine ID, not the id in the tidb_background_subtask table.
	ID int32
	// Chunks are the file chunks assigned to this subtask.
	Chunks []Chunk
	// Checksum maps a kv-group key to its checksum; see KVGroupChecksum for
	// the definition of the map key.
	Checksum map[int64]Checksum
	// Result records the metrics of this subtask.
	Result Result
	// MaxIDs stores the max id that has been used during encoding for each
	// allocator type.
	// The max id is the same among all allocator types for now, since we're
	// using the same base; see NewPanickingAllocators for more info.
	MaxIDs map[autoid.AllocatorType]int64
	// SortedDataMeta is the sorted kv meta of the data kv-group.
	SortedDataMeta *external.SortedKVMeta
	// SortedIndexMetas is a map from index id to its sorted kv meta.
	SortedIndexMetas map[int64]*external.SortedKVMeta
}
const (
	// dataKVGroup is the group name of the sorted kv for data.
	// Index kv will be stored in a group named after the index id.
	dataKVGroup = "data"
)
// MergeSortStepMeta is the meta of the merge sort step.
type MergeSortStepMeta struct {
	// KVGroup is the group name of the sorted kv, either dataKVGroup or an
	// index id.
	KVGroup string `json:"kv-group"`
	// DataFiles are the sorted kv files to be merged in this step.
	DataFiles []string `json:"data-files"`
	// SortedKVMeta is the merged sorted kv meta produced by this step.
	external.SortedKVMeta `json:"sorted-kv-meta"`
}
// WriteIngestStepMeta is the meta of the write-and-ingest step.
// Only used when global sort is enabled.
type WriteIngestStepMeta struct {
	// KVGroup is the group name of the sorted kv, either dataKVGroup or an
	// index id.
	KVGroup string `json:"kv-group"`
	// SortedKVMeta is the sorted kv meta of the group to write and ingest.
	external.SortedKVMeta `json:"sorted-kv-meta"`
	// DataFiles are the sorted kv data files to ingest.
	DataFiles []string `json:"data-files"`
	// StatFiles are the statistic files corresponding to DataFiles.
	StatFiles []string `json:"stat-files"`
	// RangeJobKeys are the boundary keys of the ingest jobs.
	RangeJobKeys [][]byte `json:"range-job-keys"`
	// RangeSplitKeys are the keys used to split ranges.
	RangeSplitKeys [][]byte `json:"range-split-keys"`
	// TS is the timestamp used for the ingested kv.
	TS uint64 `json:"ts"`
	// Result records the metrics of this subtask.
	Result Result
}
// PostProcessStepMeta is the meta of the post-process step.
type PostProcessStepMeta struct {
	// Checksum is the accumulated checksum of all subtasks in the import step.
	// See KVGroupChecksum for the definition of the map key.
	Checksum map[int64]Checksum
	// MaxIDs holds, per allocator type, the maximum of the MaxIDs of all
	// subtasks in the import step.
	MaxIDs map[autoid.AllocatorType]int64
}
// SharedVars is the shared variables of all minimal tasks in a subtask.
// This is because subtasks cannot directly obtain the results of the minimal
// subtask.
// All the fields should be concurrency safe.
type SharedVars struct {
	// TableImporter is the importer shared by all minimal tasks of the subtask.
	TableImporter *importer.TableImporter
	// DataEngine is the opened engine for data kv.
	DataEngine *backend.OpenedEngine
	// IndexEngine is the opened engine for index kv.
	IndexEngine *backend.OpenedEngine
	// Progress tracks the import progress of the subtask.
	Progress *importer.Progress

	// mu guards SortedDataMeta and SortedIndexMetas below.
	mu sync.Mutex
	// Checksum accumulates the kv-group checksums of the subtask.
	Checksum *verification.KVGroupChecksum
	// SortedDataMeta is the sorted kv meta of the data kv-group.
	SortedDataMeta *external.SortedKVMeta
	// SortedIndexMetas is a map from index id to its sorted kv meta.
	SortedIndexMetas map[int64]*external.SortedKVMeta
	// ShareMu is a separate mutex exposed to callers of this struct.
	ShareMu sync.Mutex
}
// mergeDataSummary merges the given writer summary into the shared
// SortedDataMeta. It is safe for concurrent use: updates are serialized
// by sv.mu.
func (sv *SharedVars) mergeDataSummary(s *external.WriterSummary) {
	sv.mu.Lock()
	defer sv.mu.Unlock()

	sv.SortedDataMeta.MergeSummary(s)
}
// mergeIndexSummary merges the given writer summary into the sorted kv meta
// of the index identified by indexID, creating the meta entry on first use.
// It is safe for concurrent use: updates are serialized by sv.mu.
func (sv *SharedVars) mergeIndexSummary(indexID int64, s *external.WriterSummary) {
	sv.mu.Lock()
	defer sv.mu.Unlock()

	if m, ok := sv.SortedIndexMetas[indexID]; ok {
		m.MergeSummary(s)
		return
	}
	sv.SortedIndexMetas[indexID] = external.NewSortedKVMeta(s)
}
// importStepMinimalTask is the minimal task of IMPORT INTO.
// The TaskExecutor splits the subtask into minimal tasks (Chunks -> Chunk).
type importStepMinimalTask struct {
	// Plan is the import plan of the job.
	Plan importer.Plan
	// Chunk is the single file chunk this minimal task processes.
	Chunk Chunk
	// SharedVars are the variables shared with the other minimal tasks of the
	// same subtask.
	SharedVars *SharedVars
	// panicked is set to true by RecoverArgs' recover function when the task
	// panics.
	panicked *atomic.Bool
}
// RecoverArgs implements the workerpool.TaskMayPanic interface.
// The returned recover function records that the task panicked; quit is
// false so the worker pool keeps running after recovery.
func (t *importStepMinimalTask) RecoverArgs() (metricsLabel string, funcInfo string, recoverFn func(), quit bool) {
	metricsLabel = "encodeAndSortOperator"
	funcInfo = "RecoverArgs"
	recoverFn = func() { t.panicked.Store(true) }
	quit = false
	return
}
// String returns a short description of the minimal task in the form
// "chunk:<path>:<offset>".
func (t *importStepMinimalTask) String() string {
	c := &t.Chunk
	return fmt.Sprintf("chunk:%s:%d", c.Path, c.Offset)
}
// Chunk records the chunk information.
type Chunk struct {
	// Path is the path of the source file this chunk belongs to.
	Path string
	// FileSize is the size of the source file.
	FileSize int64
	// Offset is the start offset of the chunk within the file.
	Offset int64
	// EndOffset is the end offset of the chunk within the file.
	EndOffset int64
	// PrevRowIDMax is the max row id used before this chunk.
	PrevRowIDMax int64
	// RowIDMax is the max row id available to this chunk.
	RowIDMax int64
	// Type is the source type of the file, e.g. CSV.
	Type mydump.SourceType
	// Compression is the compression type of the file.
	Compression mydump.Compression
	// Timestamp records when the chunk was created.
	Timestamp int64
}
// Checksum records the checksum information.
type Checksum struct {
	// Sum is the xor-ed checksum value of the kv pairs.
	Sum uint64
	// KVs is the number of kv pairs.
	KVs uint64
	// Size is the total size of the kv pairs in bytes.
	Size uint64
}
// Result records the metrics information.
// This portion of the code may be implemented uniformly in the framework in
// the future.
type Result struct {
	// LoadedRowCnt is the number of rows loaded.
	LoadedRowCnt uint64
	// ColSizeMap maps a column id to its accumulated size in bytes.
	ColSizeMap map[int64]int64
}