// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package importinto
import (
	"fmt"
	"sync"
	"sync/atomic"

	"github.com/pingcap/tidb/pkg/domain/infosync"
	"github.com/pingcap/tidb/pkg/executor/importer"
	"github.com/pingcap/tidb/pkg/lightning/backend"
	"github.com/pingcap/tidb/pkg/lightning/backend/external"
	"github.com/pingcap/tidb/pkg/lightning/mydump"
	"github.com/pingcap/tidb/pkg/lightning/verification"
	"github.com/pingcap/tidb/pkg/meta/autoid"
)
// TaskMeta is the task meta of IMPORT INTO.
// All fields should be serializable.
type TaskMeta struct {
	// JobID is the IMPORT INTO job ID, see mysql.tidb_import_jobs.
	JobID  int64
	Plan   importer.Plan
	Stmt   string
	Result Result
	// EligibleInstances are the instances eligible to run this task; if it's
	// empty, the task can run on any instance. Currently it's only set when
	// IMPORT INTO runs without the distributed option, i.e. the task must run
	// on the instance that initiated the IMPORT INTO.
	EligibleInstances []*infosync.ServerInfo
	// ChunkMap maps an engine ID to the file chunks to import. When importing
	// from a server-side file, we need to pass these chunks to the framework
	// scheduler, which might run on another instance. We use a map from engine
	// ID to chunks because split_file support for CSV requires splitting the
	// files into engines before handing them to the scheduler.
	ChunkMap map[int32][]Chunk
}
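
// buildChunkMap is an illustrative sketch, not the production planning logic:
// it only shows the shape of TaskMeta.ChunkMap by distributing chunks across a
// fixed number of engines round-robin. The real planner assigns chunks to
// engines based on data size and split_file handling.
func buildChunkMap(chunks []Chunk, engineCnt int32) map[int32][]Chunk {
	if engineCnt <= 0 {
		engineCnt = 1
	}
	chunkMap := make(map[int32][]Chunk, engineCnt)
	for i, chunk := range chunks {
		engineID := int32(i) % engineCnt
		chunkMap[engineID] = append(chunkMap[engineID], chunk)
	}
	return chunkMap
}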

// ImportStepMeta is the meta of the import step.
// The scheduler splits the task into subtasks (FileInfos -> Chunks).
// All fields should be serializable.
type ImportStepMeta struct {
	// ID is the engine ID, not the ID in the tidb_background_subtask table.
	ID       int32
	Chunks   []Chunk
	Checksum map[int64]Checksum // see KVGroupChecksum for the definition of the map key.
	Result   Result
	// MaxIDs stores, for each allocator type, the max ID that has been used
	// during encoding. For now the max ID is the same for all allocator types,
	// since they share the same base; see NewPanickingAllocators for more info.
	MaxIDs map[autoid.AllocatorType]int64

	SortedDataMeta *external.SortedKVMeta
	// SortedIndexMetas is a map from index ID to its sorted kv meta.
	SortedIndexMetas map[int64]*external.SortedKVMeta
}
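
// Because every field above is serializable, the framework can persist a
// subtask's meta and rebuild it on whichever instance executes the subtask.
// A hypothetical round-trip sketch, assuming encoding/json (the framework
// owns the real encoding):
//
//	data, err := json.Marshal(stepMeta)
//	// ... store data in tidb_background_subtask, load it elsewhere ...
//	var decoded ImportStepMeta
//	err = json.Unmarshal(data, &decoded)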

const (
	// dataKVGroup is the group name of the sorted kv for data.
	// Index kv is stored in a group named after its index ID.
	dataKVGroup = "data"
)
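
// kvGroupOf is a hypothetical helper illustrating the naming convention above:
// data kv uses dataKVGroup, while the kv of an index goes into a group named
// after its index ID. It is a sketch; the production code may derive group
// names elsewhere.
func kvGroupOf(indexID int64, isData bool) string {
	if isData {
		return dataKVGroup
	}
	return fmt.Sprintf("%d", indexID)
}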

// MergeSortStepMeta is the meta of the merge sort step.
type MergeSortStepMeta struct {
	// KVGroup is the group name of the sorted kv, either dataKVGroup or an index ID.
	KVGroup               string   `json:"kv-group"`
	DataFiles             []string `json:"data-files"`
	external.SortedKVMeta `json:"sorted-kv-meta"`
}

// WriteIngestStepMeta is the meta of the write-and-ingest step.
// It is only used when global sort is enabled.
type WriteIngestStepMeta struct {
	KVGroup               string `json:"kv-group"`
	external.SortedKVMeta `json:"sorted-kv-meta"`
	DataFiles             []string `json:"data-files"`
	StatFiles             []string `json:"stat-files"`
	RangeJobKeys          [][]byte `json:"range-job-keys"`
	RangeSplitKeys        [][]byte `json:"range-split-keys"`
	TS                    uint64   `json:"ts"`

	Result Result
}

// PostProcessStepMeta is the meta of the post-process step.
type PostProcessStepMeta struct {
	// Checksum is the accumulated checksum of all subtasks in the import step.
	// See KVGroupChecksum for the definition of the map key.
	Checksum map[int64]Checksum
	// MaxIDs is the max over all max-ids of subtasks in the import step.
	MaxIDs map[autoid.AllocatorType]int64
}

// SharedVars holds the variables shared by all minimal tasks in a subtask.
// It exists because a subtask cannot directly obtain the results of its
// minimal tasks. All fields should be safe for concurrent use.
type SharedVars struct {
	TableImporter *importer.TableImporter
	DataEngine    *backend.OpenedEngine
	IndexEngine   *backend.OpenedEngine
	Progress      *importer.Progress

	mu       sync.Mutex
	Checksum *verification.KVGroupChecksum

	SortedDataMeta *external.SortedKVMeta
	// SortedIndexMetas is a map from index ID to its sorted kv meta.
	SortedIndexMetas map[int64]*external.SortedKVMeta
	ShareMu          sync.Mutex
}

func (sv *SharedVars) mergeDataSummary(summary *external.WriterSummary) {
	sv.mu.Lock()
	defer sv.mu.Unlock()
	sv.SortedDataMeta.MergeSummary(summary)
}

func (sv *SharedVars) mergeIndexSummary(indexID int64, summary *external.WriterSummary) {
	sv.mu.Lock()
	defer sv.mu.Unlock()
	meta, ok := sv.SortedIndexMetas[indexID]
	if !ok {
		meta = external.NewSortedKVMeta(summary)
		sv.SortedIndexMetas[indexID] = meta
		return
	}
	meta.MergeSummary(summary)
}
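
// The helpers above are why SharedVars must be safe for concurrent use: many
// minimal tasks in one subtask report their writer summaries in parallel. A
// usage sketch (the index ID and the summaries slice are made up for
// illustration):
//
//	var wg sync.WaitGroup
//	for _, summary := range summaries {
//		summary := summary
//		wg.Add(1)
//		go func() {
//			defer wg.Done()
//			sv.mergeIndexSummary(1, summary)
//		}()
//	}
//	wg.Wait()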

// importStepMinimalTask is the minimal task of IMPORT INTO.
// The task executor splits a subtask into minimal tasks (Chunks -> Chunk).
type importStepMinimalTask struct {
	Plan       importer.Plan
	Chunk      Chunk
	SharedVars *SharedVars
	panicked   *atomic.Bool
}

// RecoverArgs implements the workerpool.TaskMayPanic interface.
func (t *importStepMinimalTask) RecoverArgs() (metricsLabel string, funcInfo string, recoverFn func(), quit bool) {
	return "encodeAndSortOperator", "RecoverArgs", func() {
		t.panicked.Store(true)
	}, false
}
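
// runWithRecover is a sketch of how a worker pool might honor the
// workerpool.TaskMayPanic contract implemented above: when a task panics, the
// recover function flips the shared panicked flag so the subtask can fail
// fast. The actual wiring lives in the workerpool package; this helper is
// illustrative only.
func runWithRecover(t *importStepMinimalTask, run func()) {
	_, _, recoverFn, _ := t.RecoverArgs()
	defer func() {
		if r := recover(); r != nil {
			recoverFn()
		}
	}()
	run()
}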

func (t *importStepMinimalTask) String() string {
	return fmt.Sprintf("chunk:%s:%d", t.Chunk.Path, t.Chunk.Offset)
}

// Chunk records the chunk information.
type Chunk struct {
	Path         string
	FileSize     int64
	Offset       int64
	EndOffset    int64
	PrevRowIDMax int64
	RowIDMax     int64
	Type         mydump.SourceType
	Compression  mydump.Compression
	Timestamp    int64
}
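
// splitIntoChunks is an illustrative sketch of cutting one file into
// fixed-size Chunks. The offsets here ignore row boundaries; the real
// split_file logic for CSV must align EndOffset to row boundaries and fill
// PrevRowIDMax/RowIDMax from estimated row counts.
func splitIntoChunks(path string, fileSize, chunkSize int64) []Chunk {
	if chunkSize <= 0 {
		return nil
	}
	var chunks []Chunk
	for offset := int64(0); offset < fileSize; offset += chunkSize {
		end := offset + chunkSize
		if end > fileSize {
			end = fileSize
		}
		chunks = append(chunks, Chunk{
			Path:      path,
			FileSize:  fileSize,
			Offset:    offset,
			EndOffset: end,
			Type:      mydump.SourceTypeCSV,
		})
	}
	return chunks
}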

// Checksum records the checksum information.
type Checksum struct {
	Sum  uint64
	KVs  uint64
	Size uint64
}
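
// add is a sketch of accumulating per-subtask checksums into the maps used by
// ImportStepMeta and PostProcessStepMeta, assuming the usual lightning
// convention that checksums combine by XOR while kv counts and sizes are
// summed. The authoritative logic lives in the verification package.
func (c *Checksum) add(other Checksum) {
	c.Sum ^= other.Sum
	c.KVs += other.KVs
	c.Size += other.Size
}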

// Result records the metrics information.
// This portion of the code may be implemented uniformly in the framework in the future.
type Result struct {
	LoadedRowCnt uint64
	ColSizeMap   map[int64]int64
}
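
// mergeResult is an illustrative helper showing how Results from multiple
// subtasks could be folded together: row counts add up and per-column sizes
// accumulate by column ID. This is a sketch, not the framework's actual
// aggregation.
func mergeResult(into *Result, from Result) {
	into.LoadedRowCnt += from.LoadedRowCnt
	if into.ColSizeMap == nil {
		into.ColSizeMap = make(map[int64]int64, len(from.ColSizeMap))
	}
	for colID, size := range from.ColSizeMap {
		into.ColSizeMap[colID] += size
	}
}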