1522 lines
49 KiB
Go
1522 lines
49 KiB
Go
// Copyright 2023 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package importer
|
|
|
|
import (
|
|
"cmp"
|
|
"context"
|
|
"database/sql"
|
|
"fmt"
|
|
"net/url"
|
|
"path/filepath"
|
|
"reflect"
|
|
"slices"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/docker/go-units"
|
|
"github.com/pingcap/errors"
|
|
"github.com/pingcap/kvproto/pkg/metapb"
|
|
"github.com/pingcap/tidb/br/pkg/storage"
|
|
"github.com/pingcap/tidb/br/pkg/streamhelper"
|
|
"github.com/pingcap/tidb/lightning/pkg/precheck"
|
|
"github.com/pingcap/tidb/pkg/lightning/backend/encode"
|
|
"github.com/pingcap/tidb/pkg/lightning/backend/kv"
|
|
"github.com/pingcap/tidb/pkg/lightning/checkpoints"
|
|
"github.com/pingcap/tidb/pkg/lightning/common"
|
|
"github.com/pingcap/tidb/pkg/lightning/config"
|
|
"github.com/pingcap/tidb/pkg/lightning/log"
|
|
"github.com/pingcap/tidb/pkg/lightning/mydump"
|
|
"github.com/pingcap/tidb/pkg/meta/model"
|
|
"github.com/pingcap/tidb/pkg/parser/mysql"
|
|
"github.com/pingcap/tidb/pkg/table"
|
|
"github.com/pingcap/tidb/pkg/types"
|
|
"github.com/pingcap/tidb/pkg/util"
|
|
"github.com/pingcap/tidb/pkg/util/cdcutil"
|
|
"github.com/pingcap/tidb/pkg/util/engine"
|
|
"github.com/pingcap/tidb/pkg/util/logutil"
|
|
"github.com/pingcap/tidb/pkg/util/set"
|
|
pdhttp "github.com/tikv/pd/client/http"
|
|
clientv3 "go.etcd.io/etcd/client/v3"
|
|
"go.uber.org/zap"
|
|
"golang.org/x/sync/errgroup"
|
|
"google.golang.org/grpc"
|
|
)
|
|
|
|
// clusterResourceCheckItem is the precheck item reported under
// precheck.CheckTargetClusterSize. It compares the estimated size of the
// source data with the space the target cluster reports as available.
type clusterResourceCheckItem struct {
	// preInfoGetter supplies the storage info, the source size estimation and
	// the max replica count used by the check.
	preInfoGetter PreImportInfoGetter
}
|
|
|
|
// NewClusterResourceCheckItem creates a new clusterResourceCheckItem.
|
|
func NewClusterResourceCheckItem(preInfoGetter PreImportInfoGetter) precheck.Checker {
|
|
return &clusterResourceCheckItem{
|
|
preInfoGetter: preInfoGetter,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker.GetCheckItemID. It always returns
// precheck.CheckTargetClusterSize.
func (*clusterResourceCheckItem) GetCheckItemID() precheck.CheckItemID {
	return precheck.CheckTargetClusterSize
}
|
|
|
|
func (ci *clusterResourceCheckItem) getClusterAvail(ctx context.Context) (tikvAvail uint64, tiflashAvail uint64, err error) {
|
|
storeInfo, err := ci.preInfoGetter.GetStorageInfo(ctx)
|
|
if err != nil {
|
|
return 0, 0, errors.Trace(err)
|
|
}
|
|
|
|
for _, store := range storeInfo.Stores {
|
|
avail, err := units.RAMInBytes(store.Status.Available)
|
|
if err != nil {
|
|
return 0, 0, errors.Trace(err)
|
|
}
|
|
if engine.IsTiFlashHTTPResp(&store.Store) {
|
|
tiflashAvail += uint64(avail)
|
|
} else {
|
|
tikvAvail += uint64(avail)
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// Check implements Checker.Check.
|
|
func (ci *clusterResourceCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Warn,
|
|
Passed: true,
|
|
Message: "",
|
|
}
|
|
|
|
var (
|
|
err error
|
|
tikvAvail uint64
|
|
tiflashAvail uint64
|
|
tikvSourceSize uint64
|
|
tiflashSourceSize uint64
|
|
taskMgr taskMetaMgr
|
|
)
|
|
taskMgrVal := ctx.Value(taskManagerKey)
|
|
if taskMgrVal != nil {
|
|
if mgr, ok := taskMgrVal.(taskMetaMgr); ok {
|
|
taskMgr = mgr
|
|
}
|
|
}
|
|
if taskMgr == nil {
|
|
var err error
|
|
estimatedDataSizeResult, err := ci.preInfoGetter.EstimateSourceDataSize(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
tikvSourceSize = uint64(estimatedDataSizeResult.SizeWithIndex)
|
|
tiflashSourceSize = uint64(estimatedDataSizeResult.TiFlashSize)
|
|
tikvAvail, tiflashAvail, err = ci.getClusterAvail(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
} else {
|
|
if err := taskMgr.CheckTasksExclusively(ctx, func(tasks []taskMeta) ([]taskMeta, error) {
|
|
tikvAvail = 0
|
|
tiflashAvail = 0
|
|
tikvSourceSize = 0
|
|
tiflashSourceSize = 0
|
|
restoreStarted := false
|
|
for _, task := range tasks {
|
|
if task.status > taskMetaStatusInitial {
|
|
restoreStarted = true
|
|
}
|
|
tikvSourceSize += task.tikvSourceBytes
|
|
tiflashSourceSize += task.tiflashSourceBytes
|
|
if task.tikvAvail > 0 {
|
|
tikvAvail = task.tikvAvail
|
|
}
|
|
if task.tiflashAvail > 0 {
|
|
tiflashAvail = task.tiflashAvail
|
|
}
|
|
}
|
|
if restoreStarted || tikvAvail > 0 || tiflashAvail > 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
tikvAvail, tiflashAvail, err = ci.getClusterAvail(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
newTasks := slices.Clone(tasks)
|
|
for i := range newTasks {
|
|
newTasks[i].tikvAvail = tikvAvail
|
|
newTasks[i].tiflashAvail = tiflashAvail
|
|
}
|
|
return newTasks, nil
|
|
}); err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
}
|
|
|
|
replicaCount, err := ci.preInfoGetter.GetMaxReplica(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
tikvSourceSize = tikvSourceSize * replicaCount
|
|
|
|
if tikvSourceSize <= tikvAvail && tiflashSourceSize <= tiflashAvail {
|
|
theResult.Message = fmt.Sprintf("The storage space is rich, which TiKV/Tiflash is %s/%s. The estimated storage space is %s/%s.",
|
|
units.BytesSize(float64(tikvAvail)), units.BytesSize(float64(tiflashAvail)), units.BytesSize(float64(tikvSourceSize)), units.BytesSize(float64(tiflashSourceSize)))
|
|
}
|
|
|
|
if tikvSourceSize > tikvAvail {
|
|
theResult.Passed = false
|
|
theResult.Message += fmt.Sprintf("TiKV requires more storage space. Estimated required size: %s. Actual size: %s.",
|
|
units.BytesSize(float64(tikvSourceSize)), units.BytesSize(float64(tikvAvail)))
|
|
}
|
|
if tiflashAvail > 0 && tiflashSourceSize > tiflashAvail {
|
|
theResult.Passed = false
|
|
theResult.Message += fmt.Sprintf(" TiFlash requires more storage space. Estimated required size: %s. Actual size: %s.",
|
|
units.BytesSize(float64(tiflashSourceSize)), units.BytesSize(float64(tiflashAvail)))
|
|
}
|
|
if !theResult.Passed {
|
|
theResult.Message += " Please increase storage to prevent import task failures."
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
// clusterVersionCheckItem is the precheck item reported under
// precheck.CheckTargetClusterVersion.
type clusterVersionCheckItem struct {
	// preInfoGetter performs the actual version requirement check.
	preInfoGetter PreImportInfoGetter
	// dbMetas is attached to the context handed to preInfoGetter.
	dbMetas []*mydump.MDDatabaseMeta
}
|
|
|
|
// NewClusterVersionCheckItem creates a new clusterVersionCheckItem.
|
|
func NewClusterVersionCheckItem(preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
|
|
return &clusterVersionCheckItem{
|
|
preInfoGetter: preInfoGetter,
|
|
dbMetas: dbMetas,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker.GetCheckItemID. It always returns
// precheck.CheckTargetClusterVersion.
func (*clusterVersionCheckItem) GetCheckItemID() precheck.CheckItemID {
	return precheck.CheckTargetClusterVersion
}
|
|
|
|
// Check implements Checker.Check.
|
|
func (ci *clusterVersionCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Critical,
|
|
Passed: true,
|
|
Message: "Cluster version check passed",
|
|
}
|
|
checkCtx := WithPreInfoGetterDBMetas(ctx, ci.dbMetas)
|
|
if err := ci.preInfoGetter.CheckVersionRequirements(checkCtx); err != nil {
|
|
err := common.NormalizeError(err)
|
|
theResult.Passed = false
|
|
theResult.Message = fmt.Sprintf("Cluster version check failed: %s", err.Error())
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
// emptyRegionCheckItem is the precheck item reported under
// precheck.CheckTargetClusterEmptyRegion.
type emptyRegionCheckItem struct {
	// preInfoGetter supplies table structures, storage info and the empty
	// region list.
	preInfoGetter PreImportInfoGetter
	// dbMetas lists the source databases; their table count scales the
	// empty-region thresholds.
	dbMetas []*mydump.MDDatabaseMeta
}
|
|
|
|
// NewEmptyRegionCheckItem creates a new emptyRegionCheckItem.
|
|
func NewEmptyRegionCheckItem(preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
|
|
return &emptyRegionCheckItem{
|
|
preInfoGetter: preInfoGetter,
|
|
dbMetas: dbMetas,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker.GetCheckItemID. It always returns
// precheck.CheckTargetClusterEmptyRegion.
func (*emptyRegionCheckItem) GetCheckItemID() precheck.CheckItemID {
	return precheck.CheckTargetClusterEmptyRegion
}
|
|
|
|
// Check implements Checker.Check.
|
|
func (ci *emptyRegionCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Warn,
|
|
Passed: true,
|
|
Message: "Cluster doesn't have too many empty regions",
|
|
}
|
|
dbInfos, err := ci.preInfoGetter.GetAllTableStructures(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
storeInfo, err := ci.preInfoGetter.GetStorageInfo(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
if len(storeInfo.Stores) <= 1 {
|
|
return theResult, nil
|
|
}
|
|
emptyRegionsInfo, err := ci.preInfoGetter.GetEmptyRegionsInfo(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
regions := make(map[int64]int)
|
|
stores := make(map[int64]*pdhttp.StoreInfo)
|
|
for _, region := range emptyRegionsInfo.Regions {
|
|
for _, peer := range region.Peers {
|
|
regions[peer.StoreID]++
|
|
}
|
|
}
|
|
for _, store := range storeInfo.Stores {
|
|
stores[store.Store.ID] = &store
|
|
}
|
|
tableCount := 0
|
|
for _, db := range ci.dbMetas {
|
|
info, ok := dbInfos[db.Name]
|
|
if !ok {
|
|
continue
|
|
}
|
|
tableCount += len(info.Tables)
|
|
}
|
|
errorThrehold := max(errorEmptyRegionCntPerStore, tableCount*3)
|
|
warnThrehold := max(warnEmptyRegionCntPerStore, tableCount)
|
|
var (
|
|
errStores []string
|
|
warnStores []string
|
|
)
|
|
for storeID, regionCnt := range regions {
|
|
if store, ok := stores[storeID]; ok {
|
|
if metapb.StoreState(metapb.StoreState_value[store.Store.StateName]) != metapb.StoreState_Up {
|
|
continue
|
|
}
|
|
if engine.IsTiFlashHTTPResp(&store.Store) {
|
|
continue
|
|
}
|
|
if regionCnt > errorThrehold {
|
|
errStores = append(errStores, strconv.Itoa(int(storeID)))
|
|
} else if regionCnt > warnThrehold {
|
|
warnStores = append(warnStores, strconv.Itoa(int(storeID)))
|
|
}
|
|
}
|
|
}
|
|
|
|
var messages []string
|
|
if len(errStores) > 0 {
|
|
theResult.Passed = false
|
|
messages = append(messages, fmt.Sprintf("TiKV stores (%s) contains more than %v empty regions respectively, "+
|
|
"which will greatly affect the import speed and success rate", strings.Join(errStores, ", "), errorEmptyRegionCntPerStore))
|
|
}
|
|
if len(warnStores) > 0 {
|
|
messages = append(messages, fmt.Sprintf("TiKV stores (%s) contains more than %v empty regions respectively, "+
|
|
"which will affect the import speed and success rate", strings.Join(warnStores, ", "), warnEmptyRegionCntPerStore))
|
|
}
|
|
if len(messages) > 0 {
|
|
theResult.Message = strings.Join(messages, "\n")
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
// regionDistributionCheckItem is the precheck item reported under
// precheck.CheckTargetClusterRegionDist.
type regionDistributionCheckItem struct {
	// preInfoGetter supplies storage info and table structures.
	preInfoGetter PreImportInfoGetter
	// dbMetas lists the source databases; their table count raises the
	// region-count threshold below which the check is skipped.
	dbMetas []*mydump.MDDatabaseMeta
}
|
|
|
|
// NewRegionDistributionCheckItem creates a new regionDistributionCheckItem.
|
|
func NewRegionDistributionCheckItem(preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
|
|
return ®ionDistributionCheckItem{
|
|
preInfoGetter: preInfoGetter,
|
|
dbMetas: dbMetas,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker.GetCheckItemID. It always returns
// precheck.CheckTargetClusterRegionDist.
func (*regionDistributionCheckItem) GetCheckItemID() precheck.CheckItemID {
	return precheck.CheckTargetClusterRegionDist
}
|
|
|
|
// Check implements Checker.Check.
|
|
func (ci *regionDistributionCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Warn,
|
|
Passed: true,
|
|
Message: "Cluster region distribution is balanced",
|
|
}
|
|
|
|
storesInfo, err := ci.preInfoGetter.GetStorageInfo(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
stores := make([]*pdhttp.StoreInfo, 0, len(storesInfo.Stores))
|
|
for _, store := range storesInfo.Stores {
|
|
if metapb.StoreState(metapb.StoreState_value[store.Store.StateName]) != metapb.StoreState_Up {
|
|
continue
|
|
}
|
|
if engine.IsTiFlashHTTPResp(&store.Store) {
|
|
continue
|
|
}
|
|
stores = append(stores, &store)
|
|
}
|
|
if len(stores) <= 1 {
|
|
return theResult, nil
|
|
}
|
|
slices.SortFunc(stores, func(i, j *pdhttp.StoreInfo) int {
|
|
return cmp.Compare(i.Status.RegionCount, j.Status.RegionCount)
|
|
})
|
|
minStore := stores[0]
|
|
maxStore := stores[len(stores)-1]
|
|
|
|
dbInfos, err := ci.preInfoGetter.GetAllTableStructures(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
tableCount := 0
|
|
for _, db := range ci.dbMetas {
|
|
info, ok := dbInfos[db.Name]
|
|
if !ok {
|
|
continue
|
|
}
|
|
tableCount += len(info.Tables)
|
|
}
|
|
threhold := max(checkRegionCntRatioThreshold, tableCount)
|
|
if maxStore.Status.RegionCount <= int64(threhold) {
|
|
return theResult, nil
|
|
}
|
|
ratio := float64(minStore.Status.RegionCount) / float64(maxStore.Status.RegionCount)
|
|
if ratio < errorRegionCntMinMaxRatio {
|
|
theResult.Passed = false
|
|
theResult.Message = fmt.Sprintf("Region distribution is unbalanced, the ratio of the regions count of the store(%v) "+
|
|
"with least regions(%v) to the store(%v) with most regions(%v) is %v, but we expect it must not be less than %v",
|
|
minStore.Store.ID, minStore.Status.RegionCount, maxStore.Store.ID, maxStore.Status.RegionCount, ratio, errorRegionCntMinMaxRatio)
|
|
} else if ratio < warnRegionCntMinMaxRatio {
|
|
theResult.Message = fmt.Sprintf("Region distribution is unbalanced, the ratio of the regions count of the store(%v) "+
|
|
"with least regions(%v) to the store(%v) with most regions(%v) is %v, but we expect it should not be less than %v",
|
|
minStore.Store.ID, minStore.Status.RegionCount, maxStore.Store.ID, maxStore.Status.RegionCount, ratio, warnRegionCntMinMaxRatio)
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
// storagePermissionCheckItem is the precheck item reported under
// precheck.CheckSourcePermission.
type storagePermissionCheckItem struct {
	// cfg provides Mydumper.SourceDir, the storage location being checked.
	cfg *config.Config
}
|
|
|
|
// NewStoragePermissionCheckItem creates a new storagePermissionCheckItem.
|
|
func NewStoragePermissionCheckItem(cfg *config.Config) precheck.Checker {
|
|
return &storagePermissionCheckItem{
|
|
cfg: cfg,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker.GetCheckItemID. It always returns
// precheck.CheckSourcePermission.
func (*storagePermissionCheckItem) GetCheckItemID() precheck.CheckItemID {
	return precheck.CheckSourcePermission
}
|
|
|
|
// Check implements Checker.Check.
|
|
func (ci *storagePermissionCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Critical,
|
|
Passed: true,
|
|
Message: "Lightning has the correct storage permission",
|
|
}
|
|
|
|
u, err := storage.ParseBackend(ci.cfg.Mydumper.SourceDir, nil)
|
|
if err != nil {
|
|
return nil, common.NormalizeError(err)
|
|
}
|
|
_, err = storage.New(ctx, u, &storage.ExternalStorageOptions{
|
|
CheckPermissions: []storage.Permission{
|
|
storage.ListObjects,
|
|
storage.GetObject,
|
|
},
|
|
})
|
|
if err != nil {
|
|
theResult.Passed = false
|
|
theResult.Message = err.Error()
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
// largeFileCheckItem is the precheck item reported under
// precheck.CheckLargeDataFile.
type largeFileCheckItem struct {
	// cfg provides Mydumper.StrictFormat, which disables the size check.
	cfg *config.Config
	// dbMetas lists the source databases whose data files are inspected.
	dbMetas []*mydump.MDDatabaseMeta
}
|
|
|
|
// NewLargeFileCheckItem creates a new largeFileCheckItem.
|
|
func NewLargeFileCheckItem(cfg *config.Config, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
|
|
return &largeFileCheckItem{
|
|
cfg: cfg,
|
|
dbMetas: dbMetas,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker.GetCheckItemID. It always returns
// precheck.CheckLargeDataFile.
func (*largeFileCheckItem) GetCheckItemID() precheck.CheckItemID {
	return precheck.CheckLargeDataFile
}
|
|
|
|
// Check implements Checker.Check.
|
|
func (ci *largeFileCheckItem) Check(_ context.Context) (*precheck.CheckResult, error) {
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Warn,
|
|
Passed: true,
|
|
Message: "Source data files size is proper",
|
|
}
|
|
|
|
if !ci.cfg.Mydumper.StrictFormat {
|
|
for _, db := range ci.dbMetas {
|
|
for _, t := range db.Tables {
|
|
for _, f := range t.DataFiles {
|
|
if f.FileMeta.RealSize > defaultCSVSize {
|
|
theResult.Message = fmt.Sprintf("large data file: %s file exists and it will slow down import performance", f.FileMeta.Path)
|
|
theResult.Passed = false
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
theResult.Message = "Skip the data file size check, because config.StrictFormat is true"
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
// localDiskPlacementCheckItem is the precheck item reported under
// precheck.CheckLocalDiskPlacement.
type localDiskPlacementCheckItem struct {
	// cfg provides Mydumper.SourceDir and TikvImporter.SortedKVDir, the two
	// directories whose disks are compared.
	cfg *config.Config
}
|
|
|
|
// NewLocalDiskPlacementCheckItem creates a new localDiskPlacementCheckItem.
|
|
func NewLocalDiskPlacementCheckItem(cfg *config.Config) precheck.Checker {
|
|
return &localDiskPlacementCheckItem{
|
|
cfg: cfg,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker.GetCheckItemID. It always returns
// precheck.CheckLocalDiskPlacement.
func (*localDiskPlacementCheckItem) GetCheckItemID() precheck.CheckItemID {
	return precheck.CheckLocalDiskPlacement
}
|
|
|
|
// Check implements Checker.Check.
|
|
func (ci *localDiskPlacementCheckItem) Check(_ context.Context) (*precheck.CheckResult, error) {
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Warn,
|
|
Passed: true,
|
|
Message: "local source dir and temp-kv dir are in different disks",
|
|
}
|
|
sourceDir := strings.TrimPrefix(ci.cfg.Mydumper.SourceDir, storage.LocalURIPrefix)
|
|
same, err := common.SameDisk(sourceDir, ci.cfg.TikvImporter.SortedKVDir)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
if same {
|
|
theResult.Passed = false
|
|
theResult.Message = fmt.Sprintf("sorted-kv-dir:%s and data-source-dir:%s are in the same disk, may slow down performance",
|
|
ci.cfg.TikvImporter.SortedKVDir, sourceDir)
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
// localTempKVDirCheckItem is the precheck item reported under
// precheck.CheckLocalTempKVDir.
type localTempKVDirCheckItem struct {
	// cfg provides TikvImporter.SortedKVDir and TikvImporter.DiskQuota.
	cfg *config.Config
	// preInfoGetter estimates the size of the source data.
	preInfoGetter PreImportInfoGetter
	// dbMetas lists the source databases; used to detect compressed files.
	dbMetas []*mydump.MDDatabaseMeta
}
|
|
|
|
// NewLocalTempKVDirCheckItem creates a new localTempKVDirCheckItem.
|
|
func NewLocalTempKVDirCheckItem(cfg *config.Config, preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
|
|
return &localTempKVDirCheckItem{
|
|
cfg: cfg,
|
|
preInfoGetter: preInfoGetter,
|
|
dbMetas: dbMetas,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker.GetCheckItemID. It always returns
// precheck.CheckLocalTempKVDir.
func (*localTempKVDirCheckItem) GetCheckItemID() precheck.CheckItemID {
	return precheck.CheckLocalTempKVDir
}
|
|
|
|
func (ci *localTempKVDirCheckItem) hasCompressedFiles() bool {
|
|
for _, dbMeta := range ci.dbMetas {
|
|
for _, tbMeta := range dbMeta.Tables {
|
|
for _, file := range tbMeta.DataFiles {
|
|
if file.FileMeta.Compression != mydump.CompressionNone {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Check implements Checker.Check.
|
|
func (ci *localTempKVDirCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
severity := precheck.Critical
|
|
// for cases that have compressed files, the estimated size may not be accurate, set severity to Warn to avoid failure
|
|
if ci.hasCompressedFiles() {
|
|
severity = precheck.Warn
|
|
}
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: severity,
|
|
}
|
|
storageSize, err := common.GetStorageSize(ci.cfg.TikvImporter.SortedKVDir)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
localAvailable := int64(storageSize.Available)
|
|
availableStr := units.BytesSize(float64(localAvailable))
|
|
|
|
estimatedDataSizeResult, err := ci.preInfoGetter.EstimateSourceDataSize(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
estimatedDataSizeWithIndex := estimatedDataSizeResult.SizeWithIndex
|
|
estimatedStr := units.BytesSize(float64(estimatedDataSizeWithIndex))
|
|
|
|
diskQuota := int64(ci.cfg.TikvImporter.DiskQuota)
|
|
diskQuotaStr := units.BytesSize(float64(diskQuota))
|
|
|
|
// Warn the user if diskQuota is 0 or negative, as it's likely a misconfiguration
|
|
if diskQuota <= 0 {
|
|
logutil.Logger(ctx).Warn("`tikv-importer.disk-quota` is set to 0 or less; please configure a valid positive value")
|
|
}
|
|
|
|
switch {
|
|
case localAvailable > estimatedDataSizeWithIndex:
|
|
theResult.Message = fmt.Sprintf("local disk resources are rich, estimate sorted data size %s, local available is %s",
|
|
estimatedStr, availableStr)
|
|
theResult.Passed = true
|
|
case diskQuota > localAvailable:
|
|
theResult.Message = fmt.Sprintf("local disk space is insufficient to meet the configured disk-quota. "+
|
|
"Available space: %s, Configured disk-quota: %s. "+
|
|
"Please increase the available disk space or adjust the tikv-importer.disk-quota setting to a value lower than the available space and try again",
|
|
availableStr,
|
|
diskQuotaStr)
|
|
theResult.Passed = false
|
|
logutil.Logger(ctx).Error(theResult.Message)
|
|
default:
|
|
theResult.Message = fmt.Sprintf("local disk space may not enough to finish import, "+
|
|
"estimate sorted data size is %s, but local available is %s,"+
|
|
"we will use disk-quota (size: %s) to finish imports, which may slow down import",
|
|
estimatedStr,
|
|
availableStr, diskQuotaStr)
|
|
theResult.Passed = true
|
|
logutil.Logger(ctx).Warn(theResult.Message)
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
// checkpointCheckItem is the precheck item reported under
// precheck.CheckCheckpoints.
type checkpointCheckItem struct {
	// cfg provides Checkpoint.Enable and Mydumper.SourceDir.
	cfg *config.Config
	// preInfoGetter supplies the table structures of the target cluster.
	preInfoGetter PreImportInfoGetter
	// dbMetas lists the source databases whose tables are validated.
	dbMetas []*mydump.MDDatabaseMeta
	// checkpointsDB is where existing checkpoints are read from; the check
	// is skipped when it is nil.
	checkpointsDB checkpoints.DB
}
|
|
|
|
// NewCheckpointCheckItem creates a new checkpointCheckItem.
|
|
func NewCheckpointCheckItem(cfg *config.Config, preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta, checkpointsDB checkpoints.DB) precheck.Checker {
|
|
return &checkpointCheckItem{
|
|
cfg: cfg,
|
|
preInfoGetter: preInfoGetter,
|
|
dbMetas: dbMetas,
|
|
checkpointsDB: checkpointsDB,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker.GetCheckItemID. It always returns
// precheck.CheckCheckpoints.
func (*checkpointCheckItem) GetCheckItemID() precheck.CheckItemID {
	return precheck.CheckCheckpoints
}
|
|
|
|
// Check implements Checker.Check.
|
|
func (ci *checkpointCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
if !ci.cfg.Checkpoint.Enable || ci.checkpointsDB == nil {
|
|
return nil, nil
|
|
}
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Critical,
|
|
Passed: true,
|
|
Message: "the checkpoints are valid",
|
|
}
|
|
|
|
checkMsgs := []string{}
|
|
dbInfos, err := ci.preInfoGetter.GetAllTableStructures(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
for _, dbInfo := range ci.dbMetas {
|
|
for _, tableInfo := range dbInfo.Tables {
|
|
msgs, err := ci.checkpointIsValid(ctx, tableInfo, dbInfos)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
checkMsgs = append(checkMsgs, msgs...)
|
|
}
|
|
}
|
|
if len(checkMsgs) > 0 {
|
|
theResult.Passed = false
|
|
theResult.Message = strings.Join(checkMsgs, "\n")
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
// checkpointIsValid checks whether we can start this import with this checkpoint.
|
|
func (ci *checkpointCheckItem) checkpointIsValid(ctx context.Context, tableInfo *mydump.MDTableMeta, dbInfos map[string]*checkpoints.TidbDBInfo) ([]string, error) {
|
|
msgs := make([]string, 0)
|
|
uniqueName := common.UniqueTable(tableInfo.DB, tableInfo.Name)
|
|
tableCheckPoint, err := ci.checkpointsDB.Get(ctx, uniqueName)
|
|
if err != nil {
|
|
if errors.IsNotFound(err) {
|
|
// there is no checkpoint
|
|
logutil.Logger(ctx).Debug("no checkpoint detected", zap.String("table", uniqueName))
|
|
return nil, nil
|
|
}
|
|
return nil, errors.Trace(err)
|
|
}
|
|
// if checkpoint enable and not missing, we skip the check table empty progress.
|
|
if tableCheckPoint.Status <= checkpoints.CheckpointStatusMissing {
|
|
return nil, nil
|
|
}
|
|
|
|
if tableCheckPoint.Status <= checkpoints.CheckpointStatusMaxInvalid {
|
|
failedStep := tableCheckPoint.Status * 10
|
|
var action strings.Builder
|
|
action.WriteString("./tidb-lightning-ctl --checkpoint-error-")
|
|
switch failedStep {
|
|
case checkpoints.CheckpointStatusAlteredAutoInc, checkpoints.CheckpointStatusAnalyzed:
|
|
action.WriteString("ignore")
|
|
default:
|
|
action.WriteString("destroy")
|
|
}
|
|
action.WriteString("='")
|
|
action.WriteString(uniqueName)
|
|
action.WriteString("' --config=...")
|
|
|
|
msgs = append(msgs, fmt.Sprintf("TiDB Lightning has failed last time. To prevent data loss, this run will stop now, "+
|
|
"%s failed in step(%s), please run command %s,"+
|
|
"You may also run `./tidb-lightning-ctl --checkpoint-error-destroy=all --config=...` to start from scratch,"+
|
|
"For details of this failure, read the log file from the PREVIOUS run",
|
|
uniqueName, failedStep.MetricName(), action.String()))
|
|
return msgs, nil
|
|
}
|
|
|
|
dbInfo, ok := dbInfos[tableInfo.DB]
|
|
if ok {
|
|
t, ok := dbInfo.Tables[tableInfo.Name]
|
|
if ok {
|
|
if tableCheckPoint.TableID > 0 && tableCheckPoint.TableID != t.ID {
|
|
msgs = append(msgs, fmt.Sprintf("TiDB Lightning has detected tables with illegal checkpoints. To prevent data loss, this run will stop now,"+
|
|
"please run command \"./tidb-lightning-ctl --checkpoint-remove='%s' --config=...\""+
|
|
"You may also run `./tidb-lightning-ctl --checkpoint-error-destroy=all --config=...` to start from scratch,"+
|
|
"For details of this failure, read the log file from the PREVIOUS run",
|
|
uniqueName))
|
|
return msgs, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
var permFromCheckpoint []int
|
|
var columns []string
|
|
for _, eng := range tableCheckPoint.Engines {
|
|
if len(eng.Chunks) > 0 {
|
|
chunk := eng.Chunks[0]
|
|
permFromCheckpoint = chunk.ColumnPermutation
|
|
columns = chunk.Chunk.Columns
|
|
if filepath.Dir(chunk.FileMeta.Path) != ci.cfg.Mydumper.SourceDir {
|
|
message := fmt.Sprintf("chunk checkpoints path is not equal to config"+
|
|
"checkpoint is %s, config source dir is %s", chunk.FileMeta.Path, ci.cfg.Mydumper.SourceDir)
|
|
msgs = append(msgs, message)
|
|
}
|
|
}
|
|
}
|
|
if len(columns) == 0 {
|
|
logutil.Logger(ctx).Debug("no valid checkpoint detected", zap.String("table", uniqueName))
|
|
return nil, nil
|
|
}
|
|
info := dbInfos[tableInfo.DB].Tables[tableInfo.Name]
|
|
if info != nil {
|
|
permFromTiDB, err := parseColumnPermutations(info.Core, columns, nil, log.Wrap(logutil.Logger(ctx)))
|
|
if err != nil {
|
|
msgs = append(msgs, fmt.Sprintf("failed to calculate columns %s, table %s's info has changed,"+
|
|
"consider remove this checkpoint, and start import again.", err.Error(), uniqueName))
|
|
}
|
|
if !reflect.DeepEqual(permFromCheckpoint, permFromTiDB) {
|
|
msgs = append(msgs, fmt.Sprintf("compare columns perm failed. table %s's info has changed,"+
|
|
"consider remove this checkpoint, and start import again.", uniqueName))
|
|
}
|
|
}
|
|
return msgs, nil
|
|
}
|
|
|
|
// CDCPITRCheckItem checks whether the downstream cluster has CDC or PiTR
// enabled. It is exported so that callers can override the Instruction
// message.
type CDCPITRCheckItem struct {
	// cfg provides the configured backend and the TLS settings used to dial
	// etcd.
	cfg *config.Config
	// Instruction is appended to the failure message to tell the user what
	// to do when a CDC or PiTR task is found.
	Instruction string
	// pdAddrsGetter returns the addresses used as etcd endpoints.
	pdAddrsGetter func(context.Context) []string
	// used in test
	etcdCli *clientv3.Client
}
|
|
|
|
// NewCDCPITRCheckItem creates a checker to check downstream has enabled CDC or PiTR.
|
|
func NewCDCPITRCheckItem(cfg *config.Config, pdAddrsGetter func(context.Context) []string) precheck.Checker {
|
|
return &CDCPITRCheckItem{
|
|
cfg: cfg,
|
|
Instruction: "local backend is not compatible with them. Please switch to tidb backend then try again.",
|
|
pdAddrsGetter: pdAddrsGetter,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker interface. It always returns
// precheck.CheckTargetUsingCDCPITR.
func (*CDCPITRCheckItem) GetCheckItemID() precheck.CheckItemID {
	return precheck.CheckTargetUsingCDCPITR
}
|
|
|
|
func dialEtcdWithCfg(
|
|
ctx context.Context,
|
|
cfg *config.Config,
|
|
addrs []string,
|
|
) (*clientv3.Client, error) {
|
|
cfg2, err := cfg.ToTLS()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tlsConfig := cfg2.TLSConfig()
|
|
|
|
return clientv3.New(clientv3.Config{
|
|
TLS: tlsConfig,
|
|
Endpoints: addrs,
|
|
AutoSyncInterval: 30 * time.Second,
|
|
DialTimeout: 5 * time.Second,
|
|
DialOptions: []grpc.DialOption{
|
|
config.DefaultGrpcKeepaliveParams,
|
|
grpc.WithBlock(),
|
|
grpc.WithReturnConnectionError(),
|
|
},
|
|
Context: ctx,
|
|
})
|
|
}
|
|
|
|
// Check implements Checker interface.
|
|
func (ci *CDCPITRCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Critical,
|
|
}
|
|
|
|
if ci.cfg.TikvImporter.Backend != config.BackendLocal {
|
|
theResult.Passed = true
|
|
theResult.Message = "TiDB Lightning is not using local backend, skip this check"
|
|
return theResult, nil
|
|
}
|
|
|
|
if ci.etcdCli == nil {
|
|
var err error
|
|
ci.etcdCli, err = dialEtcdWithCfg(ctx, ci.cfg, ci.pdAddrsGetter(ctx))
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
//nolint: errcheck
|
|
defer ci.etcdCli.Close()
|
|
}
|
|
|
|
errorMsg := make([]string, 0, 2)
|
|
|
|
pitrCli := streamhelper.NewMetaDataClient(ci.etcdCli)
|
|
tasks, err := pitrCli.GetAllTasks(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
if len(tasks) > 0 {
|
|
names := make([]string, 0, len(tasks))
|
|
for _, task := range tasks {
|
|
names = append(names, task.Info.GetName())
|
|
}
|
|
errorMsg = append(errorMsg, fmt.Sprintf("found PiTR log streaming task(s): %v,", names))
|
|
}
|
|
|
|
nameSet, err := cdcutil.GetRunningChangefeeds(ctx, ci.etcdCli)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
|
|
if !nameSet.Empty() {
|
|
errorMsg = append(errorMsg, nameSet.MessageToUser())
|
|
}
|
|
|
|
if len(errorMsg) > 0 {
|
|
errorMsg = append(errorMsg, ci.Instruction)
|
|
theResult.Passed = false
|
|
theResult.Message = strings.Join(errorMsg, "\n")
|
|
} else {
|
|
theResult.Passed = true
|
|
theResult.Message = "no CDC or PiTR task found"
|
|
}
|
|
|
|
return theResult, nil
|
|
}
|
|
|
|
// onlyState captures just the "state" field of a JSON document.
// NOTE(review): no user of this type is visible in this part of the file —
// confirm where it is decoded before relying on this description.
type onlyState struct {
	State string `json:"state"`
}
|
|
|
|
// schemaCheckItem is the precheck item reported under
// precheck.CheckSourceSchemaValid.
type schemaCheckItem struct {
	// cfg provides Checkpoint.Enable and the Mydumper settings used while
	// validating a table.
	cfg *config.Config
	// preInfoGetter supplies the table structures of the target cluster.
	preInfoGetter PreImportInfoGetter
	// dbMetas lists the source databases whose tables are validated.
	dbMetas []*mydump.MDDatabaseMeta
	// checkpointsDB is consulted to skip tables that already have a
	// checkpoint; may be nil.
	checkpointsDB checkpoints.DB
}
|
|
|
|
// NewSchemaCheckItem creates a checker to check whether the schema is valid.
|
|
func NewSchemaCheckItem(cfg *config.Config, preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta, cpdb checkpoints.DB) precheck.Checker {
|
|
return &schemaCheckItem{
|
|
cfg: cfg,
|
|
preInfoGetter: preInfoGetter,
|
|
dbMetas: dbMetas,
|
|
checkpointsDB: cpdb,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker interface. It always returns
// precheck.CheckSourceSchemaValid.
func (*schemaCheckItem) GetCheckItemID() precheck.CheckItemID {
	return precheck.CheckSourceSchemaValid
}
|
|
|
|
// Check implements Checker interface.
|
|
func (ci *schemaCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Critical,
|
|
Passed: true,
|
|
Message: "table schemas are valid",
|
|
}
|
|
|
|
dbInfos, err := ci.preInfoGetter.GetAllTableStructures(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
|
|
checkMsgs := []string{}
|
|
for _, dbInfo := range ci.dbMetas {
|
|
for _, tableInfo := range dbInfo.Tables {
|
|
if ci.cfg.Checkpoint.Enable && ci.checkpointsDB != nil {
|
|
uniqueName := common.UniqueTable(tableInfo.DB, tableInfo.Name)
|
|
if _, err := ci.checkpointsDB.Get(ctx, uniqueName); err == nil {
|
|
// there is a checkpoint
|
|
log.L().Debug("checkpoint detected, skip the schema check", zap.String("table", uniqueName))
|
|
continue
|
|
}
|
|
}
|
|
msgs, err := ci.SchemaIsValid(ctx, tableInfo, dbInfos)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
checkMsgs = append(checkMsgs, msgs...)
|
|
}
|
|
}
|
|
if len(checkMsgs) > 0 {
|
|
theResult.Passed = false
|
|
theResult.Message = strings.Join(checkMsgs, "\n")
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
// SchemaIsValid checks the import file and cluster schema is match.
|
|
func (ci *schemaCheckItem) SchemaIsValid(ctx context.Context, tableInfo *mydump.MDTableMeta, dbInfos map[string]*checkpoints.TidbDBInfo) ([]string, error) {
|
|
if len(tableInfo.DataFiles) == 0 {
|
|
logutil.Logger(ctx).Info("no data files detected", zap.String("db", tableInfo.DB), zap.String("table", tableInfo.Name))
|
|
return nil, nil
|
|
}
|
|
|
|
msgs := make([]string, 0)
|
|
|
|
info, ok := dbInfos[tableInfo.DB].Tables[tableInfo.Name]
|
|
if !ok {
|
|
msgs = append(msgs, fmt.Sprintf("TiDB schema `%s`.`%s` doesn't exists,"+
|
|
"please give a schema file in source dir or create table manually", tableInfo.DB, tableInfo.Name))
|
|
return msgs, nil
|
|
}
|
|
|
|
igCol, err := ci.cfg.Mydumper.IgnoreColumns.GetIgnoreColumns(tableInfo.DB, tableInfo.Name, ci.cfg.Mydumper.CaseSensitive)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
igCols := igCol.ColumnsMap()
|
|
|
|
fullExtendColsSet := make(set.StringSet)
|
|
for _, fileInfo := range tableInfo.DataFiles {
|
|
for _, col := range fileInfo.FileMeta.ExtendData.Columns {
|
|
if _, ok = igCols[col]; ok {
|
|
msgs = append(msgs, fmt.Sprintf("extend column %s is also assigned in ignore-column for table `%s`.`%s`, "+
|
|
"please keep only either one of them", col, tableInfo.DB, tableInfo.Name))
|
|
}
|
|
fullExtendColsSet.Insert(col)
|
|
}
|
|
}
|
|
if len(msgs) > 0 {
|
|
return msgs, nil
|
|
}
|
|
|
|
colCountFromTiDB := len(info.Core.Columns)
|
|
if len(fullExtendColsSet) > 0 {
|
|
logutil.Logger(ctx).Info("check extend column count through data files", zap.String("db", tableInfo.DB),
|
|
zap.String("table", tableInfo.Name))
|
|
igColCnt := 0
|
|
for _, col := range info.Core.Columns {
|
|
if _, ok = igCols[col.Name.L]; ok {
|
|
igColCnt++
|
|
}
|
|
}
|
|
for _, f := range tableInfo.DataFiles {
|
|
cols, previewRows, err := ci.preInfoGetter.ReadFirstNRowsByFileMeta(ctx, f.FileMeta, 1)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
if len(cols) > 0 {
|
|
colsSet := set.NewStringSet(cols...)
|
|
for _, extendCol := range f.FileMeta.ExtendData.Columns {
|
|
if colsSet.Exist(strings.ToLower(extendCol)) {
|
|
msgs = append(msgs, fmt.Sprintf("extend column %s is contained in table `%s`.`%s`'s header, "+
|
|
"please remove this column in data or remove this extend rule", extendCol, tableInfo.DB, tableInfo.Name))
|
|
}
|
|
}
|
|
} else if len(previewRows) > 0 && len(previewRows[0])+len(f.FileMeta.ExtendData.Columns) > colCountFromTiDB+igColCnt {
|
|
msgs = append(msgs, fmt.Sprintf("row count %d adding with extend column length %d is larger than columnCount %d plus ignore column count %d for table `%s`.`%s`, "+
|
|
"please make sure your source data don't have extend columns and target schema has all of them", len(previewRows[0]), len(f.FileMeta.ExtendData.Columns), colCountFromTiDB, igColCnt, tableInfo.DB, tableInfo.Name))
|
|
}
|
|
}
|
|
}
|
|
if len(msgs) > 0 {
|
|
return msgs, nil
|
|
}
|
|
|
|
core := info.Core
|
|
defaultCols := make(map[string]struct{})
|
|
autoRandomCol := common.GetAutoRandomColumn(core)
|
|
for _, col := range core.Columns {
|
|
// we can extend column the same with columns with default values
|
|
if _, isExtendCol := fullExtendColsSet[col.Name.O]; isExtendCol || hasDefault(col) || (autoRandomCol != nil && autoRandomCol.ID == col.ID) {
|
|
// this column has default value or it's auto random id, so we can ignore it
|
|
defaultCols[col.Name.L] = struct{}{}
|
|
}
|
|
delete(fullExtendColsSet, col.Name.O)
|
|
}
|
|
if len(fullExtendColsSet) > 0 {
|
|
extendCols := make([]string, 0, len(fullExtendColsSet))
|
|
for col := range fullExtendColsSet {
|
|
extendCols = append(extendCols, col)
|
|
}
|
|
msgs = append(msgs, fmt.Sprintf("extend column [%s] don't exist in target table `%s`.`%s` schema, "+
|
|
"please add these extend columns manually in downstream database/schema file", strings.Join(extendCols, ","), tableInfo.DB, tableInfo.Name))
|
|
return msgs, nil
|
|
}
|
|
|
|
// tidb_rowid have a default value.
|
|
defaultCols[model.ExtraHandleName.String()] = struct{}{}
|
|
|
|
// only check the first file of this table.
|
|
dataFile := tableInfo.DataFiles[0]
|
|
logutil.Logger(ctx).Info("datafile to check", zap.String("db", tableInfo.DB),
|
|
zap.String("table", tableInfo.Name), zap.String("path", dataFile.FileMeta.Path))
|
|
// get columns name from data file.
|
|
dataFileMeta := dataFile.FileMeta
|
|
|
|
if tp := dataFileMeta.Type; tp != mydump.SourceTypeCSV && tp != mydump.SourceTypeSQL && tp != mydump.SourceTypeParquet {
|
|
msgs = append(msgs, fmt.Sprintf("file '%s' with unknown source type '%s'", dataFileMeta.Path, dataFileMeta.Type.String()))
|
|
return msgs, nil
|
|
}
|
|
row := []types.Datum{}
|
|
colsFromDataFile, rows, err := ci.preInfoGetter.ReadFirstNRowsByFileMeta(ctx, dataFileMeta, 1)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
if len(rows) > 0 {
|
|
row = rows[0]
|
|
}
|
|
if colsFromDataFile == nil && len(row) == 0 {
|
|
logutil.Logger(ctx).Info("file contains no data, skip checking against schema validity", zap.String("path", dataFileMeta.Path))
|
|
return msgs, nil
|
|
}
|
|
|
|
if colsFromDataFile == nil {
|
|
// when there is no columns name in data file. we must insert data in order.
|
|
// so the last several columns either can be ignored or has a default value.
|
|
for i := len(row); i < colCountFromTiDB; i++ {
|
|
if _, ok := defaultCols[core.Columns[i].Name.L]; !ok {
|
|
msgs = append(msgs, fmt.Sprintf("TiDB schema `%s`.`%s` has %d columns, "+
|
|
"and data file has %d columns, but column %s is missing the default value, "+
|
|
"please give column a default value to skip this check",
|
|
tableInfo.DB, tableInfo.Name, colCountFromTiDB, len(row), core.Columns[i].Name.L))
|
|
}
|
|
}
|
|
return msgs, nil
|
|
}
|
|
|
|
// compare column names and make sure
|
|
// 1. TiDB table info has data file's all columns(besides ignore columns)
|
|
// 2. Those columns not introduced in data file always have a default value.
|
|
colMap := make(map[string]struct{})
|
|
for col := range igCols {
|
|
colMap[col] = struct{}{}
|
|
}
|
|
for _, col := range core.Columns {
|
|
if _, ok := colMap[col.Name.L]; ok {
|
|
// tidb's column is ignored
|
|
// we need ensure this column has the default value.
|
|
if _, hasDefault := defaultCols[col.Name.L]; !hasDefault {
|
|
msgs = append(msgs, fmt.Sprintf("TiDB schema `%s`.`%s`'s column %s cannot be ignored, "+
|
|
"because it doesn't have a default value, please set tables.ignoreColumns properly",
|
|
tableInfo.DB, tableInfo.Name, col.Name.L))
|
|
}
|
|
} else {
|
|
colMap[col.Name.L] = struct{}{}
|
|
}
|
|
}
|
|
// tidb_rowid can be ignored in check
|
|
colMap[model.ExtraHandleName.String()] = struct{}{}
|
|
for _, col := range colsFromDataFile {
|
|
if _, ok := colMap[col]; !ok {
|
|
checkMsg := "please check table schema"
|
|
if dataFileMeta.Type == mydump.SourceTypeCSV && ci.cfg.Mydumper.CSV.Header {
|
|
checkMsg += " and csv file header"
|
|
}
|
|
msgs = append(msgs, fmt.Sprintf("TiDB schema `%s`.`%s` doesn't have column %s, "+
|
|
"%s or use tables.ignoreColumns to ignore %s",
|
|
tableInfo.DB, tableInfo.Name, col, checkMsg, col))
|
|
} else {
|
|
// remove column for next iteration
|
|
delete(colMap, col)
|
|
}
|
|
}
|
|
// if theses rest columns don't have a default value.
|
|
for col := range colMap {
|
|
if _, ok := defaultCols[col]; ok {
|
|
continue
|
|
}
|
|
msgs = append(msgs, fmt.Sprintf("TiDB schema `%s`.`%s` doesn't have the default value for %s. "+
|
|
"Please add default value for column '%s' or choose another column to ignore or add this column in data file",
|
|
tableInfo.DB, tableInfo.Name, col, col))
|
|
}
|
|
return msgs, nil
|
|
}
|
|
|
|
type csvHeaderCheckItem struct {
|
|
cfg *config.Config
|
|
preInfoGetter PreImportInfoGetter
|
|
dbMetas []*mydump.MDDatabaseMeta
|
|
}
|
|
|
|
// NewCSVHeaderCheckItem creates a new csvHeaderCheckItem.
|
|
func NewCSVHeaderCheckItem(cfg *config.Config, preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
|
|
return &csvHeaderCheckItem{
|
|
cfg: cfg,
|
|
preInfoGetter: preInfoGetter,
|
|
dbMetas: dbMetas,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker interface.
|
|
func (*csvHeaderCheckItem) GetCheckItemID() precheck.CheckItemID {
|
|
return precheck.CheckCSVHeader
|
|
}
|
|
|
|
// Check tries to check whether the csv header config is consistent with the source csv files by:
|
|
// 1. pick one table with two CSV files and a unique/primary key
|
|
// 2. read the first row of those two CSV files
|
|
// 3. checks if the content of those first rows are compatible with the table schema, and whether the
|
|
// two rows are identical, to determine if the first rows are a header rows.
|
|
func (ci *csvHeaderCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
// if cfg set header = true but source files actually contain not header, former SchemaCheck should
|
|
// return error in this situation, so we need do it again.
|
|
if ci.cfg.Mydumper.CSV.Header {
|
|
return nil, nil
|
|
}
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Critical,
|
|
Passed: true,
|
|
Message: "the config [mydumper.csv.header] is set to false, and CSV header lines are really not detected in the data files",
|
|
}
|
|
var (
|
|
tableMeta *mydump.MDTableMeta
|
|
csvCount int
|
|
hasUniqueIdx bool
|
|
)
|
|
dbInfos, err := ci.preInfoGetter.GetAllTableStructures(ctx)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
// only check one table source files for better performance. The checked table is chosen based on following two factor:
|
|
// 1. contains at least 1 csv source file, 2 is preferable
|
|
// 2. table schema contains primary key or unique key
|
|
// if the two factors can't be both satisfied, the first one has a higher priority
|
|
outer:
|
|
for _, dbMeta := range ci.dbMetas {
|
|
for _, tblMeta := range dbMeta.Tables {
|
|
if len(tblMeta.DataFiles) == 0 {
|
|
continue
|
|
}
|
|
tableHasUniqueIdx := false
|
|
tableCSVCount := 0
|
|
for _, f := range tblMeta.DataFiles {
|
|
if f.FileMeta.Type == mydump.SourceTypeCSV {
|
|
tableCSVCount++
|
|
if tableCSVCount >= 2 {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if tableCSVCount == 0 {
|
|
continue
|
|
}
|
|
|
|
info := dbInfos[tblMeta.DB].Tables[tblMeta.Name]
|
|
for _, idx := range info.Core.Indices {
|
|
if idx.Primary || idx.Unique {
|
|
tableHasUniqueIdx = true
|
|
}
|
|
}
|
|
|
|
if tableCSVCount >= 2 && hasUniqueIdx {
|
|
tableMeta = tblMeta
|
|
// if a perfect table source is found, we can stop check more tables
|
|
break outer
|
|
}
|
|
if tableCSVCount > csvCount || (tableCSVCount == csvCount && !hasUniqueIdx && tableHasUniqueIdx) {
|
|
tableMeta = tblMeta
|
|
csvCount = tableCSVCount
|
|
hasUniqueIdx = tableHasUniqueIdx
|
|
}
|
|
}
|
|
}
|
|
|
|
if tableMeta == nil {
|
|
return theResult, nil
|
|
}
|
|
|
|
var rows [][]types.Datum
|
|
for _, f := range tableMeta.DataFiles {
|
|
if f.FileMeta.Type != mydump.SourceTypeCSV {
|
|
continue
|
|
}
|
|
|
|
row := []types.Datum{}
|
|
_, previewRows, err := ci.preInfoGetter.ReadFirstNRowsByFileMeta(ctx, f.FileMeta, 1)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
if len(previewRows) > 0 {
|
|
row = previewRows[0]
|
|
}
|
|
if len(row) > 0 {
|
|
rows = append(rows, row)
|
|
}
|
|
// only check at most two of all the files
|
|
if len(rows) >= 2 {
|
|
break
|
|
}
|
|
}
|
|
if len(rows) == 0 {
|
|
return theResult, nil
|
|
} else if len(rows) >= 2 {
|
|
// if the first row in two source files are not the same, they should not be the header line
|
|
// NOTE: though lightning's logic allows different source files contains different columns or the
|
|
// order is difference, here we only check if they are exactly the same because this is the common case.
|
|
if len(rows[0]) != len(rows[1]) {
|
|
return theResult, nil
|
|
}
|
|
|
|
for i := range rows[0] {
|
|
if rows[0][i].GetString() != rows[1][i].GetString() {
|
|
return theResult, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// check if some fields are unique and not ignored
|
|
// if at least one field appears in a unique key, we can sure there is something wrong,
|
|
// they should be either the header line or the data is duplicated.
|
|
tableInfo := dbInfos[tableMeta.DB].Tables[tableMeta.Name]
|
|
tableFields := make(map[string]struct{})
|
|
uniqueIdxFields := make(map[string]struct{})
|
|
ignoreColumns, err := ci.cfg.Mydumper.IgnoreColumns.GetIgnoreColumns(tableMeta.DB, tableMeta.Name, ci.cfg.Mydumper.CaseSensitive)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
ignoreColsSet := make(map[string]struct{})
|
|
for _, col := range ignoreColumns.Columns {
|
|
ignoreColsSet[col] = struct{}{}
|
|
}
|
|
for _, idx := range tableInfo.Core.Indices {
|
|
if !idx.Unique && !idx.Primary {
|
|
continue
|
|
}
|
|
for _, col := range idx.Columns {
|
|
if _, ok := ignoreColsSet[col.Name.L]; !ok {
|
|
uniqueIdxFields[col.Name.L] = struct{}{}
|
|
}
|
|
}
|
|
}
|
|
for _, f := range tableInfo.Core.Columns {
|
|
tableFields[f.Name.L] = struct{}{}
|
|
}
|
|
if common.TableHasAutoRowID(tableInfo.Core) {
|
|
tableFields[model.ExtraHandleName.L] = struct{}{}
|
|
}
|
|
hasUniqueField := false
|
|
for _, d := range rows[0] {
|
|
val := strings.ToLower(d.GetString())
|
|
if _, ok := tableFields[val]; !ok {
|
|
return theResult, nil
|
|
}
|
|
if _, ok := uniqueIdxFields[val]; ok {
|
|
hasUniqueField = true
|
|
break
|
|
}
|
|
}
|
|
|
|
theResult.Passed = false
|
|
theResult.Message = fmt.Sprintf("source csv files contains header row but `mydumper.csv.header` is false, checked table is `%s`.`%s`",
|
|
tableMeta.DB, tableMeta.Name)
|
|
theResult.Severity = precheck.Warn
|
|
if hasUniqueField && len(rows) > 1 {
|
|
theResult.Severity = precheck.Critical
|
|
} else {
|
|
ok, err := checkFieldCompatibility(tableInfo.Core, ignoreColsSet, rows[0], log.Wrap(logutil.Logger(ctx)))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if !ok {
|
|
// if there are only 1 csv file or there is not unique key, try to check if all columns are compatible with string value
|
|
theResult.Severity = precheck.Critical
|
|
}
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
func checkFieldCompatibility(
|
|
tbl *model.TableInfo,
|
|
ignoreCols map[string]struct{},
|
|
values []types.Datum,
|
|
logger log.Logger,
|
|
) (bool, error) {
|
|
se, err := kv.NewSession(&encode.SessionOptions{
|
|
SQLMode: mysql.ModeStrictTransTables,
|
|
}, logger)
|
|
if err != nil {
|
|
return false, errors.Trace(err)
|
|
}
|
|
|
|
for i, col := range tbl.Columns {
|
|
// do not check ignored columns
|
|
if _, ok := ignoreCols[col.Name.L]; ok {
|
|
continue
|
|
}
|
|
if i >= len(values) {
|
|
break
|
|
}
|
|
_, err := table.CastColumnValue(se.GetExprCtx(), values[i], col, true, false)
|
|
if err != nil {
|
|
logger.Error("field value is not consistent with column type", zap.String("value", values[i].GetString()),
|
|
zap.Any("column_info", col), zap.Error(err))
|
|
return false, nil
|
|
}
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
type tableEmptyCheckItem struct {
|
|
cfg *config.Config
|
|
preInfoGetter PreImportInfoGetter
|
|
dbMetas []*mydump.MDDatabaseMeta
|
|
checkpointsDB checkpoints.DB
|
|
}
|
|
|
|
// NewTableEmptyCheckItem creates a new tableEmptyCheckItem
|
|
func NewTableEmptyCheckItem(cfg *config.Config, preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta, cpdb checkpoints.DB) precheck.Checker {
|
|
return &tableEmptyCheckItem{
|
|
cfg: cfg,
|
|
preInfoGetter: preInfoGetter,
|
|
dbMetas: dbMetas,
|
|
checkpointsDB: cpdb,
|
|
}
|
|
}
|
|
|
|
// GetCheckItemID implements Checker interface
|
|
func (*tableEmptyCheckItem) GetCheckItemID() precheck.CheckItemID {
|
|
return precheck.CheckTargetTableEmpty
|
|
}
|
|
|
|
// Check implements Checker interface
|
|
func (ci *tableEmptyCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
theResult := &precheck.CheckResult{
|
|
Item: ci.GetCheckItemID(),
|
|
Severity: precheck.Critical,
|
|
Passed: true,
|
|
Message: "all importing tables on the target are empty",
|
|
}
|
|
|
|
tableCount := 0
|
|
for _, db := range ci.dbMetas {
|
|
tableCount += len(db.Tables)
|
|
}
|
|
|
|
var lock sync.Mutex
|
|
tableNames := make([]string, 0)
|
|
concurrency := min(tableCount, ci.cfg.App.RegionConcurrency)
|
|
type tableNameComponents struct {
|
|
DBName string
|
|
TableName string
|
|
}
|
|
ch := make(chan tableNameComponents, concurrency)
|
|
eg, gCtx := errgroup.WithContext(ctx)
|
|
|
|
for range concurrency {
|
|
eg.Go(func() error {
|
|
for tblNameComp := range ch {
|
|
fullTableName := common.UniqueTable(tblNameComp.DBName, tblNameComp.TableName)
|
|
// skip tables that have checkpoint
|
|
if ci.cfg.Checkpoint.Enable && ci.checkpointsDB != nil {
|
|
_, err := ci.checkpointsDB.Get(gCtx, fullTableName)
|
|
switch {
|
|
case err == nil:
|
|
continue
|
|
case errors.IsNotFound(err):
|
|
default:
|
|
return errors.Trace(err)
|
|
}
|
|
}
|
|
|
|
isEmptyPtr, err1 := ci.preInfoGetter.IsTableEmpty(gCtx, tblNameComp.DBName, tblNameComp.TableName)
|
|
if err1 != nil {
|
|
return err1
|
|
}
|
|
if !(*isEmptyPtr) {
|
|
lock.Lock()
|
|
tableNames = append(tableNames, fullTableName)
|
|
lock.Unlock()
|
|
}
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
loop:
|
|
for _, db := range ci.dbMetas {
|
|
for _, tbl := range db.Tables {
|
|
select {
|
|
case ch <- tableNameComponents{tbl.DB, tbl.Name}:
|
|
case <-gCtx.Done():
|
|
break loop
|
|
}
|
|
}
|
|
}
|
|
close(ch)
|
|
if err := eg.Wait(); err != nil {
|
|
if common.IsContextCanceledError(err) {
|
|
return nil, nil
|
|
}
|
|
return nil, errors.Annotate(err, "check table contains data failed")
|
|
}
|
|
|
|
if len(tableNames) > 0 {
|
|
// sort the failed names
|
|
slices.Sort(tableNames)
|
|
theResult.Passed = false
|
|
theResult.Message = fmt.Sprintf("table(s) [%s] are not empty", strings.Join(tableNames, ", "))
|
|
}
|
|
return theResult, nil
|
|
}
|
|
|
|
// hasDefault represents col has default value.
|
|
func hasDefault(col *model.ColumnInfo) bool {
|
|
return col.DefaultIsExpr || col.DefaultValue != nil || !mysql.HasNotNullFlag(col.GetFlag()) ||
|
|
col.IsGenerated() || mysql.HasAutoIncrementFlag(col.GetFlag())
|
|
}
|
|
|
|
// pdTiDBFromSameClusterCheckItem provides two sources of PD addresses and
// uses util.CheckIfSameCluster to check whether they belong to the same
// cluster.
//
// Most of the time the first source is the full list of etcd client URL
// addresses of the PD leader, and the second source is the first etcd client
// URL address of every PD node.
//
// If the PD leader can't be reached, the first source is replaced by the PD
// address set in lightning's task configuration, or in TiDB's configuration.
// This may raise a false alert when PD has multiple endpoints and the
// configuration uses one of them while the etcd information uses another one,
// so that no common address is passed to util.CheckIfSameCluster.
type pdTiDBFromSameClusterCheckItem struct {
	// db is the connection handed to util.GetPDsAddrWithoutScheme to obtain
	// the second source of PD addresses.
	db *sql.DB
	// pdAddrsGetter returns the first source of PD addresses as URLs.
	pdAddrsGetter func(context.Context) []string
}
|
|
|
|
// NewPDTiDBFromSameClusterCheckItem creates a new pdTiDBFromSameClusterCheckItem.
|
|
func NewPDTiDBFromSameClusterCheckItem(
|
|
db *sql.DB,
|
|
pdAddrsGetter func(context.Context) []string,
|
|
) precheck.Checker {
|
|
return &pdTiDBFromSameClusterCheckItem{
|
|
db: db,
|
|
pdAddrsGetter: pdAddrsGetter,
|
|
}
|
|
}
|
|
|
|
func (i *pdTiDBFromSameClusterCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
|
|
theResult := &precheck.CheckResult{
|
|
Item: i.GetCheckItemID(),
|
|
Severity: precheck.Critical,
|
|
Passed: true,
|
|
Message: "PD and TiDB in configuration are from the same cluster",
|
|
}
|
|
|
|
pdLeaderAddrsGetter := func(ctx context.Context) ([]string, error) {
|
|
addrs := i.pdAddrsGetter(ctx)
|
|
for idx, addrURL := range addrs {
|
|
u, err2 := url.Parse(addrURL)
|
|
if err2 != nil {
|
|
return nil, errors.Trace(err2)
|
|
}
|
|
addrs[idx] = u.Host
|
|
}
|
|
return addrs, nil
|
|
}
|
|
|
|
sameCluster, pdAddrs, pdAddrsFromTiDB, err := util.CheckIfSameCluster(
|
|
ctx, pdLeaderAddrsGetter, util.GetPDsAddrWithoutScheme(i.db),
|
|
)
|
|
if err != nil {
|
|
return nil, errors.Trace(err)
|
|
}
|
|
if sameCluster {
|
|
return theResult, nil
|
|
}
|
|
|
|
theResult.Passed = false
|
|
theResult.Message = fmt.Sprintf(
|
|
"PD and TiDB in configuration are not from the same cluster, "+
|
|
"PD addresses read from PD are: %v, PD addresses read from TiDB are %v",
|
|
pdAddrs, pdAddrsFromTiDB,
|
|
)
|
|
return theResult, nil
|
|
}
|
|
|
|
func (*pdTiDBFromSameClusterCheckItem) GetCheckItemID() precheck.CheckItemID {
|
|
return precheck.CheckPDTiDBFromSameCluster
|
|
}
|