// tidb/lightning/pkg/importer/precheck_impl.go
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package importer
import (
"cmp"
"context"
"database/sql"
"fmt"
"net/url"
"path/filepath"
"reflect"
"slices"
"strconv"
"strings"
"sync"
"time"
"github.com/docker/go-units"
"github.com/pingcap/errors"
"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pingcap/tidb/br/pkg/storage"
"github.com/pingcap/tidb/br/pkg/streamhelper"
"github.com/pingcap/tidb/lightning/pkg/precheck"
"github.com/pingcap/tidb/pkg/lightning/backend/encode"
"github.com/pingcap/tidb/pkg/lightning/backend/kv"
"github.com/pingcap/tidb/pkg/lightning/checkpoints"
"github.com/pingcap/tidb/pkg/lightning/common"
"github.com/pingcap/tidb/pkg/lightning/config"
"github.com/pingcap/tidb/pkg/lightning/log"
"github.com/pingcap/tidb/pkg/lightning/mydump"
"github.com/pingcap/tidb/pkg/meta/model"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/table"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util"
"github.com/pingcap/tidb/pkg/util/cdcutil"
"github.com/pingcap/tidb/pkg/util/engine"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/pingcap/tidb/pkg/util/set"
pdhttp "github.com/tikv/pd/client/http"
clientv3 "go.etcd.io/etcd/client/v3"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
"google.golang.org/grpc"
)
type clusterResourceCheckItem struct {
preInfoGetter PreImportInfoGetter
}
// NewClusterResourceCheckItem creates a new clusterResourceCheckItem.
func NewClusterResourceCheckItem(preInfoGetter PreImportInfoGetter) precheck.Checker {
return &clusterResourceCheckItem{
preInfoGetter: preInfoGetter,
}
}
// GetCheckItemID implements Checker.GetCheckItemID.
func (*clusterResourceCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckTargetClusterSize
}
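// getClusterAvail sums the available storage space reported by PD, separately for TiKV stores and TiFlash stores.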
func (ci *clusterResourceCheckItem) getClusterAvail(ctx context.Context) (tikvAvail uint64, tiflashAvail uint64, err error) {
storeInfo, err := ci.preInfoGetter.GetStorageInfo(ctx)
if err != nil {
return 0, 0, errors.Trace(err)
}
for _, store := range storeInfo.Stores {
avail, err := units.RAMInBytes(store.Status.Available)
if err != nil {
return 0, 0, errors.Trace(err)
}
if engine.IsTiFlashHTTPResp(&store.Store) {
tiflashAvail += uint64(avail)
} else {
tikvAvail += uint64(avail)
}
}
return
}
// Check implements Checker.Check.
func (ci *clusterResourceCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Warn,
Passed: true,
Message: "",
}
var (
err error
tikvAvail uint64
tiflashAvail uint64
tikvSourceSize uint64
tiflashSourceSize uint64
taskMgr taskMetaMgr
)
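// a task manager may be injected via the context (used when several Lightning instances import in parallel);
// if present, source sizes and available space are aggregated across all tasks instead of being computed locally.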
taskMgrVal := ctx.Value(taskManagerKey)
if taskMgrVal != nil {
if mgr, ok := taskMgrVal.(taskMetaMgr); ok {
taskMgr = mgr
}
}
if taskMgr == nil {
var err error
estimatedDataSizeResult, err := ci.preInfoGetter.EstimateSourceDataSize(ctx)
if err != nil {
return nil, errors.Trace(err)
}
tikvSourceSize = uint64(estimatedDataSizeResult.SizeWithIndex)
tiflashSourceSize = uint64(estimatedDataSizeResult.TiFlashSize)
tikvAvail, tiflashAvail, err = ci.getClusterAvail(ctx)
if err != nil {
return nil, errors.Trace(err)
}
} else {
if err := taskMgr.CheckTasksExclusively(ctx, func(tasks []taskMeta) ([]taskMeta, error) {
tikvAvail = 0
tiflashAvail = 0
tikvSourceSize = 0
tiflashSourceSize = 0
restoreStarted := false
for _, task := range tasks {
if task.status > taskMetaStatusInitial {
restoreStarted = true
}
tikvSourceSize += task.tikvSourceBytes
tiflashSourceSize += task.tiflashSourceBytes
if task.tikvAvail > 0 {
tikvAvail = task.tikvAvail
}
if task.tiflashAvail > 0 {
tiflashAvail = task.tiflashAvail
}
}
if restoreStarted || tikvAvail > 0 || tiflashAvail > 0 {
return nil, nil
}
tikvAvail, tiflashAvail, err = ci.getClusterAvail(ctx)
if err != nil {
return nil, errors.Trace(err)
}
newTasks := slices.Clone(tasks)
for i := range newTasks {
newTasks[i].tikvAvail = tikvAvail
newTasks[i].tiflashAvail = tiflashAvail
}
return newTasks, nil
}); err != nil {
return nil, errors.Trace(err)
}
}
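// every region is replicated to max-replicas TiKV stores, so the required TiKV space is the source size multiplied by the replica count.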
replicaCount, err := ci.preInfoGetter.GetMaxReplica(ctx)
if err != nil {
return nil, errors.Trace(err)
}
tikvSourceSize = tikvSourceSize * replicaCount
if tikvSourceSize <= tikvAvail && tiflashSourceSize <= tiflashAvail {
theResult.Message = fmt.Sprintf("Cluster storage is sufficient: TiKV/TiFlash available space is %s/%s, and the estimated required space is %s/%s.",
units.BytesSize(float64(tikvAvail)), units.BytesSize(float64(tiflashAvail)), units.BytesSize(float64(tikvSourceSize)), units.BytesSize(float64(tiflashSourceSize)))
}
if tikvSourceSize > tikvAvail {
theResult.Passed = false
theResult.Message += fmt.Sprintf("TiKV requires more storage space. Estimated required size: %s. Available size: %s.",
units.BytesSize(float64(tikvSourceSize)), units.BytesSize(float64(tikvAvail)))
}
if tiflashAvail > 0 && tiflashSourceSize > tiflashAvail {
theResult.Passed = false
theResult.Message += fmt.Sprintf(" TiFlash requires more storage space. Estimated required size: %s. Available size: %s.",
units.BytesSize(float64(tiflashSourceSize)), units.BytesSize(float64(tiflashAvail)))
}
if !theResult.Passed {
theResult.Message += " Please increase storage to prevent import task failures."
}
return theResult, nil
}
type clusterVersionCheckItem struct {
preInfoGetter PreImportInfoGetter
dbMetas []*mydump.MDDatabaseMeta
}
// NewClusterVersionCheckItem creates a new clusterVersionCheckItem.
func NewClusterVersionCheckItem(preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
return &clusterVersionCheckItem{
preInfoGetter: preInfoGetter,
dbMetas: dbMetas,
}
}
// GetCheckItemID implements Checker.GetCheckItemID.
func (*clusterVersionCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckTargetClusterVersion
}
// Check implements Checker.Check.
func (ci *clusterVersionCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Critical,
Passed: true,
Message: "Cluster version check passed",
}
checkCtx := WithPreInfoGetterDBMetas(ctx, ci.dbMetas)
if err := ci.preInfoGetter.CheckVersionRequirements(checkCtx); err != nil {
err := common.NormalizeError(err)
theResult.Passed = false
theResult.Message = fmt.Sprintf("Cluster version check failed: %s", err.Error())
}
return theResult, nil
}
type emptyRegionCheckItem struct {
preInfoGetter PreImportInfoGetter
dbMetas []*mydump.MDDatabaseMeta
}
// NewEmptyRegionCheckItem creates a new emptyRegionCheckItem.
func NewEmptyRegionCheckItem(preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
return &emptyRegionCheckItem{
preInfoGetter: preInfoGetter,
dbMetas: dbMetas,
}
}
// GetCheckItemID implements Checker.GetCheckItemID.
func (*emptyRegionCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckTargetClusterEmptyRegion
}
// Check implements Checker.Check.
func (ci *emptyRegionCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Warn,
Passed: true,
Message: "Cluster doesn't have too many empty regions",
}
dbInfos, err := ci.preInfoGetter.GetAllTableStructures(ctx)
if err != nil {
return nil, errors.Trace(err)
}
storeInfo, err := ci.preInfoGetter.GetStorageInfo(ctx)
if err != nil {
return nil, errors.Trace(err)
}
if len(storeInfo.Stores) <= 1 {
return theResult, nil
}
emptyRegionsInfo, err := ci.preInfoGetter.GetEmptyRegionsInfo(ctx)
if err != nil {
return nil, errors.Trace(err)
}
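// count how many empty regions each store hosts, and index the stores by ID.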
regions := make(map[int64]int)
stores := make(map[int64]*pdhttp.StoreInfo)
for _, region := range emptyRegionsInfo.Regions {
for _, peer := range region.Peers {
regions[peer.StoreID]++
}
}
for _, store := range storeInfo.Stores {
stores[store.Store.ID] = &store
}
tableCount := 0
for _, db := range ci.dbMetas {
info, ok := dbInfos[db.Name]
if !ok {
continue
}
tableCount += len(info.Tables)
}
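// the thresholds are at least the per-store constants, and are relaxed in proportion to the number of tables to import.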
errorThreshold := max(errorEmptyRegionCntPerStore, tableCount*3)
warnThreshold := max(warnEmptyRegionCntPerStore, tableCount)
var (
errStores []string
warnStores []string
)
for storeID, regionCnt := range regions {
if store, ok := stores[storeID]; ok {
if metapb.StoreState(metapb.StoreState_value[store.Store.StateName]) != metapb.StoreState_Up {
continue
}
if engine.IsTiFlashHTTPResp(&store.Store) {
continue
}
if regionCnt > errorThreshold {
errStores = append(errStores, strconv.Itoa(int(storeID)))
} else if regionCnt > warnThreshold {
warnStores = append(warnStores, strconv.Itoa(int(storeID)))
}
}
}
var messages []string
if len(errStores) > 0 {
theResult.Passed = false
messages = append(messages, fmt.Sprintf("TiKV stores (%s) each contain more than %v empty regions, "+
"which will greatly affect the import speed and success rate", strings.Join(errStores, ", "), errorThreshold))
}
if len(warnStores) > 0 {
messages = append(messages, fmt.Sprintf("TiKV stores (%s) each contain more than %v empty regions, "+
"which will affect the import speed and success rate", strings.Join(warnStores, ", "), warnThreshold))
}
if len(messages) > 0 {
theResult.Message = strings.Join(messages, "\n")
}
return theResult, nil
}
type regionDistributionCheckItem struct {
preInfoGetter PreImportInfoGetter
dbMetas []*mydump.MDDatabaseMeta
}
// NewRegionDistributionCheckItem creates a new regionDistributionCheckItem.
func NewRegionDistributionCheckItem(preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
return &regionDistributionCheckItem{
preInfoGetter: preInfoGetter,
dbMetas: dbMetas,
}
}
// GetCheckItemID implements Checker.GetCheckItemID.
func (*regionDistributionCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckTargetClusterRegionDist
}
// Check implements Checker.Check.
func (ci *regionDistributionCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Warn,
Passed: true,
Message: "Cluster region distribution is balanced",
}
storesInfo, err := ci.preInfoGetter.GetStorageInfo(ctx)
if err != nil {
return nil, errors.Trace(err)
}
stores := make([]*pdhttp.StoreInfo, 0, len(storesInfo.Stores))
for _, store := range storesInfo.Stores {
if metapb.StoreState(metapb.StoreState_value[store.Store.StateName]) != metapb.StoreState_Up {
continue
}
if engine.IsTiFlashHTTPResp(&store.Store) {
continue
}
stores = append(stores, &store)
}
if len(stores) <= 1 {
return theResult, nil
}
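// sort stores by region count so that stores[0] holds the fewest regions and the last entry holds the most.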
slices.SortFunc(stores, func(i, j *pdhttp.StoreInfo) int {
return cmp.Compare(i.Status.RegionCount, j.Status.RegionCount)
})
minStore := stores[0]
maxStore := stores[len(stores)-1]
dbInfos, err := ci.preInfoGetter.GetAllTableStructures(ctx)
if err != nil {
return nil, errors.Trace(err)
}
tableCount := 0
for _, db := range ci.dbMetas {
info, ok := dbInfos[db.Name]
if !ok {
continue
}
tableCount += len(info.Tables)
}
threshold := max(checkRegionCntRatioThreshold, tableCount)
if maxStore.Status.RegionCount <= int64(threshold) {
return theResult, nil
}
ratio := float64(minStore.Status.RegionCount) / float64(maxStore.Status.RegionCount)
if ratio < errorRegionCntMinMaxRatio {
theResult.Passed = false
theResult.Message = fmt.Sprintf("Region distribution is unbalanced: the ratio of the region count of the store(%v) "+
"with the fewest regions(%v) to the store(%v) with the most regions(%v) is %v, but it must not be less than %v",
minStore.Store.ID, minStore.Status.RegionCount, maxStore.Store.ID, maxStore.Status.RegionCount, ratio, errorRegionCntMinMaxRatio)
} else if ratio < warnRegionCntMinMaxRatio {
theResult.Message = fmt.Sprintf("Region distribution is unbalanced: the ratio of the region count of the store(%v) "+
"with the fewest regions(%v) to the store(%v) with the most regions(%v) is %v, but it should not be less than %v",
minStore.Store.ID, minStore.Status.RegionCount, maxStore.Store.ID, maxStore.Status.RegionCount, ratio, warnRegionCntMinMaxRatio)
}
return theResult, nil
}
type storagePermissionCheckItem struct {
cfg *config.Config
}
// NewStoragePermissionCheckItem creates a new storagePermissionCheckItem.
func NewStoragePermissionCheckItem(cfg *config.Config) precheck.Checker {
return &storagePermissionCheckItem{
cfg: cfg,
}
}
// GetCheckItemID implements Checker.GetCheckItemID.
func (*storagePermissionCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckSourcePermission
}
// Check implements Checker.Check.
func (ci *storagePermissionCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Critical,
Passed: true,
Message: "Lightning has the correct storage permission",
}
u, err := storage.ParseBackend(ci.cfg.Mydumper.SourceDir, nil)
if err != nil {
return nil, common.NormalizeError(err)
}
_, err = storage.New(ctx, u, &storage.ExternalStorageOptions{
CheckPermissions: []storage.Permission{
storage.ListObjects,
storage.GetObject,
},
})
if err != nil {
theResult.Passed = false
theResult.Message = err.Error()
}
return theResult, nil
}
type largeFileCheckItem struct {
cfg *config.Config
dbMetas []*mydump.MDDatabaseMeta
}
// NewLargeFileCheckItem creates a new largeFileCheckItem.
func NewLargeFileCheckItem(cfg *config.Config, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
return &largeFileCheckItem{
cfg: cfg,
dbMetas: dbMetas,
}
}
// GetCheckItemID implements Checker.GetCheckItemID.
func (*largeFileCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckLargeDataFile
}
// Check implements Checker.Check.
func (ci *largeFileCheckItem) Check(_ context.Context) (*precheck.CheckResult, error) {
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Warn,
Passed: true,
Message: "Source data files size is proper",
}
if !ci.cfg.Mydumper.StrictFormat {
for _, db := range ci.dbMetas {
for _, t := range db.Tables {
for _, f := range t.DataFiles {
if f.FileMeta.RealSize > defaultCSVSize {
theResult.Message = fmt.Sprintf("found large data file %s, which will slow down import performance", f.FileMeta.Path)
theResult.Passed = false
}
}
}
}
} else {
theResult.Message = "Skipped the data file size check because mydumper.strict-format is true"
}
return theResult, nil
}
type localDiskPlacementCheckItem struct {
cfg *config.Config
}
// NewLocalDiskPlacementCheckItem creates a new localDiskPlacementCheckItem.
func NewLocalDiskPlacementCheckItem(cfg *config.Config) precheck.Checker {
return &localDiskPlacementCheckItem{
cfg: cfg,
}
}
// GetCheckItemID implements Checker.GetCheckItemID.
func (*localDiskPlacementCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckLocalDiskPlacement
}
// Check implements Checker.Check.
func (ci *localDiskPlacementCheckItem) Check(_ context.Context) (*precheck.CheckResult, error) {
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Warn,
Passed: true,
Message: "local source dir and temp-kv dir are on different disks",
}
sourceDir := strings.TrimPrefix(ci.cfg.Mydumper.SourceDir, storage.LocalURIPrefix)
same, err := common.SameDisk(sourceDir, ci.cfg.TikvImporter.SortedKVDir)
if err != nil {
return nil, errors.Trace(err)
}
if same {
theResult.Passed = false
theResult.Message = fmt.Sprintf("sorted-kv-dir:%s and data-source-dir:%s are on the same disk, which may slow down performance",
ci.cfg.TikvImporter.SortedKVDir, sourceDir)
}
return theResult, nil
}
type localTempKVDirCheckItem struct {
cfg *config.Config
preInfoGetter PreImportInfoGetter
dbMetas []*mydump.MDDatabaseMeta
}
// NewLocalTempKVDirCheckItem creates a new localTempKVDirCheckItem.
func NewLocalTempKVDirCheckItem(cfg *config.Config, preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
return &localTempKVDirCheckItem{
cfg: cfg,
preInfoGetter: preInfoGetter,
dbMetas: dbMetas,
}
}
// GetCheckItemID implements Checker.GetCheckItemID.
func (*localTempKVDirCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckLocalTempKVDir
}
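// hasCompressedFiles reports whether any source data file is compressed.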
func (ci *localTempKVDirCheckItem) hasCompressedFiles() bool {
for _, dbMeta := range ci.dbMetas {
for _, tbMeta := range dbMeta.Tables {
for _, file := range tbMeta.DataFiles {
if file.FileMeta.Compression != mydump.CompressionNone {
return true
}
}
}
}
return false
}
// Check implements Checker.Check.
func (ci *localTempKVDirCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
severity := precheck.Critical
// when the source contains compressed files, the estimated size may not be accurate, so lower the severity to Warn to avoid spurious failures
if ci.hasCompressedFiles() {
severity = precheck.Warn
}
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: severity,
}
storageSize, err := common.GetStorageSize(ci.cfg.TikvImporter.SortedKVDir)
if err != nil {
return nil, errors.Trace(err)
}
localAvailable := int64(storageSize.Available)
availableStr := units.BytesSize(float64(localAvailable))
estimatedDataSizeResult, err := ci.preInfoGetter.EstimateSourceDataSize(ctx)
if err != nil {
return nil, errors.Trace(err)
}
estimatedDataSizeWithIndex := estimatedDataSizeResult.SizeWithIndex
estimatedStr := units.BytesSize(float64(estimatedDataSizeWithIndex))
diskQuota := int64(ci.cfg.TikvImporter.DiskQuota)
diskQuotaStr := units.BytesSize(float64(diskQuota))
// Warn the user if diskQuota is 0 or negative, as it's likely a misconfiguration
if diskQuota <= 0 {
logutil.Logger(ctx).Warn("`tikv-importer.disk-quota` is set to 0 or less; please configure a valid positive value")
}
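// three outcomes: enough local space for all sorted data; a disk-quota larger than the available space (treated as a
// misconfiguration); or not enough space, in which case disk-quota bounds local usage at the cost of import speed.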
switch {
case localAvailable > estimatedDataSizeWithIndex:
theResult.Message = fmt.Sprintf("local disk space is sufficient, estimated sorted data size is %s, local available space is %s",
estimatedStr, availableStr)
theResult.Passed = true
case diskQuota > localAvailable:
theResult.Message = fmt.Sprintf("local disk space is insufficient to meet the configured disk-quota. "+
"Available space: %s, Configured disk-quota: %s. "+
"Please increase the available disk space or adjust the tikv-importer.disk-quota setting to a value lower than the available space and try again",
availableStr,
diskQuotaStr)
theResult.Passed = false
logutil.Logger(ctx).Error(theResult.Message)
default:
theResult.Message = fmt.Sprintf("local disk space may not be enough to finish the import, "+
"estimated sorted data size is %s, but local available space is %s, "+
"so disk-quota (size: %s) will be used to finish the import, which may slow it down",
estimatedStr,
availableStr, diskQuotaStr)
theResult.Passed = true
logutil.Logger(ctx).Warn(theResult.Message)
}
return theResult, nil
}
type checkpointCheckItem struct {
cfg *config.Config
preInfoGetter PreImportInfoGetter
dbMetas []*mydump.MDDatabaseMeta
checkpointsDB checkpoints.DB
}
// NewCheckpointCheckItem creates a new checkpointCheckItem.
func NewCheckpointCheckItem(cfg *config.Config, preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta, checkpointsDB checkpoints.DB) precheck.Checker {
return &checkpointCheckItem{
cfg: cfg,
preInfoGetter: preInfoGetter,
dbMetas: dbMetas,
checkpointsDB: checkpointsDB,
}
}
// GetCheckItemID implements Checker.GetCheckItemID.
func (*checkpointCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckCheckpoints
}
// Check implements Checker.Check.
func (ci *checkpointCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
if !ci.cfg.Checkpoint.Enable || ci.checkpointsDB == nil {
return nil, nil
}
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Critical,
Passed: true,
Message: "the checkpoints are valid",
}
checkMsgs := []string{}
dbInfos, err := ci.preInfoGetter.GetAllTableStructures(ctx)
if err != nil {
return nil, errors.Trace(err)
}
for _, dbInfo := range ci.dbMetas {
for _, tableInfo := range dbInfo.Tables {
msgs, err := ci.checkpointIsValid(ctx, tableInfo, dbInfos)
if err != nil {
return nil, errors.Trace(err)
}
checkMsgs = append(checkMsgs, msgs...)
}
}
if len(checkMsgs) > 0 {
theResult.Passed = false
theResult.Message = strings.Join(checkMsgs, "\n")
}
return theResult, nil
}
// checkpointIsValid checks whether we can start this import with this checkpoint.
func (ci *checkpointCheckItem) checkpointIsValid(ctx context.Context, tableInfo *mydump.MDTableMeta, dbInfos map[string]*checkpoints.TidbDBInfo) ([]string, error) {
msgs := make([]string, 0)
uniqueName := common.UniqueTable(tableInfo.DB, tableInfo.Name)
tableCheckPoint, err := ci.checkpointsDB.Get(ctx, uniqueName)
if err != nil {
if errors.IsNotFound(err) {
// there is no checkpoint
logutil.Logger(ctx).Debug("no checkpoint detected", zap.String("table", uniqueName))
return nil, nil
}
return nil, errors.Trace(err)
}
// a checkpoint at or below the missing status means this table has not started importing, so there is no progress to validate.
if tableCheckPoint.Status <= checkpoints.CheckpointStatusMissing {
return nil, nil
}
if tableCheckPoint.Status <= checkpoints.CheckpointStatusMaxInvalid {
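// an invalid checkpoint stores the failed step's status divided by 10, so multiplying by 10 recovers the step that failed.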
failedStep := tableCheckPoint.Status * 10
var action strings.Builder
action.WriteString("./tidb-lightning-ctl --checkpoint-error-")
switch failedStep {
case checkpoints.CheckpointStatusAlteredAutoInc, checkpoints.CheckpointStatusAnalyzed:
action.WriteString("ignore")
default:
action.WriteString("destroy")
}
action.WriteString("='")
action.WriteString(uniqueName)
action.WriteString("' --config=...")
msgs = append(msgs, fmt.Sprintf("TiDB Lightning has failed last time. To prevent data loss, this run will stop now. "+
"%s failed in step(%s), please run the command %s. "+
"You may also run `./tidb-lightning-ctl --checkpoint-error-destroy=all --config=...` to start from scratch. "+
"For details of this failure, read the log file from the PREVIOUS run",
uniqueName, failedStep.MetricName(), action.String()))
return msgs, nil
}
dbInfo, ok := dbInfos[tableInfo.DB]
if ok {
t, ok := dbInfo.Tables[tableInfo.Name]
if ok {
if tableCheckPoint.TableID > 0 && tableCheckPoint.TableID != t.ID {
msgs = append(msgs, fmt.Sprintf("TiDB Lightning has detected tables with illegal checkpoints. To prevent data loss, this run will stop now. "+
"Please run the command \"./tidb-lightning-ctl --checkpoint-remove='%s' --config=...\". "+
"You may also run `./tidb-lightning-ctl --checkpoint-error-destroy=all --config=...` to start from scratch. "+
"For details of this failure, read the log file from the PREVIOUS run",
uniqueName))
return msgs, nil
}
}
}
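// compare the column permutation recorded in the checkpoint's first chunk with the permutation derived from the
// current table schema; a mismatch means the table schema changed after the checkpoint was written.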
var permFromCheckpoint []int
var columns []string
for _, eng := range tableCheckPoint.Engines {
if len(eng.Chunks) > 0 {
chunk := eng.Chunks[0]
permFromCheckpoint = chunk.ColumnPermutation
columns = chunk.Chunk.Columns
if filepath.Dir(chunk.FileMeta.Path) != ci.cfg.Mydumper.SourceDir {
message := fmt.Sprintf("the chunk checkpoint's data file path does not match the configured source dir: "+
"checkpoint path is %s, config source dir is %s", chunk.FileMeta.Path, ci.cfg.Mydumper.SourceDir)
msgs = append(msgs, message)
}
}
}
if len(columns) == 0 {
logutil.Logger(ctx).Debug("no valid checkpoint detected", zap.String("table", uniqueName))
return nil, nil
}
info := dbInfos[tableInfo.DB].Tables[tableInfo.Name]
if info != nil {
permFromTiDB, err := parseColumnPermutations(info.Core, columns, nil, log.Wrap(logutil.Logger(ctx)))
if err != nil {
msgs = append(msgs, fmt.Sprintf("failed to calculate the column permutation (%s); table %s's info has changed, "+
"consider removing this checkpoint and starting the import again.", err.Error(), uniqueName))
}
if !reflect.DeepEqual(permFromCheckpoint, permFromTiDB) {
msgs = append(msgs, fmt.Sprintf("column permutation mismatch: table %s's info has changed, "+
"consider removing this checkpoint and starting the import again.", uniqueName))
}
}
return msgs, nil
}
// CDCPITRCheckItem checks whether the downstream has enabled CDC or PiTR. It's
// exposed to let the caller override the Instruction message.
type CDCPITRCheckItem struct {
cfg *config.Config
Instruction string
pdAddrsGetter func(context.Context) []string
// used in test
etcdCli *clientv3.Client
}
// NewCDCPITRCheckItem creates a checker that checks whether the downstream has enabled CDC or PiTR.
func NewCDCPITRCheckItem(cfg *config.Config, pdAddrsGetter func(context.Context) []string) precheck.Checker {
return &CDCPITRCheckItem{
cfg: cfg,
Instruction: "the local backend is not compatible with them. Please switch to the tidb backend and then try again.",
pdAddrsGetter: pdAddrsGetter,
}
}
// GetCheckItemID implements Checker interface.
func (*CDCPITRCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckTargetUsingCDCPITR
}
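// dialEtcdWithCfg creates an etcd client for the given endpoints using the TLS settings from the Lightning config.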
func dialEtcdWithCfg(
ctx context.Context,
cfg *config.Config,
addrs []string,
) (*clientv3.Client, error) {
cfg2, err := cfg.ToTLS()
if err != nil {
return nil, err
}
tlsConfig := cfg2.TLSConfig()
return clientv3.New(clientv3.Config{
TLS: tlsConfig,
Endpoints: addrs,
AutoSyncInterval: 30 * time.Second,
DialTimeout: 5 * time.Second,
DialOptions: []grpc.DialOption{
config.DefaultGrpcKeepaliveParams,
grpc.WithBlock(),
grpc.WithReturnConnectionError(),
},
Context: ctx,
})
}
// Check implements Checker interface.
func (ci *CDCPITRCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Critical,
}
if ci.cfg.TikvImporter.Backend != config.BackendLocal {
theResult.Passed = true
theResult.Message = "TiDB Lightning is not using local backend, skip this check"
return theResult, nil
}
if ci.etcdCli == nil {
var err error
ci.etcdCli, err = dialEtcdWithCfg(ctx, ci.cfg, ci.pdAddrsGetter(ctx))
if err != nil {
return nil, errors.Trace(err)
}
//nolint: errcheck
defer ci.etcdCli.Close()
}
errorMsg := make([]string, 0, 2)
pitrCli := streamhelper.NewMetaDataClient(ci.etcdCli)
tasks, err := pitrCli.GetAllTasks(ctx)
if err != nil {
return nil, errors.Trace(err)
}
if len(tasks) > 0 {
names := make([]string, 0, len(tasks))
for _, task := range tasks {
names = append(names, task.Info.GetName())
}
errorMsg = append(errorMsg, fmt.Sprintf("found PiTR log streaming task(s): %v,", names))
}
nameSet, err := cdcutil.GetRunningChangefeeds(ctx, ci.etcdCli)
if err != nil {
return nil, errors.Trace(err)
}
if !nameSet.Empty() {
errorMsg = append(errorMsg, nameSet.MessageToUser())
}
if len(errorMsg) > 0 {
errorMsg = append(errorMsg, ci.Instruction)
theResult.Passed = false
theResult.Message = strings.Join(errorMsg, "\n")
} else {
theResult.Passed = true
theResult.Message = "no CDC or PiTR task found"
}
return theResult, nil
}
type onlyState struct {
State string `json:"state"`
}
type schemaCheckItem struct {
cfg *config.Config
preInfoGetter PreImportInfoGetter
dbMetas []*mydump.MDDatabaseMeta
checkpointsDB checkpoints.DB
}
// NewSchemaCheckItem creates a checker to check whether the schema is valid.
func NewSchemaCheckItem(cfg *config.Config, preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta, cpdb checkpoints.DB) precheck.Checker {
return &schemaCheckItem{
cfg: cfg,
preInfoGetter: preInfoGetter,
dbMetas: dbMetas,
checkpointsDB: cpdb,
}
}
// GetCheckItemID implements Checker interface.
func (*schemaCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckSourceSchemaValid
}
// Check implements Checker interface.
func (ci *schemaCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Critical,
Passed: true,
Message: "table schemas are valid",
}
dbInfos, err := ci.preInfoGetter.GetAllTableStructures(ctx)
if err != nil {
return nil, errors.Trace(err)
}
checkMsgs := []string{}
for _, dbInfo := range ci.dbMetas {
for _, tableInfo := range dbInfo.Tables {
if ci.cfg.Checkpoint.Enable && ci.checkpointsDB != nil {
uniqueName := common.UniqueTable(tableInfo.DB, tableInfo.Name)
if _, err := ci.checkpointsDB.Get(ctx, uniqueName); err == nil {
// there is a checkpoint
log.L().Debug("checkpoint detected, skip the schema check", zap.String("table", uniqueName))
continue
}
}
msgs, err := ci.SchemaIsValid(ctx, tableInfo, dbInfos)
if err != nil {
return nil, errors.Trace(err)
}
checkMsgs = append(checkMsgs, msgs...)
}
}
if len(checkMsgs) > 0 {
theResult.Passed = false
theResult.Message = strings.Join(checkMsgs, "\n")
}
return theResult, nil
}
// SchemaIsValid checks whether the import files match the cluster schema.
func (ci *schemaCheckItem) SchemaIsValid(ctx context.Context, tableInfo *mydump.MDTableMeta, dbInfos map[string]*checkpoints.TidbDBInfo) ([]string, error) {
if len(tableInfo.DataFiles) == 0 {
logutil.Logger(ctx).Info("no data files detected", zap.String("db", tableInfo.DB), zap.String("table", tableInfo.Name))
return nil, nil
}
msgs := make([]string, 0)
info, ok := dbInfos[tableInfo.DB].Tables[tableInfo.Name]
if !ok {
msgs = append(msgs, fmt.Sprintf("TiDB schema `%s`.`%s` doesn't exist, "+
"please provide a schema file in the source dir or create the table manually", tableInfo.DB, tableInfo.Name))
return msgs, nil
}
igCol, err := ci.cfg.Mydumper.IgnoreColumns.GetIgnoreColumns(tableInfo.DB, tableInfo.Name, ci.cfg.Mydumper.CaseSensitive)
if err != nil {
return nil, errors.Trace(err)
}
igCols := igCol.ColumnsMap()
fullExtendColsSet := make(set.StringSet)
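// collect every extend column declared by the data files; an extend column must not also be listed in ignore-columns.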
for _, fileInfo := range tableInfo.DataFiles {
for _, col := range fileInfo.FileMeta.ExtendData.Columns {
if _, ok = igCols[col]; ok {
msgs = append(msgs, fmt.Sprintf("extend column %s is also assigned in ignore-column for table `%s`.`%s`, "+
"please keep only one of them", col, tableInfo.DB, tableInfo.Name))
}
fullExtendColsSet.Insert(col)
}
}
if len(msgs) > 0 {
return msgs, nil
}
colCountFromTiDB := len(info.Core.Columns)
if len(fullExtendColsSet) > 0 {
logutil.Logger(ctx).Info("check extend column count through data files", zap.String("db", tableInfo.DB),
zap.String("table", tableInfo.Name))
igColCnt := 0
for _, col := range info.Core.Columns {
if _, ok = igCols[col.Name.L]; ok {
igColCnt++
}
}
for _, f := range tableInfo.DataFiles {
cols, previewRows, err := ci.preInfoGetter.ReadFirstNRowsByFileMeta(ctx, f.FileMeta, 1)
if err != nil {
return nil, errors.Trace(err)
}
if len(cols) > 0 {
colsSet := set.NewStringSet(cols...)
for _, extendCol := range f.FileMeta.ExtendData.Columns {
if colsSet.Exist(strings.ToLower(extendCol)) {
msgs = append(msgs, fmt.Sprintf("extend column %s is contained in table `%s`.`%s`'s header, "+
"please remove this column from the data or remove this extend rule", extendCol, tableInfo.DB, tableInfo.Name))
}
}
} else if len(previewRows) > 0 && len(previewRows[0])+len(f.FileMeta.ExtendData.Columns) > colCountFromTiDB+igColCnt {
msgs = append(msgs, fmt.Sprintf("the data file field count %d plus the extend column count %d is larger than the table column count %d plus the ignore column count %d for table `%s`.`%s`, "+
"please make sure your source data doesn't already contain the extend columns and the target schema has all of them", len(previewRows[0]), len(f.FileMeta.ExtendData.Columns), colCountFromTiDB, igColCnt, tableInfo.DB, tableInfo.Name))
}
}
}
if len(msgs) > 0 {
return msgs, nil
}
core := info.Core
defaultCols := make(map[string]struct{})
autoRandomCol := common.GetAutoRandomColumn(core)
for _, col := range core.Columns {
// an extend column is treated the same as a column with a default value
if _, isExtendCol := fullExtendColsSet[col.Name.O]; isExtendCol || hasDefault(col) || (autoRandomCol != nil && autoRandomCol.ID == col.ID) {
// this column has a default value or is the auto-random id, so it can be ignored
defaultCols[col.Name.L] = struct{}{}
}
delete(fullExtendColsSet, col.Name.O)
}
if len(fullExtendColsSet) > 0 {
extendCols := make([]string, 0, len(fullExtendColsSet))
for col := range fullExtendColsSet {
extendCols = append(extendCols, col)
}
msgs = append(msgs, fmt.Sprintf("extend columns [%s] don't exist in the target table `%s`.`%s` schema, "+
"please add these extend columns manually in the downstream database/schema file", strings.Join(extendCols, ","), tableInfo.DB, tableInfo.Name))
return msgs, nil
}
// the hidden _tidb_rowid column has a default value.
defaultCols[model.ExtraHandleName.String()] = struct{}{}
// only check the first file of this table.
dataFile := tableInfo.DataFiles[0]
logutil.Logger(ctx).Info("datafile to check", zap.String("db", tableInfo.DB),
zap.String("table", tableInfo.Name), zap.String("path", dataFile.FileMeta.Path))
// get columns name from data file.
dataFileMeta := dataFile.FileMeta
if tp := dataFileMeta.Type; tp != mydump.SourceTypeCSV && tp != mydump.SourceTypeSQL && tp != mydump.SourceTypeParquet {
msgs = append(msgs, fmt.Sprintf("file '%s' with unknown source type '%s'", dataFileMeta.Path, dataFileMeta.Type.String()))
return msgs, nil
}
row := []types.Datum{}
colsFromDataFile, rows, err := ci.preInfoGetter.ReadFirstNRowsByFileMeta(ctx, dataFileMeta, 1)
if err != nil {
return nil, errors.Trace(err)
}
if len(rows) > 0 {
row = rows[0]
}
if colsFromDataFile == nil && len(row) == 0 {
logutil.Logger(ctx).Info("file contains no data, skip checking against schema validity", zap.String("path", dataFileMeta.Path))
return msgs, nil
}
if colsFromDataFile == nil {
// when there are no column names in the data file, data must be inserted in column order,
// so each of the trailing columns must either be ignorable or have a default value.
for i := len(row); i < colCountFromTiDB; i++ {
if _, ok := defaultCols[core.Columns[i].Name.L]; !ok {
msgs = append(msgs, fmt.Sprintf("TiDB schema `%s`.`%s` has %d columns, "+
"and the data file has %d columns, but column %s doesn't have a default value, "+
"please give the column a default value to skip this check",
tableInfo.DB, tableInfo.Name, colCountFromTiDB, len(row), core.Columns[i].Name.L))
}
}
return msgs, nil
}
// compare column names and make sure that
// 1. the TiDB table info has all of the data file's columns (besides ignored columns), and
// 2. the columns not present in the data file always have a default value.
colMap := make(map[string]struct{})
for col := range igCols {
colMap[col] = struct{}{}
}
for _, col := range core.Columns {
if _, ok := colMap[col.Name.L]; ok {
// tidb's column is ignored
// we need ensure this column has the default value.
if _, hasDefault := defaultCols[col.Name.L]; !hasDefault {
msgs = append(msgs, fmt.Sprintf("TiDB schema `%s`.`%s`'s column %s cannot be ignored, "+
"because it doesn't have a default value, please set tables.ignoreColumns properly",
tableInfo.DB, tableInfo.Name, col.Name.L))
}
} else {
colMap[col.Name.L] = struct{}{}
}
}
// tidb_rowid can be ignored in check
colMap[model.ExtraHandleName.String()] = struct{}{}
for _, col := range colsFromDataFile {
if _, ok := colMap[col]; !ok {
checkMsg := "please check table schema"
if dataFileMeta.Type == mydump.SourceTypeCSV && ci.cfg.Mydumper.CSV.Header {
checkMsg += " and csv file header"
}
msgs = append(msgs, fmt.Sprintf("TiDB schema `%s`.`%s` doesn't have column %s, "+
"%s or use tables.ignoreColumns to ignore %s",
tableInfo.DB, tableInfo.Name, col, checkMsg, col))
} else {
// remove column for next iteration
delete(colMap, col)
}
}
// report the remaining columns that don't have a default value.
for col := range colMap {
if _, ok := defaultCols[col]; ok {
continue
}
msgs = append(msgs, fmt.Sprintf("TiDB schema `%s`.`%s` doesn't have a default value for column %s. "+
"Please add a default value for column '%s', choose another column to ignore, or add this column to the data file",
tableInfo.DB, tableInfo.Name, col, col))
}
return msgs, nil
}
type csvHeaderCheckItem struct {
cfg *config.Config
preInfoGetter PreImportInfoGetter
dbMetas []*mydump.MDDatabaseMeta
}
// NewCSVHeaderCheckItem creates a new csvHeaderCheckItem.
func NewCSVHeaderCheckItem(cfg *config.Config, preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta) precheck.Checker {
return &csvHeaderCheckItem{
cfg: cfg,
preInfoGetter: preInfoGetter,
dbMetas: dbMetas,
}
}
// GetCheckItemID implements Checker interface.
func (*csvHeaderCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckCSVHeader
}
// Check tries to check whether the csv header config is consistent with the source csv files by:
// 1. picking one table with two CSV files and a unique/primary key,
// 2. reading the first row of those two CSV files,
// 3. checking whether the content of those first rows is compatible with the table schema, and whether the
// two rows are identical, to determine whether the first rows are header rows.
func (ci *csvHeaderCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
// if the config sets header = true but the source files actually contain no header, the earlier
// schema check will report the mismatch, so we don't need to check it again here.
if ci.cfg.Mydumper.CSV.Header {
return nil, nil
}
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Critical,
Passed: true,
Message: "the config [mydumper.csv.header] is set to false, and no CSV header line was detected in the data files",
}
var (
tableMeta *mydump.MDTableMeta
csvCount int
hasUniqueIdx bool
)
dbInfos, err := ci.preInfoGetter.GetAllTableStructures(ctx)
if err != nil {
return nil, errors.Trace(err)
}
// only check one table's source files for better performance. The table to check is chosen based on the following two factors:
// 1. it contains at least 1 csv source file, 2 is preferable
// 2. its schema contains a primary key or unique key
// if both factors can't be satisfied at once, the first one has a higher priority
outer:
for _, dbMeta := range ci.dbMetas {
for _, tblMeta := range dbMeta.Tables {
if len(tblMeta.DataFiles) == 0 {
continue
}
tableHasUniqueIdx := false
tableCSVCount := 0
for _, f := range tblMeta.DataFiles {
if f.FileMeta.Type == mydump.SourceTypeCSV {
tableCSVCount++
if tableCSVCount >= 2 {
break
}
}
}
if tableCSVCount == 0 {
continue
}
info := dbInfos[tblMeta.DB].Tables[tblMeta.Name]
for _, idx := range info.Core.Indices {
if idx.Primary || idx.Unique {
tableHasUniqueIdx = true
}
}
if tableCSVCount >= 2 && tableHasUniqueIdx {
tableMeta = tblMeta
// if a perfect table source is found, we can stop checking more tables
break outer
}
if tableCSVCount > csvCount || (tableCSVCount == csvCount && !hasUniqueIdx && tableHasUniqueIdx) {
tableMeta = tblMeta
csvCount = tableCSVCount
hasUniqueIdx = tableHasUniqueIdx
}
}
}
if tableMeta == nil {
return theResult, nil
}
var rows [][]types.Datum
for _, f := range tableMeta.DataFiles {
if f.FileMeta.Type != mydump.SourceTypeCSV {
continue
}
row := []types.Datum{}
_, previewRows, err := ci.preInfoGetter.ReadFirstNRowsByFileMeta(ctx, f.FileMeta, 1)
if err != nil {
return nil, errors.Trace(err)
}
if len(previewRows) > 0 {
row = previewRows[0]
}
if len(row) > 0 {
rows = append(rows, row)
}
// only check at most two of all the files
if len(rows) >= 2 {
break
}
}
if len(rows) == 0 {
return theResult, nil
} else if len(rows) >= 2 {
// if the first rows of the two source files are not the same, they should not be header lines.
// NOTE: though lightning's logic allows different source files to contain different columns or a
// different order, here we only check whether they are exactly the same because this is the common case.
if len(rows[0]) != len(rows[1]) {
return theResult, nil
}
for i := range rows[0] {
if rows[0][i].GetString() != rows[1][i].GetString() {
return theResult, nil
}
}
}
// check if some fields are unique and not ignored.
// if at least one field appears in a unique key, we can be sure something is wrong:
// either the first row is a header line, or the data is duplicated.
tableInfo := dbInfos[tableMeta.DB].Tables[tableMeta.Name]
tableFields := make(map[string]struct{})
uniqueIdxFields := make(map[string]struct{})
ignoreColumns, err := ci.cfg.Mydumper.IgnoreColumns.GetIgnoreColumns(tableMeta.DB, tableMeta.Name, ci.cfg.Mydumper.CaseSensitive)
if err != nil {
return nil, errors.Trace(err)
}
ignoreColsSet := make(map[string]struct{})
for _, col := range ignoreColumns.Columns {
ignoreColsSet[col] = struct{}{}
}
for _, idx := range tableInfo.Core.Indices {
if !idx.Unique && !idx.Primary {
continue
}
for _, col := range idx.Columns {
if _, ok := ignoreColsSet[col.Name.L]; !ok {
uniqueIdxFields[col.Name.L] = struct{}{}
}
}
}
for _, f := range tableInfo.Core.Columns {
tableFields[f.Name.L] = struct{}{}
}
if common.TableHasAutoRowID(tableInfo.Core) {
tableFields[model.ExtraHandleName.L] = struct{}{}
}
hasUniqueField := false
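// if every field of the first row matches a column name of the table, the row looks like a header;
// if any of those names belongs to a unique or primary key, importing the row as data would very likely be wrong.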
for _, d := range rows[0] {
val := strings.ToLower(d.GetString())
if _, ok := tableFields[val]; !ok {
return theResult, nil
}
if _, ok := uniqueIdxFields[val]; ok {
hasUniqueField = true
break
}
}
theResult.Passed = false
theResult.Message = fmt.Sprintf("source csv files contain a header row but `mydumper.csv.header` is false, checked table is `%s`.`%s`",
tableMeta.DB, tableMeta.Name)
theResult.Severity = precheck.Warn
if hasUniqueField && len(rows) > 1 {
theResult.Severity = precheck.Critical
} else {
// if there is only 1 csv file or there is no unique key, check instead whether all columns are compatible with string values
ok, err := checkFieldCompatibility(tableInfo.Core, ignoreColsSet, rows[0], log.Wrap(logutil.Logger(ctx)))
if err != nil {
return nil, err
}
if !ok {
theResult.Severity = precheck.Critical
}
}
return theResult, nil
}
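// checkFieldCompatibility reports whether each of the given values can be cast to the type of the corresponding
// table column under strict SQL mode; ignored columns are skipped and only the first len(values) columns are checked.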
func checkFieldCompatibility(
tbl *model.TableInfo,
ignoreCols map[string]struct{},
values []types.Datum,
logger log.Logger,
) (bool, error) {
se, err := kv.NewSession(&encode.SessionOptions{
SQLMode: mysql.ModeStrictTransTables,
}, logger)
if err != nil {
return false, errors.Trace(err)
}
for i, col := range tbl.Columns {
// do not check ignored columns
if _, ok := ignoreCols[col.Name.L]; ok {
continue
}
if i >= len(values) {
break
}
_, err := table.CastColumnValue(se.GetExprCtx(), values[i], col, true, false)
if err != nil {
logger.Error("field value is not consistent with column type", zap.String("value", values[i].GetString()),
zap.Any("column_info", col), zap.Error(err))
return false, nil
}
}
return true, nil
}
type tableEmptyCheckItem struct {
cfg *config.Config
preInfoGetter PreImportInfoGetter
dbMetas []*mydump.MDDatabaseMeta
checkpointsDB checkpoints.DB
}
// NewTableEmptyCheckItem creates a new tableEmptyCheckItem
func NewTableEmptyCheckItem(cfg *config.Config, preInfoGetter PreImportInfoGetter, dbMetas []*mydump.MDDatabaseMeta, cpdb checkpoints.DB) precheck.Checker {
return &tableEmptyCheckItem{
cfg: cfg,
preInfoGetter: preInfoGetter,
dbMetas: dbMetas,
checkpointsDB: cpdb,
}
}
// GetCheckItemID implements Checker interface
func (*tableEmptyCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckTargetTableEmpty
}
// Check implements Checker interface
func (ci *tableEmptyCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
theResult := &precheck.CheckResult{
Item: ci.GetCheckItemID(),
Severity: precheck.Critical,
Passed: true,
Message: "all importing tables on the target are empty",
}
tableCount := 0
for _, db := range ci.dbMetas {
tableCount += len(db.Tables)
}
var lock sync.Mutex
tableNames := make([]string, 0)
concurrency := min(tableCount, ci.cfg.App.RegionConcurrency)
type tableNameComponents struct {
DBName string
TableName string
}
ch := make(chan tableNameComponents, concurrency)
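// spawn a bounded pool of workers: table names are fed through the channel and non-empty tables are collected under the mutex.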
eg, gCtx := errgroup.WithContext(ctx)
for range concurrency {
eg.Go(func() error {
for tblNameComp := range ch {
fullTableName := common.UniqueTable(tblNameComp.DBName, tblNameComp.TableName)
// skip tables that have checkpoint
if ci.cfg.Checkpoint.Enable && ci.checkpointsDB != nil {
_, err := ci.checkpointsDB.Get(gCtx, fullTableName)
switch {
case err == nil:
continue
case errors.IsNotFound(err):
default:
return errors.Trace(err)
}
}
isEmptyPtr, err1 := ci.preInfoGetter.IsTableEmpty(gCtx, tblNameComp.DBName, tblNameComp.TableName)
if err1 != nil {
return err1
}
if !(*isEmptyPtr) {
lock.Lock()
tableNames = append(tableNames, fullTableName)
lock.Unlock()
}
}
return nil
})
}
loop:
for _, db := range ci.dbMetas {
for _, tbl := range db.Tables {
select {
case ch <- tableNameComponents{tbl.DB, tbl.Name}:
case <-gCtx.Done():
break loop
}
}
}
close(ch)
if err := eg.Wait(); err != nil {
if common.IsContextCanceledError(err) {
return nil, nil
}
return nil, errors.Annotate(err, "check table contains data failed")
}
if len(tableNames) > 0 {
// sort the failed names
slices.Sort(tableNames)
theResult.Passed = false
theResult.Message = fmt.Sprintf("table(s) [%s] are not empty", strings.Join(tableNames, ", "))
}
return theResult, nil
}
// hasDefault reports whether col can be omitted from the imported data: it has an explicit default, is nullable, is generated, or is auto-increment.
func hasDefault(col *model.ColumnInfo) bool {
return col.DefaultIsExpr || col.DefaultValue != nil || !mysql.HasNotNullFlag(col.GetFlag()) ||
col.IsGenerated() || mysql.HasAutoIncrementFlag(col.GetFlag())
}
// pdTiDBFromSameClusterCheckItem provides two sources of PD addresses and uses
// util.CheckIfSameCluster to check whether they are from the same cluster.
//
// In most cases, the first source is all etcd client URL addresses of the PD
// leader, and the second source is the first etcd client URL address of every
// PD node.
//
// If the PD leader can't be reached, the first source falls back to the PD
// address set in lightning's task configuration, or in TiDB's configuration.
// It may then raise a false alert if PD has multiple endpoints and the above
// configuration uses one of them while the etcd information uses another one,
// so that no common address is passed to util.CheckIfSameCluster.
type pdTiDBFromSameClusterCheckItem struct {
db *sql.DB
pdAddrsGetter func(context.Context) []string
}
// NewPDTiDBFromSameClusterCheckItem creates a new pdTiDBFromSameClusterCheckItem.
func NewPDTiDBFromSameClusterCheckItem(
db *sql.DB,
pdAddrsGetter func(context.Context) []string,
) precheck.Checker {
return &pdTiDBFromSameClusterCheckItem{
db: db,
pdAddrsGetter: pdAddrsGetter,
}
}
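// Check implements Checker.Check.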
func (i *pdTiDBFromSameClusterCheckItem) Check(ctx context.Context) (*precheck.CheckResult, error) {
theResult := &precheck.CheckResult{
Item: i.GetCheckItemID(),
Severity: precheck.Critical,
Passed: true,
Message: "PD and TiDB in configuration are from the same cluster",
}
pdLeaderAddrsGetter := func(ctx context.Context) ([]string, error) {
addrs := i.pdAddrsGetter(ctx)
for idx, addrURL := range addrs {
u, err2 := url.Parse(addrURL)
if err2 != nil {
return nil, errors.Trace(err2)
}
addrs[idx] = u.Host
}
return addrs, nil
}
sameCluster, pdAddrs, pdAddrsFromTiDB, err := util.CheckIfSameCluster(
ctx, pdLeaderAddrsGetter, util.GetPDsAddrWithoutScheme(i.db),
)
if err != nil {
return nil, errors.Trace(err)
}
if sameCluster {
return theResult, nil
}
theResult.Passed = false
theResult.Message = fmt.Sprintf(
"PD and TiDB in configuration are not from the same cluster, "+
"PD addresses read from PD are: %v, PD addresses read from TiDB are %v",
pdAddrs, pdAddrsFromTiDB,
)
return theResult, nil
}
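// GetCheckItemID implements Checker.GetCheckItemID.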
func (*pdTiDBFromSameClusterCheckItem) GetCheckItemID() precheck.CheckItemID {
return precheck.CheckPDTiDBFromSameCluster
}