tidb/pkg/ddl/reorg.go

// Copyright 2015 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ddl

import (
"context"
"encoding/hex"
"fmt"
"math/rand"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/ddl/ingest"
"github.com/pingcap/tidb/pkg/ddl/logutil"
sess "github.com/pingcap/tidb/pkg/ddl/session"
"github.com/pingcap/tidb/pkg/distsql"
distsqlctx "github.com/pingcap/tidb/pkg/distsql/context"
"github.com/pingcap/tidb/pkg/errctx"
exprctx "github.com/pingcap/tidb/pkg/expression/context"
"github.com/pingcap/tidb/pkg/expression/contextstatic"
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/meta"
"github.com/pingcap/tidb/pkg/meta/autoid"
"github.com/pingcap/tidb/pkg/meta/model"
"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/parser/terror"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/sessionctx/stmtctx"
"github.com/pingcap/tidb/pkg/sessionctx/variable"
"github.com/pingcap/tidb/pkg/statistics"
"github.com/pingcap/tidb/pkg/table"
tbctx "github.com/pingcap/tidb/pkg/table/context"
"github.com/pingcap/tidb/pkg/table/tables"
"github.com/pingcap/tidb/pkg/tablecodec"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/chunk"
"github.com/pingcap/tidb/pkg/util/codec"
contextutil "github.com/pingcap/tidb/pkg/util/context"
"github.com/pingcap/tidb/pkg/util/dbterror"
"github.com/pingcap/tidb/pkg/util/intest"
"github.com/pingcap/tidb/pkg/util/mock"
"github.com/pingcap/tidb/pkg/util/ranger"
"github.com/pingcap/tidb/pkg/util/rowcodec"
"github.com/pingcap/tidb/pkg/util/timeutil"
"github.com/pingcap/tipb/go-tipb"
atomicutil "go.uber.org/atomic"
"go.uber.org/zap"
)
// reorgCtx is for reorganization.
type reorgCtx struct {
// doneCh is used to notify.
// If the reorganization job is done, we will use this channel to notify the outer caller.
// TODO: Now we use goroutine to simulate reorganization jobs, later we may
// use a persistent job list.
doneCh chan reorgFnResult
// rowCount is used to simulate a job's row count.
rowCount int64
jobState model.JobState
mu struct {
sync.Mutex
// warnings are used to store the warnings when doing the reorg job under certain SQL modes.
warnings map[errors.ErrorID]*terror.Error
warningsCount map[errors.ErrorID]int64
}
references atomicutil.Int32
}
// reorgFnResult records the DDL owner TS before executing the reorg function, in order to help
// the receiver determine whether the result comes from the reorg function of a previous DDL owner on this instance.
type reorgFnResult struct {
ownerTS int64
err error
}
func newReorgExprCtx() *contextstatic.StaticExprContext {
evalCtx := contextstatic.NewStaticEvalContext(
contextstatic.WithSQLMode(mysql.ModeNone),
contextstatic.WithTypeFlags(types.DefaultStmtFlags),
contextstatic.WithErrLevelMap(stmtctx.DefaultStmtErrLevels),
)
planCacheTracker := contextutil.NewPlanCacheTracker(contextutil.IgnoreWarn)
return contextstatic.NewStaticExprContext(
contextstatic.WithEvalCtx(evalCtx),
contextstatic.WithPlanCacheTracker(&planCacheTracker),
)
}
func newReorgExprCtxWithReorgMeta(reorgMeta *model.DDLReorgMeta, warnHandler contextutil.WarnHandler) (*contextstatic.StaticExprContext, error) {
intest.AssertNotNil(reorgMeta)
intest.AssertNotNil(warnHandler)
loc, err := reorgTimeZoneWithTzLoc(reorgMeta.Location)
if err != nil {
return nil, errors.Trace(err)
}
ctx := newReorgExprCtx()
evalCtx := ctx.GetStaticEvalCtx().Apply(
contextstatic.WithSQLMode(reorgMeta.SQLMode),
contextstatic.WithLocation(loc),
contextstatic.WithTypeFlags(reorgTypeFlagsWithSQLMode(reorgMeta.SQLMode)),
contextstatic.WithErrLevelMap(reorgErrLevelsWithSQLMode(reorgMeta.SQLMode)),
contextstatic.WithWarnHandler(warnHandler),
)
return ctx.Apply(contextstatic.WithEvalCtx(evalCtx)), nil
}
// reorgTableMutateContext implements table.MutateContext for reorganization.
type reorgTableMutateContext struct {
exprCtx exprctx.ExprContext
encodingConfig tbctx.RowEncodingConfig
mutateBuffers *tbctx.MutateBuffers
shardID *variable.RowIDShardGenerator
reservedRowIDAlloc stmtctx.ReservedRowIDAlloc
}
// AlternativeAllocators implements table.MutateContext.AlternativeAllocators.
func (*reorgTableMutateContext) AlternativeAllocators(*model.TableInfo) (autoid.Allocators, bool) {
// There are no alternative allocators for any table, because temporary tables
// (which do not have any data in TiKV) are not supported in reorganization.
return autoid.Allocators{}, false
}
// GetExprCtx implements table.MutateContext.GetExprCtx.
func (ctx *reorgTableMutateContext) GetExprCtx() exprctx.ExprContext {
return ctx.exprCtx
}
// ConnectionID implements table.MutateContext.ConnectionID.
func (*reorgTableMutateContext) ConnectionID() uint64 {
return 0
}
// InRestrictedSQL implements table.MutateContext.InRestrictedSQL.
func (*reorgTableMutateContext) InRestrictedSQL() bool {
return false
}
// TxnAssertionLevel implements table.MutateContext.TxnAssertionLevel.
func (*reorgTableMutateContext) TxnAssertionLevel() variable.AssertionLevel {
// Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method,
// so we can just return `AssertionLevelOff`.
return variable.AssertionLevelOff
}
// EnableMutationChecker implements table.MutateContext.EnableMutationChecker.
func (*reorgTableMutateContext) EnableMutationChecker() bool {
// Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method,
// so we can just return false.
return false
}
// GetRowEncodingConfig implements table.MutateContext.GetRowEncodingConfig.
func (ctx *reorgTableMutateContext) GetRowEncodingConfig() tbctx.RowEncodingConfig {
return ctx.encodingConfig
}
// GetMutateBuffers implements table.MutateContext.GetMutateBuffers.
func (ctx *reorgTableMutateContext) GetMutateBuffers() *tbctx.MutateBuffers {
return ctx.mutateBuffers
}
// GetRowIDShardGenerator implements table.MutateContext.GetRowIDShardGenerator.
func (ctx *reorgTableMutateContext) GetRowIDShardGenerator() *variable.RowIDShardGenerator {
return ctx.shardID
}
// GetReservedRowIDAlloc implements table.MutateContext.GetReservedRowIDAlloc.
func (ctx *reorgTableMutateContext) GetReservedRowIDAlloc() (*stmtctx.ReservedRowIDAlloc, bool) {
return &ctx.reservedRowIDAlloc, true
}
// GetBinlogSupport implements table.MutateContext.GetBinlogSupport.
func (*reorgTableMutateContext) GetBinlogSupport() (tbctx.BinlogSupport, bool) {
// We can just return `(nil, false)` because:
// - Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method.
// - Data changes made by DDL reorganization should not be written to binlog.
return nil, false
}
// GetStatisticsSupport implements table.MutateContext.GetStatisticsSupport.
func (*reorgTableMutateContext) GetStatisticsSupport() (tbctx.StatisticsSupport, bool) {
// We can just return `(nil, false)` because:
// - Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method.
// - DDL reorg does not need to collect statistics in this way.
return nil, false
}
// GetCachedTableSupport implements table.MutateContext.GetCachedTableSupport.
func (*reorgTableMutateContext) GetCachedTableSupport() (tbctx.CachedTableSupport, bool) {
// We can just return `(nil, false)` because:
// - Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method.
// - It is not allowed to execute DDL on a cached table.
return nil, false
}
// GetTemporaryTableSupport implements table.MutateContext.GetTemporaryTableSupport.
func (*reorgTableMutateContext) GetTemporaryTableSupport() (tbctx.TemporaryTableSupport, bool) {
// We can just return `(nil, false)` because:
// - Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method.
// - Temporary tables do not have any data in TiKV.
return nil, false
}
// GetExchangePartitionDMLSupport implements table.MutateContext.GetExchangePartitionDMLSupport.
func (*reorgTableMutateContext) GetExchangePartitionDMLSupport() (tbctx.ExchangePartitionDMLSupport, bool) {
// We can just return `(nil, false)` because:
// - Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method.
return nil, false
}
// newReorgTableMutateContext creates a new table.MutateContext for reorganization.
func newReorgTableMutateContext(exprCtx exprctx.ExprContext) table.MutateContext {
rowEncoder := &rowcodec.Encoder{
Enable: variable.GetDDLReorgRowFormat() != variable.DefTiDBRowFormatV1,
}
encodingConfig := tbctx.RowEncodingConfig{
IsRowLevelChecksumEnabled: rowEncoder.Enable,
RowEncoder: rowEncoder,
}
return &reorgTableMutateContext{
exprCtx: exprCtx,
encodingConfig: encodingConfig,
mutateBuffers: tbctx.NewMutateBuffers(&variable.WriteStmtBufs{}),
// Though currently, `RowIDShardGenerator` is not required in DDL reorg,
// we still provide a valid one to keep the context complete and to avoid panic if it is used in the future.
shardID: variable.NewRowIDShardGenerator(
rand.New(rand.NewSource(time.Now().UnixNano())), // #nosec G404
variable.DefTiDBShardAllocateStep,
),
}
}
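// The two constructors above are meant to be composed: the job's reorg meta (SQL
// mode, time zone) produces an expression context, which then seeds the table
// mutation context used by the backfill workers. A minimal sketch of that wiring
// (the helper name below is illustrative and not part of the reorg flow):
func buildReorgMutateContextSketch(reorgMeta *model.DDLReorgMeta) (table.MutateContext, error) {
	// Warnings raised while evaluating expressions during backfill are collected here.
	warnHandler := contextutil.NewStaticWarnHandler(0)
	exprCtx, err := newReorgExprCtxWithReorgMeta(reorgMeta, warnHandler)
	if err != nil {
		return nil, errors.Trace(err)
	}
	// Everything else (row encoder, mutate buffers, shard generator) is filled
	// with reorg defaults by newReorgTableMutateContext.
	return newReorgTableMutateContext(exprCtx), nil
}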
func reorgTypeFlagsWithSQLMode(mode mysql.SQLMode) types.Flags {
return types.StrictFlags.
WithTruncateAsWarning(!mode.HasStrictMode()).
WithIgnoreInvalidDateErr(mode.HasAllowInvalidDatesMode()).
WithIgnoreZeroInDate(!mode.HasStrictMode() || mode.HasAllowInvalidDatesMode()).
WithCastTimeToYearThroughConcat(true)
}
func reorgErrLevelsWithSQLMode(mode mysql.SQLMode) errctx.LevelMap {
return errctx.LevelMap{
errctx.ErrGroupTruncate: errctx.ResolveErrLevel(false, !mode.HasStrictMode()),
errctx.ErrGroupBadNull: errctx.ResolveErrLevel(false, !mode.HasStrictMode()),
errctx.ErrGroupDividedByZero: errctx.ResolveErrLevel(
!mode.HasErrorForDivisionByZeroMode(),
!mode.HasStrictMode(),
),
}
}
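// As an illustration of the two helpers above: in a strict SQL mode, truncation
// during the backfill stays an error, while in a non-strict mode it is downgraded
// to a warning. The sketch below (a hypothetical helper, not used by the reorg
// flow) just reads the resolved truncation level back out:
func reorgTruncateLevelSketch(mode mysql.SQLMode) errctx.Level {
	levels := reorgErrLevelsWithSQLMode(mode)
	// For example, mysql.ModeNone tolerates truncation, while a strict mode such
	// as mysql.ModeStrictAllTables does not.
	return levels[errctx.ErrGroupTruncate]
}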
func reorgTimeZoneWithTzLoc(tzLoc *model.TimeZoneLocation) (*time.Location, error) {
if tzLoc == nil {
// It is set to SystemLocation to be compatible with nil LocationInfo.
return timeutil.SystemLocation(), nil
}
return tzLoc.GetLocation()
}
// ReorgWaitTimeout is the timeout for waiting for a DDL job in the write-reorganization state.
// It is a variable so that tests can modify it.
var ReorgWaitTimeout = 5 * time.Second
func (rc *reorgCtx) notifyJobState(state model.JobState) {
atomic.StoreInt32((*int32)(&rc.jobState), int32(state))
}
func (rc *reorgCtx) isReorgCanceled() bool {
s := atomic.LoadInt32((*int32)(&rc.jobState))
return int32(model.JobStateCancelled) == s || int32(model.JobStateCancelling) == s
}
func (rc *reorgCtx) isReorgPaused() bool {
s := atomic.LoadInt32((*int32)(&rc.jobState))
return int32(model.JobStatePaused) == s || int32(model.JobStatePausing) == s
}
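// The job state above is published and consumed via atomics: the goroutine that
// handles `admin cancel ddl jobs` stores the new state, and backfill workers poll
// it between batches. A minimal sketch of the two sides (illustrative only):
func signalAndObserveCancelSketch(rc *reorgCtx) bool {
	// Cancelling side: mark the job as cancelling.
	rc.notifyJobState(model.JobStateCancelling)
	// Worker side: checked before the next batch; a true result makes the worker
	// give up and return dbterror.ErrCancelledDDLJob.
	return rc.isReorgCanceled()
}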
func (rc *reorgCtx) setRowCount(count int64) {
atomic.StoreInt64(&rc.rowCount, count)
}
func (rc *reorgCtx) mergeWarnings(warnings map[errors.ErrorID]*terror.Error, warningsCount map[errors.ErrorID]int64) {
if len(warnings) == 0 || len(warningsCount) == 0 {
return
}
rc.mu.Lock()
defer rc.mu.Unlock()
rc.mu.warnings, rc.mu.warningsCount = mergeWarningsAndWarningsCount(warnings, rc.mu.warnings, warningsCount, rc.mu.warningsCount)
}
func (rc *reorgCtx) resetWarnings() {
rc.mu.Lock()
defer rc.mu.Unlock()
rc.mu.warnings = make(map[errors.ErrorID]*terror.Error)
rc.mu.warningsCount = make(map[errors.ErrorID]int64)
}
func (rc *reorgCtx) increaseRowCount(count int64) {
atomic.AddInt64(&rc.rowCount, count)
}
func (rc *reorgCtx) getRowCount() int64 {
row := atomic.LoadInt64(&rc.rowCount)
return row
}
// runReorgJob is used as a portal to do the reorganization work.
// e.g.:
// 1: add index
// 2: alter column type
// 3: clean global index
// 4: reorganize partitions
/*
ddl goroutine >---------+
^ |
| |
| |
| | <---(doneCh)--- f()
HandleDDLQueue(...) | <---(regular timeout)
| | <---(ctx done)
| |
| |
A more ddl round <-----+
*/
// How can we cancel reorg job?
//
// The background reorg runs continuously unless it is interrupted by one of several factors, for instance, a DDL
// owner change, a logic error (kv duplicate when inserting an index / cast error when altering a column), ctx done,
// or a cancel signal.
//
// When `admin cancel ddl jobs xxx` takes effect, we give this kind of reorg DDL one more round,
// because we must pull the result out of doneCh; otherwise, the reorg worker would hang on the `f()` logic,
// which is a kind of goroutine leak.
//
// That's why we can't set the job to the rollingback state directly in `convertJob2RollbackJob`, which is the
// cancelling portal for the admin cancel action.
//
// In other words, the cancel signal travels top-down and the cancel error travels bottom-up: we set the atomic
// cancel variable in the cancelling portal to notify the lower worker goroutines, and fetch the cancel error
// from them in the additional DDL round.
//
// After that, we can make sure that the worker goroutine is correctly shut down.
func (w *worker) runReorgJob(
reorgInfo *reorgInfo,
tblInfo *model.TableInfo,
reorgFn func() error,
) error {
job := reorgInfo.Job
d := reorgInfo.jobCtx.oldDDLCtx
// This is for test compatibility: most of the early tests build the reorg job manually
// without reorg meta info, which would cause a nil pointer dereference here.
if job.ReorgMeta == nil {
job.ReorgMeta = &model.DDLReorgMeta{
SQLMode: mysql.ModeNone,
Warnings: make(map[errors.ErrorID]*terror.Error),
WarningsCount: make(map[errors.ErrorID]int64),
Location: &model.TimeZoneLocation{Name: time.UTC.String(), Offset: 0},
Version: model.CurrentReorgMetaVersion,
}
}
rc := w.getReorgCtx(job.ID)
if rc == nil {
// This job is cancelling, we should return ErrCancelledDDLJob directly.
//
// Q: Is there any possibility that the job is cancelling and has no reorgCtx?
// A: Yes, consider the following case:
// - we cancel the job while backfilling the last batch of data, and the cancel txn is committed first,
// - then the backfill workers send a signal to the `doneCh` of the reorgCtx,
// - then the DDL worker removes the reorgCtx
// - and updates the DDL job to `done`,
// - but at commit time the DDL txn raises a "write conflict" error and retries, and that is when this case happens.
if job.IsCancelling() {
return dbterror.ErrCancelledDDLJob
}
beOwnerTS := w.ddlCtx.reorgCtx.getOwnerTS()
rc = w.newReorgCtx(reorgInfo.Job.ID, reorgInfo.Job.GetRowCount())
w.wg.Add(1)
go func() {
defer w.wg.Done()
err := reorgFn()
rc.doneCh <- reorgFnResult{ownerTS: beOwnerTS, err: err}
}()
}
waitTimeout := ReorgWaitTimeout
// wait reorganization job done or timeout
select {
case res := <-rc.doneCh:
err := res.err
curTS := w.ddlCtx.reorgCtx.getOwnerTS()
if res.ownerTS != curTS {
d.removeReorgCtx(job.ID)
logutil.DDLLogger().Warn("owner ts mismatch, return timeout error and retry",
zap.Int64("prevTS", res.ownerTS),
zap.Int64("curTS", curTS))
return dbterror.ErrWaitReorgTimeout
}
// Since the job is cancelled, we don't care about its partial counts.
if rc.isReorgCanceled() || terror.ErrorEqual(err, dbterror.ErrCancelledDDLJob) {
d.removeReorgCtx(job.ID)
return dbterror.ErrCancelledDDLJob
}
rowCount := rc.getRowCount()
job.SetRowCount(rowCount)
if err != nil {
logutil.DDLLogger().Warn("run reorg job done", zap.Int64("handled rows", rowCount), zap.Error(err))
} else {
logutil.DDLLogger().Info("run reorg job done", zap.Int64("handled rows", rowCount))
}
// Update a job's warnings.
w.mergeWarningsIntoJob(job)
d.removeReorgCtx(job.ID)
updateBackfillProgress(w, reorgInfo, tblInfo, rowCount)
// For other errors, even if err is not nil here, we still collect the partial counts first,
// since in the next round the reorg starts from the startKey that was persisted last time.
if err != nil {
return errors.Trace(err)
}
case <-time.After(waitTimeout):
rowCount := rc.getRowCount()
job.SetRowCount(rowCount)
updateBackfillProgress(w, reorgInfo, tblInfo, rowCount)
// Update a job's warnings.
w.mergeWarningsIntoJob(job)
rc.resetWarnings()
logutil.DDLLogger().Info("run reorg job wait timeout",
zap.Duration("wait time", waitTimeout),
zap.Int64("total added row count", rowCount))
// On timeout we return, check the owner, and then retry waiting for the job to be done.
return dbterror.ErrWaitReorgTimeout
}
return nil
}
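// Callers treat dbterror.ErrWaitReorgTimeout as "not finished yet": the DDL worker
// leaves the current round and re-enters runReorgJob in the next one, until the
// reorg function finishes, fails, or is cancelled. A compressed sketch of that
// retry shape (illustrative only; the real scheduling happens across DDL rounds,
// not in a tight loop):
func runReorgUntilDoneSketch(w *worker, reorgInfo *reorgInfo, tblInfo *model.TableInfo, reorgFn func() error) error {
	for {
		err := w.runReorgJob(reorgInfo, tblInfo, reorgFn)
		if dbterror.ErrWaitReorgTimeout.Equal(err) {
			// Timed out waiting on doneCh; wait again in the next round.
			continue
		}
		// nil, ErrCancelledDDLJob, or any other error ends the wait.
		return errors.Trace(err)
	}
}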
func overwriteReorgInfoFromGlobalCheckpoint(w *worker, sess *sess.Session, job *model.Job, reorgInfo *reorgInfo) error {
if job.ReorgMeta.ReorgTp != model.ReorgTypeLitMerge {
// Only used for the ingest mode job.
return nil
}
if reorgInfo.mergingTmpIdx {
// Merging the temporary index uses txn mode, so we don't need to consider the checkpoint.
return nil
}
if job.ReorgMeta.IsDistReorg {
// The global checkpoint is not used in distributed tasks.
return nil
}
if w.getReorgCtx(job.ID) != nil {
// We only overwrite from checkpoint when the job runs for the first time on this TiDB instance.
return nil
}
start, pid, err := getImportedKeyFromCheckpoint(sess, job)
if err != nil {
return errors.Trace(err)
}
if pid != reorgInfo.PhysicalTableID {
// Current physical ID does not match checkpoint physical ID.
// Don't overwrite reorgInfo.StartKey.
return nil
}
if len(start) > 0 {
reorgInfo.StartKey = start
}
return nil
}
func extractElemIDs(r *reorgInfo) []int64 {
elemIDs := make([]int64, 0, len(r.elements))
for _, elem := range r.elements {
elemIDs = append(elemIDs, elem.ID)
}
return elemIDs
}
func (w *worker) mergeWarningsIntoJob(job *model.Job) {
rc := w.getReorgCtx(job.ID)
rc.mu.Lock()
partWarnings := rc.mu.warnings
partWarningsCount := rc.mu.warningsCount
rc.mu.Unlock()
warnings, warningsCount := job.GetWarnings()
warnings, warningsCount = mergeWarningsAndWarningsCount(partWarnings, warnings, partWarningsCount, warningsCount)
job.SetWarnings(warnings, warningsCount)
}
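// The warnings therefore flow in two steps: backfill workers call rc.mergeWarnings
// per batch, and on each progress tick the worker folds them into the job via
// mergeWarningsIntoJob and clears the buffer with resetWarnings, so each warning
// batch is accounted for exactly once. Sketch of one tick (illustrative only):
func flushReorgWarningsSketch(w *worker, job *model.Job) {
	rc := w.getReorgCtx(job.ID)
	if rc == nil {
		return
	}
	// Fold the accumulated warnings into the job, then clear them so the next
	// tick only sees newly produced warnings.
	w.mergeWarningsIntoJob(job)
	rc.resetWarnings()
}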
func updateBackfillProgress(w *worker, reorgInfo *reorgInfo, tblInfo *model.TableInfo,
addedRowCount int64) {
if tblInfo == nil {
return
}
progress := float64(0)
if addedRowCount != 0 {
totalCount := getTableTotalCount(w, tblInfo)
if totalCount > 0 {
progress = float64(addedRowCount) / float64(totalCount)
} else {
progress = 0
}
if progress > 1 {
progress = 1
}
logutil.DDLLogger().Debug("update progress",
zap.Float64("progress", progress),
zap.Int64("addedRowCount", addedRowCount),
zap.Int64("totalCount", totalCount))
}
switch reorgInfo.Type {
case model.ActionAddIndex, model.ActionAddPrimaryKey:
var label string
if reorgInfo.mergingTmpIdx {
label = metrics.LblAddIndexMerge
} else {
label = metrics.LblAddIndex
}
metrics.GetBackfillProgressByLabel(label, reorgInfo.SchemaName, tblInfo.Name.String()).Set(progress * 100)
case model.ActionModifyColumn:
metrics.GetBackfillProgressByLabel(metrics.LblModifyColumn, reorgInfo.SchemaName, tblInfo.Name.String()).Set(progress * 100)
case model.ActionReorganizePartition, model.ActionRemovePartitioning,
model.ActionAlterTablePartitioning:
metrics.GetBackfillProgressByLabel(metrics.LblReorgPartition, reorgInfo.SchemaName, tblInfo.Name.String()).Set(progress * 100)
}
}
func getTableTotalCount(w *worker, tblInfo *model.TableInfo) int64 {
var ctx sessionctx.Context
ctx, err := w.sessPool.Get()
if err != nil {
return statistics.PseudoRowCount
}
defer w.sessPool.Put(ctx)
// `mock.Context` is used in tests; it doesn't support SQL execution.
if _, ok := ctx.(*mock.Context); ok {
return statistics.PseudoRowCount
}
executor := ctx.GetRestrictedSQLExecutor()
var rows []chunk.Row
if tblInfo.Partition != nil && len(tblInfo.Partition.DroppingDefinitions) > 0 {
// For REORGANIZE PARTITION, only count rows from the partitions that are being reorganized (DroppingDefinitions).
defs := tblInfo.Partition.DroppingDefinitions
partIDs := make([]string, 0, len(defs))
for _, def := range defs {
partIDs = append(partIDs, strconv.FormatInt(def.ID, 10))
}
sql := "select sum(table_rows) from information_schema.partitions where tidb_partition_id in (%?);"
rows, _, err = executor.ExecRestrictedSQL(w.ctx, nil, sql, strings.Join(partIDs, ","))
} else {
sql := "select table_rows from information_schema.tables where tidb_table_id=%?;"
rows, _, err = executor.ExecRestrictedSQL(w.ctx, nil, sql, tblInfo.ID)
}
if err != nil {
return statistics.PseudoRowCount
}
if len(rows) != 1 {
return statistics.PseudoRowCount
}
return rows[0].GetInt64(0)
}
func (dc *ddlCtx) isReorgCancelled(jobID int64) bool {
return dc.getReorgCtx(jobID).isReorgCanceled()
}
func (dc *ddlCtx) isReorgPaused(jobID int64) bool {
return dc.getReorgCtx(jobID).isReorgPaused()
}
func (dc *ddlCtx) isReorgRunnable(jobID int64, isDistReorg bool) error {
if dc.ctx.Err() != nil {
// Worker is closed. So it can't do the reorganization.
return dbterror.ErrInvalidWorker.GenWithStack("worker is closed")
}
if dc.isReorgCancelled(jobID) {
// Job is cancelled. So it can't be done.
return dbterror.ErrCancelledDDLJob
}
if dc.isReorgPaused(jobID) {
logutil.DDLLogger().Warn("job paused by user", zap.String("ID", dc.uuid))
return dbterror.ErrPausedDDLJob.GenWithStackByArgs(jobID)
}
// If isDistReorg is true, we needn't check if it is owner.
if isDistReorg {
return nil
}
if !dc.isOwner() {
// If this instance is not the owner, we will try again later, so just return an error here.
logutil.DDLLogger().Info("DDL is not the DDL owner", zap.String("ID", dc.uuid))
return errors.Trace(dbterror.ErrNotOwner)
}
return nil
}
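// Backfill code is expected to call isReorgRunnable between batches so that owner
// changes, cancellation, and pausing are observed promptly rather than only at the
// end of the whole reorg. A minimal sketch of such a polling loop (illustrative
// only; processOneBatch stands in for the real batch work):
func backfillWithRunnableCheckSketch(dc *ddlCtx, jobID int64, processOneBatch func() (done bool, err error)) error {
	for {
		// Stop as soon as the worker is closed, the job is cancelled or paused,
		// or this instance has lost DDL ownership (for non-distributed reorg).
		if err := dc.isReorgRunnable(jobID, false); err != nil {
			return errors.Trace(err)
		}
		done, err := processOneBatch()
		if err != nil {
			return errors.Trace(err)
		}
		if done {
			return nil
		}
	}
}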
type reorgInfo struct {
*model.Job
StartKey kv.Key
EndKey kv.Key
jobCtx *jobContext
first bool
mergingTmpIdx bool
// PhysicalTableID is used for partitioned table.
// DDL reorganize for a partitioned table will handle partitions one by one,
// PhysicalTableID is used to trace the current partition we are handling.
// If the table is not partitioned, PhysicalTableID would be TableID.
PhysicalTableID int64
dbInfo *model.DBInfo
elements []*meta.Element
currElement *meta.Element
}
func (r *reorgInfo) NewJobContext() *ReorgContext {
return r.jobCtx.oldDDLCtx.jobContext(r.Job.ID, r.Job.ReorgMeta)
}
func (r *reorgInfo) String() string {
var isEnabled bool
if ingest.LitInitialized {
_, isEnabled = ingest.LitBackCtxMgr.Load(r.Job.ID)
}
return "CurrElementType:" + string(r.currElement.TypeKey) + "," +
"CurrElementID:" + strconv.FormatInt(r.currElement.ID, 10) + "," +
"StartKey:" + hex.EncodeToString(r.StartKey) + "," +
"EndKey:" + hex.EncodeToString(r.EndKey) + "," +
"First:" + strconv.FormatBool(r.first) + "," +
"PhysicalTableID:" + strconv.FormatInt(r.PhysicalTableID, 10) + "," +
"Ingest mode:" + strconv.FormatBool(isEnabled)
}
func constructDescTableScanPB(physicalTableID int64, tblInfo *model.TableInfo, handleCols []*model.ColumnInfo) *tipb.Executor {
tblScan := tables.BuildTableScanFromInfos(tblInfo, handleCols, false)
tblScan.TableId = physicalTableID
tblScan.Desc = true
return &tipb.Executor{Tp: tipb.ExecType_TypeTableScan, TblScan: tblScan}
}
func constructLimitPB(count uint64) *tipb.Executor {
limitExec := &tipb.Limit{
Limit: count,
}
return &tipb.Executor{Tp: tipb.ExecType_TypeLimit, Limit: limitExec}
}
func buildDescTableScanDAG(distSQLCtx *distsqlctx.DistSQLContext, tbl table.PhysicalTable, handleCols []*model.ColumnInfo, limit uint64) (*tipb.DAGRequest, error) {
dagReq := &tipb.DAGRequest{}
_, timeZoneOffset := time.Now().In(time.UTC).Zone()
dagReq.TimeZoneOffset = int64(timeZoneOffset)
for i := range handleCols {
dagReq.OutputOffsets = append(dagReq.OutputOffsets, uint32(i))
}
dagReq.Flags |= model.FlagInSelectStmt
tblScanExec := constructDescTableScanPB(tbl.GetPhysicalID(), tbl.Meta(), handleCols)
dagReq.Executors = append(dagReq.Executors, tblScanExec)
dagReq.Executors = append(dagReq.Executors, constructLimitPB(limit))
distsql.SetEncodeType(distSQLCtx, dagReq)
return dagReq, nil
}
func getColumnsTypes(columns []*model.ColumnInfo) []*types.FieldType {
colTypes := make([]*types.FieldType, 0, len(columns))
for _, col := range columns {
colTypes = append(colTypes, &col.FieldType)
}
return colTypes
}
// buildDescTableScan builds a desc table scan upon tblInfo.
func buildDescTableScan(ctx *ReorgContext, store kv.Storage, startTS uint64, tbl table.PhysicalTable,
handleCols []*model.ColumnInfo, limit uint64) (distsql.SelectResult, error) {
distSQLCtx := newDefaultReorgDistSQLCtx(store.GetClient(), contextutil.NewStaticWarnHandler(0))
dagPB, err := buildDescTableScanDAG(distSQLCtx, tbl, handleCols, limit)
if err != nil {
return nil, errors.Trace(err)
}
var b distsql.RequestBuilder
var builder *distsql.RequestBuilder
var ranges []*ranger.Range
if tbl.Meta().IsCommonHandle {
ranges = ranger.FullNotNullRange()
} else {
ranges = ranger.FullIntRange(false)
}
builder = b.SetHandleRanges(distSQLCtx, tbl.GetPhysicalID(), tbl.Meta().IsCommonHandle, ranges)
builder.SetDAGRequest(dagPB).
SetStartTS(startTS).
SetKeepOrder(true).
SetConcurrency(1).
SetDesc(true).
SetResourceGroupTagger(ctx.getResourceGroupTaggerForTopSQL()).
SetResourceGroupName(ctx.resourceGroupName)
builder.Request.NotFillCache = true
builder.Request.Priority = kv.PriorityLow
builder.RequestSource.RequestSourceInternal = true
builder.RequestSource.RequestSourceType = ctx.ddlJobSourceType()
kvReq, err := builder.Build()
if err != nil {
return nil, errors.Trace(err)
}
result, err := distsql.Select(ctx.ddlJobCtx, distSQLCtx, kvReq, getColumnsTypes(handleCols))
if err != nil {
return nil, errors.Trace(err)
}
return result, nil
}
// GetTableMaxHandle gets the max handle of a PhysicalTable.
func GetTableMaxHandle(ctx *ReorgContext, store kv.Storage, startTS uint64, tbl table.PhysicalTable) (maxHandle kv.Handle, emptyTable bool, err error) {
var handleCols []*model.ColumnInfo
var pkIdx *model.IndexInfo
tblInfo := tbl.Meta()
switch {
case tblInfo.PKIsHandle:
for _, col := range tbl.Meta().Columns {
if mysql.HasPriKeyFlag(col.GetFlag()) {
handleCols = []*model.ColumnInfo{col}
break
}
}
case tblInfo.IsCommonHandle:
pkIdx = tables.FindPrimaryIndex(tblInfo)
cols := tblInfo.Cols()
for _, idxCol := range pkIdx.Columns {
handleCols = append(handleCols, cols[idxCol.Offset])
}
default:
handleCols = []*model.ColumnInfo{model.NewExtraHandleColInfo()}
}
// Build a desc scan of tblInfo with limit 1; we can use it to retrieve the last (max) handle of the table.
result, err := buildDescTableScan(ctx, store, startTS, tbl, handleCols, 1)
if err != nil {
return nil, false, errors.Trace(err)
}
defer terror.Call(result.Close)
chk := chunk.New(getColumnsTypes(handleCols), 1, 1)
err = result.Next(ctx.ddlJobCtx, chk)
if err != nil {
return nil, false, errors.Trace(err)
}
if chk.NumRows() == 0 {
// empty table
return nil, true, nil
}
row := chk.GetRow(0)
if tblInfo.IsCommonHandle {
maxHandle, err = buildCommonHandleFromChunkRow(time.UTC, tblInfo, pkIdx, handleCols, row)
return maxHandle, false, err
}
return kv.IntHandle(row.GetInt64(0)), false, nil
}
func buildCommonHandleFromChunkRow(loc *time.Location, tblInfo *model.TableInfo, idxInfo *model.IndexInfo,
cols []*model.ColumnInfo, row chunk.Row) (kv.Handle, error) {
fieldTypes := make([]*types.FieldType, 0, len(cols))
for _, col := range cols {
fieldTypes = append(fieldTypes, &col.FieldType)
}
datumRow := row.GetDatumRow(fieldTypes)
tablecodec.TruncateIndexValues(tblInfo, idxInfo, datumRow)
var handleBytes []byte
handleBytes, err := codec.EncodeKey(loc, nil, datumRow...)
if err != nil {
return nil, err
}
return kv.NewCommonHandle(handleBytes)
}
// getTableRange gets the start and end handle of a table (or partition).
func getTableRange(ctx *ReorgContext, store kv.Storage, tbl table.PhysicalTable, snapshotVer uint64, priority int) (startHandleKey, endHandleKey kv.Key, err error) {
// Get the start handle of this partition.
err = iterateSnapshotKeys(ctx, store, priority, tbl.RecordPrefix(), snapshotVer, nil, nil,
func(_ kv.Handle, rowKey kv.Key, _ []byte) (bool, error) {
startHandleKey = rowKey
return false, nil
})
if err != nil {
return startHandleKey, endHandleKey, errors.Trace(err)
}
maxHandle, isEmptyTable, err := GetTableMaxHandle(ctx, store, snapshotVer, tbl)
if err != nil {
return startHandleKey, nil, errors.Trace(err)
}
if maxHandle != nil {
endHandleKey = tablecodec.EncodeRecordKey(tbl.RecordPrefix(), maxHandle).Next()
}
if isEmptyTable || endHandleKey.Cmp(startHandleKey) <= 0 {
logutil.DDLLogger().Info("get noop table range",
zap.String("table", fmt.Sprintf("%v", tbl.Meta())),
zap.Int64("table/partition ID", tbl.GetPhysicalID()),
zap.String("start key", hex.EncodeToString(startHandleKey)),
zap.String("end key", hex.EncodeToString(endHandleKey)),
zap.Bool("is empty table", isEmptyTable))
if startHandleKey == nil {
endHandleKey = nil
} else {
endHandleKey = startHandleKey.Next()
}
}
return
}
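// As a concrete illustration of the range computed above (a sketch; the helper
// below is ours, not part of the reorg flow): for an int-handle table whose
// smallest and largest row IDs are 1 and 100, startHandleKey is the record key of
// row 1 and endHandleKey is the record key of row 100 followed by .Next(), so the
// half-open backfill range [start, end) covers every existing row.
func recordRangeCoversMaxHandleSketch(tbl table.PhysicalTable, startKey kv.Key, maxHandle kv.Handle) (kv.Key, kv.Key) {
	// Mirror of what getTableRange does with a non-nil maxHandle.
	endKey := tablecodec.EncodeRecordKey(tbl.RecordPrefix(), maxHandle).Next()
	return startKey, endKey
}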
func getValidCurrentVersion(store kv.Storage) (ver kv.Version, err error) {
ver, err = store.CurrentVersion(kv.GlobalTxnScope)
if err != nil {
return ver, errors.Trace(err)
} else if ver.Ver <= 0 {
return ver, dbterror.ErrInvalidStoreVer.GenWithStack("invalid storage current version %d", ver.Ver)
}
return ver, nil
}
func getReorgInfo(ctx *ReorgContext, jobCtx *jobContext, rh *reorgHandler, job *model.Job, dbInfo *model.DBInfo,
tbl table.Table, elements []*meta.Element, mergingTmpIdx bool) (*reorgInfo, error) {
var (
element *meta.Element
start kv.Key
end kv.Key
pid int64
info reorgInfo
)
if job.SnapshotVer == 0 {
// For the case where an old TiDB version (which has no element information) is upgraded to the new TiDB version.
// Third step: we need to remove the element information to make sure we can save the reorg information to storage.
failpoint.Inject("MockGetIndexRecordErr", func(val failpoint.Value) {
if val.(string) == "addIdxNotOwnerErr" && atomic.CompareAndSwapUint32(&mockNotOwnerErrOnce, 3, 4) {
if err := rh.RemoveReorgElementFailPoint(job); err != nil {
failpoint.Return(nil, errors.Trace(err))
}
info.first = true
failpoint.Return(&info, nil)
}
})
info.first = true
delayForAsyncCommit()
ver, err := getValidCurrentVersion(jobCtx.store)
if err != nil {
return nil, errors.Trace(err)
}
tblInfo := tbl.Meta()
pid = tblInfo.ID
var tb table.PhysicalTable
if pi := tblInfo.GetPartitionInfo(); pi != nil {
pid = pi.Definitions[0].ID
tb = tbl.(table.PartitionedTable).GetPartition(pid)
} else {
tb = tbl.(table.PhysicalTable)
}
if mergingTmpIdx {
firstElemTempID := tablecodec.TempIndexPrefix | elements[0].ID
lastElemTempID := tablecodec.TempIndexPrefix | elements[len(elements)-1].ID
start = tablecodec.EncodeIndexSeekKey(pid, firstElemTempID, nil)
end = tablecodec.EncodeIndexSeekKey(pid, lastElemTempID, []byte{255})
} else {
start, end, err = getTableRange(ctx, jobCtx.store, tb, ver.Ver, job.Priority)
if err != nil {
return nil, errors.Trace(err)
}
}
logutil.DDLLogger().Info("job get table range",
zap.Int64("jobID", job.ID), zap.Int64("physicalTableID", pid),
zap.String("startKey", hex.EncodeToString(start)),
zap.String("endKey", hex.EncodeToString(end)))
failpoint.Inject("errorUpdateReorgHandle", func() (*reorgInfo, error) {
return &info, errors.New("occur an error when update reorg handle")
})
err = rh.InitDDLReorgHandle(job, start, end, pid, elements[0])
if err != nil {
return &info, errors.Trace(err)
}
// info should be updated after the data is persisted.
job.SnapshotVer = ver.Ver
element = elements[0]
} else {
failpoint.Inject("MockGetIndexRecordErr", func(val failpoint.Value) {
// For the case where an old TiDB version (which has no element information) is upgraded to the new TiDB version.
// Second step: we need to remove the element information to make sure we can get the "ErrDDLReorgElementNotExist" error.
// However, since "txn.Reset()" will be called later, the reorg information cannot be saved to storage.
if val.(string) == "addIdxNotOwnerErr" && atomic.CompareAndSwapUint32(&mockNotOwnerErrOnce, 2, 3) {
if err := rh.RemoveReorgElementFailPoint(job); err != nil {
failpoint.Return(nil, errors.Trace(err))
}
}
})
var err error
element, start, end, pid, err = rh.GetDDLReorgHandle(job)
if err != nil {
// If the reorg element doesn't exist, this reorg info was probably saved by an older TiDB version.
// This keeps compatibility with older TiDB versions.
// We'll try to remove it in the next major TiDB version.
if meta.ErrDDLReorgElementNotExist.Equal(err) {
job.SnapshotVer = 0
logutil.DDLLogger().Warn("get reorg info, the element does not exist", zap.Stringer("job", job))
if job.IsCancelling() {
return nil, nil
}
}
return &info, errors.Trace(err)
}
}
info.Job = job
info.jobCtx = jobCtx
info.StartKey = start
info.EndKey = end
info.PhysicalTableID = pid
info.currElement = element
info.elements = elements
info.mergingTmpIdx = mergingTmpIdx
info.dbInfo = dbInfo
return &info, nil
}
func getReorgInfoFromPartitions(ctx *ReorgContext, jobCtx *jobContext, rh *reorgHandler, job *model.Job, dbInfo *model.DBInfo, tbl table.PartitionedTable, partitionIDs []int64, elements []*meta.Element) (*reorgInfo, error) {
var (
element *meta.Element
start kv.Key
end kv.Key
pid int64
info reorgInfo
)
if job.SnapshotVer == 0 {
info.first = true
delayForAsyncCommit()
ver, err := getValidCurrentVersion(jobCtx.store)
if err != nil {
return nil, errors.Trace(err)
}
pid = partitionIDs[0]
physTbl := tbl.GetPartition(pid)
start, end, err = getTableRange(ctx, jobCtx.store, physTbl, ver.Ver, job.Priority)
if err != nil {
return nil, errors.Trace(err)
}
logutil.DDLLogger().Info("job get table range",
zap.Int64("job ID", job.ID), zap.Int64("physical table ID", pid),
zap.String("start key", hex.EncodeToString(start)),
zap.String("end key", hex.EncodeToString(end)))
err = rh.InitDDLReorgHandle(job, start, end, pid, elements[0])
if err != nil {
return &info, errors.Trace(err)
}
// info should be updated after the data is persisted.
job.SnapshotVer = ver.Ver
element = elements[0]
} else {
var err error
element, start, end, pid, err = rh.GetDDLReorgHandle(job)
if err != nil {
// If the reorg element doesn't exist, this reorg info was probably saved by an older TiDB version.
// This keeps compatibility with older TiDB versions.
// We'll try to remove it in the next major TiDB version.
if meta.ErrDDLReorgElementNotExist.Equal(err) {
job.SnapshotVer = 0
logutil.DDLLogger().Warn("get reorg info, the element does not exist", zap.Stringer("job", job))
}
return &info, errors.Trace(err)
}
}
info.Job = job
info.jobCtx = jobCtx
info.StartKey = start
info.EndKey = end
info.PhysicalTableID = pid
info.currElement = element
info.elements = elements
info.dbInfo = dbInfo
return &info, nil
}
// UpdateReorgMeta creates a new transaction and updates tidb_ddl_reorg table,
// so the reorg can restart in case of issues.
func (r *reorgInfo) UpdateReorgMeta(startKey kv.Key, pool *sess.Pool) (err error) {
if startKey == nil && r.EndKey == nil {
return nil
}
sctx, err := pool.Get()
if err != nil {
return
}
defer pool.Put(sctx)
se := sess.NewSession(sctx)
err = se.Begin(context.Background())
if err != nil {
return
}
rh := newReorgHandler(se)
err = updateDDLReorgHandle(rh.s, r.Job.ID, startKey, r.EndKey, r.PhysicalTableID, r.currElement)
err1 := se.Commit(context.Background())
if err == nil {
err = err1
}
return errors.Trace(err)
}
// reorgHandler is used to handle the reorg information during a reorganization DDL job.
type reorgHandler struct {
s *sess.Session
}
// NewReorgHandlerForTest creates a new reorgHandler, only used in test.
func NewReorgHandlerForTest(se sessionctx.Context) *reorgHandler {
return newReorgHandler(sess.NewSession(se))
}
func newReorgHandler(sess *sess.Session) *reorgHandler {
return &reorgHandler{s: sess}
}
// InitDDLReorgHandle initializes the job reorganization information.
func (r *reorgHandler) InitDDLReorgHandle(job *model.Job, startKey, endKey kv.Key, physicalTableID int64, element *meta.Element) error {
return initDDLReorgHandle(r.s, job.ID, startKey, endKey, physicalTableID, element)
}
// RemoveReorgElementFailPoint removes the element of the reorganization information.
func (r *reorgHandler) RemoveReorgElementFailPoint(job *model.Job) error {
return removeReorgElement(r.s, job)
}
// RemoveDDLReorgHandle removes the job reorganization related handles.
func (r *reorgHandler) RemoveDDLReorgHandle(job *model.Job, elements []*meta.Element) error {
return removeDDLReorgHandle(r.s, job, elements)
}
// cleanupDDLReorgHandles removes the job reorganization related handles.
func cleanupDDLReorgHandles(job *model.Job, s *sess.Session) {
if job != nil && !job.IsFinished() && !job.IsSynced() {
// Job is given, but it is neither finished nor synced; do nothing
return
}
err := cleanDDLReorgHandles(s, job)
if err != nil {
// ignore error, cleanup is not that critical
logutil.DDLLogger().Warn("Failed removing the DDL reorg entry in tidb_ddl_reorg", zap.Stringer("job", job), zap.Error(err))
}
}
// GetDDLReorgHandle gets the latest processed DDL reorganize position.
func (r *reorgHandler) GetDDLReorgHandle(job *model.Job) (element *meta.Element, startKey, endKey kv.Key, physicalTableID int64, err error) {
element, startKey, endKey, physicalTableID, err = getDDLReorgHandle(r.s, job)
if err != nil {
return element, startKey, endKey, physicalTableID, err
}
adjustedEndKey := adjustEndKeyAcrossVersion(job, endKey)
return element, startKey, adjustedEndKey, physicalTableID, nil
}
// #46306 changes the table range from [start_key, end_key] to [start_key, end_key.next).
// For old-version TiDB the semantics are still [start_key, end_key], so we need to adjust the range in new-version TiDB.
func adjustEndKeyAcrossVersion(job *model.Job, endKey kv.Key) kv.Key {
if job.ReorgMeta != nil && job.ReorgMeta.Version == model.ReorgMetaVersion0 {
logutil.DDLLogger().Info("adjust range end key for old version ReorgMetas",
zap.Int64("jobID", job.ID),
zap.Int64("reorgMetaVersion", job.ReorgMeta.Version),
zap.String("endKey", hex.EncodeToString(endKey)))
return endKey.Next()
}
return endKey
}
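// A concrete illustration of the adjustment above (a sketch; the helper below is
// ours, not part of the reorg flow): for a handle persisted with the v0 meta the
// stored inclusive end key is widened by one key (kv.Key.Next appends a 0x00
// byte), while handles written with model.CurrentReorgMetaVersion come back
// unchanged.
func endKeyWidenedForV0Sketch(job *model.Job, endKey kv.Key) bool {
	adjusted := adjustEndKeyAcrossVersion(job, endKey)
	// The lengths differ only when the old-version adjustment appended a byte.
	return len(adjusted) != len(endKey)
}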