tidb/pkg/ddl/reorg.go

// Copyright 2015 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ddl

import (
"context"
"encoding/hex"
"fmt"
"math/rand"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/ddl/ingest"
"github.com/pingcap/tidb/pkg/ddl/logutil"
sess "github.com/pingcap/tidb/pkg/ddl/session"
"github.com/pingcap/tidb/pkg/distsql"
distsqlctx "github.com/pingcap/tidb/pkg/distsql/context"
"github.com/pingcap/tidb/pkg/errctx"
exprctx "github.com/pingcap/tidb/pkg/expression/context"
"github.com/pingcap/tidb/pkg/expression/contextstatic"
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/meta"
"github.com/pingcap/tidb/pkg/meta/autoid"
"github.com/pingcap/tidb/pkg/meta/model"
"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/parser/terror"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/sessionctx/stmtctx"
"github.com/pingcap/tidb/pkg/sessionctx/variable"
"github.com/pingcap/tidb/pkg/statistics"
"github.com/pingcap/tidb/pkg/table"
tbctx "github.com/pingcap/tidb/pkg/table/context"
"github.com/pingcap/tidb/pkg/table/tables"
"github.com/pingcap/tidb/pkg/tablecodec"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/chunk"
"github.com/pingcap/tidb/pkg/util/codec"
contextutil "github.com/pingcap/tidb/pkg/util/context"
"github.com/pingcap/tidb/pkg/util/dbterror"
"github.com/pingcap/tidb/pkg/util/intest"
"github.com/pingcap/tidb/pkg/util/mock"
"github.com/pingcap/tidb/pkg/util/ranger"
"github.com/pingcap/tidb/pkg/util/rowcodec"
"github.com/pingcap/tidb/pkg/util/timeutil"
"github.com/pingcap/tipb/go-tipb"
atomicutil "go.uber.org/atomic"
"go.uber.org/zap"
)
// reorgCtx is for reorganization.
type reorgCtx struct {
// doneCh is used to notify.
// If the reorganization job is done, we will use this channel to notify the outer caller.
// TODO: Now we use goroutine to simulate reorganization jobs, later we may
// use a persistent job list.
doneCh chan reorgFnResult
// rowCount is used to simulate a job's row count.
rowCount int64
jobState model.JobState
mu struct {
sync.Mutex
// warnings are used to store the warnings when doing the reorg job under certain SQL modes.
warnings map[errors.ErrorID]*terror.Error
warningsCount map[errors.ErrorID]int64
}
references atomicutil.Int32
}
// reorgFnResult records the DDL owner TS before executing the reorg function, in order to help
// the receiver determine whether the result comes from the reorg function of a previous DDL owner on this instance.
type reorgFnResult struct {
ownerTS int64
err error
}
func newReorgExprCtx() *contextstatic.StaticExprContext {
evalCtx := contextstatic.NewStaticEvalContext(
contextstatic.WithSQLMode(mysql.ModeNone),
contextstatic.WithTypeFlags(types.DefaultStmtFlags),
contextstatic.WithErrLevelMap(stmtctx.DefaultStmtErrLevels),
)
planCacheTracker := contextutil.NewPlanCacheTracker(contextutil.IgnoreWarn)
return contextstatic.NewStaticExprContext(
contextstatic.WithEvalCtx(evalCtx),
contextstatic.WithPlanCacheTracker(&planCacheTracker),
)
}
func newReorgExprCtxWithReorgMeta(reorgMeta *model.DDLReorgMeta, warnHandler contextutil.WarnHandler) (*contextstatic.StaticExprContext, error) {
intest.AssertNotNil(reorgMeta)
intest.AssertNotNil(warnHandler)
loc, err := reorgTimeZoneWithTzLoc(reorgMeta.Location)
if err != nil {
return nil, errors.Trace(err)
}
ctx := newReorgExprCtx()
evalCtx := ctx.GetStaticEvalCtx().Apply(
contextstatic.WithSQLMode(reorgMeta.SQLMode),
contextstatic.WithLocation(loc),
contextstatic.WithTypeFlags(reorgTypeFlagsWithSQLMode(reorgMeta.SQLMode)),
contextstatic.WithErrLevelMap(reorgErrLevelsWithSQLMode(reorgMeta.SQLMode)),
contextstatic.WithWarnHandler(warnHandler),
)
return ctx.Apply(contextstatic.WithEvalCtx(evalCtx)), nil
}
// reorgTableMutateContext implements table.MutateContext for reorganization.
type reorgTableMutateContext struct {
exprCtx exprctx.ExprContext
encodingConfig tbctx.RowEncodingConfig
mutateBuffers *tbctx.MutateBuffers
shardID *variable.RowIDShardGenerator
reservedRowIDAlloc stmtctx.ReservedRowIDAlloc
}
// AlternativeAllocators implements table.MutateContext.AlternativeAllocators.
func (*reorgTableMutateContext) AlternativeAllocators(*model.TableInfo) (autoid.Allocators, bool) {
// There are no alternative allocators for any table, because temporary tables
// (which do not have any data in TiKV) are not supported in reorganization.
return autoid.Allocators{}, false
}
// GetExprCtx implements table.MutateContext.GetExprCtx.
func (ctx *reorgTableMutateContext) GetExprCtx() exprctx.ExprContext {
return ctx.exprCtx
}
// ConnectionID implements table.MutateContext.ConnectionID.
func (*reorgTableMutateContext) ConnectionID() uint64 {
return 0
}
// InRestrictedSQL implements table.MutateContext.InRestrictedSQL.
func (*reorgTableMutateContext) InRestrictedSQL() bool {
return false
}
// TxnAssertionLevel implements table.MutateContext.TxnAssertionLevel.
func (*reorgTableMutateContext) TxnAssertionLevel() variable.AssertionLevel {
// Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method,
// so we can just return `AssertionLevelOff`.
return variable.AssertionLevelOff
}
// EnableMutationChecker implements table.MutateContext.EnableMutationChecker.
func (*reorgTableMutateContext) EnableMutationChecker() bool {
// Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method,
// so we can just return false.
return false
}
// GetRowEncodingConfig implements table.MutateContext.GetRowEncodingConfig.
func (ctx *reorgTableMutateContext) GetRowEncodingConfig() tbctx.RowEncodingConfig {
return ctx.encodingConfig
}
// GetMutateBuffers implements table.MutateContext.GetMutateBuffers.
func (ctx *reorgTableMutateContext) GetMutateBuffers() *tbctx.MutateBuffers {
return ctx.mutateBuffers
}
// GetRowIDShardGenerator implements table.MutateContext.GetRowIDShardGenerator.
func (ctx *reorgTableMutateContext) GetRowIDShardGenerator() *variable.RowIDShardGenerator {
return ctx.shardID
}
// GetReservedRowIDAlloc implements table.MutateContext.GetReservedRowIDAlloc.
func (ctx *reorgTableMutateContext) GetReservedRowIDAlloc() (*stmtctx.ReservedRowIDAlloc, bool) {
return &ctx.reservedRowIDAlloc, true
}
// GetBinlogSupport implements table.MutateContext.GetBinlogSupport.
func (*reorgTableMutateContext) GetBinlogSupport() (tbctx.BinlogSupport, bool) {
// We can just return `(nil, false)` because:
// - Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method.
// - Data changes made by DDL reorganization should not be written to binlog.
return nil, false
}
// GetStatisticsSupport implements table.MutateContext.GetStatisticsSupport.
func (*reorgTableMutateContext) GetStatisticsSupport() (tbctx.StatisticsSupport, bool) {
// We can just return `(nil, false)` because:
// - Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method.
// - DDL reorg does not need to collect statistics in this way.
return nil, false
}
// GetCachedTableSupport implements table.MutateContext.GetCachedTableSupport.
func (*reorgTableMutateContext) GetCachedTableSupport() (tbctx.CachedTableSupport, bool) {
// We can just return `(nil, false)` because:
// - Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method.
// - It is not allowed to execute DDL on a cached table.
return nil, false
}
// GetTemporaryTableSupport implements table.MutateContext.GetTemporaryTableSupport.
func (*reorgTableMutateContext) GetTemporaryTableSupport() (tbctx.TemporaryTableSupport, bool) {
// We can just return `(nil, false)` because:
// - Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method.
// - Temporary tables do not have any data in TiKV.
return nil, false
}
// GetExchangePartitionDMLSupport implements table.MutateContext.GetExchangePartitionDMLSupport.
func (*reorgTableMutateContext) GetExchangePartitionDMLSupport() (tbctx.ExchangePartitionDMLSupport, bool) {
// We can just return `(nil, false)` because:
// - Only `index.Create` and `index.Delete` are invoked during reorganization, and neither of them uses this method.
return nil, false
}
// newReorgTableMutateContext creates a new table.MutateContext for reorganization.
func newReorgTableMutateContext(exprCtx exprctx.ExprContext) table.MutateContext {
rowEncoder := &rowcodec.Encoder{
Enable: variable.GetDDLReorgRowFormat() != variable.DefTiDBRowFormatV1,
}
encodingConfig := tbctx.RowEncodingConfig{
IsRowLevelChecksumEnabled: rowEncoder.Enable,
RowEncoder: rowEncoder,
}
return &reorgTableMutateContext{
exprCtx: exprCtx,
encodingConfig: encodingConfig,
mutateBuffers: tbctx.NewMutateBuffers(&variable.WriteStmtBufs{}),
// Though currently, `RowIDShardGenerator` is not required in DDL reorg,
// we still provide a valid one to keep the context complete and to avoid panic if it is used in the future.
shardID: variable.NewRowIDShardGenerator(
rand.New(rand.NewSource(time.Now().UnixNano())), // #nosec G404
variable.DefTiDBShardAllocateStep,
),
}
}
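// The two constructors above are meant to be composed: the job's reorg meta (SQL
// mode, time zone) produces an expression context, which then seeds the table
// mutation context used by the backfill workers. A minimal sketch of that wiring
// (the helper name below is illustrative and not part of the reorg flow):
func buildReorgMutateContextSketch(reorgMeta *model.DDLReorgMeta) (table.MutateContext, error) {
	// Warnings raised while evaluating expressions during backfill are collected here.
	warnHandler := contextutil.NewStaticWarnHandler(0)
	exprCtx, err := newReorgExprCtxWithReorgMeta(reorgMeta, warnHandler)
	if err != nil {
		return nil, errors.Trace(err)
	}
	// Everything else (row encoder, mutate buffers, shard generator) is filled
	// with reorg defaults by newReorgTableMutateContext.
	return newReorgTableMutateContext(exprCtx), nil
}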
func reorgTypeFlagsWithSQLMode(mode mysql.SQLMode) types.Flags {
return types.StrictFlags.
WithTruncateAsWarning(!mode.HasStrictMode()).
WithIgnoreInvalidDateErr(mode.HasAllowInvalidDatesMode()).
WithIgnoreZeroInDate(!mode.HasStrictMode() || mode.HasAllowInvalidDatesMode()).
WithCastTimeToYearThroughConcat(true)
}
func reorgErrLevelsWithSQLMode(mode mysql.SQLMode) errctx.LevelMap {
return errctx.LevelMap{
errctx.ErrGroupTruncate: errctx.ResolveErrLevel(false, !mode.HasStrictMode()),
errctx.ErrGroupBadNull: errctx.ResolveErrLevel(false, !mode.HasStrictMode()),
errctx.ErrGroupDividedByZero: errctx.ResolveErrLevel(
!mode.HasErrorForDivisionByZeroMode(),
!mode.HasStrictMode(),
),
}
}
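// As an illustration of the two helpers above: in a strict SQL mode, truncation
// during the backfill stays an error, while in a non-strict mode it is downgraded
// to a warning. The sketch below (a hypothetical helper, not used by the reorg
// flow) just reads the resolved truncation level back out:
func reorgTruncateLevelSketch(mode mysql.SQLMode) errctx.Level {
	levels := reorgErrLevelsWithSQLMode(mode)
	// For example, mysql.ModeNone tolerates truncation, while a strict mode such
	// as mysql.ModeStrictAllTables does not.
	return levels[errctx.ErrGroupTruncate]
}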
func reorgTimeZoneWithTzLoc(tzLoc *model.TimeZoneLocation) (*time.Location, error) {
if tzLoc == nil {
// It is set to SystemLocation to be compatible with nil LocationInfo.
return timeutil.SystemLocation(), nil
}
return tzLoc.GetLocation()
}
// ReorgWaitTimeout is the timeout for waiting for a DDL job in the write-reorganization state.
// It is a variable so that tests can modify it.
var ReorgWaitTimeout = 5 * time.Second
func (rc *reorgCtx) notifyJobState(state model.JobState) {
atomic.StoreInt32((*int32)(&rc.jobState), int32(state))
}
func (rc *reorgCtx) isReorgCanceled() bool {
s := atomic.LoadInt32((*int32)(&rc.jobState))
return int32(model.JobStateCancelled) == s || int32(model.JobStateCancelling) == s
}
func (rc *reorgCtx) isReorgPaused() bool {
s := atomic.LoadInt32((*int32)(&rc.jobState))
return int32(model.JobStatePaused) == s || int32(model.JobStatePausing) == s
}
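// The job state above is published and consumed via atomics: the goroutine that
// handles `admin cancel ddl jobs` stores the new state, and backfill workers poll
// it between batches. A minimal sketch of the two sides (illustrative only):
func signalAndObserveCancelSketch(rc *reorgCtx) bool {
	// Cancelling side: mark the job as cancelling.
	rc.notifyJobState(model.JobStateCancelling)
	// Worker side: checked before the next batch; a true result makes the worker
	// give up and return dbterror.ErrCancelledDDLJob.
	return rc.isReorgCanceled()
}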
func (rc *reorgCtx) setRowCount(count int64) {
atomic.StoreInt64(&rc.rowCount, count)
}
func (rc *reorgCtx) mergeWarnings(warnings map[errors.ErrorID]*terror.Error, warningsCount map[errors.ErrorID]int64) {
if len(warnings) == 0 || len(warningsCount) == 0 {
return
}
rc.mu.Lock()
defer rc.mu.Unlock()
rc.mu.warnings, rc.mu.warningsCount = mergeWarningsAndWarningsCount(warnings, rc.mu.warnings, warningsCount, rc.mu.warningsCount)
}
func (rc *reorgCtx) resetWarnings() {
rc.mu.Lock()
defer rc.mu.Unlock()
rc.mu.warnings = make(map[errors.ErrorID]*terror.Error)
rc.mu.warningsCount = make(map[errors.ErrorID]int64)
}
func (rc *reorgCtx) increaseRowCount(count int64) {
atomic.AddInt64(&rc.rowCount, count)
}
func (rc *reorgCtx) getRowCount() int64 {
row := atomic.LoadInt64(&rc.rowCount)
return row
}
// runReorgJob is used as a portal to do the reorganization work.
// e.g.:
// 1: add index
// 2: alter column type
// 3: clean global index
// 4: reorganize partitions
/*
ddl goroutine >---------+
^ |
| |
| |
| | <---(doneCh)--- f()
HandleDDLQueue(...) | <---(regular timeout)
| | <---(ctx done)
| |
| |
A more ddl round <-----+
*/
// How can we cancel reorg job?
//
// The background reorg runs continuously unless it is interrupted by one of several factors, for instance, a DDL
// owner change, a logic error (kv duplicate when inserting an index / cast error when altering a column), ctx done,
// or a cancel signal.
//
// When `admin cancel ddl jobs xxx` takes effect, we give this kind of reorg DDL one more round,
// because we must pull the result out of doneCh; otherwise, the reorg worker would hang on the `f()` logic,
// which is a kind of goroutine leak.
//
// That's why we can't set the job to the rollingback state directly in `convertJob2RollbackJob`, which is the
// cancelling portal for the admin cancel action.
//
// In other words, the cancel signal travels top-down and the cancel error travels bottom-up: we set the atomic
// cancel variable in the cancelling portal to notify the lower worker goroutines, and fetch the cancel error
// from them in the additional DDL round.
//
// After that, we can make sure that the worker goroutine is correctly shut down.
func (w *worker) runReorgJob(
reorgInfo *reorgInfo,
tblInfo *model.TableInfo,
reorgFn func() error,
) error {
job := reorgInfo.Job
d := reorgInfo.jobCtx.oldDDLCtx
// This is for test compatibility: most of the early tests build the reorg job manually
// without reorg meta info, which would cause a nil pointer dereference here.
if job.ReorgMeta == nil {
job.ReorgMeta = &model.DDLReorgMeta{
SQLMode: mysql.ModeNone,
Warnings: make(map[errors.ErrorID]*terror.Error),
WarningsCount: make(map[errors.ErrorID]int64),
Location: &model.TimeZoneLocation{Name: time.UTC.String(), Offset: 0},
Version: model.CurrentReorgMetaVersion,
}
}
rc := w.getReorgCtx(job.ID)
if rc == nil {
// This job is cancelling, we should return ErrCancelledDDLJob directly.
//
// Q: Is there any possibility that the job is cancelling and has no reorgCtx?
// A: Yes, consider the following case:
// - we cancel the job while backfilling the last batch of data, and the cancel txn is committed first,
// - then the backfill workers send a signal to the `doneCh` of the reorgCtx,
// - then the DDL worker removes the reorgCtx
// - and updates the DDL job to `done`,
// - but at commit time the DDL txn raises a "write conflict" error and retries, and that is when this case happens.
if job.IsCancelling() {
return dbterror.ErrCancelledDDLJob
}
beOwnerTS := w.ddlCtx.reorgCtx.getOwnerTS()
rc = w.newReorgCtx(reorgInfo.Job.ID, reorgInfo.Job.GetRowCount())
w.wg.Add(1)
go func() {
defer w.wg.Done()
err := reorgFn()
rc.doneCh <- reorgFnResult{ownerTS: beOwnerTS, err: err}
}()
}
waitTimeout := ReorgWaitTimeout
// wait reorganization job done or timeout
select {
case res := <-rc.doneCh:
err := res.err
curTS := w.ddlCtx.reorgCtx.getOwnerTS()
if res.ownerTS != curTS {
d.removeReorgCtx(job.ID)
logutil.DDLLogger().Warn("owner ts mismatch, return timeout error and retry",
zap.Int64("prevTS", res.ownerTS),
zap.Int64("curTS", curTS))
return dbterror.ErrWaitReorgTimeout
}
// Since the job is cancelled, we don't care about its partial counts.
if rc.isReorgCanceled() || terror.ErrorEqual(err, dbterror.ErrCancelledDDLJob) {
d.removeReorgCtx(job.ID)
return dbterror.ErrCancelledDDLJob
}
rowCount := rc.getRowCount()
job.SetRowCount(rowCount)
if err != nil {
logutil.DDLLogger().Warn("run reorg job done", zap.Int64("handled rows", rowCount), zap.Error(err))
} else {
logutil.DDLLogger().Info("run reorg job done", zap.Int64("handled rows", rowCount))
}
// Update a job's warnings.
w.mergeWarningsIntoJob(job)
d.removeReorgCtx(job.ID)
updateBackfillProgress(w, reorgInfo, tblInfo, rowCount)
// For other errors, even if err is not nil here, we still collect the partial counts first,
// since in the next round the reorg starts from the startKey that was persisted last time.
if err != nil {
return errors.Trace(err)
}
case <-time.After(waitTimeout):
rowCount := rc.getRowCount()
job.SetRowCount(rowCount)
updateBackfillProgress(w, reorgInfo, tblInfo, rowCount)
// Update a job's warnings.
w.mergeWarningsIntoJob(job)
rc.resetWarnings()
logutil.DDLLogger().Info("run reorg job wait timeout",
zap.Duration("wait time", waitTimeout),
zap.Int64("total added row count", rowCount))
// On timeout we return, check the owner, and then retry waiting for the job to be done.
return dbterror.ErrWaitReorgTimeout
}
return nil
}
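// Callers treat dbterror.ErrWaitReorgTimeout as "not finished yet": the DDL worker
// leaves the current round and re-enters runReorgJob in the next one, until the
// reorg function finishes, fails, or is cancelled. A compressed sketch of that
// retry shape (illustrative only; the real scheduling happens across DDL rounds,
// not in a tight loop):
func runReorgUntilDoneSketch(w *worker, reorgInfo *reorgInfo, tblInfo *model.TableInfo, reorgFn func() error) error {
	for {
		err := w.runReorgJob(reorgInfo, tblInfo, reorgFn)
		if dbterror.ErrWaitReorgTimeout.Equal(err) {
			// Timed out waiting on doneCh; wait again in the next round.
			continue
		}
		// nil, ErrCancelledDDLJob, or any other error ends the wait.
		return errors.Trace(err)
	}
}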
func overwriteReorgInfoFromGlobalCheckpoint(w *worker, sess *sess.Session, job *model.Job, reorgInfo *reorgInfo) error {
if job.ReorgMeta.ReorgTp != model.ReorgTypeLitMerge {
// Only used for the ingest mode job.
return nil
}
if reorgInfo.mergingTmpIdx {
// Merging the temporary index uses txn mode, so we don't need to consider the checkpoint.
return nil
}
if job.ReorgMeta.IsDistReorg {
// The global checkpoint is not used in distributed tasks.
return nil
}
if w.getReorgCtx(job.ID) != nil {
// We only overwrite from checkpoint when the job runs for the first time on this TiDB instance.
return nil
}
start, pid, err := getImportedKeyFromCheckpoint(sess, job)
if err != nil {
return errors.Trace(err)
}
if pid != reorgInfo.PhysicalTableID {
// Current physical ID does not match checkpoint physical ID.
// Don't overwrite reorgInfo.StartKey.
return nil
}
if len(start) > 0 {
reorgInfo.StartKey = start
}
return nil
}
func extractElemIDs(r *reorgInfo) []int64 {
elemIDs := make([]int64, 0, len(r.elements))
for _, elem := range r.elements {
elemIDs = append(elemIDs, elem.ID)
}
return elemIDs
}
func (w *worker) mergeWarningsIntoJob(job *model.Job) {
rc := w.getReorgCtx(job.ID)
rc.mu.Lock()
partWarnings := rc.mu.warnings
partWarningsCount := rc.mu.warningsCount
rc.mu.Unlock()
warnings, warningsCount := job.GetWarnings()
warnings, warningsCount = mergeWarningsAndWarningsCount(partWarnings, warnings, partWarningsCount, warningsCount)
job.SetWarnings(warnings, warningsCount)
}
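// The warnings therefore flow in two steps: backfill workers call rc.mergeWarnings
// per batch, and on each progress tick the worker folds them into the job via
// mergeWarningsIntoJob and clears the buffer with resetWarnings, so each warning
// batch is accounted for exactly once. Sketch of one tick (illustrative only):
func flushReorgWarningsSketch(w *worker, job *model.Job) {
	rc := w.getReorgCtx(job.ID)
	if rc == nil {
		return
	}
	// Fold the accumulated warnings into the job, then clear them so the next
	// tick only sees newly produced warnings.
	w.mergeWarningsIntoJob(job)
	rc.resetWarnings()
}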
func updateBackfillProgress(w *worker, reorgInfo *reorgInfo, tblInfo *model.TableInfo,
addedRowCount int64) {
if tblInfo == nil {
return
}
progress := float64(0)
if addedRowCount != 0 {
totalCount := getTableTotalCount(w, tblInfo)
if totalCount > 0 {
progress = float64(addedRowCount) / float64(totalCount)
} else {
progress = 0
}
if progress > 1 {
progress = 1
}
logutil.DDLLogger().Debug("update progress",
zap.Float64("progress", progress),
zap.Int64("addedRowCount", addedRowCount),
zap.Int64("totalCount", totalCount))
}
switch reorgInfo.Type {
case model.ActionAddIndex, model.ActionAddPrimaryKey:
var label string
if reorgInfo.mergingTmpIdx {
label = metrics.LblAddIndexMerge
} else {
label = metrics.LblAddIndex
}
metrics.GetBackfillProgressByLabel(label, reorgInfo.SchemaName, tblInfo.Name.String()).Set(progress * 100)
case model.ActionModifyColumn:
metrics.GetBackfillProgressByLabel(metrics.LblModifyColumn, reorgInfo.SchemaName, tblInfo.Name.String()).Set(progress * 100)
case model.ActionReorganizePartition, model.ActionRemovePartitioning,
model.ActionAlterTablePartitioning:
metrics.GetBackfillProgressByLabel(metrics.LblReorgPartition, reorgInfo.SchemaName, tblInfo.Name.String()).Set(progress * 100)
}
}
func getTableTotalCount(w *worker, tblInfo *model.TableInfo) int64 {
var ctx sessionctx.Context
ctx, err := w.sessPool.Get()
if err != nil {
return statistics.PseudoRowCount
}
defer w.sessPool.Put(ctx)
// `mock.Context` is used in tests; it doesn't support SQL execution.
if _, ok := ctx.(*mock.Context); ok {
return statistics.PseudoRowCount
}
executor := ctx.GetRestrictedSQLExecutor()
var rows []chunk.Row
if tblInfo.Partition != nil && len(tblInfo.Partition.DroppingDefinitions) > 0 {
// For REORGANIZE PARTITION, only count rows from the partitions that are being reorganized (DroppingDefinitions).
defs := tblInfo.Partition.DroppingDefinitions
partIDs := make([]string, 0, len(defs))
for _, def := range defs {
partIDs = append(partIDs, strconv.FormatInt(def.ID, 10))
}
sql := "select sum(table_rows) from information_schema.partitions where tidb_partition_id in (%?);"
rows, _, err = executor.ExecRestrictedSQL(w.ctx, nil, sql, strings.Join(partIDs, ","))
} else {
sql := "select table_rows from information_schema.tables where tidb_table_id=%?;"
rows, _, err = executor.ExecRestrictedSQL(w.ctx, nil, sql, tblInfo.ID)
}
if err != nil {
return statistics.PseudoRowCount
}
if len(rows) != 1 {
return statistics.PseudoRowCount
}
return rows[0].GetInt64(0)
}
func (dc *ddlCtx) isReorgCancelled(jobID int64) bool {
return dc.getReorgCtx(jobID).isReorgCanceled()
}
func (dc *ddlCtx) isReorgPaused(jobID int64) bool {
return dc.getReorgCtx(jobID).isReorgPaused()
}
func (dc *ddlCtx) isReorgRunnable(jobID int64, isDistReorg bool) error {
if dc.ctx.Err() != nil {
// Worker is closed. So it can't do the reorganization.
return dbterror.ErrInvalidWorker.GenWithStack("worker is closed")
}
if dc.isReorgCancelled(jobID) {
// Job is cancelled. So it can't be done.
return dbterror.ErrCancelledDDLJob
}
if dc.isReorgPaused(jobID) {
logutil.DDLLogger().Warn("job paused by user", zap.String("ID", dc.uuid))
return dbterror.ErrPausedDDLJob.GenWithStackByArgs(jobID)
}
// If isDistReorg is true, we needn't check if it is owner.
if isDistReorg {
return nil
}
if !dc.isOwner() {
// If this instance is not the owner, we will try again later, so just return an error here.
logutil.DDLLogger().Info("DDL is not the DDL owner", zap.String("ID", dc.uuid))
return errors.Trace(dbterror.ErrNotOwner)
}
return nil
}
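// Backfill code is expected to call isReorgRunnable between batches so that owner
// changes, cancellation, and pausing are observed promptly rather than only at the
// end of the whole reorg. A minimal sketch of such a polling loop (illustrative
// only; processOneBatch stands in for the real batch work):
func backfillWithRunnableCheckSketch(dc *ddlCtx, jobID int64, processOneBatch func() (done bool, err error)) error {
	for {
		// Stop as soon as the worker is closed, the job is cancelled or paused,
		// or this instance has lost DDL ownership (for non-distributed reorg).
		if err := dc.isReorgRunnable(jobID, false); err != nil {
			return errors.Trace(err)
		}
		done, err := processOneBatch()
		if err != nil {
			return errors.Trace(err)
		}
		if done {
			return nil
		}
	}
}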
type reorgInfo struct {
*model.Job
StartKey kv.Key
EndKey kv.Key
jobCtx *jobContext
first bool
mergingTmpIdx bool
// PhysicalTableID is used for partitioned table.
// DDL reorganize for a partitioned table will handle partitions one by one,
// PhysicalTableID is used to trace the current partition we are handling.
// If the table is not partitioned, PhysicalTableID would be TableID.
PhysicalTableID int64
dbInfo *model.DBInfo
elements []*meta.Element
currElement *meta.Element
}
func (r *reorgInfo) NewJobContext() *ReorgContext {
return r.jobCtx.oldDDLCtx.jobContext(r.Job.ID, r.Job.ReorgMeta)
}
func (r *reorgInfo) String() string {
var isEnabled bool
if ingest.LitInitialized {
_, isEnabled = ingest.LitBackCtxMgr.Load(r.Job.ID)
}
return "CurrElementType:" + string(r.currElement.TypeKey) + "," +
"CurrElementID:" + strconv.FormatInt(r.currElement.ID, 10) + "," +
"StartKey:" + hex.EncodeToString(r.StartKey) + "," +
"EndKey:" + hex.EncodeToString(r.EndKey) + "," +
"First:" + strconv.FormatBool(r.first) + "," +
"PhysicalTableID:" + strconv.FormatInt(r.PhysicalTableID, 10) + "," +
"Ingest mode:" + strconv.FormatBool(isEnabled)
}
func constructDescTableScanPB(physicalTableID int64, tblInfo *model.TableInfo, handleCols []*model.ColumnInfo) *tipb.Executor {
tblScan := tables.BuildTableScanFromInfos(tblInfo, handleCols, false)
tblScan.TableId = physicalTableID
tblScan.Desc = true
return &tipb.Executor{Tp: tipb.ExecType_TypeTableScan, TblScan: tblScan}
}
func constructLimitPB(count uint64) *tipb.Executor {
limitExec := &tipb.Limit{
Limit: count,
}
return &tipb.Executor{Tp: tipb.ExecType_TypeLimit, Limit: limitExec}
}
func buildDescTableScanDAG(distSQLCtx *distsqlctx.DistSQLContext, tbl table.PhysicalTable, handleCols []*model.ColumnInfo, limit uint64) (*tipb.DAGRequest, error) {
dagReq := &tipb.DAGRequest{}
_, timeZoneOffset := time.Now().In(time.UTC).Zone()
dagReq.TimeZoneOffset = int64(timeZoneOffset)
for i := range handleCols {
dagReq.OutputOffsets = append(dagReq.OutputOffsets, uint32(i))
}
dagReq.Flags |= model.FlagInSelectStmt
tblScanExec := constructDescTableScanPB(tbl.GetPhysicalID(), tbl.Meta(), handleCols)
dagReq.Executors = append(dagReq.Executors, tblScanExec)
dagReq.Executors = append(dagReq.Executors, constructLimitPB(limit))
distsql.SetEncodeType(distSQLCtx, dagReq)
return dagReq, nil
}
func getColumnsTypes(columns []*model.ColumnInfo) []*types.FieldType {
colTypes := make([]*types.FieldType, 0, len(columns))
for _, col := range columns {
colTypes = append(colTypes, &col.FieldType)
}
return colTypes
}
// buildDescTableScan builds a desc table scan upon tblInfo.
func buildDescTableScan(ctx *ReorgContext, store kv.Storage, startTS uint64, tbl table.PhysicalTable,
handleCols []*model.ColumnInfo, limit uint64) (distsql.SelectResult, error) {
distSQLCtx := newDefaultReorgDistSQLCtx(store.GetClient(), contextutil.NewStaticWarnHandler(0))
dagPB, err := buildDescTableScanDAG(distSQLCtx, tbl, handleCols, limit)
if err != nil {
return nil, errors.Trace(err)
}
var b distsql.RequestBuilder
var builder *distsql.RequestBuilder
var ranges []*ranger.Range
if tbl.Meta().IsCommonHandle {
ranges = ranger.FullNotNullRange()
} else {
ranges = ranger.FullIntRange(false)
}
builder = b.SetHandleRanges(distSQLCtx, tbl.GetPhysicalID(), tbl.Meta().IsCommonHandle, ranges)
builder.SetDAGRequest(dagPB).
SetStartTS(startTS).
SetKeepOrder(true).
SetConcurrency(1).
SetDesc(true).
SetResourceGroupTagger(ctx.getResourceGroupTaggerForTopSQL()).
SetResourceGroupName(ctx.resourceGroupName)
builder.Request.NotFillCache = true
builder.Request.Priority = kv.PriorityLow
builder.RequestSource.RequestSourceInternal = true
builder.RequestSource.RequestSourceType = ctx.ddlJobSourceType()
kvReq, err := builder.Build()
if err != nil {
return nil, errors.Trace(err)
}
result, err := distsql.Select(ctx.ddlJobCtx, distSQLCtx, kvReq, getColumnsTypes(handleCols))
if err != nil {
return nil, errors.Trace(err)
}
return result, nil
}
// GetTableMaxHandle gets the max handle of a PhysicalTable.
func GetTableMaxHandle(ctx *ReorgContext, store kv.Storage, startTS uint64, tbl table.PhysicalTable) (maxHandle kv.Handle, emptyTable bool, err error) {
var handleCols []*model.ColumnInfo
var pkIdx *model.IndexInfo
tblInfo := tbl.Meta()
switch {
case tblInfo.PKIsHandle:
for _, col := range tbl.Meta().Columns {
if mysql.HasPriKeyFlag(col.GetFlag()) {
handleCols = []*model.ColumnInfo{col}
break
}
}
case tblInfo.IsCommonHandle:
pkIdx = tables.FindPrimaryIndex(tblInfo)
cols := tblInfo.Cols()
for _, idxCol := range pkIdx.Columns {
handleCols = append(handleCols, cols[idxCol.Offset])
}
default:
handleCols = []*model.ColumnInfo{model.NewExtraHandleColInfo()}
}
// Build a desc scan of tblInfo with limit 1; we can use it to retrieve the last (max) handle of the table.
result, err := buildDescTableScan(ctx, store, startTS, tbl, handleCols, 1)
if err != nil {
return nil, false, errors.Trace(err)
}
defer terror.Call(result.Close)
chk := chunk.New(getColumnsTypes(handleCols), 1, 1)
err = result.Next(ctx.ddlJobCtx, chk)
if err != nil {
return nil, false, errors.Trace(err)
}
if chk.NumRows() == 0 {
// empty table
return nil, true, nil
}
row := chk.GetRow(0)
if tblInfo.IsCommonHandle {
maxHandle, err = buildCommonHandleFromChunkRow(time.UTC, tblInfo, pkIdx, handleCols, row)
return maxHandle, false, err
}
return kv.IntHandle(row.GetInt64(0)), false, nil
}
func buildCommonHandleFromChunkRow(loc *time.Location, tblInfo *model.TableInfo, idxInfo *model.IndexInfo,
cols []*model.ColumnInfo, row chunk.Row) (kv.Handle, error) {
fieldTypes := make([]*types.FieldType, 0, len(cols))
for _, col := range cols {
fieldTypes = append(fieldTypes, &col.FieldType)
}
datumRow := row.GetDatumRow(fieldTypes)
tablecodec.TruncateIndexValues(tblInfo, idxInfo, datumRow)
var handleBytes []byte
handleBytes, err := codec.EncodeKey(loc, nil, datumRow...)
if err != nil {
return nil, err
}
return kv.NewCommonHandle(handleBytes)
}
// getTableRange gets the start and end handle of a table (or partition).
func getTableRange(ctx *ReorgContext, store kv.Storage, tbl table.PhysicalTable, snapshotVer uint64, priority int) (startHandleKey, endHandleKey kv.Key, err error) {
// Get the start handle of this partition.
err = iterateSnapshotKeys(ctx, store, priority, tbl.RecordPrefix(), snapshotVer, nil, nil,
func(_ kv.Handle, rowKey kv.Key, _ []byte) (bool, error) {
startHandleKey = rowKey
return false, nil
})
if err != nil {
return startHandleKey, endHandleKey, errors.Trace(err)
}
maxHandle, isEmptyTable, err := GetTableMaxHandle(ctx, store, snapshotVer, tbl)
if err != nil {
return startHandleKey, nil, errors.Trace(err)
}
if maxHandle != nil {
endHandleKey = tablecodec.EncodeRecordKey(tbl.RecordPrefix(), maxHandle).Next()
}
if isEmptyTable || endHandleKey.Cmp(startHandleKey) <= 0 {
logutil.DDLLogger().Info("get noop table range",
zap.String("table", fmt.Sprintf("%v", tbl.Meta())),
zap.Int64("table/partition ID", tbl.GetPhysicalID()),
zap.String("start key", hex.EncodeToString(startHandleKey)),
zap.String("end key", hex.EncodeToString(endHandleKey)),
zap.Bool("is empty table", isEmptyTable))
if startHandleKey == nil {
endHandleKey = nil
} else {
endHandleKey = startHandleKey.Next()
}
}
return
}
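// As a concrete illustration of the range computed above (a sketch; the helper
// below is ours, not part of the reorg flow): for an int-handle table whose
// smallest and largest row IDs are 1 and 100, startHandleKey is the record key of
// row 1 and endHandleKey is the record key of row 100 followed by .Next(), so the
// half-open backfill range [start, end) covers every existing row.
func recordRangeCoversMaxHandleSketch(tbl table.PhysicalTable, startKey kv.Key, maxHandle kv.Handle) (kv.Key, kv.Key) {
	// Mirror of what getTableRange does with a non-nil maxHandle.
	endKey := tablecodec.EncodeRecordKey(tbl.RecordPrefix(), maxHandle).Next()
	return startKey, endKey
}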
func getValidCurrentVersion(store kv.Storage) (ver kv.Version, err error) {
ver, err = store.CurrentVersion(kv.GlobalTxnScope)
if err != nil {
return ver, errors.Trace(err)
} else if ver.Ver <= 0 {
return ver, dbterror.ErrInvalidStoreVer.GenWithStack("invalid storage current version %d", ver.Ver)
}
return ver, nil
}
func getReorgInfo(ctx *ReorgContext, jobCtx *jobContext, rh *reorgHandler, job *model.Job, dbInfo *model.DBInfo,
tbl table.Table, elements []*meta.Element, mergingTmpIdx bool) (*reorgInfo, error) {
var (
element *meta.Element
start kv.Key
end kv.Key
pid int64
info reorgInfo
)
if job.SnapshotVer == 0 {
// For the case where an old TiDB version (which has no element information) is upgraded to the new TiDB version.
// Third step: we need to remove the element information to make sure we can save the reorg information to storage.
failpoint.Inject("MockGetIndexRecordErr", func(val failpoint.Value) {
if val.(string) == "addIdxNotOwnerErr" && atomic.CompareAndSwapUint32(&mockNotOwnerErrOnce, 3, 4) {
if err := rh.RemoveReorgElementFailPoint(job); err != nil {
failpoint.Return(nil, errors.Trace(err))
}
info.first = true
failpoint.Return(&info, nil)
}
})
info.first = true
delayForAsyncCommit()
ver, err := getValidCurrentVersion(jobCtx.store)
if err != nil {
return nil, errors.Trace(err)
}
tblInfo := tbl.Meta()
pid = tblInfo.ID
var tb table.PhysicalTable
if pi := tblInfo.GetPartitionInfo(); pi != nil {
pid = pi.Definitions[0].ID
tb = tbl.(table.PartitionedTable).GetPartition(pid)
} else {
tb = tbl.(table.PhysicalTable)
}
if mergingTmpIdx {
firstElemTempID := tablecodec.TempIndexPrefix | elements[0].ID
lastElemTempID := tablecodec.TempIndexPrefix | elements[len(elements)-1].ID
start = tablecodec.EncodeIndexSeekKey(pid, firstElemTempID, nil)
end = tablecodec.EncodeIndexSeekKey(pid, lastElemTempID, []byte{255})
} else {
start, end, err = getTableRange(ctx, jobCtx.store, tb, ver.Ver, job.Priority)
if err != nil {
return nil, errors.Trace(err)
}
}
logutil.DDLLogger().Info("job get table range",
zap.Int64("jobID", job.ID), zap.Int64("physicalTableID", pid),
zap.String("startKey", hex.EncodeToString(start)),
zap.String("endKey", hex.EncodeToString(end)))
failpoint.Inject("errorUpdateReorgHandle", func() (*reorgInfo, error) {
return &info, errors.New("occur an error when update reorg handle")
})
err = rh.InitDDLReorgHandle(job, start, end, pid, elements[0])
if err != nil {
return &info, errors.Trace(err)
}
// info should be updated after the data is persisted.
job.SnapshotVer = ver.Ver
element = elements[0]
} else {
failpoint.Inject("MockGetIndexRecordErr", func(val failpoint.Value) {
// For the case where an old TiDB version (which has no element information) is upgraded to the new TiDB version.
// Second step: we need to remove the element information to make sure we can get the "ErrDDLReorgElementNotExist" error.
// However, since "txn.Reset()" will be called later, the reorg information cannot be saved to storage.
if val.(string) == "addIdxNotOwnerErr" && atomic.CompareAndSwapUint32(&mockNotOwnerErrOnce, 2, 3) {
if err := rh.RemoveReorgElementFailPoint(job); err != nil {
failpoint.Return(nil, errors.Trace(err))
}
}
})
var err error
element, start, end, pid, err = rh.GetDDLReorgHandle(job)
if err != nil {
// If the reorg element doesn't exist, this reorg info was probably saved by an older TiDB version.
// This keeps compatibility with older TiDB versions.
// We'll try to remove it in the next major TiDB version.
if meta.ErrDDLReorgElementNotExist.Equal(err) {
job.SnapshotVer = 0
logutil.DDLLogger().Warn("get reorg info, the element does not exist", zap.Stringer("job", job))
if job.IsCancelling() {
return nil, nil
}
}
return &info, errors.Trace(err)
}
}
info.Job = job
info.jobCtx = jobCtx
info.StartKey = start
info.EndKey = end
info.PhysicalTableID = pid
info.currElement = element
info.elements = elements
info.mergingTmpIdx = mergingTmpIdx
info.dbInfo = dbInfo
return &info, nil
}
func getReorgInfoFromPartitions(ctx *ReorgContext, jobCtx *jobContext, rh *reorgHandler, job *model.Job, dbInfo *model.DBInfo, tbl table.PartitionedTable, partitionIDs []int64, elements []*meta.Element) (*reorgInfo, error) {
var (
element *meta.Element
start kv.Key
end kv.Key
pid int64
info reorgInfo
)
if job.SnapshotVer == 0 {
info.first = true
delayForAsyncCommit()
ver, err := getValidCurrentVersion(jobCtx.store)
if err != nil {
return nil, errors.Trace(err)
}
pid = partitionIDs[0]
physTbl := tbl.GetPartition(pid)
start, end, err = getTableRange(ctx, jobCtx.store, physTbl, ver.Ver, job.Priority)
if err != nil {
return nil, errors.Trace(err)
}
logutil.DDLLogger().Info("job get table range",
zap.Int64("job ID", job.ID), zap.Int64("physical table ID", pid),
zap.String("start key", hex.EncodeToString(start)),
zap.String("end key", hex.EncodeToString(end)))
err = rh.InitDDLReorgHandle(job, start, end, pid, elements[0])
if err != nil {
return &info, errors.Trace(err)
}
// info should be updated after the data is persisted.
job.SnapshotVer = ver.Ver
element = elements[0]
} else {
var err error
element, start, end, pid, err = rh.GetDDLReorgHandle(job)
if err != nil {
// If the reorg element doesn't exist, this reorg info was probably saved by an older TiDB version.
// This keeps compatibility with older TiDB versions.
// We'll try to remove it in the next major TiDB version.
if meta.ErrDDLReorgElementNotExist.Equal(err) {
job.SnapshotVer = 0
logutil.DDLLogger().Warn("get reorg info, the element does not exist", zap.Stringer("job", job))
}
return &info, errors.Trace(err)
}
}
info.Job = job
info.jobCtx = jobCtx
info.StartKey = start
info.EndKey = end
info.PhysicalTableID = pid
info.currElement = element
info.elements = elements
info.dbInfo = dbInfo
return &info, nil
}
// UpdateReorgMeta creates a new transaction and updates tidb_ddl_reorg table,
// so the reorg can restart in case of issues.
func (r *reorgInfo) UpdateReorgMeta(startKey kv.Key, pool *sess.Pool) (err error) {
if startKey == nil && r.EndKey == nil {
return nil
}
sctx, err := pool.Get()
if err != nil {
return
}
defer pool.Put(sctx)
se := sess.NewSession(sctx)
err = se.Begin(context.Background())
if err != nil {
return
}
rh := newReorgHandler(se)
err = updateDDLReorgHandle(rh.s, r.Job.ID, startKey, r.EndKey, r.PhysicalTableID, r.currElement)
err1 := se.Commit(context.Background())
if err == nil {
err = err1
}
return errors.Trace(err)
}
// reorgHandler is used to handle the reorg information during a reorganization DDL job.
type reorgHandler struct {
s *sess.Session
}
// NewReorgHandlerForTest creates a new reorgHandler, only used in test.
func NewReorgHandlerForTest(se sessionctx.Context) *reorgHandler {
return newReorgHandler(sess.NewSession(se))
}
func newReorgHandler(sess *sess.Session) *reorgHandler {
return &reorgHandler{s: sess}
}
// InitDDLReorgHandle initializes the job reorganization information.
func (r *reorgHandler) InitDDLReorgHandle(job *model.Job, startKey, endKey kv.Key, physicalTableID int64, element *meta.Element) error {
return initDDLReorgHandle(r.s, job.ID, startKey, endKey, physicalTableID, element)
}
// RemoveReorgElementFailPoint removes the element of the reorganization information.
func (r *reorgHandler) RemoveReorgElementFailPoint(job *model.Job) error {
return removeReorgElement(r.s, job)
}
// RemoveDDLReorgHandle removes the job reorganization related handles.
func (r *reorgHandler) RemoveDDLReorgHandle(job *model.Job, elements []*meta.Element) error {
return removeDDLReorgHandle(r.s, job, elements)
}
// cleanupDDLReorgHandles removes the job reorganization related handles.
func cleanupDDLReorgHandles(job *model.Job, s *sess.Session) {
if job != nil && !job.IsFinished() && !job.IsSynced() {
// Job is given, but it is neither finished nor synced; do nothing
return
}
err := cleanDDLReorgHandles(s, job)
if err != nil {
// ignore error, cleanup is not that critical
logutil.DDLLogger().Warn("Failed removing the DDL reorg entry in tidb_ddl_reorg", zap.Stringer("job", job), zap.Error(err))
}
}
// GetDDLReorgHandle gets the latest processed DDL reorganize position.
func (r *reorgHandler) GetDDLReorgHandle(job *model.Job) (element *meta.Element, startKey, endKey kv.Key, physicalTableID int64, err error) {
element, startKey, endKey, physicalTableID, err = getDDLReorgHandle(r.s, job)
if err != nil {
return element, startKey, endKey, physicalTableID, err
}
adjustedEndKey := adjustEndKeyAcrossVersion(job, endKey)
return element, startKey, adjustedEndKey, physicalTableID, nil
}
// #46306 changes the table range from [start_key, end_key] to [start_key, end_key.next).
// For old-version TiDB the semantics are still [start_key, end_key], so we need to adjust the range in new-version TiDB.
func adjustEndKeyAcrossVersion(job *model.Job, endKey kv.Key) kv.Key {
if job.ReorgMeta != nil && job.ReorgMeta.Version == model.ReorgMetaVersion0 {
logutil.DDLLogger().Info("adjust range end key for old version ReorgMetas",
zap.Int64("jobID", job.ID),
zap.Int64("reorgMetaVersion", job.ReorgMeta.Version),
zap.String("endKey", hex.EncodeToString(endKey)))
return endKey.Next()
}
return endKey
}
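// A concrete illustration of the adjustment above (a sketch; the helper below is
// ours, not part of the reorg flow): for a handle persisted with the v0 meta the
// stored inclusive end key is widened by one key (kv.Key.Next appends a 0x00
// byte), while handles written with model.CurrentReorgMetaVersion come back
// unchanged.
func endKeyWidenedForV0Sketch(job *model.Job, endKey kv.Key) bool {
	adjusted := adjustEndKeyAcrossVersion(job, endKey)
	// The lengths differ only when the old-version adjustment appended a byte.
	return len(adjusted) != len(endKey)
}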