// Copyright 2015 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package domain

import (
	"context"
	"fmt"
	"math"
	"math/rand"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/ngaut/pools"
	"github.com/pingcap/errors"
	"github.com/pingcap/failpoint"
	"github.com/pingcap/kvproto/pkg/metapb"
	"github.com/pingcap/kvproto/pkg/pdpb"
	"github.com/pingcap/log"
	"github.com/pingcap/tidb/br/pkg/streamhelper"
	"github.com/pingcap/tidb/br/pkg/streamhelper/daemon"
	"github.com/pingcap/tidb/pkg/bindinfo"
	"github.com/pingcap/tidb/pkg/config"
	"github.com/pingcap/tidb/pkg/ddl"
	"github.com/pingcap/tidb/pkg/ddl/placement"
	"github.com/pingcap/tidb/pkg/ddl/schematracker"
	"github.com/pingcap/tidb/pkg/ddl/systable"
	ddlutil "github.com/pingcap/tidb/pkg/ddl/util"
	"github.com/pingcap/tidb/pkg/disttask/framework/scheduler"
	"github.com/pingcap/tidb/pkg/disttask/framework/storage"
	"github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor"
	"github.com/pingcap/tidb/pkg/domain/globalconfigsync"
	"github.com/pingcap/tidb/pkg/domain/infosync"
	"github.com/pingcap/tidb/pkg/errno"
	"github.com/pingcap/tidb/pkg/infoschema"
	infoschema_metrics "github.com/pingcap/tidb/pkg/infoschema/metrics"
	"github.com/pingcap/tidb/pkg/infoschema/perfschema"
	"github.com/pingcap/tidb/pkg/keyspace"
	"github.com/pingcap/tidb/pkg/kv"
	"github.com/pingcap/tidb/pkg/meta"
	"github.com/pingcap/tidb/pkg/meta/autoid"
	"github.com/pingcap/tidb/pkg/meta/model"
	"github.com/pingcap/tidb/pkg/metrics"
	"github.com/pingcap/tidb/pkg/owner"
	"github.com/pingcap/tidb/pkg/parser"
	"github.com/pingcap/tidb/pkg/parser/ast"
	"github.com/pingcap/tidb/pkg/parser/mysql"
	"github.com/pingcap/tidb/pkg/parser/terror"
	metrics2 "github.com/pingcap/tidb/pkg/planner/core/metrics"
	"github.com/pingcap/tidb/pkg/privilege/privileges"
	"github.com/pingcap/tidb/pkg/resourcegroup/runaway"
	"github.com/pingcap/tidb/pkg/sessionctx"
	"github.com/pingcap/tidb/pkg/sessionctx/sessionstates"
	"github.com/pingcap/tidb/pkg/sessionctx/sysproctrack"
	"github.com/pingcap/tidb/pkg/sessionctx/variable"
	"github.com/pingcap/tidb/pkg/statistics/handle"
	"github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze"
	"github.com/pingcap/tidb/pkg/statistics/handle/initstats"
	statslogutil "github.com/pingcap/tidb/pkg/statistics/handle/logutil"
	handleutil "github.com/pingcap/tidb/pkg/statistics/handle/util"
	"github.com/pingcap/tidb/pkg/store/helper"
	"github.com/pingcap/tidb/pkg/ttl/ttlworker"
	"github.com/pingcap/tidb/pkg/types"
	"github.com/pingcap/tidb/pkg/util"
	"github.com/pingcap/tidb/pkg/util/dbterror"
	disttaskutil "github.com/pingcap/tidb/pkg/util/disttask"
	"github.com/pingcap/tidb/pkg/util/domainutil"
	"github.com/pingcap/tidb/pkg/util/engine"
	"github.com/pingcap/tidb/pkg/util/etcd"
	"github.com/pingcap/tidb/pkg/util/expensivequery"
	"github.com/pingcap/tidb/pkg/util/gctuner"
	"github.com/pingcap/tidb/pkg/util/globalconn"
	"github.com/pingcap/tidb/pkg/util/intest"
	"github.com/pingcap/tidb/pkg/util/logutil"
	"github.com/pingcap/tidb/pkg/util/mathutil"
	"github.com/pingcap/tidb/pkg/util/memory"
	"github.com/pingcap/tidb/pkg/util/memoryusagealarm"
	"github.com/pingcap/tidb/pkg/util/replayer"
	"github.com/pingcap/tidb/pkg/util/servermemorylimit"
	"github.com/pingcap/tidb/pkg/util/sqlkiller"
	"github.com/pingcap/tidb/pkg/util/syncutil"
	"github.com/tikv/client-go/v2/tikv"
	"github.com/tikv/client-go/v2/txnkv/transaction"
	pd "github.com/tikv/pd/client"
	pdhttp "github.com/tikv/pd/client/http"
	rmclient "github.com/tikv/pd/client/resource_group/controller"
	clientv3 "go.etcd.io/etcd/client/v3"
	"go.etcd.io/etcd/client/v3/concurrency"
	atomicutil "go.uber.org/atomic"
	"go.uber.org/zap"
	"google.golang.org/grpc"
	"google.golang.org/grpc/backoff"
	"google.golang.org/grpc/keepalive"
)

var (
	mdlCheckLookDuration = 50 * time.Millisecond

	// LoadSchemaDiffVersionGapThreshold is the threshold for version gap to reload domain by loading schema diffs
	LoadSchemaDiffVersionGapThreshold int64 = 10000

	// NewInstancePlanCache creates a new instance level plan cache, this function is designed to avoid cycle-import.
	NewInstancePlanCache func(softMemLimit, hardMemLimit int64) sessionctx.InstancePlanCache
)

const (
	indexUsageGCDuration = 30 * time.Minute
)

func init() {
	if intest.InTest {
		// In tests we set the duration lower to make tests run faster.
		mdlCheckLookDuration = 2 * time.Millisecond
	}
}

// NewMockDomain is only used for test
func NewMockDomain() *Domain {
	do := &Domain{}
	do.infoCache = infoschema.NewCache(do, 1)
	do.infoCache.Insert(infoschema.MockInfoSchema(nil), 0)
	return do
}
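
// exampleMockDomainUsage is an illustrative sketch (not part of the original file)
// showing how the mock domain above can be used in a test. It assumes only what is
// defined in this file: the mock holds a MockInfoSchema at version 0, so InfoSchema()
// returns that schema and its meta version is 0.
func exampleMockDomainUsage() int64 {
	do := NewMockDomain()
	is := do.InfoSchema()
	return is.SchemaMetaVersion() // 0 for the mocked schema
}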

// Domain represents a storage space. Different domains can use the same database name.
// Multiple domains can be used in parallel without synchronization.
type Domain struct {
	store           kv.Storage
	infoCache       *infoschema.InfoCache
	privHandle      *privileges.Handle
	bindHandle      atomic.Value
	statsHandle     atomic.Pointer[handle.Handle]
	statsLease      time.Duration
	ddl             ddl.DDL
	ddlExecutor     ddl.Executor
	info            *infosync.InfoSyncer
	globalCfgSyncer *globalconfigsync.GlobalConfigSyncer
	m               syncutil.Mutex
	SchemaValidator SchemaValidator
	schemaLease     time.Duration
	sysSessionPool  util.SessionPool
	exit            chan struct{}
	// `etcdClient` must be used when keyspace is not set, or when the etcd paths need to be separated by keyspace.
	etcdClient *clientv3.Client
	// autoidClient is used when there are tables with AUTO_ID_CACHE=1, it is the client to the autoid service.
	autoidClient *autoid.ClientDiscover
	// `unprefixedEtcdCli` will never set the etcd namespace prefix by keyspace.
	// It is only used in storeMinStartTS and RemoveMinStartTS now.
	// It must be used when the etcd path does not need to be separated by keyspace.
	// See keyspace RFC: https://github.com/pingcap/tidb/pull/39685
	unprefixedEtcdCli       *clientv3.Client
	sysVarCache             sysVarCache // replaces GlobalVariableCache
	slowQuery               *topNSlowQueries
	expensiveQueryHandle    *expensivequery.Handle
	memoryUsageAlarmHandle  *memoryusagealarm.Handle
	serverMemoryLimitHandle *servermemorylimit.Handle
	// TODO: use Run for each process in future pr
	wg            *util.WaitGroupEnhancedWrapper
	statsUpdating atomicutil.Int32
	// this is the parent context of DDL, and also used by other loops such as closestReplicaReadCheckLoop.
	// there are other top level contexts in the domain, such as the ones used in
	// InitDistTaskLoop and loadStatsWorker; the domain only stores their cancelFns.
	// TODO unify top level context.
	ctx       context.Context
	cancelFns struct {
		mu  sync.Mutex
		fns []context.CancelFunc
	}
	dumpFileGcChecker   *dumpFileGcChecker
	planReplayerHandle  *planReplayerHandle
	extractTaskHandle   *ExtractHandle
	expiredTimeStamp4PC struct {
		// let `expiredTimeStamp4PC` use its own lock to avoid blocking across domain.Reload()
		// and compiler.Compile(), see issue https://github.com/pingcap/tidb/issues/45400
		sync.RWMutex
		expiredTimeStamp types.Time
	}

	logBackupAdvancer        *daemon.OwnerDaemon
	historicalStatsWorker    *HistoricalStatsWorker
	ttlJobManager            atomic.Pointer[ttlworker.JobManager]
	runawayManager           *runaway.Manager
	resourceGroupsController *rmclient.ResourceGroupsController

	serverID             uint64
	serverIDSession      *concurrency.Session
	isLostConnectionToPD atomicutil.Int32 // !0: true, 0: false.
	connIDAllocator      globalconn.Allocator

	onClose            func()
	sysExecutorFactory func(*Domain) (pools.Resource, error)

	sysProcesses SysProcesses

	mdlCheckTableInfo *mdlCheckTableInfo

	mdlCheckCh        chan struct{}
	stopAutoAnalyze   atomicutil.Bool
	minJobIDRefresher *systable.MinJobIDRefresher

	instancePlanCache sessionctx.InstancePlanCache // the instance level plan cache

	// deferFn is used to release the infoschema object lazily during the v1 and v2 switch
	deferFn
}

type deferFn struct {
	sync.Mutex
	data []deferFnRecord
}

type deferFnRecord struct {
	fn   func()
	fire time.Time
}

func (df *deferFn) add(fn func(), fire time.Time) {
	df.Lock()
	defer df.Unlock()
	df.data = append(df.data, deferFnRecord{fn: fn, fire: fire})
}

func (df *deferFn) check() {
	now := time.Now()
	df.Lock()
	defer df.Unlock()

	// Iterate the slice, call the deferred functions that are due, and remove them.
	rm := 0
	for i := 0; i < len(df.data); i++ {
		record := &df.data[i]
		if now.After(record.fire) {
			record.fn()
			rm++
		} else {
			df.data[i-rm] = df.data[i]
		}
	}
	df.data = df.data[:len(df.data)-rm]
}
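
// exampleDeferFnUsage is an illustrative sketch (not part of the original file)
// showing how the deferFn helper above behaves: records whose fire time has passed
// are invoked and dropped by check(), while records scheduled in the future are kept.
func exampleDeferFnUsage() {
	var df deferFn
	fired := false
	df.add(func() { fired = true }, time.Now().Add(-time.Second)) // already due
	df.add(func() {}, time.Now().Add(time.Hour))                  // not due yet
	df.check()
	// fired is now true, and only the future record remains in df.data.
	_ = fired
}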

type mdlCheckTableInfo struct {
	mu         sync.Mutex
	newestVer  int64
	jobsVerMap map[int64]int64
	jobsIDsMap map[int64]string
}

// InfoCache export for test.
func (do *Domain) InfoCache() *infoschema.InfoCache {
	return do.infoCache
}

// EtcdClient export for test.
func (do *Domain) EtcdClient() *clientv3.Client {
	return do.etcdClient
}

// loadInfoSchema loads infoschema at startTS.
// It returns:
// 1. the needed infoschema
// 2. cache hit indicator
// 3. currentSchemaVersion (before loading)
// 4. the changed table IDs if it is not a full load
// 5. an error if any
func (do *Domain) loadInfoSchema(startTS uint64, isSnapshot bool) (infoschema.InfoSchema, bool, int64, *transaction.RelatedSchemaChange, error) {
	beginTime := time.Now()
	defer func() {
		infoschema_metrics.LoadSchemaDurationTotal.Observe(time.Since(beginTime).Seconds())
	}()
	snapshot := do.store.GetSnapshot(kv.NewVersion(startTS))
	// Use the KV timeout read feature to address the issue of potential DDL lease expiration when
	// the meta region leader is slow.
	snapshot.SetOption(kv.TiKVClientReadTimeout, uint64(3000)) // 3000ms.
	m := meta.NewSnapshotMeta(snapshot)
	neededSchemaVersion, err := m.GetSchemaVersionWithNonEmptyDiff()
	if err != nil {
		return nil, false, 0, nil, err
	}
	// fetch the commit timestamp of the schema diff
	schemaTs, err := do.getTimestampForSchemaVersionWithNonEmptyDiff(m, neededSchemaVersion, startTS)
	if err != nil {
		logutil.BgLogger().Warn("failed to get schema version", zap.Error(err), zap.Int64("version", neededSchemaVersion))
		schemaTs = 0
	}

	var oldIsV2 bool
	enableV2 := variable.SchemaCacheSize.Load() > 0
	currentSchemaVersion := int64(0)
	if oldInfoSchema := do.infoCache.GetLatest(); oldInfoSchema != nil {
		currentSchemaVersion = oldInfoSchema.SchemaMetaVersion()
		oldIsV2, _ = infoschema.IsV2(oldInfoSchema)
	}
	useV2, isV1V2Switch := shouldUseV2(enableV2, oldIsV2, isSnapshot)

	if is := do.infoCache.GetByVersion(neededSchemaVersion); is != nil {
		isV2, raw := infoschema.IsV2(is)
		if isV2 {
			// Copy the infoschema V2 instance and update its ts.
			// For example, the DDL ran 30 minutes ago and GC happened 10 minutes ago. If we used
			// that infoschema, we would get the error "GC life time is shorter than transaction
			// duration" when visiting TiKV.
			// So we keep updating the ts of the infoschema v2.
			is = raw.CloneAndUpdateTS(startTS)
		}

		// Try to insert here as well to correct the schemaTs if the previous one is wrong
		// (the insert method checks whether schemaTs is zero).
		do.infoCache.Insert(is, schemaTs)

		if !isV1V2Switch {
			return is, true, 0, nil, nil
		}
	}

	// TODO: tryLoadSchemaDiffs has potential risks of failure. And it becomes worse in history reading cases.
	// It is only kept because there is no alternative diff/partial loading solution.
	// It is only used to incrementally upgrade the current latest infoschema, when:
	// 1. It is not the first bootstrap load, which needs a full load.
	// 2. It is newer than the current one, so it will be "the current one" after this function call.
	// 3. The version gap is smaller than LoadSchemaDiffVersionGapThreshold.
	// 4. There is no regenerated schema diff.
	startTime := time.Now()
	if !isV1V2Switch && currentSchemaVersion != 0 && neededSchemaVersion > currentSchemaVersion && neededSchemaVersion-currentSchemaVersion < LoadSchemaDiffVersionGapThreshold {
		is, relatedChanges, diffTypes, err := do.tryLoadSchemaDiffs(useV2, m, currentSchemaVersion, neededSchemaVersion, startTS)
		if err == nil {
			infoschema_metrics.LoadSchemaDurationLoadDiff.Observe(time.Since(startTime).Seconds())
			isV2, _ := infoschema.IsV2(is)
			do.infoCache.Insert(is, schemaTs)
			logutil.BgLogger().Info("diff load InfoSchema success",
				zap.Bool("isV2", isV2),
				zap.Int64("currentSchemaVersion", currentSchemaVersion),
				zap.Int64("neededSchemaVersion", neededSchemaVersion),
				zap.Duration("elapsed time", time.Since(startTime)),
				zap.Int64("gotSchemaVersion", is.SchemaMetaVersion()),
				zap.Int64s("phyTblIDs", relatedChanges.PhyTblIDS),
				zap.Uint64s("actionTypes", relatedChanges.ActionTypes),
				zap.Strings("diffTypes", diffTypes))
			return is, false, currentSchemaVersion, relatedChanges, nil
		}
		// We can fall back to a full load, so there is no need to return the error.
		logutil.BgLogger().Error("failed to load schema diff", zap.Error(err))
	}
	// full load.
	schemas, err := do.fetchAllSchemasWithTables(m)
	if err != nil {
		return nil, false, currentSchemaVersion, nil, err
	}

	policies, err := do.fetchPolicies(m)
	if err != nil {
		return nil, false, currentSchemaVersion, nil, err
	}

	resourceGroups, err := do.fetchResourceGroups(m)
	if err != nil {
		return nil, false, currentSchemaVersion, nil, err
	}
	infoschema_metrics.LoadSchemaDurationLoadAll.Observe(time.Since(startTime).Seconds())

	data := do.infoCache.Data
	if isSnapshot {
		// Use a NewData() to avoid adding the snapshot schema to the infoschema history.
		// Why? Imagine that the current schema versions are [103 104 105 ...].
		// Then a snapshot read requires infoschema version 53, and it gets added.
		// Now the history becomes [53, ... 103, 104, 105 ...].
		// If a query then asks for version 74, we'd mistakenly use 53!
		// Not adding the snapshot schema to the history avoids such cases.
		data = infoschema.NewData()
	}
	builder := infoschema.NewBuilder(do, do.sysFacHack, data, useV2)
	err = builder.InitWithDBInfos(schemas, policies, resourceGroups, neededSchemaVersion)
	if err != nil {
		return nil, false, currentSchemaVersion, nil, err
	}
	is := builder.Build(startTS)
	isV2, _ := infoschema.IsV2(is)
	logutil.BgLogger().Info("full load InfoSchema success",
		zap.Bool("isV2", isV2),
		zap.Int64("currentSchemaVersion", currentSchemaVersion),
		zap.Int64("neededSchemaVersion", neededSchemaVersion),
		zap.Duration("elapsed time", time.Since(startTime)))

	if isV1V2Switch && schemaTs > 0 {
		// Reset the whole info cache to avoid v1 and v2 co-existing, which would double the memory usage.
		fn := do.infoCache.Upsert(is, schemaTs)
		do.deferFn.add(fn, time.Now().Add(10*time.Minute))
		logutil.BgLogger().Info("infoschema v1/v2 switch")
	} else {
		do.infoCache.Insert(is, schemaTs)
	}
	return is, false, currentSchemaVersion, nil, nil
}

// Returns the timestamp of a schema version, which is the commit timestamp of the schema diff
func (do *Domain) getTimestampForSchemaVersionWithNonEmptyDiff(m *meta.Meta, version int64, startTS uint64) (uint64, error) {
	tikvStore, ok := do.Store().(helper.Storage)
	if ok {
		newHelper := helper.NewHelper(tikvStore)
		mvccResp, err := newHelper.GetMvccByEncodedKeyWithTS(m.EncodeSchemaDiffKey(version), startTS)
		if err != nil {
			return 0, err
		}
		if mvccResp == nil || mvccResp.Info == nil || len(mvccResp.Info.Writes) == 0 {
			return 0, errors.Errorf("There is no Write MVCC info for the schema version")
		}
		return mvccResp.Info.Writes[0].CommitTs, nil
	}
	return 0, errors.Errorf("cannot get store from domain")
}

func (do *Domain) sysFacHack() (pools.Resource, error) {
	// TODO: Here we create new sessions with sysFac in DDL,
	// which will use `do` as the Domain instead of calling `domap.Get`.
	// That's because `domap.Get` requires a lock, and we can't acquire it
	// again before Domain initialization finishes.
	// After we remove the lazy logic of creating Domain, we
	// can simplify the code here.
	return do.sysExecutorFactory(do)
}

func (*Domain) fetchPolicies(m *meta.Meta) ([]*model.PolicyInfo, error) {
	allPolicies, err := m.ListPolicies()
	if err != nil {
		return nil, err
	}
	return allPolicies, nil
}

func (*Domain) fetchResourceGroups(m *meta.Meta) ([]*model.ResourceGroupInfo, error) {
	allResourceGroups, err := m.ListResourceGroups()
	if err != nil {
		return nil, err
	}
	return allResourceGroups, nil
}

func (do *Domain) fetchAllSchemasWithTables(m *meta.Meta) ([]*model.DBInfo, error) {
	allSchemas, err := m.ListDatabases()
	if err != nil {
		return nil, err
	}
	if len(allSchemas) == 0 {
		return nil, nil
	}

	splittedSchemas := do.splitForConcurrentFetch(allSchemas)
	concurrency := min(len(splittedSchemas), 128)

	eg, ectx := util.NewErrorGroupWithRecoverWithCtx(context.Background())
	eg.SetLimit(concurrency)
	for _, schemas := range splittedSchemas {
		ss := schemas
		eg.Go(func() error {
			return do.fetchSchemasWithTables(ectx, ss, m)
		})
	}
	if err := eg.Wait(); err != nil {
		return nil, err
	}
	return allSchemas, nil
}

// fetchSchemaConcurrency controls the number of goroutines used to load schemas.
// More goroutines increase the memory usage of json.Unmarshal(), which could cause OOM,
// so we keep the concurrency low.
const fetchSchemaConcurrency = 1

func (*Domain) splitForConcurrentFetch(schemas []*model.DBInfo) [][]*model.DBInfo {
	groupCnt := fetchSchemaConcurrency
	schemaCnt := len(schemas)
	if variable.SchemaCacheSize.Load() > 0 && schemaCnt > 1000 {
		// TODO: Temporary solution to speed up when there are too many databases, will refactor it later.
		groupCnt = 8
	}

	splitted := make([][]*model.DBInfo, 0, groupCnt)
	groupSizes := mathutil.Divide2Batches(schemaCnt, groupCnt)

	start := 0
	for _, groupSize := range groupSizes {
		splitted = append(splitted, schemas[start:start+groupSize])
		start += groupSize
	}

	return splitted
}
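
// exampleSplitIntoBatches is an illustrative sketch (not part of the original file)
// of the splitting idea used by splitForConcurrentFetch above: distribute n items
// into groupCnt batches whose sizes differ by at most one, so that per-goroutine work
// stays balanced. It is an assumption-based stand-in for mathutil.Divide2Batches,
// not its actual implementation.
func exampleSplitIntoBatches(n, groupCnt int) []int {
	if groupCnt <= 0 {
		return nil
	}
	sizes := make([]int, 0, groupCnt)
	for i := 0; i < groupCnt; i++ {
		// The first n%groupCnt batches get one extra item.
		size := n / groupCnt
		if i < n%groupCnt {
			size++
		}
		if size > 0 {
			sizes = append(sizes, size)
		}
	}
	return sizes
}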

func (*Domain) fetchSchemasWithTables(ctx context.Context, schemas []*model.DBInfo, m *meta.Meta) error {
	failpoint.Inject("failed-fetch-schemas-with-tables", func() {
		failpoint.Return(errors.New("failpoint: failed to fetch schemas with tables"))
	})

	for _, di := range schemas {
		// if the ctx has been canceled, stop fetching schemas.
		if err := ctx.Err(); err != nil {
			return err
		}
		var tables []*model.TableInfo
		var err error
		if variable.SchemaCacheSize.Load() > 0 && !infoschema.IsSpecialDB(di.Name.L) {
			name2ID, specialTableInfos, err := meta.GetAllNameToIDAndTheMustLoadedTableInfo(m, di.ID)
			if err != nil {
				return err
			}
			di.TableName2ID = name2ID
			tables = specialTableInfos
		} else {
			tables, err = m.ListTables(di.ID)
			if err != nil {
				return err
			}
		}
		// If TreatOldVersionUTF8AsUTF8MB4 is enabled, the old-version schema UTF8 charset needs to be converted to UTF8MB4.
		if config.GetGlobalConfig().TreatOldVersionUTF8AsUTF8MB4 {
			for _, tbInfo := range tables {
				infoschema.ConvertOldVersionUTF8ToUTF8MB4IfNeed(tbInfo)
			}
		}
		diTables := make([]*model.TableInfo, 0, len(tables))
		for _, tbl := range tables {
			infoschema.ConvertCharsetCollateToLowerCaseIfNeed(tbl)
			// Check whether the table is in repair mode.
			if domainutil.RepairInfo.InRepairMode() && domainutil.RepairInfo.CheckAndFetchRepairedTable(di, tbl) {
				if tbl.State != model.StatePublic {
					// Do not load it because we are repairing the table and the table info could be `bad`
					// before the repair is done.
					continue
				}
				// If the state is public, it means that the DDL job is done, but the table
				// hasn't been deleted from the repair table list yet.
				// Since the repair is done and the table is visible, we should load it.
			}
			diTables = append(diTables, tbl)
		}
		di.Deprecated.Tables = diTables
	}
	return nil
}

// shouldUseV2 decides whether to use infoschema v2.
// When loading a snapshot, the infoschema format should stay the same as before to avoid a v1/v2 switch.
// Otherwise, it is decided by enableV2.
func shouldUseV2(enableV2 bool, oldIsV2 bool, isSnapshot bool) (useV2 bool, isV1V2Switch bool) {
	if isSnapshot {
		return oldIsV2, false
	}
	return enableV2, enableV2 != oldIsV2
}
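
// exampleShouldUseV2 is an illustrative sketch (not part of the original file)
// spelling out the decision table of shouldUseV2 above: snapshot loads always keep
// the previous format and never report a v1/v2 switch, while normal loads follow
// enableV2 and report a switch whenever the format changes.
func exampleShouldUseV2() {
	// Snapshot load: stick with the old format, never a switch.
	useV2, isSwitch := shouldUseV2(true /*enableV2*/, false /*oldIsV2*/, true /*isSnapshot*/)
	_, _ = useV2, isSwitch // false, false

	// Normal load with v2 enabled while the cache still holds v1: switch to v2.
	useV2, isSwitch = shouldUseV2(true, false, false)
	_, _ = useV2, isSwitch // true, true
}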

// tryLoadSchemaDiffs tries to only load the latest schema changes.
// On success it returns the new infoschema built from schema diffs; if the schema
// cannot be loaded by diffs, an error is returned and the caller falls back to a full load.
// The second return value is the delta-updated table and partition IDs.
func (do *Domain) tryLoadSchemaDiffs(useV2 bool, m *meta.Meta, usedVersion, newVersion int64, startTS uint64) (infoschema.InfoSchema, *transaction.RelatedSchemaChange, []string, error) {
	var diffs []*model.SchemaDiff
	for usedVersion < newVersion {
		usedVersion++
		diff, err := m.GetSchemaDiff(usedVersion)
		if err != nil {
			return nil, nil, nil, err
		}
		if diff == nil {
			// An empty diff means the txn generating the schema version is committed, but the txn of `runDDLJob` is not committed or has failed.
			// It is safe to skip the empty diff because the infoschema is new enough and consistent.
			logutil.BgLogger().Info("diff load InfoSchema get empty schema diff", zap.Int64("version", usedVersion))
			do.infoCache.InsertEmptySchemaVersion(usedVersion)
			continue
		}
		diffs = append(diffs, diff)
	}

	failpoint.Inject("MockTryLoadDiffError", func(val failpoint.Value) {
		switch val.(string) {
		case "exchangepartition":
			if diffs[0].Type == model.ActionExchangeTablePartition {
				failpoint.Return(nil, nil, nil, errors.New("mock error"))
			}
		case "renametable":
			if diffs[0].Type == model.ActionRenameTable {
				failpoint.Return(nil, nil, nil, errors.New("mock error"))
			}
		case "dropdatabase":
			if diffs[0].Type == model.ActionDropSchema {
				failpoint.Return(nil, nil, nil, errors.New("mock error"))
			}
		}
	})

	builder := infoschema.NewBuilder(do, do.sysFacHack, do.infoCache.Data, useV2)
	err := builder.InitWithOldInfoSchema(do.infoCache.GetLatest())
	if err != nil {
		return nil, nil, nil, errors.Trace(err)
	}

	builder.WithStore(do.store).SetDeltaUpdateBundles()
	phyTblIDs := make([]int64, 0, len(diffs))
	actions := make([]uint64, 0, len(diffs))
	diffTypes := make([]string, 0, len(diffs))
	for _, diff := range diffs {
		if diff.RegenerateSchemaMap {
			return nil, nil, nil, errors.Errorf("Meets a schema diff with RegenerateSchemaMap flag")
		}
		ids, err := builder.ApplyDiff(m, diff)
		if err != nil {
			return nil, nil, nil, err
		}
		if canSkipSchemaCheckerDDL(diff.Type) {
			continue
		}
		diffTypes = append(diffTypes, diff.Type.String())
		phyTblIDs = append(phyTblIDs, ids...)
		for i := 0; i < len(ids); i++ {
			actions = append(actions, uint64(diff.Type))
		}
	}

	is := builder.Build(startTS)
	relatedChange := transaction.RelatedSchemaChange{}
	relatedChange.PhyTblIDS = phyTblIDs
	relatedChange.ActionTypes = actions
	return is, &relatedChange, diffTypes, nil
}

func canSkipSchemaCheckerDDL(tp model.ActionType) bool {
	switch tp {
	case model.ActionUpdateTiFlashReplicaStatus, model.ActionSetTiFlashReplica:
		return true
	}
	return false
}

// InfoSchema gets the latest information schema from domain.
func (do *Domain) InfoSchema() infoschema.InfoSchema {
	return do.infoCache.GetLatest()
}

// GetSnapshotInfoSchema gets a snapshot information schema.
func (do *Domain) GetSnapshotInfoSchema(snapshotTS uint64) (infoschema.InfoSchema, error) {
	// if the snapshotTS is new enough, we can get the infoschema directly through snapshotTS.
	if is := do.infoCache.GetBySnapshotTS(snapshotTS); is != nil {
		return is, nil
	}
	is, _, _, _, err := do.loadInfoSchema(snapshotTS, true)
	infoschema_metrics.LoadSchemaCounterSnapshot.Inc()
	return is, err
}

// GetSnapshotMeta gets a new snapshot meta at startTS.
func (do *Domain) GetSnapshotMeta(startTS uint64) *meta.Meta {
	snapshot := do.store.GetSnapshot(kv.NewVersion(startTS))
	return meta.NewSnapshotMeta(snapshot)
}

// ExpiredTimeStamp4PC gets expiredTimeStamp4PC from domain.
func (do *Domain) ExpiredTimeStamp4PC() types.Time {
	do.expiredTimeStamp4PC.RLock()
	defer do.expiredTimeStamp4PC.RUnlock()

	return do.expiredTimeStamp4PC.expiredTimeStamp
}

// SetExpiredTimeStamp4PC sets the expiredTimeStamp4PC of the domain.
func (do *Domain) SetExpiredTimeStamp4PC(time types.Time) {
	do.expiredTimeStamp4PC.Lock()
	defer do.expiredTimeStamp4PC.Unlock()

	do.expiredTimeStamp4PC.expiredTimeStamp = time
}

// DDL gets DDL from domain.
func (do *Domain) DDL() ddl.DDL {
	return do.ddl
}

// DDLExecutor gets the ddl executor from domain.
func (do *Domain) DDLExecutor() ddl.Executor {
	return do.ddlExecutor
}

// SetDDL sets DDL to domain, it's only used in tests.
func (do *Domain) SetDDL(d ddl.DDL, executor ddl.Executor) {
	do.ddl = d
	do.ddlExecutor = executor
}

// InfoSyncer gets infoSyncer from domain.
func (do *Domain) InfoSyncer() *infosync.InfoSyncer {
	return do.info
}

// NotifyGlobalConfigChange notifies the global config syncer to store the global config into PD.
func (do *Domain) NotifyGlobalConfigChange(name, value string) {
	do.globalCfgSyncer.Notify(pd.GlobalConfigItem{Name: name, Value: value, EventType: pdpb.EventType_PUT})
}

// GetGlobalConfigSyncer exports for testing.
func (do *Domain) GetGlobalConfigSyncer() *globalconfigsync.GlobalConfigSyncer {
	return do.globalCfgSyncer
}

// Store gets KV store from domain.
func (do *Domain) Store() kv.Storage {
	return do.store
}

// GetScope gets the status variables scope.
func (*Domain) GetScope(string) variable.ScopeFlag {
	// For now, all domain status variables use the default scope.
	return variable.DefaultStatusVarScopeFlag
}

func getFlashbackStartTSFromErrorMsg(err error) uint64 {
	slices := strings.Split(err.Error(), "is in flashback progress, FlashbackStartTS is ")
	if len(slices) != 2 {
		return 0
	}
	version, err := strconv.ParseUint(slices[1], 10, 0)
	if err != nil {
		return 0
	}
	return version
}
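
// exampleFlashbackStartTSParsing is an illustrative sketch (not part of the original
// file) showing the error-message shape getFlashbackStartTSFromErrorMsg above expects:
// anything before the marker substring, then the flashback start TS as a decimal number.
// The message prefix and timestamp below are made-up values for illustration only.
func exampleFlashbackStartTSParsing() uint64 {
	err := errors.New("region is in flashback progress, FlashbackStartTS is 449348487822376961")
	return getFlashbackStartTSFromErrorMsg(err) // 449348487822376961
}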

// Reload reloads InfoSchema.
// It's exported so that tests can call it.
func (do *Domain) Reload() error {
	failpoint.Inject("ErrorMockReloadFailed", func(val failpoint.Value) {
		if val.(bool) {
			failpoint.Return(errors.New("mock reload failed"))
		}
	})

	// Lock here so that only one reload runs at a time.
	do.m.Lock()
	defer do.m.Unlock()

	startTime := time.Now()
	ver, err := do.store.CurrentVersion(kv.GlobalTxnScope)
	if err != nil {
		return err
	}

	version := ver.Ver
	is, hitCache, oldSchemaVersion, changes, err := do.loadInfoSchema(version, false)
	if err != nil {
		if version = getFlashbackStartTSFromErrorMsg(err); version != 0 {
			// use the latest available version to create the domain
			version--
			is, hitCache, oldSchemaVersion, changes, err = do.loadInfoSchema(version, false)
		}
	}
	if err != nil {
		metrics.LoadSchemaCounter.WithLabelValues("failed").Inc()
		return err
	}
	metrics.LoadSchemaCounter.WithLabelValues("succ").Inc()

	// only update if it is not from cache
	if !hitCache {
		// loaded a newer schema
		if oldSchemaVersion < is.SchemaMetaVersion() {
			// Update self schema version to etcd.
			err = do.ddl.SchemaSyncer().UpdateSelfVersion(context.Background(), 0, is.SchemaMetaVersion())
			if err != nil {
				logutil.BgLogger().Info("update self version failed",
					zap.Int64("oldSchemaVersion", oldSchemaVersion),
					zap.Int64("neededSchemaVersion", is.SchemaMetaVersion()), zap.Error(err))
			}
		}

		// it is a full load
		if changes == nil {
			logutil.BgLogger().Info("full load and reset schema validator")
			do.SchemaValidator.Reset()
		}
	}

	// Renew the lease; this must be executed whether the schema came from the cache or not.
	do.SchemaValidator.Update(version, oldSchemaVersion, is.SchemaMetaVersion(), changes)
	lease := do.GetSchemaLease()
	sub := time.Since(startTime)
	// The reload interval is lease / 2. If loading the schema takes longer than this interval,
	// some queries may fail with an ErrInfoSchemaExpired error.
	if sub > (lease/2) && lease > 0 {
		logutil.BgLogger().Warn("loading schema takes a long time", zap.Duration("take time", sub))
	}

	return nil
}

// LogSlowQuery keeps the topN recent slow queries in the domain.
func (do *Domain) LogSlowQuery(query *SlowQueryInfo) {
	do.slowQuery.mu.RLock()
	defer do.slowQuery.mu.RUnlock()
	if do.slowQuery.mu.closed {
		return
	}

	select {
	case do.slowQuery.ch <- query:
	default:
	}
}
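
// exampleNonBlockingSend is an illustrative sketch (not part of the original file)
// of the pattern LogSlowQuery uses above: a select with a default branch sends to a
// bounded channel without blocking, silently dropping the item when the consumer
// (topNSlowQueryLoop below) is not keeping up.
func exampleNonBlockingSend(ch chan<- int, v int) bool {
	select {
	case ch <- v:
		return true
	default:
		return false // channel full: drop rather than block the caller
	}
}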

// ShowSlowQuery returns the slow queries.
func (do *Domain) ShowSlowQuery(showSlow *ast.ShowSlow) []*SlowQueryInfo {
	msg := &showSlowMessage{
		request: showSlow,
	}
	msg.Add(1)
	do.slowQuery.msgCh <- msg
	msg.Wait()
	return msg.result
}

func (do *Domain) topNSlowQueryLoop() {
	defer util.Recover(metrics.LabelDomain, "topNSlowQueryLoop", nil, false)
	ticker := time.NewTicker(time.Minute * 10)
	defer func() {
		ticker.Stop()
		logutil.BgLogger().Info("topNSlowQueryLoop exited.")
	}()
	for {
		select {
		case now := <-ticker.C:
			do.slowQuery.RemoveExpired(now)
		case info, ok := <-do.slowQuery.ch:
			if !ok {
				return
			}
			do.slowQuery.Append(info)
		case msg := <-do.slowQuery.msgCh:
			req := msg.request
			switch req.Tp {
			case ast.ShowSlowTop:
				msg.result = do.slowQuery.QueryTop(int(req.Count), req.Kind)
			case ast.ShowSlowRecent:
				msg.result = do.slowQuery.QueryRecent(int(req.Count))
			default:
				msg.result = do.slowQuery.QueryAll()
			}
			msg.Done()
		}
	}
}

func (do *Domain) infoSyncerKeeper() {
	defer func() {
		logutil.BgLogger().Info("infoSyncerKeeper exited.")
	}()

	defer util.Recover(metrics.LabelDomain, "infoSyncerKeeper", nil, false)

	ticker := time.NewTicker(infosync.ReportInterval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			do.info.ReportMinStartTS(do.Store())
		case <-do.info.Done():
			logutil.BgLogger().Info("server info syncer need to restart")
			if err := do.info.Restart(context.Background()); err != nil {
				logutil.BgLogger().Error("server info syncer restart failed", zap.Error(err))
			} else {
				logutil.BgLogger().Info("server info syncer restarted")
			}
		case <-do.exit:
			return
		}
	}
}

func (do *Domain) globalConfigSyncerKeeper() {
	defer func() {
		logutil.BgLogger().Info("globalConfigSyncerKeeper exited.")
	}()

	defer util.Recover(metrics.LabelDomain, "globalConfigSyncerKeeper", nil, false)

	for {
		select {
		case entry := <-do.globalCfgSyncer.NotifyCh:
			err := do.globalCfgSyncer.StoreGlobalConfig(context.Background(), entry)
			if err != nil {
				logutil.BgLogger().Error("global config syncer store failed", zap.Error(err))
			}
		// TODO(crazycs520): Add an owner to keep the global config consistent with the global variable.
		case <-do.exit:
			return
		}
	}
}

func (do *Domain) topologySyncerKeeper() {
	defer util.Recover(metrics.LabelDomain, "topologySyncerKeeper", nil, false)
	ticker := time.NewTicker(infosync.TopologyTimeToRefresh)
	defer func() {
		ticker.Stop()
		logutil.BgLogger().Info("topologySyncerKeeper exited.")
	}()

	for {
		select {
		case <-ticker.C:
			err := do.info.StoreTopologyInfo(context.Background())
			if err != nil {
				logutil.BgLogger().Error("refresh topology in loop failed", zap.Error(err))
			}
		case <-do.info.TopologyDone():
			logutil.BgLogger().Info("server topology syncer need to restart")
			if err := do.info.RestartTopology(context.Background()); err != nil {
				logutil.BgLogger().Error("server topology syncer restart failed", zap.Error(err))
			} else {
				logutil.BgLogger().Info("server topology syncer restarted")
			}
		case <-do.exit:
			return
		}
	}
}

// CheckAutoAnalyzeWindows checks the auto analyze window and kills the auto analyze processes if the current time is outside the window.
func (do *Domain) CheckAutoAnalyzeWindows() {
	se, err := do.sysSessionPool.Get()

	if err != nil {
		logutil.BgLogger().Warn("get system session failed", zap.Error(err))
		return
	}
	// Make sure the session is new.
	sctx := se.(sessionctx.Context)
	defer do.sysSessionPool.Put(se)
	if !autoanalyze.CheckAutoAnalyzeWindow(sctx) {
		for _, id := range handleutil.GlobalAutoAnalyzeProcessList.All() {
			do.SysProcTracker().KillSysProcess(id)
		}
	}
}

func (do *Domain) refreshMDLCheckTableInfo() {
	se, err := do.sysSessionPool.Get()

	if err != nil {
		logutil.BgLogger().Warn("get system session failed", zap.Error(err))
		return
	}
	// Make sure the session is new.
	sctx := se.(sessionctx.Context)
	ctx := kv.WithInternalSourceType(context.Background(), kv.InternalTxnMeta)
	if _, err := sctx.GetSQLExecutor().ExecuteInternal(ctx, "rollback"); err != nil {
		se.Close()
		return
	}
	defer do.sysSessionPool.Put(se)
	exec := sctx.GetRestrictedSQLExecutor()
	domainSchemaVer := do.InfoSchema().SchemaMetaVersion()
	// The job must stay inside tidb_ddl_job if we need to wait for its schema version.
	sql := fmt.Sprintf(`select job_id, version, table_ids from mysql.tidb_mdl_info
		where job_id >= %d and version <= %d`, do.minJobIDRefresher.GetCurrMinJobID(), domainSchemaVer)
	rows, _, err := exec.ExecRestrictedSQL(ctx, nil, sql)
	if err != nil {
		logutil.BgLogger().Warn("get mdl info from tidb_mdl_info failed", zap.Error(err))
		return
	}
	do.mdlCheckTableInfo.mu.Lock()
	defer do.mdlCheckTableInfo.mu.Unlock()

	do.mdlCheckTableInfo.newestVer = domainSchemaVer
	do.mdlCheckTableInfo.jobsVerMap = make(map[int64]int64, len(rows))
	do.mdlCheckTableInfo.jobsIDsMap = make(map[int64]string, len(rows))
	for i := 0; i < len(rows); i++ {
		do.mdlCheckTableInfo.jobsVerMap[rows[i].GetInt64(0)] = rows[i].GetInt64(1)
		do.mdlCheckTableInfo.jobsIDsMap[rows[i].GetInt64(0)] = rows[i].GetString(2)
	}
}

func (do *Domain) mdlCheckLoop() {
	ticker := time.Tick(mdlCheckLookDuration)
	var saveMaxSchemaVersion int64
	jobNeedToSync := false
	jobCache := make(map[int64]int64, 1000)

	for {
		// Wait for channels
		select {
		case <-do.mdlCheckCh:
		case <-ticker:
		case <-do.exit:
			return
		}

		if !variable.EnableMDL.Load() {
			continue
		}

		do.mdlCheckTableInfo.mu.Lock()
		maxVer := do.mdlCheckTableInfo.newestVer
		if maxVer > saveMaxSchemaVersion {
			saveMaxSchemaVersion = maxVer
		} else if !jobNeedToSync {
			// The schema hasn't changed, and there was no job to check in the last run.
			do.mdlCheckTableInfo.mu.Unlock()
			continue
		}

		jobNeedToCheckCnt := len(do.mdlCheckTableInfo.jobsVerMap)
		if jobNeedToCheckCnt == 0 {
			jobNeedToSync = false
			do.mdlCheckTableInfo.mu.Unlock()
			continue
		}

		jobsVerMap := make(map[int64]int64, len(do.mdlCheckTableInfo.jobsVerMap))
		jobsIDsMap := make(map[int64]string, len(do.mdlCheckTableInfo.jobsIDsMap))
		for k, v := range do.mdlCheckTableInfo.jobsVerMap {
			jobsVerMap[k] = v
		}
		for k, v := range do.mdlCheckTableInfo.jobsIDsMap {
			jobsIDsMap[k] = v
		}
		do.mdlCheckTableInfo.mu.Unlock()

		jobNeedToSync = true

		sm := do.InfoSyncer().GetSessionManager()
		if sm == nil {
			logutil.BgLogger().Info("session manager is nil")
		} else {
			sm.CheckOldRunningTxn(jobsVerMap, jobsIDsMap)
		}

		if len(jobsVerMap) == jobNeedToCheckCnt {
			jobNeedToSync = false
		}

		// Try to GC jobCache.
		if len(jobCache) > 1000 {
			jobCache = make(map[int64]int64, 1000)
		}

		for jobID, ver := range jobsVerMap {
			if cver, ok := jobCache[jobID]; ok && cver >= ver {
				// Already updated, skip it.
				continue
			}
			logutil.BgLogger().Info("mdl gets lock, update self version to owner", zap.Int64("jobID", jobID), zap.Int64("version", ver))
			err := do.ddl.SchemaSyncer().UpdateSelfVersion(context.Background(), jobID, ver)
			if err != nil {
				jobNeedToSync = true
				logutil.BgLogger().Warn("mdl gets lock, update self version to owner failed",
					zap.Int64("jobID", jobID), zap.Int64("version", ver), zap.Error(err))
			} else {
				jobCache[jobID] = ver
			}
		}
	}
}

func (do *Domain) loadSchemaInLoop(ctx context.Context) {
	defer util.Recover(metrics.LabelDomain, "loadSchemaInLoop", nil, true)
	// Lease renewal can run at any frequency.
	// Use lease/2 here as recommended by the paper.
	ticker := time.NewTicker(do.schemaLease / 2)
	defer func() {
		ticker.Stop()
		logutil.BgLogger().Info("loadSchemaInLoop exited.")
	}()
	syncer := do.ddl.SchemaSyncer()

	for {
		select {
		case <-ticker.C:
			failpoint.Inject("disableOnTickReload", func() {
				failpoint.Continue()
			})
			err := do.Reload()
			if err != nil {
				logutil.BgLogger().Error("reload schema in loop failed", zap.Error(err))
			}
			do.deferFn.check()
		case _, ok := <-syncer.GlobalVersionCh():
			err := do.Reload()
			if err != nil {
				logutil.BgLogger().Error("reload schema in loop failed", zap.Error(err))
			}
			if !ok {
				logutil.BgLogger().Warn("reload schema in loop, schema syncer need rewatch")
				// Make sure the rewatch doesn't affect loading the schema, so we watch the global schema version asynchronously.
				syncer.WatchGlobalSchemaVer(context.Background())
			}
		case <-syncer.Done():
			// The schema syncer stopped; we need to stop the schema validator as well, to keep the schema version consistent.
			logutil.BgLogger().Info("reload schema in loop, schema syncer need restart")
			// etcd is responsible for schema synchronization. We should ensure there are at most two different schema versions
			// in the TiDB cluster so that data and schema stay consistent. If we lose the connection/session to etcd, the cluster
			// treats this TiDB as a down instance, and etcd removes the key `/tidb/ddl/all_schema_versions/tidb-id`.
			// Say the schema version is now 1 and the owner is changing it to 2: it will not wait for this down TiDB to sync the schema,
			// and will then continue changing the schema to version 3. Unfortunately, this down TiDB's schema version is still 1,
			// and version 1 is not consistent with version 3. So we need to stop the schema validator to prohibit DML from executing.
			do.SchemaValidator.Stop()
			err := do.mustRestartSyncer(ctx)
			if err != nil {
				logutil.BgLogger().Error("reload schema in loop, schema syncer restart failed", zap.Error(err))
				break
			}
			// The schema may have changed; we must reload it before the schema validator can restart.
			exitLoop := do.mustReload()
			// domain is closed.
			if exitLoop {
				logutil.BgLogger().Error("domain is closed, exit loadSchemaInLoop")
				return
			}
			do.SchemaValidator.Restart()
			logutil.BgLogger().Info("schema syncer restarted")
		case <-do.exit:
			return
		}
		do.refreshMDLCheckTableInfo()
		select {
		case do.mdlCheckCh <- struct{}{}:
		default:
		}
	}
}

// mustRestartSyncer tries to restart the SchemaSyncer.
// It doesn't return until it succeeds or the domain is stopped.
func (do *Domain) mustRestartSyncer(ctx context.Context) error {
	syncer := do.ddl.SchemaSyncer()

	for {
		err := syncer.Restart(ctx)
		if err == nil {
			return nil
		}
		// If the domain has stopped, we return an error immediately.
		if do.isClose() {
			return err
		}
		logutil.BgLogger().Error("restart the schema syncer failed", zap.Error(err))
		time.Sleep(time.Second)
	}
}

// mustReload tries to Reload the schema; it doesn't return until the reload succeeds or the domain is closed.
// It returns false on success and true when the domain is closed.
func (do *Domain) mustReload() (exitLoop bool) {
	for {
		err := do.Reload()
		if err == nil {
			logutil.BgLogger().Info("mustReload succeed")
			return false
		}

		// If the domain is closed, we return immediately.
		logutil.BgLogger().Info("reload the schema failed", zap.Error(err))
		if do.isClose() {
			return true
		}
		time.Sleep(200 * time.Millisecond)
	}
}

func (do *Domain) isClose() bool {
	select {
	case <-do.exit:
		logutil.BgLogger().Info("domain is closed")
		return true
	default:
	}
	return false
}

// Close closes the Domain and releases its resources.
func (do *Domain) Close() {
	if do == nil {
		return
	}
	startTime := time.Now()
	if do.ddl != nil {
		terror.Log(do.ddl.Stop())
	}
	if do.info != nil {
		do.info.RemoveServerInfo()
		do.info.RemoveMinStartTS()
	}
	ttlJobManager := do.ttlJobManager.Load()
	if ttlJobManager != nil {
		logutil.BgLogger().Info("stopping ttlJobManager")
		ttlJobManager.Stop()
		err := ttlJobManager.WaitStopped(context.Background(), func() time.Duration {
			if intest.InTest {
				return 10 * time.Second
			}
			return 30 * time.Second
		}())
		if err != nil {
			logutil.BgLogger().Warn("fail to wait until the ttl job manager stop", zap.Error(err))
		} else {
			logutil.BgLogger().Info("ttlJobManager exited.")
		}
	}
	do.releaseServerID(context.Background())
	close(do.exit)
	if do.etcdClient != nil {
		terror.Log(errors.Trace(do.etcdClient.Close()))
	}

	do.runawayManager.Stop()

	if do.unprefixedEtcdCli != nil {
		terror.Log(errors.Trace(do.unprefixedEtcdCli.Close()))
	}

	do.slowQuery.Close()
	do.cancelFns.mu.Lock()
	for _, f := range do.cancelFns.fns {
		f()
	}
	do.cancelFns.mu.Unlock()
	do.wg.Wait()
	do.sysSessionPool.Close()
	variable.UnregisterStatistics(do.BindHandle())
	if do.onClose != nil {
		do.onClose()
	}
	gctuner.WaitMemoryLimitTunerExitInTest()
	close(do.mdlCheckCh)

	// close MockGlobalServerInfoManagerEntry in order to refresh the mock server info.
	if intest.InTest {
		infosync.MockGlobalServerInfoManagerEntry.Close()
	}
	if handle := do.statsHandle.Load(); handle != nil {
		handle.Close()
	}

	logutil.BgLogger().Info("domain closed", zap.Duration("take time", time.Since(startTime)))
}

const resourceIdleTimeout = 3 * time.Minute // resources in the ResourcePool will be recycled after idleTimeout

// NewDomain creates a new domain. Should not create multiple domains for the same store.
func NewDomain(store kv.Storage, schemaLease time.Duration, statsLease time.Duration, dumpFileGcLease time.Duration, factory pools.Factory) *Domain {
	intest.Assert(schemaLease > 0, "schema lease should be a positive duration")
	capacity := 200 // capacity of the sysSessionPool
	do := &Domain{
		store: store,
		exit:  make(chan struct{}),
		sysSessionPool: util.NewSessionPool(
			capacity, factory,
			func(r pools.Resource) {
				_, ok := r.(sessionctx.Context)
				intest.Assert(ok)
				infosync.StoreInternalSession(r)
			},
			func(r pools.Resource) {
				_, ok := r.(sessionctx.Context)
				intest.Assert(ok)
				infosync.DeleteInternalSession(r)
			},
		),
		statsLease:        statsLease,
		schemaLease:       schemaLease,
		slowQuery:         newTopNSlowQueries(config.GetGlobalConfig().InMemSlowQueryTopNNum, time.Hour*24*7, config.GetGlobalConfig().InMemSlowQueryRecentNum),
		dumpFileGcChecker: &dumpFileGcChecker{gcLease: dumpFileGcLease, paths: []string{replayer.GetPlanReplayerDirName(), GetOptimizerTraceDirName(), GetExtractTaskDirName()}},
		mdlCheckTableInfo: &mdlCheckTableInfo{
			mu:         sync.Mutex{},
			jobsVerMap: make(map[int64]int64),
			jobsIDsMap: make(map[int64]string),
		},
		mdlCheckCh: make(chan struct{}),
	}

	do.infoCache = infoschema.NewCache(do, int(variable.SchemaVersionCacheLimit.Load()))
	do.stopAutoAnalyze.Store(false)
	do.wg = util.NewWaitGroupEnhancedWrapper("domain", do.exit, config.GetGlobalConfig().TiDBEnableExitCheck)
	do.SchemaValidator = NewSchemaValidator(schemaLease, do)
	do.expensiveQueryHandle = expensivequery.NewExpensiveQueryHandle(do.exit)
	do.memoryUsageAlarmHandle = memoryusagealarm.NewMemoryUsageAlarmHandle(do.exit)
	do.serverMemoryLimitHandle = servermemorylimit.NewServerMemoryLimitHandle(do.exit)
	do.sysProcesses = SysProcesses{mu: &sync.RWMutex{}, procMap: make(map[uint64]sysproctrack.TrackProc)}
	do.initDomainSysVars()
	do.expiredTimeStamp4PC.expiredTimeStamp = types.NewTime(types.ZeroCoreTime, mysql.TypeTimestamp, types.DefaultFsp)
	return do
}

const serverIDForStandalone = 1 // serverID for standalone deployment.

func newEtcdCli(addrs []string, ebd kv.EtcdBackend) (*clientv3.Client, error) {
	cfg := config.GetGlobalConfig()
	etcdLogCfg := zap.NewProductionConfig()
	etcdLogCfg.Level = zap.NewAtomicLevelAt(zap.ErrorLevel)
	backoffCfg := backoff.DefaultConfig
	backoffCfg.MaxDelay = 3 * time.Second
	cli, err := clientv3.New(clientv3.Config{
		LogConfig:        &etcdLogCfg,
		Endpoints:        addrs,
		AutoSyncInterval: 30 * time.Second,
		DialTimeout:      5 * time.Second,
		DialOptions: []grpc.DialOption{
			grpc.WithConnectParams(grpc.ConnectParams{
				Backoff: backoffCfg,
			}),
			grpc.WithKeepaliveParams(keepalive.ClientParameters{
				Time:    time.Duration(cfg.TiKVClient.GrpcKeepAliveTime) * time.Second,
				Timeout: time.Duration(cfg.TiKVClient.GrpcKeepAliveTimeout) * time.Second,
			}),
		},
		TLS: ebd.TLSConfig(),
	})
	return cli, err
}

// Init initializes a domain. After it returns, sessions can be used to execute DMLs
// but not DDLs, which become available after Domain.Start.
func (do *Domain) Init(
	sysExecutorFactory func(*Domain) (pools.Resource, error),
	ddlInjector func(ddl.DDL, ddl.Executor, *infoschema.InfoCache) *schematracker.Checker,
) error {
	do.sysExecutorFactory = sysExecutorFactory
	perfschema.Init()
	if ebd, ok := do.store.(kv.EtcdBackend); ok {
		var addrs []string
		var err error
		if addrs, err = ebd.EtcdAddrs(); err != nil {
			return err
		}
		if addrs != nil {
			cli, err := newEtcdCli(addrs, ebd)
			if err != nil {
				return errors.Trace(err)
			}

			etcd.SetEtcdCliByNamespace(cli, keyspace.MakeKeyspaceEtcdNamespace(do.store.GetCodec()))

			do.etcdClient = cli

			do.autoidClient = autoid.NewClientDiscover(cli)

			unprefixedEtcdCli, err := newEtcdCli(addrs, ebd)
			if err != nil {
				return errors.Trace(err)
			}
			do.unprefixedEtcdCli = unprefixedEtcdCli
		}
	}

	ctx, cancelFunc := context.WithCancel(context.Background())
	do.ctx = ctx
	do.cancelFns.mu.Lock()
	do.cancelFns.fns = append(do.cancelFns.fns, cancelFunc)
	do.cancelFns.mu.Unlock()
	d := do.ddl
	eBak := do.ddlExecutor
	do.ddl, do.ddlExecutor = ddl.NewDDL(
		ctx,
		ddl.WithEtcdClient(do.etcdClient),
		ddl.WithStore(do.store),
		ddl.WithAutoIDClient(do.autoidClient),
		ddl.WithInfoCache(do.infoCache),
		ddl.WithLease(do.schemaLease),
		ddl.WithSchemaLoader(do),
	)

	failpoint.Inject("MockReplaceDDL", func(val failpoint.Value) {
		if val.(bool) {
			do.ddl = d
			do.ddlExecutor = eBak
		}
	})
	if ddlInjector != nil {
		checker := ddlInjector(do.ddl, do.ddlExecutor, do.infoCache)
		checker.CreateTestDB(nil)
		do.ddl = checker
		do.ddlExecutor = checker
	}

	// step 1: prepare the info/schema syncer that domain reload needs.
	pdCli, pdHTTPCli := do.GetPDClient(), do.GetPDHTTPClient()
	skipRegisterToDashboard := config.GetGlobalConfig().SkipRegisterToDashboard
	var err error
	do.info, err = infosync.GlobalInfoSyncerInit(ctx, do.ddl.GetID(), do.ServerID,
		do.etcdClient, do.unprefixedEtcdCli, pdCli, pdHTTPCli,
		do.Store().GetCodec(), skipRegisterToDashboard)
	if err != nil {
		return err
	}
	do.globalCfgSyncer = globalconfigsync.NewGlobalConfigSyncer(pdCli)
	err = do.ddl.SchemaSyncer().Init(ctx)
	if err != nil {
		return err
	}

	// step 2: initialize the global kill, which depends on `globalInfoSyncer`.
	if config.GetGlobalConfig().EnableGlobalKill {
		do.connIDAllocator = globalconn.NewGlobalAllocator(do.ServerID, config.GetGlobalConfig().Enable32BitsConnectionID)

		if do.etcdClient != nil {
			err := do.acquireServerID(ctx)
			if err != nil {
				logutil.BgLogger().Error("acquire serverID failed", zap.Error(err))
				do.isLostConnectionToPD.Store(1) // will retry in `do.serverIDKeeper`
			} else {
				if err := do.info.StoreServerInfo(context.Background()); err != nil {
					return errors.Trace(err)
				}
				do.isLostConnectionToPD.Store(0)
			}
		} else {
			// set serverID for standalone deployment to enable 'KILL'.
			atomic.StoreUint64(&do.serverID, serverIDForStandalone)
		}
	} else {
		do.connIDAllocator = globalconn.NewSimpleAllocator()
	}

	// should put `initResourceGroupsController` after fetching the server ID
	err = do.initResourceGroupsController(ctx, pdCli, do.ServerID())
	if err != nil {
		return err
	}

	startReloadTime := time.Now()
	// step 3: the domain reloads the infoSchema.
	err = do.Reload()
	if err != nil {
		return err
	}

	sub := time.Since(startReloadTime)
	// If the reload in step 3 took longer than half the schema lease and no new reload was performed,
	// the next query may fail with an ErrInfoSchemaExpired error. So we do a new reload to update schemaValidator.latestSchemaExpire.
	if sub > (do.schemaLease / 2) {
		logutil.BgLogger().Warn("loading schema and starting ddl take a long time, we do a new reload", zap.Duration("take time", sub))
		err = do.Reload()
		if err != nil {
			return err
		}
	}
	return nil
}

// Start starts the domain. After Start, DDLs can be executed through sessions; see
// Init as well.
func (do *Domain) Start() error {
	gCfg := config.GetGlobalConfig()
	if gCfg.EnableGlobalKill && do.etcdClient != nil {
		do.wg.Add(1)
		go do.serverIDKeeper()
	}

	// TODO: Here we create new sessions with sysFac in DDL,
	// which will use `do` as the Domain instead of calling `domap.Get`.
	// That's because `domap.Get` requires a lock, and we can't acquire it
	// again before Domain initialization finishes.
	// After we remove the lazy logic of creating Domain, we
	// can simplify the code here.
	sysFac := func() (pools.Resource, error) {
		return do.sysExecutorFactory(do)
	}
	sysCtxPool := pools.NewResourcePool(sysFac, 512, 512, resourceIdleTimeout)

	// Start the ddl after the domain reload, to avoid internal SQL running before the infoSchema is constructed.
	err := do.ddl.Start(sysCtxPool)
	if err != nil {
		return err
	}
	do.minJobIDRefresher = do.ddl.GetMinJobIDRefresher()

	// Local store needs to get the change information for every DDL state in each session.
	do.wg.Run(func() {
		do.loadSchemaInLoop(do.ctx)
	}, "loadSchemaInLoop")
	do.wg.Run(do.mdlCheckLoop, "mdlCheckLoop")
	do.wg.Run(do.topNSlowQueryLoop, "topNSlowQueryLoop")
	do.wg.Run(do.infoSyncerKeeper, "infoSyncerKeeper")
	do.wg.Run(do.globalConfigSyncerKeeper, "globalConfigSyncerKeeper")
	do.wg.Run(do.runawayStartLoop, "runawayStartLoop")
	do.wg.Run(do.requestUnitsWriterLoop, "requestUnitsWriterLoop")
	skipRegisterToDashboard := gCfg.SkipRegisterToDashboard
	if !skipRegisterToDashboard {
		do.wg.Run(do.topologySyncerKeeper, "topologySyncerKeeper")
	}
	pdCli := do.GetPDClient()
	if pdCli != nil {
		do.wg.Run(func() {
			do.closestReplicaReadCheckLoop(do.ctx, pdCli)
		}, "closestReplicaReadCheckLoop")
	}

	err = do.initLogBackup(do.ctx, pdCli)
	if err != nil {
		return err
	}

	return nil
}

// GetSchemaLease returns the schema lease.
func (do *Domain) GetSchemaLease() time.Duration {
	return do.schemaLease
}

// InitInfo4Test initializes infosync for distributed execution tests.
func (do *Domain) InitInfo4Test() {
	infosync.MockGlobalServerInfoManagerEntry.Add(do.ddl.GetID(), do.ServerID)
}

// SetOnClose sets the do.onClose func.
func (do *Domain) SetOnClose(onClose func()) {
	do.onClose = onClose
}

func (do *Domain) initLogBackup(ctx context.Context, pdClient pd.Client) error {
	cfg := config.GetGlobalConfig()
	if pdClient == nil || do.etcdClient == nil {
		log.Warn("pd / etcd client not provided, won't begin Advancer.")
		return nil
	}
	tikvStore, ok := do.Store().(tikv.Storage)
	if !ok {
		log.Warn("non tikv store, stop begin Advancer.")
		return nil
	}
	env, err := streamhelper.TiDBEnv(tikvStore, pdClient, do.etcdClient, cfg)
	if err != nil {
		return err
	}
	adv := streamhelper.NewCheckpointAdvancer(env)
	do.logBackupAdvancer = daemon.New(adv, streamhelper.OwnerManagerForLogBackup(ctx, do.etcdClient), adv.Config().TickDuration)
	loop, err := do.logBackupAdvancer.Begin(ctx)
	if err != nil {
		return err
	}
	do.wg.Run(loop, "logBackupAdvancer")
	return nil
}

// When tidb_replica_read = 'closest-adaptive', check whether the TiDB and TiKV zone labels match.
// If they don't match, disable replica read to avoid uneven read traffic distribution.
func (do *Domain) closestReplicaReadCheckLoop(ctx context.Context, pdClient pd.Client) {
	defer util.Recover(metrics.LabelDomain, "closestReplicaReadCheckLoop", nil, false)

	// trigger the check once instantly.
	if err := do.checkReplicaRead(ctx, pdClient); err != nil {
		logutil.BgLogger().Warn("refresh replicaRead flag failed", zap.Error(err))
	}

	ticker := time.NewTicker(time.Minute)
	defer func() {
		ticker.Stop()
		logutil.BgLogger().Info("closestReplicaReadCheckLoop exited.")
	}()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			if err := do.checkReplicaRead(ctx, pdClient); err != nil {
				logutil.BgLogger().Warn("refresh replicaRead flag failed", zap.Error(err))
			}
		}
	}
}
|
|
|
|
// Periodically check and update the replica-read status when `tidb_replica_read` is set to "closest-adaptive"
|
|
// We disable "closest-adaptive" in following conditions to ensure the read traffic is evenly distributed across
|
|
// all AZs:
|
|
// - There are no TiKV servers in the AZ of this tidb instance
|
|
// - The AZ if this tidb contains more tidb than other AZ and this tidb's id is the bigger one.
|
|
func (do *Domain) checkReplicaRead(ctx context.Context, pdClient pd.Client) error {
|
|
do.sysVarCache.RLock()
|
|
replicaRead := do.sysVarCache.global[variable.TiDBReplicaRead]
|
|
do.sysVarCache.RUnlock()
|
|
|
|
if !strings.EqualFold(replicaRead, "closest-adaptive") {
|
|
logutil.BgLogger().Debug("closest replica read is not enabled, skip check!", zap.String("mode", replicaRead))
|
|
return nil
|
|
}
|
|
|
|
serverInfo, err := infosync.GetServerInfo()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
zone := ""
|
|
for k, v := range serverInfo.Labels {
|
|
if k == placement.DCLabelKey && v != "" {
|
|
zone = v
|
|
break
|
|
}
|
|
}
|
|
if zone == "" {
|
|
logutil.BgLogger().Debug("server contains no 'zone' label, disable closest replica read", zap.Any("labels", serverInfo.Labels))
|
|
variable.SetEnableAdaptiveReplicaRead(false)
|
|
return nil
|
|
}
|
|
|
|
stores, err := pdClient.GetAllStores(ctx, pd.WithExcludeTombstone())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
storeZones := make(map[string]int)
|
|
for _, s := range stores {
|
|
// Skip tombstone stores and TiFlash stores.
|
|
if s.NodeState == metapb.NodeState_Removing || s.NodeState == metapb.NodeState_Removed || engine.IsTiFlash(s) {
|
|
continue
|
|
}
|
|
for _, label := range s.Labels {
|
|
if label.Key == placement.DCLabelKey && label.Value != "" {
|
|
storeZones[label.Value] = 0
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// no stores in this AZ
|
|
if _, ok := storeZones[zone]; !ok {
|
|
variable.SetEnableAdaptiveReplicaRead(false)
|
|
return nil
|
|
}
|
|
|
|
servers, err := infosync.GetAllServerInfo(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
svrIDsInThisZone := make([]string, 0)
|
|
for _, s := range servers {
|
|
if v, ok := s.Labels[placement.DCLabelKey]; ok && v != "" {
|
|
if _, ok := storeZones[v]; ok {
|
|
storeZones[v]++
|
|
if v == zone {
|
|
svrIDsInThisZone = append(svrIDsInThisZone, s.ID)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
enabledCount := math.MaxInt
|
|
for _, count := range storeZones {
|
|
if count < enabledCount {
|
|
enabledCount = count
|
|
}
|
|
}
|
|
// Sort the TiDB instances in this AZ by ID and disable the ones with bigger IDs.
// Because the ID never changes, this is a simple and stable way to select
// a subset of instances across all TiDB servers.
|
|
if enabledCount < len(svrIDsInThisZone) {
|
|
sort.Slice(svrIDsInThisZone, func(i, j int) bool {
|
|
return strings.Compare(svrIDsInThisZone[i], svrIDsInThisZone[j]) < 0
|
|
})
|
|
}
|
|
enabled := true
|
|
for _, s := range svrIDsInThisZone[enabledCount:] {
|
|
if s == serverInfo.ID {
|
|
enabled = false
|
|
break
|
|
}
|
|
}
|
|
|
|
if variable.SetEnableAdaptiveReplicaRead(enabled) {
|
|
logutil.BgLogger().Info("tidb server adaptive closest replica read is changed", zap.Bool("enable", enabled))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// InitDistTaskLoop initializes the distributed task framework.
|
|
func (do *Domain) InitDistTaskLoop() error {
|
|
ctx := kv.WithInternalSourceType(context.Background(), kv.InternalDistTask)
|
|
failpoint.Inject("MockDisableDistTask", func(val failpoint.Value) {
|
|
if val.(bool) {
|
|
failpoint.Return(nil)
|
|
}
|
|
})
|
|
|
|
taskManager := storage.NewTaskManager(do.sysSessionPool)
|
|
var serverID string
|
|
if intest.InTest {
|
|
do.InitInfo4Test()
|
|
serverID = disttaskutil.GenerateSubtaskExecID4Test(do.ddl.GetID())
|
|
} else {
|
|
serverID = disttaskutil.GenerateSubtaskExecID(ctx, do.ddl.GetID())
|
|
}
|
|
|
|
if serverID == "" {
|
|
errMsg := fmt.Sprintf("TiDB node ID( = %s ) not found in available TiDB nodes list", do.ddl.GetID())
|
|
return errors.New(errMsg)
|
|
}
|
|
managerCtx, cancel := context.WithCancel(ctx)
|
|
do.cancelFns.mu.Lock()
|
|
do.cancelFns.fns = append(do.cancelFns.fns, cancel)
|
|
do.cancelFns.mu.Unlock()
|
|
executorManager, err := taskexecutor.NewManager(managerCtx, serverID, taskManager)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
storage.SetTaskManager(taskManager)
|
|
if err = executorManager.InitMeta(); err != nil {
|
|
// executor manager loop will try to recover meta repeatedly, so we can
|
|
// just log the error here.
|
|
logutil.BgLogger().Warn("init task executor manager meta failed", zap.Error(err))
|
|
}
|
|
do.wg.Run(func() {
|
|
defer func() {
|
|
storage.SetTaskManager(nil)
|
|
}()
|
|
do.distTaskFrameworkLoop(ctx, taskManager, executorManager, serverID)
|
|
}, "distTaskFrameworkLoop")
|
|
return nil
|
|
}
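// distTaskFrameworkLoop runs the dist task executor manager and, once per second, starts or stops the
// dist task scheduler manager depending on whether this node is the DDL owner.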
func (do *Domain) distTaskFrameworkLoop(ctx context.Context, taskManager *storage.TaskManager, executorManager *taskexecutor.Manager, serverID string) {
|
|
err := executorManager.Start()
|
|
if err != nil {
|
|
logutil.BgLogger().Error("dist task executor manager start failed", zap.Error(err))
|
|
return
|
|
}
|
|
logutil.BgLogger().Info("dist task executor manager started")
|
|
defer func() {
|
|
logutil.BgLogger().Info("stopping dist task executor manager")
|
|
executorManager.Stop()
|
|
logutil.BgLogger().Info("dist task executor manager stopped")
|
|
}()
|
|
|
|
var schedulerManager *scheduler.Manager
|
|
startSchedulerMgrIfNeeded := func() {
|
|
if schedulerManager != nil && schedulerManager.Initialized() {
|
|
return
|
|
}
|
|
schedulerManager = scheduler.NewManager(ctx, taskManager, serverID)
|
|
schedulerManager.Start()
|
|
}
|
|
stopSchedulerMgrIfNeeded := func() {
|
|
if schedulerManager != nil && schedulerManager.Initialized() {
|
|
logutil.BgLogger().Info("stopping dist task scheduler manager because the current node is not DDL owner anymore", zap.String("id", do.ddl.GetID()))
|
|
schedulerManager.Stop()
|
|
logutil.BgLogger().Info("dist task scheduler manager stopped", zap.String("id", do.ddl.GetID()))
|
|
}
|
|
}
|
|
|
|
ticker := time.NewTicker(time.Second)
|
|
for {
|
|
select {
|
|
case <-do.exit:
|
|
stopSchedulerMgrIfNeeded()
|
|
return
|
|
case <-ticker.C:
|
|
if do.ddl.OwnerManager().IsOwner() {
|
|
startSchedulerMgrIfNeeded()
|
|
} else {
|
|
stopSchedulerMgrIfNeeded()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// SysSessionPool returns the system session pool.
|
|
func (do *Domain) SysSessionPool() util.SessionPool {
|
|
return do.sysSessionPool
|
|
}
|
|
|
|
// SysProcTracker returns the system processes tracker.
|
|
func (do *Domain) SysProcTracker() sysproctrack.Tracker {
|
|
return &do.sysProcesses
|
|
}
|
|
|
|
// GetEtcdClient returns the etcd client.
|
|
func (do *Domain) GetEtcdClient() *clientv3.Client {
|
|
return do.etcdClient
|
|
}
|
|
|
|
// AutoIDClient returns the autoid client.
|
|
func (do *Domain) AutoIDClient() *autoid.ClientDiscover {
|
|
return do.autoidClient
|
|
}
|
|
|
|
// GetPDClient returns the PD client.
|
|
func (do *Domain) GetPDClient() pd.Client {
|
|
if store, ok := do.store.(kv.StorageWithPD); ok {
|
|
return store.GetPDClient()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// GetPDHTTPClient returns the PD HTTP client.
|
|
func (do *Domain) GetPDHTTPClient() pdhttp.Client {
|
|
if store, ok := do.store.(kv.StorageWithPD); ok {
|
|
return store.GetPDHTTPClient()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// LoadPrivilegeLoop creates a goroutine that loads privilege tables in a loop. It
// should be called only once in BootstrapSession.
|
|
func (do *Domain) LoadPrivilegeLoop(sctx sessionctx.Context) error {
|
|
ctx := kv.WithInternalSourceType(context.Background(), kv.InternalTxnPrivilege)
|
|
sctx.GetSessionVars().InRestrictedSQL = true
|
|
_, err := sctx.GetSQLExecutor().ExecuteInternal(ctx, "set @@autocommit = 1")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
do.privHandle = privileges.NewHandle()
|
|
err = do.privHandle.Update(sctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var watchCh clientv3.WatchChan
|
|
duration := 5 * time.Minute
|
|
if do.etcdClient != nil {
|
|
watchCh = do.etcdClient.Watch(context.Background(), privilegeKey)
|
|
duration = 10 * time.Minute
|
|
}
|
|
|
|
do.wg.Run(func() {
|
|
defer func() {
|
|
logutil.BgLogger().Info("loadPrivilegeInLoop exited.")
|
|
}()
|
|
defer util.Recover(metrics.LabelDomain, "loadPrivilegeInLoop", nil, false)
|
|
|
|
var count int
|
|
for {
|
|
ok := true
|
|
select {
|
|
case <-do.exit:
|
|
return
|
|
case _, ok = <-watchCh:
|
|
case <-time.After(duration):
|
|
}
|
|
if !ok {
|
|
logutil.BgLogger().Error("load privilege loop watch channel closed")
|
|
watchCh = do.etcdClient.Watch(context.Background(), privilegeKey)
|
|
count++
|
|
if count > 10 {
|
|
time.Sleep(time.Duration(count) * time.Second)
|
|
}
|
|
continue
|
|
}
|
|
|
|
count = 0
|
|
err := do.privHandle.Update(sctx)
|
|
metrics.LoadPrivilegeCounter.WithLabelValues(metrics.RetLabel(err)).Inc()
|
|
if err != nil {
|
|
logutil.BgLogger().Error("load privilege failed", zap.Error(err))
|
|
}
|
|
}
|
|
}, "loadPrivilegeInLoop")
|
|
return nil
|
|
}
|
|
|
|
// LoadSysVarCacheLoop creates a goroutine that loads the sysvar cache in a loop.
// It should be called only once in BootstrapSession.
|
|
func (do *Domain) LoadSysVarCacheLoop(ctx sessionctx.Context) error {
|
|
ctx.GetSessionVars().InRestrictedSQL = true
|
|
err := do.rebuildSysVarCache(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var watchCh clientv3.WatchChan
|
|
duration := 30 * time.Second
|
|
if do.etcdClient != nil {
|
|
watchCh = do.etcdClient.Watch(context.Background(), sysVarCacheKey)
|
|
}
|
|
|
|
do.wg.Run(func() {
|
|
defer func() {
|
|
logutil.BgLogger().Info("LoadSysVarCacheLoop exited.")
|
|
}()
|
|
defer util.Recover(metrics.LabelDomain, "LoadSysVarCacheLoop", nil, false)
|
|
|
|
var count int
|
|
for {
|
|
ok := true
|
|
select {
|
|
case <-do.exit:
|
|
return
|
|
case _, ok = <-watchCh:
|
|
case <-time.After(duration):
|
|
}
|
|
|
|
failpoint.Inject("skipLoadSysVarCacheLoop", func(val failpoint.Value) {
|
|
// In some pkg integration tests there are many test suites, and each test suite has its own storage and
// `LoadSysVarCacheLoop` background goroutine, so each test suite rebuilds the sysvar cache from its
// own storage.
// Each test suite also calls `checkEnableServerGlobalVar` to update some local variables.
// That's the problem: the test suites use different storages to update the same local variables.
// So just skip `RebuildSysVarCache` in such integration tests.
|
|
if val.(bool) {
|
|
failpoint.Continue()
|
|
}
|
|
})
|
|
|
|
if !ok {
|
|
logutil.BgLogger().Error("LoadSysVarCacheLoop loop watch channel closed")
|
|
watchCh = do.etcdClient.Watch(context.Background(), sysVarCacheKey)
|
|
count++
|
|
if count > 10 {
|
|
time.Sleep(time.Duration(count) * time.Second)
|
|
}
|
|
continue
|
|
}
|
|
count = 0
|
|
logutil.BgLogger().Debug("Rebuilding sysvar cache from etcd watch event.")
|
|
err := do.rebuildSysVarCache(ctx)
|
|
metrics.LoadSysVarCacheCounter.WithLabelValues(metrics.RetLabel(err)).Inc()
|
|
if err != nil {
|
|
logutil.BgLogger().Error("LoadSysVarCacheLoop failed", zap.Error(err))
|
|
}
|
|
}
|
|
}, "LoadSysVarCacheLoop")
|
|
return nil
|
|
}
|
|
|
|
// WatchTiFlashComputeNodeChange creates a routine to watch whether the topology of tiflash_compute nodes changes.
// TODO: tiflashComputeNodeKey is not put to etcd yet (finish this when AutoScaler is done).
//
// The store cache will only be invalidated every n seconds.
|
|
func (do *Domain) WatchTiFlashComputeNodeChange() error {
|
|
var watchCh clientv3.WatchChan
|
|
if do.etcdClient != nil {
|
|
watchCh = do.etcdClient.Watch(context.Background(), tiflashComputeNodeKey)
|
|
}
|
|
duration := 10 * time.Second
|
|
do.wg.Run(func() {
|
|
defer func() {
|
|
logutil.BgLogger().Info("WatchTiFlashComputeNodeChange exit")
|
|
}()
|
|
defer util.Recover(metrics.LabelDomain, "WatchTiFlashComputeNodeChange", nil, false)
|
|
|
|
var count int
|
|
var logCount int
|
|
for {
|
|
ok := true
|
|
var watched bool
|
|
select {
|
|
case <-do.exit:
|
|
return
|
|
case _, ok = <-watchCh:
|
|
watched = true
|
|
case <-time.After(duration):
|
|
}
|
|
if !ok {
|
|
logutil.BgLogger().Error("WatchTiFlashComputeNodeChange watch channel closed")
|
|
watchCh = do.etcdClient.Watch(context.Background(), tiflashComputeNodeKey)
|
|
count++
|
|
if count > 10 {
|
|
time.Sleep(time.Duration(count) * time.Second)
|
|
}
|
|
continue
|
|
}
|
|
count = 0
|
|
switch s := do.store.(type) {
|
|
case tikv.Storage:
|
|
logCount++
|
|
s.GetRegionCache().InvalidateTiFlashComputeStores()
|
|
if logCount == 6 {
|
|
// Print log every 6*duration seconds.
|
|
logutil.BgLogger().Debug("tiflash_compute store cache invalied, will update next query", zap.Bool("watched", watched))
|
|
logCount = 0
|
|
}
|
|
default:
|
|
logutil.BgLogger().Debug("No need to watch tiflash_compute store cache for non-tikv store")
|
|
return
|
|
}
|
|
}
|
|
}, "WatchTiFlashComputeNodeChange")
|
|
return nil
|
|
}
|
|
|
|
// PrivilegeHandle returns the MySQLPrivilege.
|
|
func (do *Domain) PrivilegeHandle() *privileges.Handle {
|
|
return do.privHandle
|
|
}
|
|
|
|
// BindHandle returns domain's bindHandle.
|
|
func (do *Domain) BindHandle() bindinfo.GlobalBindingHandle {
|
|
v := do.bindHandle.Load()
|
|
if v == nil {
|
|
return nil
|
|
}
|
|
return v.(bindinfo.GlobalBindingHandle)
|
|
}
|
|
|
|
// LoadBindInfoLoop creates a goroutine that loads BindInfo in a loop. It should
// be called only once in BootstrapSession.
|
|
func (do *Domain) LoadBindInfoLoop(ctxForHandle sessionctx.Context, ctxForEvolve sessionctx.Context) error {
|
|
ctxForHandle.GetSessionVars().InRestrictedSQL = true
|
|
ctxForEvolve.GetSessionVars().InRestrictedSQL = true
|
|
if !do.bindHandle.CompareAndSwap(nil, bindinfo.NewGlobalBindingHandle(do.sysSessionPool)) {
|
|
do.BindHandle().Reset()
|
|
}
|
|
|
|
err := do.BindHandle().LoadFromStorageToCache(true)
|
|
if err != nil || bindinfo.Lease == 0 {
|
|
return err
|
|
}
|
|
|
|
owner := do.newOwnerManager(bindinfo.Prompt, bindinfo.OwnerKey)
|
|
do.globalBindHandleWorkerLoop(owner)
|
|
return nil
|
|
}
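// globalBindHandleWorkerLoop periodically reloads global bindings, drops invalid ones, captures plan
// baselines when enabled, and lets the owner GC stale bindings.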
func (do *Domain) globalBindHandleWorkerLoop(owner owner.Manager) {
|
|
do.wg.Run(func() {
|
|
defer func() {
|
|
logutil.BgLogger().Info("globalBindHandleWorkerLoop exited.")
|
|
}()
|
|
defer util.Recover(metrics.LabelDomain, "globalBindHandleWorkerLoop", nil, false)
|
|
|
|
bindWorkerTicker := time.NewTicker(bindinfo.Lease)
|
|
gcBindTicker := time.NewTicker(100 * bindinfo.Lease)
|
|
defer func() {
|
|
bindWorkerTicker.Stop()
|
|
gcBindTicker.Stop()
|
|
}()
|
|
for {
|
|
select {
|
|
case <-do.exit:
|
|
owner.Cancel()
|
|
return
|
|
case <-bindWorkerTicker.C:
|
|
bindHandle := do.BindHandle()
|
|
err := bindHandle.LoadFromStorageToCache(false)
|
|
if err != nil {
|
|
logutil.BgLogger().Error("update bindinfo failed", zap.Error(err))
|
|
}
|
|
bindHandle.DropInvalidGlobalBinding()
|
|
// Check the global variable to decide whether to capture plan baselines.
|
|
optVal, err := do.GetGlobalVar(variable.TiDBCapturePlanBaseline)
|
|
if err == nil && variable.TiDBOptOn(optVal) {
|
|
bindHandle.CaptureBaselines()
|
|
}
|
|
case <-gcBindTicker.C:
|
|
if !owner.IsOwner() {
|
|
continue
|
|
}
|
|
err := do.BindHandle().GCGlobalBinding()
|
|
if err != nil {
|
|
logutil.BgLogger().Error("GC bind record failed", zap.Error(err))
|
|
}
|
|
}
|
|
}
|
|
}, "globalBindHandleWorkerLoop")
|
|
}
|
|
|
|
// SetupPlanReplayerHandle sets up the plan replayer handle.
|
|
func (do *Domain) SetupPlanReplayerHandle(collectorSctx sessionctx.Context, workersSctxs []sessionctx.Context) {
|
|
ctx := kv.WithInternalSourceType(context.Background(), kv.InternalTxnStats)
|
|
do.planReplayerHandle = &planReplayerHandle{}
|
|
do.planReplayerHandle.planReplayerTaskCollectorHandle = &planReplayerTaskCollectorHandle{
|
|
ctx: ctx,
|
|
sctx: collectorSctx,
|
|
}
|
|
taskCH := make(chan *PlanReplayerDumpTask, 16)
|
|
taskStatus := &planReplayerDumpTaskStatus{}
|
|
taskStatus.finishedTaskMu.finishedTask = map[replayer.PlanReplayerTaskKey]struct{}{}
|
|
taskStatus.runningTaskMu.runningTasks = map[replayer.PlanReplayerTaskKey]struct{}{}
|
|
|
|
do.planReplayerHandle.planReplayerTaskDumpHandle = &planReplayerTaskDumpHandle{
|
|
taskCH: taskCH,
|
|
status: taskStatus,
|
|
}
|
|
do.planReplayerHandle.planReplayerTaskDumpHandle.workers = make([]*planReplayerTaskDumpWorker, 0)
|
|
for i := 0; i < len(workersSctxs); i++ {
|
|
worker := &planReplayerTaskDumpWorker{
|
|
ctx: ctx,
|
|
sctx: workersSctxs[i],
|
|
taskCH: taskCH,
|
|
status: taskStatus,
|
|
}
|
|
do.planReplayerHandle.planReplayerTaskDumpHandle.workers = append(do.planReplayerHandle.planReplayerTaskDumpHandle.workers, worker)
|
|
}
|
|
}
|
|
|
|
// RunawayManager returns the runaway manager.
|
|
func (do *Domain) RunawayManager() *runaway.Manager {
|
|
return do.runawayManager
|
|
}
|
|
|
|
// ResourceGroupsController returns the resource groups controller.
|
|
func (do *Domain) ResourceGroupsController() *rmclient.ResourceGroupsController {
|
|
return do.resourceGroupsController
|
|
}
|
|
|
|
// SetResourceGroupsController is only used in test.
|
|
func (do *Domain) SetResourceGroupsController(controller *rmclient.ResourceGroupsController) {
|
|
do.resourceGroupsController = controller
|
|
}
|
|
|
|
// SetupHistoricalStatsWorker sets up the historical stats worker.
|
|
func (do *Domain) SetupHistoricalStatsWorker(ctx sessionctx.Context) {
|
|
do.historicalStatsWorker = &HistoricalStatsWorker{
|
|
tblCH: make(chan int64, 16),
|
|
sctx: ctx,
|
|
}
|
|
}
|
|
|
|
// SetupDumpFileGCChecker sets up the session context for the dump file GC checker.
|
|
func (do *Domain) SetupDumpFileGCChecker(ctx sessionctx.Context) {
|
|
do.dumpFileGcChecker.setupSctx(ctx)
|
|
do.dumpFileGcChecker.planReplayerTaskStatus = do.planReplayerHandle.status
|
|
}
|
|
|
|
// SetupExtractHandle sets up the extract handler.
|
|
func (do *Domain) SetupExtractHandle(sctxs []sessionctx.Context) {
|
|
do.extractTaskHandle = newExtractHandler(do.ctx, sctxs)
|
|
}
|
|
|
|
var planReplayerHandleLease atomic.Uint64
|
|
|
|
func init() {
|
|
planReplayerHandleLease.Store(uint64(10 * time.Second))
|
|
enableDumpHistoricalStats.Store(true)
|
|
}
|
|
|
|
// DisablePlanReplayerBackgroundJob4Test disables the plan replayer background job for tests.
|
|
func DisablePlanReplayerBackgroundJob4Test() {
|
|
planReplayerHandleLease.Store(0)
|
|
}
|
|
|
|
// DisableDumpHistoricalStats4Test disables the historical stats dump worker for tests.
|
|
func DisableDumpHistoricalStats4Test() {
|
|
enableDumpHistoricalStats.Store(false)
|
|
}
|
|
|
|
// StartPlanReplayerHandle starts the plan replayer handle jobs.
|
|
func (do *Domain) StartPlanReplayerHandle() {
|
|
lease := planReplayerHandleLease.Load()
|
|
if lease < 1 {
|
|
return
|
|
}
|
|
do.wg.Run(func() {
|
|
logutil.BgLogger().Info("PlanReplayerTaskCollectHandle started")
|
|
ticker := time.NewTicker(time.Duration(lease))
|
|
defer func() {
|
|
ticker.Stop()
|
|
logutil.BgLogger().Info("PlanReplayerTaskCollectHandle exited.")
|
|
}()
|
|
defer util.Recover(metrics.LabelDomain, "PlanReplayerTaskCollectHandle", nil, false)
|
|
|
|
for {
|
|
select {
|
|
case <-do.exit:
|
|
return
|
|
case <-ticker.C:
|
|
err := do.planReplayerHandle.CollectPlanReplayerTask()
|
|
if err != nil {
|
|
logutil.BgLogger().Warn("plan replayer handle collect tasks failed", zap.Error(err))
|
|
}
|
|
}
|
|
}
|
|
}, "PlanReplayerTaskCollectHandle")
|
|
|
|
do.wg.Run(func() {
|
|
logutil.BgLogger().Info("PlanReplayerTaskDumpHandle started")
|
|
defer func() {
|
|
logutil.BgLogger().Info("PlanReplayerTaskDumpHandle exited.")
|
|
}()
|
|
defer util.Recover(metrics.LabelDomain, "PlanReplayerTaskDumpHandle", nil, false)
|
|
|
|
for _, worker := range do.planReplayerHandle.planReplayerTaskDumpHandle.workers {
|
|
go worker.run()
|
|
}
|
|
<-do.exit
|
|
do.planReplayerHandle.planReplayerTaskDumpHandle.Close()
|
|
}, "PlanReplayerTaskDumpHandle")
|
|
}
|
|
|
|
// GetPlanReplayerHandle returns plan replayer handle
|
|
func (do *Domain) GetPlanReplayerHandle() *planReplayerHandle {
|
|
return do.planReplayerHandle
|
|
}
|
|
|
|
// GetExtractHandle returns extract handle
|
|
func (do *Domain) GetExtractHandle() *ExtractHandle {
|
|
return do.extractTaskHandle
|
|
}
|
|
|
|
// GetDumpFileGCChecker returns dump file GC checker for plan replayer and plan trace
|
|
func (do *Domain) GetDumpFileGCChecker() *dumpFileGcChecker {
|
|
return do.dumpFileGcChecker
|
|
}
|
|
|
|
// DumpFileGcCheckerLoop creates a goroutine that handles `exit` and `gc`.
|
|
func (do *Domain) DumpFileGcCheckerLoop() {
|
|
do.wg.Run(func() {
|
|
logutil.BgLogger().Info("dumpFileGcChecker started")
|
|
gcTicker := time.NewTicker(do.dumpFileGcChecker.gcLease)
|
|
defer func() {
|
|
gcTicker.Stop()
|
|
logutil.BgLogger().Info("dumpFileGcChecker exited.")
|
|
}()
|
|
defer util.Recover(metrics.LabelDomain, "dumpFileGcCheckerLoop", nil, false)
|
|
|
|
for {
|
|
select {
|
|
case <-do.exit:
|
|
return
|
|
case <-gcTicker.C:
|
|
do.dumpFileGcChecker.GCDumpFiles(time.Hour, time.Hour*24*7)
|
|
}
|
|
}
|
|
}, "dumpFileGcChecker")
|
|
}
|
|
|
|
// GetHistoricalStatsWorker returns the historical stats worker.
|
|
func (do *Domain) GetHistoricalStatsWorker() *HistoricalStatsWorker {
|
|
return do.historicalStatsWorker
|
|
}
|
|
|
|
// enableDumpHistoricalStats controls whether to dump historical stats; it is used in unit tests.
|
|
var enableDumpHistoricalStats atomic.Bool
|
|
|
|
// StartHistoricalStatsWorker starts the historical stats worker.
|
|
func (do *Domain) StartHistoricalStatsWorker() {
|
|
if !enableDumpHistoricalStats.Load() {
|
|
return
|
|
}
|
|
do.wg.Run(func() {
|
|
logutil.BgLogger().Info("HistoricalStatsWorker started")
|
|
defer func() {
|
|
logutil.BgLogger().Info("HistoricalStatsWorker exited.")
|
|
}()
|
|
defer util.Recover(metrics.LabelDomain, "HistoricalStatsWorkerLoop", nil, false)
|
|
|
|
for {
|
|
select {
|
|
case <-do.exit:
|
|
close(do.historicalStatsWorker.tblCH)
|
|
return
|
|
case tblID, ok := <-do.historicalStatsWorker.tblCH:
|
|
if !ok {
|
|
return
|
|
}
|
|
err := do.historicalStatsWorker.DumpHistoricalStats(tblID, do.StatsHandle())
|
|
if err != nil {
|
|
logutil.BgLogger().Warn("dump historical stats failed", zap.Error(err), zap.Int64("tableID", tblID))
|
|
}
|
|
}
|
|
}
|
|
}, "HistoricalStatsWorker")
|
|
}
|
|
|
|
// StatsHandle returns the statistic handle.
|
|
func (do *Domain) StatsHandle() *handle.Handle {
|
|
return do.statsHandle.Load()
|
|
}
|
|
|
|
// CreateStatsHandle is used only for test.
|
|
func (do *Domain) CreateStatsHandle(ctx, initStatsCtx sessionctx.Context) error {
|
|
h, err := handle.NewHandle(ctx, initStatsCtx, do.statsLease, do.sysSessionPool, &do.sysProcesses, do.NextConnID, do.ReleaseConnID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
h.StartWorker()
|
|
do.statsHandle.Store(h)
|
|
return nil
|
|
}
|
|
|
|
// StatsUpdating checks if the stats worker is updating.
|
|
func (do *Domain) StatsUpdating() bool {
|
|
return do.statsUpdating.Load() > 0
|
|
}
|
|
|
|
// SetStatsUpdating sets the value of stats updating.
|
|
func (do *Domain) SetStatsUpdating(val bool) {
|
|
if val {
|
|
do.statsUpdating.Store(1)
|
|
} else {
|
|
do.statsUpdating.Store(0)
|
|
}
|
|
}
|
|
|
|
// LoadAndUpdateStatsLoop loads and updates stats info.
|
|
func (do *Domain) LoadAndUpdateStatsLoop(ctxs []sessionctx.Context, initStatsCtx sessionctx.Context) error {
|
|
if err := do.UpdateTableStatsLoop(ctxs[0], initStatsCtx); err != nil {
|
|
return err
|
|
}
|
|
do.StartLoadStatsSubWorkers(ctxs[1:])
|
|
return nil
|
|
}
|
|
|
|
// UpdateTableStatsLoop creates a goroutine loads stats info and updates stats info in a loop.
|
|
// It will also start a goroutine to analyze tables automatically.
|
|
// It should be called only once in BootstrapSession.
|
|
func (do *Domain) UpdateTableStatsLoop(ctx, initStatsCtx sessionctx.Context) error {
|
|
ctx.GetSessionVars().InRestrictedSQL = true
|
|
statsHandle, err := handle.NewHandle(ctx, initStatsCtx, do.statsLease, do.sysSessionPool, &do.sysProcesses, do.NextConnID, do.ReleaseConnID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
statsHandle.StartWorker()
|
|
do.statsHandle.Store(statsHandle)
|
|
do.ddl.RegisterStatsHandle(statsHandle)
|
|
// A negative stats lease indicates that it is in test or in the BR binary mode, where no stats update is needed.
|
|
if do.statsLease >= 0 {
|
|
do.wg.Run(do.loadStatsWorker, "loadStatsWorker")
|
|
}
|
|
owner := do.newOwnerManager(handle.StatsPrompt, handle.StatsOwnerKey)
|
|
do.wg.Run(func() {
|
|
do.indexUsageWorker()
|
|
}, "indexUsageWorker")
|
|
if do.statsLease <= 0 {
|
|
// For statsLease > 0, `updateStatsWorker` handles the quit of stats owner.
|
|
do.wg.Run(func() { quitStatsOwner(do, owner) }, "quitStatsOwner")
|
|
return nil
|
|
}
|
|
do.SetStatsUpdating(true)
|
|
// The stats updated worker doesn't require the stats initialization to be completed.
|
|
// This is because the updated worker's primary responsibilities are to update the change delta and handle DDL operations.
|
|
// These tasks do not interfere with or depend on the initialization process.
|
|
do.wg.Run(func() { do.updateStatsWorker(ctx, owner) }, "updateStatsWorker")
|
|
do.wg.Run(func() {
|
|
do.handleDDLEvent()
|
|
}, "handleDDLEvent")
|
|
// Wait for the stats worker to finish the initialization.
|
|
// Otherwise, we may start the auto analyze worker before the stats cache is initialized.
|
|
do.wg.Run(
|
|
func() {
|
|
select {
|
|
case <-do.StatsHandle().InitStatsDone:
|
|
case <-do.exit: // It may happen that before initStatsDone, tidb receive Ctrl+C
|
|
return
|
|
}
|
|
do.autoAnalyzeWorker(owner)
|
|
},
|
|
"autoAnalyzeWorker",
|
|
)
|
|
do.wg.Run(
|
|
func() {
|
|
select {
|
|
case <-do.StatsHandle().InitStatsDone:
|
|
case <-do.exit: // It may happen that before initStatsDone, tidb receive Ctrl+C
|
|
return
|
|
}
|
|
do.analyzeJobsCleanupWorker(owner)
|
|
},
|
|
"analyzeJobsCleanupWorker",
|
|
)
|
|
do.wg.Run(
|
|
func() {
|
|
// The initStatsCtx is used to store the internal session for initializing stats,
|
|
// so we need the gc min start ts calculation to track it as an internal session.
|
|
// Since the session manager may not be ready at this moment, `infosync.StoreInternalSession` can fail,
// so we retry until the session manager is ready or the init stats completes.
|
|
for !infosync.StoreInternalSession(initStatsCtx) {
|
|
waitRetry := time.After(time.Second)
|
|
select {
|
|
case <-do.StatsHandle().InitStatsDone:
|
|
return
|
|
case <-waitRetry:
|
|
}
|
|
}
|
|
select {
|
|
case <-do.StatsHandle().InitStatsDone:
|
|
case <-do.exit: // It may happen that before initStatsDone, tidb receive Ctrl+C
|
|
return
|
|
}
|
|
infosync.DeleteInternalSession(initStatsCtx)
|
|
},
|
|
"RemoveInitStatsFromInternalSessions",
|
|
)
|
|
return nil
|
|
}
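// quitStatsOwner cancels the stats owner manager when the domain exits.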
func quitStatsOwner(do *Domain, mgr owner.Manager) {
|
|
<-do.exit
|
|
mgr.Cancel()
|
|
}
|
|
|
|
// StartLoadStatsSubWorkers starts sub workers with new sessions to load stats concurrently.
|
|
func (do *Domain) StartLoadStatsSubWorkers(ctxList []sessionctx.Context) {
|
|
statsHandle := do.StatsHandle()
|
|
for _, ctx := range ctxList {
|
|
do.wg.Add(1)
|
|
go statsHandle.SubLoadWorker(ctx, do.exit, do.wg)
|
|
}
|
|
logutil.BgLogger().Info("start load stats sub workers", zap.Int("worker count", len(ctxList)))
|
|
}
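// newOwnerManager creates an owner manager with the given prompt and key (a mock manager when there is
// no etcd client) and starts campaigning for ownership.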
func (do *Domain) newOwnerManager(prompt, ownerKey string) owner.Manager {
|
|
id := do.ddl.OwnerManager().ID()
|
|
var statsOwner owner.Manager
|
|
if do.etcdClient == nil {
|
|
statsOwner = owner.NewMockManager(context.Background(), id, do.store, ownerKey)
|
|
} else {
|
|
statsOwner = owner.NewOwnerManager(context.Background(), do.etcdClient, prompt, id, ownerKey)
|
|
}
|
|
// TODO: Need to do something when err is not nil.
|
|
err := statsOwner.CampaignOwner()
|
|
if err != nil {
|
|
logutil.BgLogger().Warn("campaign owner failed", zap.Error(err))
|
|
}
|
|
return statsOwner
|
|
}
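// initStats loads stats info into the stats handle, using the lite path when configured, and closes
// InitStatsDone when it finishes.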
func (do *Domain) initStats(ctx context.Context) {
|
|
statsHandle := do.StatsHandle()
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
logutil.BgLogger().Error("panic when initiating stats", zap.Any("r", r),
|
|
zap.Stack("stack"))
|
|
}
|
|
close(statsHandle.InitStatsDone)
|
|
}()
|
|
t := time.Now()
|
|
liteInitStats := config.GetGlobalConfig().Performance.LiteInitStats
|
|
initstats.InitStatsPercentage.Store(0)
|
|
var err error
|
|
if liteInitStats {
|
|
err = statsHandle.InitStatsLite(ctx, do.InfoSchema())
|
|
} else {
|
|
err = statsHandle.InitStats(ctx, do.InfoSchema())
|
|
}
|
|
initstats.InitStatsPercentage.Store(100)
|
|
if err != nil {
|
|
logutil.BgLogger().Error("init stats info failed", zap.Bool("lite", liteInitStats), zap.Duration("take time", time.Since(t)), zap.Error(err))
|
|
} else {
|
|
logutil.BgLogger().Info("init stats info time", zap.Bool("lite", liteInitStats), zap.Duration("take time", time.Since(t)))
|
|
}
|
|
}
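// loadStatsWorker initializes stats, then periodically reloads stats info, loads needed histograms,
// and refreshes the stats-healthy metrics.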
func (do *Domain) loadStatsWorker() {
|
|
defer util.Recover(metrics.LabelDomain, "loadStatsWorker", nil, false)
|
|
lease := do.statsLease
|
|
if lease == 0 {
|
|
lease = 3 * time.Second
|
|
}
|
|
loadTicker := time.NewTicker(lease)
|
|
updStatsHealthyTicker := time.NewTicker(20 * lease)
|
|
defer func() {
|
|
loadTicker.Stop()
|
|
updStatsHealthyTicker.Stop()
|
|
logutil.BgLogger().Info("loadStatsWorker exited.")
|
|
}()
|
|
|
|
ctx, cancelFunc := context.WithCancel(context.Background())
|
|
do.cancelFns.mu.Lock()
|
|
do.cancelFns.fns = append(do.cancelFns.fns, cancelFunc)
|
|
do.cancelFns.mu.Unlock()
|
|
|
|
do.initStats(ctx)
|
|
statsHandle := do.StatsHandle()
|
|
var err error
|
|
for {
|
|
select {
|
|
case <-loadTicker.C:
|
|
err = statsHandle.Update(ctx, do.InfoSchema())
|
|
if err != nil {
|
|
logutil.BgLogger().Debug("update stats info failed", zap.Error(err))
|
|
}
|
|
err = statsHandle.LoadNeededHistograms()
|
|
if err != nil {
|
|
logutil.BgLogger().Debug("load histograms failed", zap.Error(err))
|
|
}
|
|
case <-updStatsHealthyTicker.C:
|
|
statsHandle.UpdateStatsHealthyMetrics()
|
|
case <-do.exit:
|
|
return
|
|
}
|
|
}
|
|
}
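// indexUsageWorker periodically garbage-collects outdated index usage information.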
func (do *Domain) indexUsageWorker() {
|
|
defer util.Recover(metrics.LabelDomain, "indexUsageWorker", nil, false)
|
|
gcStatsTicker := time.NewTicker(indexUsageGCDuration)
|
|
handle := do.StatsHandle()
|
|
defer func() {
|
|
logutil.BgLogger().Info("indexUsageWorker exited.")
|
|
}()
|
|
for {
|
|
select {
|
|
case <-do.exit:
|
|
return
|
|
case <-gcStatsTicker.C:
|
|
if err := handle.GCIndexUsage(); err != nil {
|
|
statslogutil.StatsLogger().Error("gc index usage failed", zap.Error(err))
|
|
}
|
|
}
|
|
}
|
|
}
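// updateStatsWorkerExitPreprocessing flushes stats and releases stats ownership before
// updateStatsWorker exits, giving up after a 10-second timeout.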
func (*Domain) updateStatsWorkerExitPreprocessing(statsHandle *handle.Handle, owner owner.Manager) {
|
|
ch := make(chan struct{}, 1)
|
|
timeout, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
go func() {
|
|
logutil.BgLogger().Info("updateStatsWorker is going to exit, start to flush stats")
|
|
statsHandle.FlushStats()
|
|
logutil.BgLogger().Info("updateStatsWorker ready to release owner")
|
|
owner.Cancel()
|
|
ch <- struct{}{}
|
|
}()
|
|
select {
|
|
case <-ch:
|
|
logutil.BgLogger().Info("updateStatsWorker exit preprocessing finished")
|
|
return
|
|
case <-timeout.Done():
|
|
logutil.BgLogger().Warn("updateStatsWorker exit preprocessing timeout, force exiting")
|
|
return
|
|
}
|
|
}
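// handleDDLEvent consumes DDL events from the stats handle and applies them to the statistics.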
func (do *Domain) handleDDLEvent() {
|
|
logutil.BgLogger().Info("handleDDLEvent started.")
|
|
defer util.Recover(metrics.LabelDomain, "handleDDLEvent", nil, false)
|
|
statsHandle := do.StatsHandle()
|
|
for {
|
|
select {
|
|
case <-do.exit:
|
|
return
|
|
// Only the DDL owner sends to this channel.
|
|
case t := <-statsHandle.DDLEventCh():
|
|
err := statsHandle.HandleDDLEvent(t)
|
|
if err != nil {
|
|
logutil.BgLogger().Error("handle ddl event failed", zap.String("event", t.String()), zap.Error(err))
|
|
}
|
|
}
|
|
}
|
|
}
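// updateStatsWorker periodically dumps stats deltas and column stats usage to KV, lets the owner GC
// stale stats, and refreshes memory usage stats.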
func (do *Domain) updateStatsWorker(_ sessionctx.Context, owner owner.Manager) {
|
|
defer util.Recover(metrics.LabelDomain, "updateStatsWorker", nil, false)
|
|
logutil.BgLogger().Info("updateStatsWorker started.")
|
|
lease := do.statsLease
|
|
// We need to have different nodes trigger tasks at different times to avoid the herd effect.
|
|
randDuration := time.Duration(rand.Int63n(int64(time.Minute)))
|
|
deltaUpdateTicker := time.NewTicker(20*lease + randDuration)
|
|
gcStatsTicker := time.NewTicker(100 * lease)
|
|
dumpColStatsUsageTicker := time.NewTicker(100 * lease)
|
|
readMemTicker := time.NewTicker(memory.ReadMemInterval)
|
|
statsHandle := do.StatsHandle()
|
|
defer func() {
|
|
dumpColStatsUsageTicker.Stop()
|
|
gcStatsTicker.Stop()
|
|
deltaUpdateTicker.Stop()
|
|
readMemTicker.Stop()
|
|
do.SetStatsUpdating(false)
|
|
logutil.BgLogger().Info("updateStatsWorker exited.")
|
|
}()
|
|
defer util.Recover(metrics.LabelDomain, "updateStatsWorker", nil, false)
|
|
|
|
for {
|
|
select {
|
|
case <-do.exit:
|
|
do.updateStatsWorkerExitPreprocessing(statsHandle, owner)
|
|
return
|
|
case <-deltaUpdateTicker.C:
|
|
err := statsHandle.DumpStatsDeltaToKV(false)
|
|
if err != nil {
|
|
logutil.BgLogger().Debug("dump stats delta failed", zap.Error(err))
|
|
}
|
|
case <-gcStatsTicker.C:
|
|
if !owner.IsOwner() {
|
|
continue
|
|
}
|
|
err := statsHandle.GCStats(do.InfoSchema(), do.GetSchemaLease())
|
|
if err != nil {
|
|
logutil.BgLogger().Debug("GC stats failed", zap.Error(err))
|
|
}
|
|
do.CheckAutoAnalyzeWindows()
|
|
case <-dumpColStatsUsageTicker.C:
|
|
err := statsHandle.DumpColStatsUsageToKV()
|
|
if err != nil {
|
|
logutil.BgLogger().Debug("dump column stats usage failed", zap.Error(err))
|
|
}
|
|
|
|
case <-readMemTicker.C:
|
|
memory.ForceReadMemStats()
|
|
}
|
|
}
|
|
}
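// autoAnalyzeWorker triggers auto-analyze on the owner node once per stats lease, unless auto analyze
// is disabled or stopped.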
func (do *Domain) autoAnalyzeWorker(owner owner.Manager) {
|
|
defer util.Recover(metrics.LabelDomain, "autoAnalyzeWorker", nil, false)
|
|
statsHandle := do.StatsHandle()
|
|
analyzeTicker := time.NewTicker(do.statsLease)
|
|
defer func() {
|
|
analyzeTicker.Stop()
|
|
logutil.BgLogger().Info("autoAnalyzeWorker exited.")
|
|
}()
|
|
for {
|
|
select {
|
|
case <-analyzeTicker.C:
|
|
if variable.RunAutoAnalyze.Load() && !do.stopAutoAnalyze.Load() && owner.IsOwner() {
|
|
statsHandle.HandleAutoAnalyze()
|
|
}
|
|
case <-do.exit:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// analyzeJobsCleanupWorker is a background worker that periodically performs two main tasks:
|
|
//
|
|
// 1. Garbage Collection: It removes outdated analyze jobs from the statistics handle.
|
|
// This operation is performed every hour and only if the current instance is the owner.
|
|
// Analyze jobs older than 7 days are considered outdated and are removed.
|
|
//
|
|
// 2. Cleanup: It cleans up corrupted analyze jobs.
|
|
// A corrupted analyze job is one that is in a 'pending' or 'running' state,
|
|
// but is associated with a TiDB instance that is either not currently running or has been restarted.
|
|
// Also, if the analyze job is killed by the user, it is considered corrupted.
|
|
// This operation is performed every 100 stats leases.
|
|
// It first retrieves the list of current analyze processes, then removes any analyze job
|
|
// that is not associated with a current process. Additionally, if the current instance is the owner,
|
|
// it also cleans up corrupted analyze jobs on dead instances.
|
|
func (do *Domain) analyzeJobsCleanupWorker(owner owner.Manager) {
|
|
defer util.Recover(metrics.LabelDomain, "analyzeJobsCleanupWorker", nil, false)
|
|
// For GC.
|
|
const gcInterval = time.Hour
|
|
const daysToKeep = 7
|
|
gcTicker := time.NewTicker(gcInterval)
|
|
// For clean up.
|
|
// Default stats lease is 3 * time.Second.
|
|
// So cleanupInterval is 100 * 3 * time.Second = 5 * time.Minute.
|
|
var cleanupInterval = do.statsLease * 100
|
|
cleanupTicker := time.NewTicker(cleanupInterval)
|
|
defer func() {
|
|
gcTicker.Stop()
|
|
cleanupTicker.Stop()
|
|
logutil.BgLogger().Info("analyzeJobsCleanupWorker exited.")
|
|
}()
|
|
statsHandle := do.StatsHandle()
|
|
for {
|
|
select {
|
|
case <-gcTicker.C:
|
|
// Only the owner should perform this operation.
|
|
if owner.IsOwner() {
|
|
updateTime := time.Now().AddDate(0, 0, -daysToKeep)
|
|
err := statsHandle.DeleteAnalyzeJobs(updateTime)
|
|
if err != nil {
|
|
logutil.BgLogger().Warn("gc analyze history failed", zap.Error(err))
|
|
}
|
|
}
|
|
case <-cleanupTicker.C:
|
|
sm := do.InfoSyncer().GetSessionManager()
|
|
if sm == nil {
|
|
continue
|
|
}
|
|
analyzeProcessIDs := make(map[uint64]struct{}, 8)
|
|
for _, process := range sm.ShowProcessList() {
|
|
if isAnalyzeTableSQL(process.Info) {
|
|
analyzeProcessIDs[process.ID] = struct{}{}
|
|
}
|
|
}
|
|
|
|
err := statsHandle.CleanupCorruptedAnalyzeJobsOnCurrentInstance(analyzeProcessIDs)
|
|
if err != nil {
|
|
logutil.BgLogger().Warn("cleanup analyze jobs on current instance failed", zap.Error(err))
|
|
}
|
|
|
|
if owner.IsOwner() {
|
|
err = statsHandle.CleanupCorruptedAnalyzeJobsOnDeadInstances()
|
|
if err != nil {
|
|
logutil.BgLogger().Warn("cleanup analyze jobs on dead instances failed", zap.Error(err))
|
|
}
|
|
}
|
|
case <-do.exit:
|
|
return
|
|
}
|
|
}
|
|
}
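// isAnalyzeTableSQL reports whether the given SQL statement is an ANALYZE TABLE statement.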
func isAnalyzeTableSQL(sql string) bool {
|
|
// Get rid of the comments.
|
|
normalizedSQL := parser.Normalize(sql, "ON")
|
|
return strings.HasPrefix(normalizedSQL, "analyze table")
|
|
}
|
|
|
|
// ExpensiveQueryHandle returns the expensive query handle.
|
|
func (do *Domain) ExpensiveQueryHandle() *expensivequery.Handle {
|
|
return do.expensiveQueryHandle
|
|
}
|
|
|
|
// MemoryUsageAlarmHandle returns the memory usage alarm handle.
|
|
func (do *Domain) MemoryUsageAlarmHandle() *memoryusagealarm.Handle {
|
|
return do.memoryUsageAlarmHandle
|
|
}
|
|
|
|
// ServerMemoryLimitHandle returns the expensive query handle.
|
|
func (do *Domain) ServerMemoryLimitHandle() *servermemorylimit.Handle {
|
|
return do.serverMemoryLimitHandle
|
|
}
|
|
|
|
const (
|
|
privilegeKey = "/tidb/privilege"
|
|
sysVarCacheKey = "/tidb/sysvars"
|
|
tiflashComputeNodeKey = "/tiflash/new_tiflash_compute_nodes"
|
|
)
|
|
|
|
// NotifyUpdatePrivilege updates privilege key in etcd, TiDB client that watches
|
|
// the key will get notification.
|
|
func (do *Domain) NotifyUpdatePrivilege() error {
|
|
// Whether or not skip-grant-table is configured, an etcd message must be sent,
// because we need to tell other TiDB instances to update privilege data; say, we're changing the
// password using a special TiDB instance and want the new password to take effect.
|
|
if do.etcdClient != nil {
|
|
row := do.etcdClient.KV
|
|
_, err := row.Put(context.Background(), privilegeKey, "")
|
|
if err != nil {
|
|
logutil.BgLogger().Warn("notify update privilege failed", zap.Error(err))
|
|
}
|
|
}
|
|
|
|
// If skip-grant-table is configured, do not flush privileges.
|
|
// Because LoadPrivilegeLoop does not run and the privilege Handle is nil,
|
|
// the call to do.PrivilegeHandle().Update would panic.
|
|
if config.GetGlobalConfig().Security.SkipGrantTable {
|
|
return nil
|
|
}
|
|
|
|
// update locally
|
|
ctx, err := do.sysSessionPool.Get()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer do.sysSessionPool.Put(ctx)
|
|
return do.PrivilegeHandle().Update(ctx.(sessionctx.Context))
|
|
}
|
|
|
|
// NotifyUpdateSysVarCache updates the sysvar cache key in etcd, which other TiDB
|
|
// clients are subscribed to for updates. For the caller, the cache is also built
|
|
// synchronously so that the effect is immediate.
|
|
func (do *Domain) NotifyUpdateSysVarCache(updateLocal bool) {
|
|
if do.etcdClient != nil {
|
|
row := do.etcdClient.KV
|
|
_, err := row.Put(context.Background(), sysVarCacheKey, "")
|
|
if err != nil {
|
|
logutil.BgLogger().Warn("notify update sysvar cache failed", zap.Error(err))
|
|
}
|
|
}
|
|
// update locally
|
|
if updateLocal {
|
|
if err := do.rebuildSysVarCache(nil); err != nil {
|
|
logutil.BgLogger().Error("rebuilding sysvar cache failed", zap.Error(err))
|
|
}
|
|
}
|
|
}
|
|
|
|
// LoadSigningCertLoop loads the signing cert periodically to make sure it stays fresh.
|
|
func (do *Domain) LoadSigningCertLoop(signingCert, signingKey string) {
|
|
sessionstates.SetCertPath(signingCert)
|
|
sessionstates.SetKeyPath(signingKey)
|
|
|
|
do.wg.Run(func() {
|
|
defer func() {
|
|
logutil.BgLogger().Debug("loadSigningCertLoop exited.")
|
|
}()
|
|
defer util.Recover(metrics.LabelDomain, "LoadSigningCertLoop", nil, false)
|
|
|
|
for {
|
|
select {
|
|
case <-time.After(sessionstates.LoadCertInterval):
|
|
sessionstates.ReloadSigningCert()
|
|
case <-do.exit:
|
|
return
|
|
}
|
|
}
|
|
}, "loadSigningCertLoop")
|
|
}
|
|
|
|
// ServerID gets serverID.
|
|
func (do *Domain) ServerID() uint64 {
|
|
return atomic.LoadUint64(&do.serverID)
|
|
}
|
|
|
|
// IsLostConnectionToPD indicates lost connection to PD or not.
|
|
func (do *Domain) IsLostConnectionToPD() bool {
|
|
return do.isLostConnectionToPD.Load() != 0
|
|
}
|
|
|
|
// NextConnID return next connection ID.
|
|
func (do *Domain) NextConnID() uint64 {
|
|
return do.connIDAllocator.NextID()
|
|
}
|
|
|
|
// ReleaseConnID releases connection ID.
|
|
func (do *Domain) ReleaseConnID(connID uint64) {
|
|
do.connIDAllocator.Release(connID)
|
|
}
|
|
|
|
const (
|
|
serverIDEtcdPath = "/tidb/server_id"
|
|
refreshServerIDRetryCnt = 3
|
|
acquireServerIDRetryInterval = 300 * time.Millisecond
|
|
acquireServerIDTimeout = 10 * time.Second
|
|
retrieveServerIDSessionTimeout = 10 * time.Second
|
|
|
|
acquire32BitsServerIDRetryCnt = 3
|
|
)
|
|
|
|
var (
|
|
// serverIDTTL should be LONG ENOUGH to avoid abruptly killing an ongoing long-running SQL statement.
|
|
serverIDTTL = 12 * time.Hour
|
|
// serverIDTimeToKeepAlive is the interval at which we periodically keep the serverID TTL alive.
|
|
serverIDTimeToKeepAlive = 5 * time.Minute
|
|
// serverIDTimeToCheckPDConnectionRestored is the interval at which we periodically check whether the connection to PD has been restored after it broke.
|
|
serverIDTimeToCheckPDConnectionRestored = 10 * time.Second
|
|
// lostConnectionToPDTimeout is the duration after which, if TiDB still cannot connect to PD,
// we consider the connection to PD utterly lost, and the previously acquired server ID should be released.
// Must be SHORTER than `serverIDTTL`.
|
|
lostConnectionToPDTimeout = 6 * time.Hour
|
|
)
|
|
|
|
var (
|
|
ldflagIsGlobalKillTest = "0" // 1:Yes, otherwise:No.
|
|
ldflagServerIDTTL = "10" // in seconds.
|
|
ldflagServerIDTimeToKeepAlive = "1" // in seconds.
|
|
ldflagServerIDTimeToCheckPDConnectionRestored = "1" // in seconds.
|
|
ldflagLostConnectionToPDTimeout = "5" // in seconds.
|
|
)
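// initByLDFlagsForGlobalKill shortens the server ID related TTLs and intervals from the ldflag values
// when the global kill test flag is enabled.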
func initByLDFlagsForGlobalKill() {
|
|
if ldflagIsGlobalKillTest == "1" {
|
|
var (
|
|
i int
|
|
err error
|
|
)
|
|
|
|
if i, err = strconv.Atoi(ldflagServerIDTTL); err != nil {
|
|
panic("invalid ldflagServerIDTTL")
|
|
}
|
|
serverIDTTL = time.Duration(i) * time.Second
|
|
|
|
if i, err = strconv.Atoi(ldflagServerIDTimeToKeepAlive); err != nil {
|
|
panic("invalid ldflagServerIDTimeToKeepAlive")
|
|
}
|
|
serverIDTimeToKeepAlive = time.Duration(i) * time.Second
|
|
|
|
if i, err = strconv.Atoi(ldflagServerIDTimeToCheckPDConnectionRestored); err != nil {
|
|
panic("invalid ldflagServerIDTimeToCheckPDConnectionRestored")
|
|
}
|
|
serverIDTimeToCheckPDConnectionRestored = time.Duration(i) * time.Second
|
|
|
|
if i, err = strconv.Atoi(ldflagLostConnectionToPDTimeout); err != nil {
|
|
panic("invalid ldflagLostConnectionToPDTimeout")
|
|
}
|
|
lostConnectionToPDTimeout = time.Duration(i) * time.Second
|
|
|
|
logutil.BgLogger().Info("global_kill_test is enabled", zap.Duration("serverIDTTL", serverIDTTL),
|
|
zap.Duration("serverIDTimeToKeepAlive", serverIDTimeToKeepAlive),
|
|
zap.Duration("serverIDTimeToCheckPDConnectionRestored", serverIDTimeToCheckPDConnectionRestored),
|
|
zap.Duration("lostConnectionToPDTimeout", lostConnectionToPDTimeout))
|
|
}
|
|
}
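// retrieveServerIDSession returns the cached etcd session for the server ID, or grants a new lease and
// creates a session from it.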
func (do *Domain) retrieveServerIDSession(ctx context.Context) (*concurrency.Session, error) {
|
|
if do.serverIDSession != nil {
|
|
return do.serverIDSession, nil
|
|
}
|
|
|
|
// `etcdClient.Grant` needs a short-term timeout to avoid blocking if the connection to PD is lost,
// while `etcdClient.KeepAlive` should be long-term.
// So we separately invoke `etcdClient.Grant` and `concurrency.NewSession` with the leaseID.
|
|
childCtx, cancel := context.WithTimeout(ctx, retrieveServerIDSessionTimeout)
|
|
resp, err := do.etcdClient.Grant(childCtx, int64(serverIDTTL.Seconds()))
|
|
cancel()
|
|
if err != nil {
|
|
logutil.BgLogger().Error("retrieveServerIDSession.Grant fail", zap.Error(err))
|
|
return nil, err
|
|
}
|
|
leaseID := resp.ID
|
|
|
|
session, err := concurrency.NewSession(do.etcdClient,
|
|
concurrency.WithLease(leaseID), concurrency.WithContext(context.Background()))
|
|
if err != nil {
|
|
logutil.BgLogger().Error("retrieveServerIDSession.NewSession fail", zap.Error(err))
|
|
return nil, err
|
|
}
|
|
do.serverIDSession = session
|
|
return session, nil
|
|
}
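// acquireServerID claims a unique server ID by writing a key under serverIDEtcdPath in an etcd
// transaction bound to the session lease, retrying with a new proposal on conflict.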
func (do *Domain) acquireServerID(ctx context.Context) error {
|
|
atomic.StoreUint64(&do.serverID, 0)
|
|
|
|
session, err := do.retrieveServerIDSession(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
conflictCnt := 0
|
|
for {
|
|
var proposeServerID uint64
|
|
if config.GetGlobalConfig().Enable32BitsConnectionID {
|
|
proposeServerID, err = do.proposeServerID(ctx, conflictCnt)
|
|
if err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
} else {
|
|
// get a random serverID: [1, MaxServerID64]
|
|
proposeServerID = uint64(rand.Int63n(int64(globalconn.MaxServerID64)) + 1) // #nosec G404
|
|
}
|
|
|
|
key := fmt.Sprintf("%s/%v", serverIDEtcdPath, proposeServerID)
|
|
cmp := clientv3.Compare(clientv3.CreateRevision(key), "=", 0)
|
|
value := "0"
|
|
|
|
childCtx, cancel := context.WithTimeout(ctx, acquireServerIDTimeout)
|
|
txn := do.etcdClient.Txn(childCtx)
|
|
t := txn.If(cmp)
|
|
resp, err := t.Then(clientv3.OpPut(key, value, clientv3.WithLease(session.Lease()))).Commit()
|
|
cancel()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !resp.Succeeded {
|
|
logutil.BgLogger().Info("propose serverID exists, try again", zap.Uint64("proposeServerID", proposeServerID))
|
|
time.Sleep(acquireServerIDRetryInterval)
|
|
conflictCnt++
|
|
continue
|
|
}
|
|
|
|
atomic.StoreUint64(&do.serverID, proposeServerID)
|
|
logutil.BgLogger().Info("acquireServerID", zap.Uint64("serverID", do.ServerID()),
|
|
zap.String("lease id", strconv.FormatInt(int64(session.Lease()), 16)))
|
|
return nil
|
|
}
|
|
}
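// releaseServerID resets the local server ID and deletes its key from etcd.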
func (do *Domain) releaseServerID(context.Context) {
|
|
serverID := do.ServerID()
|
|
if serverID == 0 {
|
|
return
|
|
}
|
|
atomic.StoreUint64(&do.serverID, 0)
|
|
|
|
if do.etcdClient == nil {
|
|
return
|
|
}
|
|
key := fmt.Sprintf("%s/%v", serverIDEtcdPath, serverID)
|
|
err := ddlutil.DeleteKeyFromEtcd(key, do.etcdClient, refreshServerIDRetryCnt, acquireServerIDTimeout)
|
|
if err != nil {
|
|
logutil.BgLogger().Error("releaseServerID fail", zap.Uint64("serverID", serverID), zap.Error(err))
|
|
} else {
|
|
logutil.BgLogger().Info("releaseServerID succeed", zap.Uint64("serverID", serverID))
|
|
}
|
|
}
|
|
|
|
// proposeServerID proposes a server ID at random.
|
|
func (*Domain) proposeServerID(ctx context.Context, conflictCnt int) (uint64, error) {
|
|
// get a random server ID in range [min, max]
|
|
randomServerID := func(min uint64, max uint64) uint64 {
|
|
return uint64(rand.Int63n(int64(max-min+1)) + int64(min)) // #nosec G404
|
|
}
|
|
|
|
if conflictCnt < acquire32BitsServerIDRetryCnt {
|
|
// get existing server IDs.
|
|
allServerInfo, err := infosync.GetAllServerInfo(ctx)
|
|
if err != nil {
|
|
return 0, errors.Trace(err)
|
|
}
|
|
// `allServerInfo` contains the current TiDB instance.
|
|
if float32(len(allServerInfo)) <= 0.9*float32(globalconn.MaxServerID32) {
|
|
serverIDs := make(map[uint64]struct{}, len(allServerInfo))
|
|
for _, info := range allServerInfo {
|
|
serverID := info.ServerIDGetter()
|
|
if serverID <= globalconn.MaxServerID32 {
|
|
serverIDs[serverID] = struct{}{}
|
|
}
|
|
}
|
|
|
|
for retry := 0; retry < 15; retry++ {
|
|
randServerID := randomServerID(1, globalconn.MaxServerID32)
|
|
if _, ok := serverIDs[randServerID]; !ok {
|
|
return randServerID, nil
|
|
}
|
|
}
|
|
}
|
|
logutil.BgLogger().Info("upgrade to 64 bits server ID due to used up", zap.Int("len(allServerInfo)", len(allServerInfo)))
|
|
} else {
|
|
logutil.BgLogger().Info("upgrade to 64 bits server ID due to conflict", zap.Int("conflictCnt", conflictCnt))
|
|
}
|
|
|
|
// upgrade to 64 bits.
|
|
return randomServerID(globalconn.MaxServerID32+1, globalconn.MaxServerID64), nil
|
|
}
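// refreshServerIDTTL re-puts the server ID key with the session lease to keep its TTL alive.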
func (do *Domain) refreshServerIDTTL(ctx context.Context) error {
|
|
session, err := do.retrieveServerIDSession(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
key := fmt.Sprintf("%s/%v", serverIDEtcdPath, do.ServerID())
|
|
value := "0"
|
|
err = ddlutil.PutKVToEtcd(ctx, do.etcdClient, refreshServerIDRetryCnt, key, value, clientv3.WithLease(session.Lease()))
|
|
if err != nil {
|
|
logutil.BgLogger().Error("refreshServerIDTTL fail", zap.Uint64("serverID", do.ServerID()), zap.Error(err))
|
|
} else {
|
|
logutil.BgLogger().Info("refreshServerIDTTL succeed", zap.Uint64("serverID", do.ServerID()),
|
|
zap.String("lease id", strconv.FormatInt(int64(session.Lease()), 16)))
|
|
}
|
|
return err
|
|
}
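// serverIDKeeper keeps the server ID TTL alive, tracks lost and restored connections to PD,
// re-acquires the server ID after the connection is restored, and restarts the etcd session when it expires.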
func (do *Domain) serverIDKeeper() {
|
|
defer func() {
|
|
do.wg.Done()
|
|
logutil.BgLogger().Info("serverIDKeeper exited.")
|
|
}()
|
|
defer util.Recover(metrics.LabelDomain, "serverIDKeeper", func() {
|
|
logutil.BgLogger().Info("recover serverIDKeeper.")
|
|
// should be called before `do.wg.Done()`, to ensure that Domain.Close() waits for the new `serverIDKeeper()` routine.
|
|
do.wg.Add(1)
|
|
go do.serverIDKeeper()
|
|
}, false)
|
|
|
|
tickerKeepAlive := time.NewTicker(serverIDTimeToKeepAlive)
|
|
tickerCheckRestored := time.NewTicker(serverIDTimeToCheckPDConnectionRestored)
|
|
defer func() {
|
|
tickerKeepAlive.Stop()
|
|
tickerCheckRestored.Stop()
|
|
}()
|
|
|
|
blocker := make(chan struct{}) // just used for blocking the sessionDone() when session is nil.
|
|
sessionDone := func() <-chan struct{} {
|
|
if do.serverIDSession == nil {
|
|
return blocker
|
|
}
|
|
return do.serverIDSession.Done()
|
|
}
|
|
|
|
var lastSucceedTimestamp time.Time
|
|
|
|
onConnectionToPDRestored := func() {
|
|
logutil.BgLogger().Info("restored connection to PD")
|
|
do.isLostConnectionToPD.Store(0)
|
|
lastSucceedTimestamp = time.Now()
|
|
|
|
if err := do.info.StoreServerInfo(context.Background()); err != nil {
|
|
logutil.BgLogger().Error("StoreServerInfo failed", zap.Error(err))
|
|
}
|
|
}
|
|
|
|
onConnectionToPDLost := func() {
|
|
logutil.BgLogger().Warn("lost connection to PD")
|
|
do.isLostConnectionToPD.Store(1)
|
|
|
|
// Kill all connections when lost connection to PD,
|
|
// to avoid the possibility that another TiDB instance acquires the same serverID and generates a same connection ID,
|
|
// which will lead to a wrong connection killed.
|
|
do.InfoSyncer().GetSessionManager().KillAllConnections()
|
|
}
|
|
|
|
for {
|
|
select {
|
|
case <-tickerKeepAlive.C:
|
|
if !do.IsLostConnectionToPD() {
|
|
if err := do.refreshServerIDTTL(context.Background()); err == nil {
|
|
lastSucceedTimestamp = time.Now()
|
|
} else {
|
|
if lostConnectionToPDTimeout > 0 && time.Since(lastSucceedTimestamp) > lostConnectionToPDTimeout {
|
|
onConnectionToPDLost()
|
|
}
|
|
}
|
|
}
|
|
case <-tickerCheckRestored.C:
|
|
if do.IsLostConnectionToPD() {
|
|
if err := do.acquireServerID(context.Background()); err == nil {
|
|
onConnectionToPDRestored()
|
|
}
|
|
}
|
|
case <-sessionDone():
|
|
// inform that TTL of `serverID` is expired. See https://godoc.org/github.com/coreos/etcd/clientv3/concurrency#Session.Done
|
|
// Should be in `IsLostConnectionToPD` state, as `lostConnectionToPDTimeout` is shorter than `serverIDTTL`.
|
|
// So just set `do.serverIDSession = nil` to restart `serverID` session in `retrieveServerIDSession()`.
|
|
logutil.BgLogger().Info("serverIDSession need restart")
|
|
do.serverIDSession = nil
|
|
case <-do.exit:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// StartTTLJobManager creates and starts the ttl job manager
|
|
func (do *Domain) StartTTLJobManager() {
|
|
ttlJobManager := ttlworker.NewJobManager(do.ddl.GetID(), do.sysSessionPool, do.store, do.etcdClient, do.ddl.OwnerManager().IsOwner)
|
|
do.ttlJobManager.Store(ttlJobManager)
|
|
ttlJobManager.Start()
|
|
}
|
|
|
|
// TTLJobManager returns the ttl job manager on this domain
|
|
func (do *Domain) TTLJobManager() *ttlworker.JobManager {
|
|
return do.ttlJobManager.Load()
|
|
}
|
|
|
|
// StopAutoAnalyze stops (*Domain).autoAnalyzeWorker to launch new auto analyze jobs.
|
|
func (do *Domain) StopAutoAnalyze() {
|
|
do.stopAutoAnalyze.Store(true)
|
|
}
|
|
|
|
// InitInstancePlanCache initializes the instance level plan cache for this Domain.
|
|
func (do *Domain) InitInstancePlanCache() {
|
|
softLimit := variable.InstancePlanCacheTargetMemSize.Load()
|
|
hardLimit := variable.InstancePlanCacheMaxMemSize.Load()
|
|
do.instancePlanCache = NewInstancePlanCache(softLimit, hardLimit)
|
|
// use a separate goroutine to avoid the eviction blocking other operations.
|
|
do.wg.Run(do.planCacheEvictTrigger, "planCacheEvictTrigger")
|
|
do.wg.Run(do.planCacheMetricsAndVars, "planCacheMetricsAndVars")
|
|
}
|
|
|
|
// GetInstancePlanCache returns the instance level plan cache in this Domain.
|
|
func (do *Domain) GetInstancePlanCache() sessionctx.InstancePlanCache {
|
|
return do.instancePlanCache
|
|
}
|
|
|
|
// planCacheMetricsAndVars updates metrics and variables for Instance Plan Cache periodically.
|
|
func (do *Domain) planCacheMetricsAndVars() {
|
|
defer util.Recover(metrics.LabelDomain, "planCacheMetricsAndVars", nil, false)
|
|
ticker := time.NewTicker(time.Second * 15) // 15s by default
|
|
defer func() {
|
|
ticker.Stop()
|
|
logutil.BgLogger().Info("planCacheMetricsAndVars exited.")
|
|
}()
|
|
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
// update limits
|
|
softLimit := variable.InstancePlanCacheTargetMemSize.Load()
|
|
hardLimit := variable.InstancePlanCacheMaxMemSize.Load()
|
|
curSoft, curHard := do.instancePlanCache.GetLimits()
|
|
if curSoft != softLimit || curHard != hardLimit {
|
|
do.instancePlanCache.SetLimits(softLimit, hardLimit)
|
|
}
|
|
|
|
// update the metrics
|
|
size := do.instancePlanCache.Size()
|
|
memUsage := do.instancePlanCache.MemUsage()
|
|
metrics2.GetPlanCacheInstanceNumCounter(true).Set(float64(size))
|
|
metrics2.GetPlanCacheInstanceMemoryUsage(true).Set(float64(memUsage))
|
|
case <-do.exit:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// planCacheEvictTrigger triggers the plan cache eviction periodically.
|
|
func (do *Domain) planCacheEvictTrigger() {
|
|
defer util.Recover(metrics.LabelDomain, "planCacheEvictTrigger", nil, false)
|
|
ticker := time.NewTicker(time.Second * 30) // 30s by default
|
|
defer func() {
|
|
ticker.Stop()
|
|
logutil.BgLogger().Info("planCacheEvictTrigger exited.")
|
|
}()
|
|
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
// trigger the eviction
|
|
begin := time.Now()
|
|
detailInfo, numEvicted := do.instancePlanCache.Evict()
|
|
metrics2.GetPlanCacheInstanceEvict().Set(float64(numEvicted))
|
|
if numEvicted > 0 {
|
|
logutil.BgLogger().Info("instance plan eviction",
|
|
zap.String("detail", detailInfo),
|
|
zap.Int64("num_evicted", int64(numEvicted)),
|
|
zap.Duration("time_spent", time.Since(begin)))
|
|
}
|
|
case <-do.exit:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func init() {
|
|
initByLDFlagsForGlobalKill()
|
|
}
|
|
|
|
var (
|
|
// ErrInfoSchemaExpired returns the error that information schema is out of date.
|
|
ErrInfoSchemaExpired = dbterror.ClassDomain.NewStd(errno.ErrInfoSchemaExpired)
|
|
// ErrInfoSchemaChanged returns the error that information schema is changed.
|
|
ErrInfoSchemaChanged = dbterror.ClassDomain.NewStdErr(errno.ErrInfoSchemaChanged,
|
|
mysql.Message(errno.MySQLErrName[errno.ErrInfoSchemaChanged].Raw+". "+kv.TxnRetryableMark, nil))
|
|
)
|
|
|
|
// SysProcesses holds the info of system processes.
|
|
type SysProcesses struct {
|
|
mu *sync.RWMutex
|
|
procMap map[uint64]sysproctrack.TrackProc
|
|
}
|
|
|
|
// Track tracks the sys process into procMap
|
|
func (s *SysProcesses) Track(id uint64, proc sysproctrack.TrackProc) error {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
if oldProc, ok := s.procMap[id]; ok && oldProc != proc {
|
|
return errors.Errorf("The ID is in use: %v", id)
|
|
}
|
|
s.procMap[id] = proc
|
|
proc.GetSessionVars().ConnectionID = id
|
|
proc.GetSessionVars().SQLKiller.Reset()
|
|
return nil
|
|
}
|
|
|
|
// UnTrack removes the sys process from procMap
|
|
func (s *SysProcesses) UnTrack(id uint64) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
if proc, ok := s.procMap[id]; ok {
|
|
delete(s.procMap, id)
|
|
proc.GetSessionVars().ConnectionID = 0
|
|
proc.GetSessionVars().SQLKiller.Reset()
|
|
}
|
|
}
|
|
|
|
// GetSysProcessList gets list of system ProcessInfo
|
|
func (s *SysProcesses) GetSysProcessList() map[uint64]*util.ProcessInfo {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
rs := make(map[uint64]*util.ProcessInfo)
|
|
for connID, proc := range s.procMap {
|
|
// if session is still tracked in this map, it's not returned to sysSessionPool yet
|
|
if pi := proc.ShowProcess(); pi != nil && pi.ID == connID {
|
|
rs[connID] = pi
|
|
}
|
|
}
|
|
return rs
|
|
}
|
|
|
|
// KillSysProcess kills sys process with specified ID
|
|
func (s *SysProcesses) KillSysProcess(id uint64) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
if proc, ok := s.procMap[id]; ok {
|
|
proc.GetSessionVars().SQLKiller.SendKillSignal(sqlkiller.QueryInterrupted)
|
|
}
|
|
}
|