// tidb/pkg/resourcegroup/runaway/manager.go

// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package runaway
import (
"context"
"sync"
"sync/atomic"
"time"
"github.com/jellydator/ttlcache/v3"
"github.com/pingcap/failpoint"
rmpb "github.com/pingcap/kvproto/pkg/resource_manager"
"github.com/pingcap/tidb/pkg/ddl"
"github.com/pingcap/tidb/pkg/infoschema"
"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/util"
"github.com/pingcap/tidb/pkg/util/generic"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/prometheus/client_golang/prometheus"
rmclient "github.com/tikv/pd/client/resource_group/controller"
"go.uber.org/zap"
)
const (
// ManualSource indicates that the item was added manually.
ManualSource = "manual"
// MaxWaitDuration is the max duration to wait for acquiring token buckets.
MaxWaitDuration = time.Second * 30
maxWatchListCap = 10000
maxWatchRecordChannelSize = 1024
runawayRecordFlushInterval = time.Second
runawayRecordGCInterval = time.Hour * 24
runawayRecordExpiredDuration = time.Hour * 24 * 7
runawayRecordGCBatchSize = 100
runawayRecordGCSelectBatchSize = runawayRecordGCBatchSize * 5
runawayLoopLogErrorIntervalCount = 1800
)
// Manager is used to detect and record runaway queries.
type Manager struct {
logOnce sync.Once
exit chan struct{}
// queryLock is used to avoid repeated additions. Since new items are also written to the system table,
// a lock is needed to ensure that checking whether a record is already in the watch list
// and adding the record happen atomically.
queryLock sync.Mutex
watchList *ttlcache.Cache[string, *QuarantineRecord]
// ActiveGroup tracks the number of active runaway watches per resource group.
ActiveGroup map[string]int64
ActiveLock sync.RWMutex
MetricsMap generic.SyncMap[string, prometheus.Counter]
ResourceGroupCtl *rmclient.ResourceGroupsController
serverID string
runawayQueriesChan chan *Record
quarantineChan chan *QuarantineRecord
// staleQuarantineRecord is used to clean outdated records. There are three scenarios:
// 1. The record has expired in the watch list.
// 2. The record to be added is itself already out of date,
// e.g. the TiDB cluster was paused and the record expired before restarting.
// 3. The record was added more than once.
// This channel replaces a periodic clean-up loop.
staleQuarantineRecord chan *QuarantineRecord
evictionCancel func()
insertionCancel func()
syncerInitialized atomic.Bool
// domain related fields
infoCache *infoschema.InfoCache
ddl ddl.DDL
// runawaySyncer is used to sync runaway watch records.
runawaySyncer *syncer
sysSessionPool util.SessionPool
}
// NewRunawayManager creates a new Manager.
func NewRunawayManager(resourceGroupCtl *rmclient.ResourceGroupsController, serverAddr string,
pool util.SessionPool, exit chan struct{}, infoCache *infoschema.InfoCache, ddl ddl.DDL) *Manager {
watchList := ttlcache.New[string, *QuarantineRecord](
ttlcache.WithTTL[string, *QuarantineRecord](ttlcache.NoTTL),
ttlcache.WithCapacity[string, *QuarantineRecord](maxWatchListCap),
ttlcache.WithDisableTouchOnHit[string, *QuarantineRecord](),
)
go watchList.Start()
staleQuarantineChan := make(chan *QuarantineRecord, maxWatchRecordChannelSize)
m := &Manager{
syncerInitialized: atomic.Bool{},
ResourceGroupCtl: resourceGroupCtl,
watchList: watchList,
serverID: serverAddr,
runawayQueriesChan: make(chan *Record, maxWatchRecordChannelSize),
quarantineChan: make(chan *QuarantineRecord, maxWatchRecordChannelSize),
staleQuarantineRecord: staleQuarantineChan,
ActiveGroup: make(map[string]int64),
MetricsMap: generic.NewSyncMap[string, prometheus.Counter](8),
sysSessionPool: pool,
exit: exit,
infoCache: infoCache,
ddl: ddl,
}
m.insertionCancel = watchList.OnInsertion(func(_ context.Context, i *ttlcache.Item[string, *QuarantineRecord]) {
m.ActiveLock.Lock()
m.ActiveGroup[i.Value().ResourceGroupName]++
m.ActiveLock.Unlock()
})
m.evictionCancel = watchList.OnEviction(func(_ context.Context, _ ttlcache.EvictionReason, i *ttlcache.Item[string, *QuarantineRecord]) {
m.ActiveLock.Lock()
m.ActiveGroup[i.Value().ResourceGroupName]--
m.ActiveLock.Unlock()
if i.Value().ID == 0 {
return
}
staleQuarantineChan <- i.Value()
})
m.runawaySyncer = newSyncer(pool)
return m
}
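// Illustrative sketch of how a caller might wire the manager up; the surrounding variable
// names (resourceGroupCtl, serverAddr, pool, exitCh, infoCache, ddlHandle) are placeholders
// assumed to be supplied by the caller, e.g. during domain startup, and are not defined here:
//
//	mgr := NewRunawayManager(resourceGroupCtl, serverAddr, pool, exitCh, infoCache, ddlHandle)
//	go mgr.RunawayRecordFlushLoop()
//	go mgr.RunawayWatchSyncLoop()
//	// Once the watch syncer is ready (prepared outside this file), mark it initialized so
//	// markQuarantine and markRunaway start forwarding records.
//	mgr.MarkSyncerInitialized()
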
// RunawayRecordFlushLoop is used to flush runaway records.
func (rm *Manager) RunawayRecordFlushLoop() {
defer util.Recover(metrics.LabelDomain, "runawayRecordFlushLoop", nil, false)
// This timer is used to batch the flushing of records; with a 1s interval,
// a record can be seen by the user within 1s.
runawayRecordFlushTimer := time.NewTimer(runawayRecordFlushInterval)
runawayRecordGCTicker := time.NewTicker(runawayRecordGCInterval)
failpoint.Inject("FastRunawayGC", func() {
runawayRecordFlushTimer.Stop()
runawayRecordGCTicker.Stop()
runawayRecordFlushTimer = time.NewTimer(time.Millisecond * 50)
runawayRecordGCTicker = time.NewTicker(time.Millisecond * 200)
})
fired := false
recordCh := rm.runawayRecordChan()
quarantineRecordCh := rm.quarantineRecordChan()
staleQuarantineRecordCh := rm.staleQuarantineRecordChan()
flushThreshold := flushThreshold()
records := make([]*Record, 0, flushThreshold)
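// flushRunawayRecords writes all buffered records to the system table in a single statement and resets the buffer.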
flushRunawayRecords := func() {
if len(records) == 0 {
return
}
sql, params := genRunawayQueriesStmt(records)
if _, err := ExecRCRestrictedSQL(rm.sysSessionPool, sql, params); err != nil {
logutil.BgLogger().Error("flush runaway records failed", zap.Error(err), zap.Int("count", len(records)))
}
records = records[:0]
}
for {
select {
case <-rm.exit:
return
case <-runawayRecordFlushTimer.C:
flushRunawayRecords()
fired = true
case r := <-recordCh:
records = append(records, r)
failpoint.Inject("FastRunawayGC", func() {
flushRunawayRecords()
})
if len(records) >= flushThreshold {
flushRunawayRecords()
} else if fired {
fired = false
// meet a new record, reset the timer.
runawayRecordFlushTimer.Reset(runawayRecordFlushInterval)
}
case <-runawayRecordGCTicker.C:
go rm.deleteExpiredRows(runawayRecordExpiredDuration)
case r := <-quarantineRecordCh:
go func() {
_, err := rm.AddRunawayWatch(r)
if err != nil {
logutil.BgLogger().Error("add runaway watch", zap.Error(err))
}
}()
case r := <-staleQuarantineRecordCh:
go func() {
for i := 0; i < 3; i++ {
err := handleRemoveStaleRunawayWatch(rm.sysSessionPool, r)
if err == nil {
break
}
logutil.BgLogger().Error("remove stale runaway watch", zap.Error(err))
time.Sleep(time.Second)
}
}()
}
}
}
// RunawayWatchSyncLoop is used to sync runaway watch records.
func (rm *Manager) RunawayWatchSyncLoop() {
defer util.Recover(metrics.LabelDomain, "runawayWatchSyncLoop", nil, false)
runawayWatchSyncTicker := time.NewTicker(watchSyncInterval)
count := 0
for {
select {
case <-rm.exit:
return
case <-runawayWatchSyncTicker.C:
err := rm.UpdateNewAndDoneWatch()
if err != nil {
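// Log at most one error per runawayLoopLogErrorIntervalCount consecutive failures to avoid flooding the log.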
if count %= runawayLoopLogErrorIntervalCount; count == 0 {
logutil.BgLogger().Warn("get runaway watch record failed", zap.Error(err))
}
count++
}
}
}
}
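// markQuarantine builds a QuarantineRecord, adds it to the local watch list immediately,
// and, once the syncer is initialized, queues it for asynchronous persistence.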
func (rm *Manager) markQuarantine(
resourceGroupName, convict string,
watchType rmpb.RunawayWatchType, action rmpb.RunawayAction, switchGroupName string,
ttl time.Duration, now *time.Time,
) {
var endTime time.Time
if ttl > 0 {
endTime = now.UTC().Add(ttl)
}
record := &QuarantineRecord{
ResourceGroupName: resourceGroupName,
StartTime: now.UTC(),
EndTime: endTime,
Watch: watchType,
WatchText: convict,
Source: rm.serverID,
Action: action,
SwitchGroupName: switchGroupName,
}
// Add the record (still without an ID) to this TiDB instance's watch list right now.
rm.addWatchList(record, ttl, false)
if !rm.syncerInitialized.Load() {
rm.logOnce.Do(func() {
logutil.BgLogger().Warn("runaway syncer is not initialized, so can't records about runaway")
})
return
}
select {
case rm.quarantineChan <- record:
default:
// TODO: warn when records are discarded instead of being flushed.
}
}
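// addWatchList puts the record into the local watch list with the given ttl.
// With force, the record unconditionally replaces any cached entry with a different ID;
// without force, it only fills an empty slot or replaces an entry that has no ID yet,
// and a record that conflicts with a different existing entry is pushed to
// staleQuarantineRecord for clean-up.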
func (rm *Manager) addWatchList(record *QuarantineRecord, ttl time.Duration, force bool) {
key := record.getRecordKey()
// This is a pre-check: in most cases a key is not added to the watch list repeatedly.
item := rm.getWatchFromWatchList(key)
if force {
rm.queryLock.Lock()
defer rm.queryLock.Unlock()
if item != nil {
// check the ID because of the earlier scan.
if item.ID == record.ID {
return
}
rm.watchList.Delete(key)
}
rm.watchList.Set(key, record, ttl)
} else {
if item == nil {
rm.queryLock.Lock()
// watchList.Get checks whether the record is stale, so only add the new record if it returns nil.
if rm.watchList.Get(key) == nil {
rm.watchList.Set(key, record, ttl)
} else {
rm.staleQuarantineRecord <- record
}
rm.queryLock.Unlock()
} else if item.ID == 0 {
// Replace the record that has no ID yet.
rm.queryLock.Lock()
defer rm.queryLock.Unlock()
rm.watchList.Set(key, record, ttl)
} else if item.ID != record.ID {
// check the ID because of the earlier scan.
rm.staleQuarantineRecord <- record
}
}
}
// GetWatchList is used to get all watch items.
func (rm *Manager) GetWatchList() []*QuarantineRecord {
items := rm.watchList.Items()
ret := make([]*QuarantineRecord, 0, len(items))
for _, item := range items {
ret = append(ret, item.Value())
}
return ret
}
func (rm *Manager) getWatchFromWatchList(key string) *QuarantineRecord {
item := rm.watchList.Get(key)
if item != nil {
return item.Value()
}
return nil
}
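// markRunaway queues a runaway query record for asynchronous flushing once the syncer is initialized.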
func (rm *Manager) markRunaway(resourceGroupName, originalSQL, planDigest, action, matchType string, now *time.Time) {
source := rm.serverID
if !rm.syncerInitialized.Load() {
rm.logOnce.Do(func() {
logutil.BgLogger().Warn("runaway syncer is not initialized, so can't records about runaway")
})
return
}
select {
case rm.runawayQueriesChan <- &Record{
ResourceGroupName: resourceGroupName,
Time: *now,
Match: matchType,
Action: action,
SQLText: originalSQL,
PlanDigest: planDigest,
Source: source,
}:
default:
// TODO: warn when records are discarded instead of being flushed.
}
}
// runawayRecordChan returns the channel of Record
func (rm *Manager) runawayRecordChan() <-chan *Record {
return rm.runawayQueriesChan
}
// quarantineRecordChan returns the channel of QuarantineRecord
func (rm *Manager) quarantineRecordChan() <-chan *QuarantineRecord {
return rm.quarantineChan
}
// staleQuarantineRecordChan returns the channel of staleQuarantineRecord
func (rm *Manager) staleQuarantineRecordChan() <-chan *QuarantineRecord {
return rm.staleQuarantineRecord
}
// examineWatchList checks whether the query is in the watch list.
func (rm *Manager) examineWatchList(resourceGroupName string, convict string) (bool, rmpb.RunawayAction, string) {
item := rm.getWatchFromWatchList(resourceGroupName + "/" + convict)
if item == nil {
return false, 0, ""
}
return true, item.Action, item.getSwitchGroupName()
}
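// Illustrative sketch of how an in-package caller might consult the watch list; the variable
// names and the handling of the returned action are assumptions, not prescribed by this file:
//
//	if matched, action, switchGroup := rm.examineWatchList(resourceGroupName, convict); matched {
//		// Apply the recorded rmpb.RunawayAction, e.g. kill the query or switch it to switchGroup.
//		_ = action
//		_ = switchGroup
//	}
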
// Stop stops the watchList, which is a TTL cache.
func (rm *Manager) Stop() {
if rm == nil {
return
}
if rm.watchList != nil {
rm.watchList.Stop()
}
}
// UpdateNewAndDoneWatch is used to update new and done watch items.
func (rm *Manager) UpdateNewAndDoneWatch() error {
rm.runawaySyncer.mu.Lock()
defer rm.runawaySyncer.mu.Unlock()
records, err := rm.runawaySyncer.getNewWatchRecords()
if err != nil {
return err
}
for _, r := range records {
rm.AddWatch(r)
}
doneRecords, err := rm.runawaySyncer.getNewWatchDoneRecords()
if err != nil {
return err
}
for _, r := range doneRecords {
rm.removeWatch(r)
}
return nil
}
// AddWatch is used to add watch items from the system table.
func (rm *Manager) AddWatch(record *QuarantineRecord) {
ttl := time.Until(record.EndTime)
if record.EndTime.Equal(NullTime) {
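// ttl == 0 maps to ttlcache's DefaultTTL; the cache is created with NoTTL as its default
// (see NewRunawayManager), so the watch never expires.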
ttl = 0
} else if ttl <= 0 {
rm.staleQuarantineRecord <- record
return
}
// A manually added record replaces the old record.
force := record.Source == ManualSource
rm.addWatchList(record, ttl, force)
}
// removeWatch removes a watch item; it is triggered by reading the done-watch system table.
func (rm *Manager) removeWatch(record *QuarantineRecord) {
// Check whether the cached record is the same as the record being removed.
rm.queryLock.Lock()
defer rm.queryLock.Unlock()
item := rm.getWatchFromWatchList(record.getRecordKey())
if item == nil {
return
}
if item.ID == record.ID {
rm.watchList.Delete(record.getRecordKey())
}
}
// flushThreshold returns the number of buffered records that triggers a flush.
func flushThreshold() int {
return maxWatchRecordChannelSize / 2
}
// MarkSyncerInitialized marks the syncer as initialized.
func (rm *Manager) MarkSyncerInitialized() {
rm.syncerInitialized.Store(true)
}
// IsSyncerInitialized is only used in tests.
func (rm *Manager) IsSyncerInitialized() bool {
return rm.syncerInitialized.Load()
}