560 lines
17 KiB
Go
560 lines
17 KiB
Go
// Copyright 2018 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package memory
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"sort"
|
|
"strconv"
|
|
"sync"
|
|
"sync/atomic"
|
|
)
|
|
|
|
// Tracker is used to track the memory usage during query execution.
|
|
// It contains an optional limit and can be arranged into a tree structure
|
|
// such that the consumption tracked by a Tracker is also tracked by
|
|
// its ancestors. The main idea comes from Apache Impala:
|
|
//
|
|
// https://github.com/cloudera/Impala/blob/cdh5-trunk/be/src/runtime/mem-tracker.h
|
|
//
|
|
// By default, memory consumption is tracked via calls to "Consume()", either to
|
|
// the tracker itself or to one of its descendants. A typical sequence of calls
|
|
// for a single Tracker is:
|
|
// 1. tracker.SetLabel() / tracker.SetActionOnExceed() / tracker.AttachTo()
|
|
// 2. tracker.Consume() / tracker.ReplaceChild() / tracker.BytesConsumed()
|
|
//
|
|
// NOTE: We only protect concurrent access to "bytesConsumed" and "children",
|
|
// that is to say:
|
|
// 1. Only "BytesConsumed()", "Consume()" and "AttachTo()" are thread-safe.
|
|
// 2. Other operations of a Tracker tree are not thread-safe.
|
|
//
|
|
// We have two limits for the memory quota: soft limit and hard limit.
|
|
// If the soft limit is exceeded, we will trigger the action that alleviates the
|
|
// speed of memory growth. The soft limit is hard-coded as `0.8*hard limit`.
|
|
// The actions that could be triggered are: AggSpillDiskAction.
|
|
//
|
|
// If the hard limit is exceeded, we will trigger the action that immediately
|
|
// reduces memory usage. The hard limit is set by the config item `mem-quota-query`
|
|
// or the system variable `tidb_mem_query_quota`.
|
|
// The actions that could be triggered are: SpillDiskAction, SortAndSpillDiskAction, rateLimitAction,
|
|
// PanicOnExceed, globalPanicOnExceed, LogOnExceed.
|
|
type Tracker struct {
	// mu guards children.
	mu struct {
		sync.Mutex
		// The children memory trackers, grouped by label. If the Tracker is the Global Tracker, like
		// executor.GlobalDiskUsageTracker, we don't maintain its children in order to avoid mutex contention.
		children map[int][]*Tracker
	}
	// actionMuForHardLimit holds the action invoked by Consume once bytesHardLimit is reached.
	actionMuForHardLimit actionMu
	// actionMuForSoftLimit holds the action invoked by Consume once bytesSoftLimit is reached.
	actionMuForSoftLimit actionMu
	// parMu guards parent; see getParent / setParent.
	parMu struct {
		sync.Mutex
		parent *Tracker // The parent memory tracker.
	}

	label          int   // Label of this "Tracker".
	bytesConsumed  int64 // Consumed bytes; accessed with sync/atomic.
	bytesHardLimit int64 // bytesHardLimit <= 0 means no limit.
	bytesSoftLimit int64 // Derived as softScale * bytesHardLimit; <= 0 means no soft limit.
	maxConsumed    int64 // max number of bytes consumed during execution; accessed with sync/atomic.
	isGlobal       bool  // isGlobal indicates whether this tracker is a global tracker.
}
|
|
|
|
// actionMu pairs an ActionOnExceed with the mutex that SetActionOnExceed,
// the FallbackOldAndSetNewAction* methods and Consume hold while replacing
// or invoking it.
type actionMu struct {
	sync.Mutex
	actionOnExceed ActionOnExceed
}
|
|
|
|
// softScale means the scale of the soft limit to the hard limit.
|
|
const softScale = 0.8 // bytesSoftLimit is computed as softScale * bytesHardLimit.
|
|
|
|
// InitTracker initializes a memory tracker.
|
|
// 1. "label" is the label used in the usage string.
|
|
// 2. "bytesLimit <= 0" means no limit.
|
|
// For a common (non-global) tracker, isGlobal defaults to false.
|
|
func InitTracker(t *Tracker, label int, bytesLimit int64, action ActionOnExceed) {
|
|
t.mu.children = nil
|
|
t.actionMuForHardLimit.actionOnExceed = action
|
|
t.actionMuForSoftLimit.actionOnExceed = nil
|
|
t.parMu.parent = nil
|
|
|
|
t.label = label
|
|
t.bytesHardLimit = bytesLimit
|
|
t.bytesSoftLimit = int64(float64(bytesLimit) * softScale)
|
|
t.maxConsumed = 0
|
|
t.isGlobal = false
|
|
}
|
|
|
|
// NewTracker creates a memory tracker.
|
|
// 1. "label" is the label used in the usage string.
|
|
// 2. "bytesLimit <= 0" means no limit.
|
|
// For a common (non-global) tracker, isGlobal defaults to false.
|
|
func NewTracker(label int, bytesLimit int64) *Tracker {
|
|
t := &Tracker{
|
|
label: label,
|
|
bytesHardLimit: bytesLimit,
|
|
}
|
|
t.bytesSoftLimit = int64(float64(bytesLimit) * softScale)
|
|
t.actionMuForHardLimit.actionOnExceed = &LogOnExceed{}
|
|
t.isGlobal = false
|
|
return t
|
|
}
|
|
|
|
// NewGlobalTracker creates a global tracker; its isGlobal defaults to true.
|
|
func NewGlobalTracker(label int, bytesLimit int64) *Tracker {
|
|
t := &Tracker{
|
|
label: label,
|
|
bytesHardLimit: bytesLimit,
|
|
}
|
|
t.bytesSoftLimit = int64(float64(bytesLimit) * softScale)
|
|
t.actionMuForHardLimit.actionOnExceed = &LogOnExceed{}
|
|
t.isGlobal = true
|
|
return t
|
|
}
|
|
|
|
// CheckBytesLimit checks whether the bytes limit of the tracker is equal to a value.
|
|
// Only used in test.
|
|
func (t *Tracker) CheckBytesLimit(val int64) bool {
|
|
return t.bytesHardLimit == val
|
|
}
|
|
|
|
// SetBytesLimit sets the bytes limit for this tracker.
|
|
// "bytesHardLimit <= 0" means no limit.
|
|
func (t *Tracker) SetBytesLimit(bytesLimit int64) {
|
|
t.bytesHardLimit = bytesLimit
|
|
t.bytesSoftLimit = int64(float64(bytesLimit) * softScale)
|
|
}
|
|
|
|
// GetBytesLimit gets the bytes limit for this tracker.
|
|
// "bytesHardLimit <= 0" means no limit.
|
|
func (t *Tracker) GetBytesLimit() int64 {
|
|
return t.bytesHardLimit
|
|
}
|
|
|
|
// CheckExceed checks whether the consumed bytes have reached the hard limit of this tracker.
|
|
func (t *Tracker) CheckExceed() bool {
|
|
return atomic.LoadInt64(&t.bytesConsumed) >= t.bytesHardLimit && t.bytesHardLimit > 0
|
|
}
|
|
|
|
// SetActionOnExceed sets the action when memory usage exceeds bytesHardLimit.
|
|
func (t *Tracker) SetActionOnExceed(a ActionOnExceed) {
|
|
t.actionMuForHardLimit.Lock()
|
|
t.actionMuForHardLimit.actionOnExceed = a
|
|
t.actionMuForHardLimit.Unlock()
|
|
}
|
|
|
|
// FallbackOldAndSetNewAction sets the action when memory usage exceeds bytesHardLimit
|
|
// and set the original action as its fallback.
|
|
func (t *Tracker) FallbackOldAndSetNewAction(a ActionOnExceed) {
|
|
t.actionMuForHardLimit.Lock()
|
|
defer t.actionMuForHardLimit.Unlock()
|
|
t.actionMuForHardLimit.actionOnExceed = reArrangeFallback(t.actionMuForHardLimit.actionOnExceed, a)
|
|
}
|
|
|
|
// FallbackOldAndSetNewActionForSoftLimit sets the action when memory usage exceeds bytesSoftLimit
|
|
// and set the original action as its fallback.
|
|
func (t *Tracker) FallbackOldAndSetNewActionForSoftLimit(a ActionOnExceed) {
|
|
t.actionMuForSoftLimit.Lock()
|
|
defer t.actionMuForSoftLimit.Unlock()
|
|
t.actionMuForSoftLimit.actionOnExceed = reArrangeFallback(t.actionMuForSoftLimit.actionOnExceed, a)
|
|
}
|
|
|
|
// GetFallbackForTest gets the OOM action; used by tests.
|
|
func (t *Tracker) GetFallbackForTest() ActionOnExceed {
|
|
t.actionMuForHardLimit.Lock()
|
|
defer t.actionMuForHardLimit.Unlock()
|
|
return t.actionMuForHardLimit.actionOnExceed
|
|
}
|
|
|
|
// reArrangeFallback merges two action chains and rearranges them by priority in descending order.
|
|
func reArrangeFallback(a ActionOnExceed, b ActionOnExceed) ActionOnExceed {
|
|
if a == nil {
|
|
return b
|
|
}
|
|
if b == nil {
|
|
return a
|
|
}
|
|
if a.GetPriority() < b.GetPriority() {
|
|
a, b = b, a
|
|
a.SetFallback(b)
|
|
} else {
|
|
a.SetFallback(reArrangeFallback(a.GetFallback(), b))
|
|
}
|
|
return a
|
|
}
|
|
|
|
// SetLabel sets the label of a Tracker.
|
|
func (t *Tracker) SetLabel(label int) {
	// Plain assignment with no locking. Children are indexed by label in the
	// parent's map (see AttachTo), and that index is not updated here.
	t.label = label
}
|
|
|
|
// Label gets the label of a Tracker.
|
|
func (t *Tracker) Label() int {
|
|
return t.label
|
|
}
|
|
|
|
// AttachTo attaches this memory tracker as a child to another Tracker. If it
|
|
// already has a parent, this function will remove it from the old parent.
|
|
// Its consumed memory usage is used to update all its ancestors.
|
|
func (t *Tracker) AttachTo(parent *Tracker) {
|
|
oldParent := t.getParent()
|
|
if oldParent != nil {
|
|
oldParent.remove(t)
|
|
}
|
|
parent.mu.Lock()
|
|
if parent.mu.children == nil {
|
|
parent.mu.children = make(map[int][]*Tracker)
|
|
}
|
|
parent.mu.children[t.label] = append(parent.mu.children[t.label], t)
|
|
parent.mu.Unlock()
|
|
|
|
t.setParent(parent)
|
|
parent.Consume(t.BytesConsumed())
|
|
}
|
|
|
|
// Detach detaches the tracker from its parent, then sets its parent to nil.
|
|
func (t *Tracker) Detach() {
|
|
parent := t.getParent()
|
|
if parent == nil {
|
|
return
|
|
}
|
|
parent.remove(t)
|
|
t.mu.Lock()
|
|
defer t.mu.Unlock()
|
|
t.setParent(nil)
|
|
}
|
|
|
|
func (t *Tracker) remove(oldChild *Tracker) {
|
|
found := false
|
|
label := oldChild.label
|
|
t.mu.Lock()
|
|
if t.mu.children != nil {
|
|
children := t.mu.children[label]
|
|
for i, child := range children {
|
|
if child == oldChild {
|
|
children = append(children[:i], children[i+1:]...)
|
|
if len(children) > 0 {
|
|
t.mu.children[label] = children
|
|
} else {
|
|
delete(t.mu.children, label)
|
|
}
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
}
|
|
t.mu.Unlock()
|
|
if found {
|
|
oldChild.setParent(nil)
|
|
t.Consume(-oldChild.BytesConsumed())
|
|
}
|
|
}
|
|
|
|
// ReplaceChild removes the old child specified in "oldChild" and adds a new
|
|
// child specified in "newChild". old child's memory consumption will be
|
|
// removed and new child's memory consumption will be added.
|
|
func (t *Tracker) ReplaceChild(oldChild, newChild *Tracker) {
|
|
if newChild == nil {
|
|
t.remove(oldChild)
|
|
return
|
|
}
|
|
|
|
if oldChild.label != newChild.label {
|
|
t.remove(oldChild)
|
|
newChild.AttachTo(t)
|
|
return
|
|
}
|
|
|
|
newConsumed := newChild.BytesConsumed()
|
|
newChild.setParent(t)
|
|
|
|
label := oldChild.label
|
|
t.mu.Lock()
|
|
if t.mu.children != nil {
|
|
children := t.mu.children[label]
|
|
for i, child := range children {
|
|
if child != oldChild {
|
|
continue
|
|
}
|
|
|
|
newConsumed -= oldChild.BytesConsumed()
|
|
oldChild.setParent(nil)
|
|
children[i] = newChild
|
|
t.mu.children[label] = children
|
|
break
|
|
}
|
|
}
|
|
t.mu.Unlock()
|
|
|
|
t.Consume(newConsumed)
|
|
}
|
|
|
|
// Consume is used to consume a memory usage. "bytes" can be a negative value,
|
|
// which means this is a memory release operation. When memory usage of a tracker
|
|
// exceeds its bytesSoftLimit/bytesHardLimit, the tracker calls its action, so does each of its ancestors.
|
|
func (t *Tracker) Consume(bytes int64) {
|
|
if bytes == 0 {
|
|
return
|
|
}
|
|
var rootExceed, rootExceedForSoftLimit *Tracker
|
|
for tracker := t; tracker != nil; tracker = tracker.getParent() {
|
|
bytesConsumed := atomic.AddInt64(&tracker.bytesConsumed, bytes)
|
|
if bytesConsumed >= tracker.bytesHardLimit && tracker.bytesHardLimit > 0 {
|
|
rootExceed = tracker
|
|
}
|
|
if bytesConsumed >= tracker.bytesSoftLimit && tracker.bytesSoftLimit > 0 {
|
|
rootExceedForSoftLimit = tracker
|
|
}
|
|
|
|
for {
|
|
maxNow := atomic.LoadInt64(&tracker.maxConsumed)
|
|
consumed := atomic.LoadInt64(&tracker.bytesConsumed)
|
|
if consumed > maxNow && !atomic.CompareAndSwapInt64(&tracker.maxConsumed, maxNow, consumed) {
|
|
continue
|
|
}
|
|
break
|
|
}
|
|
}
|
|
|
|
tryAction := func(mu *actionMu, tracker *Tracker) {
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
if mu.actionOnExceed != nil {
|
|
mu.actionOnExceed.Action(tracker)
|
|
}
|
|
}
|
|
|
|
if bytes > 0 && rootExceedForSoftLimit != nil {
|
|
tryAction(&rootExceedForSoftLimit.actionMuForSoftLimit, rootExceedForSoftLimit)
|
|
}
|
|
if bytes > 0 && rootExceed != nil {
|
|
tryAction(&rootExceed.actionMuForHardLimit, rootExceed)
|
|
}
|
|
}
|
|
|
|
// BytesConsumed returns the consumed memory usage value in bytes.
|
|
func (t *Tracker) BytesConsumed() int64 {
|
|
return atomic.LoadInt64(&t.bytesConsumed)
|
|
}
|
|
|
|
// MaxConsumed returns max number of bytes consumed during execution.
|
|
func (t *Tracker) MaxConsumed() int64 {
|
|
return atomic.LoadInt64(&t.maxConsumed)
|
|
}
|
|
|
|
// SearchTrackerWithoutLock searches the specific tracker under this tracker without lock.
|
|
func (t *Tracker) SearchTrackerWithoutLock(label int) *Tracker {
|
|
if t.label == label {
|
|
return t
|
|
}
|
|
children := t.mu.children[label]
|
|
if len(children) > 0 {
|
|
return children[0]
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// String returns the string representation of this Tracker tree.
|
|
func (t *Tracker) String() string {
|
|
buffer := bytes.NewBufferString("\n")
|
|
t.toString("", buffer)
|
|
return buffer.String()
|
|
}
|
|
|
|
func (t *Tracker) toString(indent string, buffer *bytes.Buffer) {
|
|
fmt.Fprintf(buffer, "%s\"%d\"{\n", indent, t.label)
|
|
if t.bytesHardLimit > 0 {
|
|
fmt.Fprintf(buffer, "%s \"quota\": %s\n", indent, t.FormatBytes(t.bytesHardLimit))
|
|
}
|
|
fmt.Fprintf(buffer, "%s \"consumed\": %s\n", indent, t.FormatBytes(t.BytesConsumed()))
|
|
|
|
t.mu.Lock()
|
|
labels := make([]int, 0, len(t.mu.children))
|
|
for label := range t.mu.children {
|
|
labels = append(labels, label)
|
|
}
|
|
sort.Ints(labels)
|
|
for _, label := range labels {
|
|
children := t.mu.children[label]
|
|
for _, child := range children {
|
|
child.toString(indent+" ", buffer)
|
|
}
|
|
}
|
|
t.mu.Unlock()
|
|
buffer.WriteString(indent + "}\n")
|
|
}
|
|
|
|
// FormatBytes formats a byte count for display, pruning precision before formatting.
|
|
func (t *Tracker) FormatBytes(numBytes int64) string {
|
|
return FormatBytes(numBytes)
|
|
}
|
|
|
|
// BytesToString converts the memory consumption to a readable string.
|
|
func BytesToString(numBytes int64) string {
|
|
GB := float64(numBytes) / float64(byteSizeGB)
|
|
if GB > 1 {
|
|
return fmt.Sprintf("%v GB", GB)
|
|
}
|
|
|
|
MB := float64(numBytes) / float64(byteSizeMB)
|
|
if MB > 1 {
|
|
return fmt.Sprintf("%v MB", MB)
|
|
}
|
|
|
|
KB := float64(numBytes) / float64(byteSizeKB)
|
|
if KB > 1 {
|
|
return fmt.Sprintf("%v KB", KB)
|
|
}
|
|
|
|
return fmt.Sprintf("%v Bytes", numBytes)
|
|
}
|
|
|
|
// Unit sizes, in bytes, used when rendering byte counts.
const (
	byteSizeBB int64 = 1
	byteSizeKB int64 = 1 << 10
	byteSizeMB int64 = 1 << 20
	byteSizeGB int64 = 1 << 30
)
|
|
|
|
// FormatBytes formats a byte count for display, pruning precision before formatting.
|
|
func FormatBytes(numBytes int64) string {
|
|
if numBytes <= byteSizeKB {
|
|
return BytesToString(numBytes)
|
|
}
|
|
unit, unitStr := getByteUnit(numBytes)
|
|
if unit == byteSizeBB {
|
|
return BytesToString(numBytes)
|
|
}
|
|
v := float64(numBytes) / float64(unit)
|
|
decimal := 1
|
|
if numBytes%unit == 0 {
|
|
decimal = 0
|
|
} else if v < 10 {
|
|
decimal = 2
|
|
}
|
|
return fmt.Sprintf("%v %s", strconv.FormatFloat(v, 'f', decimal, 64), unitStr)
|
|
}
|
|
|
|
func getByteUnit(b int64) (int64, string) {
|
|
if b > byteSizeGB {
|
|
return byteSizeGB, "GB"
|
|
} else if b > byteSizeMB {
|
|
return byteSizeMB, "MB"
|
|
} else if b > byteSizeKB {
|
|
return byteSizeKB, "KB"
|
|
}
|
|
return byteSizeBB, "Bytes"
|
|
}
|
|
|
|
// AttachToGlobalTracker attaches the tracker to the global tracker.
|
|
// AttachToGlobalTracker should be called at the initialization for the session executor's tracker
|
|
func (t *Tracker) AttachToGlobalTracker(globalTracker *Tracker) {
|
|
if globalTracker == nil {
|
|
return
|
|
}
|
|
if !globalTracker.isGlobal {
|
|
panic("Attach to a non-GlobalTracker")
|
|
}
|
|
parent := t.getParent()
|
|
if parent != nil {
|
|
if parent.isGlobal {
|
|
parent.Consume(-t.BytesConsumed())
|
|
} else {
|
|
parent.remove(t)
|
|
}
|
|
}
|
|
t.setParent(globalTracker)
|
|
globalTracker.Consume(t.BytesConsumed())
|
|
}
|
|
|
|
// DetachFromGlobalTracker detaches the tracker from its parent.
|
|
// Note that this function may only be called when the parent of this tracker is a Global Tracker.
|
|
// Otherwise it should use Detach
|
|
func (t *Tracker) DetachFromGlobalTracker() {
|
|
parent := t.getParent()
|
|
if parent == nil {
|
|
return
|
|
}
|
|
if !parent.isGlobal {
|
|
panic("Detach from a non-GlobalTracker")
|
|
}
|
|
parent.Consume(-t.BytesConsumed())
|
|
t.setParent(nil)
|
|
}
|
|
|
|
// ReplaceBytesUsed replaces the consumed bytes of the tracker with the given value.
|
|
func (t *Tracker) ReplaceBytesUsed(bytes int64) {
|
|
t.Consume(-t.BytesConsumed())
|
|
t.Consume(bytes)
|
|
}
|
|
|
|
func (t *Tracker) getParent() *Tracker {
|
|
t.parMu.Lock()
|
|
defer t.parMu.Unlock()
|
|
return t.parMu.parent
|
|
}
|
|
|
|
func (t *Tracker) setParent(parent *Tracker) {
|
|
t.parMu.Lock()
|
|
defer t.parMu.Unlock()
|
|
t.parMu.parent = parent
|
|
}
|
|
|
|
// Predefined tracker labels. They are consecutive negative integers starting
// at -1, assigned in declaration order. Only append new labels at the end:
// inserting, removing or reordering entries changes the values of every label
// that follows.
const (
	// LabelForSQLText represents the label of the SQL Text
	LabelForSQLText int = -(iota + 1)
	// LabelForIndexWorker represents the label of the index worker
	LabelForIndexWorker
	// LabelForInnerList represents the label of the inner list
	LabelForInnerList
	// LabelForInnerTable represents the label of the inner table
	LabelForInnerTable
	// LabelForOuterTable represents the label of the outer table
	LabelForOuterTable
	// LabelForCoprocessor represents the label of the coprocessor
	LabelForCoprocessor
	// LabelForChunkList represents the label of the chunk list
	LabelForChunkList
	// LabelForGlobalSimpleLRUCache represents the label of the Global SimpleLRUCache
	LabelForGlobalSimpleLRUCache
	// LabelForChunkListInDisk represents the label of the chunk list in disk
	LabelForChunkListInDisk
	// LabelForRowContainer represents the label of the row container
	LabelForRowContainer
	// LabelForGlobalStorage represents the label of the Global Storage
	LabelForGlobalStorage
	// LabelForGlobalMemory represents the label of the Global Memory
	LabelForGlobalMemory
	// LabelForBuildSideResult represents the label of the BuildSideResult
	LabelForBuildSideResult
	// LabelForRowChunks represents the label of the row chunks
	LabelForRowChunks
	// LabelForStatsCache represents the label of the stats cache
	LabelForStatsCache
	// LabelForOuterList represents the label of the outer list
	LabelForOuterList
	// LabelForApplyCache represents the label of the apply cache
	LabelForApplyCache
	// LabelForSimpleTask represents the label of the simple task
	LabelForSimpleTask
	// LabelForCTEStorage represents the label of CTE storage
	LabelForCTEStorage
	// LabelForIndexJoinInnerWorker represents the label of IndexJoin InnerWorker
	LabelForIndexJoinInnerWorker
	// LabelForIndexJoinOuterWorker represents the label of IndexJoin OuterWorker
	LabelForIndexJoinOuterWorker
)
|