From a3d74207cde534a520e080c3a21dd04da99102ee Mon Sep 17 00:00:00 2001 From: fzzf678 <108643977+fzzf678@users.noreply.github.com> Date: Wed, 2 Nov 2022 14:50:00 +0800 Subject: [PATCH] planner: add monitor for prepared plan cache memory usage (#38507) ref pingcap/tidb#37632 --- metrics/grafana/tidb.json | 211 +++++++++++++++++++++++++++++++++ metrics/metrics.go | 2 + metrics/server.go | 16 +++ planner/core/plan_cache_lru.go | 53 ++++++--- session/session.go | 3 + sessionctx/context.go | 1 + 6 files changed, 269 insertions(+), 17 deletions(-) diff --git a/metrics/grafana/tidb.json b/metrics/grafana/tidb.json index 0684067314..757953d97b 100644 --- a/metrics/grafana/tidb.json +++ b/metrics/grafana/tidb.json @@ -7473,6 +7473,217 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Total memory usage of all prepared plan cache in a instance", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 29 + }, + "hiddenSeries": false, + "id": 269, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tidb_server_plan_cache_instance_memory_usage{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Plan Cache Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:122", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:123", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": null, + "description": "TiDB prepared plan cache plan num\n", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 37 + }, + "hiddenSeries": false, + "id": 271, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tidb_server_plan_cache_instance_plan_num_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}\n", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 30 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Plan Cache Plan Num", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, diff --git a/metrics/metrics.go b/metrics/metrics.go index 3d6ba83979..2dfeb09b09 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -134,6 +134,8 @@ func RegisterMetrics() { prometheus.MustRegister(PanicCounter) prometheus.MustRegister(PlanCacheCounter) prometheus.MustRegister(PlanCacheMissCounter) + prometheus.MustRegister(PlanCacheInstanceMemoryUsage) + prometheus.MustRegister(PlanCacheInstancePlanNumCounter) prometheus.MustRegister(PseudoEstimation) prometheus.MustRegister(PacketIOCounter) prometheus.MustRegister(QueryDurationHistogram) diff --git a/metrics/server.go b/metrics/server.go index d88c84a254..4f0d147a4b 100644 --- a/metrics/server.go +++ b/metrics/server.go @@ -144,6 +144,22 @@ var ( Help: "Counter of plan cache miss.", }, []string{LblType}) + PlanCacheInstanceMemoryUsage = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "tidb", + Subsystem: "server", + Name: "plan_cache_instance_memory_usage", + Help: "Total plan cache memory usage of all sessions in a instance", + }, []string{LblType}) + + PlanCacheInstancePlanNumCounter = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "tidb", + Subsystem: "server", + Name: "plan_cache_instance_plan_num_total", + Help: "Counter of plan of all prepared plan cache in a instance", + }, []string{LblType}) + ReadFromTableCacheCounter = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: "tidb", diff --git a/planner/core/plan_cache_lru.go b/planner/core/plan_cache_lru.go index 7f7a1da30d..8489c2117f 100644 --- a/planner/core/plan_cache_lru.go +++ b/planner/core/plan_cache_lru.go @@ -18,6 +18,7 @@ import ( "sync" "github.com/pingcap/errors" + "github.com/pingcap/tidb/metrics" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/hack" "github.com/pingcap/tidb/util/kvcache" @@ -59,8 +60,7 @@ type LRUPlanCache struct { quota uint64 guard float64 - // MemTracker track the memory usage of prepared plan cache - memTracker *memory.Tracker + memoryUsageTotal int64 } // NewLRUPlanCache creates a PCLRUCache object, whose capacity is "capacity". @@ -71,7 +71,6 @@ func NewLRUPlanCache(capacity uint, guard float64, quota uint64, capacity = 100 logutil.BgLogger().Info("capacity of LRU cache is less than 1, will use default value(100) init cache") } - return &LRUPlanCache{ capacity: capacity, size: 0, @@ -80,7 +79,6 @@ func NewLRUPlanCache(capacity uint, guard float64, quota uint64, pickFromBucket: pickFromBucket, quota: quota, guard: guard, - memTracker: newTrackerForLRUPC(), } } @@ -116,8 +114,7 @@ func (l *LRUPlanCache) Put(key kvcache.Key, value kvcache.Value, paramTypes []*t bucket, bucketExist := l.buckets[hash] if bucketExist { if element, exist := l.pickFromBucket(bucket, paramTypes); exist { - l.memTracker.Consume(value.(*PlanCacheValue).MemoryUsage() - - element.Value.(*planCacheEntry).PlanValue.(*PlanCacheValue).MemoryUsage()) + l.updateInstanceMetric(&planCacheEntry{PlanKey: key, PlanValue: value}, element.Value.(*planCacheEntry)) element.Value.(*planCacheEntry).PlanValue = value l.lruList.MoveToFront(element) return @@ -133,7 +130,7 @@ func (l *LRUPlanCache) Put(key kvcache.Key, value kvcache.Value, paramTypes []*t element := l.lruList.PushFront(newCacheEntry) l.buckets[hash][element] = struct{}{} l.size++ - l.memTracker.Consume(newCacheEntry.MemoryUsage()) + l.updateInstanceMetric(newCacheEntry, nil) if l.size > l.capacity { l.removeOldest() } @@ -149,7 +146,7 @@ func (l *LRUPlanCache) Delete(key kvcache.Key) { bucket, bucketExist := l.buckets[hash] if bucketExist { for element := range bucket { - l.memTracker.Consume(-element.Value.(*planCacheEntry).MemoryUsage()) + l.updateInstanceMetric(nil, element.Value.(*planCacheEntry)) l.lruList.Remove(element) l.size-- } @@ -163,11 +160,11 @@ func (l *LRUPlanCache) DeleteAll() { defer l.lock.Unlock() for lru := l.lruList.Back(); lru != nil; lru = l.lruList.Back() { + l.updateInstanceMetric(nil, lru.Value.(*planCacheEntry)) l.lruList.Remove(lru) l.size-- } l.buckets = make(map[string]map[*list.Element]struct{}, 1) - l.memTracker = newTrackerForLRUPC() } // Size gets the current cache size. @@ -198,7 +195,16 @@ func (l *LRUPlanCache) MemoryUsage() (sum int64) { if l == nil { return } - return l.memTracker.BytesConsumed() + return l.memoryUsageTotal +} + +// Close do some clean work for LRUPlanCache when close the session +func (l *LRUPlanCache) Close() { + if l == nil { + return + } + metrics.PlanCacheInstanceMemoryUsage.WithLabelValues("instance").Sub(float64(l.memoryUsageTotal)) + metrics.PlanCacheInstancePlanNumCounter.WithLabelValues("plan_num").Sub(float64(l.size)) } // removeOldest removes the oldest element from the cache. @@ -211,7 +217,7 @@ func (l *LRUPlanCache) removeOldest() { l.onEvict(lru.Value.(*planCacheEntry).PlanKey, lru.Value.(*planCacheEntry).PlanValue) } - l.memTracker.Consume(-lru.Value.(*planCacheEntry).MemoryUsage()) + l.updateInstanceMetric(nil, lru.Value.(*planCacheEntry)) l.lruList.Remove(lru) l.removeFromBucket(lru) l.size-- @@ -251,10 +257,23 @@ func PickPlanFromBucket(bucket map[*list.Element]struct{}, paramTypes []*types.F return nil, false } -// newTrackerForLRUPC return a tracker which consumed emptyLRUPlanCacheSize -// todo: pass label when track general plan cache memory -func newTrackerForLRUPC() *memory.Tracker { - m := memory.NewTracker(memory.LabelForPreparedPlanCache, -1) - //todo: maybe need attach here - return m +// updateInstanceMetric update the memory usage and plan num for show in grafana +func (l *LRUPlanCache) updateInstanceMetric(in, out *planCacheEntry) { + if l == nil { + return + } + + if in != nil && out != nil { // replace plan + metrics.PlanCacheInstanceMemoryUsage.WithLabelValues("instance").Sub(float64(out.MemoryUsage())) + metrics.PlanCacheInstanceMemoryUsage.WithLabelValues("instance").Add(float64(in.MemoryUsage())) + l.memoryUsageTotal += in.MemoryUsage() - out.MemoryUsage() + } else if in != nil { // put plan + metrics.PlanCacheInstanceMemoryUsage.WithLabelValues("instance").Add(float64(in.MemoryUsage())) + l.memoryUsageTotal += in.MemoryUsage() + metrics.PlanCacheInstancePlanNumCounter.WithLabelValues("plan_num").Add(1) + } else { // delete plan + metrics.PlanCacheInstanceMemoryUsage.WithLabelValues("instance").Sub(float64(out.MemoryUsage())) + l.memoryUsageTotal -= out.MemoryUsage() + metrics.PlanCacheInstancePlanNumCounter.WithLabelValues("plan_num").Sub(1) + } } diff --git a/session/session.go b/session/session.go index 8a90a28ed7..1d3e8c1775 100644 --- a/session/session.go +++ b/session/session.go @@ -2572,6 +2572,9 @@ func (s *session) Close() { s.stmtStats.SetFinished() } s.ClearDiskFullOpt() + if s.preparedPlanCache != nil { + s.preparedPlanCache.Close() + } } // GetSessionVars implements the context.Context interface. diff --git a/sessionctx/context.go b/sessionctx/context.go index 28d6b8b16b..f51bdedef8 100644 --- a/sessionctx/context.go +++ b/sessionctx/context.go @@ -60,6 +60,7 @@ type PlanCache interface { DeleteAll() Size() int SetCapacity(capacity uint) error + Close() } // Context is an interface for transaction and executive args environment.