diff --git a/metrics/grafana/tidb.json b/metrics/grafana/tidb.json index 801d8f442c..313e51a808 100644 --- a/metrics/grafana/tidb.json +++ b/metrics/grafana/tidb.json @@ -11568,6 +11568,224 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Rate of TiDB stats cache LRU operations per second, grouped by operation type", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 89 + }, + "hiddenSeries": false, + "id": 246, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tidb_statistics_stats_cache_lru_op{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A", + "step": 30 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Stats Cache LRU OPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:90", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:91", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Tracked cost of the TiDB stats cache LRU per instance; the capacity series is hidden by default", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 96 + }, + "hiddenSeries": false, + "id": 247, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tidb_statistics_stats_cache_lru_val{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"track\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "track-{{instance}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "tidb_statistics_stats_cache_lru_val{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"capacity\"}", + "hide": true, + "interval": "", + "legendFormat": "capacity-{{instance}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Stats Cache LRU Cost", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": 
"bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, diff --git a/metrics/metrics.go b/metrics/metrics.go index 5d2f894425..b668db4261 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -176,6 +176,8 @@ func RegisterMetrics() { prometheus.MustRegister(LoadTableCacheDurationHistogram) prometheus.MustRegister(NonTransactionalDeleteCount) prometheus.MustRegister(MemoryUsage) + prometheus.MustRegister(StatsCacheLRUCounter) + prometheus.MustRegister(StatsCacheLRUGauge) tikvmetrics.InitMetrics(TiDB, TiKVClient) tikvmetrics.RegisterMetrics() diff --git a/metrics/stats.go b/metrics/stats.go index c4b74cf088..9c54831579 100644 --- a/metrics/stats.go +++ b/metrics/stats.go @@ -128,4 +128,19 @@ var ( Help: "Bucketed histogram of latency time (ms) of stats read during sync-load.", Buckets: prometheus.ExponentialBuckets(1, 2, 22), // 1ms ~ 1h }) + + StatsCacheLRUCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "tidb", + Subsystem: "statistics", + Name: "stats_cache_lru_op", + Help: "Counter of lru for statsCache operation", + }, []string{LblType}) + + StatsCacheLRUGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "tidb", + Subsystem: "statistics", + Name: "stats_cache_lru_val", + Help: "gauge of stats cache lru value", + }, []string{LblType}) ) diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index 3720a8ef50..317c54f0b3 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -544,12 +544,17 @@ func (h *Handle) GetPartitionStats(tblInfo *model.TableInfo, pid int64, opts ... 
func (h *Handle) updateStatsCache(newCache statsCache) (updated bool) { h.statsCache.Lock() oldCache := h.statsCache.Load().(statsCache) + enableQuota := oldCache.EnableQuota() + newCost := newCache.Cost() if oldCache.version < newCache.version || (oldCache.version == newCache.version && oldCache.minorVersion < newCache.minorVersion) { - h.statsCache.memTracker.Consume(newCache.Cost() - oldCache.Cost()) + h.statsCache.memTracker.Consume(newCost - oldCache.Cost()) h.statsCache.Store(newCache) updated = true } h.statsCache.Unlock() + if updated && enableQuota { + costGauge.Set(float64(newCost)) + } return } diff --git a/statistics/handle/lru_cache.go b/statistics/handle/lru_cache.go index 0f5fc50b48..ab4b05f268 100644 --- a/statistics/handle/lru_cache.go +++ b/statistics/handle/lru_cache.go @@ -19,9 +19,20 @@ import ( "math" "sync" + "github.com/pingcap/tidb/metrics" "github.com/pingcap/tidb/statistics" ) +var ( + missCounter = metrics.StatsCacheLRUCounter.WithLabelValues("miss") + hitCounter = metrics.StatsCacheLRUCounter.WithLabelValues("hit") + updateCounter = metrics.StatsCacheLRUCounter.WithLabelValues("update") + delCounter = metrics.StatsCacheLRUCounter.WithLabelValues("del") + evictCounter = metrics.StatsCacheLRUCounter.WithLabelValues("evict") + costGauge = metrics.StatsCacheLRUGauge.WithLabelValues("track") + capacityGauge = metrics.StatsCacheLRUGauge.WithLabelValues("capacity") +) + type statsInnerCache struct { sync.RWMutex elements map[int64]*lruMapElement @@ -50,6 +61,7 @@ func newInnerLruCache(c int64) *innerItemLruCache { if c < 1 { c = math.MaxInt64 } + capacityGauge.Set(float64(c)) return &innerItemLruCache{ capacity: c, cache: list.New(), @@ -261,6 +273,11 @@ func (s *statsInnerCache) SetCapacity(c int64) { s.lru.setCapacity(c) } +// EnableQuota implements statsCacheInner +func (s *statsInnerCache) EnableQuota() bool { + return true +} + func (s *statsInnerCache) onEvict(tblID int64) { element, exist := s.elements[tblID] if !exist { @@ -283,12 
+300,15 @@ func (s *statsInnerCache) capacity() int64 { func (c *innerItemLruCache) get(tblID, id int64) (*lruCacheItem, bool) { v, ok := c.elements[tblID] if !ok { + missCounter.Inc() return nil, false } ele, ok := v[id] if !ok { + missCounter.Inc() return nil, false } + hitCounter.Inc() c.cache.MoveToFront(ele) return ele.Value.(*lruCacheItem), true } @@ -302,6 +322,7 @@ func (c *innerItemLruCache) del(tblID, id int64) { if !ok { return } + delCounter.Inc() delete(c.elements[tblID], id) c.cache.Remove(ele) } @@ -309,6 +330,7 @@ func (c *innerItemLruCache) del(tblID, id int64) { func (c *innerItemLruCache) put(tblID, id int64, item statistics.TableCacheItem, itemMem statistics.CacheItemMemoryUsage, needEvict, needMove bool) { defer func() { + updateCounter.Inc() if needEvict { c.evictIfNeeded() } @@ -344,6 +366,7 @@ func (c *innerItemLruCache) put(tblID, id int64, item statistics.TableCacheItem, func (c *innerItemLruCache) evictIfNeeded() { curr := c.cache.Back() for c.trackingCost > c.capacity { + evictCounter.Inc() prev := curr.Prev() item := curr.Value.(*lruCacheItem) oldMem := item.innerMemUsage @@ -381,5 +404,6 @@ func (c *innerItemLruCache) setCapacity(capacity int64) { capacity = math.MaxInt64 } c.capacity = capacity + capacityGauge.Set(float64(c.capacity)) c.evictIfNeeded() } diff --git a/statistics/handle/statscache.go b/statistics/handle/statscache.go index 6a0ef4bbfd..9f06c09b02 100644 --- a/statistics/handle/statscache.go +++ b/statistics/handle/statscache.go @@ -38,6 +38,7 @@ type statsCacheInner interface { FreshMemUsage() Copy() statsCacheInner SetCapacity(int64) + EnableQuota() bool } func newStatsCache() statsCache { @@ -245,4 +246,10 @@ func (m *mapCache) Copy() statsCacheInner { return newM } +// SetCapacity implements statsCacheInner func (m *mapCache) SetCapacity(int64) {} + +// EnableQuota implements statsCacheInner +func (m *mapCache) EnableQuota() bool { + return false +}