159 lines
4.9 KiB
Go
159 lines
4.9 KiB
Go
// Copyright 2021 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package telemetry
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"strconv"
|
|
"sync"
|
|
"time"
|
|
|
|
pingcapErrors "github.com/pingcap/errors"
|
|
"github.com/pingcap/parser/mysql"
|
|
"github.com/pingcap/tidb/domain/infosync"
|
|
"github.com/pingcap/tidb/sessionctx"
|
|
"github.com/pingcap/tidb/util/logutil"
|
|
pmodel "github.com/prometheus/common/model"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
type slowQueryStats struct {
|
|
// Slow Query statistic buckets
|
|
SQBInfo *SlowQueryBucket `json:"slowQueryBucket"`
|
|
}
|
|
|
|
// SlowQueryBucket records the statistic information of slow query buckets.
// Each key is a bucket name and each value is the number of slow queries
// counted for that bucket.
//
// The bucket layout follows the definition in metrics/server.go:
// prometheus.ExponentialBuckets(0.001, 2, 28), i.e. 1ms ~ 1.5days.
type SlowQueryBucket map[string]int

// String renders the buckets as "{key:value,key:value}" for logging.
//
// A nil map is rendered as "nil" and an empty, non-nil map as "{}". Entries
// appear in map iteration order, so their order is not stable between calls.
func (bucketMap SlowQueryBucket) String() string {
	if bucketMap == nil {
		return "nil"
	}

	// Rough pre-size: two braces plus a short "key:value," per entry.
	buf := make([]byte, 0, 2+16*len(bucketMap))
	buf = append(buf, '{')
	for k, v := range bucketMap {
		// Write the separator before every entry except the first, so there is
		// never a trailing comma to trim (trimming used to eat the opening
		// brace of an empty map).
		if len(buf) > 1 {
			buf = append(buf, ',')
		}
		buf = append(buf, k...)
		buf = append(buf, ':')
		buf = strconv.AppendInt(buf, int64(v), 10)
	}
	buf = append(buf, '}')
	return string(buf)
}
|
|
|
|
const slowQueryBucketNum = 29 //prometheus.ExponentialBuckets(0.001, 2, 28), and 1 more +Inf
|
|
|
|
var (
|
|
// lastSQBInfo records last statistic information of slow query buckets
|
|
lastSQBInfo SlowQueryBucket
|
|
// currentSQBInfo records current statitic information of slow query buckets
|
|
currentSQBInfo SlowQueryBucket
|
|
slowQueryLock sync.Mutex
|
|
)
|
|
|
|
func getSlowQueryStats(ctx sessionctx.Context) (*slowQueryStats, error) {
|
|
slowQueryBucket, err := getSlowQueryBucket(ctx)
|
|
if err != nil {
|
|
logutil.BgLogger().Info(err.Error())
|
|
return nil, err
|
|
}
|
|
|
|
return &slowQueryStats{slowQueryBucket}, nil
|
|
}
|
|
|
|
// getSlowQueryBucket genenrates the delta SlowQueryBucket to report
|
|
func getSlowQueryBucket(ctx sessionctx.Context) (*SlowQueryBucket, error) {
|
|
// update currentSQBInfo first, then gen delta
|
|
if err := updateCurrentSQB(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
delta := calculateDeltaSQB()
|
|
return delta, nil
|
|
}
|
|
|
|
// updateCurrentSQB records current slow query buckets
|
|
func updateCurrentSQB(ctx sessionctx.Context) (err error) {
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
err = pingcapErrors.Errorf(fmt.Sprintln(r))
|
|
}
|
|
}()
|
|
|
|
pQueryCtx, cancel := context.WithTimeout(context.Background(), time.Second*30)
|
|
defer cancel()
|
|
pQueryTs := time.Now().Add(-time.Minute)
|
|
promQL := "tidb_server_slow_query_process_duration_seconds_bucket{sql_type=\"general\"}"
|
|
value, err := querySQLMetric(pQueryCtx, pQueryTs, promQL)
|
|
|
|
if err != nil && err != infosync.ErrPrometheusAddrIsNotSet {
|
|
logutil.BgLogger().Info("querySlowQueryMetric got error")
|
|
return err
|
|
}
|
|
if value == nil {
|
|
return
|
|
}
|
|
if value.Type() != pmodel.ValVector {
|
|
return errors.New("Prom vector expected, got " + value.Type().String())
|
|
}
|
|
promVec := value.(pmodel.Vector)
|
|
slowQueryLock.Lock()
|
|
for _, sample := range promVec {
|
|
metric := sample.Metric
|
|
bucketName := metric["le"] //hardcode bucket upper bound
|
|
currentSQBInfo[string(bucketName)] = int(sample.Value)
|
|
}
|
|
slowQueryLock.Unlock()
|
|
return nil
|
|
}
|
|
|
|
// calculateDeltaSQB calculate the delta between current slow query bucket and last slow query bucket
|
|
func calculateDeltaSQB() *SlowQueryBucket {
|
|
deltaMap := make(SlowQueryBucket)
|
|
slowQueryLock.Lock()
|
|
for key, value := range currentSQBInfo {
|
|
deltaMap[key] = value - (lastSQBInfo)[key]
|
|
}
|
|
slowQueryLock.Unlock()
|
|
return &deltaMap
|
|
}
|
|
|
|
// init Init lastSQBInfo, follow the definition of metrics/server.go
|
|
// Buckets: prometheus.ExponentialBuckets(0.001, 2, 28), // 1ms ~ 1.5days
|
|
func init() {
|
|
lastSQBInfo = make(SlowQueryBucket)
|
|
currentSQBInfo = make(SlowQueryBucket)
|
|
|
|
bucketBase := 0.001 // From 0.001 to 134217.728, total 28 float number; the 29th is +Inf
|
|
for i := 0; i < slowQueryBucketNum-1; i++ {
|
|
lastSQBInfo[strconv.FormatFloat(bucketBase, 'f', 3, 32)] = 0
|
|
currentSQBInfo[strconv.FormatFloat(bucketBase, 'f', 3, 32)] = 0
|
|
bucketBase += bucketBase
|
|
}
|
|
lastSQBInfo["+Inf"] = 0
|
|
currentSQBInfo["+Inf"] = 0
|
|
|
|
if mysql.TiDBReleaseVersion != "None" {
|
|
logutil.BgLogger().Info("Telemetry slow query stats initialized", zap.String("currentSQBInfo", currentSQBInfo.String()), zap.String("lastSQBInfo", lastSQBInfo.String()))
|
|
}
|
|
}
|
|
|
|
// postReportSlowQueryStats copy currentSQBInfo to lastSQBInfo to be ready for next report
|
|
// this function is designed for being compatible with preview telemetry
|
|
func postReportSlowQueryStats() {
|
|
slowQueryLock.Lock()
|
|
lastSQBInfo = currentSQBInfo
|
|
currentSQBInfo = make(SlowQueryBucket)
|
|
slowQueryLock.Unlock()
|
|
logutil.BgLogger().Info("Telemetry slow query stats, postReportSlowQueryStats finished")
|
|
}
|