// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tracecpu

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"runtime/pprof"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	"github.com/google/pprof/profile"
	"github.com/pingcap/tidb/sessionctx/variable"
	"github.com/pingcap/tidb/util"
	"github.com/pingcap/tidb/util/hack"
	"github.com/pingcap/tidb/util/logutil"
	"go.uber.org/zap"
)

const (
	labelSQL        = "sql"
	labelSQLDigest  = "sql_digest"
	labelPlanDigest = "plan_digest"
)

// GlobalSQLCPUProfiler is the global SQL stats profiler.
var GlobalSQLCPUProfiler = newSQLCPUProfiler()

// Collector is used to collect the SQL execution cpu time.
type Collector interface {
	// Collect collects the SQL execution cpu time.
	// ts is a Unix timestamp, in seconds.
	Collect(ts uint64, stats []SQLCPUTimeRecord)
}

// SQLCPUTimeRecord represents a single record of how much cpu time a sql plan consumes in one second.
//
// PlanDigest can be empty, because:
// 1. some sql statements have no plan, e.g. `COMMIT`;
// 2. when a sql statement is being compiled, there is no plan yet.
type SQLCPUTimeRecord struct {
	SQLDigest  []byte
	PlanDigest []byte
	CPUTimeMs  uint32
}

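// A minimal Collector sketch for illustration; logCollector is hypothetical
// and not part of the original package. It simply logs every record it
// receives, while a real collector would aggregate the records and report
// them to a remote Top SQL agent. Register it with:
//
//	GlobalSQLCPUProfiler.SetCollector(logCollector{})
type logCollector struct{}

// Collect implements the Collector interface by logging each record.
func (logCollector) Collect(ts uint64, stats []SQLCPUTimeRecord) {
	for _, r := range stats {
		logutil.BgLogger().Info("top-sql cpu record",
			zap.Uint64("ts", ts),
			zap.String("sqlDigest", string(r.SQLDigest)),
			zap.String("planDigest", string(r.PlanDigest)),
			zap.Uint32("cpuTimeMs", r.CPUTimeMs))
	}
}
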
type sqlCPUProfiler struct {
	taskCh chan *profileData

	mu struct {
		sync.Mutex
		ept *exportProfileTask
	}
	collector atomic.Value
}

var (
	defaultProfileBufSize = 100 * 1024
	profileBufPool        = sync.Pool{
		New: func() interface{} {
			return bytes.NewBuffer(make([]byte, 0, defaultProfileBufSize))
		},
	}
)

// newSQLCPUProfiler creates a sqlCPUProfiler.
func newSQLCPUProfiler() *sqlCPUProfiler {
	return &sqlCPUProfiler{
		taskCh: make(chan *profileData, 128),
	}
}

func (sp *sqlCPUProfiler) Run() {
	logutil.BgLogger().Info("cpu profiler started")
	go sp.startCPUProfileWorker()
	go sp.startAnalyzeProfileWorker()
}

func (sp *sqlCPUProfiler) SetCollector(c Collector) {
	sp.collector.Store(c)
}

func (sp *sqlCPUProfiler) GetCollector() Collector {
	c, ok := sp.collector.Load().(Collector)
	if !ok || c == nil {
		return nil
	}
	return c
}

func (sp *sqlCPUProfiler) startCPUProfileWorker() {
	defer util.Recover("top-sql", "profileWorker", nil, false)
	for {
		if sp.IsEnabled() {
			sp.doCPUProfile()
		} else {
			time.Sleep(time.Second)
		}
	}
}

func (sp *sqlCPUProfiler) doCPUProfile() {
	intervalSecond := variable.TopSQLVariable.PrecisionSeconds.Load()
	task := sp.newProfileTask()
	if err := pprof.StartCPUProfile(task.buf); err != nil {
		// Sleep a while before retrying.
		time.Sleep(time.Second)
		sp.putTaskToBuffer(task)
		return
	}
	// Sleep for `intervalSecond` seconds minus the fractional part of the
	// current second, so that each profile window ends near a whole-second
	// boundary and task.end is aligned to wall-clock seconds.
	ns := int64(time.Second)*intervalSecond - int64(time.Now().Nanosecond())
	time.Sleep(time.Nanosecond * time.Duration(ns))
	pprof.StopCPUProfile()
	task.end = time.Now().Unix()
	if task.end < 0 {
		task.end = 0
	}
	sp.taskCh <- task
}

func (sp *sqlCPUProfiler) startAnalyzeProfileWorker() {
	defer util.Recover("top-sql", "analyzeProfileWorker", nil, false)
	for {
		task := <-sp.taskCh
		p, err := profile.ParseData(task.buf.Bytes())
		if err != nil {
			logutil.BgLogger().Error("parse profile error", zap.Error(err))
			sp.putTaskToBuffer(task)
			continue
		}
		stats := sp.parseCPUProfileBySQLLabels(p)
		sp.handleExportProfileTask(p)
		if c := sp.GetCollector(); c != nil {
			c.Collect(uint64(task.end), stats)
		}
		sp.putTaskToBuffer(task)
	}
}

type profileData struct {
	buf *bytes.Buffer
	end int64
}

func (sp *sqlCPUProfiler) newProfileTask() *profileData {
	buf := profileBufPool.Get().(*bytes.Buffer)
	return &profileData{
		buf: buf,
	}
}

func (sp *sqlCPUProfiler) putTaskToBuffer(task *profileData) {
	task.buf.Reset()
	profileBufPool.Put(task.buf)
}

// parseCPUProfileBySQLLabels aggregates the cpu-profile sample data by the sql_digest and plan_digest labels and
// outputs a SQLCPUTimeRecord slice. To learn more about profile labels, see https://rakyll.org/profiler-labels/
// The sql_digest label is set by the `SetSQLLabels` function after the SQL statement is parsed.
// The plan_digest label is set by the `SetSQLAndPlanLabels` function after the SQL plan is built.
// Since `sqlCPUProfiler` only cares about the cpu time consumed by (sql_digest, plan_digest), sample data
// without those labels is ignored.
func (sp *sqlCPUProfiler) parseCPUProfileBySQLLabels(p *profile.Profile) []SQLCPUTimeRecord {
	sqlMap := make(map[string]*sqlStats)
	// The last sample value of a CPU profile is the cpu time, in nanoseconds.
	idx := len(p.SampleType) - 1
	for _, s := range p.Sample {
		digests, ok := s.Label[labelSQLDigest]
		if !ok || len(digests) == 0 {
			continue
		}
		for _, digest := range digests {
			stmt, ok := sqlMap[digest]
			if !ok {
				stmt = &sqlStats{
					plans: make(map[string]int64),
					total: 0,
				}
				sqlMap[digest] = stmt
			}
			stmt.total += s.Value[idx]

			plans := s.Label[labelPlanDigest]
			for _, plan := range plans {
				stmt.plans[plan] += s.Value[idx]
			}
		}
	}
	return sp.createSQLStats(sqlMap)
}

func (sp *sqlCPUProfiler) createSQLStats(sqlMap map[string]*sqlStats) []SQLCPUTimeRecord {
	stats := make([]SQLCPUTimeRecord, 0, len(sqlMap))
	for sqlDigest, stmt := range sqlMap {
		stmt.tune()
		for planDigest, val := range stmt.plans {
			stats = append(stats, SQLCPUTimeRecord{
				SQLDigest:  []byte(sqlDigest),
				PlanDigest: []byte(planDigest),
				CPUTimeMs:  uint32(time.Duration(val).Milliseconds()),
			})
		}
	}
	return stats
}

type sqlStats struct {
	plans map[string]int64
	total int64
}

// tune is used to adjust the sql stats. Consider the following situation.
// The `sqlStats` may be:
//     plans: {
//         "table_scan": 200ms, // The cpu time of the sql whose plan is `table_scan` is 200ms.
//         "index_scan": 300ms, // The cpu time of the sql whose plan is `index_scan` is 300ms.
//     },
//     total: 600ms, // The total cpu time of the sql is 600ms.
// total_time - table_scan_time - index_scan_time = 100ms. This 100ms comes from sample data that carries only the
// sql_digest label and no plan_digest label. This happens because the pprof profile is sample-based, and the plan
// digest can only be set after the optimizer has generated the execution plan. So the remaining 100ms is the time
// the optimizer spent generating the plan.
// After this tune function, the `sqlStats` becomes:
//     plans: {
//         "":           100ms, // 600 - 200 - 300 = 100ms, the optimizer's plan-generation time.
//         "table_scan": 200ms,
//         "index_scan": 300ms,
//     },
//     total: 600ms,
func (s *sqlStats) tune() {
	if len(s.plans) == 0 {
		s.plans[""] = s.total
		return
	}
	planTotal := int64(0)
	for _, v := range s.plans {
		planTotal += v
	}
	optimize := s.total - planTotal
	if optimize <= 0 {
		return
	}
	s.plans[""] += optimize
}

func (sp *sqlCPUProfiler) handleExportProfileTask(p *profile.Profile) {
	sp.mu.Lock()
	defer sp.mu.Unlock()
	if sp.mu.ept == nil {
		return
	}
	sp.mu.ept.mergeProfile(p)
}

func (sp *sqlCPUProfiler) hasExportProfileTask() bool {
	sp.mu.Lock()
	has := sp.mu.ept != nil
	sp.mu.Unlock()
	return has
}

// IsEnabled returns true if the profiler is (or should be) enabled. It is exported for tests.
func (sp *sqlCPUProfiler) IsEnabled() bool {
	return variable.TopSQLEnabled() || sp.hasExportProfileTask()
}

// StartCPUProfile works like pprof.StartCPUProfile.
// Because GlobalSQLCPUProfiler keeps calling pprof.StartCPUProfile to fetch SQL cpu stats, any other call to
// pprof.StartCPUProfile (such as from the pprof profile HTTP API handler) would fail, so other callers should
// use tracecpu.StartCPUProfile instead of pprof.StartCPUProfile.
func StartCPUProfile(w io.Writer) error {
	if GlobalSQLCPUProfiler.IsEnabled() {
		return GlobalSQLCPUProfiler.startExportCPUProfile(w)
	}
	return pprof.StartCPUProfile(w)
}

// StopCPUProfile works like pprof.StopCPUProfile.
// Other callers should use tracecpu.StopCPUProfile instead of pprof.StopCPUProfile.
func StopCPUProfile() error {
	if GlobalSQLCPUProfiler.hasExportProfileTask() {
		return GlobalSQLCPUProfiler.stopExportCPUProfile()
	}
	pprof.StopCPUProfile()
	return nil
}

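// A minimal sketch of capturing a profile of duration d through this package;
// captureProfile is illustrative and not part of the original API. Unlike
// calling runtime/pprof directly, this also works while the background SQL
// CPU profiler is running, because the export task piggybacks on it.
func captureProfile(w io.Writer, d time.Duration) error {
	if err := StartCPUProfile(w); err != nil {
		return err
	}
	time.Sleep(d)
	return StopCPUProfile()
}
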
// CtxWithDigest wraps the ctx with the sql digest label; if the plan digest is not empty, it is attached as well.
func CtxWithDigest(ctx context.Context, sqlDigest, planDigest []byte) context.Context {
	if len(planDigest) == 0 {
		return pprof.WithLabels(ctx, pprof.Labels(labelSQLDigest, string(hack.String(sqlDigest))))
	}
	return pprof.WithLabels(ctx, pprof.Labels(labelSQLDigest, string(hack.String(sqlDigest)),
		labelPlanDigest, string(hack.String(planDigest))))
}

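// A minimal usage sketch; executeStmt and its arguments are hypothetical and
// not part of the original package. An executor would wrap its context with
// the digest labels and apply them to the current goroutine before running the
// statement, so that subsequent CPU profile samples carry the
// sql_digest/plan_digest labels.
func executeStmt(ctx context.Context, sqlDigest, planDigest []byte, run func(context.Context)) {
	ctx = CtxWithDigest(ctx, sqlDigest, planDigest)
	// SetGoroutineLabels makes the labels visible to the CPU profiler for
	// everything this goroutine does until the labels are replaced.
	pprof.SetGoroutineLabels(ctx)
	run(ctx)
}
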
func (sp *sqlCPUProfiler) startExportCPUProfile(w io.Writer) error {
	sp.mu.Lock()
	defer sp.mu.Unlock()
	if sp.mu.ept != nil {
		return errors.New("cpu profiling already in use")
	}
	sp.mu.ept = &exportProfileTask{w: w}
	return nil
}

func (sp *sqlCPUProfiler) stopExportCPUProfile() error {
	sp.mu.Lock()
	ept := sp.mu.ept
	sp.mu.ept = nil
	sp.mu.Unlock()
	if ept.err != nil {
		return ept.err
	}
	if w := ept.w; w != nil && ept.cpuProfile != nil {
		sp.removeLabel(ept.cpuProfile)
		return ept.cpuProfile.Write(w)
	}
	return nil
}

// removeLabel removes the sql_digest and plan_digest labels from the exported cpu profile data,
// since those labels would be confusing to other users.
// If `variable.EnablePProfSQLCPU` is true, the `sql` label is kept; otherwise, the `sql` label is removed too.
func (sp *sqlCPUProfiler) removeLabel(p *profile.Profile) {
	if p == nil {
		return
	}
	keepLabelSQL := variable.EnablePProfSQLCPU.Load()
	for _, s := range p.Sample {
		for k := range s.Label {
			switch k {
			case labelSQL:
				if !keepLabelSQL {
					delete(s.Label, k)
				}
			case labelSQLDigest, labelPlanDigest:
				delete(s.Label, k)
			}
		}
	}
}

type exportProfileTask struct {
	cpuProfile *profile.Profile
	err        error
	w          io.Writer
}

func (t *exportProfileTask) mergeProfile(p *profile.Profile) {
	if t.err != nil || p == nil {
		return
	}
	ps := make([]*profile.Profile, 0, 2)
	if t.cpuProfile != nil {
		ps = append(ps, t.cpuProfile)
	}
	ps = append(ps, p)
	t.cpuProfile, t.err = profile.Merge(ps)
}

// ProfileHTTPHandler is the same as pprof.Profile.
// The difference is that ProfileHTTPHandler uses tracecpu.StartCPUProfile/StopCPUProfile to fetch the profile data.
func ProfileHTTPHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("X-Content-Type-Options", "nosniff")
	sec, err := strconv.ParseInt(r.FormValue("seconds"), 10, 64)
	if sec <= 0 || err != nil {
		sec = 30
	}

	if durationExceedsWriteTimeout(r, float64(sec)) {
		serveError(w, http.StatusBadRequest, "profile duration exceeds server's WriteTimeout")
		return
	}

	// Set Content Type assuming StartCPUProfile will work,
	// because if it does it starts writing.
	w.Header().Set("Content-Type", "application/octet-stream")
	w.Header().Set("Content-Disposition", `attachment; filename="profile"`)

	err = StartCPUProfile(w)
	if err != nil {
		serveError(w, http.StatusInternalServerError, "Could not enable CPU profiling: "+err.Error())
		return
	}
	// TODO: fix me.
	// This could be fixed by always running 1-second profiling tasks back to back,
	// and aggregating (merging) multiple profiles into one according to the precision.
	//            |<-- 1s -->|
	// -|----------|----------|----------|----------|----------|-----------|-----> Background profile task timeline.
	//                 |________________________________|
	// (start cpu profile) v                            v (stop cpu profile)   // expected profile timeline
	//            |________________________________|                           // actual profile timeline
	time.Sleep(time.Second * time.Duration(sec))
	err = StopCPUProfile()
	if err != nil {
		serveError(w, http.StatusInternalServerError, "Could not stop CPU profiling: "+err.Error())
		return
	}
}

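// A minimal wiring sketch; the mux and address below are illustrative and not
// part of this package. Registering ProfileHTTPHandler on /debug/pprof/profile
// in place of the default net/http/pprof handler routes profile requests
// through tracecpu.StartCPUProfile/StopCPUProfile, so they coexist with the
// background SQL CPU profiler:
//
//	mux := http.NewServeMux()
//	mux.HandleFunc("/debug/pprof/profile", tracecpu.ProfileHTTPHandler)
//	go func() {
//		if err := http.ListenAndServe("127.0.0.1:10080", mux); err != nil {
//			log.Fatal(err)
//		}
//	}()
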
func durationExceedsWriteTimeout(r *http.Request, seconds float64) bool {
	srv, ok := r.Context().Value(http.ServerContextKey).(*http.Server)
	return ok && srv.WriteTimeout != 0 && seconds >= srv.WriteTimeout.Seconds()
}

func serveError(w http.ResponseWriter, status int, txt string) {
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	w.Header().Set("X-Go-Pprof", "1")
	w.Header().Del("Content-Disposition")
	w.WriteHeader(status)
	_, err := fmt.Fprintln(w, txt)
	if err != nil {
		logutil.BgLogger().Info("write http response error", zap.Error(err))
	}
}