410 lines
12 KiB
Go
410 lines
12 KiB
Go
// Copyright 2022 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package cgroup
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"math"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
"github.com/pingcap/log"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// CPUQuotaStatus presents the status of how CPU quota is used.
type CPUQuotaStatus int

const (
	// CPUQuotaUndefined is returned when CPU quota is undefined.
	CPUQuotaUndefined CPUQuotaStatus = iota
	// CPUQuotaUsed is returned when a valid CPU quota can be used.
	CPUQuotaUsed
	// CPUQuotaMinUsed is returned when CPU quota is smaller than the min value.
	CPUQuotaMinUsed
)
|
|
|
|
const (
	// _maxProcsKey is the environment variable looked up by SetGOMAXPROCS
	// before it changes GOMAXPROCS.
	_maxProcsKey = "GOMAXPROCS"

	// Names of the cgroup control files read for each kind of data.
	cgroupV1MemStat      = "memory.stat"
	cgroupV2MemStat      = "memory.stat"
	cgroupV2MemLimit     = "memory.max"
	cgroupV1MemUsage     = "memory.usage_in_bytes"
	cgroupV2MemUsage     = "memory.current"
	cgroupV1CPUQuota     = "cpu.cfs_quota_us"
	cgroupV1CPUPeriod    = "cpu.cfs_period_us"
	cgroupV1CPUSysUsage  = "cpuacct.usage_sys"
	cgroupV1CPUUserUsage = "cpuacct.usage_user"
	cgroupV2CPUMax       = "cpu.max"
	cgroupV2CPUStat      = "cpu.stat"

	// {memory|cpu}.stat file keys
	//
	// key for # of bytes of file-backed memory on inactive LRU list in cgroupv1
	cgroupV1MemInactiveFileUsageStatKey = "total_inactive_file"
	// key for # of bytes of file-backed memory on inactive LRU list in cgroupv2
	cgroupV2MemInactiveFileUsageStatKey = "inactive_file"
	// key for the memory limit in cgroupv1 (presumably read from memory.stat;
	// the reader is not in this part of the file).
	cgroupV1MemLimitStatKey = "hierarchical_memory_limit"
)
|
|
const (
	// procPathCGroup is the procfs file listing the cgroups of the current
	// process, one "id:controllers:path" entry per line.
	procPathCGroup = "/proc/self/cgroup"
	// procPathMountInfo is the procfs file listing the mounts visible to the
	// current process; it is scanned to locate cgroup mount points.
	procPathMountInfo = "/proc/self/mountinfo"
)
|
|
|
|
// CPUUsage holds CPU usage and quotas for an entire cgroup.
type CPUUsage struct {
	// System time and user time taken by this cgroup or process. In nanoseconds.
	Stime, Utime uint64
	// CPU period and quota for this process, in microseconds. This cgroup has
	// access to up to (quota/period) proportion of CPU resources on the system.
	// For instance, if there are 4 CPUs, quota = 150000, period = 100000,
	// this cgroup can use around ~1.5 CPUs, or 37.5% of total scheduler time.
	// If quota is -1, it's unlimited.
	Period, Quota int64
	// NumCPU is the number of CPUs in the system. Always returned even if
	// not called from a cgroup.
	NumCPU int
}
|
|
|
|
// SetGOMAXPROCS is to set GOMAXPROCS to the number of CPUs.
|
|
func SetGOMAXPROCS() (func(), error) {
|
|
const minGOMAXPROCS int = 1
|
|
|
|
undoNoop := func() {
|
|
log.Info("maxprocs: No GOMAXPROCS change to reset")
|
|
}
|
|
|
|
if max, exists := os.LookupEnv(_maxProcsKey); exists {
|
|
log.Info(fmt.Sprintf("maxprocs: Honoring GOMAXPROCS=%q as set in environment", max))
|
|
return undoNoop, nil
|
|
}
|
|
|
|
maxProcs, status, err := CPUQuotaToGOMAXPROCS(minGOMAXPROCS)
|
|
if err != nil {
|
|
return undoNoop, err
|
|
}
|
|
|
|
if status == CPUQuotaUndefined {
|
|
log.Info(fmt.Sprintf("maxprocs: Leaving GOMAXPROCS=%v: CPU quota undefined", runtime.GOMAXPROCS(0)))
|
|
return undoNoop, nil
|
|
}
|
|
|
|
prev := runtime.GOMAXPROCS(0)
|
|
undo := func() {
|
|
log.Info(fmt.Sprintf("maxprocs: Resetting GOMAXPROCS to %v", prev))
|
|
runtime.GOMAXPROCS(prev)
|
|
}
|
|
if prev == maxProcs {
|
|
return undoNoop, nil
|
|
}
|
|
switch status {
|
|
case CPUQuotaMinUsed:
|
|
log.Info(fmt.Sprintf("maxprocs: Updating GOMAXPROCS=%v: using minimum allowed GOMAXPROCS", maxProcs))
|
|
case CPUQuotaUsed:
|
|
log.Info(fmt.Sprintf("maxprocs: Updating GOMAXPROCS=%v: determined from CPU quota", maxProcs))
|
|
}
|
|
runtime.GOMAXPROCS(maxProcs)
|
|
return undo, nil
|
|
}
|
|
|
|
func readFile(filepath string) (res []byte, err error) {
|
|
var f *os.File
|
|
//nolint:gosec
|
|
f, err = os.Open(filepath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() {
|
|
err = errors.CombineErrors(err, f.Close())
|
|
}()
|
|
res, err = io.ReadAll(f)
|
|
return res, err
|
|
}
|
|
|
|
// The controller is defined via either type `memory` for cgroup v1 or via empty type for cgroup v2,
|
|
// where the type is the second field in /proc/[pid]/cgroup file
|
|
func detectControlPath(cgroupFilePath string, controller string) (string, error) {
|
|
//nolint:gosec
|
|
cgroup, err := os.Open(cgroupFilePath)
|
|
if err != nil {
|
|
return "", errors.Wrapf(err, "failed to read %s cgroup from cgroups file: %s", controller, cgroupFilePath)
|
|
}
|
|
defer func() {
|
|
err := cgroup.Close()
|
|
if err != nil {
|
|
log.Error("close cgroupFilePath", zap.Error(err))
|
|
}
|
|
}()
|
|
|
|
scanner := bufio.NewScanner(cgroup)
|
|
var unifiedPathIfFound string
|
|
for scanner.Scan() {
|
|
fields := bytes.Split(scanner.Bytes(), []byte{':'})
|
|
if len(fields) != 3 {
|
|
// The lines should always have three fields, there's something fishy here.
|
|
continue
|
|
}
|
|
|
|
f0, f1 := string(fields[0]), string(fields[1])
|
|
// First case if v2, second - v1. We give v2 the priority here.
|
|
// There is also a `hybrid` mode when both versions are enabled,
|
|
// but no known container solutions support it.
|
|
if f0 == "0" && f1 == "" {
|
|
unifiedPathIfFound = string(fields[2])
|
|
} else if f1 == controller {
|
|
return string(fields[2]), nil
|
|
}
|
|
}
|
|
|
|
return unifiedPathIfFound, nil
|
|
}
|
|
|
|
// See http://man7.org/linux/man-pages/man5/proc.5.html for `mountinfo` format.
|
|
func getCgroupDetails(mountInfoPath string, cRoot string, controller string) (string, int, error) {
|
|
//nolint:gosec
|
|
info, err := os.Open(mountInfoPath)
|
|
if err != nil {
|
|
return "", 0, errors.Wrapf(err, "failed to read mounts info from file: %s", mountInfoPath)
|
|
}
|
|
defer func() {
|
|
err := info.Close()
|
|
if err != nil {
|
|
log.Error("close mountInfoPath", zap.Error(err))
|
|
}
|
|
}()
|
|
|
|
scanner := bufio.NewScanner(info)
|
|
for scanner.Scan() {
|
|
fields := bytes.Fields(scanner.Bytes())
|
|
if len(fields) < 10 {
|
|
continue
|
|
}
|
|
|
|
ver, ok := detectCgroupVersion(fields, controller)
|
|
if ok {
|
|
mountPoint := string(fields[4])
|
|
if ver == 2 {
|
|
return mountPoint, ver, nil
|
|
}
|
|
// It is possible that the controller mount and the cgroup path are not the same (both are relative to the NS root).
|
|
// So start with the mount and construct the relative path of the cgroup.
|
|
// To test:
|
|
// 1、start a docker to run unit test or tidb-server
|
|
// > docker run -it --cpus=8 --memory=8g --name test --rm ubuntu:18.04 bash
|
|
//
|
|
// 2、change the limit when the container is running
|
|
// docker update --cpus=8 <containers>
|
|
nsRelativePath := string(fields[3])
|
|
if !strings.Contains(nsRelativePath, "..") {
|
|
// We don't expect to see err here ever but in case that it happens
|
|
// the best action is to ignore the line and hope that the rest of the lines
|
|
// will allow us to extract a valid path.
|
|
if relPath, err := filepath.Rel(nsRelativePath, cRoot); err == nil {
|
|
return filepath.Join(mountPoint, relPath), ver, nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return "", 0, fmt.Errorf("failed to detect cgroup root mount and version")
|
|
}
|
|
|
|
func cgroupFileToUint64(filepath, desc string) (res uint64, err error) {
|
|
contents, err := readFile(filepath)
|
|
if err != nil {
|
|
return 0, errors.Wrapf(err, "error when reading %s from cgroup v1 at %s", desc, filepath)
|
|
}
|
|
res, err = strconv.ParseUint(string(bytes.TrimSpace(contents)), 10, 64)
|
|
if err != nil {
|
|
return 0, errors.Wrapf(err, "error when parsing %s from cgroup v1 at %s", desc, filepath)
|
|
}
|
|
return res, err
|
|
}
|
|
|
|
func cgroupFileToInt64(filepath, desc string) (res int64, err error) {
|
|
contents, err := readFile(filepath)
|
|
if err != nil {
|
|
return 0, errors.Wrapf(err, "error when reading %s from cgroup v1 at %s", desc, filepath)
|
|
}
|
|
res, err = strconv.ParseInt(string(bytes.TrimSpace(contents)), 10, 64)
|
|
if err != nil {
|
|
return 0, errors.Wrapf(err, "error when parsing %s from cgroup v1 at %s", desc, filepath)
|
|
}
|
|
return res, nil
|
|
}
|
|
|
|
// detectCgroupVersion inspects the whitespace-split fields of one mountinfo
// line and reports which cgroup version the mount belongs to.
//
// It returns (1, true) when the filesystem type is "cgroup" and the super
// options field contains controller, (2, true) when the filesystem type is
// "cgroup2", and (0, false) for any other or malformed line.
func detectCgroupVersion(fields [][]byte, controller string) (_ int, found bool) {
	if len(fields) < 10 {
		return 0, false
	}

	// Due to strange format there can be optional fields in the middle of the set, starting
	// from the field #7. The end of the fields is marked with "-" field.
	pos := 6
	for pos < len(fields) && string(fields[pos]) != "-" {
		pos++
	}

	// No optional fields separator found or there is less than 3 fields after it which is wrong.
	if len(fields)-pos-1 < 3 {
		return 0, false
	}

	// After the separator come: filesystem type, mount source, super options.
	fsType, superOpts := fields[pos+1], fields[pos+3]

	switch string(fsType) {
	case "cgroup":
		// Check for controller specifically in cgroup v1 (it is listed in super
		// options field), as the value can't be found if it is not enforced.
		if bytes.Contains(superOpts, []byte(controller)) {
			return 1, true
		}
	case "cgroup2":
		return 2, true
	}

	return 0, false
}
|
|
|
|
func detectCPUQuotaInV1(cRoot string) (period, quota int64, err error) {
|
|
quotaFilePath := filepath.Join(cRoot, cgroupV1CPUQuota)
|
|
periodFilePath := filepath.Join(cRoot, cgroupV1CPUPeriod)
|
|
quota, err = cgroupFileToInt64(quotaFilePath, "cpu quota")
|
|
if err != nil {
|
|
return 0, 0, err
|
|
}
|
|
period, err = cgroupFileToInt64(periodFilePath, "cpu period")
|
|
if err != nil {
|
|
return 0, 0, err
|
|
}
|
|
|
|
return period, quota, err
|
|
}
|
|
|
|
func detectCPUUsageInV1(cRoot string) (stime, utime uint64, err error) {
|
|
sysFilePath := filepath.Join(cRoot, cgroupV1CPUSysUsage)
|
|
userFilePath := filepath.Join(cRoot, cgroupV1CPUUserUsage)
|
|
stime, err = cgroupFileToUint64(sysFilePath, "cpu system time")
|
|
if err != nil {
|
|
return 0, 0, err
|
|
}
|
|
utime, err = cgroupFileToUint64(userFilePath, "cpu user time")
|
|
if err != nil {
|
|
return 0, 0, err
|
|
}
|
|
|
|
return stime, utime, err
|
|
}
|
|
|
|
func detectCPUQuotaInV2(cRoot string) (period, quota int64, err error) {
|
|
maxFilePath := filepath.Join(cRoot, cgroupV2CPUMax)
|
|
contents, err := readFile(maxFilePath)
|
|
if err != nil {
|
|
return 0, 0, errors.Wrapf(err, "error when read cpu quota from cgroup v2 at %s", maxFilePath)
|
|
}
|
|
fields := strings.Fields(string(contents))
|
|
if len(fields) > 2 || len(fields) == 0 {
|
|
return 0, 0, errors.Errorf("unexpected format when reading cpu quota from cgroup v2 at %s: %s", maxFilePath, contents)
|
|
}
|
|
if fields[0] == "max" {
|
|
// Negative quota denotes no limit.
|
|
quota = -1
|
|
} else {
|
|
quota, err = strconv.ParseInt(fields[0], 10, 64)
|
|
if err != nil {
|
|
return 0, 0, errors.Wrapf(err, "error when reading cpu quota from cgroup v2 at %s", maxFilePath)
|
|
}
|
|
}
|
|
if len(fields) == 2 {
|
|
period, err = strconv.ParseInt(fields[1], 10, 64)
|
|
if err != nil {
|
|
return 0, 0, errors.Wrapf(err, "error when reading cpu period from cgroup v2 at %s", maxFilePath)
|
|
}
|
|
}
|
|
return period, quota, nil
|
|
}
|
|
|
|
func detectCPUUsageInV2(cRoot string) (stime, utime uint64, err error) {
|
|
statFilePath := filepath.Join(cRoot, cgroupV2CPUStat)
|
|
var stat *os.File
|
|
//nolint:gosec
|
|
stat, err = os.Open(statFilePath)
|
|
if err != nil {
|
|
return 0, 0, errors.Wrapf(err, "can't read cpu usage from cgroup v2 at %s", statFilePath)
|
|
}
|
|
defer func() {
|
|
err = errors.CombineErrors(err, stat.Close())
|
|
}()
|
|
|
|
scanner := bufio.NewScanner(stat)
|
|
for scanner.Scan() {
|
|
fields := bytes.Fields(scanner.Bytes())
|
|
if len(fields) != 2 || (string(fields[0]) != "user_usec" && string(fields[0]) != "system_usec") {
|
|
continue
|
|
}
|
|
keyField := string(fields[0])
|
|
|
|
trimmed := string(bytes.TrimSpace(fields[1]))
|
|
usageVar := &stime
|
|
if keyField == "user_usec" {
|
|
usageVar = &utime
|
|
}
|
|
*usageVar, err = strconv.ParseUint(trimmed, 10, 64)
|
|
if err != nil {
|
|
return 0, 0, errors.Wrapf(err, "can't read cpu usage %s from cgroup v1 at %s", keyField, statFilePath)
|
|
}
|
|
}
|
|
|
|
return stime, utime, err
|
|
}
|
|
|
|
func readInt64Value(root, filename string, cgVersion int) (value uint64, err error) {
|
|
filePath := filepath.Join(root, filename)
|
|
//nolint:gosec
|
|
file, err := os.Open(filePath)
|
|
if err != nil {
|
|
return 0, errors.Wrapf(err, "can't read %s from cgroup v%d", filename, cgVersion)
|
|
}
|
|
defer file.Close()
|
|
|
|
scanner := bufio.NewScanner(file)
|
|
present := scanner.Scan()
|
|
if !present {
|
|
return 0, errors.Wrapf(err, "no value found in %s from cgroup v%d", filename, cgVersion)
|
|
}
|
|
data := scanner.Bytes()
|
|
trimmed := string(bytes.TrimSpace(data))
|
|
// cgroupv2 has certain control files that default to "max", so handle here.
|
|
if trimmed == "max" {
|
|
return math.MaxInt64, nil
|
|
}
|
|
value, err = strconv.ParseUint(trimmed, 10, 64)
|
|
if err != nil {
|
|
return 0, errors.Wrapf(err, "failed to parse value in %s from cgroup v%d", filename, cgVersion)
|
|
}
|
|
return value, nil
|
|
}
|