mirror of
https://github.com/containers/podman.git
synced 2025-05-20 08:36:23 +08:00

Bump common to 9b0d134f392f41de3f3065aad162e73a3904168e Signed-off-by: flouthoc <flouthoc.git@gmail.com>
315 lines
8.6 KiB
Go
315 lines
8.6 KiB
Go
//go:build linux
|
|
|
|
package cgroups
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/containers/storage/pkg/fileutils"
|
|
"github.com/opencontainers/cgroups"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// WriteFile writes to a cgroup file
|
|
func WriteFile(dir, file, data string) error {
|
|
fd, err := OpenFile(dir, file, unix.O_WRONLY)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer fd.Close()
|
|
for {
|
|
_, err := fd.WriteString(data)
|
|
if errors.Is(err, unix.EINTR) {
|
|
logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
|
|
continue
|
|
}
|
|
return err
|
|
}
|
|
}
|
|
|
|
// OpenFile opens a cgroup file with the given flags
|
|
func OpenFile(dir, file string, flags int) (*os.File, error) {
|
|
var resolveFlags uint64
|
|
mode := os.FileMode(0)
|
|
if TestMode && flags&os.O_WRONLY != 0 {
|
|
flags |= os.O_TRUNC | os.O_CREATE
|
|
mode = 0o600
|
|
}
|
|
cgroupPath := path.Join(dir, file)
|
|
relPath := strings.TrimPrefix(cgroupPath, cgroupRoot+"/")
|
|
|
|
var stats unix.Statfs_t
|
|
fdTest, errOpen := unix.Openat2(-1, cgroupRoot, &unix.OpenHow{
|
|
Flags: unix.O_DIRECTORY | unix.O_PATH,
|
|
})
|
|
errStat := unix.Fstatfs(fdTest, &stats)
|
|
cgroupFd := fdTest
|
|
|
|
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
|
|
if stats.Type == unix.CGROUP2_SUPER_MAGIC {
|
|
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
|
|
resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
|
|
}
|
|
|
|
if errOpen != nil || errStat != nil || (len(relPath) == len(cgroupPath)) { // openat2 not available, use os
|
|
fdTest, err := os.OpenFile(cgroupPath, flags, mode)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if TestMode {
|
|
return fdTest, nil
|
|
}
|
|
if err := unix.Fstatfs(int(fdTest.Fd()), &stats); err != nil {
|
|
_ = fdTest.Close()
|
|
return nil, &os.PathError{Op: "statfs", Path: cgroupPath, Err: err}
|
|
}
|
|
if stats.Type != unix.CGROUP_SUPER_MAGIC && stats.Type != unix.CGROUP2_SUPER_MAGIC {
|
|
_ = fdTest.Close()
|
|
return nil, &os.PathError{Op: "open", Path: cgroupPath, Err: errors.New("not a cgroup file")}
|
|
}
|
|
return fdTest, nil
|
|
}
|
|
|
|
fd, err := unix.Openat2(cgroupFd, relPath,
|
|
&unix.OpenHow{
|
|
Resolve: resolveFlags,
|
|
Flags: uint64(flags) | unix.O_CLOEXEC,
|
|
Mode: uint64(mode),
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return os.NewFile(uintptr(fd), cgroupPath), nil
|
|
}
|
|
|
|
// ReadFile reads from a cgroup file, opening it with the read only flag
|
|
func ReadFile(dir, file string) (string, error) {
|
|
fd, err := OpenFile(dir, file, unix.O_RDONLY)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer fd.Close()
|
|
var buf bytes.Buffer
|
|
|
|
_, err = buf.ReadFrom(fd)
|
|
return buf.String(), err
|
|
}
|
|
|
|
// BlkioFiles gets the proper files for blkio weights
|
|
func BlkioFiles(cgroupPath string) (wtFile, wtDevFile string) {
|
|
var weightFile string
|
|
var weightDeviceFile string
|
|
// in this important since runc keeps these variables private, they won't be set
|
|
if cgroups.PathExists(filepath.Join(cgroupPath, "blkio.weight")) {
|
|
weightFile = "blkio.weight"
|
|
weightDeviceFile = "blkio.weight_device"
|
|
} else {
|
|
weightFile = "blkio.bfq.weight"
|
|
weightDeviceFile = "blkio.bfq.weight_device"
|
|
}
|
|
return weightFile, weightDeviceFile
|
|
}
|
|
|
|
// SetBlkioThrottle sets the throttle limits for the cgroup
|
|
func SetBlkioThrottle(res *cgroups.Resources, cgroupPath string) error {
|
|
for _, td := range res.BlkioThrottleReadBpsDevice {
|
|
if err := WriteFile(cgroupPath, "blkio.throttle.read_bps_device", fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, td := range res.BlkioThrottleWriteBpsDevice {
|
|
if err := WriteFile(cgroupPath, "blkio.throttle.write_bps_device", fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, td := range res.BlkioThrottleReadIOPSDevice {
|
|
if err := WriteFile(cgroupPath, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, td := range res.BlkioThrottleWriteIOPSDevice {
|
|
if err := WriteFile(cgroupPath, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Code below was moved from podman/utils/utils_supported.go and should probably be
// better integrated here, as some parts may be redundant.
|
|
|
|
// getCgroupProcess parses procFile, which must be in /proc/<pid>/cgroup
// format ("id:controllers:path" per line), and returns the cgroup path it
// describes.
//
// A line starting with "0::" (the cgroup v2 entry) wins immediately.
// Otherwise the longest path seen across all lines is returned.
//
// An error is returned when the file cannot be opened or read, when a line
// does not have three ":"-separated fields, when no path was found, or when
// the result is the root cgroup "/" and allowRoot is false.
func getCgroupProcess(procFile string, allowRoot bool) (string, error) {
	f, err := os.Open(procFile)
	if err != nil {
		return "", err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	cgroup := ""
	for scanner.Scan() {
		line := scanner.Text()
		parts := strings.SplitN(line, ":", 3)
		if len(parts) != 3 {
			return "", fmt.Errorf("cannot parse cgroup line %q", line)
		}
		// "0::<path>" is the unified (v2) hierarchy; it takes precedence.
		if parts[0] == "0" && parts[1] == "" {
			cgroup = parts[2]
			break
		}
		if len(parts[2]) > len(cgroup) {
			cgroup = parts[2]
		}
	}
	// Scan returns false both at EOF and on failure; do not mistake a read
	// error (or an over-long line) for a complete, valid file.
	if err := scanner.Err(); err != nil {
		return "", fmt.Errorf("reading %q: %w", procFile, err)
	}
	if len(cgroup) == 0 || (!allowRoot && cgroup == "/") {
		return "", fmt.Errorf("could not find cgroup mount in %q", procFile)
	}
	return cgroup, nil
}
|
|
|
|
// GetOwnCgroup returns the cgroup for the current process.
|
|
func GetOwnCgroup() (string, error) {
|
|
return getCgroupProcess("/proc/self/cgroup", true)
|
|
}
|
|
|
|
func GetOwnCgroupDisallowRoot() (string, error) {
|
|
return getCgroupProcess("/proc/self/cgroup", false)
|
|
}
|
|
|
|
// GetCgroupProcess returns the cgroup for the specified process process.
|
|
func GetCgroupProcess(pid int) (string, error) {
|
|
return getCgroupProcess(fmt.Sprintf("/proc/%d/cgroup", pid), true)
|
|
}
|
|
|
|
// MoveUnderCgroupSubtree moves the PID under a cgroup subtree.
|
|
func MoveUnderCgroupSubtree(subtree string) error {
|
|
return MoveUnderCgroup("", subtree, nil)
|
|
}
|
|
|
|
// MoveUnderCgroup moves a group of processes to a new cgroup.
|
|
// If cgroup is the empty string, then the current calling process cgroup is used.
|
|
// If processes is empty, then the processes from the current cgroup are moved.
|
|
func MoveUnderCgroup(cgroup, subtree string, processes []uint32) error {
|
|
procFile := "/proc/self/cgroup"
|
|
f, err := os.Open(procFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
unifiedMode, err := IsCgroup2UnifiedMode()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
scanner := bufio.NewScanner(f)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
parts := strings.SplitN(line, ":", 3)
|
|
if len(parts) != 3 {
|
|
return fmt.Errorf("cannot parse cgroup line %q", line)
|
|
}
|
|
|
|
// root cgroup, skip it
|
|
if parts[2] == "/" && (!unifiedMode || parts[1] != "") {
|
|
continue
|
|
}
|
|
|
|
cgroupRoot := "/sys/fs/cgroup"
|
|
// Special case the unified mount on hybrid cgroup and named hierarchies.
|
|
// This works on Fedora 31, but we should really parse the mounts to see
|
|
// where the cgroup hierarchy is mounted.
|
|
if parts[1] == "" && !unifiedMode {
|
|
// If it is not using unified mode, the cgroup v2 hierarchy is
|
|
// usually mounted under /sys/fs/cgroup/unified
|
|
cgroupRoot = filepath.Join(cgroupRoot, "unified")
|
|
|
|
// Ignore the unified mount if it doesn't exist
|
|
if err := fileutils.Exists(cgroupRoot); err != nil && os.IsNotExist(err) {
|
|
continue
|
|
}
|
|
} else if parts[1] != "" {
|
|
// Assume the controller is mounted at /sys/fs/cgroup/$CONTROLLER.
|
|
controller := strings.TrimPrefix(parts[1], "name=")
|
|
cgroupRoot = filepath.Join(cgroupRoot, controller)
|
|
}
|
|
|
|
parentCgroup := cgroup
|
|
if parentCgroup == "" {
|
|
parentCgroup = parts[2]
|
|
}
|
|
newCgroup := filepath.Join(cgroupRoot, parentCgroup, subtree)
|
|
if err := os.MkdirAll(newCgroup, 0o755); err != nil && !os.IsExist(err) {
|
|
return err
|
|
}
|
|
|
|
f, err := os.OpenFile(filepath.Join(newCgroup, "cgroup.procs"), os.O_RDWR, 0o755)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
if len(processes) > 0 {
|
|
for _, pid := range processes {
|
|
if _, err := fmt.Fprintf(f, "%d\n", pid); err != nil {
|
|
logrus.Debugf("Cannot move process %d to cgroup %q: %v", pid, newCgroup, err)
|
|
}
|
|
}
|
|
} else {
|
|
processesData, err := os.ReadFile(filepath.Join(cgroupRoot, parts[2], "cgroup.procs"))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, pid := range bytes.Split(processesData, []byte("\n")) {
|
|
if len(pid) == 0 {
|
|
continue
|
|
}
|
|
if _, err := f.Write(pid); err != nil {
|
|
logrus.Debugf("Cannot move process %s to cgroup %q: %v", string(pid), newCgroup, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
var (
|
|
maybeMoveToSubCgroupSync sync.Once
|
|
maybeMoveToSubCgroupSyncErr error
|
|
)
|
|
|
|
// MaybeMoveToSubCgroup moves the current process in a sub cgroup when
|
|
// it is running in the root cgroup on a system that uses cgroupv2.
|
|
func MaybeMoveToSubCgroup() error {
|
|
maybeMoveToSubCgroupSync.Do(func() {
|
|
unifiedMode, err := IsCgroup2UnifiedMode()
|
|
if err != nil {
|
|
maybeMoveToSubCgroupSyncErr = err
|
|
return
|
|
}
|
|
if !unifiedMode {
|
|
maybeMoveToSubCgroupSyncErr = nil
|
|
return
|
|
}
|
|
cgroup, err := GetOwnCgroup()
|
|
if err != nil {
|
|
maybeMoveToSubCgroupSyncErr = err
|
|
return
|
|
}
|
|
if cgroup == "/" {
|
|
maybeMoveToSubCgroupSyncErr = MoveUnderCgroupSubtree("init")
|
|
}
|
|
})
|
|
return maybeMoveToSubCgroupSyncErr
|
|
}
|