mirror of
https://github.com/containers/podman.git
synced 2025-12-01 10:38:05 +08:00
Pull in updates made to the filters code for images. Filters now perform an AND operation except for the reference filter, which does an OR operation for positive cases but an AND operation for negative cases. Signed-off-by: Urvashi Mohnani <umohnani@redhat.com>
315 lines
8.6 KiB
Go
315 lines
8.6 KiB
Go
//go:build linux
|
|
|
|
package cgroups
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// WriteFile writes to a cgroup file
|
|
func WriteFile(dir, file, data string) error {
|
|
fd, err := OpenFile(dir, file, unix.O_WRONLY)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer fd.Close()
|
|
for {
|
|
_, err := fd.WriteString(data)
|
|
if errors.Is(err, unix.EINTR) {
|
|
logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
|
|
continue
|
|
}
|
|
return err
|
|
}
|
|
}
|
|
|
|
// OpenFile opens a cgroup file with the given flags
|
|
func OpenFile(dir, file string, flags int) (*os.File, error) {
|
|
var resolveFlags uint64
|
|
mode := os.FileMode(0)
|
|
if TestMode && flags&os.O_WRONLY != 0 {
|
|
flags |= os.O_TRUNC | os.O_CREATE
|
|
mode = 0o600
|
|
}
|
|
cgroupPath := path.Join(dir, file)
|
|
relPath := strings.TrimPrefix(cgroupPath, cgroupRoot+"/")
|
|
|
|
var stats unix.Statfs_t
|
|
fdTest, errOpen := unix.Openat2(-1, cgroupRoot, &unix.OpenHow{
|
|
Flags: unix.O_DIRECTORY | unix.O_PATH,
|
|
})
|
|
errStat := unix.Fstatfs(fdTest, &stats)
|
|
cgroupFd := fdTest
|
|
|
|
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
|
|
if stats.Type == unix.CGROUP2_SUPER_MAGIC {
|
|
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
|
|
resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
|
|
}
|
|
|
|
if errOpen != nil || errStat != nil || (len(relPath) == len(cgroupPath)) { // openat2 not available, use os
|
|
fdTest, err := os.OpenFile(cgroupPath, flags, mode)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if TestMode {
|
|
return fdTest, nil
|
|
}
|
|
if err := unix.Fstatfs(int(fdTest.Fd()), &stats); err != nil {
|
|
_ = fdTest.Close()
|
|
return nil, &os.PathError{Op: "statfs", Path: cgroupPath, Err: err}
|
|
}
|
|
if stats.Type != unix.CGROUP_SUPER_MAGIC && stats.Type != unix.CGROUP2_SUPER_MAGIC {
|
|
_ = fdTest.Close()
|
|
return nil, &os.PathError{Op: "open", Path: cgroupPath, Err: errors.New("not a cgroup file")}
|
|
}
|
|
return fdTest, nil
|
|
}
|
|
|
|
fd, err := unix.Openat2(cgroupFd, relPath,
|
|
&unix.OpenHow{
|
|
Resolve: resolveFlags,
|
|
Flags: uint64(flags) | unix.O_CLOEXEC,
|
|
Mode: uint64(mode),
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return os.NewFile(uintptr(fd), cgroupPath), nil
|
|
}
|
|
|
|
// ReadFile reads from a cgroup file, opening it with the read only flag
|
|
func ReadFile(dir, file string) (string, error) {
|
|
fd, err := OpenFile(dir, file, unix.O_RDONLY)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer fd.Close()
|
|
var buf bytes.Buffer
|
|
|
|
_, err = buf.ReadFrom(fd)
|
|
return buf.String(), err
|
|
}
|
|
|
|
// BlkioFiles gets the proper files for blkio weights
|
|
func BlkioFiles(cgroupPath string) (wtFile, wtDevFile string) {
|
|
var weightFile string
|
|
var weightDeviceFile string
|
|
// in this important since runc keeps these variables private, they won't be set
|
|
if cgroups.PathExists(filepath.Join(cgroupPath, "blkio.weight")) {
|
|
weightFile = "blkio.weight"
|
|
weightDeviceFile = "blkio.weight_device"
|
|
} else {
|
|
weightFile = "blkio.bfq.weight"
|
|
weightDeviceFile = "blkio.bfq.weight_device"
|
|
}
|
|
return weightFile, weightDeviceFile
|
|
}
|
|
|
|
// SetBlkioThrottle sets the throttle limits for the cgroup
|
|
func SetBlkioThrottle(res *configs.Resources, cgroupPath string) error {
|
|
for _, td := range res.BlkioThrottleReadBpsDevice {
|
|
if err := WriteFile(cgroupPath, "blkio.throttle.read_bps_device", fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, td := range res.BlkioThrottleWriteBpsDevice {
|
|
if err := WriteFile(cgroupPath, "blkio.throttle.write_bps_device", fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, td := range res.BlkioThrottleReadIOPSDevice {
|
|
if err := WriteFile(cgroupPath, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, td := range res.BlkioThrottleWriteIOPSDevice {
|
|
if err := WriteFile(cgroupPath, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Code below was moved from podman/utils/utils_supported.go and should probably be
// better integrated here, as some parts may be redundant.
|
|
|
|
// getCgroupProcess parses procFile, which is expected to contain lines of the
// form "ID:CONTROLLERS:PATH", and returns the cgroup path it describes.
//
// A line starting with "0::" wins immediately and ends the scan. Otherwise
// the longest path seen across all lines is returned.
//
// An error is returned when the file cannot be opened or read, when a line
// does not have three colon-separated fields, when no non-empty path is
// found, or when the result is "/" and allowRoot is false.
func getCgroupProcess(procFile string, allowRoot bool) (string, error) {
	f, err := os.Open(procFile)
	if err != nil {
		return "", err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	cgroup := ""
	for scanner.Scan() {
		line := scanner.Text()
		// Limit the split to 3 fields so colons inside the path are kept.
		parts := strings.SplitN(line, ":", 3)
		if len(parts) != 3 {
			return "", fmt.Errorf("cannot parse cgroup line %q", line)
		}
		if strings.HasPrefix(line, "0::") {
			cgroup = line[3:]
			break
		}
		if len(parts[2]) > len(cgroup) {
			cgroup = parts[2]
		}
	}
	// Scan also stops on read errors and over-long lines; do not report a
	// result built from a partially read file as success.
	if err := scanner.Err(); err != nil {
		return "", fmt.Errorf("reading %q: %w", procFile, err)
	}
	if len(cgroup) == 0 || (!allowRoot && cgroup == "/") {
		return "", fmt.Errorf("could not find cgroup mount in %q", procFile)
	}
	return cgroup, nil
}
|
|
|
|
// GetOwnCgroup returns the cgroup for the current process.
|
|
func GetOwnCgroup() (string, error) {
|
|
return getCgroupProcess("/proc/self/cgroup", true)
|
|
}
|
|
|
|
func GetOwnCgroupDisallowRoot() (string, error) {
|
|
return getCgroupProcess("/proc/self/cgroup", false)
|
|
}
|
|
|
|
// GetCgroupProcess returns the cgroup for the specified process process.
|
|
func GetCgroupProcess(pid int) (string, error) {
|
|
return getCgroupProcess(fmt.Sprintf("/proc/%d/cgroup", pid), true)
|
|
}
|
|
|
|
// MoveUnderCgroupSubtree moves the PID under a cgroup subtree.
|
|
func MoveUnderCgroupSubtree(subtree string) error {
|
|
return MoveUnderCgroup("", subtree, nil)
|
|
}
|
|
|
|
// MoveUnderCgroup moves a group of processes to a new cgroup.
|
|
// If cgroup is the empty string, then the current calling process cgroup is used.
|
|
// If processes is empty, then the processes from the current cgroup are moved.
|
|
func MoveUnderCgroup(cgroup, subtree string, processes []uint32) error {
|
|
procFile := "/proc/self/cgroup"
|
|
f, err := os.Open(procFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
unifiedMode, err := IsCgroup2UnifiedMode()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
scanner := bufio.NewScanner(f)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
parts := strings.SplitN(line, ":", 3)
|
|
if len(parts) != 3 {
|
|
return fmt.Errorf("cannot parse cgroup line %q", line)
|
|
}
|
|
|
|
// root cgroup, skip it
|
|
if parts[2] == "/" && !(unifiedMode && parts[1] == "") {
|
|
continue
|
|
}
|
|
|
|
cgroupRoot := "/sys/fs/cgroup"
|
|
// Special case the unified mount on hybrid cgroup and named hierarchies.
|
|
// This works on Fedora 31, but we should really parse the mounts to see
|
|
// where the cgroup hierarchy is mounted.
|
|
if parts[1] == "" && !unifiedMode {
|
|
// If it is not using unified mode, the cgroup v2 hierarchy is
|
|
// usually mounted under /sys/fs/cgroup/unified
|
|
cgroupRoot = filepath.Join(cgroupRoot, "unified")
|
|
|
|
// Ignore the unified mount if it doesn't exist
|
|
if _, err := os.Stat(cgroupRoot); err != nil && os.IsNotExist(err) {
|
|
continue
|
|
}
|
|
} else if parts[1] != "" {
|
|
// Assume the controller is mounted at /sys/fs/cgroup/$CONTROLLER.
|
|
controller := strings.TrimPrefix(parts[1], "name=")
|
|
cgroupRoot = filepath.Join(cgroupRoot, controller)
|
|
}
|
|
|
|
parentCgroup := cgroup
|
|
if parentCgroup == "" {
|
|
parentCgroup = parts[2]
|
|
}
|
|
newCgroup := filepath.Join(cgroupRoot, parentCgroup, subtree)
|
|
if err := os.MkdirAll(newCgroup, 0o755); err != nil && !os.IsExist(err) {
|
|
return err
|
|
}
|
|
|
|
f, err := os.OpenFile(filepath.Join(newCgroup, "cgroup.procs"), os.O_RDWR, 0o755)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
if len(processes) > 0 {
|
|
for _, pid := range processes {
|
|
if _, err := f.WriteString(fmt.Sprintf("%d\n", pid)); err != nil {
|
|
logrus.Debugf("Cannot move process %d to cgroup %q: %v", pid, newCgroup, err)
|
|
}
|
|
}
|
|
} else {
|
|
processesData, err := os.ReadFile(filepath.Join(cgroupRoot, parts[2], "cgroup.procs"))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, pid := range bytes.Split(processesData, []byte("\n")) {
|
|
if len(pid) == 0 {
|
|
continue
|
|
}
|
|
if _, err := f.Write(pid); err != nil {
|
|
logrus.Debugf("Cannot move process %s to cgroup %q: %v", string(pid), newCgroup, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
var (
|
|
maybeMoveToSubCgroupSync sync.Once
|
|
maybeMoveToSubCgroupSyncErr error
|
|
)
|
|
|
|
// MaybeMoveToSubCgroup moves the current process in a sub cgroup when
|
|
// it is running in the root cgroup on a system that uses cgroupv2.
|
|
func MaybeMoveToSubCgroup() error {
|
|
maybeMoveToSubCgroupSync.Do(func() {
|
|
unifiedMode, err := IsCgroup2UnifiedMode()
|
|
if err != nil {
|
|
maybeMoveToSubCgroupSyncErr = err
|
|
return
|
|
}
|
|
if !unifiedMode {
|
|
maybeMoveToSubCgroupSyncErr = nil
|
|
return
|
|
}
|
|
cgroup, err := GetOwnCgroup()
|
|
if err != nil {
|
|
maybeMoveToSubCgroupSyncErr = err
|
|
return
|
|
}
|
|
if cgroup == "/" {
|
|
maybeMoveToSubCgroupSyncErr = MoveUnderCgroupSubtree("init")
|
|
}
|
|
})
|
|
return maybeMoveToSubCgroupSyncErr
|
|
}
|