mirror of
https://github.com/containers/podman.git
synced 2025-08-06 19:44:14 +08:00

move the conmon process to the conmon cgroup also on exec. The previous implementation would fail to move the conmon process as the systemd unit already exists so its creation would fail. When the unit cannot be created, attempt to directly join the cgroup instead. Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
201 lines
5.5 KiB
Go
201 lines
5.5 KiB
Go
// +build linux darwin
|
|
|
|
package utils
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/containers/podman/v2/pkg/cgroups"
|
|
"github.com/containers/podman/v2/pkg/rootless"
|
|
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
|
"github.com/godbus/dbus/v5"
|
|
"github.com/pkg/errors"
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// RunUnderSystemdScope adds the specified pid to a systemd scope
|
|
func RunUnderSystemdScope(pid int, slice string, unitName string) error {
|
|
var properties []systemdDbus.Property
|
|
var conn *systemdDbus.Conn
|
|
var err error
|
|
|
|
if rootless.IsRootless() {
|
|
conn, err = cgroups.GetUserConnection(rootless.GetRootlessUID())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
conn, err = systemdDbus.New()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
properties = append(properties, systemdDbus.PropSlice(slice))
|
|
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
|
properties = append(properties, newProp("Delegate", true))
|
|
properties = append(properties, newProp("DefaultDependencies", false))
|
|
ch := make(chan string)
|
|
_, err = conn.StartTransientUnit(unitName, "replace", properties, ch)
|
|
if err != nil {
|
|
// On errors check if the cgroup already exists, if it does move the process there
|
|
if props, err := conn.GetUnitTypeProperties(unitName, "Scope"); err == nil {
|
|
if cgroup, ok := props["ControlGroup"].(string); ok && cgroup != "" {
|
|
if err := moveUnderCgroup(cgroup, "", []uint32{uint32(pid)}); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
return err
|
|
}
|
|
defer conn.Close()
|
|
|
|
// Block until job is started
|
|
<-ch
|
|
|
|
return nil
|
|
}
|
|
|
|
func getCgroupProcess(procFile string) (string, error) {
|
|
f, err := os.Open(procFile)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer f.Close()
|
|
|
|
scanner := bufio.NewScanner(f)
|
|
cgroup := "/"
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
parts := strings.SplitN(line, ":", 3)
|
|
if len(parts) != 3 {
|
|
return "", errors.Errorf("cannot parse cgroup line %q", line)
|
|
}
|
|
if strings.HasPrefix(line, "0::") {
|
|
cgroup = line[3:]
|
|
break
|
|
}
|
|
// root cgroup, skip it
|
|
if parts[2] == "/" {
|
|
continue
|
|
}
|
|
// The process must have the same cgroup path for all controllers
|
|
// The OCI runtime spec file allow us to specify only one path.
|
|
if cgroup != "/" && cgroup != parts[2] {
|
|
return "", errors.Errorf("cgroup configuration not supported, the process is in two different cgroups")
|
|
}
|
|
cgroup = parts[2]
|
|
}
|
|
if cgroup == "/" {
|
|
return "", errors.Errorf("could not find cgroup mount in %q", procFile)
|
|
}
|
|
return cgroup, nil
|
|
}
|
|
|
|
// GetOwnCgroup returns the cgroup for the current process.
|
|
func GetOwnCgroup() (string, error) {
|
|
return getCgroupProcess("/proc/self/cgroup")
|
|
}
|
|
|
|
// GetCgroupProcess returns the cgroup for the specified process process.
|
|
func GetCgroupProcess(pid int) (string, error) {
|
|
return getCgroupProcess(fmt.Sprintf("/proc/%d/cgroup", pid))
|
|
}
|
|
|
|
// MoveUnderCgroupSubtree moves the PID under a cgroup subtree.
|
|
func MoveUnderCgroupSubtree(subtree string) error {
|
|
return moveUnderCgroup("", subtree, nil)
|
|
}
|
|
|
|
// moveUnderCgroup moves a group of processes to a new cgroup.
|
|
// If cgroup is the empty string, then the current calling process cgroup is used.
|
|
// If processes is empty, then the processes from the current cgroup are moved.
|
|
func moveUnderCgroup(cgroup, subtree string, processes []uint32) error {
|
|
procFile := "/proc/self/cgroup"
|
|
f, err := os.Open(procFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
unifiedMode, err := cgroups.IsCgroup2UnifiedMode()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
scanner := bufio.NewScanner(f)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
parts := strings.SplitN(line, ":", 3)
|
|
if len(parts) != 3 {
|
|
return errors.Errorf("cannot parse cgroup line %q", line)
|
|
}
|
|
|
|
// root cgroup, skip it
|
|
if parts[2] == "/" {
|
|
continue
|
|
}
|
|
|
|
cgroupRoot := "/sys/fs/cgroup"
|
|
// Special case the unified mount on hybrid cgroup and named hierarchies.
|
|
// This works on Fedora 31, but we should really parse the mounts to see
|
|
// where the cgroup hierarchy is mounted.
|
|
if parts[1] == "" && !unifiedMode {
|
|
// If it is not using unified mode, the cgroup v2 hierarchy is
|
|
// usually mounted under /sys/fs/cgroup/unified
|
|
cgroupRoot = filepath.Join(cgroupRoot, "unified")
|
|
} else if parts[1] != "" {
|
|
// Assume the controller is mounted at /sys/fs/cgroup/$CONTROLLER.
|
|
controller := strings.TrimPrefix(parts[1], "name=")
|
|
cgroupRoot = filepath.Join(cgroupRoot, controller)
|
|
}
|
|
|
|
parentCgroup := cgroup
|
|
if parentCgroup == "" {
|
|
parentCgroup = parts[2]
|
|
}
|
|
newCgroup := filepath.Join(cgroupRoot, parentCgroup, subtree)
|
|
if err := os.Mkdir(newCgroup, 0755); err != nil && !os.IsExist(err) {
|
|
return err
|
|
}
|
|
|
|
f, err := os.OpenFile(filepath.Join(newCgroup, "cgroup.procs"), os.O_RDWR, 0755)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
if len(processes) > 0 {
|
|
for _, pid := range processes {
|
|
if _, err := f.Write([]byte(fmt.Sprintf("%d\n", pid))); err != nil {
|
|
logrus.Warnf("Cannot move process %d to cgroup %q", pid, newCgroup)
|
|
}
|
|
}
|
|
} else {
|
|
processesData, err := ioutil.ReadFile(filepath.Join(cgroupRoot, parts[2], "cgroup.procs"))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, pid := range bytes.Split(processesData, []byte("\n")) {
|
|
if _, err := f.Write(pid); err != nil {
|
|
logrus.Warnf("Cannot move process %d to cgroup %q", pid, newCgroup)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func newProp(name string, units interface{}) systemdDbus.Property {
|
|
return systemdDbus.Property{
|
|
Name: name,
|
|
Value: dbus.MakeVariant(units),
|
|
}
|
|
}
|