libpod: Move moveConmonToCgroupAndSignal and GetLimits to oci_conmon_linux.go

[NO NEW TESTS NEEDED]

Signed-off-by: Doug Rabson <dfr@rabson.org>
This commit is contained in:
Doug Rabson
2022-08-17 11:15:37 +01:00
parent 93bad90486
commit d43fac20f3
2 changed files with 267 additions and 261 deletions

View File

@ -23,10 +23,6 @@ import (
"text/template"
"time"
runcconfig "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/containers/common/pkg/cgroups"
"github.com/containers/common/pkg/config"
"github.com/containers/common/pkg/resize"
cutil "github.com/containers/common/pkg/util"
@ -1338,75 +1334,6 @@ func startCommand(cmd *exec.Cmd, ctr *Container) error {
return cmd.Start()
}
// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup
// it then signals for conmon to start by sending nonce data down the start fd
func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error {
mustCreateCgroup := true
if ctr.config.NoCgroups {
mustCreateCgroup = false
}
// If cgroup creation is disabled - just signal.
switch ctr.config.CgroupsMode {
case "disabled", "no-conmon", cgroupSplit:
mustCreateCgroup = false
}
// $INVOCATION_ID is set by systemd when running as a service.
if ctr.runtime.RemoteURI() == "" && os.Getenv("INVOCATION_ID") != "" {
mustCreateCgroup = false
}
if mustCreateCgroup {
// Usually rootless users are not allowed to configure cgroupfs.
// There are cases though, where it is allowed, e.g. if the cgroup
// is manually configured and chowned). Avoid detecting all
// such cases and simply use a lower log level.
logLevel := logrus.WarnLevel
if rootless.IsRootless() {
logLevel = logrus.InfoLevel
}
// TODO: This should be a switch - we are not guaranteed that
// there are only 2 valid cgroup managers
cgroupParent := ctr.CgroupParent()
cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon")
Resource := ctr.Spec().Linux.Resources
cgroupResources, err := GetLimits(Resource)
if err != nil {
logrus.StandardLogger().Log(logLevel, "Could not get ctr resources")
}
if ctr.CgroupManager() == config.SystemdCgroupsManager {
unitName := createUnitName("libpod-conmon", ctr.ID())
realCgroupParent := cgroupParent
splitParent := strings.Split(cgroupParent, "/")
if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 {
realCgroupParent = splitParent[len(splitParent)-1]
}
logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName)
if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil {
logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err)
}
} else {
control, err := cgroups.New(cgroupPath, &cgroupResources)
if err != nil {
logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
} else if err := control.AddPid(cmd.Process.Pid); err != nil {
// we need to remove this defer and delete the cgroup once conmon exits
// maybe need a conmon monitor?
logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
}
}
}
/* We set the cgroup, now the child can start creating children */
if err := writeConmonPipeData(startFd); err != nil {
return err
}
return nil
}
// newPipe creates a unix socket pair for communication.
// Returns two files - first is parent, second is child.
func newPipe() (*os.File, *os.File, error) {
@ -1671,191 +1598,3 @@ func httpAttachNonTerminalCopy(container *net.UnixConn, http *bufio.ReadWriter,
}
}
}
// GetLimits converts spec resource limits to cgroup consumable limits
func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) {
if resource == nil {
resource = &spec.LinuxResources{}
}
final := &runcconfig.Resources{}
devs := []*devices.Rule{}
// Devices
for _, entry := range resource.Devices {
if entry.Major == nil || entry.Minor == nil {
continue
}
runeType := 'a'
switch entry.Type {
case "b":
runeType = 'b'
case "c":
runeType = 'c'
}
devs = append(devs, &devices.Rule{
Type: devices.Type(runeType),
Major: *entry.Major,
Minor: *entry.Minor,
Permissions: devices.Permissions(entry.Access),
Allow: entry.Allow,
})
}
final.Devices = devs
// HugepageLimits
pageLimits := []*runcconfig.HugepageLimit{}
for _, entry := range resource.HugepageLimits {
pageLimits = append(pageLimits, &runcconfig.HugepageLimit{
Pagesize: entry.Pagesize,
Limit: entry.Limit,
})
}
final.HugetlbLimit = pageLimits
// Networking
netPriorities := []*runcconfig.IfPrioMap{}
if resource.Network != nil {
for _, entry := range resource.Network.Priorities {
netPriorities = append(netPriorities, &runcconfig.IfPrioMap{
Interface: entry.Name,
Priority: int64(entry.Priority),
})
}
}
final.NetPrioIfpriomap = netPriorities
rdma := make(map[string]runcconfig.LinuxRdma)
for name, entry := range resource.Rdma {
rdma[name] = runcconfig.LinuxRdma{HcaHandles: entry.HcaHandles, HcaObjects: entry.HcaObjects}
}
final.Rdma = rdma
// Memory
if resource.Memory != nil {
if resource.Memory.Limit != nil {
final.Memory = *resource.Memory.Limit
}
if resource.Memory.Reservation != nil {
final.MemoryReservation = *resource.Memory.Reservation
}
if resource.Memory.Swap != nil {
final.MemorySwap = *resource.Memory.Swap
}
if resource.Memory.Swappiness != nil {
final.MemorySwappiness = resource.Memory.Swappiness
}
}
// CPU
if resource.CPU != nil {
if resource.CPU.Period != nil {
final.CpuPeriod = *resource.CPU.Period
}
if resource.CPU.Quota != nil {
final.CpuQuota = *resource.CPU.Quota
}
if resource.CPU.RealtimePeriod != nil {
final.CpuRtPeriod = *resource.CPU.RealtimePeriod
}
if resource.CPU.RealtimeRuntime != nil {
final.CpuRtRuntime = *resource.CPU.RealtimeRuntime
}
if resource.CPU.Shares != nil {
final.CpuShares = *resource.CPU.Shares
}
final.CpusetCpus = resource.CPU.Cpus
final.CpusetMems = resource.CPU.Mems
}
// BlkIO
if resource.BlockIO != nil {
if len(resource.BlockIO.ThrottleReadBpsDevice) > 0 {
for _, entry := range resource.BlockIO.ThrottleReadBpsDevice {
throttle := &runcconfig.ThrottleDevice{}
dev := &runcconfig.BlockIODevice{
Major: entry.Major,
Minor: entry.Minor,
}
throttle.BlockIODevice = *dev
throttle.Rate = entry.Rate
final.BlkioThrottleReadBpsDevice = append(final.BlkioThrottleReadBpsDevice, throttle)
}
}
if len(resource.BlockIO.ThrottleWriteBpsDevice) > 0 {
for _, entry := range resource.BlockIO.ThrottleWriteBpsDevice {
throttle := &runcconfig.ThrottleDevice{}
dev := &runcconfig.BlockIODevice{
Major: entry.Major,
Minor: entry.Minor,
}
throttle.BlockIODevice = *dev
throttle.Rate = entry.Rate
final.BlkioThrottleWriteBpsDevice = append(final.BlkioThrottleWriteBpsDevice, throttle)
}
}
if len(resource.BlockIO.ThrottleReadIOPSDevice) > 0 {
for _, entry := range resource.BlockIO.ThrottleReadIOPSDevice {
throttle := &runcconfig.ThrottleDevice{}
dev := &runcconfig.BlockIODevice{
Major: entry.Major,
Minor: entry.Minor,
}
throttle.BlockIODevice = *dev
throttle.Rate = entry.Rate
final.BlkioThrottleReadIOPSDevice = append(final.BlkioThrottleReadIOPSDevice, throttle)
}
}
if len(resource.BlockIO.ThrottleWriteIOPSDevice) > 0 {
for _, entry := range resource.BlockIO.ThrottleWriteIOPSDevice {
throttle := &runcconfig.ThrottleDevice{}
dev := &runcconfig.BlockIODevice{
Major: entry.Major,
Minor: entry.Minor,
}
throttle.BlockIODevice = *dev
throttle.Rate = entry.Rate
final.BlkioThrottleWriteIOPSDevice = append(final.BlkioThrottleWriteIOPSDevice, throttle)
}
}
if resource.BlockIO.LeafWeight != nil {
final.BlkioLeafWeight = *resource.BlockIO.LeafWeight
}
if resource.BlockIO.Weight != nil {
final.BlkioWeight = *resource.BlockIO.Weight
}
if len(resource.BlockIO.WeightDevice) > 0 {
for _, entry := range resource.BlockIO.WeightDevice {
weight := &runcconfig.WeightDevice{}
dev := &runcconfig.BlockIODevice{
Major: entry.Major,
Minor: entry.Minor,
}
if entry.Weight != nil {
weight.Weight = *entry.Weight
}
if entry.LeafWeight != nil {
weight.LeafWeight = *entry.LeafWeight
}
weight.BlockIODevice = *dev
final.BlkioWeightDevice = append(final.BlkioWeightDevice, weight)
}
}
}
// Pids
if resource.Pids != nil {
final.PidsLimit = resource.Pids.Limit
}
// Networking
if resource.Network != nil {
if resource.Network.ClassID != nil {
final.NetClsClassid = *resource.Network.ClassID
}
}
// Unified state
final.Unified = resource.Unified
return *final, nil
}

View File

@ -3,11 +3,21 @@ package libpod
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
runcconfig "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/containers/common/pkg/cgroups"
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/pkg/errorhandling"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/utils"
pmount "github.com/containers/storage/pkg/mount"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
@ -89,3 +99,260 @@ func (r *ConmonOCIRuntime) withContainerSocketLabel(ctr *Container, closure func
}
return err
}
// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup
// it then signals for conmon to start by sending nonce data down the start fd
func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File) error {
mustCreateCgroup := true
if ctr.config.NoCgroups {
mustCreateCgroup = false
}
// If cgroup creation is disabled - just signal.
switch ctr.config.CgroupsMode {
case "disabled", "no-conmon", cgroupSplit:
mustCreateCgroup = false
}
// $INVOCATION_ID is set by systemd when running as a service.
if ctr.runtime.RemoteURI() == "" && os.Getenv("INVOCATION_ID") != "" {
mustCreateCgroup = false
}
if mustCreateCgroup {
// Usually rootless users are not allowed to configure cgroupfs.
// There are cases though, where it is allowed, e.g. if the cgroup
// is manually configured and chowned). Avoid detecting all
// such cases and simply use a lower log level.
logLevel := logrus.WarnLevel
if rootless.IsRootless() {
logLevel = logrus.InfoLevel
}
// TODO: This should be a switch - we are not guaranteed that
// there are only 2 valid cgroup managers
cgroupParent := ctr.CgroupParent()
cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon")
Resource := ctr.Spec().Linux.Resources
cgroupResources, err := GetLimits(Resource)
if err != nil {
logrus.StandardLogger().Log(logLevel, "Could not get ctr resources")
}
if ctr.CgroupManager() == config.SystemdCgroupsManager {
unitName := createUnitName("libpod-conmon", ctr.ID())
realCgroupParent := cgroupParent
splitParent := strings.Split(cgroupParent, "/")
if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 {
realCgroupParent = splitParent[len(splitParent)-1]
}
logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName)
if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil {
logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err)
}
} else {
control, err := cgroups.New(cgroupPath, &cgroupResources)
if err != nil {
logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
} else if err := control.AddPid(cmd.Process.Pid); err != nil {
// we need to remove this defer and delete the cgroup once conmon exits
// maybe need a conmon monitor?
logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
}
}
}
/* We set the cgroup, now the child can start creating children */
if err := writeConmonPipeData(startFd); err != nil {
return err
}
return nil
}
// GetLimits converts spec resource limits to cgroup consumable limits
func GetLimits(resource *spec.LinuxResources) (runcconfig.Resources, error) {
if resource == nil {
resource = &spec.LinuxResources{}
}
final := &runcconfig.Resources{}
devs := []*devices.Rule{}
// Devices
for _, entry := range resource.Devices {
if entry.Major == nil || entry.Minor == nil {
continue
}
runeType := 'a'
switch entry.Type {
case "b":
runeType = 'b'
case "c":
runeType = 'c'
}
devs = append(devs, &devices.Rule{
Type: devices.Type(runeType),
Major: *entry.Major,
Minor: *entry.Minor,
Permissions: devices.Permissions(entry.Access),
Allow: entry.Allow,
})
}
final.Devices = devs
// HugepageLimits
pageLimits := []*runcconfig.HugepageLimit{}
for _, entry := range resource.HugepageLimits {
pageLimits = append(pageLimits, &runcconfig.HugepageLimit{
Pagesize: entry.Pagesize,
Limit: entry.Limit,
})
}
final.HugetlbLimit = pageLimits
// Networking
netPriorities := []*runcconfig.IfPrioMap{}
if resource.Network != nil {
for _, entry := range resource.Network.Priorities {
netPriorities = append(netPriorities, &runcconfig.IfPrioMap{
Interface: entry.Name,
Priority: int64(entry.Priority),
})
}
}
final.NetPrioIfpriomap = netPriorities
rdma := make(map[string]runcconfig.LinuxRdma)
for name, entry := range resource.Rdma {
rdma[name] = runcconfig.LinuxRdma{HcaHandles: entry.HcaHandles, HcaObjects: entry.HcaObjects}
}
final.Rdma = rdma
// Memory
if resource.Memory != nil {
if resource.Memory.Limit != nil {
final.Memory = *resource.Memory.Limit
}
if resource.Memory.Reservation != nil {
final.MemoryReservation = *resource.Memory.Reservation
}
if resource.Memory.Swap != nil {
final.MemorySwap = *resource.Memory.Swap
}
if resource.Memory.Swappiness != nil {
final.MemorySwappiness = resource.Memory.Swappiness
}
}
// CPU
if resource.CPU != nil {
if resource.CPU.Period != nil {
final.CpuPeriod = *resource.CPU.Period
}
if resource.CPU.Quota != nil {
final.CpuQuota = *resource.CPU.Quota
}
if resource.CPU.RealtimePeriod != nil {
final.CpuRtPeriod = *resource.CPU.RealtimePeriod
}
if resource.CPU.RealtimeRuntime != nil {
final.CpuRtRuntime = *resource.CPU.RealtimeRuntime
}
if resource.CPU.Shares != nil {
final.CpuShares = *resource.CPU.Shares
}
final.CpusetCpus = resource.CPU.Cpus
final.CpusetMems = resource.CPU.Mems
}
// BlkIO
if resource.BlockIO != nil {
if len(resource.BlockIO.ThrottleReadBpsDevice) > 0 {
for _, entry := range resource.BlockIO.ThrottleReadBpsDevice {
throttle := &runcconfig.ThrottleDevice{}
dev := &runcconfig.BlockIODevice{
Major: entry.Major,
Minor: entry.Minor,
}
throttle.BlockIODevice = *dev
throttle.Rate = entry.Rate
final.BlkioThrottleReadBpsDevice = append(final.BlkioThrottleReadBpsDevice, throttle)
}
}
if len(resource.BlockIO.ThrottleWriteBpsDevice) > 0 {
for _, entry := range resource.BlockIO.ThrottleWriteBpsDevice {
throttle := &runcconfig.ThrottleDevice{}
dev := &runcconfig.BlockIODevice{
Major: entry.Major,
Minor: entry.Minor,
}
throttle.BlockIODevice = *dev
throttle.Rate = entry.Rate
final.BlkioThrottleWriteBpsDevice = append(final.BlkioThrottleWriteBpsDevice, throttle)
}
}
if len(resource.BlockIO.ThrottleReadIOPSDevice) > 0 {
for _, entry := range resource.BlockIO.ThrottleReadIOPSDevice {
throttle := &runcconfig.ThrottleDevice{}
dev := &runcconfig.BlockIODevice{
Major: entry.Major,
Minor: entry.Minor,
}
throttle.BlockIODevice = *dev
throttle.Rate = entry.Rate
final.BlkioThrottleReadIOPSDevice = append(final.BlkioThrottleReadIOPSDevice, throttle)
}
}
if len(resource.BlockIO.ThrottleWriteIOPSDevice) > 0 {
for _, entry := range resource.BlockIO.ThrottleWriteIOPSDevice {
throttle := &runcconfig.ThrottleDevice{}
dev := &runcconfig.BlockIODevice{
Major: entry.Major,
Minor: entry.Minor,
}
throttle.BlockIODevice = *dev
throttle.Rate = entry.Rate
final.BlkioThrottleWriteIOPSDevice = append(final.BlkioThrottleWriteIOPSDevice, throttle)
}
}
if resource.BlockIO.LeafWeight != nil {
final.BlkioLeafWeight = *resource.BlockIO.LeafWeight
}
if resource.BlockIO.Weight != nil {
final.BlkioWeight = *resource.BlockIO.Weight
}
if len(resource.BlockIO.WeightDevice) > 0 {
for _, entry := range resource.BlockIO.WeightDevice {
weight := &runcconfig.WeightDevice{}
dev := &runcconfig.BlockIODevice{
Major: entry.Major,
Minor: entry.Minor,
}
if entry.Weight != nil {
weight.Weight = *entry.Weight
}
if entry.LeafWeight != nil {
weight.LeafWeight = *entry.LeafWeight
}
weight.BlockIODevice = *dev
final.BlkioWeightDevice = append(final.BlkioWeightDevice, weight)
}
}
}
// Pids
if resource.Pids != nil {
final.PidsLimit = resource.Pids.Limit
}
// Networking
if resource.Network != nil {
if resource.Network.ClassID != nil {
final.NetClsClassid = *resource.Network.ClassID
}
}
// Unified state
final.Unified = resource.Unified
return *final, nil
}