Merge pull request #19888 from giuseppe/fix-pod-lifecycle

fix pod cgroup lifecycle
This commit is contained in:
Daniel J Walsh
2023-09-10 20:36:21 -04:00
committed by GitHub
11 changed files with 288 additions and 170 deletions

View File

@ -158,6 +158,15 @@ func create(cmd *cobra.Command, args []string) error {
return fmt.Errorf("cannot set share(%s) namespaces without an infra container", cmd.Flag("share").Value) return fmt.Errorf("cannot set share(%s) namespaces without an infra container", cmd.Flag("share").Value)
} }
createOptions.Share = nil createOptions.Share = nil
infraOptions, err = containers.CreateInit(cmd, infraOptions, true)
if err != nil {
return err
}
err = common.ContainerToPodOptions(&infraOptions, &createOptions)
if err != nil {
return err
}
} else { } else {
// reassign certain options for libpod api, these need to be populated in spec // reassign certain options for libpod api, these need to be populated in spec
flags := cmd.Flags() flags := cmd.Flags()
@ -284,6 +293,21 @@ func create(cmd *cobra.Command, args []string) error {
return err return err
} }
podSpec.Name = podName podSpec.Name = podName
} else {
ctrSpec := specgen.NewSpecGenerator("", false)
err = specgenutil.FillOutSpecGen(ctrSpec, &infraOptions, []string{})
if err != nil {
return err
}
// Marshal and Unmarshal the spec in order to map similar entities
wrapped, err := json.Marshal(ctrSpec)
if err != nil {
return err
}
err = json.Unmarshal(wrapped, podSpec)
if err != nil {
return err
}
} }
PodSpec := entities.PodSpec{PodSpecGen: *podSpec} PodSpec := entities.PodSpec{PodSpecGen: *podSpec}
response, err := registry.ContainerEngine().PodCreate(context.Background(), PodSpec) response, err := registry.ContainerEngine().PodCreate(context.Background(), PodSpec)

View File

@ -1028,6 +1028,19 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
shutdown.Inhibit() shutdown.Inhibit()
defer shutdown.Uninhibit() defer shutdown.Uninhibit()
// If the container is part of a pod, make sure the pod cgroup is created before the container
// so the limits can be applied.
if c.PodID() != "" {
pod, err := c.runtime.LookupPod(c.PodID())
if err != nil {
return err
}
if _, err := c.runtime.platformMakePod(pod, &pod.config.ResourceLimits); err != nil {
return err
}
}
// With the spec complete, do an OCI create // With the spec complete, do an OCI create
if _, err = c.ociRuntime.CreateContainer(c, nil); err != nil { if _, err = c.ociRuntime.CreateContainer(c, nil); err != nil {
return err return err

View File

@ -361,9 +361,6 @@ func (p *Pod) CgroupPath() (string, error) {
if err := p.updatePod(); err != nil { if err := p.updatePod(); err != nil {
return "", err return "", err
} }
if p.state.InfraContainerID == "" {
return "", fmt.Errorf("pod has no infra container: %w", define.ErrNoSuchCtr)
}
return p.state.CgroupPath, nil return p.state.CgroupPath, nil
} }

View File

@ -209,6 +209,13 @@ func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
return nil, err return nil, err
} }
if err := p.updatePod(); err != nil {
return nil, err
}
if err := p.removePodCgroup(); err != nil {
return nil, err
}
return nil, nil return nil, nil
} }

View File

@ -7,6 +7,7 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"path"
"path/filepath" "path/filepath"
"github.com/containers/common/pkg/cgroups" "github.com/containers/common/pkg/cgroups"
@ -14,6 +15,7 @@ import (
"github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/events" "github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/specgen" "github.com/containers/podman/v4/pkg/specgen"
"github.com/containers/podman/v4/utils"
"github.com/hashicorp/go-multierror" "github.com/hashicorp/go-multierror"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
) )
@ -56,9 +58,13 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
pod.valid = true pod.valid = true
if err := r.platformMakePod(pod, p); err != nil { parentCgroup, err := r.platformMakePod(pod, p.ResourceLimits)
if err != nil {
return nil, err return nil, err
} }
if p.InfraContainerSpec != nil {
p.InfraContainerSpec.CgroupParent = parentCgroup
}
if !pod.HasInfraContainer() && pod.SharesNamespaces() { if !pod.HasInfraContainer() && pod.SharesNamespaces() {
return nil, errors.New("Pods must have an infra container to share namespaces") return nil, errors.New("Pods must have an infra container to share namespaces")
@ -192,6 +198,65 @@ func (r *Runtime) removeMalformedPod(ctx context.Context, p *Pod, ctrs []*Contai
return removedCtrs, nil return removedCtrs, nil
} }
// removePodCgroup deletes the cgroup recorded in p.state.CgroupPath.
//
// It is a no-op (nil) when the pod has no cgroup path. When the cgroup of the
// calling process is found to be below the pod cgroup, the processes of the
// current cgroup are first moved under a "cleanup" subtree of the pod cgroup's
// parent, so the pod cgroup can be destroyed. The actual removal depends on
// the configured cgroup manager:
//   - systemd: delegated to deleteSystemdCgroup;
//   - cgroupfs: the "conmon" sub-cgroup is deleted first, then the pod cgroup.
//
// Unlike best-effort cleanup code, every failure is returned to the caller,
// wrapped with %w so the underlying cause stays inspectable via errors.Is/As.
// NOTE(review): p.state is read directly; presumably the caller holds the pod
// lock and has refreshed the state (stopWithTimeout calls updatePod first) —
// confirm for any new call site.
func (p *Pod) removePodCgroup() error {
	// Remove pod cgroup, if present
	if p.state.CgroupPath == "" {
		return nil
	}
	logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)

	ownCgroup, err := utils.GetOwnCgroup()
	if err != nil {
		return fmt.Errorf("retrieving own cgroup: %w", err)
	}

	// if we are trying to delete a cgroup that is our ancestor, we need to move the
	// current process out of it before the cgroup is destroyed.
	if isSubDir(ownCgroup, string(filepath.Separator)+p.state.CgroupPath) {
		parent := path.Dir(p.state.CgroupPath)
		if err := utils.MoveUnderCgroup(parent, "cleanup", nil); err != nil {
			return fmt.Errorf("moving out of pod %s cgroup before removal: %w", p.ID(), err)
		}
	}

	switch p.runtime.config.Engine.CgroupManager {
	case config.SystemdCgroupsManager:
		if err := deleteSystemdCgroup(p.state.CgroupPath, p.ResourceLim()); err != nil {
			return fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
		}
	case config.CgroupfsCgroupsManager:
		// Delete the cgroupfs cgroups. The conmon cgroup lives below the pod
		// cgroup and must be deleted first.
		targets := []struct{ path, what string }{
			{filepath.Join(p.state.CgroupPath, "conmon"), "conmon cgroup"},
			{p.state.CgroupPath, "cgroup"},
		}
		for _, target := range targets {
			ctl, err := cgroups.Load(target.path)
			switch {
			case err == nil:
				if err := ctl.Delete(); err != nil {
					return fmt.Errorf("removing pod %s %s: %w", p.ID(), target.what, err)
				}
			case errors.Is(err, cgroups.ErrCgroupDeleted), errors.Is(err, cgroups.ErrCgroupV1Rootless):
				// These two sentinel errors are deliberately not failures:
				// there is nothing for us to delete at this path.
			default:
				return fmt.Errorf("retrieving pod %s %s: %w", p.ID(), target.what, err)
			}
		}
	default:
		// This should be caught much earlier; report it rather than
		// silently leaving the cgroup behind.
		return fmt.Errorf("unrecognized cgroup manager %s when removing pod %s cgroups: %w", p.runtime.config.Engine.CgroupManager, p.ID(), define.ErrInternal)
	}
	return nil
}
func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) (map[string]error, error) { func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) (map[string]error, error) {
removedCtrs := make(map[string]error) removedCtrs := make(map[string]error)
@ -269,70 +334,14 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
} }
} }
// Remove pod cgroup, if present // Remove pod cgroup
if p.state.CgroupPath != "" { if err := p.removePodCgroup(); err != nil {
logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
switch p.runtime.config.Engine.CgroupManager {
case config.SystemdCgroupsManager:
if err := deleteSystemdCgroup(p.state.CgroupPath, p.ResourceLim()); err != nil {
if removalErr == nil { if removalErr == nil {
removalErr = fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err) removalErr = fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
} else { } else {
logrus.Errorf("Deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err) logrus.Errorf("Deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
} }
} }
case config.CgroupfsCgroupsManager:
// Delete the cgroupfs cgroup
// Make sure the conmon cgroup is deleted first
// Since the pod is almost gone, don't bother failing
// hard - instead, just log errors.
conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
conmonCgroup, err := cgroups.Load(conmonCgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
if removalErr == nil {
removalErr = fmt.Errorf("retrieving pod %s conmon cgroup: %w", p.ID(), err)
} else {
logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
}
}
if err == nil {
if err = conmonCgroup.Delete(); err != nil {
if removalErr == nil {
removalErr = fmt.Errorf("removing pod %s conmon cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
}
}
}
cgroup, err := cgroups.Load(p.state.CgroupPath)
if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
if removalErr == nil {
removalErr = fmt.Errorf("retrieving pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
}
if err == nil {
if err := cgroup.Delete(); err != nil {
if removalErr == nil {
removalErr = fmt.Errorf("removing pod %s cgroup: %w", p.ID(), err)
} else {
logrus.Errorf("Deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
}
}
}
default:
// This should be caught much earlier, but let's still
// keep going so we make sure to evict the pod before
// ending up with an inconsistent state.
if removalErr == nil {
removalErr = fmt.Errorf("unrecognized cgroup manager %s when removing pod %s cgroups: %w", p.runtime.config.Engine.CgroupManager, p.ID(), define.ErrInternal)
} else {
logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.Engine.CgroupManager, p.ID())
}
}
}
if err := p.maybeRemoveServiceContainer(); err != nil { if err := p.maybeRemoveServiceContainer(); err != nil {
return removedCtrs, err return removedCtrs, err

View File

@ -1,9 +1,9 @@
package libpod package libpod
import ( import (
"github.com/containers/podman/v4/pkg/specgen" spec "github.com/opencontainers/runtime-spec/specs-go"
) )
func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error { func (r *Runtime) platformMakePod(pod *Pod, resourceLimits *spec.LinuxResources) (string, error) {
return nil return "", nil
} }

View File

@ -10,11 +10,12 @@ import (
"github.com/containers/common/pkg/config" "github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/pkg/rootless" "github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/pkg/specgen" spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
) )
func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error { func (r *Runtime) platformMakePod(pod *Pod, resourceLimits *spec.LinuxResources) (string, error) {
cgroupParent := ""
// Check Cgroup parent sanity, and set it if it was not set // Check Cgroup parent sanity, and set it if it was not set
if r.config.Cgroups() != "disabled" { if r.config.Cgroups() != "disabled" {
switch r.config.Engine.CgroupManager { switch r.config.Engine.CgroupManager {
@ -25,19 +26,18 @@ func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
if pod.config.CgroupParent == "" { if pod.config.CgroupParent == "" {
pod.config.CgroupParent = CgroupfsDefaultCgroupParent pod.config.CgroupParent = CgroupfsDefaultCgroupParent
} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") { } else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
return fmt.Errorf("systemd slice received as cgroup parent when using cgroupfs: %w", define.ErrInvalidArg) return "", fmt.Errorf("systemd slice received as cgroup parent when using cgroupfs: %w", define.ErrInvalidArg)
} }
// If we are set to use pod cgroups, set the cgroup parent that // If we are set to use pod cgroups, set the cgroup parent that
// all containers in the pod will share // all containers in the pod will share
if pod.config.UsePodCgroup { if pod.config.UsePodCgroup {
pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID()) pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
if p.InfraContainerSpec != nil { cgroupParent = pod.state.CgroupPath
p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath
// cgroupfs + rootless = permission denied when creating the cgroup. // cgroupfs + rootless = permission denied when creating the cgroup.
if !rootless.IsRootless() { if !rootless.IsRootless() {
res, err := GetLimits(p.ResourceLimits) res, err := GetLimits(resourceLimits)
if err != nil { if err != nil {
return err return "", err
} }
// Need to both create and update the cgroup // Need to both create and update the cgroup
// rather than create a new path in c/common for pod cgroup creation // rather than create a new path in c/common for pod cgroup creation
@ -45,12 +45,11 @@ func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
// populate the resource limits on the pod level // populate the resource limits on the pod level
cgc, err := cgroups.New(pod.state.CgroupPath, &res) cgc, err := cgroups.New(pod.state.CgroupPath, &res)
if err != nil { if err != nil {
return err return "", err
} }
err = cgc.Update(&res) err = cgc.Update(&res)
if err != nil { if err != nil {
return err return "", err
}
} }
} }
} }
@ -63,22 +62,20 @@ func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
pod.config.CgroupParent = SystemdDefaultCgroupParent pod.config.CgroupParent = SystemdDefaultCgroupParent
} }
} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") { } else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
return fmt.Errorf("did not receive systemd slice as cgroup parent when using systemd to manage cgroups: %w", define.ErrInvalidArg) return "", fmt.Errorf("did not receive systemd slice as cgroup parent when using systemd to manage cgroups: %w", define.ErrInvalidArg)
} }
// If we are set to use pod cgroups, set the cgroup parent that // If we are set to use pod cgroups, set the cgroup parent that
// all containers in the pod will share // all containers in the pod will share
if pod.config.UsePodCgroup { if pod.config.UsePodCgroup {
cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), p.ResourceLimits) cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), resourceLimits)
if err != nil { if err != nil {
return fmt.Errorf("unable to create pod cgroup for pod %s: %w", pod.ID(), err) return "", fmt.Errorf("unable to create pod cgroup for pod %s: %w", pod.ID(), err)
} }
pod.state.CgroupPath = cgroupPath pod.state.CgroupPath = cgroupPath
if p.InfraContainerSpec != nil { cgroupParent = pod.state.CgroupPath
p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath
}
} }
default: default:
return fmt.Errorf("unsupported Cgroup manager: %s - cannot validate cgroup parent: %w", r.config.Engine.CgroupManager, define.ErrInvalidArg) return "", fmt.Errorf("unsupported Cgroup manager: %s - cannot validate cgroup parent: %w", r.config.Engine.CgroupManager, define.ErrInvalidArg)
} }
} }
@ -86,5 +83,5 @@ func (r *Runtime) platformMakePod(pod *Pod, p specgen.PodSpecGenerator) error {
logrus.Debugf("Got pod cgroup as %s", pod.state.CgroupPath) logrus.Debugf("Got pod cgroup as %s", pod.state.CgroupPath)
} }
return nil return cgroupParent, nil
} }

View File

@ -5,6 +5,8 @@ package libpod
import ( import (
"fmt" "fmt"
"os"
"path/filepath"
"strings" "strings"
"syscall" "syscall"
@ -17,22 +19,36 @@ import (
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
// cgroupExist reports whether a cgroup with the given path, relative to the
// cgroup mount point, can be stat'ed on the filesystem.
//
// On a cgroup v2 unified hierarchy the path is resolved below /sys/fs/cgroup;
// otherwise it is resolved below the memory controller at
// /sys/fs/cgroup/memory. Any os.Stat failure (not only "not found") yields
// false.
func cgroupExist(path string) bool {
	root := "/sys/fs/cgroup/memory"
	// The detection error is intentionally discarded: when the mode cannot be
	// determined the returned flag is used as-is, exactly like a v1 result.
	if unified, _ := cgroups.IsCgroup2UnifiedMode(); unified {
		root = "/sys/fs/cgroup"
	}
	if _, statErr := os.Stat(filepath.Join(root, path)); statErr != nil {
		return false
	}
	return true
}
// systemdSliceFromPath makes a new systemd slice under the given parent with // systemdSliceFromPath makes a new systemd slice under the given parent with
// the given name. // the given name.
// The parent must be a slice. The name must NOT include ".slice" // The parent must be a slice. The name must NOT include ".slice"
func systemdSliceFromPath(parent, name string, resources *spec.LinuxResources) (string, error) { func systemdSliceFromPath(parent, name string, resources *spec.LinuxResources) (string, error) {
cgroupPath, err := assembleSystemdCgroupName(parent, name) cgroupPath, systemdPath, err := assembleSystemdCgroupName(parent, name)
if err != nil { if err != nil {
return "", err return "", err
} }
logrus.Debugf("Created cgroup path %s for parent %s and name %s", cgroupPath, parent, name) logrus.Debugf("Created cgroup path %s for parent %s and name %s", systemdPath, parent, name)
if err := makeSystemdCgroup(cgroupPath, resources); err != nil { if !cgroupExist(cgroupPath) {
if err := makeSystemdCgroup(systemdPath, resources); err != nil {
return "", fmt.Errorf("creating cgroup %s: %w", cgroupPath, err) return "", fmt.Errorf("creating cgroup %s: %w", cgroupPath, err)
} }
}
logrus.Debugf("Created cgroup %s", cgroupPath) logrus.Debugf("Created cgroup %s", systemdPath)
return cgroupPath, nil return cgroupPath, nil
} }
@ -88,19 +104,27 @@ func deleteSystemdCgroup(path string, resources *spec.LinuxResources) error {
} }
// assembleSystemdCgroupName creates a systemd cgroup path given a base and // assembleSystemdCgroupName creates a systemd cgroup path given a base and
// a new component to add. // a new component to add. It also returns the path to the cgroup as it is accessible
// below the cgroup mounts.
// The base MUST be systemd slice (end in .slice) // The base MUST be systemd slice (end in .slice)
func assembleSystemdCgroupName(baseSlice, newSlice string) (string, error) { func assembleSystemdCgroupName(baseSlice, newSlice string) (string, string, error) {
const sliceSuffix = ".slice" const sliceSuffix = ".slice"
if !strings.HasSuffix(baseSlice, sliceSuffix) { if !strings.HasSuffix(baseSlice, sliceSuffix) {
return "", fmt.Errorf("cannot assemble cgroup path with base %q - must end in .slice: %w", baseSlice, define.ErrInvalidArg) return "", "", fmt.Errorf("cannot assemble cgroup path with base %q - must end in .slice: %w", baseSlice, define.ErrInvalidArg)
} }
noSlice := strings.TrimSuffix(baseSlice, sliceSuffix) noSlice := strings.TrimSuffix(baseSlice, sliceSuffix)
final := fmt.Sprintf("%s/%s-%s%s", baseSlice, noSlice, newSlice, sliceSuffix) systemdPath := fmt.Sprintf("%s/%s-%s%s", baseSlice, noSlice, newSlice, sliceSuffix)
return final, nil if rootless.IsRootless() {
// When we run as rootless, the cgroup has a path like the following:
// /sys/fs/cgroup/user.slice/user-$UID.slice/user@$UID.service/user.slice/user-libpod_pod_$POD_ID.slice
uid := rootless.GetRootlessUID()
raw := fmt.Sprintf("user.slice/%s-%d.slice/user@%d.service/%s/%s-%s%s", noSlice, uid, uid, baseSlice, noSlice, newSlice, sliceSuffix)
return raw, systemdPath, nil
}
return systemdPath, systemdPath, nil
} }
var lvpRelabel = label.Relabel var lvpRelabel = label.Relabel

View File

@ -44,28 +44,18 @@ func MakePod(p *entities.PodSpec, rt *libpod.Runtime) (_ *libpod.Pod, finalErr e
p.PodSpecGen.InfraContainerSpec.RawImageName = imageName p.PodSpecGen.InfraContainerSpec.RawImageName = imageName
} }
if !p.PodSpecGen.NoInfra && p.PodSpecGen.InfraContainerSpec != nil { spec, err := MapSpec(&p.PodSpecGen)
var err error
p.PodSpecGen.InfraContainerSpec, err = MapSpec(&p.PodSpecGen)
if err != nil { if err != nil {
return nil, err return nil, err
} }
} if err := specgen.FinishThrottleDevices(spec); err != nil {
if !p.PodSpecGen.NoInfra {
err := specgen.FinishThrottleDevices(p.PodSpecGen.InfraContainerSpec)
if err != nil {
return nil, err return nil, err
} }
if p.PodSpecGen.InfraContainerSpec.ResourceLimits != nil && if err := specgen.WeightDevices(spec); err != nil {
p.PodSpecGen.InfraContainerSpec.ResourceLimits.BlockIO != nil {
p.PodSpecGen.ResourceLimits.BlockIO = p.PodSpecGen.InfraContainerSpec.ResourceLimits.BlockIO
}
err = specgen.WeightDevices(p.PodSpecGen.InfraContainerSpec)
if err != nil {
return nil, err return nil, err
} }
p.PodSpecGen.ResourceLimits = p.PodSpecGen.InfraContainerSpec.ResourceLimits if spec.ResourceLimits != nil && spec.ResourceLimits.BlockIO != nil {
p.PodSpecGen.ResourceLimits.BlockIO = spec.ResourceLimits.BlockIO
} }
options, err := createPodOptions(&p.PodSpecGen) options, err := createPodOptions(&p.PodSpecGen)
@ -123,11 +113,12 @@ func createPodOptions(p *specgen.PodSpecGenerator) ([]libpod.PodCreateOption, er
var ( var (
options []libpod.PodCreateOption options []libpod.PodCreateOption
) )
if !p.NoInfra {
options = append(options, libpod.WithInfraContainer())
if p.ShareParent == nil || (p.ShareParent != nil && *p.ShareParent) { if p.ShareParent == nil || (p.ShareParent != nil && *p.ShareParent) {
options = append(options, libpod.WithPodParent()) options = append(options, libpod.WithPodParent())
} }
if !p.NoInfra {
options = append(options, libpod.WithInfraContainer())
nsOptions, err := GetNamespaceOptions(p.SharedNamespaces, p.InfraContainerSpec.NetNS.IsHost()) nsOptions, err := GetNamespaceOptions(p.SharedNamespaces, p.InfraContainerSpec.NetNS.IsHost())
if err != nil { if err != nil {
return nil, err return nil, err
@ -176,12 +167,18 @@ func createPodOptions(p *specgen.PodSpecGenerator) ([]libpod.PodCreateOption, er
// MapSpec modifies the already filled Infra specgenerator, // MapSpec modifies the already filled Infra specgenerator,
// replacing necessary values with those specified in pod creation // replacing necessary values with those specified in pod creation
func MapSpec(p *specgen.PodSpecGenerator) (*specgen.SpecGenerator, error) { func MapSpec(p *specgen.PodSpecGenerator) (*specgen.SpecGenerator, error) {
var spec *specgen.SpecGenerator
if p.InfraContainerSpec != nil {
spec = p.InfraContainerSpec
} else {
spec = &specgen.SpecGenerator{}
}
if len(p.PortMappings) > 0 { if len(p.PortMappings) > 0 {
ports, err := ParsePortMapping(p.PortMappings, nil) ports, err := ParsePortMapping(p.PortMappings, nil)
if err != nil { if err != nil {
return nil, err return nil, err
} }
p.InfraContainerSpec.PortMappings = ports spec.PortMappings = ports
} }
switch p.NetNS.NSMode { switch p.NetNS.NSMode {
case specgen.Default, "": case specgen.Default, "":
@ -190,90 +187,90 @@ func MapSpec(p *specgen.PodSpecGenerator) (*specgen.SpecGenerator, error) {
break break
} }
case specgen.Bridge: case specgen.Bridge:
p.InfraContainerSpec.NetNS.NSMode = specgen.Bridge spec.NetNS.NSMode = specgen.Bridge
logrus.Debugf("Pod using bridge network mode") logrus.Debugf("Pod using bridge network mode")
case specgen.Private: case specgen.Private:
p.InfraContainerSpec.NetNS.NSMode = specgen.Private spec.NetNS.NSMode = specgen.Private
logrus.Debugf("Pod will use default network mode") logrus.Debugf("Pod will use default network mode")
case specgen.Host: case specgen.Host:
logrus.Debugf("Pod will use host networking") logrus.Debugf("Pod will use host networking")
if len(p.InfraContainerSpec.PortMappings) > 0 || if len(spec.PortMappings) > 0 ||
len(p.InfraContainerSpec.Networks) > 0 || len(spec.Networks) > 0 ||
p.InfraContainerSpec.NetNS.NSMode == specgen.NoNetwork { spec.NetNS.NSMode == specgen.NoNetwork {
return nil, fmt.Errorf("cannot set host network if network-related configuration is specified: %w", define.ErrInvalidArg) return nil, fmt.Errorf("cannot set host network if network-related configuration is specified: %w", define.ErrInvalidArg)
} }
p.InfraContainerSpec.NetNS.NSMode = specgen.Host spec.NetNS.NSMode = specgen.Host
case specgen.Slirp: case specgen.Slirp:
logrus.Debugf("Pod will use slirp4netns") logrus.Debugf("Pod will use slirp4netns")
if p.InfraContainerSpec.NetNS.NSMode != specgen.Host { if spec.NetNS.NSMode != specgen.Host {
p.InfraContainerSpec.NetworkOptions = p.NetworkOptions spec.NetworkOptions = p.NetworkOptions
p.InfraContainerSpec.NetNS.NSMode = specgen.Slirp spec.NetNS.NSMode = specgen.Slirp
} }
case specgen.Pasta: case specgen.Pasta:
logrus.Debugf("Pod will use pasta") logrus.Debugf("Pod will use pasta")
if p.InfraContainerSpec.NetNS.NSMode != specgen.Host { if spec.NetNS.NSMode != specgen.Host {
p.InfraContainerSpec.NetworkOptions = p.NetworkOptions spec.NetworkOptions = p.NetworkOptions
p.InfraContainerSpec.NetNS.NSMode = specgen.Pasta spec.NetNS.NSMode = specgen.Pasta
} }
case specgen.Path: case specgen.Path:
logrus.Debugf("Pod will use namespace path networking") logrus.Debugf("Pod will use namespace path networking")
p.InfraContainerSpec.NetNS.NSMode = specgen.Path spec.NetNS.NSMode = specgen.Path
p.InfraContainerSpec.NetNS.Value = p.PodNetworkConfig.NetNS.Value spec.NetNS.Value = p.PodNetworkConfig.NetNS.Value
case specgen.NoNetwork: case specgen.NoNetwork:
logrus.Debugf("Pod will not use networking") logrus.Debugf("Pod will not use networking")
if len(p.InfraContainerSpec.PortMappings) > 0 || if len(spec.PortMappings) > 0 ||
len(p.InfraContainerSpec.Networks) > 0 || len(spec.Networks) > 0 ||
p.InfraContainerSpec.NetNS.NSMode == specgen.Host { spec.NetNS.NSMode == specgen.Host {
return nil, fmt.Errorf("cannot disable pod network if network-related configuration is specified: %w", define.ErrInvalidArg) return nil, fmt.Errorf("cannot disable pod network if network-related configuration is specified: %w", define.ErrInvalidArg)
} }
p.InfraContainerSpec.NetNS.NSMode = specgen.NoNetwork spec.NetNS.NSMode = specgen.NoNetwork
default: default:
return nil, fmt.Errorf("pods presently do not support network mode %s", p.NetNS.NSMode) return nil, fmt.Errorf("pods presently do not support network mode %s", p.NetNS.NSMode)
} }
if len(p.InfraCommand) > 0 { if len(p.InfraCommand) > 0 {
p.InfraContainerSpec.Entrypoint = p.InfraCommand spec.Entrypoint = p.InfraCommand
} }
if len(p.HostAdd) > 0 { if len(p.HostAdd) > 0 {
p.InfraContainerSpec.HostAdd = p.HostAdd spec.HostAdd = p.HostAdd
} }
if len(p.DNSServer) > 0 { if len(p.DNSServer) > 0 {
var dnsServers []net.IP var dnsServers []net.IP
dnsServers = append(dnsServers, p.DNSServer...) dnsServers = append(dnsServers, p.DNSServer...)
p.InfraContainerSpec.DNSServers = dnsServers spec.DNSServers = dnsServers
} }
if len(p.DNSOption) > 0 { if len(p.DNSOption) > 0 {
p.InfraContainerSpec.DNSOptions = p.DNSOption spec.DNSOptions = p.DNSOption
} }
if len(p.DNSSearch) > 0 { if len(p.DNSSearch) > 0 {
p.InfraContainerSpec.DNSSearch = p.DNSSearch spec.DNSSearch = p.DNSSearch
} }
if p.NoManageResolvConf { if p.NoManageResolvConf {
p.InfraContainerSpec.UseImageResolvConf = true spec.UseImageResolvConf = true
} }
if len(p.Networks) > 0 { if len(p.Networks) > 0 {
p.InfraContainerSpec.Networks = p.Networks spec.Networks = p.Networks
} }
// deprecated cni networks for api users // deprecated cni networks for api users
if len(p.CNINetworks) > 0 { if len(p.CNINetworks) > 0 {
p.InfraContainerSpec.CNINetworks = p.CNINetworks spec.CNINetworks = p.CNINetworks
} }
if p.NoManageHosts { if p.NoManageHosts {
p.InfraContainerSpec.UseImageHosts = p.NoManageHosts spec.UseImageHosts = p.NoManageHosts
} }
if len(p.InfraConmonPidFile) > 0 { if len(p.InfraConmonPidFile) > 0 {
p.InfraContainerSpec.ConmonPidFile = p.InfraConmonPidFile spec.ConmonPidFile = p.InfraConmonPidFile
} }
if p.Sysctl != nil && len(p.Sysctl) > 0 { if p.Sysctl != nil && len(p.Sysctl) > 0 {
p.InfraContainerSpec.Sysctl = p.Sysctl spec.Sysctl = p.Sysctl
} }
p.InfraContainerSpec.Image = p.InfraImage spec.Image = p.InfraImage
return p.InfraContainerSpec, nil return spec, nil
} }
func PodConfigToSpec(rt *libpod.Runtime, spec *specgen.PodSpecGenerator, infraOptions *entities.ContainerCreateOptions, id string) (p *libpod.Pod, err error) { func PodConfigToSpec(rt *libpod.Runtime, spec *specgen.PodSpecGenerator, infraOptions *entities.ContainerCreateOptions, id string) (p *libpod.Pod, err error) {

View File

@ -707,4 +707,54 @@ function thingy_with_unique_id() {
run_podman rm -f -a run_podman rm -f -a
} }
# Verifies the pod cgroup lifecycle, with and without an infra container:
# the cgroup exists while the pod runs, is removed on "pod stop", is
# re-created on "pod start" with the memory limit still applied, and is
# removed again on "pod rm".
@test "podman pod cleans cgroup and keeps limits" {
skip_if_remote "we cannot check cgroup settings"
skip_if_rootless_cgroupsv1 "rootless cannot use cgroups on v1"
# Run the whole scenario once with an infra container and once without.
for infra in true false; do
# Create a pod with a 256M memory limit and start one container in it.
run_podman pod create --infra=$infra --memory=256M
podid="$output"
run_podman run -d --pod $podid $IMAGE top -d 2
# Read the pod's cgroup path from the inspect output.
run_podman pod inspect $podid
result=$(jq -r .CgroupPath <<< $output)
assert "$result" =~ "/" ".CgroupPath is a valid path"
# Map the reported path onto the filesystem: directly under the cgroup
# mount on v2, under the memory controller on v1.
if is_cgroupsv2; then
cgroup_path=/sys/fs/cgroup/$result
else
cgroup_path=/sys/fs/cgroup/memory/$result
fi
# The cgroup must exist while the pod is running.
if test ! -e $cgroup_path; then
die "the cgroup $cgroup_path does not exist"
fi
# Stopping the pod must remove its cgroup.
run_podman pod stop -t 0 $podid
if test -e $cgroup_path; then
die "the cgroup $cgroup_path should not exist after pod stop"
fi
# Starting the pod again must re-create the cgroup.
run_podman pod start $podid
if test ! -e $cgroup_path; then
die "the cgroup $cgroup_path does not exist"
fi
# validate that cgroup limits are in place after a restart
# issue #19175
if is_cgroupsv2; then
memory_limit_file=$cgroup_path/memory.max
else
memory_limit_file=$cgroup_path/memory.limit_in_bytes
fi
# 268435456 bytes == 256 * 1024 * 1024, i.e. the --memory=256M set above.
assert "$(< $memory_limit_file)" = "268435456" "Contents of $memory_limit_file"
# Removing the pod must remove its cgroup as well.
run_podman pod rm -t 0 -f $podid
if test -e $cgroup_path; then
die "the cgroup $cgroup_path should not exist after pod rm"
fi
done
}
# vim: filetype=sh # vim: filetype=sh

View File

@ -47,7 +47,7 @@ func RunUnderSystemdScope(pid int, slice string, unitName string) error {
// On errors check if the cgroup already exists, if it does move the process there // On errors check if the cgroup already exists, if it does move the process there
if props, err := conn.GetUnitTypePropertiesContext(context.Background(), unitName, "Scope"); err == nil { if props, err := conn.GetUnitTypePropertiesContext(context.Background(), unitName, "Scope"); err == nil {
if cgroup, ok := props["ControlGroup"].(string); ok && cgroup != "" { if cgroup, ok := props["ControlGroup"].(string); ok && cgroup != "" {
if err := moveUnderCgroup(cgroup, "", []uint32{uint32(pid)}); err == nil { if err := MoveUnderCgroup(cgroup, "", []uint32{uint32(pid)}); err == nil {
return nil return nil
} }
// On errors return the original error message we got from StartTransientUnit. // On errors return the original error message we got from StartTransientUnit.
@ -107,13 +107,13 @@ func GetCgroupProcess(pid int) (string, error) {
// MoveUnderCgroupSubtree moves the PID under a cgroup subtree. // MoveUnderCgroupSubtree moves the PID under a cgroup subtree.
func MoveUnderCgroupSubtree(subtree string) error { func MoveUnderCgroupSubtree(subtree string) error {
return moveUnderCgroup("", subtree, nil) return MoveUnderCgroup("", subtree, nil)
} }
// moveUnderCgroup moves a group of processes to a new cgroup. // MoveUnderCgroup moves a group of processes to a new cgroup.
// If cgroup is the empty string, then the current calling process cgroup is used. // If cgroup is the empty string, then the current calling process cgroup is used.
// If processes is empty, then the processes from the current cgroup are moved. // If processes is empty, then the processes from the current cgroup are moved.
func moveUnderCgroup(cgroup, subtree string, processes []uint32) error { func MoveUnderCgroup(cgroup, subtree string, processes []uint32) error {
procFile := "/proc/self/cgroup" procFile := "/proc/self/cgroup"
f, err := os.Open(procFile) f, err := os.Open(procFile)
if err != nil { if err != nil {