Mirror of https://github.com/containers/podman.git, synced 2025-09-26 00:06:04 +08:00

The infra container should go through the same creation process as regular containers. This change reaches from the cmd level down, adding new container CLI opts and specgen creation functions. Both container and pod CLI options are now populated in cmd and used to create a podSpecgen and a containerSpecgen. The process then goes as follows: FillOutSpecGen (infra) -> MapSpec (podOpts -> infraOpts) -> PodCreate -> MakePod -> createPodOptions -> NewPod -> CompleteSpec (infra) -> MakeContainer -> NewContainer -> newContainer -> AddInfra (to pod state). Signed-off-by: cdoern <cdoern@redhat.com>
393 lines
10 KiB
Go
package generate

import (
	"context"
	"path"
	"strings"

	"github.com/containers/common/libimage"
	"github.com/containers/common/pkg/config"
	"github.com/containers/podman/v3/libpod"
	"github.com/containers/podman/v3/libpod/define"
	"github.com/containers/podman/v3/pkg/cgroups"
	"github.com/containers/podman/v3/pkg/rootless"
	"github.com/containers/podman/v3/pkg/specgen"
	spec "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/opencontainers/runtime-tools/generate"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"
)
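
// setProcOpts applies any user-supplied /proc mount options from the
// SpecGenerator to the generated /proc mount; it is a no-op when no ProcOpts
// are set.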
func setProcOpts(s *specgen.SpecGenerator, g *generate.Generator) {
	if s.ProcOpts == nil {
		return
	}
	for i := range g.Config.Mounts {
		if g.Config.Mounts[i].Destination == "/proc" {
			g.Config.Mounts[i].Options = s.ProcOpts
			return
		}
	}
}
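
// addRlimits copies the user-requested rlimits into the runtime spec and,
// unless the user set them explicitly, defaults RLIMIT_NOFILE and RLIMIT_NPROC
// to the maximum value (clamped to the current limits when running rootless).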
func addRlimits(s *specgen.SpecGenerator, g *generate.Generator) error {
	var (
		isRootless = rootless.IsRootless()
		nofileSet  = false
		nprocSet   = false
	)

	if s.Rlimits == nil {
		g.Config.Process.Rlimits = nil
		return nil
	}

	for _, u := range s.Rlimits {
		name := "RLIMIT_" + strings.ToUpper(u.Type)
		if name == "RLIMIT_NOFILE" {
			nofileSet = true
		} else if name == "RLIMIT_NPROC" {
			nprocSet = true
		}
		g.AddProcessRlimits(name, u.Hard, u.Soft)
	}

	// If not explicitly overridden by the user, default number of open
	// files and number of processes to the maximum they can be set to
	// (without overriding a sysctl)
	if !nofileSet {
		max := define.RLimitDefaultValue
		current := define.RLimitDefaultValue
		if isRootless {
			var rlimit unix.Rlimit
			if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rlimit); err != nil {
				logrus.Warnf("failed to return RLIMIT_NOFILE ulimit %q", err)
			}
			if rlimit.Cur < current {
				current = rlimit.Cur
			}
			if rlimit.Max < max {
				max = rlimit.Max
			}
		}
		g.AddProcessRlimits("RLIMIT_NOFILE", max, current)
	}
	if !nprocSet {
		max := define.RLimitDefaultValue
		current := define.RLimitDefaultValue
		if isRootless {
			var rlimit unix.Rlimit
			if err := unix.Getrlimit(unix.RLIMIT_NPROC, &rlimit); err != nil {
				logrus.Warnf("failed to return RLIMIT_NPROC ulimit %q", err)
			}
			if rlimit.Cur < current {
				current = rlimit.Cur
			}
			if rlimit.Max < max {
				max = rlimit.Max
			}
		}
		g.AddProcessRlimits("RLIMIT_NPROC", max, current)
	}

	return nil
}

// Produce the final command for the container.
func makeCommand(ctx context.Context, s *specgen.SpecGenerator, imageData *libimage.ImageData, rtc *config.Config) ([]string, error) {
	finalCommand := []string{}

	entrypoint := s.Entrypoint
	if entrypoint == nil && imageData != nil {
		entrypoint = imageData.Config.Entrypoint
	}

	// Don't append the entrypoint if it is [""]
	if len(entrypoint) != 1 || entrypoint[0] != "" {
		finalCommand = append(finalCommand, entrypoint...)
	}

	// Only use image command if the user did not manually set an
	// entrypoint.
	command := s.Command
	if len(command) == 0 && imageData != nil && len(s.Entrypoint) == 0 {
		command = imageData.Config.Cmd
	}

	finalCommand = append(finalCommand, command...)

	if len(finalCommand) == 0 {
		return nil, errors.Errorf("no command or entrypoint provided, and no CMD or ENTRYPOINT from image")
	}

	if s.Init {
		initPath := s.InitPath
		if initPath == "" && rtc != nil {
			initPath = rtc.Engine.InitPath
		}
		if initPath == "" {
			return nil, errors.Errorf("no path to init binary found but container requested an init")
		}
		finalCommand = append([]string{"/dev/init", "--"}, finalCommand...)
	}

	return finalCommand, nil
}

// canMountSys is a best-effort heuristic to detect whether mounting a new sysfs is permitted in the container
func canMountSys(isRootless, isNewUserns bool, s *specgen.SpecGenerator) bool {
	if s.NetNS.IsHost() && (isRootless || isNewUserns) {
		return false
	}
	if isNewUserns {
		switch s.NetNS.NSMode {
		case specgen.Slirp, specgen.Private, specgen.NoNetwork, specgen.Bridge:
			return true
		default:
			return false
		}
	}
	return true
}
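
// getCGroupPermissons decides whether /sys/fs/cgroup is mounted read-only or
// read-write: on cgroup v1 it is always "ro"; on cgroup v2 it is "rw" only
// when the user unmasked ALL or /sys/fs/cgroup explicitly.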
func getCGroupPermissons(unmask []string) string {
	ro := "ro"
	rw := "rw"
	cgroup := "/sys/fs/cgroup"

	cgroupv2, _ := cgroups.IsCgroup2UnifiedMode()
	if !cgroupv2 {
		return ro
	}

	if unmask != nil && unmask[0] == "ALL" {
		return rw
	}

	for _, p := range unmask {
		if path.Clean(p) == cgroup {
			return rw
		}
	}
	return ro
}

// SpecGenToOCI returns the base configuration for the container.
func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *libimage.Image, mounts []spec.Mount, pod *libpod.Pod, finalCmd []string) (*spec.Spec, error) {
	cgroupPerm := getCGroupPermissons(s.Unmask)

	g, err := generate.New("linux")
	if err != nil {
		return nil, err
	}
	// Remove the default /dev/shm mount to ensure we overwrite it
	g.RemoveMount("/dev/shm")
	g.HostSpecific = true
	addCgroup := true

	isRootless := rootless.IsRootless()
	isNewUserns := s.UserNS.IsContainer() || s.UserNS.IsPath() || s.UserNS.IsPrivate()

	canMountSys := canMountSys(isRootless, isNewUserns, s)
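
	// Decide how /sys is provided. Privileged containers that are allowed to
	// mount a fresh sysfs get a read-write sysfs mount; containers that cannot
	// mount sysfs at all (e.g. host network while rootless or in a new user
	// namespace) bind-mount the host's /sys instead and skip the cgroup mount.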
	if s.Privileged && canMountSys {
		cgroupPerm = "rw"
		g.RemoveMount("/sys")
		sysMnt := spec.Mount{
			Destination: "/sys",
			Type:        "sysfs",
			Source:      "sysfs",
			Options:     []string{"rprivate", "nosuid", "noexec", "nodev", "rw"},
		}
		g.AddMount(sysMnt)
	}
	if !canMountSys {
		addCgroup = false
		g.RemoveMount("/sys")
		r := "ro"
		if s.Privileged {
			r = "rw"
		}
		sysMnt := spec.Mount{
			Destination: "/sys",
			Type:        "bind", // should we use a constant for this, like createconfig?
			Source:      "/sys",
			Options:     []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"},
		}
		g.AddMount(sysMnt)
		if !s.Privileged && isRootless {
			g.AddLinuxMaskedPaths("/sys/kernel")
		}
	}
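
	// The devpts mount defaults to gid=5 (the tty group); make sure GID 5 is
	// actually usable in this user namespace before relying on that option.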
	gid5Available := true
	if isRootless {
		nGids, err := rootless.GetAvailableGids()
		if err != nil {
			return nil, err
		}
		gid5Available = nGids >= 5
	}
	// When using a different user namespace, check that the GID 5 is mapped inside
	// the container.
	if gid5Available && (s.IDMappings != nil && len(s.IDMappings.GIDMap) > 0) {
		mappingFound := false
		for _, r := range s.IDMappings.GIDMap {
			if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size {
				mappingFound = true
				break
			}
		}
		if !mappingFound {
			gid5Available = false
		}
	}
	if !gid5Available {
		// If we have no GID mappings, the gid=5 default option would fail, so drop it.
		g.RemoveMount("/dev/pts")
		devPts := spec.Mount{
			Destination: "/dev/pts",
			Type:        "devpts",
			Source:      "devpts",
			Options:     []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"},
		}
		g.AddMount(devPts)
	}

	inUserNS := isRootless || isNewUserns
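
	// When sharing the host IPC or PID namespace from inside a user namespace,
	// a fresh mqueue or proc cannot be mounted, so bind-mount the host's paths
	// instead.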
	if inUserNS && s.IpcNS.IsHost() {
		g.RemoveMount("/dev/mqueue")
		devMqueue := spec.Mount{
			Destination: "/dev/mqueue",
			Type:        "bind", // constant ?
			Source:      "/dev/mqueue",
			Options:     []string{"bind", "nosuid", "noexec", "nodev"},
		}
		g.AddMount(devMqueue)
	}
	if inUserNS && s.PidNS.IsHost() {
		g.RemoveMount("/proc")
		procMount := spec.Mount{
			Destination: "/proc",
			Type:        define.TypeBind,
			Source:      "/proc",
			Options:     []string{"rbind", "nosuid", "noexec", "nodev"},
		}
		g.AddMount(procMount)
	}
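
	// Mount /sys/fs/cgroup with the permissions computed above, unless the /sys
	// handling earlier determined it cannot be mounted.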
	if addCgroup {
		cgroupMnt := spec.Mount{
			Destination: "/sys/fs/cgroup",
			Type:        "cgroup",
			Source:      "cgroup",
			Options:     []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm},
		}
		g.AddMount(cgroupMnt)
	}

	g.Config.Linux.Personality = s.Personality

	g.SetProcessCwd(s.WorkDir)

	g.SetProcessArgs(finalCmd)

	g.SetProcessTerminal(s.Terminal)

	for key, val := range s.Annotations {
		g.AddAnnotation(key, val)
	}
	g.AddProcessEnv("container", "podman")

	g.Config.Linux.Resources = s.ResourceLimits

	// Devices
	if s.Privileged {
		// If privileged, we need to add all the host devices to the
		// spec. We do not add the user provided ones because we are
		// already adding them all.
		if err := addPrivilegedDevices(&g); err != nil {
			return nil, err
		}
	} else {
		// add default devices from containers.conf
		for _, device := range rtc.Containers.Devices {
			if err := DevicesFromPath(&g, device); err != nil {
				return nil, err
			}
		}
		// add devices specified by the caller
		for _, device := range s.Devices {
			if err := DevicesFromPath(&g, device.Path); err != nil {
				return nil, err
			}
		}
	}

	for _, dev := range s.DeviceCGroupRule {
		g.AddLinuxResourcesDevice(true, dev.Type, dev.Major, dev.Minor, dev.Access)
	}
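
	// Mask the parts of /proc and /sys that the container should not see,
	// honoring any explicit Mask/Unmask requests from the user.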
	BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), s.Mask, s.Unmask, &g)

	for name, val := range s.Env {
		g.AddProcessEnv(name, val)
	}

	if err := addRlimits(s, &g); err != nil {
		return nil, err
	}

	// NAMESPACES
	if err := specConfigureNamespaces(s, &g, rt, pod); err != nil {
		return nil, err
	}
	configSpec := g.Config

	if err := securityConfigureGenerator(s, &g, newImage, rtc); err != nil {
		return nil, err
	}

	// BIND MOUNTS
	configSpec.Mounts = SupersedeUserMounts(mounts, configSpec.Mounts)
	// Process mounts to ensure correct options
	if err := InitFSMounts(configSpec.Mounts); err != nil {
		return nil, err
	}

	// Add annotations
	if configSpec.Annotations == nil {
		configSpec.Annotations = make(map[string]string)
	}

	if s.Remove {
		configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue
	} else {
		configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse
	}

	if len(s.VolumesFrom) > 0 {
		configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",")
	}

	if s.Privileged {
		configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue
	} else {
		configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse
	}

	if s.Init {
		configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue
	} else {
		configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse
	}
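
	// Apply the user-requested OOM score adjustment, then apply any /proc mount
	// options now that the final set of mounts is assembled.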
	if s.OOMScoreAdj != nil {
		g.SetProcessOOMScoreAdj(*s.OOMScoreAdj)
	}
	setProcOpts(s, &g)

	return configSpec, nil
}