mirror of
https://github.com/containers/podman.git
synced 2025-08-16 12:28:48 +08:00

Also make sure that the limits we set for rootless are not higher than what we'd set for root containers. Rootless containers failed to start when the calling user already had a ulimit (e.g. on NOFILE) set. This is basically a cherry-pick of 76f8efc0d0d into specgen. Signed-off-by: Ralf Haferkamp <rhafer@suse.com>
368 lines
9.0 KiB
Go
368 lines
9.0 KiB
Go
package generate
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
|
|
"github.com/containers/common/pkg/config"
|
|
"github.com/containers/libpod/libpod"
|
|
"github.com/containers/libpod/libpod/define"
|
|
"github.com/containers/libpod/libpod/image"
|
|
"github.com/containers/libpod/pkg/rootless"
|
|
"github.com/containers/libpod/pkg/specgen"
|
|
"github.com/opencontainers/runc/libcontainer/user"
|
|
spec "github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/opencontainers/runtime-tools/generate"
|
|
"github.com/pkg/errors"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
func addRlimits(s *specgen.SpecGenerator, g *generate.Generator) error {
|
|
var (
|
|
kernelMax uint64 = 1048576
|
|
isRootless = rootless.IsRootless()
|
|
nofileSet = false
|
|
nprocSet = false
|
|
)
|
|
|
|
if s.Rlimits == nil {
|
|
g.Config.Process.Rlimits = nil
|
|
return nil
|
|
}
|
|
|
|
for _, u := range s.Rlimits {
|
|
name := "RLIMIT_" + strings.ToUpper(u.Type)
|
|
if name == "RLIMIT_NOFILE" {
|
|
nofileSet = true
|
|
} else if name == "RLIMIT_NPROC" {
|
|
nprocSet = true
|
|
}
|
|
g.AddProcessRlimits(name, u.Hard, u.Soft)
|
|
}
|
|
|
|
// If not explicitly overridden by the user, default number of open
|
|
// files and number of processes to the maximum they can be set to
|
|
// (without overriding a sysctl)
|
|
if !nofileSet {
|
|
max := kernelMax
|
|
current := kernelMax
|
|
if isRootless {
|
|
var rlimit unix.Rlimit
|
|
if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rlimit); err != nil {
|
|
logrus.Warnf("failed to return RLIMIT_NOFILE ulimit %q", err)
|
|
}
|
|
if rlimit.Cur < current {
|
|
current = rlimit.Cur
|
|
}
|
|
if rlimit.Max < max {
|
|
max = rlimit.Max
|
|
}
|
|
}
|
|
g.AddProcessRlimits("RLIMIT_NOFILE", max, current)
|
|
}
|
|
if !nprocSet {
|
|
max := kernelMax
|
|
current := kernelMax
|
|
if isRootless {
|
|
var rlimit unix.Rlimit
|
|
if err := unix.Getrlimit(unix.RLIMIT_NPROC, &rlimit); err != nil {
|
|
logrus.Warnf("failed to return RLIMIT_NPROC ulimit %q", err)
|
|
}
|
|
if rlimit.Cur < current {
|
|
current = rlimit.Cur
|
|
}
|
|
if rlimit.Max < max {
|
|
max = rlimit.Max
|
|
}
|
|
}
|
|
g.AddProcessRlimits("RLIMIT_NPROC", max, current)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Produce the final command for the container.
|
|
func makeCommand(ctx context.Context, s *specgen.SpecGenerator, img *image.Image, rtc *config.Config) ([]string, error) {
|
|
finalCommand := []string{}
|
|
|
|
entrypoint := s.Entrypoint
|
|
if len(entrypoint) == 0 && img != nil {
|
|
newEntry, err := img.Entrypoint(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
entrypoint = newEntry
|
|
}
|
|
|
|
finalCommand = append(finalCommand, entrypoint...)
|
|
|
|
command := s.Command
|
|
if command == nil && img != nil {
|
|
newCmd, err := img.Cmd(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
command = newCmd
|
|
}
|
|
|
|
finalCommand = append(finalCommand, command...)
|
|
|
|
if len(finalCommand) == 0 {
|
|
return nil, errors.Errorf("no command or entrypoint provided, and no CMD or ENTRYPOINT from image")
|
|
}
|
|
|
|
if s.Init {
|
|
initPath := s.InitPath
|
|
if initPath == "" && rtc != nil {
|
|
initPath = rtc.Engine.InitPath
|
|
}
|
|
if initPath == "" {
|
|
return nil, errors.Errorf("no path to init binary found but container requested an init")
|
|
}
|
|
finalCommand = append([]string{"/dev/init", "--"}, finalCommand...)
|
|
}
|
|
|
|
return finalCommand, nil
|
|
}
|
|
|
|
func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *image.Image, mounts []spec.Mount, pod *libpod.Pod) (*spec.Spec, error) {
|
|
var (
|
|
inUserNS bool
|
|
)
|
|
cgroupPerm := "ro"
|
|
g, err := generate.New("linux")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// Remove the default /dev/shm mount to ensure we overwrite it
|
|
g.RemoveMount("/dev/shm")
|
|
g.HostSpecific = true
|
|
addCgroup := true
|
|
canMountSys := true
|
|
|
|
isRootless := rootless.IsRootless()
|
|
if isRootless {
|
|
inUserNS = true
|
|
}
|
|
if !s.UserNS.IsHost() {
|
|
if s.UserNS.IsContainer() || s.UserNS.IsPath() {
|
|
inUserNS = true
|
|
}
|
|
if s.UserNS.IsPrivate() {
|
|
inUserNS = true
|
|
}
|
|
}
|
|
if inUserNS && s.NetNS.IsHost() {
|
|
canMountSys = false
|
|
}
|
|
|
|
if s.Privileged && canMountSys {
|
|
cgroupPerm = "rw"
|
|
g.RemoveMount("/sys")
|
|
sysMnt := spec.Mount{
|
|
Destination: "/sys",
|
|
Type: "sysfs",
|
|
Source: "sysfs",
|
|
Options: []string{"rprivate", "nosuid", "noexec", "nodev", "rw"},
|
|
}
|
|
g.AddMount(sysMnt)
|
|
} else if !canMountSys {
|
|
addCgroup = false
|
|
g.RemoveMount("/sys")
|
|
r := "ro"
|
|
if s.Privileged {
|
|
r = "rw"
|
|
}
|
|
sysMnt := spec.Mount{
|
|
Destination: "/sys",
|
|
Type: "bind", // should we use a constant for this, like createconfig?
|
|
Source: "/sys",
|
|
Options: []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"},
|
|
}
|
|
g.AddMount(sysMnt)
|
|
if !s.Privileged && isRootless {
|
|
g.AddLinuxMaskedPaths("/sys/kernel")
|
|
}
|
|
}
|
|
gid5Available := true
|
|
if isRootless {
|
|
nGids, err := GetAvailableGids()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
gid5Available = nGids >= 5
|
|
}
|
|
// When using a different user namespace, check that the GID 5 is mapped inside
|
|
// the container.
|
|
if gid5Available && (s.IDMappings != nil && len(s.IDMappings.GIDMap) > 0) {
|
|
mappingFound := false
|
|
for _, r := range s.IDMappings.GIDMap {
|
|
if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size {
|
|
mappingFound = true
|
|
break
|
|
}
|
|
}
|
|
if !mappingFound {
|
|
gid5Available = false
|
|
}
|
|
|
|
}
|
|
if !gid5Available {
|
|
// If we have no GID mappings, the gid=5 default option would fail, so drop it.
|
|
g.RemoveMount("/dev/pts")
|
|
devPts := spec.Mount{
|
|
Destination: "/dev/pts",
|
|
Type: "devpts",
|
|
Source: "devpts",
|
|
Options: []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"},
|
|
}
|
|
g.AddMount(devPts)
|
|
}
|
|
|
|
if inUserNS && s.IpcNS.IsHost() {
|
|
g.RemoveMount("/dev/mqueue")
|
|
devMqueue := spec.Mount{
|
|
Destination: "/dev/mqueue",
|
|
Type: "bind", // constant ?
|
|
Source: "/dev/mqueue",
|
|
Options: []string{"bind", "nosuid", "noexec", "nodev"},
|
|
}
|
|
g.AddMount(devMqueue)
|
|
}
|
|
if inUserNS && s.PidNS.IsHost() {
|
|
g.RemoveMount("/proc")
|
|
procMount := spec.Mount{
|
|
Destination: "/proc",
|
|
Type: TypeBind,
|
|
Source: "/proc",
|
|
Options: []string{"rbind", "nosuid", "noexec", "nodev"},
|
|
}
|
|
g.AddMount(procMount)
|
|
}
|
|
|
|
if addCgroup {
|
|
cgroupMnt := spec.Mount{
|
|
Destination: "/sys/fs/cgroup",
|
|
Type: "cgroup",
|
|
Source: "cgroup",
|
|
Options: []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm},
|
|
}
|
|
g.AddMount(cgroupMnt)
|
|
}
|
|
g.SetProcessCwd(s.WorkDir)
|
|
|
|
finalCmd, err := makeCommand(ctx, s, newImage, rtc)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
g.SetProcessArgs(finalCmd)
|
|
|
|
g.SetProcessTerminal(s.Terminal)
|
|
|
|
for key, val := range s.Annotations {
|
|
g.AddAnnotation(key, val)
|
|
}
|
|
g.AddProcessEnv("container", "podman")
|
|
|
|
g.Config.Linux.Resources = s.ResourceLimits
|
|
|
|
// Devices
|
|
if s.Privileged {
|
|
// If privileged, we need to add all the host devices to the
|
|
// spec. We do not add the user provided ones because we are
|
|
// already adding them all.
|
|
if err := addPrivilegedDevices(&g); err != nil {
|
|
return nil, err
|
|
}
|
|
} else {
|
|
// add default devices from containers.conf
|
|
for _, device := range rtc.Containers.Devices {
|
|
if err := DevicesFromPath(&g, device); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
// add default devices specified by caller
|
|
for _, device := range s.Devices {
|
|
if err := DevicesFromPath(&g, device.Path); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
}
|
|
|
|
// SECURITY OPTS
|
|
g.SetProcessNoNewPrivileges(s.NoNewPrivileges)
|
|
|
|
if !s.Privileged {
|
|
g.SetProcessApparmorProfile(s.ApparmorProfile)
|
|
}
|
|
|
|
BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), &g)
|
|
|
|
for name, val := range s.Env {
|
|
g.AddProcessEnv(name, val)
|
|
}
|
|
|
|
if err := addRlimits(s, &g); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// NAMESPACES
|
|
if err := specConfigureNamespaces(s, &g, rt, pod); err != nil {
|
|
return nil, err
|
|
}
|
|
configSpec := g.Config
|
|
|
|
if err := securityConfigureGenerator(s, &g, newImage, rtc); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// BIND MOUNTS
|
|
configSpec.Mounts = SupercedeUserMounts(mounts, configSpec.Mounts)
|
|
// Process mounts to ensure correct options
|
|
if err := InitFSMounts(configSpec.Mounts); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Add annotations
|
|
if configSpec.Annotations == nil {
|
|
configSpec.Annotations = make(map[string]string)
|
|
}
|
|
|
|
if s.Remove {
|
|
configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue
|
|
} else {
|
|
configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse
|
|
}
|
|
|
|
if len(s.VolumesFrom) > 0 {
|
|
configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",")
|
|
}
|
|
|
|
if s.Privileged {
|
|
configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue
|
|
} else {
|
|
configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse
|
|
}
|
|
|
|
if s.Init {
|
|
configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue
|
|
} else {
|
|
configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse
|
|
}
|
|
|
|
return configSpec, nil
|
|
}
|
|
|
|
func GetAvailableGids() (int64, error) {
|
|
idMap, err := user.ParseIDMapFile("/proc/self/gid_map")
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
count := int64(0)
|
|
for _, r := range idMap {
|
|
count += r.Count
|
|
}
|
|
return count, nil
|
|
}
|