Files
podman/pkg/specgenutil/specgen.go
Paul Holzinger a158eae7ff podman run use pod userns even with --pod-id-file
The pod was set after we checked the namespace and the namespace code
only checked the --pod flag but didn't consider --pod-id-file option.
As such fix the check to first set the pod option on the spec then use
that for the namespace. Also make sure we always use an empty default
otherwise it would be impossible in the backend to know if a user
requested a specific userns or not, i.e. even in case of a set
PODMAN_USERNS env a container should still get the userns from the pod
and not use the var in this case. Therefore unset it from the default
cli value.

There are more issues here around --pod-id-file and cli validation that
does not consider the option as conflicting with --userns like --pod
does but I decided to fix the bug at hand and don't try to fix the
entire mess which most likely would take days.

Fixes #22931

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
2024-06-24 17:18:08 +02:00

1290 lines
34 KiB
Go

package specgenutil
import (
"encoding/json"
"errors"
"fmt"
"os"
"strconv"
"strings"
"time"
"github.com/containers/common/pkg/config"
"github.com/containers/image/v5/manifest"
"github.com/containers/podman/v5/cmd/podman/parse"
"github.com/containers/podman/v5/libpod/define"
"github.com/containers/podman/v5/pkg/domain/entities"
envLib "github.com/containers/podman/v5/pkg/env"
"github.com/containers/podman/v5/pkg/namespaces"
"github.com/containers/podman/v5/pkg/specgen"
systemdDefine "github.com/containers/podman/v5/pkg/systemd/define"
"github.com/containers/podman/v5/pkg/util"
"github.com/docker/go-units"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux"
)
const (
rlimitPrefix = "rlimit_"
)
func getCPULimits(c *entities.ContainerCreateOptions) *specs.LinuxCPU {
cpu := &specs.LinuxCPU{}
hasLimits := false
if c.CPUS > 0 {
period, quota := util.CoresToPeriodAndQuota(c.CPUS)
cpu.Period = &period
cpu.Quota = &quota
hasLimits = true
}
if c.CPUShares > 0 {
cpu.Shares = &c.CPUShares
hasLimits = true
}
if c.CPUPeriod > 0 {
cpu.Period = &c.CPUPeriod
hasLimits = true
}
if c.CPUSetCPUs != "" {
cpu.Cpus = c.CPUSetCPUs
hasLimits = true
}
if c.CPUSetMems != "" {
cpu.Mems = c.CPUSetMems
hasLimits = true
}
if c.CPUQuota > 0 {
cpu.Quota = &c.CPUQuota
hasLimits = true
}
if c.CPURTPeriod > 0 {
cpu.RealtimePeriod = &c.CPURTPeriod
hasLimits = true
}
if c.CPURTRuntime > 0 {
cpu.RealtimeRuntime = &c.CPURTRuntime
hasLimits = true
}
if !hasLimits {
return nil
}
return cpu
}
func getIOLimits(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) (*specs.LinuxBlockIO, error) {
var err error
io := &specs.LinuxBlockIO{}
if s.ResourceLimits == nil {
s.ResourceLimits = &specs.LinuxResources{}
}
hasLimits := false
if b := c.BlkIOWeight; len(b) > 0 {
if s.ResourceLimits.BlockIO == nil {
s.ResourceLimits.BlockIO = &specs.LinuxBlockIO{}
}
u, err := strconv.ParseUint(b, 10, 16)
if err != nil {
return nil, fmt.Errorf("invalid value for blkio-weight: %w", err)
}
nu := uint16(u)
io.Weight = &nu
s.ResourceLimits.BlockIO.Weight = &nu
hasLimits = true
}
if len(c.BlkIOWeightDevice) > 0 {
if s.WeightDevice, err = parseWeightDevices(c.BlkIOWeightDevice); err != nil {
return nil, err
}
hasLimits = true
}
if bps := c.DeviceReadBPs; len(bps) > 0 {
if s.ThrottleReadBpsDevice, err = parseThrottleBPSDevices(bps); err != nil {
return nil, err
}
hasLimits = true
}
if bps := c.DeviceWriteBPs; len(bps) > 0 {
if s.ThrottleWriteBpsDevice, err = parseThrottleBPSDevices(bps); err != nil {
return nil, err
}
hasLimits = true
}
if iops := c.DeviceReadIOPs; len(iops) > 0 {
if s.ThrottleReadIOPSDevice, err = parseThrottleIOPsDevices(iops); err != nil {
return nil, err
}
hasLimits = true
}
if iops := c.DeviceWriteIOPs; len(iops) > 0 {
if s.ThrottleWriteIOPSDevice, err = parseThrottleIOPsDevices(iops); err != nil {
return nil, err
}
hasLimits = true
}
if !hasLimits {
return nil, nil
}
return io, nil
}
func LimitToSwap(memory *specs.LinuxMemory, swap string, ml int64) {
if ml > 0 {
memory.Limit = &ml
if swap == "" {
limit := 2 * ml
memory.Swap = &(limit)
}
}
}
func getMemoryLimits(c *entities.ContainerCreateOptions) (*specs.LinuxMemory, error) {
var err error
memory := &specs.LinuxMemory{}
hasLimits := false
if m := c.Memory; len(m) > 0 {
ml, err := units.RAMInBytes(m)
if err != nil {
return nil, fmt.Errorf("invalid value for memory: %w", err)
}
LimitToSwap(memory, c.MemorySwap, ml)
hasLimits = true
}
if m := c.MemoryReservation; len(m) > 0 {
mr, err := units.RAMInBytes(m)
if err != nil {
return nil, fmt.Errorf("invalid value for memory: %w", err)
}
memory.Reservation = &mr
hasLimits = true
}
if m := c.MemorySwap; len(m) > 0 {
var ms int64
// only set memory swap if it was set
// -1 indicates unlimited
if m != "-1" {
ms, err = units.RAMInBytes(m)
memory.Swap = &ms
if err != nil {
return nil, fmt.Errorf("invalid value for memory: %w", err)
}
hasLimits = true
}
}
if c.MemorySwappiness >= 0 {
swappiness := uint64(c.MemorySwappiness)
memory.Swappiness = &swappiness
hasLimits = true
}
if c.OOMKillDisable {
memory.DisableOOMKiller = &c.OOMKillDisable
hasLimits = true
}
if !hasLimits {
return nil, nil
}
return memory, nil
}
func setNamespaces(rtc *config.Config, s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) error {
var err error
if c.PID != "" {
s.PidNS, err = specgen.ParseNamespace(c.PID)
if err != nil {
return err
}
}
if c.IPC != "" {
s.IpcNS, err = specgen.ParseIPCNamespace(c.IPC)
if err != nil {
return err
}
}
if c.UTS != "" {
s.UtsNS, err = specgen.ParseNamespace(c.UTS)
if err != nil {
return err
}
}
if c.CgroupNS != "" {
s.CgroupNS, err = specgen.ParseNamespace(c.CgroupNS)
if err != nil {
return err
}
}
userns := c.UserNS
// caller must make sure s.Pod is set before calling this function.
if userns == "" && s.Pod == "" {
if ns, ok := os.LookupEnv("PODMAN_USERNS"); ok {
userns = ns
} else {
// TODO: This should be moved into pkg/specgen/generate so we don't use the client's containers.conf
userns = rtc.Containers.UserNS
}
}
// userns must be treated differently
if userns != "" {
s.UserNS, err = specgen.ParseUserNamespace(userns)
if err != nil {
return err
}
}
if c.Net != nil {
s.NetNS = c.Net.Network
}
if s.IDMappings == nil {
userNS := namespaces.UsernsMode(s.UserNS.NSMode)
tempIDMap, err := util.ParseIDMapping(namespaces.UsernsMode(userns), []string{}, []string{}, "", "")
if err != nil {
return err
}
s.IDMappings, err = util.ParseIDMapping(userNS, c.UIDMap, c.GIDMap, c.SubUIDName, c.SubGIDName)
if err != nil {
return err
}
if len(s.IDMappings.GIDMap) == 0 {
s.IDMappings.AutoUserNsOpts.AdditionalGIDMappings = tempIDMap.AutoUserNsOpts.AdditionalGIDMappings
if s.UserNS.NSMode == specgen.NamespaceMode("auto") {
s.IDMappings.AutoUserNs = true
}
}
if len(s.IDMappings.UIDMap) == 0 {
s.IDMappings.AutoUserNsOpts.AdditionalUIDMappings = tempIDMap.AutoUserNsOpts.AdditionalUIDMappings
if s.UserNS.NSMode == specgen.NamespaceMode("auto") {
s.IDMappings.AutoUserNs = true
}
}
if tempIDMap.AutoUserNsOpts.Size != 0 {
s.IDMappings.AutoUserNsOpts.Size = tempIDMap.AutoUserNsOpts.Size
}
// If some mappings are specified, assume a private user namespace
if userNS.IsDefaultValue() && (!s.IDMappings.HostUIDMapping || !s.IDMappings.HostGIDMapping) {
s.UserNS.NSMode = specgen.Private
} else {
s.UserNS.NSMode = specgen.NamespaceMode(userNS)
}
}
return nil
}
func GenRlimits(ulimits []string) ([]specs.POSIXRlimit, error) {
rlimits := make([]specs.POSIXRlimit, 0, len(ulimits))
// Rlimits/Ulimits
for _, ulimit := range ulimits {
if ulimit == "host" {
rlimits = nil
break
}
// `ulimitNameMapping` from go-units uses lowercase and names
// without prefixes, e.g. `RLIMIT_NOFILE` should be converted to `nofile`.
// https://github.com/containers/podman/issues/9803
u := strings.TrimPrefix(strings.ToLower(ulimit), rlimitPrefix)
ul, err := units.ParseUlimit(u)
if err != nil {
return nil, fmt.Errorf("ulimit option %q requires name=SOFT:HARD, failed to be parsed: %w", u, err)
}
rl := specs.POSIXRlimit{
Type: ul.Name,
Hard: uint64(ul.Hard),
Soft: uint64(ul.Soft),
}
rlimits = append(rlimits, rl)
}
return rlimits, nil
}
func currentLabelOpts() ([]string, error) {
label, err := selinux.CurrentLabel()
if err != nil {
return nil, err
}
if label == "" {
return nil, nil
}
con, err := selinux.NewContext(label)
if err != nil {
return nil, err
}
return []string{
fmt.Sprintf("label=user:%s", con["user"]),
fmt.Sprintf("label=role:%s", con["role"]),
}, nil
}
func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions, args []string) error {
rtc, err := config.Default()
if err != nil {
return err
}
// TODO: This needs to move into pkg/specgen/generate so we aren't using containers.conf on the client.
if rtc.Containers.EnableLabeledUsers {
defSecurityOpts, err := currentLabelOpts()
if err != nil {
return err
}
c.SecurityOpt = append(defSecurityOpts, c.SecurityOpt...)
}
// validate flags as needed
if err := validate(c); err != nil {
return err
}
s.User = c.User
var inputCommand []string
if !c.IsInfra {
if len(args) > 1 {
inputCommand = args[1:]
}
}
if len(c.HealthCmd) > 0 {
if c.NoHealthCheck {
return errors.New("cannot specify both --no-healthcheck and --health-cmd")
}
s.HealthConfig, err = makeHealthCheckFromCli(c.HealthCmd, c.HealthInterval, c.HealthRetries, c.HealthTimeout, c.HealthStartPeriod, false)
if err != nil {
return err
}
} else if c.NoHealthCheck {
s.HealthConfig = &manifest.Schema2HealthConfig{
Test: []string{"NONE"},
}
}
onFailureAction, err := define.ParseHealthCheckOnFailureAction(c.HealthOnFailure)
if err != nil {
return err
}
s.HealthCheckOnFailureAction = onFailureAction
if c.StartupHCCmd != "" {
if c.NoHealthCheck {
return errors.New("cannot specify both --no-healthcheck and --health-startup-cmd")
}
// The hardcoded "1s" will be discarded, as the startup
// healthcheck does not have a period. So just hardcode
// something that parses correctly.
tmpHcConfig, err := makeHealthCheckFromCli(c.StartupHCCmd, c.StartupHCInterval, c.StartupHCRetries, c.StartupHCTimeout, "1s", true)
if err != nil {
return err
}
s.StartupHealthConfig = new(define.StartupHealthCheck)
s.StartupHealthConfig.Test = tmpHcConfig.Test
s.StartupHealthConfig.Interval = tmpHcConfig.Interval
s.StartupHealthConfig.Timeout = tmpHcConfig.Timeout
s.StartupHealthConfig.Retries = tmpHcConfig.Retries
s.StartupHealthConfig.Successes = int(c.StartupHCSuccesses)
}
if len(s.Pod) == 0 || len(c.Pod) > 0 {
s.Pod = c.Pod
}
if len(c.PodIDFile) > 0 {
if len(s.Pod) > 0 {
return errors.New("cannot specify both --pod and --pod-id-file")
}
podID, err := ReadPodIDFile(c.PodIDFile)
if err != nil {
return err
}
s.Pod = podID
}
// Important s.Pod must be set above here.
if err := setNamespaces(rtc, s, c); err != nil {
return err
}
if s.Terminal == nil {
s.Terminal = &c.TTY
}
if err := verifyExpose(c.Expose); err != nil {
return err
}
// We are not handling the Expose flag yet.
// s.PortsExpose = c.Expose
if c.Net != nil {
s.PortMappings = c.Net.PublishPorts
}
if s.PublishExposedPorts == nil {
s.PublishExposedPorts = &c.PublishAll
}
expose, err := CreateExpose(c.Expose)
if err != nil {
return err
}
if len(s.Expose) == 0 {
s.Expose = expose
}
if sig := c.StopSignal; len(sig) > 0 {
stopSignal, err := util.ParseSignal(sig)
if err != nil {
return err
}
s.StopSignal = &stopSignal
}
// ENVIRONMENT VARIABLES
//
// Precedence order (higher index wins):
// 1) containers.conf (EnvHost, EnvHTTP, Env) 2) image data, 3 User EnvHost/EnvHTTP, 4) env-file, 5) env
// containers.conf handled and image data handled on the server side
// user specified EnvHost and EnvHTTP handled on Server Side relative to Server
// env-file and env handled on client side
var env map[string]string
// First transform the os env into a map. We need it for the labels later in
// any case.
osEnv := envLib.Map(os.Environ())
if s.EnvHost == nil {
s.EnvHost = &c.EnvHost
}
if s.HTTPProxy == nil {
s.HTTPProxy = &c.HTTPProxy
}
// env-file overrides any previous variables
for _, f := range c.EnvFile {
fileEnv, err := envLib.ParseFile(f)
if err != nil {
return err
}
// File env is overridden by env.
env = envLib.Join(env, fileEnv)
}
parsedEnv, err := envLib.ParseSlice(c.Env)
if err != nil {
return err
}
if len(s.Env) == 0 {
s.Env = envLib.Join(env, parsedEnv)
}
// LABEL VARIABLES
labels, err := parse.GetAllLabels(c.LabelFile, c.Label)
if err != nil {
return fmt.Errorf("unable to process labels: %w", err)
}
if systemdUnit, exists := osEnv[systemdDefine.EnvVariable]; exists {
labels[systemdDefine.EnvVariable] = systemdUnit
}
if len(s.Labels) == 0 {
s.Labels = labels
}
// Intel RDT CAT
if c.IntelRdtClosID != "" {
s.IntelRdt = &specs.LinuxIntelRdt{}
s.IntelRdt.ClosID = c.IntelRdtClosID
}
// ANNOTATIONS
annotations := make(map[string]string)
// Last, add user annotations
for _, annotation := range c.Annotation {
key, val, hasVal := strings.Cut(annotation, "=")
if !hasVal {
return errors.New("annotations must be formatted KEY=VALUE")
}
annotations[key] = val
}
if len(s.Annotations) == 0 {
s.Annotations = annotations
}
if len(c.StorageOpts) > 0 {
opts := make(map[string]string, len(c.StorageOpts))
for _, opt := range c.StorageOpts {
key, val, hasVal := strings.Cut(opt, "=")
if !hasVal {
return errors.New("storage-opt must be formatted KEY=VALUE")
}
opts[key] = val
}
s.StorageOpts = opts
}
if len(s.WorkDir) == 0 {
s.WorkDir = c.Workdir
}
if c.Entrypoint != nil {
entrypoint := []string{}
// Check if entrypoint specified is json
if err := json.Unmarshal([]byte(*c.Entrypoint), &entrypoint); err != nil {
entrypoint = append(entrypoint, *c.Entrypoint)
}
s.Entrypoint = entrypoint
}
// Include the command used to create the container.
if len(s.ContainerCreateCommand) == 0 {
s.ContainerCreateCommand = os.Args
}
if len(inputCommand) > 0 {
s.Command = inputCommand
}
// SHM Size
if c.ShmSize != "" {
val, err := units.RAMInBytes(c.ShmSize)
if err != nil {
return fmt.Errorf("unable to translate --shm-size: %w", err)
}
s.ShmSize = &val
}
// SHM Size Systemd
if c.ShmSizeSystemd != "" {
val, err := units.RAMInBytes(c.ShmSizeSystemd)
if err != nil {
return fmt.Errorf("unable to translate --shm-size-systemd: %w", err)
}
s.ShmSizeSystemd = &val
}
if c.Net != nil {
s.Networks = c.Net.Networks
}
if c.Net != nil {
s.HostAdd = c.Net.AddHosts
s.UseImageResolvConf = &c.Net.UseImageResolvConf
s.DNSServers = c.Net.DNSServers
s.DNSSearch = c.Net.DNSSearch
s.DNSOptions = c.Net.DNSOptions
s.NetworkOptions = c.Net.NetworkOptions
s.UseImageHosts = &c.Net.NoHosts
}
if len(s.HostUsers) == 0 || len(c.HostUsers) != 0 {
s.HostUsers = c.HostUsers
}
if len(c.ImageVolume) != 0 {
if len(s.ImageVolumeMode) == 0 {
s.ImageVolumeMode = c.ImageVolume
}
}
if s.ImageVolumeMode == define.TypeBind {
s.ImageVolumeMode = "anonymous"
}
if len(s.Systemd) == 0 || len(c.Systemd) != 0 {
s.Systemd = strings.ToLower(c.Systemd)
}
if len(s.SdNotifyMode) == 0 || len(c.SdNotifyMode) != 0 {
s.SdNotifyMode = c.SdNotifyMode
}
if s.ResourceLimits == nil {
s.ResourceLimits = &specs.LinuxResources{}
}
s.ResourceLimits, err = GetResources(s, c)
if err != nil {
return err
}
if s.LogConfiguration == nil {
s.LogConfiguration = &specgen.LogConfig{}
}
if ld := c.LogDriver; len(ld) > 0 {
s.LogConfiguration.Driver = ld
}
if len(s.CgroupParent) == 0 || len(c.CgroupParent) != 0 {
s.CgroupParent = c.CgroupParent
}
if len(s.CgroupsMode) == 0 {
s.CgroupsMode = c.CgroupsMode
}
if len(s.Groups) == 0 || len(c.GroupAdd) != 0 {
s.Groups = c.GroupAdd
}
if len(s.Hostname) == 0 || len(c.Hostname) != 0 {
s.Hostname = c.Hostname
}
sysctl := map[string]string{}
if ctl := c.Sysctl; len(ctl) > 0 {
sysctl, err = util.ValidateSysctls(ctl)
if err != nil {
return err
}
}
if len(s.Sysctl) == 0 || len(c.Sysctl) != 0 {
s.Sysctl = sysctl
}
if len(s.CapAdd) == 0 || len(c.CapAdd) != 0 {
s.CapAdd = c.CapAdd
}
if len(s.CapDrop) == 0 || len(c.CapDrop) != 0 {
s.CapDrop = c.CapDrop
}
if s.Privileged == nil {
s.Privileged = &c.Privileged
}
if s.ReadOnlyFilesystem == nil {
s.ReadOnlyFilesystem = &c.ReadOnly
}
if len(s.ConmonPidFile) == 0 || len(c.ConmonPIDFile) != 0 {
s.ConmonPidFile = c.ConmonPIDFile
}
if len(s.DependencyContainers) == 0 || len(c.Requires) != 0 {
s.DependencyContainers = c.Requires
}
// Only add ReadWrite tmpfs mounts iff the container is
// being run ReadOnly and ReadWriteTmpFS is not disabled,
// (user specifying --read-only-tmpfs=false.)
localRWTmpfs := c.ReadOnly && c.ReadWriteTmpFS
s.ReadWriteTmpfs = &localRWTmpfs
// TODO convert to map?
// check if key=value and convert
sysmap := make(map[string]string)
for _, ctl := range c.Sysctl {
key, val, hasVal := strings.Cut(ctl, "=")
if !hasVal {
return fmt.Errorf("invalid sysctl value %q", ctl)
}
sysmap[key] = val
}
if len(s.Sysctl) == 0 || len(c.Sysctl) != 0 {
s.Sysctl = sysmap
}
if c.CIDFile != "" {
s.Annotations[define.InspectAnnotationCIDFile] = c.CIDFile
}
for _, opt := range c.SecurityOpt {
// Docker deprecated the ":" syntax but still supports it,
// so we need to as well
var key, val string
var hasVal bool
if strings.Contains(opt, "=") {
key, val, hasVal = strings.Cut(opt, "=")
} else {
key, val, hasVal = strings.Cut(opt, ":")
}
if !hasVal &&
key != "no-new-privileges" {
return fmt.Errorf("invalid --security-opt 1: %q", opt)
}
switch key {
case "apparmor":
s.ContainerSecurityConfig.ApparmorProfile = val
s.Annotations[define.InspectAnnotationApparmor] = val
case "label":
if val == "nested" {
localTrue := true
s.ContainerSecurityConfig.LabelNested = &localTrue
continue
}
// TODO selinux opts and label opts are the same thing
s.ContainerSecurityConfig.SelinuxOpts = append(s.ContainerSecurityConfig.SelinuxOpts, val)
s.Annotations[define.InspectAnnotationLabel] = strings.Join(s.ContainerSecurityConfig.SelinuxOpts, ",label=")
case "mask":
s.ContainerSecurityConfig.Mask = append(s.ContainerSecurityConfig.Mask, strings.Split(val, ":")...)
case "proc-opts":
s.ProcOpts = strings.Split(val, ",")
case "seccomp":
s.SeccompProfilePath = val
s.Annotations[define.InspectAnnotationSeccomp] = val
// this option is for docker compatibility, it is the same as unmask=ALL
case "systempaths":
if val == "unconfined" {
s.ContainerSecurityConfig.Unmask = append(s.ContainerSecurityConfig.Unmask, []string{"ALL"}...)
} else {
return fmt.Errorf("invalid systempaths option %q, only `unconfined` is supported", val)
}
case "unmask":
if hasVal {
s.ContainerSecurityConfig.Unmask = append(s.ContainerSecurityConfig.Unmask, val)
}
case "no-new-privileges":
noNewPrivileges := true
if hasVal {
noNewPrivileges, err = strconv.ParseBool(val)
if err != nil {
return fmt.Errorf("invalid --security-opt 2: %q", opt)
}
}
s.ContainerSecurityConfig.NoNewPrivileges = &noNewPrivileges
default:
return fmt.Errorf("invalid --security-opt 2: %q", opt)
}
}
if len(s.SeccompPolicy) == 0 || len(c.SeccompPolicy) != 0 {
s.SeccompPolicy = c.SeccompPolicy
}
if len(s.VolumesFrom) == 0 || len(c.VolumesFrom) != 0 {
s.VolumesFrom = c.VolumesFrom
}
// Only add read-only tmpfs mounts in case that we are read-only and the
// read-only tmpfs flag has been set.
mounts, volumes, overlayVolumes, imageVolumes, err := parseVolumes(rtc, c.Volume, c.Mount, c.TmpFS)
if err != nil {
return err
}
if len(s.Mounts) == 0 || len(c.Mount) != 0 {
s.Mounts = mounts
}
if len(s.Volumes) == 0 || len(c.Volume) != 0 {
s.Volumes = volumes
}
if s.LabelNested != nil && *s.LabelNested {
// Need to unmask the SELinux file system
s.Unmask = append(s.Unmask, "/sys/fs/selinux", "/proc")
s.Mounts = append(s.Mounts, specs.Mount{
Source: "/sys/fs/selinux",
Destination: "/sys/fs/selinux",
Type: define.TypeBind,
})
s.Annotations[define.RunOCIMountContextType] = "rootcontext"
}
// TODO make sure these work in clone
if len(s.OverlayVolumes) == 0 {
s.OverlayVolumes = overlayVolumes
}
if len(s.ImageVolumes) == 0 {
s.ImageVolumes = imageVolumes
}
devices := c.Devices
for _, gpu := range c.GPUs {
devices = append(devices, "nvidia.com/gpu="+gpu)
}
for _, dev := range devices {
s.Devices = append(s.Devices, specs.LinuxDevice{Path: dev})
}
for _, rule := range c.DeviceCgroupRule {
dev, err := parseLinuxResourcesDeviceAccess(rule)
if err != nil {
return err
}
s.DeviceCgroupRule = append(s.DeviceCgroupRule, dev)
}
if s.Init == nil {
s.Init = &c.Init
}
if len(s.InitPath) == 0 || len(c.InitPath) != 0 {
s.InitPath = c.InitPath
}
if s.Stdin == nil {
s.Stdin = &c.Interactive
}
// quiet
// DeviceCgroupRules: c.StringSlice("device-cgroup-rule"),
// Rlimits/Ulimits
s.Rlimits, err = GenRlimits(c.Ulimit)
if err != nil {
return err
}
logOpts := make(map[string]string)
for _, o := range c.LogOptions {
key, val, hasVal := strings.Cut(o, "=")
if !hasVal {
return fmt.Errorf("invalid log option %q", o)
}
switch strings.ToLower(key) {
case "driver":
s.LogConfiguration.Driver = val
case "path":
s.LogConfiguration.Path = val
case "max-size":
logSize, err := units.FromHumanSize(val)
if err != nil {
return err
}
s.LogConfiguration.Size = logSize
default:
logOpts[key] = val
}
}
if len(s.LogConfiguration.Options) == 0 || len(c.LogOptions) != 0 {
s.LogConfiguration.Options = logOpts
}
if len(s.Name) == 0 || len(c.Name) != 0 {
s.Name = c.Name
}
if c.PreserveFDs != 0 && c.PreserveFD != nil {
return errors.New("cannot specify both --preserve-fds and --preserve-fd")
}
if s.PreserveFDs == 0 || c.PreserveFDs != 0 {
s.PreserveFDs = c.PreserveFDs
}
if s.PreserveFD == nil || c.PreserveFD != nil {
s.PreserveFD = c.PreserveFD
}
if s.OOMScoreAdj == nil || c.OOMScoreAdj != nil {
s.OOMScoreAdj = c.OOMScoreAdj
}
if c.Restart != "" {
policy, retries, err := util.ParseRestartPolicy(c.Restart)
if err != nil {
return err
}
s.RestartPolicy = policy
s.RestartRetries = &retries
}
if len(s.Secrets) == 0 || len(c.Secrets) != 0 {
s.Secrets, s.EnvSecrets, err = parseSecrets(c.Secrets)
if err != nil {
return err
}
}
if c.Personality != "" {
s.Personality = &specs.LinuxPersonality{}
s.Personality.Domain = specs.LinuxPersonalityDomain(c.Personality)
}
if s.Remove == nil {
s.Remove = &c.Rm
}
if s.StopTimeout == nil || c.StopTimeout != 0 {
s.StopTimeout = &c.StopTimeout
}
if s.Timeout == 0 || c.Timeout != 0 {
s.Timeout = c.Timeout
}
if len(s.Timezone) == 0 || len(c.Timezone) != 0 {
s.Timezone = c.Timezone
}
if len(s.Umask) == 0 || len(c.Umask) != 0 {
s.Umask = c.Umask
}
if len(s.PidFile) == 0 || len(c.PidFile) != 0 {
s.PidFile = c.PidFile
}
if s.Volatile == nil {
s.Volatile = &c.Rm
}
if len(s.EnvMerge) == 0 || len(c.EnvMerge) != 0 {
s.EnvMerge = c.EnvMerge
}
if len(s.UnsetEnv) == 0 || len(c.UnsetEnv) != 0 {
s.UnsetEnv = c.UnsetEnv
}
if s.UnsetEnvAll == nil {
s.UnsetEnvAll = &c.UnsetEnvAll
}
if len(s.ChrootDirs) == 0 || len(c.ChrootDirs) != 0 {
s.ChrootDirs = c.ChrootDirs
}
// Initcontainers
if len(s.InitContainerType) == 0 || len(c.InitContainerType) != 0 {
s.InitContainerType = c.InitContainerType
}
t := true
if s.Passwd == nil {
s.Passwd = &t
}
if len(s.PasswdEntry) == 0 || len(c.PasswdEntry) != 0 {
s.PasswdEntry = c.PasswdEntry
}
if len(s.GroupEntry) == 0 || len(c.GroupEntry) != 0 {
s.GroupEntry = c.GroupEntry
}
return nil
}
func makeHealthCheckFromCli(inCmd, interval string, retries uint, timeout, startPeriod string, isStartup bool) (*manifest.Schema2HealthConfig, error) {
cmdArr := []string{}
isArr := true
err := json.Unmarshal([]byte(inCmd), &cmdArr) // array unmarshalling
if err != nil {
cmdArr = strings.SplitN(inCmd, " ", 2) // default for compat
isArr = false
}
// Every healthcheck requires a command
if len(cmdArr) == 0 {
return nil, errors.New("must define a healthcheck command for all healthchecks")
}
var concat string
if strings.ToUpper(cmdArr[0]) == define.HealthConfigTestCmd || strings.ToUpper(cmdArr[0]) == define.HealthConfigTestNone { // this is for compat, we are already split properly for most compat cases
cmdArr = strings.Fields(inCmd)
} else if strings.ToUpper(cmdArr[0]) != define.HealthConfigTestCmdShell { // this is for podman side of things, won't contain the keywords
if isArr && len(cmdArr) > 1 { // an array of consecutive commands
cmdArr = append([]string{define.HealthConfigTestCmd}, cmdArr...)
} else { // one singular command
if len(cmdArr) == 1 {
concat = cmdArr[0]
} else {
concat = strings.Join(cmdArr[0:], " ")
}
cmdArr = append([]string{define.HealthConfigTestCmdShell}, concat)
}
}
if strings.ToUpper(cmdArr[0]) == define.HealthConfigTestNone { // if specified to remove healtcheck
cmdArr = []string{define.HealthConfigTestNone}
}
// healthcheck is by default an array, so we simply pass the user input
hc := manifest.Schema2HealthConfig{
Test: cmdArr,
}
if interval == "disable" {
interval = "0"
}
intervalDuration, err := time.ParseDuration(interval)
if err != nil {
return nil, fmt.Errorf("invalid healthcheck-interval: %w", err)
}
hc.Interval = intervalDuration
if retries < 1 && !isStartup {
return nil, errors.New("healthcheck-retries must be greater than 0")
}
hc.Retries = int(retries)
timeoutDuration, err := time.ParseDuration(timeout)
if err != nil {
return nil, fmt.Errorf("invalid healthcheck-timeout: %w", err)
}
if timeoutDuration < time.Duration(1) {
return nil, errors.New("healthcheck-timeout must be at least 1 second")
}
hc.Timeout = timeoutDuration
startPeriodDuration, err := time.ParseDuration(startPeriod)
if err != nil {
return nil, fmt.Errorf("invalid healthcheck-start-period: %w", err)
}
if startPeriodDuration < time.Duration(0) {
return nil, errors.New("healthcheck-start-period must be 0 seconds or greater")
}
hc.StartPeriod = startPeriodDuration
return &hc, nil
}
func parseWeightDevices(weightDevs []string) (map[string]specs.LinuxWeightDevice, error) {
wd := make(map[string]specs.LinuxWeightDevice)
for _, dev := range weightDevs {
key, val, hasVal := strings.Cut(dev, ":")
if !hasVal {
return nil, fmt.Errorf("bad format: %s", dev)
}
if !strings.HasPrefix(key, "/dev/") {
return nil, fmt.Errorf("bad format for device path: %s", dev)
}
weight, err := strconv.ParseUint(val, 10, 0)
if err != nil {
return nil, fmt.Errorf("invalid weight for device: %s", dev)
}
if weight > 0 && (weight < 10 || weight > 1000) {
return nil, fmt.Errorf("invalid weight for device: %s", dev)
}
w := uint16(weight)
wd[key] = specs.LinuxWeightDevice{
Weight: &w,
LeafWeight: nil,
}
}
return wd, nil
}
func parseThrottleBPSDevices(bpsDevices []string) (map[string]specs.LinuxThrottleDevice, error) {
td := make(map[string]specs.LinuxThrottleDevice)
for _, dev := range bpsDevices {
key, val, hasVal := strings.Cut(dev, ":")
if !hasVal {
return nil, fmt.Errorf("bad format: %s", dev)
}
if !strings.HasPrefix(key, "/dev/") {
return nil, fmt.Errorf("bad format for device path: %s", dev)
}
rate, err := units.RAMInBytes(val)
if err != nil {
return nil, fmt.Errorf("invalid rate for device: %s. The correct format is <device-path>:<number>[<unit>]. Number must be a positive integer. Unit is optional and can be kb, mb, or gb", dev)
}
if rate < 0 {
return nil, fmt.Errorf("invalid rate for device: %s. The correct format is <device-path>:<number>[<unit>]. Number must be a positive integer. Unit is optional and can be kb, mb, or gb", dev)
}
td[key] = specs.LinuxThrottleDevice{Rate: uint64(rate)}
}
return td, nil
}
func parseThrottleIOPsDevices(iopsDevices []string) (map[string]specs.LinuxThrottleDevice, error) {
td := make(map[string]specs.LinuxThrottleDevice)
for _, dev := range iopsDevices {
key, val, hasVal := strings.Cut(dev, ":")
if !hasVal {
return nil, fmt.Errorf("bad format: %s", dev)
}
if !strings.HasPrefix(key, "/dev/") {
return nil, fmt.Errorf("bad format for device path: %s", dev)
}
rate, err := strconv.ParseUint(val, 10, 64)
if err != nil {
return nil, fmt.Errorf("invalid rate for device: %s. The correct format is <device-path>:<number>. Number must be a positive integer", dev)
}
td[key] = specs.LinuxThrottleDevice{Rate: rate}
}
return td, nil
}
func parseSecrets(secrets []string) ([]specgen.Secret, map[string]string, error) {
secretParseError := errors.New("parsing secret")
var mount []specgen.Secret
envs := make(map[string]string)
for _, val := range secrets {
// mount only tells if user has set an option that can only be used with mount secret type
mountOnly := false
source := ""
secretType := ""
target := ""
var uid, gid uint32
// default mode 444 octal = 292 decimal
var mode uint32 = 292
split := strings.Split(val, ",")
// --secret mysecret
if len(split) == 1 {
mountSecret := specgen.Secret{
Source: val,
Target: target,
UID: uid,
GID: gid,
Mode: mode,
}
mount = append(mount, mountSecret)
continue
}
// --secret mysecret,opt=opt
if !strings.Contains(split[0], "=") {
source = split[0]
split = split[1:]
}
for _, val := range split {
name, value, hasValue := strings.Cut(val, "=")
if !hasValue {
return nil, nil, fmt.Errorf("option %s must be in form option=value: %w", val, secretParseError)
}
switch name {
case "source":
source = value
case "type":
if secretType != "" {
return nil, nil, fmt.Errorf("cannot set more than one secret type: %w", secretParseError)
}
if value != "mount" && value != "env" {
return nil, nil, fmt.Errorf("type %s is invalid: %w", value, secretParseError)
}
secretType = value
case "target":
target = value
case "mode":
mountOnly = true
mode64, err := strconv.ParseUint(value, 8, 32)
if err != nil {
return nil, nil, fmt.Errorf("mode %s invalid: %w", value, secretParseError)
}
mode = uint32(mode64)
case "uid", "UID":
mountOnly = true
uid64, err := strconv.ParseUint(value, 10, 32)
if err != nil {
return nil, nil, fmt.Errorf("UID %s invalid: %w", value, secretParseError)
}
uid = uint32(uid64)
case "gid", "GID":
mountOnly = true
gid64, err := strconv.ParseUint(value, 10, 32)
if err != nil {
return nil, nil, fmt.Errorf("GID %s invalid: %w", value, secretParseError)
}
gid = uint32(gid64)
default:
return nil, nil, fmt.Errorf("option %s invalid: %w", val, secretParseError)
}
}
if secretType == "" {
secretType = "mount"
}
if source == "" {
return nil, nil, fmt.Errorf("no source found %s: %w", val, secretParseError)
}
if secretType == "mount" {
mountSecret := specgen.Secret{
Source: source,
Target: target,
UID: uid,
GID: gid,
Mode: mode,
}
mount = append(mount, mountSecret)
}
if secretType == "env" {
if mountOnly {
return nil, nil, fmt.Errorf("UID, GID, Mode options cannot be set with secret type env: %w", secretParseError)
}
if target == "" {
target = source
}
envs[target] = source
}
}
return mount, envs, nil
}
var cgroupDeviceType = map[string]bool{
"a": true, // all
"b": true, // block device
"c": true, // character device
}
var cgroupDeviceAccess = map[string]bool{
"r": true, // read
"w": true, // write
"m": true, // mknod
}
// parseLinuxResourcesDeviceAccess parses the raw string passed with the --device-access-add flag
func parseLinuxResourcesDeviceAccess(device string) (specs.LinuxDeviceCgroup, error) {
var devType, access string
var major, minor *int64
value := strings.Split(device, " ")
if len(value) != 3 {
return specs.LinuxDeviceCgroup{}, fmt.Errorf("invalid device cgroup rule requires type, major:Minor, and access rules: %q", device)
}
devType = value[0]
if !cgroupDeviceType[devType] {
return specs.LinuxDeviceCgroup{}, fmt.Errorf("invalid device type in device-access-add: %s", devType)
}
majorNumber, minorNumber, hasMinor := strings.Cut(value[1], ":")
if majorNumber != "*" {
i, err := strconv.ParseUint(majorNumber, 10, 64)
if err != nil {
return specs.LinuxDeviceCgroup{}, err
}
m := int64(i)
major = &m
}
if hasMinor && minorNumber != "*" {
i, err := strconv.ParseUint(minorNumber, 10, 64)
if err != nil {
return specs.LinuxDeviceCgroup{}, err
}
m := int64(i)
minor = &m
}
access = value[2]
for _, c := range strings.Split(access, "") {
if !cgroupDeviceAccess[c] {
return specs.LinuxDeviceCgroup{}, fmt.Errorf("invalid device access in device-access-add: %s", c)
}
}
return specs.LinuxDeviceCgroup{
Allow: true,
Type: devType,
Major: major,
Minor: minor,
Access: access,
}, nil
}
func GetResources(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) (*specs.LinuxResources, error) {
var err error
if s.ResourceLimits.Memory == nil || (len(c.Memory) != 0 || len(c.MemoryReservation) != 0 || len(c.MemorySwap) != 0 || c.MemorySwappiness != 0) {
s.ResourceLimits.Memory, err = getMemoryLimits(c)
if err != nil {
return nil, err
}
}
if s.ResourceLimits.BlockIO == nil || (len(c.BlkIOWeight) != 0 || len(c.BlkIOWeightDevice) != 0 || len(c.DeviceReadBPs) != 0 || len(c.DeviceWriteBPs) != 0) {
s.ResourceLimits.BlockIO, err = getIOLimits(s, c)
if err != nil {
return nil, err
}
}
if c.PIDsLimit != nil {
pids := specs.LinuxPids{
Limit: *c.PIDsLimit,
}
s.ResourceLimits.Pids = &pids
}
if s.ResourceLimits.CPU == nil || (c.CPUPeriod != 0 || c.CPUQuota != 0 || c.CPURTPeriod != 0 || c.CPURTRuntime != 0 || c.CPUS != 0 || len(c.CPUSetCPUs) != 0 || len(c.CPUSetMems) != 0 || c.CPUShares != 0) {
s.ResourceLimits.CPU = getCPULimits(c)
}
unifieds := make(map[string]string)
for _, unified := range c.CgroupConf {
key, val, hasVal := strings.Cut(unified, "=")
if !hasVal {
return nil, errors.New("--cgroup-conf must be formatted KEY=VALUE")
}
unifieds[key] = val
}
if len(unifieds) > 0 {
s.ResourceLimits.Unified = unifieds
}
if s.ResourceLimits.CPU == nil && s.ResourceLimits.Pids == nil && s.ResourceLimits.BlockIO == nil && s.ResourceLimits.Memory == nil && s.ResourceLimits.Unified == nil {
s.ResourceLimits = nil
}
return s.ResourceLimits, nil
}