Files
podman/pkg/specgenutil/specgen.go
Giuseppe Scrivano bbe0e4a5db specgenutil: record the pod userns in the annotations
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
2024-07-24 12:10:48 +02:00

1294 lines
34 KiB
Go

package specgenutil
import (
"encoding/json"
"errors"
"fmt"
"os"
"strconv"
"strings"
"time"
"github.com/containers/common/pkg/config"
"github.com/containers/image/v5/manifest"
"github.com/containers/podman/v5/cmd/podman/parse"
"github.com/containers/podman/v5/libpod/define"
"github.com/containers/podman/v5/pkg/domain/entities"
envLib "github.com/containers/podman/v5/pkg/env"
"github.com/containers/podman/v5/pkg/namespaces"
"github.com/containers/podman/v5/pkg/specgen"
systemdDefine "github.com/containers/podman/v5/pkg/systemd/define"
"github.com/containers/podman/v5/pkg/util"
"github.com/docker/go-units"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux"
)
// rlimitPrefix is the prefix stripped from lower-cased ulimit names before
// they are parsed, so that e.g. "rlimit_nofile" becomes "nofile".
const rlimitPrefix = "rlimit_"
// getCPULimits translates the CPU related create options into an OCI
// LinuxCPU block. It returns nil when none of the CPU options carries a
// value.
func getCPULimits(c *entities.ContainerCreateOptions) *specs.LinuxCPU {
	var (
		limits = &specs.LinuxCPU{}
		found  bool
	)

	// A core count is converted to a period/quota pair first; an explicit
	// period or quota handled right below overrides the derived value.
	if c.CPUS > 0 {
		derivedPeriod, derivedQuota := util.CoresToPeriodAndQuota(c.CPUS)
		limits.Period, limits.Quota = &derivedPeriod, &derivedQuota
		found = true
	}
	if c.CPUPeriod > 0 {
		limits.Period = &c.CPUPeriod
		found = true
	}
	if c.CPUQuota > 0 {
		limits.Quota = &c.CPUQuota
		found = true
	}

	if c.CPUShares > 0 {
		limits.Shares = &c.CPUShares
		found = true
	}

	if c.CPUSetCPUs != "" {
		limits.Cpus = c.CPUSetCPUs
		found = true
	}
	if c.CPUSetMems != "" {
		limits.Mems = c.CPUSetMems
		found = true
	}

	if c.CPURTPeriod > 0 {
		limits.RealtimePeriod = &c.CPURTPeriod
		found = true
	}
	if c.CPURTRuntime > 0 {
		limits.RealtimeRuntime = &c.CPURTRuntime
		found = true
	}

	if found {
		return limits
	}
	return nil
}
// getIOLimits parses the block IO related create options. Per-device weight
// and throttle settings are stored directly on s; the returned LinuxBlockIO
// only carries the global weight. s.ResourceLimits is allocated when it is
// nil. The result is nil when no block IO option was given.
func getIOLimits(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) (*specs.LinuxBlockIO, error) {
	if s.ResourceLimits == nil {
		s.ResourceLimits = &specs.LinuxResources{}
	}

	var (
		blockIO = &specs.LinuxBlockIO{}
		found   bool
		err     error
	)

	if c.BlkIOWeight != "" {
		if s.ResourceLimits.BlockIO == nil {
			s.ResourceLimits.BlockIO = &specs.LinuxBlockIO{}
		}
		parsed, parseErr := strconv.ParseUint(c.BlkIOWeight, 10, 16)
		if parseErr != nil {
			return nil, fmt.Errorf("invalid value for blkio-weight: %w", parseErr)
		}
		weight := uint16(parsed)
		// The same weight is recorded both on the returned block and on
		// the generator's existing resource limits.
		blockIO.Weight = &weight
		s.ResourceLimits.BlockIO.Weight = &weight
		found = true
	}

	if len(c.BlkIOWeightDevice) > 0 {
		s.WeightDevice, err = parseWeightDevices(c.BlkIOWeightDevice)
		if err != nil {
			return nil, err
		}
		found = true
	}

	if len(c.DeviceReadBPs) > 0 {
		s.ThrottleReadBpsDevice, err = parseThrottleBPSDevices(c.DeviceReadBPs)
		if err != nil {
			return nil, err
		}
		found = true
	}

	if len(c.DeviceWriteBPs) > 0 {
		s.ThrottleWriteBpsDevice, err = parseThrottleBPSDevices(c.DeviceWriteBPs)
		if err != nil {
			return nil, err
		}
		found = true
	}

	if len(c.DeviceReadIOPs) > 0 {
		s.ThrottleReadIOPSDevice, err = parseThrottleIOPsDevices(c.DeviceReadIOPs)
		if err != nil {
			return nil, err
		}
		found = true
	}

	if len(c.DeviceWriteIOPs) > 0 {
		s.ThrottleWriteIOPSDevice, err = parseThrottleIOPsDevices(c.DeviceWriteIOPs)
		if err != nil {
			return nil, err
		}
		found = true
	}

	if found {
		return blockIO, nil
	}
	return nil, nil
}
// LimitToSwap stores ml as the memory limit when it is positive. If no swap
// value was supplied (swap is empty) the swap limit is set to twice ml.
// Nothing is changed for a non-positive ml.
func LimitToSwap(memory *specs.LinuxMemory, swap string, ml int64) {
	if ml <= 0 {
		return
	}
	memory.Limit = &ml

	if swap != "" {
		return
	}
	doubled := ml * 2
	memory.Swap = &doubled
}
// getMemoryLimits converts the memory related create options into an OCI
// LinuxMemory block. It returns nil (and no error) when none of the options
// yields a limit, and an error when a size string cannot be parsed.
func getMemoryLimits(c *entities.ContainerCreateOptions) (*specs.LinuxMemory, error) {
	var (
		limits = &specs.LinuxMemory{}
		found  bool
	)

	if c.Memory != "" {
		limitBytes, err := units.RAMInBytes(c.Memory)
		if err != nil {
			return nil, fmt.Errorf("invalid value for memory: %w", err)
		}
		LimitToSwap(limits, c.MemorySwap, limitBytes)
		found = true
	}

	if c.MemoryReservation != "" {
		reservation, err := units.RAMInBytes(c.MemoryReservation)
		if err != nil {
			return nil, fmt.Errorf("invalid value for memory: %w", err)
		}
		limits.Reservation = &reservation
		found = true
	}

	// Swap is only recorded when it was given explicitly; the literal "-1"
	// means unlimited and is therefore skipped.
	if c.MemorySwap != "" && c.MemorySwap != "-1" {
		swapBytes, err := units.RAMInBytes(c.MemorySwap)
		if err != nil {
			return nil, fmt.Errorf("invalid value for memory: %w", err)
		}
		limits.Swap = &swapBytes
		found = true
	}

	// A negative swappiness is treated as "not set".
	if c.MemorySwappiness >= 0 {
		swappiness := uint64(c.MemorySwappiness)
		limits.Swappiness = &swappiness
		found = true
	}

	if c.OOMKillDisable {
		limits.DisableOOMKiller = &c.OOMKillDisable
		found = true
	}

	if found {
		return limits, nil
	}
	return nil, nil
}
// setNamespaces fills the namespace related fields of s (PID, IPC, UTS,
// cgroup, user and network namespaces plus the ID mappings) from the create
// options in c. rtc supplies the containers.conf default for the user
// namespace. The first parse error encountered is returned.
//
// s.Pod must already be set by the caller, because the user namespace
// fallbacks are only consulted for containers that are not in a pod.
func setNamespaces(rtc *config.Config, s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) error {
var err error
// Each of the simple namespaces is only touched when the option is non-empty.
if c.PID != "" {
s.PidNS, err = specgen.ParseNamespace(c.PID)
if err != nil {
return err
}
}
if c.IPC != "" {
s.IpcNS, err = specgen.ParseIPCNamespace(c.IPC)
if err != nil {
return err
}
}
if c.UTS != "" {
s.UtsNS, err = specgen.ParseNamespace(c.UTS)
if err != nil {
return err
}
}
if c.CgroupNS != "" {
s.CgroupNS, err = specgen.ParseNamespace(c.CgroupNS)
if err != nil {
return err
}
}
userns := c.UserNS
// caller must make sure s.Pod is set before calling this function.
// Without an explicit value and outside of a pod, fall back to the
// PODMAN_USERNS environment variable and then to containers.conf.
if userns == "" && s.Pod == "" {
if ns, ok := os.LookupEnv("PODMAN_USERNS"); ok {
userns = ns
} else {
// TODO: This should be moved into pkg/specgen/generate so we don't use the client's containers.conf
userns = rtc.Containers.UserNS
}
}
// userns must be treated differently
if userns != "" {
s.UserNS, err = specgen.ParseUserNamespace(userns)
if err != nil {
return err
}
}
if c.Net != nil {
s.NetNS = c.Net.Network
}
// ID mappings are only computed when the generator does not carry any yet.
if s.IDMappings == nil {
userNS := namespaces.UsernsMode(s.UserNS.NSMode)
// tempIDMap is parsed from the raw userns string alone (no UID/GID maps or
// subid names); only its AutoUserNsOpts are copied into the final mappings below.
tempIDMap, err := util.ParseIDMapping(namespaces.UsernsMode(userns), []string{}, []string{}, "", "")
if err != nil {
return err
}
s.IDMappings, err = util.ParseIDMapping(userNS, c.UIDMap, c.GIDMap, c.SubUIDName, c.SubGIDName)
if err != nil {
return err
}
// When no explicit GID (resp. UID) map resulted, take over the additional
// mappings from tempIDMap and flag the mappings as automatic in "auto" mode.
if len(s.IDMappings.GIDMap) == 0 {
s.IDMappings.AutoUserNsOpts.AdditionalGIDMappings = tempIDMap.AutoUserNsOpts.AdditionalGIDMappings
if s.UserNS.NSMode == specgen.NamespaceMode("auto") {
s.IDMappings.AutoUserNs = true
}
}
if len(s.IDMappings.UIDMap) == 0 {
s.IDMappings.AutoUserNsOpts.AdditionalUIDMappings = tempIDMap.AutoUserNsOpts.AdditionalUIDMappings
if s.UserNS.NSMode == specgen.NamespaceMode("auto") {
s.IDMappings.AutoUserNs = true
}
}
// A non-zero size from tempIDMap overrides the size in the final mappings.
if tempIDMap.AutoUserNsOpts.Size != 0 {
s.IDMappings.AutoUserNsOpts.Size = tempIDMap.AutoUserNsOpts.Size
}
// If some mappings are specified, assume a private user namespace
if userNS.IsDefaultValue() && (!s.IDMappings.HostUIDMapping || !s.IDMappings.HostGIDMapping) {
s.UserNS.NSMode = specgen.Private
} else {
s.UserNS.NSMode = specgen.NamespaceMode(userNS)
}
}
return nil
}
// GenRlimits converts ulimit strings into POSIX rlimits. Each entry is
// lower-cased and stripped of the "rlimit_" prefix before being handed to
// units.ParseUlimit. As soon as the literal entry "host" is encountered a
// nil slice is returned and the remaining entries are ignored.
func GenRlimits(ulimits []string) ([]specs.POSIXRlimit, error) {
	parsedLimits := make([]specs.POSIXRlimit, 0, len(ulimits))

	for _, raw := range ulimits {
		if raw == "host" {
			return nil, nil
		}

		// `ulimitNameMapping` from go-units uses lowercase and names
		// without prefixes, e.g. `RLIMIT_NOFILE` should be converted to `nofile`.
		// https://github.com/containers/podman/issues/9803
		normalized := strings.TrimPrefix(strings.ToLower(raw), rlimitPrefix)

		limit, err := units.ParseUlimit(normalized)
		if err != nil {
			return nil, fmt.Errorf("ulimit option %q requires name=SOFT:HARD, failed to be parsed: %w", normalized, err)
		}

		parsedLimits = append(parsedLimits, specs.POSIXRlimit{
			Type: limit.Name,
			Hard: uint64(limit.Hard),
			Soft: uint64(limit.Soft),
		})
	}

	return parsedLimits, nil
}
// currentLabelOpts returns "label=user:..." and "label=role:..." security
// options built from the SELinux label reported by selinux.CurrentLabel.
// It returns nil when that label is empty.
func currentLabelOpts() ([]string, error) {
	current, err := selinux.CurrentLabel()
	switch {
	case err != nil:
		return nil, err
	case current == "":
		return nil, nil
	}

	secCtx, err := selinux.NewContext(current)
	if err != nil {
		return nil, err
	}

	opts := make([]string, 0, 2)
	opts = append(opts, fmt.Sprintf("label=user:%s", secCtx["user"]))
	opts = append(opts, fmt.Sprintf("label=role:%s", secCtx["role"]))
	return opts, nil
}
// FillOutSpecGen populates the spec generator s from the container create
// options in c and the positional arguments args.
//
// For many fields a value that is already present on s is kept unless the
// corresponding option in c is set, so the function can be applied to a
// partially filled generator. The first validation or parse error is
// returned.
func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions, args []string) error {
rtc, err := config.Default()
if err != nil {
return err
}
// TODO: This needs to move into pkg/specgen/generate so we aren't using containers.conf on the client.
// The label options of the current process are prepended, so options given
// by the user come later in the list.
if rtc.Containers.EnableLabeledUsers {
defSecurityOpts, err := currentLabelOpts()
if err != nil {
return err
}
c.SecurityOpt = append(defSecurityOpts, c.SecurityOpt...)
}
// validate flags as needed
if err := validate(c); err != nil {
return err
}
s.User = c.User
// For non-infra containers everything after the first argument is the
// command (args[0] is presumably the image; confirm with callers).
var inputCommand []string
if !c.IsInfra {
if len(args) > 1 {
inputCommand = args[1:]
}
}
// HEALTHCHECKS
if len(c.HealthCmd) > 0 {
if c.NoHealthCheck {
return errors.New("cannot specify both --no-healthcheck and --health-cmd")
}
s.HealthConfig, err = makeHealthCheckFromCli(c.HealthCmd, c.HealthInterval, c.HealthRetries, c.HealthTimeout, c.HealthStartPeriod, false)
if err != nil {
return err
}
} else if c.NoHealthCheck {
s.HealthConfig = &manifest.Schema2HealthConfig{
Test: []string{"NONE"},
}
}
onFailureAction, err := define.ParseHealthCheckOnFailureAction(c.HealthOnFailure)
if err != nil {
return err
}
s.HealthCheckOnFailureAction = onFailureAction
if c.StartupHCCmd != "" {
if c.NoHealthCheck {
return errors.New("cannot specify both --no-healthcheck and --health-startup-cmd")
}
// The hardcoded "1s" will be discarded, as the startup
// healthcheck does not have a period. So just hardcode
// something that parses correctly.
tmpHcConfig, err := makeHealthCheckFromCli(c.StartupHCCmd, c.StartupHCInterval, c.StartupHCRetries, c.StartupHCTimeout, "1s", true)
if err != nil {
return err
}
s.StartupHealthConfig = new(define.StartupHealthCheck)
s.StartupHealthConfig.Test = tmpHcConfig.Test
s.StartupHealthConfig.Interval = tmpHcConfig.Interval
s.StartupHealthConfig.Timeout = tmpHcConfig.Timeout
s.StartupHealthConfig.Retries = tmpHcConfig.Retries
s.StartupHealthConfig.Successes = int(c.StartupHCSuccesses)
}
// POD
// A pod named in c wins over one already set on s; a pod ID file may only
// be used when no pod has been chosen otherwise.
if len(s.Pod) == 0 || len(c.Pod) > 0 {
s.Pod = c.Pod
}
if len(c.PodIDFile) > 0 {
if len(s.Pod) > 0 {
return errors.New("cannot specify both --pod and --pod-id-file")
}
podID, err := ReadPodIDFile(c.PodIDFile)
if err != nil {
return err
}
s.Pod = podID
}
// Important s.Pod must be set above here.
if err := setNamespaces(rtc, s, c); err != nil {
return err
}
if s.Terminal == nil {
s.Terminal = &c.TTY
}
if err := verifyExpose(c.Expose); err != nil {
return err
}
// We are not handling the Expose flag yet.
// s.PortsExpose = c.Expose
if c.Net != nil {
s.PortMappings = c.Net.PublishPorts
}
if s.PublishExposedPorts == nil {
s.PublishExposedPorts = &c.PublishAll
}
expose, err := CreateExpose(c.Expose)
if err != nil {
return err
}
if len(s.Expose) == 0 {
s.Expose = expose
}
if sig := c.StopSignal; len(sig) > 0 {
stopSignal, err := util.ParseSignal(sig)
if err != nil {
return err
}
s.StopSignal = &stopSignal
}
// ENVIRONMENT VARIABLES
//
// Precedence order (higher index wins):
// 1) containers.conf (EnvHost, EnvHTTP, Env) 2) image data, 3 User EnvHost/EnvHTTP, 4) env-file, 5) env
// containers.conf handled and image data handled on the server side
// user specified EnvHost and EnvHTTP handled on Server Side relative to Server
// env-file and env handled on client side
var env map[string]string
// First transform the os env into a map. We need it for the labels later in
// any case.
osEnv := envLib.Map(os.Environ())
if s.EnvHost == nil {
s.EnvHost = &c.EnvHost
}
if s.HTTPProxy == nil {
s.HTTPProxy = &c.HTTPProxy
}
// env-file overrides any previous variables
for _, f := range c.EnvFile {
fileEnv, err := envLib.ParseFile(f)
if err != nil {
return err
}
// File env is overridden by env.
env = envLib.Join(env, fileEnv)
}
parsedEnv, err := envLib.ParseSlice(c.Env)
if err != nil {
return err
}
// An environment already present on s is left untouched.
if len(s.Env) == 0 {
s.Env = envLib.Join(env, parsedEnv)
}
// LABEL VARIABLES
labels, err := parse.GetAllLabels(c.LabelFile, c.Label)
if err != nil {
return fmt.Errorf("unable to process labels: %w", err)
}
// Propagate the systemd unit variable from the process environment as a label.
if systemdUnit, exists := osEnv[systemdDefine.EnvVariable]; exists {
labels[systemdDefine.EnvVariable] = systemdUnit
}
if len(s.Labels) == 0 {
s.Labels = labels
}
// Intel RDT CAT
if c.IntelRdtClosID != "" {
s.IntelRdt = &specs.LinuxIntelRdt{}
s.IntelRdt.ClosID = c.IntelRdtClosID
}
// ANNOTATIONS
annotations := make(map[string]string)
// Last, add user annotations
for _, annotation := range c.Annotation {
key, val, hasVal := strings.Cut(annotation, "=")
if !hasVal {
return errors.New("annotations must be formatted KEY=VALUE")
}
annotations[key] = val
}
// The parsed annotations are only used when s has none yet. After this
// statement s.Annotations is never nil, which the map writes below rely on.
if len(s.Annotations) == 0 {
s.Annotations = annotations
}
// Add the user namespace configuration to the annotations
if c.UserNS != "" {
s.Annotations[define.UserNsAnnotation] = c.UserNS
}
if len(c.StorageOpts) > 0 {
opts := make(map[string]string, len(c.StorageOpts))
for _, opt := range c.StorageOpts {
key, val, hasVal := strings.Cut(opt, "=")
if !hasVal {
return errors.New("storage-opt must be formatted KEY=VALUE")
}
opts[key] = val
}
s.StorageOpts = opts
}
if len(s.WorkDir) == 0 {
s.WorkDir = c.Workdir
}
if c.Entrypoint != nil {
entrypoint := []string{}
// Check if entrypoint specified is json
// When it is not a JSON string array, the raw value becomes the single entry.
if err := json.Unmarshal([]byte(*c.Entrypoint), &entrypoint); err != nil {
entrypoint = append(entrypoint, *c.Entrypoint)
}
s.Entrypoint = entrypoint
}
// Include the command used to create the container.
if len(s.ContainerCreateCommand) == 0 {
s.ContainerCreateCommand = os.Args
}
if len(inputCommand) > 0 {
s.Command = inputCommand
}
// SHM Size
if c.ShmSize != "" {
val, err := units.RAMInBytes(c.ShmSize)
if err != nil {
return fmt.Errorf("unable to translate --shm-size: %w", err)
}
s.ShmSize = &val
}
// SHM Size Systemd
if c.ShmSizeSystemd != "" {
val, err := units.RAMInBytes(c.ShmSizeSystemd)
if err != nil {
return fmt.Errorf("unable to translate --shm-size-systemd: %w", err)
}
s.ShmSizeSystemd = &val
}
// NETWORK
if c.Net != nil {
s.Networks = c.Net.Networks
}
if c.Net != nil {
s.HostAdd = c.Net.AddHosts
s.UseImageResolvConf = &c.Net.UseImageResolvConf
s.DNSServers = c.Net.DNSServers
s.DNSSearch = c.Net.DNSSearch
s.DNSOptions = c.Net.DNSOptions
s.NetworkOptions = c.Net.NetworkOptions
// NOTE(review): UseImageHosts is fed from NoHosts; confirm the naming is intended.
s.UseImageHosts = &c.Net.NoHosts
}
if len(s.HostUsers) == 0 || len(c.HostUsers) != 0 {
s.HostUsers = c.HostUsers
}
if len(c.ImageVolume) != 0 {
if len(s.ImageVolumeMode) == 0 {
s.ImageVolumeMode = c.ImageVolume
}
}
// The bind mode is rewritten to "anonymous".
if s.ImageVolumeMode == define.TypeBind {
s.ImageVolumeMode = "anonymous"
}
if len(s.Systemd) == 0 || len(c.Systemd) != 0 {
s.Systemd = strings.ToLower(c.Systemd)
}
if len(s.SdNotifyMode) == 0 || len(c.SdNotifyMode) != 0 {
s.SdNotifyMode = c.SdNotifyMode
}
// RESOURCE LIMITS
// GetResources may hand back nil when no limit at all is configured.
if s.ResourceLimits == nil {
s.ResourceLimits = &specs.LinuxResources{}
}
s.ResourceLimits, err = GetResources(s, c)
if err != nil {
return err
}
if s.LogConfiguration == nil {
s.LogConfiguration = &specgen.LogConfig{}
}
if ld := c.LogDriver; len(ld) > 0 {
s.LogConfiguration.Driver = ld
}
if len(s.CgroupParent) == 0 || len(c.CgroupParent) != 0 {
s.CgroupParent = c.CgroupParent
}
if len(s.CgroupsMode) == 0 {
s.CgroupsMode = c.CgroupsMode
}
if len(s.Groups) == 0 || len(c.GroupAdd) != 0 {
s.Groups = c.GroupAdd
}
if len(s.Hostname) == 0 || len(c.Hostname) != 0 {
s.Hostname = c.Hostname
}
// The sysctls are validated here; note that s.Sysctl is assigned a second
// time further down from a plain KEY=VALUE split of the same option.
sysctl := map[string]string{}
if ctl := c.Sysctl; len(ctl) > 0 {
sysctl, err = util.ValidateSysctls(ctl)
if err != nil {
return err
}
}
if len(s.Sysctl) == 0 || len(c.Sysctl) != 0 {
s.Sysctl = sysctl
}
if len(s.CapAdd) == 0 || len(c.CapAdd) != 0 {
s.CapAdd = c.CapAdd
}
if len(s.CapDrop) == 0 || len(c.CapDrop) != 0 {
s.CapDrop = c.CapDrop
}
if s.Privileged == nil {
s.Privileged = &c.Privileged
}
if s.ReadOnlyFilesystem == nil {
s.ReadOnlyFilesystem = &c.ReadOnly
}
if len(s.ConmonPidFile) == 0 || len(c.ConmonPIDFile) != 0 {
s.ConmonPidFile = c.ConmonPIDFile
}
if len(s.DependencyContainers) == 0 || len(c.Requires) != 0 {
s.DependencyContainers = c.Requires
}
// Only add ReadWrite tmpfs mounts iff the container is
// being run ReadOnly and ReadWriteTmpFS is not disabled,
// (user specifying --read-only-tmpfs=false.)
localRWTmpfs := c.ReadOnly && c.ReadWriteTmpFS
s.ReadWriteTmpfs = &localRWTmpfs
// TODO convert to map?
// check if key=value and convert
// Under the same condition as above this overwrites the s.Sysctl value
// produced by util.ValidateSysctls.
sysmap := make(map[string]string)
for _, ctl := range c.Sysctl {
key, val, hasVal := strings.Cut(ctl, "=")
if !hasVal {
return fmt.Errorf("invalid sysctl value %q", ctl)
}
sysmap[key] = val
}
if len(s.Sysctl) == 0 || len(c.Sysctl) != 0 {
s.Sysctl = sysmap
}
if c.CIDFile != "" {
s.Annotations[define.InspectAnnotationCIDFile] = c.CIDFile
}
// SECURITY OPTIONS
for _, opt := range c.SecurityOpt {
// Docker deprecated the ":" syntax but still supports it,
// so we need to as well
var key, val string
var hasVal bool
if strings.Contains(opt, "=") {
key, val, hasVal = strings.Cut(opt, "=")
} else {
key, val, hasVal = strings.Cut(opt, ":")
}
// no-new-privileges is the only option that may be given without a value.
if !hasVal &&
key != "no-new-privileges" {
return fmt.Errorf("invalid --security-opt 1: %q", opt)
}
switch key {
case "apparmor":
s.ContainerSecurityConfig.ApparmorProfile = val
s.Annotations[define.InspectAnnotationApparmor] = val
case "label":
if val == "nested" {
localTrue := true
s.ContainerSecurityConfig.LabelNested = &localTrue
continue
}
// TODO selinux opts and label opts are the same thing
s.ContainerSecurityConfig.SelinuxOpts = append(s.ContainerSecurityConfig.SelinuxOpts, val)
s.Annotations[define.InspectAnnotationLabel] = strings.Join(s.ContainerSecurityConfig.SelinuxOpts, ",label=")
case "mask":
s.ContainerSecurityConfig.Mask = append(s.ContainerSecurityConfig.Mask, strings.Split(val, ":")...)
case "proc-opts":
s.ProcOpts = strings.Split(val, ",")
case "seccomp":
s.SeccompProfilePath = val
s.Annotations[define.InspectAnnotationSeccomp] = val
// this option is for docker compatibility, it is the same as unmask=ALL
case "systempaths":
if val == "unconfined" {
s.ContainerSecurityConfig.Unmask = append(s.ContainerSecurityConfig.Unmask, []string{"ALL"}...)
} else {
return fmt.Errorf("invalid systempaths option %q, only `unconfined` is supported", val)
}
case "unmask":
if hasVal {
s.ContainerSecurityConfig.Unmask = append(s.ContainerSecurityConfig.Unmask, val)
}
case "no-new-privileges":
// A bare "no-new-privileges" enables the setting; otherwise the value is parsed as a bool.
noNewPrivileges := true
if hasVal {
noNewPrivileges, err = strconv.ParseBool(val)
if err != nil {
return fmt.Errorf("invalid --security-opt 2: %q", opt)
}
}
s.ContainerSecurityConfig.NoNewPrivileges = &noNewPrivileges
default:
return fmt.Errorf("invalid --security-opt 2: %q", opt)
}
}
if len(s.SeccompPolicy) == 0 || len(c.SeccompPolicy) != 0 {
s.SeccompPolicy = c.SeccompPolicy
}
if len(s.VolumesFrom) == 0 || len(c.VolumesFrom) != 0 {
s.VolumesFrom = c.VolumesFrom
}
// VOLUMES AND MOUNTS
// Only add read-only tmpfs mounts in case that we are read-only and the
// read-only tmpfs flag has been set.
mounts, volumes, overlayVolumes, imageVolumes, err := parseVolumes(rtc, c.Volume, c.Mount, c.TmpFS)
if err != nil {
return err
}
if len(s.Mounts) == 0 || len(c.Mount) != 0 {
s.Mounts = mounts
}
if len(s.Volumes) == 0 || len(c.Volume) != 0 {
s.Volumes = volumes
}
if s.LabelNested != nil && *s.LabelNested {
// Need to unmask the SELinux file system
s.Unmask = append(s.Unmask, "/sys/fs/selinux", "/proc")
s.Mounts = append(s.Mounts, specs.Mount{
Source: "/sys/fs/selinux",
Destination: "/sys/fs/selinux",
Type: define.TypeBind,
})
s.Annotations[define.RunOCIMountContextType] = "rootcontext"
}
// TODO make sure these work in clone
if len(s.OverlayVolumes) == 0 {
s.OverlayVolumes = overlayVolumes
}
if len(s.ImageVolumes) == 0 {
s.ImageVolumes = imageVolumes
}
// DEVICES
// GPUs are expressed as additional device entries named "nvidia.com/gpu=<gpu>".
devices := c.Devices
for _, gpu := range c.GPUs {
devices = append(devices, "nvidia.com/gpu="+gpu)
}
for _, dev := range devices {
s.Devices = append(s.Devices, specs.LinuxDevice{Path: dev})
}
for _, rule := range c.DeviceCgroupRule {
dev, err := parseLinuxResourcesDeviceAccess(rule)
if err != nil {
return err
}
s.DeviceCgroupRule = append(s.DeviceCgroupRule, dev)
}
if s.Init == nil {
s.Init = &c.Init
}
if len(s.InitPath) == 0 || len(c.InitPath) != 0 {
s.InitPath = c.InitPath
}
if s.Stdin == nil {
s.Stdin = &c.Interactive
}
// quiet
// DeviceCgroupRules: c.StringSlice("device-cgroup-rule"),
// Rlimits/Ulimits
s.Rlimits, err = GenRlimits(c.Ulimit)
if err != nil {
return err
}
// LOGGING
// "driver", "path" and "max-size" are mapped to dedicated fields (the key is
// matched case-insensitively); every other option is passed through as is.
logOpts := make(map[string]string)
for _, o := range c.LogOptions {
key, val, hasVal := strings.Cut(o, "=")
if !hasVal {
return fmt.Errorf("invalid log option %q", o)
}
switch strings.ToLower(key) {
case "driver":
s.LogConfiguration.Driver = val
case "path":
s.LogConfiguration.Path = val
case "max-size":
logSize, err := units.FromHumanSize(val)
if err != nil {
return err
}
s.LogConfiguration.Size = logSize
default:
logOpts[key] = val
}
}
if len(s.LogConfiguration.Options) == 0 || len(c.LogOptions) != 0 {
s.LogConfiguration.Options = logOpts
}
if len(s.Name) == 0 || len(c.Name) != 0 {
s.Name = c.Name
}
if c.PreserveFDs != 0 && c.PreserveFD != nil {
return errors.New("cannot specify both --preserve-fds and --preserve-fd")
}
if s.PreserveFDs == 0 || c.PreserveFDs != 0 {
s.PreserveFDs = c.PreserveFDs
}
if s.PreserveFD == nil || c.PreserveFD != nil {
s.PreserveFD = c.PreserveFD
}
if s.OOMScoreAdj == nil || c.OOMScoreAdj != nil {
s.OOMScoreAdj = c.OOMScoreAdj
}
if c.Restart != "" {
policy, retries, err := util.ParseRestartPolicy(c.Restart)
if err != nil {
return err
}
s.RestartPolicy = policy
s.RestartRetries = &retries
}
if len(s.Secrets) == 0 || len(c.Secrets) != 0 {
s.Secrets, s.EnvSecrets, err = parseSecrets(c.Secrets)
if err != nil {
return err
}
}
if c.Personality != "" {
s.Personality = &specs.LinuxPersonality{}
s.Personality.Domain = specs.LinuxPersonalityDomain(c.Personality)
}
if s.Remove == nil {
s.Remove = &c.Rm
}
if s.StopTimeout == nil || c.StopTimeout != 0 {
s.StopTimeout = &c.StopTimeout
}
if s.Timeout == 0 || c.Timeout != 0 {
s.Timeout = c.Timeout
}
if len(s.Timezone) == 0 || len(c.Timezone) != 0 {
s.Timezone = c.Timezone
}
if len(s.Umask) == 0 || len(c.Umask) != 0 {
s.Umask = c.Umask
}
if len(s.PidFile) == 0 || len(c.PidFile) != 0 {
s.PidFile = c.PidFile
}
// Volatile follows the same option as Remove (c.Rm).
if s.Volatile == nil {
s.Volatile = &c.Rm
}
if len(s.EnvMerge) == 0 || len(c.EnvMerge) != 0 {
s.EnvMerge = c.EnvMerge
}
if len(s.UnsetEnv) == 0 || len(c.UnsetEnv) != 0 {
s.UnsetEnv = c.UnsetEnv
}
if s.UnsetEnvAll == nil {
s.UnsetEnvAll = &c.UnsetEnvAll
}
if len(s.ChrootDirs) == 0 || len(c.ChrootDirs) != 0 {
s.ChrootDirs = c.ChrootDirs
}
// Initcontainers
if len(s.InitContainerType) == 0 || len(c.InitContainerType) != 0 {
s.InitContainerType = c.InitContainerType
}
// Passwd defaults to true when it has not been set on s.
t := true
if s.Passwd == nil {
s.Passwd = &t
}
if len(s.PasswdEntry) == 0 || len(c.PasswdEntry) != 0 {
s.PasswdEntry = c.PasswdEntry
}
if len(s.GroupEntry) == 0 || len(c.GroupEntry) != 0 {
s.GroupEntry = c.GroupEntry
}
return nil
}
// makeHealthCheckFromCli builds a health check configuration from the raw
// command line values.
//
// inCmd is either a JSON string array or a plain string. A plain string is
// split on whitespace when it starts with the CMD or NONE keyword; any other
// plain string (and a one-element array) is wrapped into a CMD-SHELL test.
// A JSON array with more than one element that does not start with a keyword
// is prefixed with CMD. A JSON array that already starts with CMD or NONE is
// used as parsed. A test starting with NONE is reduced to just that keyword.
//
// interval, timeout and startPeriod are parsed with time.ParseDuration; the
// interval value "disable" is treated as "0". retries must be at least 1
// unless isStartup is true.
func makeHealthCheckFromCli(inCmd, interval string, retries uint, timeout, startPeriod string, isStartup bool) (*manifest.Schema2HealthConfig, error) {
	cmdArr := []string{}
	isArr := true
	if err := json.Unmarshal([]byte(inCmd), &cmdArr); err != nil { // array unmarshalling
		cmdArr = strings.SplitN(inCmd, " ", 2) // default for compat
		isArr = false
	}

	// Every healthcheck requires a command
	if len(cmdArr) == 0 {
		return nil, errors.New("must define a healthcheck command for all healthchecks")
	}

	keyword := strings.ToUpper(cmdArr[0])
	switch {
	case keyword == define.HealthConfigTestCmd || keyword == define.HealthConfigTestNone:
		// Compat form that already carries a keyword. A plain string still
		// has to be split into its individual arguments. A JSON array is
		// already split; re-splitting its raw text would produce tokens that
		// contain the JSON brackets and quotes, so it is left alone.
		if !isArr {
			cmdArr = strings.Fields(inCmd)
		}
	case keyword != define.HealthConfigTestCmdShell:
		// Podman style input without any keyword.
		if isArr && len(cmdArr) > 1 {
			// An array of arguments is executed directly.
			cmdArr = append([]string{define.HealthConfigTestCmd}, cmdArr...)
		} else {
			// One singular command is handed to the shell.
			cmdArr = []string{define.HealthConfigTestCmdShell, strings.Join(cmdArr, " ")}
		}
	}

	// If the user asked to remove the healthcheck, drop everything else.
	if strings.ToUpper(cmdArr[0]) == define.HealthConfigTestNone {
		cmdArr = []string{define.HealthConfigTestNone}
	}

	// healthcheck is by default an array, so we simply pass the user input
	hc := manifest.Schema2HealthConfig{
		Test: cmdArr,
	}

	if interval == "disable" {
		interval = "0"
	}
	intervalDuration, err := time.ParseDuration(interval)
	if err != nil {
		return nil, fmt.Errorf("invalid healthcheck-interval: %w", err)
	}
	hc.Interval = intervalDuration

	if retries < 1 && !isStartup {
		return nil, errors.New("healthcheck-retries must be greater than 0")
	}
	hc.Retries = int(retries)

	timeoutDuration, err := time.ParseDuration(timeout)
	if err != nil {
		return nil, fmt.Errorf("invalid healthcheck-timeout: %w", err)
	}
	// NOTE(review): time.Duration(1) is one nanosecond, so this only rejects
	// zero and negative timeouts even though the message mentions one second.
	// Kept as is to avoid rejecting sub-second timeouts that work today.
	if timeoutDuration < time.Duration(1) {
		return nil, errors.New("healthcheck-timeout must be at least 1 second")
	}
	hc.Timeout = timeoutDuration

	startPeriodDuration, err := time.ParseDuration(startPeriod)
	if err != nil {
		return nil, fmt.Errorf("invalid healthcheck-start-period: %w", err)
	}
	if startPeriodDuration < time.Duration(0) {
		return nil, errors.New("healthcheck-start-period must be 0 seconds or greater")
	}
	hc.StartPeriod = startPeriodDuration

	return &hc, nil
}
// parseWeightDevices parses "<device-path>:<weight>" entries into a map keyed
// by device path. The path must start with "/dev/" and the weight must be 0
// or lie between 10 and 1000 (inclusive).
func parseWeightDevices(weightDevs []string) (map[string]specs.LinuxWeightDevice, error) {
	result := make(map[string]specs.LinuxWeightDevice)

	for _, entry := range weightDevs {
		path, rawWeight, found := strings.Cut(entry, ":")
		switch {
		case !found:
			return nil, fmt.Errorf("bad format: %s", entry)
		case !strings.HasPrefix(path, "/dev/"):
			return nil, fmt.Errorf("bad format for device path: %s", entry)
		}

		parsed, err := strconv.ParseUint(rawWeight, 10, 0)
		outOfRange := parsed > 0 && (parsed < 10 || parsed > 1000)
		if err != nil || outOfRange {
			return nil, fmt.Errorf("invalid weight for device: %s", entry)
		}

		// The range check above guarantees the value fits into 16 bits.
		weight := uint16(parsed)
		result[path] = specs.LinuxWeightDevice{Weight: &weight}
	}

	return result, nil
}
// parseThrottleBPSDevices parses "<device-path>:<number>[<unit>]" entries
// into a map of byte-rate throttle settings keyed by device path. The path
// must start with "/dev/" and the rate must parse to a non-negative size.
func parseThrottleBPSDevices(bpsDevices []string) (map[string]specs.LinuxThrottleDevice, error) {
	const invalidRate = "invalid rate for device: %s. The correct format is <device-path>:<number>[<unit>]. Number must be a positive integer. Unit is optional and can be kb, mb, or gb"

	throttles := make(map[string]specs.LinuxThrottleDevice)

	for _, entry := range bpsDevices {
		path, rawRate, found := strings.Cut(entry, ":")
		switch {
		case !found:
			return nil, fmt.Errorf("bad format: %s", entry)
		case !strings.HasPrefix(path, "/dev/"):
			return nil, fmt.Errorf("bad format for device path: %s", entry)
		}

		bytesPerSec, err := units.RAMInBytes(rawRate)
		if err != nil || bytesPerSec < 0 {
			return nil, fmt.Errorf(invalidRate, entry)
		}

		throttles[path] = specs.LinuxThrottleDevice{Rate: uint64(bytesPerSec)}
	}

	return throttles, nil
}
// parseThrottleIOPsDevices parses "<device-path>:<number>" entries into a map
// of IO-operation throttle settings keyed by device path. The path must
// start with "/dev/" and the number must be an unsigned decimal integer.
func parseThrottleIOPsDevices(iopsDevices []string) (map[string]specs.LinuxThrottleDevice, error) {
	throttles := make(map[string]specs.LinuxThrottleDevice)

	for _, entry := range iopsDevices {
		path, rawRate, found := strings.Cut(entry, ":")
		switch {
		case !found:
			return nil, fmt.Errorf("bad format: %s", entry)
		case !strings.HasPrefix(path, "/dev/"):
			return nil, fmt.Errorf("bad format for device path: %s", entry)
		}

		opsPerSec, err := strconv.ParseUint(rawRate, 10, 64)
		if err != nil {
			return nil, fmt.Errorf("invalid rate for device: %s. The correct format is <device-path>:<number>. Number must be a positive integer", entry)
		}

		throttles[path] = specs.LinuxThrottleDevice{Rate: opsPerSec}
	}

	return throttles, nil
}
// parseSecrets parses secret specifications into mount secrets and
// environment secrets (a map from target variable name to secret source).
//
// A specification without a comma is taken verbatim as the source of a mount
// secret with default ownership and mode. Otherwise it is a comma separated
// list of option=value pairs (source, type, target, mode, uid/UID, gid/GID),
// optionally preceded by a bare source name. The type defaults to "mount";
// mode, uid and gid are rejected for type "env".
func parseSecrets(secrets []string) ([]specgen.Secret, map[string]string, error) {
	errParse := errors.New("parsing secret")

	var mountSecrets []specgen.Secret
	envSecrets := make(map[string]string)

	for _, spec := range secrets {
		var (
			source, target, kind string
			uid, gid             uint32
			// default mode 444 octal = 292 decimal
			mode uint32 = 292
			// usesMountOpts records whether an option valid only for mount
			// secrets was given.
			usesMountOpts bool
		)

		opts := strings.Split(spec, ",")

		// --secret mysecret
		if len(opts) == 1 {
			mountSecrets = append(mountSecrets, specgen.Secret{
				Source: spec,
				Target: target,
				UID:    uid,
				GID:    gid,
				Mode:   mode,
			})
			continue
		}

		// --secret mysecret,opt=opt
		if !strings.Contains(opts[0], "=") {
			source, opts = opts[0], opts[1:]
		}

		for _, opt := range opts {
			name, value, found := strings.Cut(opt, "=")
			if !found {
				return nil, nil, fmt.Errorf("option %s must be in form option=value: %w", opt, errParse)
			}

			switch name {
			case "source":
				source = value
			case "target":
				target = value
			case "type":
				if kind != "" {
					return nil, nil, fmt.Errorf("cannot set more than one secret type: %w", errParse)
				}
				if value != "mount" && value != "env" {
					return nil, nil, fmt.Errorf("type %s is invalid: %w", value, errParse)
				}
				kind = value
			case "mode":
				usesMountOpts = true
				parsed, err := strconv.ParseUint(value, 8, 32)
				if err != nil {
					return nil, nil, fmt.Errorf("mode %s invalid: %w", value, errParse)
				}
				mode = uint32(parsed)
			case "uid", "UID":
				usesMountOpts = true
				parsed, err := strconv.ParseUint(value, 10, 32)
				if err != nil {
					return nil, nil, fmt.Errorf("UID %s invalid: %w", value, errParse)
				}
				uid = uint32(parsed)
			case "gid", "GID":
				usesMountOpts = true
				parsed, err := strconv.ParseUint(value, 10, 32)
				if err != nil {
					return nil, nil, fmt.Errorf("GID %s invalid: %w", value, errParse)
				}
				gid = uint32(parsed)
			default:
				return nil, nil, fmt.Errorf("option %s invalid: %w", opt, errParse)
			}
		}

		if source == "" {
			return nil, nil, fmt.Errorf("no source found %s: %w", spec, errParse)
		}

		switch kind {
		case "", "mount":
			// An unspecified type means a mount secret.
			mountSecrets = append(mountSecrets, specgen.Secret{
				Source: source,
				Target: target,
				UID:    uid,
				GID:    gid,
				Mode:   mode,
			})
		case "env":
			if usesMountOpts {
				return nil, nil, fmt.Errorf("UID, GID, Mode options cannot be set with secret type env: %w", errParse)
			}
			// Without an explicit target the variable is named after the source.
			if target == "" {
				target = source
			}
			envSecrets[target] = source
		}
	}

	return mountSecrets, envSecrets, nil
}
var (
	// cgroupDeviceType is the set of device type letters accepted in a
	// device cgroup rule.
	cgroupDeviceType = map[string]bool{
		"a": true, // all
		"b": true, // block device
		"c": true, // character device
	}

	// cgroupDeviceAccess is the set of access letters accepted in a device
	// cgroup rule.
	cgroupDeviceAccess = map[string]bool{
		"r": true, // read
		"w": true, // write
		"m": true, // mknod
	}
)
// parseLinuxResourcesDeviceAccess parses a device cgroup rule of the form
// "<type> <major>[:<minor>] <access>" into an allowing LinuxDeviceCgroup.
//
// type must be one of the keys of cgroupDeviceType and every character of
// access must be a key of cgroupDeviceAccess. major and minor are either "*"
// or a non-negative decimal number; "*" and an omitted minor leave the
// respective field nil. An error is returned for any malformed part.
func parseLinuxResourcesDeviceAccess(device string) (specs.LinuxDeviceCgroup, error) {
	fields := strings.Split(device, " ")
	if len(fields) != 3 {
		return specs.LinuxDeviceCgroup{}, fmt.Errorf("invalid device cgroup rule requires type, major:Minor, and access rules: %q", device)
	}
	devType, numbers, access := fields[0], fields[1], fields[2]

	if !cgroupDeviceType[devType] {
		return specs.LinuxDeviceCgroup{}, fmt.Errorf("invalid device type in device-access-add: %s", devType)
	}

	// parseNumber converts a decimal device number. The bit size of 63 makes
	// ParseUint reject values that do not fit into the int64 the spec uses,
	// instead of letting the conversion wrap around to a negative number.
	parseNumber := func(raw string) (*int64, error) {
		parsed, err := strconv.ParseUint(raw, 10, 63)
		if err != nil {
			return nil, err
		}
		number := int64(parsed)
		return &number, nil
	}

	var (
		major, minor *int64
		err          error
	)
	majorNumber, minorNumber, hasMinor := strings.Cut(numbers, ":")
	if majorNumber != "*" {
		if major, err = parseNumber(majorNumber); err != nil {
			return specs.LinuxDeviceCgroup{}, err
		}
	}
	if hasMinor && minorNumber != "*" {
		if minor, err = parseNumber(minorNumber); err != nil {
			return specs.LinuxDeviceCgroup{}, err
		}
	}

	for _, c := range strings.Split(access, "") {
		if !cgroupDeviceAccess[c] {
			return specs.LinuxDeviceCgroup{}, fmt.Errorf("invalid device access in device-access-add: %s", c)
		}
	}

	return specs.LinuxDeviceCgroup{
		Allow:  true,
		Type:   devType,
		Major:  major,
		Minor:  minor,
		Access: access,
	}, nil
}
// GetResources computes the resource limits of s from the create options in
// c and returns them; s.ResourceLimits is updated in place as well.
//
// Memory, block IO and CPU limits are recomputed when they are not set on s
// yet or when one of the options checked for that group is set in c. The
// PID limit and the unified cgroup settings are only written when given.
// When no limit at all remains, s.ResourceLimits is reset to nil and nil is
// returned.
func GetResources(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) (*specs.LinuxResources, error) {
	var err error

	// Tolerate callers that have not initialised the limits yet.
	if s.ResourceLimits == nil {
		s.ResourceLimits = &specs.LinuxResources{}
	}

	memoryRequested := len(c.Memory) != 0 || len(c.MemoryReservation) != 0 || len(c.MemorySwap) != 0 || c.MemorySwappiness != 0
	if s.ResourceLimits.Memory == nil || memoryRequested {
		s.ResourceLimits.Memory, err = getMemoryLimits(c)
		if err != nil {
			return nil, err
		}
	}

	// The IOPS throttles are part of the check so that they are not ignored
	// when block IO limits are already present on s.
	blockIORequested := len(c.BlkIOWeight) != 0 || len(c.BlkIOWeightDevice) != 0 ||
		len(c.DeviceReadBPs) != 0 || len(c.DeviceWriteBPs) != 0 ||
		len(c.DeviceReadIOPs) != 0 || len(c.DeviceWriteIOPs) != 0
	if s.ResourceLimits.BlockIO == nil || blockIORequested {
		s.ResourceLimits.BlockIO, err = getIOLimits(s, c)
		if err != nil {
			return nil, err
		}
	}

	if c.PIDsLimit != nil {
		s.ResourceLimits.Pids = &specs.LinuxPids{
			Limit: *c.PIDsLimit,
		}
	}

	cpuRequested := c.CPUPeriod != 0 || c.CPUQuota != 0 || c.CPURTPeriod != 0 || c.CPURTRuntime != 0 ||
		c.CPUS != 0 || len(c.CPUSetCPUs) != 0 || len(c.CPUSetMems) != 0 || c.CPUShares != 0
	if s.ResourceLimits.CPU == nil || cpuRequested {
		s.ResourceLimits.CPU = getCPULimits(c)
	}

	unifieds := make(map[string]string, len(c.CgroupConf))
	for _, unified := range c.CgroupConf {
		key, val, hasVal := strings.Cut(unified, "=")
		if !hasVal {
			return nil, errors.New("--cgroup-conf must be formatted KEY=VALUE")
		}
		unifieds[key] = val
	}
	if len(unifieds) > 0 {
		s.ResourceLimits.Unified = unifieds
	}

	// Collapse an entirely empty set of limits to nil.
	if s.ResourceLimits.CPU == nil && s.ResourceLimits.Pids == nil && s.ResourceLimits.BlockIO == nil && s.ResourceLimits.Memory == nil && s.ResourceLimits.Unified == nil {
		s.ResourceLimits = nil
	}
	return s.ResourceLimits, nil
}