Files
podman/pkg/systemd/quadlet/quadlet.go
OpenShift Merge Robot 47bcd10f61 Merge pull request #16243 from alexlarsson/volume-create-ignore
Add podman volume create --ignore
2022-10-26 15:00:51 -04:00

713 lines
20 KiB
Go

package quadlet
import (
"fmt"
"math"
"os"
"regexp"
"strings"
"unicode"
"github.com/containers/podman/v4/pkg/systemd/parser"
)
// Overwritten at build time:
var (
	// QuadletUserName is the user whose /etc/subuid and /etc/subgid entries
	// supply the default UID/GID remap ranges (see getDefaultRemapUids and
	// getDefaultRemapGids).
	QuadletUserName = "quadlet"
)
const (
	// Directory for global Quadlet files (sysadmin owned)
	UnitDirAdmin = "/etc/containers/systemd"
	// Directory for global Quadlet files (distro owned)
	UnitDirDistro = "/usr/share/containers/systemd"

	// Names of commonly used systemd/quadlet group names
	UnitGroup       = "Unit"
	InstallGroup    = "Install"
	ServiceGroup    = "Service"
	ContainerGroup  = "Container"
	XContainerGroup = "X-Container"
	VolumeGroup     = "Volume"
	XVolumeGroup    = "X-Volume"

	// Fallback uid/gid ranges used if QuadletUserName doesn't exist
	// or has no subuid/subgid allocations.
	FallbackUIDStart  = 1879048192
	FallbackUIDLength = 165536
	FallbackGIDStart  = 1879048192
	FallbackGIDLength = 165536
)
var validPortRange = regexp.MustCompile(`\d+(-\d+)?(/udp|/tcp)?$`)
// All the supported quadlet keys, as they appear in the Container/Volume
// groups of a .container/.volume unit file.
const (
	KeyContainerName   = "ContainerName"
	KeyImage           = "Image"
	KeyEnvironment     = "Environment"
	KeyExec            = "Exec"
	KeyNoNewPrivileges = "NoNewPrivileges"
	KeyDropCapability  = "DropCapability"
	KeyAddCapability   = "AddCapability"
	KeyReadOnly        = "ReadOnly"
	KeyRemapUsers      = "RemapUsers"
	KeyRemapUIDStart   = "RemapUidStart"
	KeyRemapGIDStart   = "RemapGidStart"
	KeyRemapUIDRanges  = "RemapUidRanges"
	KeyRemapGIDRanges  = "RemapGidRanges"
	KeyNotify          = "Notify"
	KeyExposeHostPort  = "ExposeHostPort"
	KeyPublishPort     = "PublishPort"
	KeyKeepID          = "KeepId"
	KeyUser            = "User"
	KeyGroup           = "Group"
	KeyHostUser        = "HostUser"
	KeyHostGroup       = "HostGroup"
	KeyVolume          = "Volume"
	KeyPodmanArgs      = "PodmanArgs"
	KeyLabel           = "Label"
	KeyAnnotation      = "Annotation"
	KeyRunInit         = "RunInit"
	KeyVolatileTmp     = "VolatileTmp"
	KeyTimezone        = "Timezone"
	KeySeccompProfile  = "SeccompProfile"
	KeyAddDevice       = "AddDevice"
	KeyNetwork         = "Network"
)
// Supported keys in "Container" group
var supportedContainerKeys = map[string]bool{
KeyContainerName: true,
KeyImage: true,
KeyEnvironment: true,
KeyExec: true,
KeyNoNewPrivileges: true,
KeyDropCapability: true,
KeyAddCapability: true,
KeyReadOnly: true,
KeyRemapUsers: true,
KeyRemapUIDStart: true,
KeyRemapGIDStart: true,
KeyRemapUIDRanges: true,
KeyRemapGIDRanges: true,
KeyNotify: true,
KeyExposeHostPort: true,
KeyPublishPort: true,
KeyKeepID: true,
KeyUser: true,
KeyGroup: true,
KeyHostUser: true,
KeyHostGroup: true,
KeyVolume: true,
KeyPodmanArgs: true,
KeyLabel: true,
KeyAnnotation: true,
KeyRunInit: true,
KeyVolatileTmp: true,
KeyTimezone: true,
KeySeccompProfile: true,
KeyAddDevice: true,
KeyNetwork: true,
}
// Supported keys in "Volume" group
var supportedVolumeKeys = map[string]bool{
KeyUser: true,
KeyGroup: true,
KeyLabel: true,
}
// replaceExtension strips name's extension (if any), then returns
// extraPrefix + stem + extraSuffix + extension. A dot at position 0
// (hidden-file style, e.g. ".config") is NOT treated as an extension
// separator, so such names keep their full text as the stem.
func replaceExtension(name string, extension string, extraPrefix string, extraSuffix string) string {
	stem := name
	if dotIdx := strings.LastIndexByte(name, '.'); dotIdx > 0 {
		stem = name[:dotIdx]
	}
	return fmt.Sprintf("%s%s%s%s", extraPrefix, stem, extraSuffix, extension)
}
var defaultRemapUIDs, defaultRemapGIDs *Ranges
// getDefaultRemapUids returns (computing and caching on first call) the
// default UID remap ranges: the subuid allocation of QuadletUserName, or the
// hardcoded fallback range when that lookup yields nothing.
func getDefaultRemapUids() *Ranges {
	if defaultRemapUIDs != nil {
		return defaultRemapUIDs
	}
	defaultRemapUIDs = lookupHostSubuid(QuadletUserName)
	if defaultRemapUIDs == nil {
		defaultRemapUIDs = NewRanges(FallbackUIDStart, FallbackUIDLength)
	}
	return defaultRemapUIDs
}
// getDefaultRemapGids returns (computing and caching on first call) the
// default GID remap ranges: the subgid allocation of QuadletUserName, or the
// hardcoded fallback range when that lookup yields nothing.
func getDefaultRemapGids() *Ranges {
	if defaultRemapGIDs != nil {
		return defaultRemapGIDs
	}
	defaultRemapGIDs = lookupHostSubgid(QuadletUserName)
	if defaultRemapGIDs == nil {
		defaultRemapGIDs = NewRanges(FallbackGIDStart, FallbackGIDLength)
	}
	return defaultRemapGIDs
}
// isPortRange reports whether port looks like a port number or port range
// (e.g. "80", "80-90"), optionally with a "/tcp" or "/udp" suffix, as
// defined by validPortRange. Callers trim whitespace before calling.
func isPortRange(port string) bool {
	return validPortRange.MatchString(port)
}
// checkForUnknownKeys returns an error naming the first key in groupName of
// unit that is not present in supportedKeys, or nil if all keys are known.
func checkForUnknownKeys(unit *parser.UnitFile, groupName string, supportedKeys map[string]bool) error {
	for _, key := range unit.ListKeys(groupName) {
		if supportedKeys[key] {
			continue
		}
		return fmt.Errorf("unsupported key '%s' in group '%s' in %s", key, groupName, unit.Path)
	}
	return nil
}
// lookupRanges resolves the value of key in groupName of unit into a Ranges.
// A missing key yields a copy of defaultValue (or an empty range set when
// defaultValue is nil). A value starting with a digit is parsed as explicit
// ranges; otherwise the value is treated as a user name and resolved through
// nameLookup. Anything unresolvable yields an empty range set.
func lookupRanges(unit *parser.UnitFile, groupName string, key string, nameLookup func(string) *Ranges, defaultValue *Ranges) *Ranges {
	value, found := unit.Lookup(groupName, key)
	if !found {
		if defaultValue == nil {
			return NewRangesEmpty()
		}
		return defaultValue.Copy()
	}
	if value == "" {
		return NewRangesEmpty()
	}

	if unicode.IsDigit(rune(value[0])) {
		return ParseRanges(value)
	}

	// Non-numeric: treat the value as a user name.
	if nameLookup != nil {
		if resolved := nameLookup(value); resolved != nil {
			return resolved
		}
	}
	return NewRangesEmpty()
}
// splitPorts splits a publish-port specification on ':', but treats a
// bracketed span ("[...]") as opaque so that IPv6 addresses such as
// "[::]:8080:80/tcp" are not split on their internal colons.
// The result always contains at least one (possibly empty) element.
func splitPorts(ports string) []string {
	var segments []string

	segStart := 0
	i := 0
	for i < len(ports) {
		switch ports[i] {
		case '[':
			// Scan past the matching ']' (or to end of string if unmatched).
			i++
			for i < len(ports) && ports[i] != ']' {
				i++
			}
			if i < len(ports) {
				i++ // consume the ']'
			}
		case ':':
			segments = append(segments, ports[segStart:i])
			i++
			segStart = i
		default:
			i++
		}
	}
	segments = append(segments, ports[segStart:])
	return segments
}
// addIDMaps appends ID-mapping arguments (argPrefix is "--uidmap" or
// "--gidmap") to the podman command line. The mapping always sends
// containerID to hostID; container IDs below remapStartID are mapped
// one-to-one to the same host ID, and the remaining container IDs are mapped
// onto availableHostIDs (the caller's subuid/subgid ranges) front-to-back
// until those run out. Note: availableHostIDs is consumed (mutated) by this
// function.
func addIDMaps(podman *PodmanCmdline, argPrefix string, containerID, hostID, remapStartID uint32, availableHostIDs *Ranges) {
	if availableHostIDs == nil {
		// No host ranges given: use an empty set, so only the one-to-one
		// mappings below get emitted.
		availableHostIDs = NewRangesEmpty()
	}

	// Map the first ids up to remapStartID to the host equivalent
	unmappedIds := NewRanges(0, remapStartID)

	// The rest we want to map to availableHostIDs. Note that this
	// overlaps unmappedIds, because below we may remove ranges from
	// unmapped ids and we want to backfill those.
	mappedIds := NewRanges(0, math.MaxUint32)

	// Always map specified uid to specified host_uid
	podman.addIDMap(argPrefix, containerID, hostID, 1)

	// We no longer want to map this container id as its already mapped
	mappedIds.Remove(containerID, 1)
	unmappedIds.Remove(containerID, 1)

	// But also, we don't want to use the *host* id again, as we can only map it once
	unmappedIds.Remove(hostID, 1)
	availableHostIDs.Remove(hostID, 1)

	// Map unmapped ids to equivalent host range, and remove from mappedIds to avoid double-mapping
	for _, r := range unmappedIds.Ranges {
		start := r.Start
		length := r.Length
		podman.addIDMap(argPrefix, start, start, length)
		mappedIds.Remove(start, length)
		availableHostIDs.Remove(start, length)
	}

	// Greedily assign the remaining container ranges to whatever host ranges
	// are still available, consuming availableHostIDs from the front.
	for cIdx := 0; cIdx < len(mappedIds.Ranges) && len(availableHostIDs.Ranges) > 0; cIdx++ {
		cRange := &mappedIds.Ranges[cIdx]
		cStart := cRange.Start
		cLength := cRange.Length

		for cLength > 0 && len(availableHostIDs.Ranges) > 0 {
			hRange := &availableHostIDs.Ranges[0]
			hStart := hRange.Start
			hLength := hRange.Length

			// Map as much of the container range as fits into the first
			// available host range, then advance.
			nextLength := minUint32(hLength, cLength)
			podman.addIDMap(argPrefix, cStart, hStart, nextLength)
			availableHostIDs.Remove(hStart, nextLength)
			cStart += nextLength
			cLength -= nextLength
		}
	}
}
// ConvertContainer converts a quadlet container file (unit file with a
// Container group) to a systemd service file (unit file with a Service
// group) based on the options in the Container group. isUser indicates a
// rootless (per-user) unit, which affects KeepId and RemapUsers handling.
// The original Container group is kept around as X-Container.
func ConvertContainer(container *parser.UnitFile, isUser bool) (*parser.UnitFile, error) {
	service := container.Dup()
	service.Filename = replaceExtension(container.Filename, ".service", "", "")

	// Record where the generated unit came from.
	if container.Path != "" {
		service.Add(UnitGroup, "SourcePath", container.Path)
	}

	if err := checkForUnknownKeys(container, ContainerGroup, supportedContainerKeys); err != nil {
		return nil, err
	}

	// Rename old Container group to X-Container so that systemd ignores it
	service.RenameGroup(ContainerGroup, XContainerGroup)

	// Image is the only mandatory key.
	image, ok := container.Lookup(ContainerGroup, KeyImage)
	if !ok || len(image) == 0 {
		return nil, fmt.Errorf("no Image key specified")
	}

	containerName, ok := container.Lookup(ContainerGroup, KeyContainerName)
	if !ok || len(containerName) == 0 {
		// By default, we name the container after the service name
		// (%N is the systemd specifier for the unit name minus suffix).
		containerName = "systemd-%N"
	}

	// Set PODMAN_SYSTEMD_UNIT so that podman auto-update can restart the service.
	service.Add(ServiceGroup, "Environment", "PODMAN_SYSTEMD_UNIT=%n")

	// Only allow mixed or control-group, as nothing else works well
	killMode, ok := service.Lookup(ServiceGroup, "KillMode")
	if !ok || !(killMode == "mixed" || killMode == "control-group") {
		if ok {
			return nil, fmt.Errorf("invalid KillMode '%s'", killMode)
		}
		// We default to mixed instead of control-group, because it lets conmon do its thing
		service.Set(ServiceGroup, "KillMode", "mixed")
	}

	// Read env early so we can override it below
	podmanEnv := container.LookupAllKeyVal(ContainerGroup, KeyEnvironment)

	// Need the containers filesystem mounted to start podman
	// (%t is the systemd runtime directory, e.g. /run).
	service.Add(UnitGroup, "RequiresMountsFor", "%t/containers")

	// Remove any leftover cid file before starting, just to be sure.
	// We remove any actual pre-existing container by name with --replace=true.
	// But --cidfile will fail if the target exists.
	service.Add(ServiceGroup, "ExecStartPre", "-rm -f %t/%N.cid")

	// If conmon exited uncleanly it may not have removed the container, so
	// force it; -i makes it ignore non-existing files.
	service.Add(ServiceGroup, "ExecStopPost", "-/usr/bin/podman rm -f -i --cidfile=%t/%N.cid")

	// Remove the cid file, to avoid confusion as the container is no longer running.
	service.Add(ServiceGroup, "ExecStopPost", "-rm -f %t/%N.cid")

	podman := NewPodmanCmdline("run")

	podman.addf("--name=%s", containerName)

	podman.add(
		// We store the container id so we can clean it up in case of failure
		"--cidfile=%t/%N.cid",
		// And replace any previous container with the same name, not fail
		"--replace",
		// On clean shutdown, remove container
		"--rm",
		// Detach from container, we don't need the podman process to hang around
		"-d",
		// But we still want output to the journal, so use the log driver.
		"--log-driver", "passthrough",
		// Never try to pull the image during service start
		"--pull=never")

	// We use crun as the runtime and delegate cgroup management to it.
	service.Add(ServiceGroup, "Delegate", "yes")
	podman.add(
		"--runtime", "/usr/bin/crun",
		"--cgroups=split")

	timezone, ok := container.Lookup(ContainerGroup, KeyTimezone)
	if ok && len(timezone) > 0 {
		podman.addf("--tz=%s", timezone)
	}

	// One --network option per (non-empty) Network key.
	networks := container.LookupAll(ContainerGroup, KeyNetwork)
	for _, network := range networks {
		if len(network) > 0 {
			podman.addf("--network=%s", network)
		}
	}

	// Run with a pid1 init to reap zombies by default (as most apps don't do that)
	runInit := container.LookupBoolean(ContainerGroup, KeyRunInit, true)
	if runInit {
		podman.add("--init")
	}

	// By default we handle startup notification with conmon, but allow passing it to the container with Notify=yes
	notify := container.LookupBoolean(ContainerGroup, KeyNotify, false)
	if notify {
		podman.add("--sdnotify=container")
	} else {
		podman.add("--sdnotify=conmon")
	}
	service.Setv(ServiceGroup,
		"Type", "notify",
		"NotifyAccess", "all")

	if !container.HasKey(ServiceGroup, "SyslogIdentifier") {
		service.Set(ServiceGroup, "SyslogIdentifier", "%N")
	}

	// Default to no higher level privileges or caps
	noNewPrivileges := container.LookupBoolean(ContainerGroup, KeyNoNewPrivileges, true)
	if noNewPrivileges {
		podman.add("--security-opt=no-new-privileges")
	}

	// Devices to pass through to the container (AddDevice).
	devices := container.LookupAllStrv(ContainerGroup, KeyAddDevice)
	for _, device := range devices {
		podman.addf("--device=%s", device)
	}

	// Optional custom seccomp profile.
	seccompProfile, hasSeccompProfile := container.Lookup(ContainerGroup, KeySeccompProfile)
	if hasSeccompProfile {
		podman.add("--security-opt", fmt.Sprintf("seccomp=%s", seccompProfile))
	}

	// Drop all capabilities by default, unless DropCapability overrides it.
	dropCaps := []string{"all"} // Default
	if container.HasKey(ContainerGroup, KeyDropCapability) {
		dropCaps = container.LookupAllStrv(ContainerGroup, KeyDropCapability)
	}

	for _, caps := range dropCaps {
		podman.addf("--cap-drop=%s", strings.ToLower(caps))
	}

	// But allow overrides with AddCapability
	addCaps := container.LookupAllStrv(ContainerGroup, KeyAddCapability)
	for _, caps := range addCaps {
		podman.addf("--cap-add=%s", strings.ToLower(caps))
	}

	// Read-only root filesystem by default.
	readOnly := container.LookupBoolean(ContainerGroup, KeyReadOnly, true)
	if readOnly {
		podman.add("--read-only")
	}

	// We want /tmp to be a tmpfs, like on rhel host
	volatileTmp := container.LookupBoolean(ContainerGroup, KeyVolatileTmp, true)
	if volatileTmp {
		/* Read only mode already has a tmpfs by default */
		if !readOnly {
			podman.add("--tmpfs", "/tmp:rw,size=512M,mode=1777")
		}
	} else if readOnly {
		/* !volatileTmp, disable the default tmpfs from --read-only */
		podman.add("--read-only-tmpfs=false")
	}

	defaultContainerUID := uint32(0)
	defaultContainerGID := uint32(0)

	// KeepId maps the invoking user's uid/gid into the container; this only
	// works for rootless (user) units.
	keepID := container.LookupBoolean(ContainerGroup, KeyKeepID, false)
	if keepID {
		if isUser {
			defaultContainerUID = uint32(os.Getuid())
			defaultContainerGID = uint32(os.Getgid())
			podman.add("--userns", "keep-id")
		} else {
			return nil, fmt.Errorf("key 'KeepId' in '%s' unsupported for system units", container.Path)
		}
	}

	// Container-side uid/gid and the host ids they map to.
	uid := container.LookupUint32(ContainerGroup, KeyUser, defaultContainerUID)
	gid := container.LookupUint32(ContainerGroup, KeyGroup, defaultContainerGID)

	hostUID, err := container.LookupUID(ContainerGroup, KeyHostUser, uid)
	if err != nil {
		return nil, fmt.Errorf("key 'HostUser' invalid: %s", err)
	}

	hostGID, err := container.LookupGID(ContainerGroup, KeyHostGroup, gid)
	if err != nil {
		return nil, fmt.Errorf("key 'HostGroup' invalid: %s", err)
	}

	// Only pass --user when it differs from the default.
	if uid != defaultContainerUID || gid != defaultContainerGID {
		podman.add("--user")
		if gid == defaultContainerGID {
			podman.addf("%d", uid)
		} else {
			podman.addf("%d:%d", uid, gid)
		}
	}

	var remapUsers bool
	if isUser {
		// Rootless: remapping is not applied (RemapUsers is ignored).
		remapUsers = false
	} else {
		remapUsers = container.LookupBoolean(ContainerGroup, KeyRemapUsers, false)
	}

	if !remapUsers {
		// No remapping of users, although we still need maps if the
		// main user/group is remapped, even if most ids map one-to-one.
		if uid != hostUID {
			addIDMaps(podman, "--uidmap", uid, hostUID, math.MaxUint32, nil)
		}
		if gid != hostGID {
			addIDMaps(podman, "--gidmap", gid, hostGID, math.MaxUint32, nil)
		}
	} else {
		// Remap users into the ranges from RemapUidRanges/RemapGidRanges
		// (defaulting to the quadlet user's subuid/subgid allocations).
		uidRemapIDs := lookupRanges(container, ContainerGroup, KeyRemapUIDRanges, lookupHostSubuid, getDefaultRemapUids())
		gidRemapIDs := lookupRanges(container, ContainerGroup, KeyRemapGIDRanges, lookupHostSubgid, getDefaultRemapGids())
		remapUIDStart := container.LookupUint32(ContainerGroup, KeyRemapUIDStart, 1)
		remapGIDStart := container.LookupUint32(ContainerGroup, KeyRemapGIDStart, 1)

		addIDMaps(podman, "--uidmap", uid, hostUID, remapUIDStart, uidRemapIDs)
		addIDMaps(podman, "--gidmap", gid, hostGID, remapGIDStart, gidRemapIDs)
	}

	volumes := container.LookupAll(ContainerGroup, KeyVolume)
	for _, volume := range volumes {
		// Volume values use podman -v syntax: [[source:]dest[:options]].
		parts := strings.SplitN(volume, ":", 3)

		source := ""
		var dest string
		options := ""
		if len(parts) >= 2 {
			source = parts[0]
			dest = parts[1]
		} else {
			dest = parts[0]
		}
		if len(parts) >= 3 {
			options = ":" + parts[2]
		}

		if source != "" {
			if source[0] == '/' {
				// Absolute path: make sure the mount exists before we start.
				service.Add(UnitGroup, "RequiresMountsFor", source)
			} else if strings.HasSuffix(source, ".volume") {
				// A reference to another quadlet .volume file:
				// the podman volume name is systemd-$name
				volumeName := replaceExtension(source, "", "systemd-", "")

				// the systemd unit name is $name-volume.service
				volumeServiceName := replaceExtension(source, ".service", "", "-volume")

				source = volumeName

				// Order this service after the volume-creating service.
				service.Add(UnitGroup, "Requires", volumeServiceName)
				service.Add(UnitGroup, "After", volumeServiceName)
			}
		}

		podman.add("-v")
		if source == "" {
			podman.add(dest)
		} else {
			podman.addf("%s:%s%s", source, dest, options)
		}
	}

	exposedPorts := container.LookupAll(ContainerGroup, KeyExposeHostPort)
	for _, exposedPort := range exposedPorts {
		exposedPort = strings.TrimSpace(exposedPort) // Allow whitespace after

		if !isPortRange(exposedPort) {
			return nil, fmt.Errorf("invalid port format '%s'", exposedPort)
		}

		podman.addf("--expose=%s", exposedPort)
	}

	publishPorts := container.LookupAll(ContainerGroup, KeyPublishPort)
	for _, publishPort := range publishPorts {
		publishPort = strings.TrimSpace(publishPort) // Allow whitespace after

		// IP address could have colons in it (e.g. "[::]:8080:80/tcp"), so use custom splitter
		parts := splitPorts(publishPort)

		var containerPort string
		ip := ""
		hostPort := ""

		// format (from podman run):
		// ip:hostPort:containerPort | ip::containerPort | hostPort:containerPort | containerPort
		//
		// ip could be IPv6 with minimum of these chars "[::]"
		// containerPort can have a suffix of "/tcp" or "/udp"
		//
		switch len(parts) {
		case 1:
			containerPort = parts[0]
		case 2:
			hostPort = parts[0]
			containerPort = parts[1]
		case 3:
			ip = parts[0]
			hostPort = parts[1]
			containerPort = parts[2]
		default:
			return nil, fmt.Errorf("invalid published port '%s'", publishPort)
		}

		// Normalize "0.0.0.0" to the empty (unspecified) address.
		if ip == "0.0.0.0" {
			ip = ""
		}

		if len(hostPort) > 0 && !isPortRange(hostPort) {
			return nil, fmt.Errorf("invalid port format '%s'", hostPort)
		}

		if len(containerPort) > 0 && !isPortRange(containerPort) {
			return nil, fmt.Errorf("invalid port format '%s'", containerPort)
		}

		// Emit the shortest -p form that still carries all given fields.
		switch {
		case len(ip) > 0 && len(hostPort) > 0:
			podman.addf("-p=%s:%s:%s", ip, hostPort, containerPort)
		case len(ip) > 0:
			podman.addf("-p=%s::%s", ip, containerPort)
		case len(hostPort) > 0:
			podman.addf("-p=%s:%s", hostPort, containerPort)
		default:
			podman.addf("-p=%s", containerPort)
		}
	}

	podman.addEnv(podmanEnv)

	labels := container.LookupAllKeyVal(ContainerGroup, KeyLabel)
	podman.addLabels(labels)

	annotations := container.LookupAllKeyVal(ContainerGroup, KeyAnnotation)
	podman.addAnnotations(annotations)

	// Raw PodmanArgs are appended after the generated options, immediately
	// before the image.
	podmanArgs := container.LookupAllArgs(ContainerGroup, KeyPodmanArgs)
	podman.add(podmanArgs...)

	podman.add(image)

	// Exec arguments (the container command line) come after the image.
	execArgs, ok := container.LookupLastArgs(ContainerGroup, KeyExec)
	if ok {
		podman.add(execArgs...)
	}

	service.AddCmdline(ServiceGroup, "ExecStart", podman.Args)

	return service, nil
}
// ConvertVolume converts a quadlet volume file (unit file with a Volume
// group) to a systemd service file (unit file with a Service group) based on
// the options in the Volume group. The original Volume group is kept around
// as X-Volume.
func ConvertVolume(volume *parser.UnitFile, name string) (*parser.UnitFile, error) {
	service := volume.Dup()
	service.Filename = replaceExtension(volume.Filename, ".service", "", "-volume")

	if err := checkForUnknownKeys(volume, VolumeGroup, supportedVolumeKeys); err != nil {
		return nil, err
	}

	// Hide the original Volume group from systemd by renaming it.
	service.RenameGroup(VolumeGroup, XVolumeGroup)

	// The podman volume is named systemd-$name.
	volumeName := replaceExtension(name, "", "systemd-", "")

	// Need the containers filesystem mounted to start podman
	service.Add(UnitGroup, "RequiresMountsFor", "%t/containers")

	labels := volume.LookupAllKeyVal(VolumeGroup, "Label")

	podman := NewPodmanCmdline("volume", "create", "--ignore")

	// Collect driver options (uid=/gid=) for a single "--opt o=..." argument.
	driverOpts := make([]string, 0, 2)
	if volume.HasKey(VolumeGroup, "User") {
		uid := volume.LookupUint32(VolumeGroup, "User", 0)
		driverOpts = append(driverOpts, fmt.Sprintf("uid=%d", uid))
	}
	if volume.HasKey(VolumeGroup, "Group") {
		gid := volume.LookupUint32(VolumeGroup, "Group", 0)
		driverOpts = append(driverOpts, fmt.Sprintf("gid=%d", gid))
	}
	if len(driverOpts) > 0 {
		podman.add("--opt", "o="+strings.Join(driverOpts, ","))
	}

	podman.addLabels(labels)
	podman.add(volumeName)

	service.AddCmdline(ServiceGroup, "ExecStart", podman.Args)
	service.Setv(ServiceGroup,
		"Type", "oneshot",
		"RemainAfterExit", "yes",
		// The default syslog identifier is the exec basename (podman) which isn't very useful here
		"SyslogIdentifier", "%N")

	return service, nil
}