Instead of creating an extra container, create a network and mount namespace inside the podman user namespace. This namespace is used for rootless CNI operations and helps align the rootless and rootful network code paths: when running rootless, we only have to set up an extra network namespace and initialize slirp4netns in it, and the ocicni library is then called inside that namespace. This design allows easier maintenance, avoids an extra container with pause processes, and supports rootless CNI with --uidmap, and possibly more.

The biggest problem is backwards compatibility. I don't think live migration can be possible. If the user reboots or restarts all CNI containers, everything should work as expected again. The user is left with the rootless-cni-infra container and image, but these can safely be removed.

To make the existing CNI configs work, we need to execute the CNI plugins in an extra mount namespace. This ensures that we can safely mount over /run and /var, which have to be writable for the CNI plugins, without removing access to these files by the main podman process. One caveat is that we need to keep the netns files at `XDG_RUNTIME_DIR/netns` accessible. `XDG_RUNTIME_DIR/rootless-cni/{run,var}` will be mounted to `/{run,var}`. To ensure that we keep the netns directory, we bind mount it relative to the new root location, e.g. XDG_RUNTIME_DIR/rootless-cni/run/user/1000/netns, before we mount the run directory. The run directory is mounted recursively, which makes the netns directory accessible at the same path as before. This also allows iptables-legacy to work, because /run/xtables.lock is now writable.

Signed-off-by: Paul Holzinger <paul.holzinger@web.de>
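
The standalone sketch below illustrates the bind-mount layout described above. It is a simplified illustration, not Podman's actual implementation; it assumes XDG_RUNTIME_DIR=/run/user/1000 and must run inside a freshly unshared mount namespace (with "/" made rprivate first) so the mounts stay invisible to the rest of the session.

package main

import (
    "os"
    "path/filepath"

    "golang.org/x/sys/unix"
)

// setupRootlessCNIMounts prepares private /run and /var trees for the CNI
// plugins, keeping the netns files reachable at their original path.
func setupRootlessCNIMounts() error {
    xdg := "/run/user/1000" // assumed XDG_RUNTIME_DIR
    base := filepath.Join(xdg, "rootless-cni")
    netns := filepath.Join(xdg, "netns") // must stay reachable afterwards

    // Bind the netns dir to the path it will have once base/run covers /run,
    // i.e. base/run/user/1000/netns (relative to the new root location).
    keep := filepath.Join(base, "run", "user", "1000", "netns")
    if err := os.MkdirAll(keep, 0700); err != nil {
        return err
    }
    if err := os.MkdirAll(netns, 0700); err != nil {
        return err
    }
    if err := unix.Mount(netns, keep, "", unix.MS_BIND, ""); err != nil {
        return err
    }

    // Recursively bind base/run over /run and base/var over /var. MS_REC
    // carries the nested netns bind along, so XDG_RUNTIME_DIR/netns resolves
    // to the same files as before, and a writable /run means iptables-legacy
    // can take /run/xtables.lock.
    for _, dir := range []string{"run", "var"} {
        src := filepath.Join(base, dir)
        if err := os.MkdirAll(src, 0700); err != nil {
            return err
        }
        if err := unix.Mount(src, "/"+dir, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
            return err
        }
    }
    return nil
}

func main() {
    if err := setupRootlessCNIMounts(); err != nil {
        panic(err)
    }
}
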
// +build linux

package libpod

import (
    "context"
    "fmt"
    "io"
    "io/ioutil"
    "math"
    "net"
    "os"
    "os/user"
    "path"
    "path/filepath"
    "strconv"
    "strings"
    "sync"
    "syscall"
    "time"

    metadata "github.com/checkpoint-restore/checkpointctl/lib"
    cnitypes "github.com/containernetworking/cni/pkg/types/current"
    "github.com/containernetworking/plugins/pkg/ns"
    "github.com/containers/buildah/pkg/chrootuser"
    "github.com/containers/buildah/pkg/overlay"
    butil "github.com/containers/buildah/util"
    "github.com/containers/common/pkg/apparmor"
    "github.com/containers/common/pkg/chown"
    "github.com/containers/common/pkg/config"
    "github.com/containers/common/pkg/subscriptions"
    "github.com/containers/common/pkg/umask"
    "github.com/containers/podman/v3/libpod/define"
    "github.com/containers/podman/v3/libpod/events"
    "github.com/containers/podman/v3/pkg/annotations"
    "github.com/containers/podman/v3/pkg/cgroups"
    "github.com/containers/podman/v3/pkg/checkpoint/crutils"
    "github.com/containers/podman/v3/pkg/criu"
    "github.com/containers/podman/v3/pkg/lookup"
    "github.com/containers/podman/v3/pkg/resolvconf"
    "github.com/containers/podman/v3/pkg/rootless"
    "github.com/containers/podman/v3/pkg/util"
    "github.com/containers/podman/v3/utils"
    "github.com/containers/podman/v3/version"
    "github.com/containers/storage/pkg/archive"
    "github.com/containers/storage/pkg/idtools"
    securejoin "github.com/cyphar/filepath-securejoin"
    runcuser "github.com/opencontainers/runc/libcontainer/user"
    spec "github.com/opencontainers/runtime-spec/specs-go"
    "github.com/opencontainers/runtime-tools/generate"
    "github.com/opencontainers/selinux/go-selinux/label"
    "github.com/pkg/errors"
    "github.com/sirupsen/logrus"
    "golang.org/x/sys/unix"
)

func (c *Container) mountSHM(shmOptions string) error {
    if err := unix.Mount("shm", c.config.ShmDir, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV,
        label.FormatMountLabel(shmOptions, c.config.MountLabel)); err != nil {
        return errors.Wrapf(err, "failed to mount shm tmpfs %q", c.config.ShmDir)
    }
    return nil
}

func (c *Container) unmountSHM(mount string) error {
    if err := unix.Unmount(mount, 0); err != nil {
        if err != syscall.EINVAL && err != syscall.ENOENT {
            return errors.Wrapf(err, "error unmounting container %s SHM mount %s", c.ID(), mount)
        }
        // If it's just an EINVAL or ENOENT, debug logs only
        logrus.Debugf("container %s failed to unmount %s : %v", c.ID(), mount, err)
    }
    return nil
}

// prepare mounts the container and sets up other required resources like net
// namespaces
func (c *Container) prepare() error {
    var (
        wg                              sync.WaitGroup
        netNS                           ns.NetNS
        networkStatus                   []*cnitypes.Result
        createNetNSErr, mountStorageErr error
        mountPoint                      string
        tmpStateLock                    sync.Mutex
    )

    wg.Add(2)

    go func() {
        defer wg.Done()
        // Set up network namespace if not already set up
        noNetNS := c.state.NetNS == nil
        if c.config.CreateNetNS && noNetNS && !c.config.PostConfigureNetNS {
            netNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c)
            if createNetNSErr != nil {
                return
            }

            tmpStateLock.Lock()
            defer tmpStateLock.Unlock()

            // Assign NetNS attributes to container
            c.state.NetNS = netNS
            c.state.NetworkStatus = networkStatus
        }

        // handle rootless network namespace setup
        if noNetNS && !c.config.PostConfigureNetNS {
            if rootless.IsRootless() {
                createNetNSErr = c.runtime.setupRootlessNetNS(c)
            } else if c.config.NetMode.IsSlirp4netns() {
                createNetNSErr = c.runtime.setupSlirp4netns(c)
            }
        }
    }()
    // Mount storage if not mounted
    go func() {
        defer wg.Done()
        mountPoint, mountStorageErr = c.mountStorage()

        if mountStorageErr != nil {
            return
        }

        tmpStateLock.Lock()
        defer tmpStateLock.Unlock()

        // Finish up mountStorage
        c.state.Mounted = true
        c.state.Mountpoint = mountPoint

        logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint)
    }()

    wg.Wait()

    var createErr error
    if createNetNSErr != nil {
        createErr = createNetNSErr
    }
    if mountStorageErr != nil {
        if createErr != nil {
            logrus.Errorf("Error preparing container %s: %v", c.ID(), createErr)
        }
        createErr = mountStorageErr
    }

    // Only trigger storage cleanup if mountStorage was successful.
    // Otherwise, we may mess up mount counters.
    if createNetNSErr != nil && mountStorageErr == nil {
        if err := c.cleanupStorage(); err != nil {
            // createErr is guaranteed non-nil, so print
            // unconditionally
            logrus.Errorf("Error preparing container %s: %v", c.ID(), createErr)
            createErr = errors.Wrapf(err, "error unmounting storage for container %s after network create failure", c.ID())
        }
    }

    // It's OK to unconditionally trigger network cleanup. If the network
    // isn't ready it will do nothing.
    if createErr != nil {
        if err := c.cleanupNetwork(); err != nil {
            logrus.Errorf("Error preparing container %s: %v", c.ID(), createErr)
            createErr = errors.Wrapf(err, "error cleaning up container %s network after setup failure", c.ID())
        }
    }

    if createErr != nil {
        return createErr
    }

    // Save changes to container state
    if err := c.save(); err != nil {
        return err
    }

    // Make sure the workdir exists
    if err := c.resolveWorkDir(); err != nil {
        return err
    }

    return nil
}

// resolveWorkDir resolves the container's workdir and, depending on the
// configuration, will create it, or error out if it does not exist.
// Note that the container must be mounted before.
func (c *Container) resolveWorkDir() error {
    workdir := c.WorkingDir()

    // If the specified workdir is a subdir of a volume or mount,
    // we don't need to do anything. The runtime is taking care of
    // that.
    if isPathOnVolume(c, workdir) || isPathOnBindMount(c, workdir) {
        logrus.Debugf("Workdir %q resolved to a volume or mount", workdir)
        return nil
    }

    _, resolvedWorkdir, err := c.resolvePath(c.state.Mountpoint, workdir)
    if err != nil {
        return err
    }
    logrus.Debugf("Workdir %q resolved to host path %q", workdir, resolvedWorkdir)

    st, err := os.Stat(resolvedWorkdir)
    if err == nil {
        if !st.IsDir() {
            return errors.Errorf("workdir %q exists on container %s, but is not a directory", workdir, c.ID())
        }
        return nil
    }
    if !c.config.CreateWorkingDir {
        // No need to create it (e.g., `--workdir=/foo`), so let's make sure
        // the path exists on the container.
        if err != nil {
            if os.IsNotExist(err) {
                return errors.Errorf("workdir %q does not exist on container %s", workdir, c.ID())
            }
            // This might be a serious error (e.g., permission), so
            // we need to return the full error.
            return errors.Wrapf(err, "error detecting workdir %q on container %s", workdir, c.ID())
        }
        return nil
    }
    if err := os.MkdirAll(resolvedWorkdir, 0755); err != nil {
        if os.IsExist(err) {
            return nil
        }
        return errors.Wrapf(err, "error creating container %s workdir", c.ID())
    }

    // Ensure container entrypoint is created (if required).
    uid, gid, _, err := chrootuser.GetUser(c.state.Mountpoint, c.User())
    if err != nil {
        return errors.Wrapf(err, "error looking up %s inside of the container %s", c.User(), c.ID())
    }
    if err := os.Chown(resolvedWorkdir, int(uid), int(gid)); err != nil {
        return errors.Wrapf(err, "error chowning container %s workdir to container root", c.ID())
    }

    return nil
}

// cleanupNetwork unmounts and cleans up the container's network
func (c *Container) cleanupNetwork() error {
    if c.config.NetNsCtr != "" {
        return nil
    }
    netDisabled, err := c.NetworkDisabled()
    if err != nil {
        return err
    }
    if netDisabled {
        return nil
    }
    if c.state.NetNS == nil {
        logrus.Debugf("Network is already cleaned up, skipping...")
        return nil
    }

    // Stop the container's network namespace (if it has one)
    if err := c.runtime.teardownNetNS(c); err != nil {
        logrus.Errorf("unable to cleanup network for container %s: %q", c.ID(), err)
    }

    c.state.NetNS = nil
    c.state.NetworkStatus = nil

    if c.valid {
        return c.save()
    }

    return nil
}

// reloadNetwork reloads the network for the given container, recreating
// firewall rules.
func (c *Container) reloadNetwork() error {
    result, err := c.runtime.reloadContainerNetwork(c)
    if err != nil {
        return err
    }

    c.state.NetworkStatus = result

    return c.save()
}

// getUserOverrides returns the passwd/group override paths to use when
// resolving users, preferring any mounts the user supplied for /etc/passwd,
// /etc/group, or /etc itself.
func (c *Container) getUserOverrides() *lookup.Overrides {
    var hasPasswdFile, hasGroupFile bool
    overrides := lookup.Overrides{}
    for _, m := range c.config.Spec.Mounts {
        if m.Destination == "/etc/passwd" {
            overrides.ContainerEtcPasswdPath = m.Source
            hasPasswdFile = true
        }
        if m.Destination == "/etc/group" {
            overrides.ContainerEtcGroupPath = m.Source
            hasGroupFile = true
        }
        if m.Destination == "/etc" {
            if !hasPasswdFile {
                overrides.ContainerEtcPasswdPath = filepath.Join(m.Source, "passwd")
            }
            if !hasGroupFile {
                overrides.ContainerEtcGroupPath = filepath.Join(m.Source, "group")
            }
        }
    }
    if path, ok := c.state.BindMounts["/etc/passwd"]; ok {
        overrides.ContainerEtcPasswdPath = path
    }
    return &overrides
}

// Generate spec for a container
// Accepts a map of the container's dependencies
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
    overrides := c.getUserOverrides()
    execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
    if err != nil {
        return nil, err
    }

    g := generate.Generator{Config: c.config.Spec}

    // If network namespace was requested, add it now
    if c.config.CreateNetNS {
        if c.config.PostConfigureNetNS {
            if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil {
                return nil, err
            }
        } else {
            if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), c.state.NetNS.Path()); err != nil {
                return nil, err
            }
        }
    }

    // Apply AppArmor checks and load the default profile if needed.
    if len(c.config.Spec.Process.ApparmorProfile) > 0 {
        updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
        if err != nil {
            return nil, err
        }
        g.SetProcessApparmorProfile(updatedProfile)
    }

    if err := c.makeBindMounts(); err != nil {
        return nil, err
    }

    // Get host UID and GID based on the container process UID and GID.
    hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
    if err != nil {
        return nil, err
    }

    // Check if the spec file mounts contain the options z, Z or U.
    // If they have z or Z, relabel the source directory and then remove the option.
    // If they have U, chown the source directory and then remove the option.
    for i := range g.Config.Mounts {
        m := &g.Config.Mounts[i]
        var options []string
        for _, o := range m.Options {
            switch o {
            case "U":
                if m.Type == "tmpfs" {
                    options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...)
                } else {
                    if err := chown.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
                        return nil, err
                    }
                }
            case "z":
                fallthrough
            case "Z":
                if err := label.Relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
                    return nil, err
                }

            default:
                options = append(options, o)
            }
        }
        m.Options = options
    }

    g.SetProcessSelinuxLabel(c.ProcessLabel())
    g.SetLinuxMountLabel(c.MountLabel())

    // Add named volumes
    for _, namedVol := range c.config.NamedVolumes {
        volume, err := c.runtime.GetVolume(namedVol.Name)
        if err != nil {
            return nil, errors.Wrapf(err, "error retrieving volume %s to add to container %s", namedVol.Name, c.ID())
        }
        mountPoint, err := volume.MountPoint()
        if err != nil {
            return nil, err
        }
        volMount := spec.Mount{
            Type:        "bind",
            Source:      mountPoint,
            Destination: namedVol.Dest,
            Options:     namedVol.Options,
        }
        g.AddMount(volMount)
    }

    // Add bind mounts to container
    for dstPath, srcPath := range c.state.BindMounts {
        newMount := spec.Mount{
            Type:        "bind",
            Source:      srcPath,
            Destination: dstPath,
            Options:     []string{"bind", "rprivate"},
        }
        if c.IsReadOnly() && dstPath != "/dev/shm" {
            newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
        }
        if !MountExists(g.Mounts(), dstPath) {
            g.AddMount(newMount)
        } else {
            logrus.Infof("User mount overriding libpod mount at %q", dstPath)
        }
    }

    // Add overlay volumes
    for _, overlayVol := range c.config.OverlayVolumes {
        contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
        if err != nil {
            return nil, err
        }
        overlayMount, err := overlay.Mount(contentDir, overlayVol.Source, overlayVol.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
        if err != nil {
            return nil, errors.Wrapf(err, "mounting overlay failed %q", overlayVol.Source)
        }

        // Check overlay volume options
        for _, o := range overlayVol.Options {
            switch o {
            case "U":
                if err := chown.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
                    return nil, err
                }

                if err := chown.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
                    return nil, err
                }
            }
        }

        g.AddMount(overlayMount)
    }

    // Add image volumes as overlay mounts
    for _, volume := range c.config.ImageVolumes {
        // Mount the specified image.
        img, err := c.runtime.ImageRuntime().NewFromLocal(volume.Source)
        if err != nil {
            return nil, errors.Wrapf(err, "error creating image volume %q:%q", volume.Source, volume.Dest)
        }
        mountPoint, err := img.Mount(nil, "")
        if err != nil {
            return nil, errors.Wrapf(err, "error mounting image volume %q:%q", volume.Source, volume.Dest)
        }

        contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
        if err != nil {
            return nil, errors.Wrapf(err, "failed to create TempDir in the %s directory", c.config.StaticDir)
        }

        var overlayMount spec.Mount
        if volume.ReadWrite {
            overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
        } else {
            overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
        }
        if err != nil {
            return nil, errors.Wrapf(err, "creating overlay mount for image %q failed", volume.Source)
        }
        g.AddMount(overlayMount)
    }

    hasHomeSet := false
    for _, s := range c.config.Spec.Process.Env {
        if strings.HasPrefix(s, "HOME=") {
            hasHomeSet = true
            break
        }
    }
    if !hasHomeSet && execUser.Home != "" {
        c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home))
    }

    if c.config.User != "" {
        // User and Group must go together
        g.SetProcessUID(uint32(execUser.Uid))
        g.SetProcessGID(uint32(execUser.Gid))
    }

    if c.config.Umask != "" {
        decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
        if err != nil {
            return nil, errors.Wrapf(err, "invalid umask value")
        }
        umask := uint32(decVal)
        g.Config.Process.User.Umask = &umask
    }

    // Add additional groups if c.config.GroupAdd is not empty
    if len(c.config.Groups) > 0 {
        gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
        if err != nil {
            return nil, errors.Wrapf(err, "error looking up supplemental groups for container %s", c.ID())
        }
        for _, gid := range gids {
            g.AddProcessAdditionalGid(gid)
        }
    }

    if c.config.Systemd {
        if err := c.setupSystemd(g.Mounts(), g); err != nil {
            return nil, errors.Wrapf(err, "error adding systemd-specific mounts")
        }
    }

    // Look up and add groups the user belongs to, if a group wasn't directly specified
    if !strings.Contains(c.config.User, ":") {
        // the gidMappings that are present inside the container user namespace
        var gidMappings []idtools.IDMap

        switch {
        case len(c.config.IDMappings.GIDMap) > 0:
            gidMappings = c.config.IDMappings.GIDMap
        case rootless.IsRootless():
            // Check whether the current user namespace has enough gids available.
            availableGids, err := rootless.GetAvailableGids()
            if err != nil {
                return nil, errors.Wrapf(err, "cannot read number of available GIDs")
            }
            gidMappings = []idtools.IDMap{{
                ContainerID: 0,
                HostID:      0,
                Size:        int(availableGids),
            }}
        default:
            gidMappings = []idtools.IDMap{{
                ContainerID: 0,
                HostID:      0,
                Size:        math.MaxInt32,
            }}
        }
        for _, gid := range execUser.Sgids {
            isGIDAvailable := false
            for _, m := range gidMappings {
                if gid >= m.ContainerID && gid < m.ContainerID+m.Size {
                    isGIDAvailable = true
                    break
                }
            }
            if isGIDAvailable {
                g.AddProcessAdditionalGid(uint32(gid))
            } else {
                logrus.Warnf("additional gid=%d is not present in the user namespace, skip setting it", gid)
            }
        }
    }

    // Add shared namespaces from other containers
    if c.config.IPCNsCtr != "" {
        if err := c.addNamespaceContainer(&g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil {
            return nil, err
        }
    }
    if c.config.MountNsCtr != "" {
        if err := c.addNamespaceContainer(&g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil {
            return nil, err
        }
    }
    if c.config.NetNsCtr != "" {
        if err := c.addNamespaceContainer(&g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil {
            return nil, err
        }
    }
    if c.config.PIDNsCtr != "" {
        if err := c.addNamespaceContainer(&g, PIDNS, c.config.PIDNsCtr, spec.PIDNamespace); err != nil {
            return nil, err
        }
    }
    if c.config.UserNsCtr != "" {
        if err := c.addNamespaceContainer(&g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil {
            return nil, err
        }
        if len(g.Config.Linux.UIDMappings) == 0 {
            // runc complains if no mapping is specified, even if we join another ns. So provide a dummy mapping
            g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1))
            g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1))
        }
    }

    availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps()
    if err != nil {
        if os.IsNotExist(err) {
            // The kernel-provided files only exist if user namespaces are supported
            logrus.Debugf("user or group ID mappings not available: %s", err)
        } else {
            return nil, err
        }
    } else {
        g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs)
        g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs)
    }

    // Hostname handling:
    // If we have a UTS namespace, set Hostname in the OCI spec.
    // Set the HOSTNAME environment variable unless explicitly overridden by
    // the user (already present in OCI spec). If we don't have a UTS ns,
    // set it to the host's hostname instead.
    hostname := c.Hostname()
    foundUTS := false

    for _, i := range c.config.Spec.Linux.Namespaces {
        if i.Type == spec.UTSNamespace && i.Path == "" {
            foundUTS = true
            g.SetHostname(hostname)
            break
        }
    }
    if !foundUTS {
        tmpHostname, err := os.Hostname()
        if err != nil {
            return nil, err
        }
        hostname = tmpHostname
    }
    needEnv := true
    for _, checkEnv := range g.Config.Process.Env {
        if strings.SplitN(checkEnv, "=", 2)[0] == "HOSTNAME" {
            needEnv = false
            break
        }
    }
    if needEnv {
        g.AddProcessEnv("HOSTNAME", hostname)
    }

    if c.config.UTSNsCtr != "" {
        if err := c.addNamespaceContainer(&g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil {
            return nil, err
        }
    }
    if c.config.CgroupNsCtr != "" {
        if err := c.addNamespaceContainer(&g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil {
            return nil, err
        }
    }

    if c.config.IDMappings.AutoUserNs {
        if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil {
            return nil, err
        }
        g.ClearLinuxUIDMappings()
        for _, uidmap := range c.config.IDMappings.UIDMap {
            g.AddLinuxUIDMapping(uint32(uidmap.HostID), uint32(uidmap.ContainerID), uint32(uidmap.Size))
        }
        g.ClearLinuxGIDMappings()
        for _, gidmap := range c.config.IDMappings.GIDMap {
            g.AddLinuxGIDMapping(uint32(gidmap.HostID), uint32(gidmap.ContainerID), uint32(gidmap.Size))
        }
    }

    g.SetRootPath(c.state.Mountpoint)
    g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
    g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))

    if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
        g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod)
    }

    // Only add container environment variable if not already present
    foundContainerEnv := false
    for _, env := range g.Config.Process.Env {
        if strings.HasPrefix(env, "container=") {
            foundContainerEnv = true
            break
        }
    }
    if !foundContainerEnv {
        g.AddProcessEnv("container", "libpod")
    }

    cgroupPath, err := c.getOCICgroupPath()
    if err != nil {
        return nil, err
    }
    g.SetLinuxCgroupsPath(cgroupPath)

    // Mounts need to be sorted so paths will not cover other paths
    mounts := sortMounts(g.Mounts())
    g.ClearMounts()

    // Determine property of RootPropagation based on volume properties. If
    // a volume is shared, then keep root propagation shared. This should
    // work for slave and private volumes too.
    //
    // For slave volumes, it can be either [r]shared/[r]slave.
    //
    // For private volumes any root propagation value should work.
    rootPropagation := ""
    for _, m := range mounts {
        // We need to remove all symlinks from tmpfs mounts.
        // Runc and other runtimes may choke on them.
        // Easy solution: use securejoin to do a scoped evaluation of
        // the links, then trim off the mount prefix.
        if m.Type == "tmpfs" {
            finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
            if err != nil {
                return nil, errors.Wrapf(err, "error resolving symlinks for mount destination %s", m.Destination)
            }
            trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
            m.Destination = trimmedPath
        }
        g.AddMount(m)
        for _, opt := range m.Options {
            switch opt {
            case MountShared, MountRShared:
                if rootPropagation != MountShared && rootPropagation != MountRShared {
                    rootPropagation = MountShared
                }
            case MountSlave, MountRSlave:
                if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
                    rootPropagation = MountRSlave
                }
            }
        }
    }

    if rootPropagation != "" {
        logrus.Debugf("set root propagation to %q", rootPropagation)
        if err := g.SetLinuxRootPropagation(rootPropagation); err != nil {
            return nil, err
        }
    }

    // Warning: precreate hooks may alter g.Config in place.
    if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
        return nil, errors.Wrapf(err, "error setting up OCI Hooks")
    }

    return g.Config, nil
}

// systemd expects to have /run, /run/lock and /tmp on tmpfs
// It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error {
    options := []string{"rw", "rprivate", "nosuid", "nodev"}
    for _, dest := range []string{"/run", "/run/lock"} {
        if MountExists(mounts, dest) {
            continue
        }
        tmpfsMnt := spec.Mount{
            Destination: dest,
            Type:        "tmpfs",
            Source:      "tmpfs",
            Options:     append(options, "tmpcopyup"),
        }
        g.AddMount(tmpfsMnt)
    }
    for _, dest := range []string{"/tmp", "/var/log/journal"} {
        if MountExists(mounts, dest) {
            continue
        }
        tmpfsMnt := spec.Mount{
            Destination: dest,
            Type:        "tmpfs",
            Source:      "tmpfs",
            Options:     append(options, "tmpcopyup"),
        }
        g.AddMount(tmpfsMnt)
    }

    unified, err := cgroups.IsCgroup2UnifiedMode()
    if err != nil {
        return err
    }

    if unified {
        g.RemoveMount("/sys/fs/cgroup")

        hasCgroupNs := false
        for _, ns := range c.config.Spec.Linux.Namespaces {
            if ns.Type == spec.CgroupNamespace {
                hasCgroupNs = true
                break
            }
        }

        var systemdMnt spec.Mount
        if hasCgroupNs {
            systemdMnt = spec.Mount{
                Destination: "/sys/fs/cgroup",
                Type:        "cgroup",
                Source:      "cgroup",
                Options:     []string{"private", "rw"},
            }
        } else {
            systemdMnt = spec.Mount{
                Destination: "/sys/fs/cgroup",
                Type:        "bind",
                Source:      "/sys/fs/cgroup",
                Options:     []string{"bind", "private", "rw"},
            }
        }
        g.AddMount(systemdMnt)
    } else {
        mountOptions := []string{"bind", "rprivate"}

        var statfs unix.Statfs_t
        if err := unix.Statfs("/sys/fs/cgroup/systemd", &statfs); err != nil {
            mountOptions = append(mountOptions, "nodev", "noexec", "nosuid")
        } else {
            if statfs.Flags&unix.MS_NODEV == unix.MS_NODEV {
                mountOptions = append(mountOptions, "nodev")
            }
            if statfs.Flags&unix.MS_NOEXEC == unix.MS_NOEXEC {
                mountOptions = append(mountOptions, "noexec")
            }
            if statfs.Flags&unix.MS_NOSUID == unix.MS_NOSUID {
                mountOptions = append(mountOptions, "nosuid")
            }
            if statfs.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
                mountOptions = append(mountOptions, "ro")
            }
        }

        systemdMnt := spec.Mount{
            Destination: "/sys/fs/cgroup/systemd",
            Type:        "bind",
            Source:      "/sys/fs/cgroup/systemd",
            Options:     mountOptions,
        }
        g.AddMount(systemdMnt)
        g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
    }

    return nil
}

// Add an existing container's namespace to the spec
func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr string, specNS spec.LinuxNamespaceType) error {
    nsCtr, err := c.runtime.state.Container(ctr)
    if err != nil {
        return errors.Wrapf(err, "error retrieving dependency %s of container %s from state", ctr, c.ID())
    }

    if specNS == spec.UTSNamespace {
        hostname := nsCtr.Hostname()
        // Joining an existing namespace, cannot set the hostname
        g.SetHostname("")
        g.AddProcessEnv("HOSTNAME", hostname)
    }

    // TODO need unlocked version of this for use in pods
    nsPath, err := nsCtr.NamespacePath(ns)
    if err != nil {
        return err
    }

    if err := g.AddOrReplaceLinuxNamespace(string(specNS), nsPath); err != nil {
        return err
    }

    return nil
}

func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
    if len(c.Dependencies()) > 0 {
        return errors.Errorf("Cannot export checkpoints of containers with dependencies")
    }
    logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile)

    includeFiles := []string{
        "artifacts",
        "ctr.log",
        metadata.CheckpointDirectory,
        metadata.ConfigDumpFile,
        metadata.SpecDumpFile,
        metadata.NetworkStatusFile,
    }

    if options.PreCheckPoint {
        includeFiles[0] = "pre-checkpoint"
    }
    // Get root file-system changes included in the checkpoint archive
    var addToTarFiles []string
    if !options.IgnoreRootfs {
        // To correctly track deleted files, let's go through the output of 'podman diff'
        rootFsChanges, err := c.runtime.GetDiff("", c.ID())
        if err != nil {
            return errors.Wrapf(err, "error exporting root file-system diff for %q", c.ID())
        }

        // Assign to the outer addToTarFiles (plain =, not :=) so the
        // temporary diff files can be removed after the tarball is written.
        addToTarFiles, err = crutils.CRCreateRootFsDiffTar(&rootFsChanges, c.state.Mountpoint, c.bundlePath())
        if err != nil {
            return err
        }

        includeFiles = append(includeFiles, addToTarFiles...)
    }

    // Folder containing archived volumes that will be included in the export
    expVolDir := filepath.Join(c.bundlePath(), "volumes")

    // Create an archive for each volume associated with the container
    if !options.IgnoreVolumes {
        if err := os.MkdirAll(expVolDir, 0700); err != nil {
            return errors.Wrapf(err, "error creating volumes export directory %q", expVolDir)
        }

        for _, v := range c.config.NamedVolumes {
            volumeTarFilePath := filepath.Join("volumes", v.Name+".tar")
            volumeTarFileFullPath := filepath.Join(c.bundlePath(), volumeTarFilePath)

            volumeTarFile, err := os.Create(volumeTarFileFullPath)
            if err != nil {
                return errors.Wrapf(err, "error creating %q", volumeTarFileFullPath)
            }

            volume, err := c.runtime.GetVolume(v.Name)
            if err != nil {
                return err
            }

            mp, err := volume.MountPoint()
            if err != nil {
                return err
            }
            if mp == "" {
                return errors.Wrapf(define.ErrInternal, "volume %s is not mounted, cannot export", volume.Name())
            }

            input, err := archive.TarWithOptions(mp, &archive.TarOptions{
                Compression:      archive.Uncompressed,
                IncludeSourceDir: true,
            })
            if err != nil {
                return errors.Wrapf(err, "error reading volume directory %q", v.Dest)
            }

            _, err = io.Copy(volumeTarFile, input)
            if err != nil {
                return err
            }
            volumeTarFile.Close()

            includeFiles = append(includeFiles, volumeTarFilePath)
        }
    }

    input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{
        Compression:      archive.Gzip,
        IncludeSourceDir: true,
        IncludeFiles:     includeFiles,
    })

    if err != nil {
        return errors.Wrapf(err, "error reading checkpoint directory %q", c.ID())
    }

    outFile, err := os.Create(options.TargetFile)
    if err != nil {
        return errors.Wrapf(err, "error creating checkpoint export file %q", options.TargetFile)
    }
    defer outFile.Close()

    if err := os.Chmod(options.TargetFile, 0600); err != nil {
        return err
    }

    _, err = io.Copy(outFile, input)
    if err != nil {
        return err
    }

    for _, file := range addToTarFiles {
        os.Remove(filepath.Join(c.bundlePath(), file))
    }

    if !options.IgnoreVolumes {
        os.RemoveAll(expVolDir)
    }

    return nil
}

func (c *Container) checkpointRestoreSupported() error {
    if !criu.CheckForCriu() {
        return errors.Errorf("checkpoint/restore requires at least CRIU %d", criu.MinCriuVersion)
    }
    if !c.ociRuntime.SupportsCheckpoint() {
        return errors.Errorf("configured runtime does not support checkpoint/restore")
    }
    return nil
}

func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) error {
    if err := c.checkpointRestoreSupported(); err != nil {
        return err
    }

    if c.state.State != define.ContainerStateRunning {
        return errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
    }

    if c.AutoRemove() && options.TargetFile == "" {
        return errors.Errorf("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
    }

    if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil {
        return err
    }

    if err := c.ociRuntime.CheckpointContainer(c, options); err != nil {
        return err
    }

    // Save network.status. This is needed to restore the container with
    // the same IP. Currently limited to one IP address in a container
    // with one interface.
    if _, err := metadata.WriteJSONFile(c.state.NetworkStatus, c.bundlePath(), metadata.NetworkStatusFile); err != nil {
        return err
    }

    defer c.newContainerEvent(events.Checkpoint)

    // There is a bug from criu: https://github.com/checkpoint-restore/criu/issues/116
    // We have to change the symbolic link from absolute path to relative path
    if options.WithPrevious {
        os.Remove(path.Join(c.CheckpointPath(), "parent"))
        if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil {
            return err
        }
    }

    if options.TargetFile != "" {
        if err := c.exportCheckpoint(options); err != nil {
            return err
        }
    }

    logrus.Debugf("Checkpointed container %s", c.ID())

    if !options.KeepRunning && !options.PreCheckPoint {
        c.state.State = define.ContainerStateStopped

        // Cleanup Storage and Network
        if err := c.cleanup(ctx); err != nil {
            return err
        }
    }

    if !options.Keep && !options.PreCheckPoint {
        cleanup := []string{
            "dump.log",
            "stats-dump",
            metadata.ConfigDumpFile,
            metadata.SpecDumpFile,
        }
        for _, del := range cleanup {
            file := filepath.Join(c.bundlePath(), del)
            if err := os.Remove(file); err != nil {
                logrus.Debugf("unable to remove file %s", file)
            }
        }
    }

    c.state.FinishedTime = time.Now()
    return c.save()
}

func (c *Container) importCheckpoint(input string) error {
    if err := crutils.CRImportCheckpointWithoutConfig(c.bundlePath(), input); err != nil {
        return err
    }

    // Make sure the newly created config.json exists on disk
    g := generate.Generator{Config: c.config.Spec}
    if err := c.saveSpec(g.Config); err != nil {
        return errors.Wrap(err, "saving imported container specification for restore failed")
    }

    return nil
}

func (c *Container) importPreCheckpoint(input string) error {
    archiveFile, err := os.Open(input)
    if err != nil {
        return errors.Wrap(err, "failed to open pre-checkpoint archive for import")
    }

    defer archiveFile.Close()

    err = archive.Untar(archiveFile, c.bundlePath(), nil)
    if err != nil {
        return errors.Wrapf(err, "Unpacking of pre-checkpoint archive %s failed", input)
    }
    return nil
}

func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (retErr error) {
    if err := c.checkpointRestoreSupported(); err != nil {
        return err
    }

    if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
        return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID())
    }

    if options.ImportPrevious != "" {
        if err := c.importPreCheckpoint(options.ImportPrevious); err != nil {
            return err
        }
    }

    if options.TargetFile != "" {
        if err := c.importCheckpoint(options.TargetFile); err != nil {
            return err
        }
    }

    // Let's try to stat() CRIU's inventory file. If it does not exist, it makes
    // no sense to try a restore. This is a minimal check if a checkpoint exists.
    if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
        return errors.Wrapf(err, "a complete checkpoint for this container cannot be found, cannot restore")
    }

    if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "restore.log", c.MountLabel()); err != nil {
        return err
    }

    // If a container is restored multiple times from an exported checkpoint with
    // the help of '--import --name', the restore will fail if during 'podman run'
    // a static container IP was set with '--ip'. The user can tell the restore
    // process to ignore the static IP with '--ignore-static-ip'
    if options.IgnoreStaticIP {
        c.config.StaticIP = nil
    }

    // If a container is restored multiple times from an exported checkpoint with
    // the help of '--import --name', the restore will fail if during 'podman run'
    // a static container MAC address was set with '--mac-address'. The user
    // can tell the restore process to ignore the static MAC with
    // '--ignore-static-mac'
    if options.IgnoreStaticMAC {
        c.config.StaticMAC = nil
    }

    // Read network configuration from checkpoint
    // Currently only one interface with one IP is supported.
    networkStatus, _, err := metadata.ReadContainerCheckpointNetworkStatus(c.bundlePath())
    // If the restored container should get a new name, the IP address of
    // the container will not be restored. This assumes that if a new name is
    // specified, the container is restored multiple times.
    // TODO: This implicit restoring with or without IP depending on an
    //       unrelated restore parameter (--name) does not seem like the
    //       best solution.
    if err == nil && options.Name == "" && (!options.IgnoreStaticIP || !options.IgnoreStaticMAC) {
        // The file with the network.status does exist. Let's restore the
        // container with the same IP address / MAC address as during checkpointing.
        if !options.IgnoreStaticIP {
            if IP := metadata.GetIPFromNetworkStatus(networkStatus); IP != nil {
                // Tell CNI which IP address we want.
                c.requestedIP = IP
            }
        }
        if !options.IgnoreStaticMAC {
            if MAC := metadata.GetMACFromNetworkStatus(networkStatus); MAC != nil {
                // Tell CNI which MAC address we want.
                c.requestedMAC = MAC
            }
        }
    }

    defer func() {
        if retErr != nil {
            if err := c.cleanup(ctx); err != nil {
                logrus.Errorf("error cleaning up container %s: %v", c.ID(), err)
            }
        }
    }()

    if err := c.prepare(); err != nil {
        return err
    }

    // Read config
    jsonPath := filepath.Join(c.bundlePath(), "config.json")
    logrus.Debugf("generate.NewFromFile at %v", jsonPath)
    g, err := generate.NewFromFile(jsonPath)
    if err != nil {
        logrus.Debugf("generate.NewFromFile failed with %v", err)
        return err
    }

    // Restoring from an import means that we are doing migration
    if options.TargetFile != "" {
        g.SetRootPath(c.state.Mountpoint)
    }

    // We want to have the same network namespace as before.
    if c.config.CreateNetNS {
        netNSPath := ""
        if !c.config.PostConfigureNetNS {
            netNSPath = c.state.NetNS.Path()
        }

        if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), netNSPath); err != nil {
            return err
        }
    }

    if err := c.makeBindMounts(); err != nil {
        return err
    }

    if options.TargetFile != "" {
        for dstPath, srcPath := range c.state.BindMounts {
            newMount := spec.Mount{
                Type:        "bind",
                Source:      srcPath,
                Destination: dstPath,
                Options:     []string{"bind", "private"},
            }
            if c.IsReadOnly() && dstPath != "/dev/shm" {
                newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
            }
            if !MountExists(g.Mounts(), dstPath) {
                g.AddMount(newMount)
            }
        }
    }

    // Cleanup for a working restore.
    if err := c.removeConmonFiles(); err != nil {
        return err
    }

    // Save the OCI spec to disk
    if err := c.saveSpec(g.Config); err != nil {
        return err
    }

    // When restoring from an imported archive, allow restoring the content of volumes.
    // Volumes are created in setupContainer()
    if options.TargetFile != "" && !options.IgnoreVolumes {
        for _, v := range c.config.NamedVolumes {
            volumeFilePath := filepath.Join(c.bundlePath(), "volumes", v.Name+".tar")

            volumeFile, err := os.Open(volumeFilePath)
            if err != nil {
                return errors.Wrapf(err, "failed to open volume file %s", volumeFilePath)
            }
            defer volumeFile.Close()

            volume, err := c.runtime.GetVolume(v.Name)
            if err != nil {
                return errors.Wrapf(err, "failed to retrieve volume %s", v.Name)
            }

            mountPoint, err := volume.MountPoint()
            if err != nil {
                return err
            }
            if mountPoint == "" {
                return errors.Wrapf(err, "unable to import volume %s as it is not mounted", volume.Name())
            }
            if err := archive.UntarUncompressed(volumeFile, mountPoint, nil); err != nil {
                return errors.Wrapf(err, "failed to extract volume %s to %s", volumeFilePath, mountPoint)
            }
        }
    }

    // Before actually restarting the container, apply the root file-system changes
    if !options.IgnoreRootfs {
        if err := crutils.CRApplyRootFsDiffTar(c.bundlePath(), c.state.Mountpoint); err != nil {
            return err
        }

        if err := crutils.CRRemoveDeletedFiles(c.ID(), c.bundlePath(), c.state.Mountpoint); err != nil {
            return err
        }
    }

    if err := c.ociRuntime.CreateContainer(c, &options); err != nil {
        return err
    }

    logrus.Debugf("Restored container %s", c.ID())

    c.state.State = define.ContainerStateRunning

    if !options.Keep {
        // Delete all checkpoint related files. At this point, in theory, all files
        // should exist. Still ignoring errors for now as the container should be
        // restored and running. Not erroring out just because some cleanup operation
        // failed. Starting with the checkpoint directory
        err = os.RemoveAll(c.CheckpointPath())
        if err != nil {
            logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
        }
        err = os.RemoveAll(c.PreCheckPointPath())
        if err != nil {
            logrus.Debugf("Non-fatal: removal of pre-checkpoint directory (%s) failed: %v", c.PreCheckPointPath(), err)
        }
        cleanup := [...]string{
            "restore.log",
            "dump.log",
            "stats-dump",
            "stats-restore",
            metadata.NetworkStatusFile,
            metadata.RootFsDiffTar,
            metadata.DeletedFilesFile,
        }
        for _, del := range cleanup {
            file := filepath.Join(c.bundlePath(), del)
            err = os.Remove(file)
            if err != nil {
                logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
            }
        }
    }

    return c.save()
}

// Make standard bind mounts to include in the container
func (c *Container) makeBindMounts() error {
    if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
        return errors.Wrap(err, "cannot chown run directory")
    }

    if c.state.BindMounts == nil {
        c.state.BindMounts = make(map[string]string)
    }
    netDisabled, err := c.NetworkDisabled()
    if err != nil {
        return err
    }

    if !netDisabled {
        // If /etc/resolv.conf and /etc/hosts exist, delete them so we
        // will recreate. Only do this if we aren't sharing them with
        // another container.
        if c.config.NetNsCtr == "" {
            if resolvePath, ok := c.state.BindMounts["/etc/resolv.conf"]; ok {
                if err := os.Remove(resolvePath); err != nil && !os.IsNotExist(err) {
                    return errors.Wrapf(err, "container %s", c.ID())
                }
                delete(c.state.BindMounts, "/etc/resolv.conf")
            }
            if hostsPath, ok := c.state.BindMounts["/etc/hosts"]; ok {
                if err := os.Remove(hostsPath); err != nil && !os.IsNotExist(err) {
                    return errors.Wrapf(err, "container %s", c.ID())
                }
                delete(c.state.BindMounts, "/etc/hosts")
            }
        }

        if c.config.NetNsCtr != "" && (!c.config.UseImageResolvConf || !c.config.UseImageHosts) {
            // We share a net namespace.
            // We want /etc/resolv.conf and /etc/hosts from the
            // other container. Unless we're not creating both of
            // them.
            var (
                depCtr  *Container
                nextCtr string
            )

            // I don't like infinite loops, but I don't think there's
            // a serious risk of looping dependencies - too many
            // protections against that elsewhere.
            nextCtr = c.config.NetNsCtr
            for {
                depCtr, err = c.runtime.state.Container(nextCtr)
                if err != nil {
                    return errors.Wrapf(err, "error fetching dependency %s of container %s", c.config.NetNsCtr, c.ID())
                }
                nextCtr = depCtr.config.NetNsCtr
                if nextCtr == "" {
                    break
                }
            }

            // We need that container's bind mounts
            bindMounts, err := depCtr.BindMounts()
            if err != nil {
                return errors.Wrapf(err, "error fetching bind mounts from dependency %s of container %s", depCtr.ID(), c.ID())
            }

            // The other container may not have a resolv.conf or /etc/hosts
            // If it doesn't, don't copy them
            resolvPath, exists := bindMounts["/etc/resolv.conf"]
            if !c.config.UseImageResolvConf && exists {
                c.state.BindMounts["/etc/resolv.conf"] = resolvPath
            }

            // check if dependency container has an /etc/hosts file.
            // It may not have one, so only use it if it does.
            hostsPath, exists := bindMounts["/etc/hosts"]
            if !c.config.UseImageHosts && exists {
                depCtr.lock.Lock()
                // generate a hosts file for the dependency container,
                // based on either its old hosts file, or the default,
                // and add the relevant information from the new container (hosts and IP)
                hostsPath, err = depCtr.appendHosts(hostsPath, c)

                if err != nil {
                    depCtr.lock.Unlock()
                    return errors.Wrapf(err, "error creating hosts file for container %s which depends on container %s", c.ID(), depCtr.ID())
                }
                depCtr.lock.Unlock()

                // finally, save it in the new container
                c.state.BindMounts["/etc/hosts"] = hostsPath
            }

            if !hasCurrentUserMapped(c) {
                if err := makeAccessible(resolvPath, c.RootUID(), c.RootGID()); err != nil {
                    return err
                }
                if err := makeAccessible(hostsPath, c.RootUID(), c.RootGID()); err != nil {
                    return err
                }
            }
        } else {
            if !c.config.UseImageResolvConf {
                newResolv, err := c.generateResolvConf()
                if err != nil {
                    return errors.Wrapf(err, "error creating resolv.conf for container %s", c.ID())
                }
                c.state.BindMounts["/etc/resolv.conf"] = newResolv
            }

            if !c.config.UseImageHosts {
                newHosts, err := c.generateHosts("/etc/hosts")
                if err != nil {
                    return errors.Wrapf(err, "error creating hosts file for container %s", c.ID())
                }
                c.state.BindMounts["/etc/hosts"] = newHosts
            }
        }

        if c.state.BindMounts["/etc/hosts"] != "" {
            if err := label.Relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
                return err
            }
        }

        if c.state.BindMounts["/etc/resolv.conf"] != "" {
            if err := label.Relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
                return err
            }
        }
    } else {
        if !c.config.UseImageHosts && c.state.BindMounts["/etc/hosts"] == "" {
            newHosts, err := c.generateHosts("/etc/hosts")
            if err != nil {
                return errors.Wrapf(err, "error creating hosts file for container %s", c.ID())
            }
            c.state.BindMounts["/etc/hosts"] = newHosts
        }
    }

    // SHM is always added when we mount the container
    c.state.BindMounts["/dev/shm"] = c.config.ShmDir

    newPasswd, newGroup, err := c.generatePasswdAndGroup()
    if err != nil {
        return errors.Wrapf(err, "error creating temporary passwd file for container %s", c.ID())
    }
    if newPasswd != "" {
        // Make /etc/passwd
        // If it already exists, delete so we can recreate
        delete(c.state.BindMounts, "/etc/passwd")
        c.state.BindMounts["/etc/passwd"] = newPasswd
    }
    if newGroup != "" {
        // Make /etc/group
        // If it already exists, delete so we can recreate
        delete(c.state.BindMounts, "/etc/group")
        c.state.BindMounts["/etc/group"] = newGroup
    }

    // Make /etc/hostname
    // This should never change, so no need to recreate if it exists
    if _, ok := c.state.BindMounts["/etc/hostname"]; !ok {
        hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname())
        if err != nil {
            return errors.Wrapf(err, "error creating hostname file for container %s", c.ID())
        }
        c.state.BindMounts["/etc/hostname"] = hostnamePath
    }

    // Make /etc/localtime
    ctrTimezone := c.Timezone()
    if ctrTimezone != "" {
        // validate the format of the timezone specified if it's not "local"
        if ctrTimezone != "local" {
            _, err = time.LoadLocation(ctrTimezone)
            if err != nil {
                return errors.Wrapf(err, "error finding timezone for container %s", c.ID())
            }
        }
        if _, ok := c.state.BindMounts["/etc/localtime"]; !ok {
            var zonePath string
            if ctrTimezone == "local" {
                zonePath, err = filepath.EvalSymlinks("/etc/localtime")
                if err != nil {
                    return errors.Wrapf(err, "error finding local timezone for container %s", c.ID())
                }
            } else {
                zone := filepath.Join("/usr/share/zoneinfo", ctrTimezone)
                zonePath, err = filepath.EvalSymlinks(zone)
                if err != nil {
                    return errors.Wrapf(err, "error setting timezone for container %s", c.ID())
                }
            }
            localtimePath, err := c.copyTimezoneFile(zonePath)
            if err != nil {
                return errors.Wrapf(err, "error setting timezone for container %s", c.ID())
            }
            c.state.BindMounts["/etc/localtime"] = localtimePath
        }
    }

    // Make .containerenv if it does not exist
    if _, ok := c.state.BindMounts["/run/.containerenv"]; !ok {
        containerenv := c.runtime.graphRootMountedFlag(c.config.Spec.Mounts)
        isRootless := 0
        if rootless.IsRootless() {
            isRootless = 1
        }
        imageID, imageName := c.Image()

        if c.Privileged() {
            // Populate the .containerenv with container information
            containerenv = fmt.Sprintf(`engine="podman-%s"
name=%q
id=%q
image=%q
imageid=%q
rootless=%d
%s`, version.Version.String(), c.Name(), c.ID(), imageName, imageID, isRootless, containerenv)
        }
        containerenvPath, err := c.writeStringToRundir(".containerenv", containerenv)
        if err != nil {
            return errors.Wrapf(err, "error creating containerenv file for container %s", c.ID())
        }
        c.state.BindMounts["/run/.containerenv"] = containerenvPath
    }

    // Add Subscription Mounts
    subscriptionMounts := subscriptions.MountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.Containers.DefaultMountsFile, c.state.Mountpoint, c.RootUID(), c.RootGID(), rootless.IsRootless(), false)
    for _, mount := range subscriptionMounts {
        if _, ok := c.state.BindMounts[mount.Destination]; !ok {
            c.state.BindMounts[mount.Destination] = mount.Source
        }
    }

    // Secrets are mounted by getting the secret data from the secrets manager,
    // copying the data into the container's static dir,
    // then mounting the copied dir into /run/secrets.
    // The secrets mounting must come after subscription mounts, since subscription mounts
    // creates the /run/secrets dir in the container where we mount as well.
    if len(c.Secrets()) > 0 {
        // create /run/secrets if subscriptions did not create
        if err := c.createSecretMountDir(); err != nil {
            return errors.Wrapf(err, "error creating secrets mount")
        }
        for _, secret := range c.Secrets() {
            src := filepath.Join(c.config.SecretsPath, secret.Name)
            dest := filepath.Join("/run/secrets", secret.Name)
            c.state.BindMounts[dest] = src
        }
    }
    return nil
}

// generateResolvConf generates a containers resolv.conf
|
|
func (c *Container) generateResolvConf() (string, error) {
|
|
var (
|
|
nameservers []string
|
|
cniNameServers []string
|
|
cniSearchDomains []string
|
|
)
|
|
|
|
resolvConf := "/etc/resolv.conf"
|
|
for _, namespace := range c.config.Spec.Linux.Namespaces {
|
|
if namespace.Type == spec.NetworkNamespace {
|
|
if namespace.Path != "" && !strings.HasPrefix(namespace.Path, "/proc/") {
|
|
definedPath := filepath.Join("/etc/netns", filepath.Base(namespace.Path), "resolv.conf")
|
|
_, err := os.Stat(definedPath)
|
|
if err == nil {
|
|
resolvConf = definedPath
|
|
} else if !os.IsNotExist(err) {
|
|
return "", err
|
|
}
|
|
}
|
|
break
|
|
}
|
|
}
|
|
|
|
// Determine the endpoint for resolv.conf in case it is a symlink
|
|
resolvPath, err := filepath.EvalSymlinks(resolvConf)
|
|
// resolv.conf doesn't have to exists
|
|
if err != nil && !os.IsNotExist(err) {
|
|
return "", err
|
|
}
|
|
|
|
// Determine if symlink points to any of the systemd-resolved files
|
|
if strings.HasPrefix(resolvPath, "/run/systemd/resolve/") {
|
|
resolvPath = "/run/systemd/resolve/resolv.conf"
|
|
}
|
|
|
|
contents, err := ioutil.ReadFile(resolvPath)
|
|
// resolv.conf doesn't have to exists
|
|
if err != nil && !os.IsNotExist(err) {
|
|
return "", err
|
|
}
|
|
|
|
// Ensure that the container's /etc/resolv.conf is compatible with its
|
|
// network configuration.
|
|
// TODO: set ipv6 enable bool more sanely
|
|
resolv, err := resolvconf.FilterResolvDNS(contents, true, c.config.CreateNetNS)
|
|
if err != nil {
|
|
return "", errors.Wrapf(err, "error parsing host resolv.conf")
|
|
}
|
|
|
|
// Check if CNI gave back and DNS servers for us to add in
|
|
cniResponse := c.state.NetworkStatus
|
|
for _, i := range cniResponse {
|
|
if i.DNS.Nameservers != nil {
|
|
cniNameServers = append(cniNameServers, i.DNS.Nameservers...)
|
|
logrus.Debugf("adding nameserver(s) from cni response of '%q'", i.DNS.Nameservers)
|
|
}
|
|
if i.DNS.Search != nil {
|
|
cniSearchDomains = append(cniSearchDomains, i.DNS.Search...)
|
|
logrus.Debugf("adding search domain(s) from cni response of '%q'", i.DNS.Search)
|
|
}
|
|
}
|
|
|
|
dns := make([]net.IP, 0, len(c.runtime.config.Containers.DNSServers))
|
|
for _, i := range c.runtime.config.Containers.DNSServers {
|
|
result := net.ParseIP(i)
|
|
if result == nil {
|
|
return "", errors.Wrapf(define.ErrInvalidArg, "invalid IP address %s", i)
|
|
}
|
|
dns = append(dns, result)
|
|
}
|
|
dnsServers := append(dns, c.config.DNSServer...)
|
|
// If the user provided dns, it trumps all; then dns masq; then resolv.conf
|
|
switch {
|
|
case len(dnsServers) > 0:
|
|
|
|
// We store DNS servers as net.IP, so need to convert to string
|
|
for _, server := range dnsServers {
|
|
nameservers = append(nameservers, server.String())
|
|
}
|
|
case len(cniNameServers) > 0:
|
|
nameservers = append(nameservers, cniNameServers...)
|
|
default:
|
|
// Make a new resolv.conf
|
|
nameservers = resolvconf.GetNameservers(resolv.Content)
|
|
// slirp4netns has a built in DNS server.
|
|
if c.config.NetMode.IsSlirp4netns() {
|
|
nameservers = append([]string{slirp4netnsDNS}, nameservers...)
|
|
}
|
|
}
|
|
|
|
var search []string
|
|
if len(c.config.DNSSearch) > 0 || len(c.runtime.config.Containers.DNSSearches) > 0 || len(cniSearchDomains) > 0 {
|
|
if !util.StringInSlice(".", c.config.DNSSearch) {
|
|
search = c.runtime.config.Containers.DNSSearches
|
|
search = append(search, c.config.DNSSearch...)
|
|
search = append(search, cniSearchDomains...)
|
|
}
|
|
} else {
|
|
search = resolvconf.GetSearchDomains(resolv.Content)
|
|
}
|
|
|
|
var options []string
|
|
if len(c.config.DNSOption) > 0 || len(c.runtime.config.Containers.DNSOptions) > 0 {
|
|
options = c.runtime.config.Containers.DNSOptions
|
|
options = append(options, c.config.DNSOption...)
|
|
} else {
|
|
options = resolvconf.GetOptions(resolv.Content)
|
|
}
|
|
|
|
destPath := filepath.Join(c.state.RunDir, "resolv.conf")
|
|
|
|
if err := os.Remove(destPath); err != nil && !os.IsNotExist(err) {
|
|
return "", errors.Wrapf(err, "container %s", c.ID())
|
|
}
|
|
|
|
// Build resolv.conf
|
|
if _, err = resolvconf.Build(destPath, nameservers, search, options); err != nil {
|
|
return "", errors.Wrapf(err, "error building resolv.conf for container %s", c.ID())
|
|
}
|
|
|
|
// Relabel resolv.conf for the container
|
|
if err := label.Relabel(destPath, c.config.MountLabel, true); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return filepath.Join(c.state.RunDir, "resolv.conf"), nil
|
|
}
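
// Illustrative example (not part of the original source): for a container
// created with `--dns 1.1.1.1 --dns-search example.com`, the user-provided
// servers take precedence over CNI and the host's resolv.conf, so the file
// written to <RunDir>/resolv.conf would look roughly like:
//
//	search example.com
//	nameserver 1.1.1.1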

// generateHosts creates a container's hosts file
func (c *Container) generateHosts(path string) (string, error) {
	orig, err := ioutil.ReadFile(path)
	if err != nil {
		return "", err
	}
	hosts := string(orig)
	hosts += c.getHosts()
	return c.writeStringToRundir("hosts", hosts)
}

// appendHosts appends a container's config and state pertaining to hosts to a
// container's local hosts file. netCtr is the container from which the netNS
// information is taken.
// path is the basis of the hosts file, into which netCtr's netNS information
// will be appended.
// FIXME: path should be used by this function, but I am not sure what is
// correct; remove //nolint once this is fixed.
func (c *Container) appendHosts(path string, netCtr *Container) (string, error) { //nolint
	return c.appendStringToRunDir("hosts", netCtr.getHosts())
}

// getHosts finds the pertinent information for a container's host file in its
// config and state and returns a string in a format that can be written to the
// host file
func (c *Container) getHosts() string {
	var hosts string
	if len(c.config.HostAdd) > 0 {
		for _, host := range c.config.HostAdd {
			// the host format has already been verified at this point
			fields := strings.SplitN(host, ":", 2)
			hosts += fmt.Sprintf("%s %s\n", fields[1], fields[0])
		}
	}

	hosts += c.cniHosts()

	// If not making a network namespace, add our own hostname.
	if c.Hostname() != "" {
		if c.config.NetMode.IsSlirp4netns() {
			// When using slirp4netns, the interface gets a static IP
			hosts += fmt.Sprintf("# used by slirp4netns\n%s\t%s %s\n", slirp4netnsIP, c.Hostname(), c.config.Name)
		} else {
			hasNetNS := false
			netNone := false
			for _, ns := range c.config.Spec.Linux.Namespaces {
				if ns.Type == spec.NetworkNamespace {
					hasNetNS = true
					if ns.Path == "" && !c.config.CreateNetNS {
						netNone = true
					}
					break
				}
			}
			if !hasNetNS {
				// 127.0.1.1 and host's hostname to match Docker
				osHostname, _ := os.Hostname()
				hosts += fmt.Sprintf("127.0.1.1 %s %s %s\n", osHostname, c.Hostname(), c.config.Name)
			}
			if netNone {
				hosts += fmt.Sprintf("127.0.1.1 %s %s\n", c.Hostname(), c.config.Name)
			}
		}
	}
	return hosts
}
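
// Illustrative example (not part of the original source): for a rootless
// container named "web" with hostname "web" running under slirp4netns, the
// returned string would contain lines along the lines of:
//
//	# used by slirp4netns
//	10.0.2.100	web web
//
// where 10.0.2.100 is the static IP slirp4netns assigns to the tap device
// (the slirp4netnsIP constant used above).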

// generateGroupEntry generates an entry or entries into /etc/group as
// required by container configuration.
// Generally speaking, we will make an entry under two circumstances:
// 1. The container is started as a specific user:group, and that group is both
//    numeric, and does not already exist in /etc/group.
// 2. It is requested that Libpod add the group that launched Podman to
//    /etc/group via AddCurrentUserPasswdEntry (though this does not trigger if
//    the group in question already exists in /etc/group).
// Returns the group entry (as a string that can be appended to /etc/group) and
// any error that occurred.
func (c *Container) generateGroupEntry() (string, error) {
	groupString := ""

	// Things we *can't* handle: adding the user we added in
	// generatePasswdEntry to any *existing* groups.
	addedGID := 0
	if c.config.AddCurrentUserPasswdEntry {
		entry, gid, err := c.generateCurrentUserGroupEntry()
		if err != nil {
			return "", err
		}
		groupString += entry
		addedGID = gid
	}
	if c.config.User != "" {
		entry, _, err := c.generateUserGroupEntry(addedGID)
		if err != nil {
			return "", err
		}
		groupString += entry
	}

	return groupString, nil
}

// Make an entry in /etc/group for the group of the user running podman iff we
// are rootless.
func (c *Container) generateCurrentUserGroupEntry() (string, int, error) {
	gid := rootless.GetRootlessGID()
	if gid == 0 {
		return "", 0, nil
	}

	g, err := user.LookupGroupId(strconv.Itoa(gid))
	if err != nil {
		return "", 0, errors.Wrapf(err, "failed to get current group")
	}

	// Look up the group name to see if it exists in the image.
	_, err = lookup.GetGroup(c.state.Mountpoint, g.Name)
	if err != runcuser.ErrNoGroupEntries {
		return "", 0, err
	}

	// Look up the GID to see if it exists in the image.
	_, err = lookup.GetGroup(c.state.Mountpoint, g.Gid)
	if err != runcuser.ErrNoGroupEntries {
		return "", 0, err
	}

	// We need to get the username of the rootless user so we can add it to
	// the group.
	username := ""
	uid := rootless.GetRootlessUID()
	if uid != 0 {
		u, err := user.LookupId(strconv.Itoa(uid))
		if err != nil {
			return "", 0, errors.Wrapf(err, "failed to get current user to make group entry")
		}
		username = u.Username
	}

	// Make the entry.
	return fmt.Sprintf("%s:x:%s:%s\n", g.Name, g.Gid, username), gid, nil
}
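
// Illustrative example (not part of the original source): if podman is run by
// user "alice" (UID/GID 1000) and neither group "alice" nor GID 1000 exists in
// the image, the entry returned above would be:
//
//	alice:x:1000:alice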

// Make an entry in /etc/group for the group the container was specified to run
// as.
func (c *Container) generateUserGroupEntry(addedGID int) (string, int, error) {
	if c.config.User == "" {
		return "", 0, nil
	}

	splitUser := strings.SplitN(c.config.User, ":", 2)
	group := splitUser[0]
	if len(splitUser) > 1 {
		group = splitUser[1]
	}

	gid, err := strconv.ParseUint(group, 10, 32)
	if err != nil {
		return "", 0, nil
	}

	if addedGID != 0 && addedGID == int(gid) {
		return "", 0, nil
	}

	// Check if the group already exists
	_, err = lookup.GetGroup(c.state.Mountpoint, group)
	if err != runcuser.ErrNoGroupEntries {
		return "", 0, err
	}

	return fmt.Sprintf("%d:x:%d:%s\n", gid, gid, splitUser[0]), int(gid), nil
}
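
// Illustrative example (not part of the original source): for a container
// started with `--user 567:1234` where GID 1234 is not present in the image,
// the entry returned above would be:
//
//	1234:x:1234:567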

// generatePasswdEntry generates an entry or entries into /etc/passwd as
// required by container configuration.
// Generally speaking, we will make an entry under two circumstances:
// 1. The container is started as a specific user who is not in /etc/passwd.
//    This only triggers if the user is given as a *numeric* ID.
// 2. It is requested that Libpod add the user that launched Podman to
//    /etc/passwd via AddCurrentUserPasswdEntry (though this does not trigger
//    if the user in question already exists in /etc/passwd, or if the UID to
//    be added is 0).
// Returns the passwd entry (as a string that can be appended to /etc/passwd)
// and any error that occurred.
func (c *Container) generatePasswdEntry() (string, error) {
	passwdString := ""

	addedUID := 0
	if c.config.AddCurrentUserPasswdEntry {
		entry, uid, _, err := c.generateCurrentUserPasswdEntry()
		if err != nil {
			return "", err
		}
		passwdString += entry
		addedUID = uid
	}
	if c.config.User != "" {
		entry, _, _, err := c.generateUserPasswdEntry(addedUID)
		if err != nil {
			return "", err
		}
		passwdString += entry
	}

	return passwdString, nil
}

// generateCurrentUserPasswdEntry generates an /etc/passwd entry for the user
// running the container engine.
// Returns a passwd entry for the user, and the UID and GID of the added entry.
func (c *Container) generateCurrentUserPasswdEntry() (string, int, int, error) {
	uid := rootless.GetRootlessUID()
	if uid == 0 {
		return "", 0, 0, nil
	}

	u, err := user.LookupId(strconv.Itoa(uid))
	if err != nil {
		return "", 0, 0, errors.Wrapf(err, "failed to get current user")
	}

	// Look up the user to see if it exists in the container image.
	_, err = lookup.GetUser(c.state.Mountpoint, u.Username)
	if err != runcuser.ErrNoPasswdEntries {
		return "", 0, 0, err
	}

	// Look up the UID to see if it exists in the container image.
	_, err = lookup.GetUser(c.state.Mountpoint, u.Uid)
	if err != runcuser.ErrNoPasswdEntries {
		return "", 0, 0, err
	}

	// If the user's actual home directory exists, or was mounted in, use
	// that.
	homeDir := c.WorkingDir()
	hDir := u.HomeDir
	for hDir != "/" {
		if MountExists(c.config.Spec.Mounts, hDir) {
			homeDir = u.HomeDir
			break
		}
		hDir = filepath.Dir(hDir)
	}
	if homeDir != u.HomeDir {
		for _, hDir := range c.UserVolumes() {
			if hDir == u.HomeDir {
				homeDir = u.HomeDir
				break
			}
		}
	}
	// Set the HOME environment variable if it is not already set.
	hasHomeSet := false
	for _, s := range c.config.Spec.Process.Env {
		if strings.HasPrefix(s, "HOME=") {
			hasHomeSet = true
			break
		}
	}
	if !hasHomeSet {
		c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", homeDir))
	}

	return fmt.Sprintf("%s:*:%s:%s:%s:%s:/bin/sh\n", u.Username, u.Uid, u.Gid, u.Name, homeDir), uid, rootless.GetRootlessGID(), nil
}
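
// Illustrative example (not part of the original source): for rootless user
// "alice" (UID/GID 1000, full name "Alice") whose home directory is mounted
// into the container, the entry returned above would be:
//
//	alice:*:1000:1000:Alice:/home/alice:/bin/sh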

// generateUserPasswdEntry generates an /etc/passwd entry for the container user
// to run in the container.
// The UID and GID of the added entry will also be returned.
// Accepts one argument, that being any UID that has already been added to the
// passwd file by other functions; if it matches the UID we were given, we don't
// need to do anything.
func (c *Container) generateUserPasswdEntry(addedUID int) (string, int, int, error) {
	var (
		groupspec string
		gid       int
	)
	if c.config.User == "" {
		return "", 0, 0, nil
	}
	splitSpec := strings.SplitN(c.config.User, ":", 2)
	userspec := splitSpec[0]
	if len(splitSpec) > 1 {
		groupspec = splitSpec[1]
	}
	// If the user is non-numeric, don't generate a passwd entry
	uid, err := strconv.ParseUint(userspec, 10, 32)
	if err != nil {
		return "", 0, 0, nil
	}

	if addedUID != 0 && int(uid) == addedUID {
		return "", 0, 0, nil
	}

	// Look up the user to see if it exists in the container image
	_, err = lookup.GetUser(c.state.Mountpoint, userspec)
	if err != runcuser.ErrNoPasswdEntries {
		return "", 0, 0, err
	}

	if groupspec != "" {
		ugid, err := strconv.ParseUint(groupspec, 10, 32)
		if err == nil {
			gid = int(ugid)
		} else {
			group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
			if err != nil {
				return "", 0, 0, errors.Wrapf(err, "unable to get gid %s from group file", groupspec)
			}
			gid = group.Gid
		}
	}
	return fmt.Sprintf("%d:*:%d:%d:container user:%s:/bin/sh\n", uid, uid, gid, c.WorkingDir()), int(uid), gid, nil
}
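
// Illustrative example (not part of the original source): for a container
// started with `--user 1234` (no group given) and working directory "/", the
// entry returned above would be:
//
//	1234:*:1234:0:container user:/:/bin/sh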

// generatePasswdAndGroup generates container-specific passwd and group files
// iff c.config.User is a number or we are configured to make a passwd entry
// for the current user.
// Returns the path to the file to mount at /etc/passwd, the path to the file
// to mount at /etc/group, and any error that occurred. If no passwd/group file
// was required, the empty string will be returned for those paths (this may
// occur even if no error happened).
// This may modify the mounted container's /etc/passwd and /etc/group instead
// of making copies to bind-mount in, so we don't break useradd (it wants to
// make a copy of /etc/passwd and rename the copy to /etc/passwd, which is
// impossible with a bind mount). This is done in cases where the container is
// *not* read-only. In this case, the function will return nothing ("", "",
// nil).
func (c *Container) generatePasswdAndGroup() (string, string, error) {
	if !c.config.AddCurrentUserPasswdEntry && c.config.User == "" {
		return "", "", nil
	}

	needPasswd := true
	needGroup := true

	// First, check if there's a mount at /etc/passwd or group, we don't
	// want to interfere with user mounts.
	if MountExists(c.config.Spec.Mounts, "/etc/passwd") {
		needPasswd = false
	}
	if MountExists(c.config.Spec.Mounts, "/etc/group") {
		needGroup = false
	}

	// Next, check if we already made the files. If we did, we don't need
	// to make them again.
	if needPasswd {
		passwdPath := filepath.Join(c.config.StaticDir, "passwd")
		if _, err := os.Stat(passwdPath); err == nil {
			needPasswd = false
		}
	}
	if needGroup {
		groupPath := filepath.Join(c.config.StaticDir, "group")
		if _, err := os.Stat(groupPath); err == nil {
			needGroup = false
		}
	}

	// Next, check if the container even has a /etc/passwd or /etc/group.
	// If it doesn't we don't want to create them ourselves.
	if needPasswd {
		exists, err := c.checkFileExistsInRootfs("/etc/passwd")
		if err != nil {
			return "", "", err
		}
		needPasswd = exists
	}
	if needGroup {
		exists, err := c.checkFileExistsInRootfs("/etc/group")
		if err != nil {
			return "", "", err
		}
		needGroup = exists
	}

	// If we don't need a /etc/passwd or /etc/group at this point we can
	// just return.
	if !needPasswd && !needGroup {
		return "", "", nil
	}

	passwdPath := ""
	groupPath := ""

	ro := c.IsReadOnly()

	if needPasswd {
		passwdEntry, err := c.generatePasswdEntry()
		if err != nil {
			return "", "", err
		}

		needsWrite := passwdEntry != ""
		switch {
		case ro && needsWrite:
			logrus.Debugf("Making /etc/passwd for container %s", c.ID())
			originPasswdFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
			if err != nil {
				return "", "", errors.Wrapf(err, "error creating path to container %s /etc/passwd", c.ID())
			}
			orig, err := ioutil.ReadFile(originPasswdFile)
			if err != nil && !os.IsNotExist(err) {
				return "", "", err
			}
			passwdFile, err := c.writeStringToStaticDir("passwd", string(orig)+passwdEntry)
			if err != nil {
				return "", "", errors.Wrapf(err, "failed to create temporary passwd file")
			}
			if err := os.Chmod(passwdFile, 0644); err != nil {
				return "", "", err
			}
			passwdPath = passwdFile
		case !ro && needsWrite:
			logrus.Debugf("Modifying container %s /etc/passwd", c.ID())
			containerPasswd, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
			if err != nil {
				return "", "", errors.Wrapf(err, "error looking up location of container %s /etc/passwd", c.ID())
			}

			f, err := os.OpenFile(containerPasswd, os.O_APPEND|os.O_WRONLY, 0600)
			if err != nil {
				return "", "", errors.Wrapf(err, "container %s", c.ID())
			}
			defer f.Close()

			if _, err := f.WriteString(passwdEntry); err != nil {
				return "", "", errors.Wrapf(err, "unable to append to container %s /etc/passwd", c.ID())
			}
		default:
			logrus.Debugf("Not modifying container %s /etc/passwd", c.ID())
		}
	}
	if needGroup {
		groupEntry, err := c.generateGroupEntry()
		if err != nil {
			return "", "", err
		}

		needsWrite := groupEntry != ""
		switch {
		case ro && needsWrite:
			logrus.Debugf("Making /etc/group for container %s", c.ID())
			originGroupFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
			if err != nil {
				return "", "", errors.Wrapf(err, "error creating path to container %s /etc/group", c.ID())
			}
			orig, err := ioutil.ReadFile(originGroupFile)
			if err != nil && !os.IsNotExist(err) {
				return "", "", err
			}
			groupFile, err := c.writeStringToStaticDir("group", string(orig)+groupEntry)
			if err != nil {
				return "", "", errors.Wrapf(err, "failed to create temporary group file")
			}
			if err := os.Chmod(groupFile, 0644); err != nil {
				return "", "", err
			}
			groupPath = groupFile
		case !ro && needsWrite:
			logrus.Debugf("Modifying container %s /etc/group", c.ID())
			containerGroup, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
			if err != nil {
				return "", "", errors.Wrapf(err, "error looking up location of container %s /etc/group", c.ID())
			}

			f, err := os.OpenFile(containerGroup, os.O_APPEND|os.O_WRONLY, 0600)
			if err != nil {
				return "", "", errors.Wrapf(err, "container %s", c.ID())
			}
			defer f.Close()

			if _, err := f.WriteString(groupEntry); err != nil {
				return "", "", errors.Wrapf(err, "unable to append to container %s /etc/group", c.ID())
			}
		default:
			logrus.Debugf("Not modifying container %s /etc/group", c.ID())
		}
	}

	return passwdPath, groupPath, nil
}
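
// Illustrative summary (not part of the original source): for a read-only
// container run with `--user 1234`, the paths returned above would point at
// <StaticDir>/passwd and <StaticDir>/group, which the caller bind-mounts over
// /etc/passwd and /etc/group; for a writable container, the entries are
// appended to the files in the container's rootfs and empty paths are
// returned.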

// Get the cgroup path in a format suitable for the OCI spec
func (c *Container) getOCICgroupPath() (string, error) {
	unified, err := cgroups.IsCgroup2UnifiedMode()
	if err != nil {
		return "", err
	}
	cgroupManager := c.CgroupManager()
	switch {
	case (rootless.IsRootless() && !unified) || c.config.NoCgroups:
		return "", nil
	case c.config.CgroupsMode == cgroupSplit:
		if c.config.CgroupParent != "" {
			return c.config.CgroupParent, nil
		}
		selfCgroup, err := utils.GetOwnCgroup()
		if err != nil {
			return "", err
		}
		return filepath.Join(selfCgroup, "container"), nil
	case cgroupManager == config.SystemdCgroupsManager:
		// When the OCI runtime is set to use systemd as the cgroup
		// manager, it expects cgroups to be passed as follows:
		// slice:prefix:name
		systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
		logrus.Debugf("Setting CGroups for container %s to %s", c.ID(), systemdCgroups)
		return systemdCgroups, nil
	case cgroupManager == config.CgroupfsCgroupsManager:
		cgroupPath := filepath.Join(c.config.CgroupParent, fmt.Sprintf("libpod-%s", c.ID()))
		logrus.Debugf("Setting CGroup path for container %s to %s", c.ID(), cgroupPath)
		return cgroupPath, nil
	default:
		return "", errors.Wrapf(define.ErrInvalidArg, "invalid cgroup manager %s requested", cgroupManager)
	}
}
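
// Illustrative examples (not part of the original source), assuming container
// ID "abc123" and a cgroup parent of "machine.slice" / "/libpod_parent":
//
//	systemd manager:  machine.slice:libpod:abc123
//	cgroupfs manager: /libpod_parent/libpod-abc123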

func (c *Container) copyTimezoneFile(zonePath string) (string, error) {
	localtimeCopy := filepath.Join(c.state.RunDir, "localtime")
	file, err := os.Stat(zonePath)
	if err != nil {
		return "", err
	}
	if file.IsDir() {
		return "", errors.New("invalid timezone: is a directory")
	}
	src, err := os.Open(zonePath)
	if err != nil {
		return "", err
	}
	defer src.Close()
	dest, err := os.Create(localtimeCopy)
	if err != nil {
		return "", err
	}
	defer dest.Close()
	_, err = io.Copy(dest, src)
	if err != nil {
		return "", err
	}
	if err := label.Relabel(localtimeCopy, c.config.MountLabel, false); err != nil {
		return "", err
	}
	if err := dest.Chown(c.RootUID(), c.RootGID()); err != nil {
		return "", err
	}
	return localtimeCopy, nil
}

func (c *Container) cleanupOverlayMounts() error {
	return overlay.CleanupContent(c.config.StaticDir)
}

// checkFileExistsInRootfs checks if a file exists at the given path in the
// container's root filesystem. The container must already be mounted for this
// to be used.
func (c *Container) checkFileExistsInRootfs(file string) (bool, error) {
	checkPath, err := securejoin.SecureJoin(c.state.Mountpoint, file)
	if err != nil {
		return false, errors.Wrapf(err, "cannot create path to container %s file %q", c.ID(), file)
	}
	stat, err := os.Stat(checkPath)
	if err != nil {
		if os.IsNotExist(err) {
			return false, nil
		}
		return false, errors.Wrapf(err, "container %s", c.ID())
	}
	if stat.IsDir() {
		return false, nil
	}
	return true, nil
}

// createSecretMountDir creates and mounts an empty dir for secrets to be
// mounted into, if it does not already exist.
func (c *Container) createSecretMountDir() error {
	src := filepath.Join(c.state.RunDir, "/run/secrets")
	_, err := os.Stat(src)
	if os.IsNotExist(err) {
		oldUmask := umask.Set(0)
		defer umask.Set(oldUmask)

		if err := os.MkdirAll(src, 0644); err != nil {
			return err
		}
		if err := label.Relabel(src, c.config.MountLabel, false); err != nil {
			return err
		}
		if err := os.Chown(src, c.RootUID(), c.RootGID()); err != nil {
			return err
		}
		c.state.BindMounts["/run/secrets"] = src
		return nil
	}

	return err
}
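
// Illustrative note (not part of the original source): after the first
// successful call, c.state.BindMounts["/run/secrets"] points at
// <RunDir>/run/secrets; the secrets loop at the top of this section then maps
// each secret to /run/secrets/<name> inside the container via that bind-mount
// table.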