mirror of
https://github.com/containers/podman.git
synced 2025-05-31 15:42:48 +08:00

We have an issue in the current implementation where the cleanup process is not able to umount the storage as it is running in a separate namespace. Simplify the implementation for user namespaces by not using an intermediate mount namespace. For doing it, we need to relax the permissions on the parent directories and allow browsing them. Containers that are running without a user namespace, will still maintain mode 0700 on their directory. Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
1004 lines
30 KiB
Go
1004 lines
30 KiB
Go
// +build linux
|
|
|
|
package libpod
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
cnitypes "github.com/containernetworking/cni/pkg/types/current"
|
|
"github.com/containernetworking/plugins/pkg/ns"
|
|
"github.com/containers/buildah/pkg/secrets"
|
|
crioAnnotations "github.com/containers/libpod/pkg/annotations"
|
|
"github.com/containers/libpod/pkg/apparmor"
|
|
"github.com/containers/libpod/pkg/criu"
|
|
"github.com/containers/libpod/pkg/lookup"
|
|
"github.com/containers/libpod/pkg/resolvconf"
|
|
"github.com/containers/libpod/pkg/rootless"
|
|
"github.com/cyphar/filepath-securejoin"
|
|
"github.com/opencontainers/runc/libcontainer/user"
|
|
spec "github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/opencontainers/runtime-tools/generate"
|
|
"github.com/opencontainers/selinux/go-selinux/label"
|
|
opentracing "github.com/opentracing/opentracing-go"
|
|
"github.com/pkg/errors"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
func (c *Container) mountSHM(shmOptions string) error {
|
|
if err := unix.Mount("shm", c.config.ShmDir, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV,
|
|
label.FormatMountLabel(shmOptions, c.config.MountLabel)); err != nil {
|
|
return errors.Wrapf(err, "failed to mount shm tmpfs %q", c.config.ShmDir)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *Container) unmountSHM(mount string) error {
|
|
if err := unix.Unmount(mount, unix.MNT_DETACH); err != nil {
|
|
if err != syscall.EINVAL {
|
|
logrus.Warnf("container %s failed to unmount %s : %v", c.ID(), mount, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// prepare mounts the container and sets up other required resources like net
|
|
// namespaces
|
|
func (c *Container) prepare() (err error) {
|
|
var (
|
|
wg sync.WaitGroup
|
|
netNS ns.NetNS
|
|
networkStatus []*cnitypes.Result
|
|
createNetNSErr, mountStorageErr error
|
|
mountPoint string
|
|
tmpStateLock sync.Mutex
|
|
)
|
|
|
|
wg.Add(2)
|
|
|
|
go func() {
|
|
defer wg.Done()
|
|
// Set up network namespace if not already set up
|
|
if c.config.CreateNetNS && c.state.NetNS == nil && !c.config.PostConfigureNetNS {
|
|
netNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c)
|
|
|
|
tmpStateLock.Lock()
|
|
defer tmpStateLock.Unlock()
|
|
|
|
// Assign NetNS attributes to container
|
|
if createNetNSErr == nil {
|
|
c.state.NetNS = netNS
|
|
c.state.NetworkStatus = networkStatus
|
|
}
|
|
}
|
|
}()
|
|
// Mount storage if not mounted
|
|
go func() {
|
|
defer wg.Done()
|
|
mountPoint, mountStorageErr = c.mountStorage()
|
|
|
|
if mountStorageErr != nil {
|
|
return
|
|
}
|
|
|
|
tmpStateLock.Lock()
|
|
defer tmpStateLock.Unlock()
|
|
|
|
// Finish up mountStorage
|
|
c.state.Mounted = true
|
|
c.state.Mountpoint = mountPoint
|
|
|
|
logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint)
|
|
}()
|
|
|
|
defer func() {
|
|
if err != nil {
|
|
if err2 := c.cleanupNetwork(); err2 != nil {
|
|
logrus.Errorf("Error cleaning up container %s network: %v", c.ID(), err2)
|
|
}
|
|
if err2 := c.cleanupStorage(); err2 != nil {
|
|
logrus.Errorf("Error cleaning up container %s storage: %v", c.ID(), err2)
|
|
}
|
|
}
|
|
}()
|
|
|
|
wg.Wait()
|
|
|
|
if createNetNSErr != nil {
|
|
if mountStorageErr != nil {
|
|
logrus.Error(createNetNSErr)
|
|
return mountStorageErr
|
|
}
|
|
return createNetNSErr
|
|
}
|
|
if mountStorageErr != nil {
|
|
return mountStorageErr
|
|
}
|
|
|
|
// Save the container
|
|
return c.save()
|
|
}
|
|
|
|
// cleanupNetwork unmounts and cleans up the container's network
|
|
func (c *Container) cleanupNetwork() error {
|
|
if c.config.NetNsCtr != "" {
|
|
return nil
|
|
}
|
|
netDisabled, err := c.NetworkDisabled()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if netDisabled {
|
|
return nil
|
|
}
|
|
if c.state.NetNS == nil {
|
|
logrus.Debugf("Network is already cleaned up, skipping...")
|
|
return nil
|
|
}
|
|
|
|
// Stop the container's network namespace (if it has one)
|
|
if err := c.runtime.teardownNetNS(c); err != nil {
|
|
logrus.Errorf("unable to cleanup network for container %s: %q", c.ID(), err)
|
|
}
|
|
|
|
c.state.NetNS = nil
|
|
c.state.NetworkStatus = nil
|
|
|
|
if c.valid {
|
|
return c.save()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Generate spec for a container
|
|
// Accepts a map of the container's dependencies
|
|
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
|
|
span, _ := opentracing.StartSpanFromContext(ctx, "generateSpec")
|
|
span.SetTag("type", "container")
|
|
defer span.Finish()
|
|
|
|
execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
g := generate.NewFromSpec(c.config.Spec)
|
|
|
|
// If network namespace was requested, add it now
|
|
if c.config.CreateNetNS {
|
|
if c.config.PostConfigureNetNS {
|
|
g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, "")
|
|
} else {
|
|
g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path())
|
|
}
|
|
}
|
|
|
|
// Apply AppArmor checks and load the default profile if needed.
|
|
updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
g.SetProcessApparmorProfile(updatedProfile)
|
|
|
|
if err := c.makeBindMounts(); err != nil {
|
|
return nil, err
|
|
}
|
|
// Check if the spec file mounts contain the label Relabel flags z or Z.
|
|
// If they do, relabel the source directory and then remove the option.
|
|
for i := range g.Config.Mounts {
|
|
m := &g.Config.Mounts[i]
|
|
var options []string
|
|
for _, o := range m.Options {
|
|
switch o {
|
|
case "z":
|
|
fallthrough
|
|
case "Z":
|
|
if err := label.Relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
|
|
return nil, errors.Wrapf(err, "relabel failed %q", m.Source)
|
|
}
|
|
|
|
default:
|
|
options = append(options, o)
|
|
}
|
|
}
|
|
m.Options = options
|
|
}
|
|
|
|
g.SetProcessSelinuxLabel(c.ProcessLabel())
|
|
g.SetLinuxMountLabel(c.MountLabel())
|
|
// Add bind mounts to container
|
|
for dstPath, srcPath := range c.state.BindMounts {
|
|
newMount := spec.Mount{
|
|
Type: "bind",
|
|
Source: srcPath,
|
|
Destination: dstPath,
|
|
Options: []string{"bind", "private"},
|
|
}
|
|
if c.IsReadOnly() && dstPath != "/dev/shm" {
|
|
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
|
|
}
|
|
if !MountExists(g.Mounts(), dstPath) {
|
|
g.AddMount(newMount)
|
|
} else {
|
|
logrus.Warnf("User mount overriding libpod mount at %q", dstPath)
|
|
}
|
|
}
|
|
|
|
if c.config.User != "" {
|
|
// User and Group must go together
|
|
g.SetProcessUID(uint32(execUser.Uid))
|
|
g.SetProcessGID(uint32(execUser.Gid))
|
|
}
|
|
|
|
// Add addition groups if c.config.GroupAdd is not empty
|
|
if len(c.config.Groups) > 0 {
|
|
gids, _ := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, nil)
|
|
for _, gid := range gids {
|
|
g.AddProcessAdditionalGid(gid)
|
|
}
|
|
}
|
|
|
|
if c.config.Systemd {
|
|
if err := c.setupSystemd(g.Mounts(), g); err != nil {
|
|
return nil, errors.Wrapf(err, "error adding systemd-specific mounts")
|
|
}
|
|
}
|
|
|
|
// Look up and add groups the user belongs to, if a group wasn't directly specified
|
|
if !rootless.IsRootless() && !strings.Contains(c.config.User, ":") {
|
|
for _, gid := range execUser.Sgids {
|
|
g.AddProcessAdditionalGid(uint32(gid))
|
|
}
|
|
}
|
|
|
|
// Add shared namespaces from other containers
|
|
if c.config.IPCNsCtr != "" {
|
|
if err := c.addNamespaceContainer(&g, IPCNS, c.config.IPCNsCtr, spec.IPCNamespace); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if c.config.MountNsCtr != "" {
|
|
if err := c.addNamespaceContainer(&g, MountNS, c.config.MountNsCtr, spec.MountNamespace); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if c.config.NetNsCtr != "" {
|
|
if err := c.addNamespaceContainer(&g, NetNS, c.config.NetNsCtr, spec.NetworkNamespace); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if c.config.PIDNsCtr != "" {
|
|
if err := c.addNamespaceContainer(&g, PIDNS, c.config.PIDNsCtr, string(spec.PIDNamespace)); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if c.config.UserNsCtr != "" {
|
|
if err := c.addNamespaceContainer(&g, UserNS, c.config.UserNsCtr, spec.UserNamespace); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if c.config.UTSNsCtr != "" {
|
|
if err := c.addNamespaceContainer(&g, UTSNS, c.config.UTSNsCtr, spec.UTSNamespace); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if c.config.CgroupNsCtr != "" {
|
|
if err := c.addNamespaceContainer(&g, CgroupNS, c.config.CgroupNsCtr, spec.CgroupNamespace); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
g.SetRootPath(c.state.Mountpoint)
|
|
g.AddAnnotation(crioAnnotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
|
|
g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))
|
|
|
|
for _, i := range c.config.Spec.Linux.Namespaces {
|
|
if string(i.Type) == spec.UTSNamespace {
|
|
hostname := c.Hostname()
|
|
g.SetHostname(hostname)
|
|
g.AddProcessEnv("HOSTNAME", hostname)
|
|
break
|
|
}
|
|
}
|
|
|
|
// Only add container environment variable if not already present
|
|
foundContainerEnv := false
|
|
for _, env := range g.Config.Process.Env {
|
|
if strings.HasPrefix(env, "container=") {
|
|
foundContainerEnv = true
|
|
break
|
|
}
|
|
}
|
|
if !foundContainerEnv {
|
|
g.AddProcessEnv("container", "libpod")
|
|
}
|
|
|
|
if rootless.IsRootless() {
|
|
g.SetLinuxCgroupsPath("")
|
|
} else if c.runtime.config.CgroupManager == SystemdCgroupsManager {
|
|
// When runc is set to use Systemd as a cgroup manager, it
|
|
// expects cgroups to be passed as follows:
|
|
// slice:prefix:name
|
|
systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
|
|
logrus.Debugf("Setting CGroups for container %s to %s", c.ID(), systemdCgroups)
|
|
g.SetLinuxCgroupsPath(systemdCgroups)
|
|
} else {
|
|
cgroupPath, err := c.CGroupPath()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
logrus.Debugf("Setting CGroup path for container %s to %s", c.ID(), cgroupPath)
|
|
g.SetLinuxCgroupsPath(cgroupPath)
|
|
}
|
|
|
|
// Mounts need to be sorted so paths will not cover other paths
|
|
mounts := sortMounts(g.Mounts())
|
|
g.ClearMounts()
|
|
|
|
// Determine property of RootPropagation based on volume properties. If
|
|
// a volume is shared, then keep root propagation shared. This should
|
|
// work for slave and private volumes too.
|
|
//
|
|
// For slave volumes, it can be either [r]shared/[r]slave.
|
|
//
|
|
// For private volumes any root propagation value should work.
|
|
rootPropagation := ""
|
|
for _, m := range mounts {
|
|
// We need to remove all symlinks from tmpfs mounts.
|
|
// Runc and other runtimes may choke on them.
|
|
// Easy solution: use securejoin to do a scoped evaluation of
|
|
// the links, then trim off the mount prefix.
|
|
if m.Type == "tmpfs" {
|
|
finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
|
|
if err != nil {
|
|
return nil, errors.Wrapf(err, "error resolving symlinks for mount destination %s", m.Destination)
|
|
}
|
|
trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
|
|
m.Destination = trimmedPath
|
|
}
|
|
g.AddMount(m)
|
|
for _, opt := range m.Options {
|
|
switch opt {
|
|
case MountShared, MountRShared:
|
|
if rootPropagation != MountShared && rootPropagation != MountRShared {
|
|
rootPropagation = MountShared
|
|
}
|
|
case MountSlave, MountRSlave:
|
|
if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
|
|
rootPropagation = MountRSlave
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if rootPropagation != "" {
|
|
logrus.Debugf("set root propagation to %q", rootPropagation)
|
|
g.SetLinuxRootPropagation(rootPropagation)
|
|
}
|
|
|
|
// Warning: precreate hooks may alter g.Config in place.
|
|
if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
|
|
return nil, errors.Wrapf(err, "error setting up OCI Hooks")
|
|
}
|
|
|
|
return g.Config, nil
|
|
}
|
|
|
|
// systemd expects to have /run, /run/lock and /tmp on tmpfs
|
|
// It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
|
|
func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error {
|
|
options := []string{"rw", "rprivate", "noexec", "nosuid", "nodev"}
|
|
for _, dest := range []string{"/run", "/run/lock"} {
|
|
if MountExists(mounts, dest) {
|
|
continue
|
|
}
|
|
tmpfsMnt := spec.Mount{
|
|
Destination: dest,
|
|
Type: "tmpfs",
|
|
Source: "tmpfs",
|
|
Options: append(options, "tmpcopyup", "size=65536k"),
|
|
}
|
|
g.AddMount(tmpfsMnt)
|
|
}
|
|
for _, dest := range []string{"/tmp", "/var/log/journal"} {
|
|
if MountExists(mounts, dest) {
|
|
continue
|
|
}
|
|
tmpfsMnt := spec.Mount{
|
|
Destination: dest,
|
|
Type: "tmpfs",
|
|
Source: "tmpfs",
|
|
Options: append(options, "tmpcopyup"),
|
|
}
|
|
g.AddMount(tmpfsMnt)
|
|
}
|
|
|
|
// rootless containers have no write access to /sys/fs/cgroup, so don't
|
|
// add any mount into the container.
|
|
if !rootless.IsRootless() {
|
|
cgroupPath, err := c.CGroupPath()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
sourcePath := filepath.Join("/sys/fs/cgroup/systemd", cgroupPath)
|
|
|
|
systemdMnt := spec.Mount{
|
|
Destination: "/sys/fs/cgroup/systemd",
|
|
Type: "bind",
|
|
Source: sourcePath,
|
|
Options: []string{"bind", "private"},
|
|
}
|
|
g.AddMount(systemdMnt)
|
|
} else {
|
|
systemdMnt := spec.Mount{
|
|
Destination: "/sys/fs/cgroup/systemd",
|
|
Type: "bind",
|
|
Source: "/sys/fs/cgroup/systemd",
|
|
Options: []string{"bind", "nodev", "noexec", "nosuid"},
|
|
}
|
|
g.AddMount(systemdMnt)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Add an existing container's namespace to the spec
|
|
func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr string, specNS string) error {
|
|
nsCtr, err := c.runtime.state.Container(ctr)
|
|
if err != nil {
|
|
return errors.Wrapf(err, "error retrieving dependency %s of container %s from state", ctr, c.ID())
|
|
}
|
|
|
|
// TODO need unlocked version of this for use in pods
|
|
nsPath, err := nsCtr.NamespacePath(ns)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := g.AddOrReplaceLinuxNamespace(specNS, nsPath); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (c *Container) checkpointRestoreSupported() (err error) {
|
|
if !criu.CheckForCriu() {
|
|
return errors.Errorf("Checkpoint/Restore requires at least CRIU %d", criu.MinCriuVersion)
|
|
}
|
|
if !c.runtime.ociRuntime.featureCheckCheckpointing() {
|
|
return errors.Errorf("Configured runtime does not support checkpoint/restore")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (err error) {
|
|
if err := c.checkpointRestoreSupported(); err != nil {
|
|
return err
|
|
}
|
|
|
|
if c.state.State != ContainerStateRunning {
|
|
return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
|
|
}
|
|
|
|
// Create the CRIU log file and label it
|
|
dumpLog := filepath.Join(c.bundlePath(), "dump.log")
|
|
|
|
logFile, err := os.OpenFile(dumpLog, os.O_CREATE, 0600)
|
|
if err != nil {
|
|
return errors.Wrapf(err, "failed to create CRIU log file %q", dumpLog)
|
|
}
|
|
logFile.Close()
|
|
if err = label.SetFileLabel(dumpLog, c.MountLabel()); err != nil {
|
|
return errors.Wrapf(err, "failed to label CRIU log file %q", dumpLog)
|
|
}
|
|
|
|
if err := c.runtime.ociRuntime.checkpointContainer(c, options); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Save network.status. This is needed to restore the container with
|
|
// the same IP. Currently limited to one IP address in a container
|
|
// with one interface.
|
|
formatJSON, err := json.MarshalIndent(c.state.NetworkStatus, "", " ")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := ioutil.WriteFile(filepath.Join(c.bundlePath(), "network.status"), formatJSON, 0644); err != nil {
|
|
return err
|
|
}
|
|
|
|
logrus.Debugf("Checkpointed container %s", c.ID())
|
|
|
|
if !options.KeepRunning {
|
|
c.state.State = ContainerStateStopped
|
|
|
|
// Cleanup Storage and Network
|
|
if err := c.cleanup(ctx); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if !options.Keep {
|
|
// Remove log file
|
|
os.Remove(filepath.Join(c.bundlePath(), "dump.log"))
|
|
// Remove statistic file
|
|
os.Remove(filepath.Join(c.bundlePath(), "stats-dump"))
|
|
}
|
|
|
|
return c.save()
|
|
}
|
|
|
|
func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (err error) {
|
|
|
|
if err := c.checkpointRestoreSupported(); err != nil {
|
|
return err
|
|
}
|
|
|
|
if (c.state.State != ContainerStateConfigured) && (c.state.State != ContainerStateExited) {
|
|
return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID())
|
|
}
|
|
|
|
// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
|
|
// no sense to try a restore. This is a minimal check if a checkpoint exist.
|
|
if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
|
|
return errors.Wrapf(err, "A complete checkpoint for this container cannot be found, cannot restore")
|
|
}
|
|
|
|
// Read network configuration from checkpoint
|
|
// Currently only one interface with one IP is supported.
|
|
networkStatusFile, err := os.Open(filepath.Join(c.bundlePath(), "network.status"))
|
|
if err == nil {
|
|
// The file with the network.status does exist. Let's restore the
|
|
// container with the same IP address as during checkpointing.
|
|
defer networkStatusFile.Close()
|
|
var networkStatus []*cnitypes.Result
|
|
networkJSON, err := ioutil.ReadAll(networkStatusFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
json.Unmarshal(networkJSON, &networkStatus)
|
|
// Take the first IP address
|
|
var IP net.IP
|
|
if len(networkStatus) > 0 {
|
|
if len(networkStatus[0].IPs) > 0 {
|
|
IP = networkStatus[0].IPs[0].Address.IP
|
|
}
|
|
}
|
|
if IP != nil {
|
|
// Tell CNI which IP address we want.
|
|
c.requestedIP = IP
|
|
}
|
|
}
|
|
|
|
defer func() {
|
|
if err != nil {
|
|
if err2 := c.cleanup(ctx); err2 != nil {
|
|
logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
|
|
}
|
|
}
|
|
}()
|
|
|
|
if err := c.prepare(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Read config
|
|
jsonPath := filepath.Join(c.bundlePath(), "config.json")
|
|
logrus.Debugf("generate.NewFromFile at %v", jsonPath)
|
|
g, err := generate.NewFromFile(jsonPath)
|
|
if err != nil {
|
|
logrus.Debugf("generate.NewFromFile failed with %v", err)
|
|
return err
|
|
}
|
|
|
|
// We want to have the same network namespace as before.
|
|
if c.config.CreateNetNS {
|
|
g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path())
|
|
}
|
|
|
|
// Save the OCI spec to disk
|
|
if err := c.saveSpec(g.Spec()); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := c.makeBindMounts(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Cleanup for a working restore.
|
|
c.removeConmonFiles()
|
|
|
|
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, &options); err != nil {
|
|
return err
|
|
}
|
|
|
|
logrus.Debugf("Restored container %s", c.ID())
|
|
|
|
c.state.State = ContainerStateRunning
|
|
|
|
if !options.Keep {
|
|
// Delete all checkpoint related files. At this point, in theory, all files
|
|
// should exist. Still ignoring errors for now as the container should be
|
|
// restored and running. Not erroring out just because some cleanup operation
|
|
// failed. Starting with the checkpoint directory
|
|
err = os.RemoveAll(c.CheckpointPath())
|
|
if err != nil {
|
|
logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
|
|
}
|
|
cleanup := [...]string{"restore.log", "dump.log", "stats-dump", "stats-restore", "network.status"}
|
|
for _, delete := range cleanup {
|
|
file := filepath.Join(c.bundlePath(), delete)
|
|
err = os.Remove(file)
|
|
if err != nil {
|
|
logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
return c.save()
|
|
}
|
|
|
|
// Make standard bind mounts to include in the container
|
|
func (c *Container) makeBindMounts() error {
|
|
if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
|
|
return errors.Wrapf(err, "cannot chown run directory %s", c.state.RunDir)
|
|
}
|
|
|
|
if c.state.BindMounts == nil {
|
|
c.state.BindMounts = make(map[string]string)
|
|
}
|
|
netDisabled, err := c.NetworkDisabled()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if !netDisabled {
|
|
// If /etc/resolv.conf and /etc/hosts exist, delete them so we
|
|
// will recreate. Only do this if we aren't sharing them with
|
|
// another container.
|
|
if c.config.NetNsCtr == "" {
|
|
if path, ok := c.state.BindMounts["/etc/resolv.conf"]; ok {
|
|
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
|
|
return errors.Wrapf(err, "error removing container %s resolv.conf", c.ID())
|
|
}
|
|
delete(c.state.BindMounts, "/etc/resolv.conf")
|
|
}
|
|
if path, ok := c.state.BindMounts["/etc/hosts"]; ok {
|
|
if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
|
|
return errors.Wrapf(err, "error removing container %s hosts", c.ID())
|
|
}
|
|
delete(c.state.BindMounts, "/etc/hosts")
|
|
}
|
|
}
|
|
|
|
if c.config.NetNsCtr != "" && (!c.config.UseImageResolvConf || !c.config.UseImageHosts) {
|
|
// We share a net namespace.
|
|
// We want /etc/resolv.conf and /etc/hosts from the
|
|
// other container. Unless we're not creating both of
|
|
// them.
|
|
depCtr, err := c.runtime.state.Container(c.config.NetNsCtr)
|
|
if err != nil {
|
|
return errors.Wrapf(err, "error fetching dependency %s of container %s", c.config.NetNsCtr, c.ID())
|
|
}
|
|
|
|
// We need that container's bind mounts
|
|
bindMounts, err := depCtr.BindMounts()
|
|
if err != nil {
|
|
return errors.Wrapf(err, "error fetching bind mounts from dependency %s of container %s", depCtr.ID(), c.ID())
|
|
}
|
|
|
|
if !c.config.UseImageResolvConf {
|
|
// The other container may not have a resolv.conf or /etc/hosts
|
|
// If it doesn't, don't copy them
|
|
resolvPath, exists := bindMounts["/etc/resolv.conf"]
|
|
if exists {
|
|
c.state.BindMounts["/etc/resolv.conf"] = resolvPath
|
|
}
|
|
}
|
|
|
|
if !c.config.UseImageHosts {
|
|
// check if dependency container has an /etc/hosts file
|
|
hostsPath, exists := bindMounts["/etc/hosts"]
|
|
if !exists {
|
|
return errors.Errorf("error finding hosts file of dependency container %s for container %s", depCtr.ID(), c.ID())
|
|
}
|
|
|
|
depCtr.lock.Lock()
|
|
// generate a hosts file for the dependency container,
|
|
// based on either its old hosts file, or the default,
|
|
// and add the relevant information from the new container (hosts and IP)
|
|
hostsPath, err = depCtr.appendHosts(hostsPath, c)
|
|
|
|
if err != nil {
|
|
depCtr.lock.Unlock()
|
|
return errors.Wrapf(err, "error creating hosts file for container %s which depends on container %s", c.ID(), depCtr.ID())
|
|
}
|
|
depCtr.lock.Unlock()
|
|
|
|
// finally, save it in the new container
|
|
c.state.BindMounts["/etc/hosts"] = hostsPath
|
|
}
|
|
} else {
|
|
if !c.config.UseImageResolvConf {
|
|
newResolv, err := c.generateResolvConf()
|
|
if err != nil {
|
|
return errors.Wrapf(err, "error creating resolv.conf for container %s", c.ID())
|
|
}
|
|
c.state.BindMounts["/etc/resolv.conf"] = newResolv
|
|
}
|
|
|
|
if !c.config.UseImageHosts {
|
|
newHosts, err := c.generateHosts("/etc/hosts")
|
|
if err != nil {
|
|
return errors.Wrapf(err, "error creating hosts file for container %s", c.ID())
|
|
}
|
|
c.state.BindMounts["/etc/hosts"] = newHosts
|
|
}
|
|
}
|
|
|
|
if c.state.BindMounts["/etc/hosts"] != "" {
|
|
if err := label.Relabel(c.state.BindMounts["/etc/hosts"], c.config.MountLabel, true); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if c.state.BindMounts["/etc/resolv.conf"] != "" {
|
|
if err := label.Relabel(c.state.BindMounts["/etc/resolv.conf"], c.config.MountLabel, true); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
// SHM is always added when we mount the container
|
|
c.state.BindMounts["/dev/shm"] = c.config.ShmDir
|
|
|
|
newPasswd, err := c.generatePasswd()
|
|
if err != nil {
|
|
return errors.Wrapf(err, "error creating temporary passwd file for container %s", c.ID())
|
|
}
|
|
if newPasswd != "" {
|
|
// Make /etc/passwd
|
|
if _, ok := c.state.BindMounts["/etc/passwd"]; ok {
|
|
// If it already exists, delete so we can recreate
|
|
delete(c.state.BindMounts, "/etc/passwd")
|
|
}
|
|
logrus.Debugf("adding entry to /etc/passwd for non existent default user")
|
|
c.state.BindMounts["/etc/passwd"] = newPasswd
|
|
}
|
|
|
|
// Make /etc/hostname
|
|
// This should never change, so no need to recreate if it exists
|
|
if _, ok := c.state.BindMounts["/etc/hostname"]; !ok {
|
|
hostnamePath, err := c.writeStringToRundir("hostname", c.Hostname())
|
|
if err != nil {
|
|
return errors.Wrapf(err, "error creating hostname file for container %s", c.ID())
|
|
}
|
|
c.state.BindMounts["/etc/hostname"] = hostnamePath
|
|
}
|
|
|
|
// Make .containerenv
|
|
// Empty file, so no need to recreate if it exists
|
|
if _, ok := c.state.BindMounts["/run/.containerenv"]; !ok {
|
|
// Empty string for now, but we may consider populating this later
|
|
containerenvPath, err := c.writeStringToRundir(".containerenv", "")
|
|
if err != nil {
|
|
return errors.Wrapf(err, "error creating containerenv file for container %s", c.ID())
|
|
}
|
|
c.state.BindMounts["/run/.containerenv"] = containerenvPath
|
|
}
|
|
|
|
// Add Secret Mounts
|
|
secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.state.RunDir, c.RootUID(), c.RootGID(), rootless.IsRootless())
|
|
for _, mount := range secretMounts {
|
|
if _, ok := c.state.BindMounts[mount.Destination]; !ok {
|
|
c.state.BindMounts[mount.Destination] = mount.Source
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// generateResolvConf generates a containers resolv.conf
|
|
func (c *Container) generateResolvConf() (string, error) {
|
|
resolvConf := "/etc/resolv.conf"
|
|
for _, ns := range c.config.Spec.Linux.Namespaces {
|
|
if ns.Type == spec.NetworkNamespace {
|
|
if ns.Path != "" && !strings.HasPrefix(ns.Path, "/proc/") {
|
|
definedPath := filepath.Join("/etc/netns", filepath.Base(ns.Path), "resolv.conf")
|
|
_, err := os.Stat(definedPath)
|
|
if err == nil {
|
|
resolvConf = definedPath
|
|
} else if !os.IsNotExist(err) {
|
|
return "", errors.Wrapf(err, "failed to stat %s", definedPath)
|
|
}
|
|
}
|
|
break
|
|
}
|
|
}
|
|
|
|
// Determine the endpoint for resolv.conf in case it is a symlink
|
|
resolvPath, err := filepath.EvalSymlinks(resolvConf)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
contents, err := ioutil.ReadFile(resolvPath)
|
|
if err != nil {
|
|
return "", errors.Wrapf(err, "unable to read %s", resolvPath)
|
|
}
|
|
|
|
// Ensure that the container's /etc/resolv.conf is compatible with its
|
|
// network configuration.
|
|
// TODO: set ipv6 enable bool more sanely
|
|
resolv, err := resolvconf.FilterResolvDNS(contents, true, c.config.CreateNetNS)
|
|
if err != nil {
|
|
return "", errors.Wrapf(err, "error parsing host resolv.conf")
|
|
}
|
|
|
|
// Make a new resolv.conf
|
|
nameservers := resolvconf.GetNameservers(resolv.Content)
|
|
// slirp4netns has a built in DNS server.
|
|
if c.config.NetMode.IsSlirp4netns() {
|
|
nameservers = append(nameservers, "10.0.2.3")
|
|
}
|
|
if len(c.config.DNSServer) > 0 {
|
|
// We store DNS servers as net.IP, so need to convert to string
|
|
nameservers = []string{}
|
|
for _, server := range c.config.DNSServer {
|
|
nameservers = append(nameservers, server.String())
|
|
}
|
|
}
|
|
|
|
search := resolvconf.GetSearchDomains(resolv.Content)
|
|
if len(c.config.DNSSearch) > 0 {
|
|
search = c.config.DNSSearch
|
|
}
|
|
|
|
options := resolvconf.GetOptions(resolv.Content)
|
|
if len(c.config.DNSOption) > 0 {
|
|
options = c.config.DNSOption
|
|
}
|
|
|
|
destPath := filepath.Join(c.state.RunDir, "resolv.conf")
|
|
|
|
if err := os.Remove(destPath); err != nil && !os.IsNotExist(err) {
|
|
return "", errors.Wrapf(err, "error removing resolv.conf for container %s", c.ID())
|
|
}
|
|
|
|
// Build resolv.conf
|
|
if _, err = resolvconf.Build(destPath, nameservers, search, options); err != nil {
|
|
return "", errors.Wrapf(err, "error building resolv.conf for container %s", c.ID())
|
|
}
|
|
|
|
// Relabel resolv.conf for the container
|
|
if err := label.Relabel(destPath, c.config.MountLabel, true); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return filepath.Join(c.state.RunDir, "resolv.conf"), nil
|
|
}
|
|
|
|
// generateHosts creates a containers hosts file
|
|
func (c *Container) generateHosts(path string) (string, error) {
|
|
orig, err := ioutil.ReadFile(path)
|
|
if err != nil {
|
|
return "", errors.Wrapf(err, "unable to read %s", path)
|
|
}
|
|
hosts := string(orig)
|
|
hosts += c.getHosts()
|
|
return c.writeStringToRundir("hosts", hosts)
|
|
}
|
|
|
|
// appendHosts appends a container's config and state pertaining to hosts to a container's
|
|
// local hosts file. netCtr is the container from which the netNS information is
|
|
// taken.
|
|
// path is the basis of the hosts file, into which netCtr's netNS information will be appended.
|
|
func (c *Container) appendHosts(path string, netCtr *Container) (string, error) {
|
|
return c.appendStringToRundir("hosts", netCtr.getHosts())
|
|
}
|
|
|
|
// getHosts finds the pertinent information for a container's host file in its config and state
|
|
// and returns a string in a format that can be written to the host file
|
|
func (c *Container) getHosts() string {
|
|
var hosts string
|
|
if len(c.config.HostAdd) > 0 {
|
|
for _, host := range c.config.HostAdd {
|
|
// the host format has already been verified at this point
|
|
fields := strings.SplitN(host, ":", 2)
|
|
hosts += fmt.Sprintf("%s %s\n", fields[1], fields[0])
|
|
}
|
|
}
|
|
if len(c.state.NetworkStatus) > 0 && len(c.state.NetworkStatus[0].IPs) > 0 {
|
|
ipAddress := strings.Split(c.state.NetworkStatus[0].IPs[0].Address.String(), "/")[0]
|
|
hosts += fmt.Sprintf("%s\t%s\n", ipAddress, c.Hostname())
|
|
}
|
|
return hosts
|
|
}
|
|
|
|
// generatePasswd generates a container specific passwd file,
|
|
// iff g.config.User is a number
|
|
func (c *Container) generatePasswd() (string, error) {
|
|
var (
|
|
groupspec string
|
|
gid int
|
|
)
|
|
if c.config.User == "" {
|
|
return "", nil
|
|
}
|
|
spec := strings.SplitN(c.config.User, ":", 2)
|
|
userspec := spec[0]
|
|
if len(spec) > 1 {
|
|
groupspec = spec[1]
|
|
}
|
|
// If a non numeric User, then don't generate passwd
|
|
uid, err := strconv.ParseUint(userspec, 10, 32)
|
|
if err != nil {
|
|
return "", nil
|
|
}
|
|
// Lookup the user to see if it exists in the container image
|
|
_, err = lookup.GetUser(c.state.Mountpoint, userspec)
|
|
if err != nil && err != user.ErrNoPasswdEntries {
|
|
return "", err
|
|
}
|
|
if err == nil {
|
|
return "", nil
|
|
}
|
|
if groupspec != "" {
|
|
ugid, err := strconv.ParseUint(groupspec, 10, 32)
|
|
if err == nil {
|
|
gid = int(ugid)
|
|
} else {
|
|
group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
|
|
if err != nil {
|
|
return "", errors.Wrapf(err, "unable to get gid %s from group file", groupspec)
|
|
}
|
|
gid = group.Gid
|
|
}
|
|
}
|
|
originPasswdFile := filepath.Join(c.state.Mountpoint, "/etc/passwd")
|
|
orig, err := ioutil.ReadFile(originPasswdFile)
|
|
if err != nil && !os.IsNotExist(err) {
|
|
return "", errors.Wrapf(err, "unable to read passwd file %s", originPasswdFile)
|
|
}
|
|
|
|
pwd := fmt.Sprintf("%s%d:x:%d:%d:container user:%s:/bin/sh\n", orig, uid, uid, gid, c.WorkingDir())
|
|
passwdFile, err := c.writeStringToRundir("passwd", pwd)
|
|
if err != nil {
|
|
return "", errors.Wrapf(err, "failed to create temporary passwd file")
|
|
}
|
|
if os.Chmod(passwdFile, 0644); err != nil {
|
|
return "", err
|
|
}
|
|
return passwdFile, nil
|
|
}
|
|
|
|
func (c *Container) copyOwnerAndPerms(source, dest string) error {
|
|
info, err := os.Stat(source)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil
|
|
}
|
|
return errors.Wrapf(err, "cannot stat `%s`", dest)
|
|
}
|
|
if err := os.Chmod(dest, info.Mode()); err != nil {
|
|
return errors.Wrapf(err, "cannot chmod `%s`", dest)
|
|
}
|
|
if err := os.Chown(dest, int(info.Sys().(*syscall.Stat_t).Uid), int(info.Sys().(*syscall.Stat_t).Gid)); err != nil {
|
|
return errors.Wrapf(err, "cannot chown `%s`", dest)
|
|
}
|
|
return nil
|
|
}
|