//go:build linux || freebsd
// +build linux freebsd

package libpod

import (
"context"
"errors"
"fmt"
"io"
"io/ioutil"
"math"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"time"
metadata "github.com/checkpoint-restore/checkpointctl/lib"
"github.com/checkpoint-restore/go-criu/v5/stats"
cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/containers/buildah"
"github.com/containers/buildah/pkg/chrootuser"
"github.com/containers/buildah/pkg/overlay"
butil "github.com/containers/buildah/util"
"github.com/containers/common/libnetwork/types"
"github.com/containers/common/pkg/apparmor"
cutil "github.com/containers/common/pkg/util"
is "github.com/containers/image/v5/storage"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/annotations"
"github.com/containers/podman/v4/pkg/checkpoint/crutils"
"github.com/containers/podman/v4/pkg/criu"
"github.com/containers/podman/v4/pkg/lookup"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/pkg/util"
"github.com/containers/podman/v4/version"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/idtools"
securejoin "github.com/cyphar/filepath-securejoin"
runcuser "github.com/opencontainers/runc/libcontainer/user"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/sirupsen/logrus"
)
// getOverlayUpperAndWorkDir is an internal-only helper that returns the
// upper and work dirs parsed from overlay mount options.
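// For illustration (hypothetical values), options such as
//
//	[]string{"O", "upperdir=/tmp/upper", "workdir=/tmp/work"}
//
// yield ("/tmp/upper", "/tmp/work", nil); supplying only one of upperdir or
// workdir is an error, since overlayfs requires both.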
func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
upperDir := ""
workDir := ""
for _, o := range options {
if strings.HasPrefix(o, "upperdir") {
splitOpt := strings.SplitN(o, "=", 2)
if len(splitOpt) > 1 {
upperDir = splitOpt[1]
if upperDir == "" {
return "", "", errors.New("cannot accept empty value for upperdir")
}
}
}
if strings.HasPrefix(o, "workdir") {
splitOpt := strings.SplitN(o, "=", 2)
if len(splitOpt) > 1 {
workDir = splitOpt[1]
if workDir == "" {
return "", "", errors.New("cannot accept empty value for workdir")
}
}
}
}
if (upperDir != "" && workDir == "") || (upperDir == "" && workDir != "") {
return "", "", errors.New("must specify both upperdir and workdir")
}
return upperDir, workDir, nil
}
// Generate the OCI runtime spec for a container.
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
overrides := c.getUserOverrides()
execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
if err != nil {
if cutil.StringInSlice(c.config.User, c.config.HostUsers) {
execUser, err = lookupHostUser(c.config.User)
}
if err != nil {
return nil, err
}
}
// NewFromSpec() is deprecated according to its comment,
// however the recommended replacement just causes a nil map panic.
//nolint:staticcheck
g := generate.NewFromSpec(c.config.Spec)
// If the flag to mount all devices is set for a privileged container, add
// all the devices from the host's machine into the container
if c.config.MountAllDevices {
if err := util.AddPrivilegedDevices(&g); err != nil {
return nil, err
}
}
// If network namespace was requested, add it now
if err := c.addNetworkNamespace(&g); err != nil {
return nil, err
}
// Apply AppArmor checks and load the default profile if needed.
if len(c.config.Spec.Process.ApparmorProfile) > 0 {
updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
if err != nil {
return nil, err
}
g.SetProcessApparmorProfile(updatedProfile)
}
if err := c.makeBindMounts(); err != nil {
return nil, err
}
if err := c.mountNotifySocket(g); err != nil {
return nil, err
}
// Get host UID and GID based on the container process UID and GID.
hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
if err != nil {
return nil, err
}
// Add named volumes
for _, namedVol := range c.config.NamedVolumes {
volume, err := c.runtime.GetVolume(namedVol.Name)
if err != nil {
return nil, fmt.Errorf("error retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err)
}
mountPoint, err := volume.MountPoint()
if err != nil {
return nil, err
}
overlayFlag := false
upperDir := ""
workDir := ""
for _, o := range namedVol.Options {
if o == "O" {
overlayFlag = true
upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options)
if err != nil {
return nil, err
}
}
}
if overlayFlag {
var overlayMount spec.Mount
var overlayOpts *overlay.Options
contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
if err != nil {
return nil, err
}
overlayOpts = &overlay.Options{RootUID: c.RootUID(),
RootGID: c.RootGID(),
UpperDirOptionFragment: upperDir,
WorkDirOptionFragment: workDir,
GraphOpts: c.runtime.store.GraphOptions(),
}
overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts)
if err != nil {
return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err)
}
for _, o := range namedVol.Options {
if o == "U" {
if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
}
if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
}
}
}
g.AddMount(overlayMount)
} else {
volMount := spec.Mount{
Type: define.TypeBind,
Source: mountPoint,
Destination: namedVol.Dest,
Options: namedVol.Options,
}
g.AddMount(volMount)
}
}
// Check if the spec file mounts contain the options z, Z or U.
// If they have z or Z, relabel the source directory and then remove the option.
// If they have U, chown the source directory and then remove the option.
for i := range g.Config.Mounts {
m := &g.Config.Mounts[i]
var options []string
for _, o := range m.Options {
switch o {
case "U":
if m.Type == "tmpfs" {
options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...)
} else {
// only chown on initial creation of container
if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
}
}
case "z":
fallthrough
case "Z":
if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
return nil, err
}
default:
options = append(options, o)
}
}
m.Options = options
}
c.setProcessLabel(&g)
c.setMountLabel(&g)
// Add bind mounts to container
for dstPath, srcPath := range c.state.BindMounts {
newMount := spec.Mount{
Type: define.TypeBind,
Source: srcPath,
Destination: dstPath,
Options: bindOptions,
}
if c.IsReadOnly() && dstPath != "/dev/shm" {
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
}
if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
}
if !MountExists(g.Mounts(), dstPath) {
g.AddMount(newMount)
} else {
logrus.Infof("User mount overriding libpod mount at %q", dstPath)
}
}
// Add overlay volumes
for _, overlayVol := range c.config.OverlayVolumes {
upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options)
if err != nil {
return nil, err
}
contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
if err != nil {
return nil, err
}
overlayOpts := &overlay.Options{RootUID: c.RootUID(),
RootGID: c.RootGID(),
UpperDirOptionFragment: upperDir,
WorkDirOptionFragment: workDir,
GraphOpts: c.runtime.store.GraphOptions(),
}
overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts)
if err != nil {
return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err)
}
// Check overlay volume options
for _, o := range overlayVol.Options {
if o == "U" {
if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
}
if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
}
}
}
g.AddMount(overlayMount)
}
// Add image volumes as overlay mounts
for _, volume := range c.config.ImageVolumes {
// Mount the specified image.
img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil)
if err != nil {
return nil, fmt.Errorf("error creating image volume %q:%q: %w", volume.Source, volume.Dest, err)
}
mountPoint, err := img.Mount(ctx, nil, "")
if err != nil {
return nil, fmt.Errorf("error mounting image volume %q:%q: %w", volume.Source, volume.Dest, err)
}
contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
if err != nil {
return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err)
}
var overlayMount spec.Mount
if volume.ReadWrite {
overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
} else {
overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
}
if err != nil {
return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err)
}
g.AddMount(overlayMount)
}
hasHomeSet := false
for _, s := range c.config.Spec.Process.Env {
if strings.HasPrefix(s, "HOME=") {
hasHomeSet = true
break
}
}
if !hasHomeSet && execUser.Home != "" {
c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", execUser.Home))
}
if c.config.User != "" {
// User and Group must go together
g.SetProcessUID(uint32(execUser.Uid))
g.SetProcessGID(uint32(execUser.Gid))
g.AddProcessAdditionalGid(uint32(execUser.Gid))
}
if c.config.Umask != "" {
decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
if err != nil {
return nil, fmt.Errorf("invalid Umask Value: %w", err)
}
umask := uint32(decVal)
g.Config.Process.User.Umask = &umask
}
// Add additional groups if c.config.Groups is not empty
if len(c.config.Groups) > 0 {
gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
if err != nil {
return nil, fmt.Errorf("error looking up supplemental groups for container %s: %w", c.ID(), err)
}
for _, gid := range gids {
g.AddProcessAdditionalGid(gid)
}
}
if err := c.addSystemdMounts(&g); err != nil {
return nil, err
}
// Look up and add groups the user belongs to, if a group wasn't directly specified
if !strings.Contains(c.config.User, ":") {
// the gidMappings that are present inside the container user namespace
var gidMappings []idtools.IDMap
switch {
case len(c.config.IDMappings.GIDMap) > 0:
gidMappings = c.config.IDMappings.GIDMap
case rootless.IsRootless():
// Check whether the current user namespace has enough gids available.
availableGids, err := rootless.GetAvailableGids()
if err != nil {
return nil, fmt.Errorf("cannot read number of available GIDs: %w", err)
}
gidMappings = []idtools.IDMap{{
ContainerID: 0,
HostID: 0,
Size: int(availableGids),
}}
default:
gidMappings = []idtools.IDMap{{
ContainerID: 0,
HostID: 0,
Size: math.MaxInt32,
}}
}
for _, gid := range execUser.Sgids {
isGIDAvailable := false
for _, m := range gidMappings {
if gid >= m.ContainerID && gid < m.ContainerID+m.Size {
isGIDAvailable = true
break
}
}
if isGIDAvailable {
g.AddProcessAdditionalGid(uint32(gid))
} else {
logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid)
}
}
}
// Add shared namespaces from other containers
if err := c.addSharedNamespaces(&g); err != nil {
return nil, err
}
g.SetRootPath(c.state.Mountpoint)
g.AddAnnotation(annotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano))
g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal))
if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod)
}
if err := c.setCgroupsPath(&g); err != nil {
return nil, err
}
// Warning: CDI may alter g.Config in place.
if len(c.config.CDIDevices) > 0 {
registry := cdi.GetRegistry(
cdi.WithAutoRefresh(false),
)
if err := registry.Refresh(); err != nil {
logrus.Debugf("The following error was triggered when refreshing the CDI registry: %v", err)
}
_, err := registry.InjectDevices(g.Config, c.config.CDIDevices...)
if err != nil {
return nil, fmt.Errorf("error setting up CDI devices: %w", err)
}
}
// Mounts need to be sorted so paths will not cover other paths
mounts := sortMounts(g.Mounts())
g.ClearMounts()
for _, m := range mounts {
// We need to remove all symlinks from tmpfs mounts.
// Runc and other runtimes may choke on them.
// Easy solution: use securejoin to do a scoped evaluation of
// the links, then trim off the mount prefix.
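// For example (illustrative paths): if /tmp inside the image is a
// symlink to /var/tmp, a tmpfs mount destined for /tmp is rewritten to
// target /var/tmp, so the runtime never mounts onto a symlink.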
if m.Type == "tmpfs" {
finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
if err != nil {
return nil, fmt.Errorf("error resolving symlinks for mount destination %s: %w", m.Destination, err)
}
trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
m.Destination = trimmedPath
}
g.AddMount(m)
}
if err := c.addRootPropagation(&g, mounts); err != nil {
return nil, err
}
// Warning: precreate hooks may alter g.Config in place.
if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
return nil, fmt.Errorf("error setting up OCI Hooks: %w", err)
}
if len(c.config.EnvSecrets) > 0 {
manager, err := c.runtime.SecretsManager()
if err != nil {
return nil, err
}
for name, secr := range c.config.EnvSecrets {
_, data, err := manager.LookupSecretData(secr.Name)
if err != nil {
return nil, err
}
g.AddProcessEnv(name, string(data))
}
}
// Pass down the LISTEN_* environment (see #10443).
for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} {
if val, ok := os.LookupEnv(key); ok {
// Force the PID to `1` since we cannot rely on (all
// versions of) all runtimes to do it for us.
if key == "LISTEN_PID" {
val = "1"
}
g.AddProcessEnv(key, val)
}
}
return g.Config, nil
}
// isWorkDirSymlink returns true if the resolved workdir is a symlink or a chain
// of symlinks whose final target is present either on a volume, on a mount, or
// inside the container; otherwise it returns false. This function is meant for
// internal use only and can change at any point in time.
func (c *Container) isWorkDirSymlink(resolvedPath string) bool {
// We cannot create the workdir since an explicit --workdir is
// set in the config, but the workdir could also be a symlink.
// If it's a symlink, check if the resolved target is present in the container.
// If so, that's a valid use case: return true.
maxSymLinks := 0
for {
// Linux only supports a chain of 40 links.
// Reference: https://github.com/torvalds/linux/blob/master/include/linux/namei.h#L13
if maxSymLinks > 40 {
break
}
resolvedSymlink, err := os.Readlink(resolvedPath)
if err != nil {
// End sym-link resolution loop.
break
}
if resolvedSymlink != "" {
_, resolvedSymlinkWorkdir, err := c.resolvePath(c.state.Mountpoint, resolvedSymlink)
if isPathOnVolume(c, resolvedSymlinkWorkdir) || isPathOnBindMount(c, resolvedSymlinkWorkdir) {
// Resolved symlink exists on external volume or mount
return true
}
if err != nil {
// Could not resolve path so end sym-link resolution loop.
break
}
if resolvedSymlinkWorkdir != "" {
resolvedPath = resolvedSymlinkWorkdir
_, err := os.Stat(resolvedSymlinkWorkdir)
if err == nil {
// Symlink resolved successfully and the resolved path exists in the container;
// this is a valid use case, so return true.
logrus.Debugf("Workdir is a symlink with target to %q and resolved symlink exists on container", resolvedSymlink)
return true
}
}
}
maxSymLinks++
}
return false
}
// resolveWorkDir resolves the container's workdir and, depending on the
// configuration, will create it, or error out if it does not exist.
// Note that the container must be mounted before calling this function.
func (c *Container) resolveWorkDir() error {
workdir := c.WorkingDir()
// If the specified workdir is a subdir of a volume or mount,
// we don't need to do anything. The runtime is taking care of
// that.
if isPathOnVolume(c, workdir) || isPathOnBindMount(c, workdir) {
logrus.Debugf("Workdir %q resolved to a volume or mount", workdir)
return nil
}
_, resolvedWorkdir, err := c.resolvePath(c.state.Mountpoint, workdir)
if err != nil {
return err
}
logrus.Debugf("Workdir %q resolved to host path %q", workdir, resolvedWorkdir)
st, err := os.Stat(resolvedWorkdir)
if err == nil {
if !st.IsDir() {
return fmt.Errorf("workdir %q exists on container %s, but is not a directory", workdir, c.ID())
}
return nil
}
if !c.config.CreateWorkingDir {
// No need to create it (e.g., `--workdir=/foo`), so let's make sure
// the path exists on the container.
if err != nil {
if os.IsNotExist(err) {
// If the resolved workdir path is a valid symlink,
// return nil because this is a valid use case.
if c.isWorkDirSymlink(resolvedWorkdir) {
return nil
}
return fmt.Errorf("workdir %q does not exist on container %s", workdir, c.ID())
}
// This might be a serious error (e.g., permission), so
// we need to return the full error.
return fmt.Errorf("error detecting workdir %q on container %s: %w", workdir, c.ID(), err)
}
return nil
}
if err := os.MkdirAll(resolvedWorkdir, 0755); err != nil {
if os.IsExist(err) {
return nil
}
return fmt.Errorf("error creating container %s workdir: %w", c.ID(), err)
}
// Chown the newly created workdir to the container user.
uid, gid, _, err := chrootuser.GetUser(c.state.Mountpoint, c.User())
if err != nil {
return fmt.Errorf("error looking up %s inside of the container %s: %w", c.User(), c.ID(), err)
}
if err := os.Chown(resolvedWorkdir, int(uid), int(gid)); err != nil {
return fmt.Errorf("error chowning container %s workdir to container root: %w", c.ID(), err)
}
return nil
}
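// getUserOverrides returns lookup.Overrides pointing at any /etc/passwd and
// /etc/group files that were mounted into the container, so user and group
// lookups consult those instead of the image's files.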
func (c *Container) getUserOverrides() *lookup.Overrides {
var hasPasswdFile, hasGroupFile bool
overrides := lookup.Overrides{}
for _, m := range c.config.Spec.Mounts {
if m.Destination == "/etc/passwd" {
overrides.ContainerEtcPasswdPath = m.Source
hasPasswdFile = true
}
if m.Destination == "/etc/group" {
overrides.ContainerEtcGroupPath = m.Source
hasGroupFile = true
}
if m.Destination == "/etc" {
if !hasPasswdFile {
overrides.ContainerEtcPasswdPath = filepath.Join(m.Source, "passwd")
}
if !hasGroupFile {
overrides.ContainerEtcGroupPath = filepath.Join(m.Source, "group")
}
}
}
if path, ok := c.state.BindMounts["/etc/passwd"]; ok {
overrides.ContainerEtcPasswdPath = path
}
return &overrides
}
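// lookupHostUser looks up a user on the host and returns a corresponding
// ExecUser with the host's UID, GID, and home directory.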
func lookupHostUser(name string) (*runcuser.ExecUser, error) {
var execUser runcuser.ExecUser
// Look up User on host
u, err := util.LookupUser(name)
if err != nil {
return &execUser, err
}
uid, err := strconv.ParseUint(u.Uid, 10, 32)
if err != nil {
return &execUser, err
}
gid, err := strconv.ParseUint(u.Gid, 10, 32)
if err != nil {
return &execUser, err
}
execUser.Uid = int(uid)
execUser.Gid = int(gid)
execUser.Home = u.HomeDir
return &execUser, nil
}
// mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set
// and if the sdnotify mode is set to container. It also sets c.notifySocket
// to avoid redundantly looking up the env variable.
func (c *Container) mountNotifySocket(g generate.Generator) error {
if c.config.SdNotifySocket == "" {
return nil
}
if c.config.SdNotifyMode != define.SdNotifyModeContainer {
return nil
}
notifyDir := filepath.Join(c.bundlePath(), "notify")
logrus.Debugf("Checking notify %q dir", notifyDir)
if err := os.MkdirAll(notifyDir, 0755); err != nil {
if !os.IsExist(err) {
return fmt.Errorf("unable to create notify %q dir: %w", notifyDir, err)
}
}
if err := label.Relabel(notifyDir, c.MountLabel(), true); err != nil {
return fmt.Errorf("relabel failed %q: %w", notifyDir, err)
}
logrus.Debugf("Add bindmount notify %q dir", notifyDir)
if _, ok := c.state.BindMounts["/run/notify"]; !ok {
c.state.BindMounts["/run/notify"] = notifyDir
}
// Set the container's notify socket to the proxy socket created by conmon
g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock")
return nil
}
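// addCheckpointImageMetadata annotates a checkpoint image with details about
// the container and the host (CRIU, runtime, conmon, kernel, distribution)
// so compatibility can be checked before the checkpoint is restored.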
func (c *Container) addCheckpointImageMetadata(importBuilder *buildah.Builder) error {
// Get information about host environment
hostInfo, err := c.Runtime().hostInfo()
if err != nil {
return fmt.Errorf("getting host info: %w", err)
}
criuVersion, err := criu.GetCriuVersion()
if err != nil {
return fmt.Errorf("getting criu version: %w", err)
}
rootfsImageID, rootfsImageName := c.Image()
// Add image annotations with information about the container and the host.
// This information is useful to check compatibility before restoring the checkpoint
checkpointImageAnnotations := map[string]string{
define.CheckpointAnnotationName: c.config.Name,
define.CheckpointAnnotationRawImageName: c.config.RawImageName,
define.CheckpointAnnotationRootfsImageID: rootfsImageID,
define.CheckpointAnnotationRootfsImageName: rootfsImageName,
define.CheckpointAnnotationPodmanVersion: version.Version.String(),
define.CheckpointAnnotationCriuVersion: strconv.Itoa(criuVersion),
define.CheckpointAnnotationRuntimeName: hostInfo.OCIRuntime.Name,
define.CheckpointAnnotationRuntimeVersion: hostInfo.OCIRuntime.Version,
define.CheckpointAnnotationConmonVersion: hostInfo.Conmon.Version,
define.CheckpointAnnotationHostArch: hostInfo.Arch,
define.CheckpointAnnotationHostKernel: hostInfo.Kernel,
define.CheckpointAnnotationCgroupVersion: hostInfo.CgroupsVersion,
define.CheckpointAnnotationDistributionVersion: hostInfo.Distribution.Version,
define.CheckpointAnnotationDistributionName: hostInfo.Distribution.Distribution,
}
for key, value := range checkpointImageAnnotations {
importBuilder.SetAnnotation(key, value)
}
return nil
}
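// resolveCheckpointImageName resolves options.CreateImage to a fully
// qualified image name; it is a no-op if no checkpoint image was requested.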
func (c *Container) resolveCheckpointImageName(options *ContainerCheckpointOptions) error {
if options.CreateImage == "" {
return nil
}
// Resolve image name
resolvedImageName, err := c.runtime.LibimageRuntime().ResolveName(options.CreateImage)
if err != nil {
return err
}
options.CreateImage = resolvedImageName
return nil
}
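// createCheckpointImage exports the container checkpoint to a temporary tar
// archive and commits it, together with compatibility annotations, as a new
// image built from scratch.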
func (c *Container) createCheckpointImage(ctx context.Context, options ContainerCheckpointOptions) error {
if options.CreateImage == "" {
return nil
}
logrus.Debugf("Create checkpoint image %s", options.CreateImage)
// Create storage reference
imageRef, err := is.Transport.ParseStoreReference(c.runtime.store, options.CreateImage)
if err != nil {
return errors.New("failed to parse image name")
}
// Build an image from scratch
builderOptions := buildah.BuilderOptions{
FromImage: "scratch",
}
importBuilder, err := buildah.NewBuilder(ctx, c.runtime.store, builderOptions)
if err != nil {
return err
}
// Clean up buildah working container
defer func() {
if err := importBuilder.Delete(); err != nil {
logrus.Errorf("Image builder delete failed: %v", err)
}
}()
if err := c.prepareCheckpointExport(); err != nil {
return err
}
// Export checkpoint into temporary tar file
tmpDir, err := ioutil.TempDir("", "checkpoint_image_")
if err != nil {
return err
}
defer os.RemoveAll(tmpDir)
options.TargetFile = path.Join(tmpDir, "checkpoint.tar")
if err := c.exportCheckpoint(options); err != nil {
return err
}
// Copy the checkpoint from the temporary tar file into the image
addAndCopyOptions := buildah.AddAndCopyOptions{}
if err := importBuilder.Add("", true, addAndCopyOptions, options.TargetFile); err != nil {
return err
}
if err := c.addCheckpointImageMetadata(importBuilder); err != nil {
return err
}
commitOptions := buildah.CommitOptions{
Squash: true,
SystemContext: c.runtime.imageContext,
}
// Create checkpoint image
id, _, _, err := importBuilder.Commit(ctx, imageRef, commitOptions)
if err != nil {
return err
}
logrus.Debugf("Created checkpoint image: %s", id)
return nil
}
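// exportCheckpoint writes the container checkpoint, optionally including the
// root file-system diff and archives of named volumes, to options.TargetFile.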
func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
if len(c.Dependencies()) == 1 {
// Check if the dependency is an infra container. If it is, we can checkpoint
// the container out of the Pod.
if c.config.Pod == "" {
return errors.New("cannot export checkpoints of containers with dependencies")
}
pod, err := c.runtime.state.Pod(c.config.Pod)
if err != nil {
return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
}
infraID, err := pod.InfraContainerID()
if err != nil {
return fmt.Errorf("cannot retrieve infra container ID for pod %s: %w", c.config.Pod, err)
}
if c.Dependencies()[0] != infraID {
return errors.New("cannot export checkpoints of containers with dependencies")
}
}
if len(c.Dependencies()) > 1 {
return errors.New("cannot export checkpoints of containers with dependencies")
}
logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile)
includeFiles := []string{
"artifacts",
metadata.DevShmCheckpointTar,
metadata.ConfigDumpFile,
metadata.SpecDumpFile,
metadata.NetworkStatusFile,
stats.StatsDump,
}
if c.LogDriver() == define.KubernetesLogging ||
c.LogDriver() == define.JSONLogging {
includeFiles = append(includeFiles, "ctr.log")
}
if options.PreCheckPoint {
includeFiles = append(includeFiles, preCheckpointDir)
} else {
includeFiles = append(includeFiles, metadata.CheckpointDirectory)
}
// Get root file-system changes included in the checkpoint archive
var addToTarFiles []string
if !options.IgnoreRootfs {
// To correctly track deleted files, let's go through the output of 'podman diff'
rootFsChanges, err := c.runtime.GetDiff("", c.ID(), define.DiffContainer)
if err != nil {
return fmt.Errorf("error exporting root file-system diff for %q: %w", c.ID(), err)
}
// Assign with = (not :=) so the cleanup loop below sees these files.
addToTarFiles, err = crutils.CRCreateRootFsDiffTar(&rootFsChanges, c.state.Mountpoint, c.bundlePath())
if err != nil {
return err
}
includeFiles = append(includeFiles, addToTarFiles...)
}
// Folder containing archived volumes that will be included in the export
expVolDir := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory)
// Create an archive for each volume associated with the container
if !options.IgnoreVolumes {
if err := os.MkdirAll(expVolDir, 0700); err != nil {
return fmt.Errorf("error creating volumes export directory %q: %w", expVolDir, err)
}
for _, v := range c.config.NamedVolumes {
volumeTarFilePath := filepath.Join(metadata.CheckpointVolumesDirectory, v.Name+".tar")
volumeTarFileFullPath := filepath.Join(c.bundlePath(), volumeTarFilePath)
volumeTarFile, err := os.Create(volumeTarFileFullPath)
if err != nil {
return fmt.Errorf("error creating %q: %w", volumeTarFileFullPath, err)
}
volume, err := c.runtime.GetVolume(v.Name)
if err != nil {
return err
}
mp, err := volume.MountPoint()
if err != nil {
return err
}
if mp == "" {
return fmt.Errorf("volume %s is not mounted, cannot export: %w", volume.Name(), define.ErrInternal)
}
input, err := archive.TarWithOptions(mp, &archive.TarOptions{
Compression: archive.Uncompressed,
IncludeSourceDir: true,
})
if err != nil {
return fmt.Errorf("error reading volume directory %q: %w", v.Dest, err)
}
_, err = io.Copy(volumeTarFile, input)
if err != nil {
return err
}
volumeTarFile.Close()
includeFiles = append(includeFiles, volumeTarFilePath)
}
}
input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{
Compression: options.Compression,
IncludeSourceDir: true,
IncludeFiles: includeFiles,
})
if err != nil {
return fmt.Errorf("error reading checkpoint directory %q: %w", c.ID(), err)
}
outFile, err := os.Create(options.TargetFile)
if err != nil {
return fmt.Errorf("error creating checkpoint export file %q: %w", options.TargetFile, err)
}
defer outFile.Close()
if err := os.Chmod(options.TargetFile, 0600); err != nil {
return err
}
_, err = io.Copy(outFile, input)
if err != nil {
return err
}
for _, file := range addToTarFiles {
os.Remove(filepath.Join(c.bundlePath(), file))
}
if !options.IgnoreVolumes {
os.RemoveAll(expVolDir)
}
return nil
}
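// checkpointRestoreSupported returns an error unless both the installed CRIU
// version and the configured OCI runtime support checkpoint/restore.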
func (c *Container) checkpointRestoreSupported(version int) error {
if !criu.CheckForCriu(version) {
return fmt.Errorf("checkpoint/restore requires at least CRIU %d", version)
}
if !c.ociRuntime.SupportsCheckpoint() {
return errors.New("configured runtime does not support checkpoint/restore")
}
return nil
}
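// checkpoint checkpoints a running container via the OCI runtime and either
// exports the result to a tar archive or commits it as a checkpoint image,
// depending on options. It returns CRIU dump statistics (if requested) and
// the runtime's checkpoint duration.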
func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
return nil, 0, err
}
if c.state.State != define.ContainerStateRunning {
return nil, 0, fmt.Errorf("%q is not running, cannot checkpoint: %w", c.state.State, define.ErrCtrStateInvalid)
}
if c.AutoRemove() && options.TargetFile == "" {
return nil, 0, errors.New("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
}
if err := c.resolveCheckpointImageName(&options); err != nil {
return nil, 0, err
}
if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil {
return nil, 0, err
}
// Setting CheckpointLog early in case there is a failure.
c.state.CheckpointLog = path.Join(c.bundlePath(), "dump.log")
c.state.CheckpointPath = c.CheckpointPath()
runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options)
if err != nil {
return nil, 0, err
}
// Keep the content of /dev/shm directory
if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
shmDirTarFile, err := os.Create(shmDirTarFileFullPath)
if err != nil {
return nil, 0, err
}
defer shmDirTarFile.Close()
input, err := archive.TarWithOptions(c.config.ShmDir, &archive.TarOptions{
Compression: archive.Uncompressed,
IncludeSourceDir: true,
})
if err != nil {
return nil, 0, err
}
if _, err = io.Copy(shmDirTarFile, input); err != nil {
return nil, 0, err
}
}
// Save network.status. This is needed to restore the container with
// the same IP. Currently limited to one IP address in a container
// with one interface.
// FIXME: will this break something?
if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil {
return nil, 0, err
}
defer c.newContainerEvent(events.Checkpoint)
// There is a bug in CRIU (https://github.com/checkpoint-restore/criu/issues/116):
// we have to change the symbolic link from an absolute path to a relative path.
if options.WithPrevious {
os.Remove(path.Join(c.CheckpointPath(), "parent"))
if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil {
return nil, 0, err
}
}
if options.TargetFile != "" {
if err := c.exportCheckpoint(options); err != nil {
return nil, 0, err
}
} else {
if err := c.createCheckpointImage(ctx, options); err != nil {
return nil, 0, err
}
}
logrus.Debugf("Checkpointed container %s", c.ID())
if !options.KeepRunning && !options.PreCheckPoint {
c.state.State = define.ContainerStateStopped
c.state.Checkpointed = true
c.state.CheckpointedTime = time.Now()
c.state.Restored = false
c.state.RestoredTime = time.Time{}
// Clean up Storage and Network
if err := c.cleanup(ctx); err != nil {
return nil, 0, err
}
}
criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) {
if !options.PrintStats {
return nil, nil
}
statsDirectory, err := os.Open(c.bundlePath())
if err != nil {
return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
}
dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory)
if err != nil {
return nil, fmt.Errorf("displaying checkpointing statistics not possible: %w", err)
}
return &define.CRIUCheckpointRestoreStatistics{
FreezingTime: dumpStatistics.GetFreezingTime(),
FrozenTime: dumpStatistics.GetFrozenTime(),
MemdumpTime: dumpStatistics.GetMemdumpTime(),
MemwriteTime: dumpStatistics.GetMemwriteTime(),
PagesScanned: dumpStatistics.GetPagesScanned(),
PagesWritten: dumpStatistics.GetPagesWritten(),
}, nil
}()
if err != nil {
return nil, 0, err
}
if !options.Keep && !options.PreCheckPoint {
cleanup := []string{
"dump.log",
stats.StatsDump,
metadata.ConfigDumpFile,
metadata.SpecDumpFile,
}
for _, del := range cleanup {
file := filepath.Join(c.bundlePath(), del)
if err := os.Remove(file); err != nil {
logrus.Debugf("Unable to remove file %s", file)
}
}
// The file has been deleted. Do not mention it.
c.state.CheckpointLog = ""
}
c.state.FinishedTime = time.Now()
return criuStatistics, runtimeCheckpointDuration, c.save()
}
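// generateContainerSpec regenerates and saves the container's config.json;
// this allows an imported checkpoint to be restored under a new container
// name.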
func (c *Container) generateContainerSpec() error {
// Make sure the newly created config.json exists on disk.
// NewFromSpec() is deprecated according to its comment,
// however the recommended replacement just causes a nil map panic.
//nolint:staticcheck
g := generate.NewFromSpec(c.config.Spec)
if err := c.saveSpec(g.Config); err != nil {
return fmt.Errorf("saving imported container specification for restore failed: %w", err)
}
return nil
}
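// importCheckpointImage mounts the given checkpoint image and copies the
// checkpoint files it contains into the container's bundle directory.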
func (c *Container) importCheckpointImage(ctx context.Context, imageID string) error {
img, _, err := c.Runtime().LibimageRuntime().LookupImage(imageID, nil)
if err != nil {
return err
}
mountPoint, err := img.Mount(ctx, nil, "")
defer func() {
if err := c.unmount(true); err != nil {
logrus.Errorf("Failed to unmount container: %v", err)
}
}()
if err != nil {
return err
}
// Import all checkpoint files except ConfigDumpFile and SpecDumpFile. We
// generate new container config files to enable specifying a new
// container name.
checkpoint := []string{
"artifacts",
metadata.CheckpointDirectory,
metadata.CheckpointVolumesDirectory,
metadata.DevShmCheckpointTar,
metadata.RootFsDiffTar,
metadata.DeletedFilesFile,
metadata.PodOptionsFile,
metadata.PodDumpFile,
}
for _, name := range checkpoint {
src := filepath.Join(mountPoint, name)
dst := filepath.Join(c.bundlePath(), name)
if err := archive.NewDefaultArchiver().CopyWithTar(src, dst); err != nil {
logrus.Debugf("Can't import '%s' from checkpoint image", name)
}
}
return c.generateContainerSpec()
}
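// importCheckpointTar unpacks a checkpoint tar archive, minus its container
// config files, into the container's bundle directory.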
func (c *Container) importCheckpointTar(input string) error {
if err := crutils.CRImportCheckpointWithoutConfig(c.bundlePath(), input); err != nil {
return err
}
return c.generateContainerSpec()
}
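// importPreCheckpoint unpacks a pre-checkpoint (pre-dump) archive into the
// container's bundle directory.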
func (c *Container) importPreCheckpoint(input string) error {
archiveFile, err := os.Open(input)
if err != nil {
return fmt.Errorf("failed to open pre-checkpoint archive for import: %w", err)
}
defer archiveFile.Close()
err = archive.Untar(archiveFile, c.bundlePath(), nil)
if err != nil {
return fmt.Errorf("unpacking of pre-checkpoint archive %s failed: %w", input, err)
}
return nil
}
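// restore restores a container from a checkpoint, optionally importing it
// first from a tar archive or a checkpoint image. It returns CRIU restore
// statistics (if requested) and the runtime's restore duration.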
func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) {
minCriuVersion := func() int {
if options.Pod == "" {
return criu.MinCriuVersion
}
return criu.PodCriuVersion
}()
if err := c.checkpointRestoreSupported(minCriuVersion); err != nil {
return nil, 0, err
}
if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) {
return nil, 0, fmt.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
}
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
return nil, 0, fmt.Errorf("container %s is running or paused, cannot restore: %w", c.ID(), define.ErrCtrStateInvalid)
}
if options.ImportPrevious != "" {
if err := c.importPreCheckpoint(options.ImportPrevious); err != nil {
return nil, 0, err
}
}
if options.TargetFile != "" {
if err := c.importCheckpointTar(options.TargetFile); err != nil {
return nil, 0, err
}
} else if options.CheckpointImageID != "" {
if err := c.importCheckpointImage(ctx, options.CheckpointImageID); err != nil {
return nil, 0, err
}
}
// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
// no sense to try a restore. This is a minimal check that a checkpoint exists.
if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
return nil, 0, fmt.Errorf("a complete checkpoint for this container cannot be found, cannot restore: %w", err)
}
if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "restore.log", c.MountLabel()); err != nil {
return nil, 0, err
}
// Setting RestoreLog early in case there is a failure.
c.state.RestoreLog = path.Join(c.bundlePath(), "restore.log")
c.state.CheckpointPath = c.CheckpointPath()
// Read network configuration from checkpoint
var netStatus map[string]types.StatusBlock
_, err := metadata.ReadJSONFile(&netStatus, c.bundlePath(), metadata.NetworkStatusFile)
if err != nil {
logrus.Infof("Failed to unmarshal network status, cannot restore the same ip/mac: %v", err)
}
// If the restored container should get a new name, the IP address of
// the container will not be restored. This assumes that if a new name is
// specified, the container is restored multiple times.
// TODO: This implicit restoring with or without IP depending on an
// unrelated restore parameter (--name) does not seem like the
// best solution.
if err == nil && options.Name == "" && (!options.IgnoreStaticIP || !options.IgnoreStaticMAC) {
// The file with the network.status does exist. Let's restore the
// container with the same networks settings as during checkpointing.
networkOpts, err := c.networks()
if err != nil {
return nil, 0, err
}
netOpts := make(map[string]types.PerNetworkOptions, len(netStatus))
for network, perNetOpts := range networkOpts {
// unset mac and ips before we start adding the ones from the status
perNetOpts.StaticMAC = nil
perNetOpts.StaticIPs = nil
for name, netInt := range netStatus[network].Interfaces {
perNetOpts.InterfaceName = name
if !options.IgnoreStaticMAC {
perNetOpts.StaticMAC = netInt.MacAddress
}
if !options.IgnoreStaticIP {
for _, netAddress := range netInt.Subnets {
perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
}
}
// Normally interfaces have a length of 1; only some special CNI configs could yield more.
// For now, just use the first interface to get the IPs; this should be good enough for most cases.
break
}
netOpts[network] = perNetOpts
}
c.perNetworkOpts = netOpts
}
defer func() {
if retErr != nil {
if err := c.cleanup(ctx); err != nil {
logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
}
}
}()
if err := c.prepare(); err != nil {
return nil, 0, err
}
// Read config
jsonPath := filepath.Join(c.bundlePath(), "config.json")
logrus.Debugf("generate.NewFromFile at %v", jsonPath)
g, err := generate.NewFromFile(jsonPath)
if err != nil {
logrus.Debugf("generate.NewFromFile failed with %v", err)
return nil, 0, err
}
// Restoring from an import means that we are doing migration
if options.TargetFile != "" || options.CheckpointImageID != "" {
g.SetRootPath(c.state.Mountpoint)
}
// We want to have the same network namespace as before.
if err := c.addNetworkNamespace(&g); err != nil {
return nil, 0, err
}
if options.Pod != "" {
// Running in a Pod means that we have to change all namespace settings to
// the ones from the infrastructure container.
pod, err := c.runtime.LookupPod(options.Pod)
if err != nil {
return nil, 0, fmt.Errorf("pod %q cannot be retrieved: %w", options.Pod, err)
}
infraContainer, err := pod.InfraContainer()
if err != nil {
return nil, 0, fmt.Errorf("cannot retrieved infra container from pod %q: %w", options.Pod, err)
}
infraContainer.lock.Lock()
if err := infraContainer.syncContainer(); err != nil {
infraContainer.lock.Unlock()
return nil, 0, fmt.Errorf("error syncing infrastructure container %s status: %w", infraContainer.ID(), err)
}
if infraContainer.state.State != define.ContainerStateRunning {
if err := infraContainer.initAndStart(ctx); err != nil {
infraContainer.lock.Unlock()
return nil, 0, fmt.Errorf("error starting infrastructure container %s status: %w", infraContainer.ID(), err)
}
}
infraContainer.lock.Unlock()
if c.config.IPCNsCtr != "" {
nsPath, err := infraContainer.namespacePath(IPCNS)
if err != nil {
return nil, 0, fmt.Errorf("cannot retrieve IPC namespace path for Pod %q: %w", options.Pod, err)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil {
return nil, 0, err
}
}
if c.config.NetNsCtr != "" {
nsPath, err := infraContainer.namespacePath(NetNS)
if err != nil {
return nil, 0, fmt.Errorf("cannot retrieve network namespace path for Pod %q: %w", options.Pod, err)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil {
return nil, 0, err
}
}
if c.config.PIDNsCtr != "" {
nsPath, err := infraContainer.namespacePath(PIDNS)
if err != nil {
return nil, 0, fmt.Errorf("cannot retrieve PID namespace path for Pod %q: %w", options.Pod, err)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil {
return nil, 0, err
}
}
if c.config.UTSNsCtr != "" {
nsPath, err := infraContainer.namespacePath(UTSNS)
if err != nil {
return nil, 0, fmt.Errorf("cannot retrieve UTS namespace path for Pod %q: %w", options.Pod, err)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil {
return nil, 0, err
}
}
if c.config.CgroupNsCtr != "" {
nsPath, err := infraContainer.namespacePath(CgroupNS)
if err != nil {
return nil, 0, fmt.Errorf("cannot retrieve Cgroup namespace path for Pod %q: %w", options.Pod, err)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil {
return nil, 0, err
}
}
}
if err := c.makeBindMounts(); err != nil {
return nil, 0, err
}
if options.TargetFile != "" || options.CheckpointImageID != "" {
for dstPath, srcPath := range c.state.BindMounts {
newMount := spec.Mount{
Type: "bind",
Source: srcPath,
Destination: dstPath,
Options: []string{"bind", "private"},
}
if c.IsReadOnly() && dstPath != "/dev/shm" {
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
}
if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
}
if !MountExists(g.Mounts(), dstPath) {
g.AddMount(newMount)
}
}
}
// Restore /dev/shm content
if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
if _, err := os.Stat(shmDirTarFileFullPath); err != nil {
logrus.Debug("Container checkpoint doesn't contain dev/shm: ", err.Error())
} else {
shmDirTarFile, err := os.Open(shmDirTarFileFullPath)
if err != nil {
return nil, 0, err
}
defer shmDirTarFile.Close()
if err := archive.UntarUncompressed(shmDirTarFile, c.config.ShmDir, nil); err != nil {
return nil, 0, err
}
}
}
// Cleanup for a working restore.
if err := c.removeConmonFiles(); err != nil {
return nil, 0, err
}
// Save the OCI spec to disk
if err := c.saveSpec(g.Config); err != nil {
return nil, 0, err
}
// When restoring from an imported archive, allow restoring the content of volumes.
// Volumes are created in setupContainer()
if !options.IgnoreVolumes && (options.TargetFile != "" || options.CheckpointImageID != "") {
for _, v := range c.config.NamedVolumes {
volumeFilePath := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory, v.Name+".tar")
volumeFile, err := os.Open(volumeFilePath)
if err != nil {
return nil, 0, fmt.Errorf("failed to open volume file %s: %w", volumeFilePath, err)
}
defer volumeFile.Close()
volume, err := c.runtime.GetVolume(v.Name)
if err != nil {
return nil, 0, fmt.Errorf("failed to retrieve volume %s: %w", v.Name, err)
}
mountPoint, err := volume.MountPoint()
if err != nil {
return nil, 0, err
}
if mountPoint == "" {
return nil, 0, fmt.Errorf("unable to import volume %s as it is not mounted: %w", volume.Name(), err)
}
if err := archive.UntarUncompressed(volumeFile, mountPoint, nil); err != nil {
return nil, 0, fmt.Errorf("failed to extract volume %s to %s: %w", volumeFilePath, mountPoint, err)
}
}
}
// Before actually restarting the container, apply the root file-system changes
if !options.IgnoreRootfs {
if err := crutils.CRApplyRootFsDiffTar(c.bundlePath(), c.state.Mountpoint); err != nil {
return nil, 0, err
}
if err := crutils.CRRemoveDeletedFiles(c.ID(), c.bundlePath(), c.state.Mountpoint); err != nil {
return nil, 0, err
}
}
runtimeRestoreDuration, err = c.ociRuntime.CreateContainer(c, &options)
if err != nil {
return nil, 0, err
}
criuStatistics, err = func() (*define.CRIUCheckpointRestoreStatistics, error) {
if !options.PrintStats {
return nil, nil
}
statsDirectory, err := os.Open(c.bundlePath())
if err != nil {
return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
}
restoreStatistics, err := stats.CriuGetRestoreStats(statsDirectory)
if err != nil {
return nil, fmt.Errorf("displaying restore statistics not possible: %w", err)
}
return &define.CRIUCheckpointRestoreStatistics{
PagesCompared: restoreStatistics.GetPagesCompared(),
PagesSkippedCow: restoreStatistics.GetPagesSkippedCow(),
ForkingTime: restoreStatistics.GetForkingTime(),
RestoreTime: restoreStatistics.GetRestoreTime(),
PagesRestored: restoreStatistics.GetPagesRestored(),
}, nil
}()
if err != nil {
return nil, 0, err
}
logrus.Debugf("Restored container %s", c.ID())
c.state.State = define.ContainerStateRunning
c.state.Checkpointed = false
c.state.Restored = true
c.state.CheckpointedTime = time.Time{}
c.state.RestoredTime = time.Now()
if !options.Keep {
// Delete all checkpoint-related files. At this point, in theory, all files
// should exist. Still, ignore errors for now: the container should be
// restored and running, so do not error out just because some cleanup
// operation failed. Start with the checkpoint directory.
err = os.RemoveAll(c.CheckpointPath())
if err != nil {
logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
}
c.state.CheckpointPath = ""
err = os.RemoveAll(c.PreCheckPointPath())
if err != nil {
logrus.Debugf("Non-fatal: removal of pre-checkpoint directory (%s) failed: %v", c.PreCheckPointPath(), err)
}
err = os.RemoveAll(c.CheckpointVolumesPath())
if err != nil {
logrus.Debugf("Non-fatal: removal of checkpoint volumes directory (%s) failed: %v", c.CheckpointVolumesPath(), err)
}
cleanup := [...]string{
"restore.log",
"dump.log",
stats.StatsDump,
stats.StatsRestore,
metadata.DevShmCheckpointTar,
metadata.NetworkStatusFile,
metadata.RootFsDiffTar,
metadata.DeletedFilesFile,
}
for _, del := range cleanup {
file := filepath.Join(c.bundlePath(), del)
err = os.Remove(file)
if err != nil {
logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
}
}
c.state.CheckpointLog = ""
c.state.RestoreLog = ""
}
return criuStatistics, runtimeRestoreDuration, c.save()
}