mirror of
https://github.com/containers/podman.git
synced 2025-06-22 01:48:54 +08:00

runc uses CRIU to support checkpoint and restore of containers. This brings an initial checkpoint/restore implementation to podman.

None of the additional runc flags are yet supported and container migration optimization (pre-copy/post-copy) is also left for the future.

The current status is that it is possible to checkpoint and restore a container. I am testing on RHEL-7.x and, as the combination of RHEL-7 and CRIU has seccomp troubles, I have to create the container without seccomp.

With the following steps I am able to checkpoint and restore a container:

    # podman run --security-opt="seccomp=unconfined" -d registry.fedoraproject.org/f27/httpd
    # curl -I 10.22.0.78:8080
    HTTP/1.1 403 Forbidden # <-- this is actually a good answer
    # podman container checkpoint <container>
    # curl -I 10.22.0.78:8080
    curl: (7) Failed connect to 10.22.0.78:8080; No route to host
    # podman container restore <container>
    # curl -I 10.22.0.78:8080
    HTTP/1.1 403 Forbidden

I am using CRIU, runc and conmon from git. All required changes for checkpoint/restore support in podman have been merged in the corresponding projects.

To have the same IP address in the restored container as before checkpointing, CNI is told which IP address to use. If the saved network configuration cannot be found during restore, the container is restored with a new IP address.

For CRIU to restore established TCP connections the IP address of the network namespace used for restore needs to be the same. For TCP connections in the listening state the IP address can change.

During restore only one network interface with one IP address is handled correctly. Support to restore containers with more advanced network configuration will be implemented later.
v2:
 * comment typo
 * print debug messages during cleanup of restore files
 * use createContainer() instead of createOCIContainer()
 * introduce helper CheckpointPath()
 * do not try to restore a container that is paused
 * use existing helper functions for cleanup
 * restructure code flow for better readability
 * do not try to restore if checkpoint/inventory.img is missing
 * git add checkpoint.go restore.go

v3:
 * move checkpoint/restore under 'podman container'

v4:
 * incorporated changes from latest reviews

Signed-off-by: Adrian Reber <areber@redhat.com>
144 lines
4.0 KiB
Go
144 lines
4.0 KiB
Go
// +build linux
|
|
|
|
package libpod
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/containerd/cgroups"
|
|
"github.com/containers/libpod/utils"
|
|
"github.com/containers/storage/pkg/idtools"
|
|
spec "github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
func (r *OCIRuntime) moveConmonToCgroup(ctr *Container, cgroupParent string, cmd *exec.Cmd) error {
|
|
if os.Geteuid() == 0 {
|
|
if r.cgroupManager == SystemdCgroupsManager {
|
|
unitName := createUnitName("libpod-conmon", ctr.ID())
|
|
|
|
realCgroupParent := cgroupParent
|
|
splitParent := strings.Split(cgroupParent, "/")
|
|
if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 {
|
|
realCgroupParent = splitParent[len(splitParent)-1]
|
|
}
|
|
|
|
logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName)
|
|
if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil {
|
|
logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err)
|
|
}
|
|
} else {
|
|
cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon")
|
|
control, err := cgroups.New(cgroups.V1, cgroups.StaticPath(cgroupPath), &spec.LinuxResources{})
|
|
if err != nil {
|
|
logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
|
|
} else {
|
|
// we need to remove this defer and delete the cgroup once conmon exits
|
|
// maybe need a conmon monitor?
|
|
if err := control.Add(cgroups.Process{Pid: cmd.Process.Pid}); err != nil {
|
|
logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// newPipe creates a unix socket pair for communication
|
|
func newPipe() (parent *os.File, child *os.File, err error) {
|
|
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
|
|
}
|
|
|
|
// CreateContainer creates a container in the OCI runtime
|
|
// TODO terminal support for container
|
|
// Presently just ignoring conmon opts related to it
|
|
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) {
|
|
if ctr.state.UserNSRoot == "" {
|
|
// no need of an intermediate mount ns
|
|
return r.createOCIContainer(ctr, cgroupParent, restoreContainer)
|
|
}
|
|
var wg sync.WaitGroup
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
runtime.LockOSThread()
|
|
|
|
fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid()))
|
|
if err != nil {
|
|
return
|
|
}
|
|
defer fd.Close()
|
|
|
|
// create a new mountns on the current thread
|
|
if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
|
|
return
|
|
}
|
|
defer unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS)
|
|
|
|
// don't spread our mounts around
|
|
err = unix.Mount("/", "/", "none", unix.MS_REC|unix.MS_SLAVE, "")
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = unix.Mount(ctr.state.Mountpoint, ctr.state.RealMountpoint, "none", unix.MS_BIND, "")
|
|
if err != nil {
|
|
return
|
|
}
|
|
if err := idtools.MkdirAllAs(ctr.state.DestinationRunDir, 0700, ctr.RootUID(), ctr.RootGID()); err != nil {
|
|
return
|
|
}
|
|
|
|
err = unix.Mount(ctr.state.RunDir, ctr.state.DestinationRunDir, "none", unix.MS_BIND, "")
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = r.createOCIContainer(ctr, cgroupParent, restoreContainer)
|
|
}()
|
|
wg.Wait()
|
|
|
|
return err
|
|
}
|
|
|
|
// rpmVersion asks /usr/bin/rpm (via "rpm -q -f") about the given path and
// returns the command's standard output with leading and trailing newlines
// removed. If the command cannot be run or exits with an error, the string
// "Unknown" is returned.
func rpmVersion(path string) string {
	raw, err := exec.Command("/usr/bin/rpm", "-q", "-f", path).Output()
	if err != nil {
		return "Unknown"
	}
	return strings.Trim(string(raw), "\n")
}
|
|
|
|
// dpkgVersion asks /usr/bin/dpkg (via "dpkg -S") about the given path and
// returns the command's standard output with leading and trailing newlines
// removed. If the command cannot be run or exits with an error, the string
// "Unknown" is returned.
func dpkgVersion(path string) string {
	raw, err := exec.Command("/usr/bin/dpkg", "-S", path).Output()
	if err != nil {
		return "Unknown"
	}
	return strings.Trim(string(raw), "\n")
}
|
|
|
|
func (r *OCIRuntime) pathPackage() string {
|
|
if out := rpmVersion(r.path); out != "Unknown" {
|
|
return out
|
|
}
|
|
return dpkgVersion(r.path)
|
|
}
|
|
|
|
func (r *OCIRuntime) conmonPackage() string {
|
|
if out := rpmVersion(r.conmonPath); out != "Unknown" {
|
|
return out
|
|
}
|
|
return dpkgVersion(r.conmonPath)
|
|
}
|