Support checkpoint/restore with pods

This adds support to checkpoint containers out of pods and restore
container into pods.

It is only possible to restore a container into a pod if it has been
checkpointed out of pod. It is also not possible to restore a non pod
container into a pod.

The main reason this does not work is the PID namespace. If a non pod
container is being restored in a pod with a shared PID namespace, at
least one process in the restored container uses PID 1 which is already
in use by the infrastructure container. If someone tries to restore
container from a pod with a shared PID namespace without a shared PID
namespace it will also fail because the resulting PID namespace will not
have a PID 1.

Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
Adrian Reber
2021-07-12 11:43:45 +00:00
committed by Adrian Reber
parent 3375cbb198
commit eb94467780
12 changed files with 294 additions and 5 deletions

View File

@ -71,6 +71,9 @@ func init() {
)
_ = restoreCommand.RegisterFlagCompletionFunc("publish", completion.AutocompleteNone)
flags.StringVar(&restoreOptions.Pod, "pod", "", "Restore container into existing Pod (only works with --import)")
_ = restoreCommand.RegisterFlagCompletionFunc("pod", common.AutocompletePodsRunning)
validate.AddLatestFlag(restoreCommand, &restoreOptions.Latest)
}
@ -91,6 +94,9 @@ func restore(cmd *cobra.Command, args []string) error {
if restoreOptions.Import == "" && restoreOptions.Name != "" {
return errors.Errorf("--name can only be used with --import")
}
if restoreOptions.Import == "" && restoreOptions.Pod != "" {
return errors.Errorf("--pod can only be used with --import")
}
if restoreOptions.Name != "" && restoreOptions.TCPEstablished {
return errors.Errorf("--tcp-established cannot be used with --name")
}

View File

@ -93,6 +93,15 @@ be used once and the restored *container* will have another IP address. This als
that **--name, -n** cannot be used in combination with **--tcp-established**.\
*IMPORTANT: This OPTION is only available in combination with **--import, -i**.*
#### **--pod**=*name*
Restore a container into the pod *name*. The destination pod for this restore
has to have the same namespaces shared as the pod this container was checkpointed
from (see **[podman pod create --share](podman-pod-create.1.md#--share)**).
*IMPORTANT: This OPTION is only available in combination with **--import, -i**.*
This option requires at least CRIU 3.16.
#### **--publish**, **-p**=*port*
Replaces the ports that the *container* publishes, as configured during the
@ -128,7 +137,7 @@ $ podman run --rm -p 2345:80 -d webserver
```
## SEE ALSO
**[podman(1)](podman.1.md)**, **[podman-container-checkpoint(1)](podman-container-checkpoint.1.md)**, **[podman-run(1)](podman-run.1.md)**
**[podman(1)](podman.1.md)**, **[podman-container-checkpoint(1)](podman-container-checkpoint.1.md)**, **[podman-run(1)](podman-run.1.md)**, **[podman-pod-create(1)](podman-pod-create.1.md)**
## HISTORY
September 2018, Originally compiled by Adrian Reber <areber@redhat.com>

View File

@ -780,6 +780,16 @@ type ContainerCheckpointOptions struct {
// Compression tells the API which compression to use for
// the exported checkpoint archive.
Compression archive.Compression
// If Pod is set the container should be restored into the
// given Pod. If Pod is empty it is a restore without a Pod.
// Restoring a non Pod container into a Pod or a Pod container
// without a Pod is theoretically possible, but will
// probably not work if a PID namespace is shared.
// A shared PID namespace means that a Pod container has PID 1
// in the infrastructure container, but without the infrastructure
// container no PID 1 will be in the namespace and that is not
// possible.
Pod string
}
// Checkpoint checkpoints a container
@ -811,7 +821,11 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO
// Restore restores a container
func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOptions) error {
logrus.Debugf("Trying to restore container %s", c.ID())
if options.Pod == "" {
logrus.Debugf("Trying to restore container %s", c.ID())
} else {
logrus.Debugf("Trying to restore container %s into pod %s", c.ID(), options.Pod)
}
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()

View File

@ -901,8 +901,27 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr
}
func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
if len(c.Dependencies()) > 0 {
return errors.Errorf("Cannot export checkpoints of containers with dependencies")
if len(c.Dependencies()) == 1 {
// Check if the dependency is an infra container. If it is we can checkpoint
// the container out of the Pod.
if c.config.Pod == "" {
return errors.Errorf("cannot export checkpoints of containers with dependencies")
}
pod, err := c.runtime.state.Pod(c.config.Pod)
if err != nil {
return errors.Wrapf(err, "container %s is in pod %s, but pod cannot be retrieved", c.ID(), c.config.Pod)
}
infraID, err := pod.InfraContainerID()
if err != nil {
return errors.Wrapf(err, "cannot retrieve infra container ID for pod %s", c.config.Pod)
}
if c.Dependencies()[0] != infraID {
return errors.Errorf("cannot export checkpoints of containers with dependencies")
}
}
if len(c.Dependencies()) > 1 {
return errors.Errorf("cannot export checkpoints of containers with dependencies")
}
logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile)
@ -1136,10 +1155,20 @@ func (c *Container) importPreCheckpoint(input string) error {
}
func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (retErr error) {
if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
minCriuVersion := func() int {
if options.Pod == "" {
return criu.MinCriuVersion
}
return criu.PodCriuVersion
}()
if err := c.checkpointRestoreSupported(minCriuVersion); err != nil {
return err
}
if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) {
return errors.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
}
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID())
}
@ -1247,6 +1276,83 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
}
}
if options.Pod != "" {
// Running in a Pod means that we have to change all namespace settings to
// the ones from the infrastructure container.
pod, err := c.runtime.LookupPod(options.Pod)
if err != nil {
return errors.Wrapf(err, "pod %q cannot be retrieved", options.Pod)
}
infraContainer, err := pod.InfraContainer()
if err != nil {
return errors.Wrapf(err, "cannot retrieved infra container from pod %q", options.Pod)
}
infraContainer.lock.Lock()
if err := infraContainer.syncContainer(); err != nil {
infraContainer.lock.Unlock()
return errors.Wrapf(err, "Error syncing infrastructure container %s status", infraContainer.ID())
}
if infraContainer.state.State != define.ContainerStateRunning {
if err := infraContainer.initAndStart(ctx); err != nil {
infraContainer.lock.Unlock()
return errors.Wrapf(err, "Error starting infrastructure container %s status", infraContainer.ID())
}
}
infraContainer.lock.Unlock()
if c.config.IPCNsCtr != "" {
nsPath, err := infraContainer.namespacePath(IPCNS)
if err != nil {
return errors.Wrapf(err, "cannot retrieve IPC namespace path for Pod %q", options.Pod)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil {
return err
}
}
if c.config.NetNsCtr != "" {
nsPath, err := infraContainer.namespacePath(NetNS)
if err != nil {
return errors.Wrapf(err, "cannot retrieve network namespace path for Pod %q", options.Pod)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil {
return err
}
}
if c.config.PIDNsCtr != "" {
nsPath, err := infraContainer.namespacePath(PIDNS)
if err != nil {
return errors.Wrapf(err, "cannot retrieve PID namespace path for Pod %q", options.Pod)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil {
return err
}
}
if c.config.UTSNsCtr != "" {
nsPath, err := infraContainer.namespacePath(UTSNS)
if err != nil {
return errors.Wrapf(err, "cannot retrieve UTS namespace path for Pod %q", options.Pod)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil {
return err
}
}
if c.config.CgroupNsCtr != "" {
nsPath, err := infraContainer.namespacePath(CgroupNS)
if err != nil {
return errors.Wrapf(err, "cannot retrieve Cgroup namespace path for Pod %q", options.Pod)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil {
return err
}
}
}
if err := c.makeBindMounts(); err != nil {
return err
}

View File

@ -1064,6 +1064,30 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
if restoreOptions.TCPEstablished {
args = append(args, "--runtime-opt", "--tcp-established")
}
if restoreOptions.Pod != "" {
mountLabel := ctr.config.MountLabel
processLabel := ctr.config.ProcessLabel
if mountLabel != "" {
args = append(
args,
"--runtime-opt",
fmt.Sprintf(
"--lsm-mount-context=%s",
mountLabel,
),
)
}
if processLabel != "" {
args = append(
args,
"--runtime-opt",
fmt.Sprintf(
"--lsm-profile=selinux:%s",
processLabel,
),
)
}
}
}
logrus.WithFields(logrus.Fields{

View File

@ -62,6 +62,7 @@ type RestoreOptions struct {
Keep *bool
Name *string
TCPEstablished *bool
Pod *string
}
//go:generate go run ../generator/generator.go CreateOptions

View File

@ -131,3 +131,19 @@ func (o *RestoreOptions) GetTCPEstablished() bool {
}
return *o.TCPEstablished
}
// WithPod
func (o *RestoreOptions) WithPod(value string) *RestoreOptions {
v := &value
o.Pod = v
return o
}
// GetPod
func (o *RestoreOptions) GetPod() string {
var pod string
if o.Pod == nil {
return pod
}
return *o.Pod
}

View File

@ -9,6 +9,9 @@ import (
"github.com/containers/common/libimage"
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v3/libpod"
ann "github.com/containers/podman/v3/pkg/annotations"
"github.com/containers/podman/v3/pkg/checkpoint/crutils"
"github.com/containers/podman/v3/pkg/criu"
"github.com/containers/podman/v3/pkg/domain/entities"
"github.com/containers/podman/v3/pkg/errorhandling"
"github.com/containers/podman/v3/pkg/specgen/generate"
@ -68,6 +71,14 @@ func CRImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, restoreOpt
return nil, err
}
if ctrConfig.Pod != "" && restoreOptions.Pod == "" {
return nil, errors.New("cannot restore pod container without --pod")
}
if ctrConfig.Pod == "" && restoreOptions.Pod != "" {
return nil, errors.New("cannot restore non pod container into pod")
}
// This should not happen as checkpoints with these options are not exported.
if len(ctrConfig.Dependencies) > 0 {
return nil, errors.Errorf("Cannot import checkpoints of containers with dependencies")
@ -96,6 +107,91 @@ func CRImportCheckpoint(ctx context.Context, runtime *libpod.Runtime, restoreOpt
newName = true
}
if restoreOptions.Pod != "" {
// Restoring into a Pod requires much newer versions of CRIU
if !criu.CheckForCriu(criu.PodCriuVersion) {
return nil, errors.Errorf("restoring containers into pods requires at least CRIU %d", criu.PodCriuVersion)
}
// The runtime also has to support it
if !crutils.CRRuntimeSupportsPodCheckpointRestore(runtime.GetOCIRuntimePath()) {
return nil, errors.Errorf("runtime %s does not support pod restore", runtime.GetOCIRuntimePath())
}
// Restoring into an existing Pod
ctrConfig.Pod = restoreOptions.Pod
// According to podman pod create a pod can share the following namespaces:
// cgroup, ipc, net, pid, uts
// Let's make sure we a restoring into a pod with the same shared namespaces.
pod, err := runtime.LookupPod(ctrConfig.Pod)
if err != nil {
return nil, errors.Wrapf(err, "pod %q cannot be retrieved", ctrConfig.Pod)
}
infraContainer, err := pod.InfraContainer()
if err != nil {
return nil, errors.Wrapf(err, "cannot retrieve infra container from pod %q", ctrConfig.Pod)
}
// If a namespaces was shared (!= "") it needs to be set to the new infrastructure container
// If the infrastructure container does not share the same namespaces as the to be restored
// container we abort.
if ctrConfig.IPCNsCtr != "" {
if !pod.SharesIPC() {
return nil, errors.Errorf("pod %s does not share the IPC namespace", ctrConfig.Pod)
}
ctrConfig.IPCNsCtr = infraContainer.ID()
}
if ctrConfig.NetNsCtr != "" {
if !pod.SharesNet() {
return nil, errors.Errorf("pod %s does not share the network namespace", ctrConfig.Pod)
}
ctrConfig.NetNsCtr = infraContainer.ID()
}
if ctrConfig.PIDNsCtr != "" {
if !pod.SharesPID() {
return nil, errors.Errorf("pod %s does not share the PID namespace", ctrConfig.Pod)
}
ctrConfig.PIDNsCtr = infraContainer.ID()
}
if ctrConfig.UTSNsCtr != "" {
if !pod.SharesUTS() {
return nil, errors.Errorf("pod %s does not share the UTS namespace", ctrConfig.Pod)
}
ctrConfig.UTSNsCtr = infraContainer.ID()
}
if ctrConfig.CgroupNsCtr != "" {
if !pod.SharesCgroup() {
return nil, errors.Errorf("pod %s does not share the cgroup namespace", ctrConfig.Pod)
}
ctrConfig.CgroupNsCtr = infraContainer.ID()
}
// Change SELinux labels to infrastructure container labels
ctrConfig.MountLabel = infraContainer.MountLabel()
ctrConfig.ProcessLabel = infraContainer.ProcessLabel()
// Fix parent cgroup
cgroupPath, err := pod.CgroupPath()
if err != nil {
return nil, errors.Wrapf(err, "cannot retrieve cgroup path from pod %q", ctrConfig.Pod)
}
ctrConfig.CgroupParent = cgroupPath
oldPodID := dumpSpec.Annotations[ann.SandboxID]
// Fix up SandboxID in the annotations
dumpSpec.Annotations[ann.SandboxID] = ctrConfig.Pod
// Fix up CreateCommand
for i, c := range ctrConfig.CreateCommand {
if c == oldPodID {
ctrConfig.CreateCommand[i] = ctrConfig.Pod
}
}
}
if len(restoreOptions.PublishPorts) > 0 {
ports, _, _, err := generate.ParsePortMapping(restoreOptions.PublishPorts)
if err != nil {

View File

@ -1,6 +1,7 @@
package crutils
import (
"bytes"
"io"
"os"
"os/exec"
@ -189,3 +190,13 @@ func CRRuntimeSupportsCheckpointRestore(runtimePath string) bool {
}
return false
}
// CRRuntimeSupportsCheckpointRestore tests if the runtime at 'runtimePath'
// supports restoring into existing Pods. The runtime needs to support
// the CRIU option --lsm-mount-context and the existence of this is checked
// by this function. In addition it is necessary to at least have CRIU 3.16.
func CRRuntimeSupportsPodCheckpointRestore(runtimePath string) bool {
cmd := exec.Command(runtimePath, "restore", "--lsm-mount-context")
out, _ := cmd.CombinedOutput()
return bytes.Contains(out, []byte("flag needs an argument"))
}

View File

@ -7,6 +7,10 @@ import (
// MinCriuVersion for Podman at least CRIU 3.11 is required
const MinCriuVersion = 31100
// PodCriuVersion is the version of CRIU needed for
// checkpointing and restoring containers out of and into Pods.
const PodCriuVersion = 31600
// CheckForCriu uses CRIU's go bindings to check if the CRIU
// binary exists and if it at least the version Podman needs.
func CheckForCriu(version int) bool {

View File

@ -207,6 +207,7 @@ type RestoreOptions struct {
TCPEstablished bool
ImportPrevious string
PublishPorts []specgen.PortMapping
Pod string
}
type RestoreReport struct {

View File

@ -529,6 +529,7 @@ func (ic *ContainerEngine) ContainerRestore(ctx context.Context, namesOrIds []st
IgnoreStaticIP: options.IgnoreStaticIP,
IgnoreStaticMAC: options.IgnoreStaticMAC,
ImportPrevious: options.ImportPrevious,
Pod: options.Pod,
}
filterFuncs := []libpod.ContainerFilter{