mirror of
https://github.com/containers/podman.git
synced 2025-06-01 09:06:44 +08:00
Merge pull request #469 from adrianreber/master
Add support to checkpoint/restore containers
This commit is contained in:
12
Dockerfile
12
Dockerfile
@ -18,6 +18,8 @@ RUN apt-get update && apt-get install -y \
|
||||
libaio-dev \
|
||||
libcap-dev \
|
||||
libfuse-dev \
|
||||
libnet-dev \
|
||||
libnl-3-dev \
|
||||
libostree-dev \
|
||||
libprotobuf-dev \
|
||||
libprotobuf-c0-dev \
|
||||
@ -110,6 +112,16 @@ RUN set -x \
|
||||
&& go get -u github.com/mailru/easyjson/... \
|
||||
&& install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/
|
||||
|
||||
# Install criu, pinned to CRIU_COMMIT so that image builds are reproducible
ENV CRIU_COMMIT 584cbe4643c3fc7dc901ff08bf923ca0fe7326f9
RUN set -x \
      && cd /tmp \
      && git clone https://github.com/checkpoint-restore/criu.git \
      && cd criu \
      && git checkout -q "$CRIU_COMMIT" \
      && make \
      && install -D -m 755 criu/criu /usr/sbin/ \
      && rm -rf /tmp/criu
|
||||
|
||||
# Install cni config
|
||||
#RUN make install.cni
|
||||
RUN mkdir -p /etc/cni/net.d/
|
||||
|
73
cmd/podman/checkpoint.go
Normal file
73
cmd/podman/checkpoint.go
Normal file
@ -0,0 +1,73 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/containers/libpod/cmd/podman/libpodruntime"
|
||||
"github.com/containers/libpod/pkg/rootless"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
var (
|
||||
checkpointDescription = `
|
||||
podman container checkpoint
|
||||
|
||||
Checkpoints one or more running containers. The container name or ID can be used.
|
||||
`
|
||||
checkpointFlags = []cli.Flag{
|
||||
cli.BoolFlag{
|
||||
Name: "keep, k",
|
||||
Usage: "keep all temporary checkpoint files",
|
||||
},
|
||||
}
|
||||
checkpointCommand = cli.Command{
|
||||
Name: "checkpoint",
|
||||
Usage: "Checkpoints one or more containers",
|
||||
Description: checkpointDescription,
|
||||
Flags: checkpointFlags,
|
||||
Action: checkpointCmd,
|
||||
ArgsUsage: "CONTAINER-NAME [CONTAINER-NAME ...]",
|
||||
}
|
||||
)
|
||||
|
||||
func checkpointCmd(c *cli.Context) error {
|
||||
if rootless.IsRootless() {
|
||||
return errors.New("checkpointing a container requires root")
|
||||
}
|
||||
|
||||
runtime, err := libpodruntime.GetRuntime(c)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "could not get runtime")
|
||||
}
|
||||
defer runtime.Shutdown(false)
|
||||
|
||||
keep := c.Bool("keep")
|
||||
args := c.Args()
|
||||
if len(args) < 1 {
|
||||
return errors.Errorf("you must provide at least one container name or id")
|
||||
}
|
||||
|
||||
var lastError error
|
||||
for _, arg := range args {
|
||||
ctr, err := runtime.LookupContainer(arg)
|
||||
if err != nil {
|
||||
if lastError != nil {
|
||||
fmt.Fprintln(os.Stderr, lastError)
|
||||
}
|
||||
lastError = errors.Wrapf(err, "error looking up container %q", arg)
|
||||
continue
|
||||
}
|
||||
if err = ctr.Checkpoint(context.TODO(), keep); err != nil {
|
||||
if lastError != nil {
|
||||
fmt.Fprintln(os.Stderr, lastError)
|
||||
}
|
||||
lastError = errors.Wrapf(err, "failed to checkpoint container %v", ctr.ID())
|
||||
} else {
|
||||
fmt.Println(ctr.ID())
|
||||
}
|
||||
}
|
||||
return lastError
|
||||
}
|
@ -7,6 +7,7 @@ import (
|
||||
var (
|
||||
subCommands = []cli.Command{
|
||||
attachCommand,
|
||||
checkpointCommand,
|
||||
cleanupCommand,
|
||||
commitCommand,
|
||||
createCommand,
|
||||
@ -23,6 +24,7 @@ var (
|
||||
// pruneCommand,
|
||||
refreshCommand,
|
||||
restartCommand,
|
||||
restoreCommand,
|
||||
rmCommand,
|
||||
runCommand,
|
||||
runlabelCommand,
|
||||
|
73
cmd/podman/restore.go
Normal file
73
cmd/podman/restore.go
Normal file
@ -0,0 +1,73 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/containers/libpod/cmd/podman/libpodruntime"
|
||||
"github.com/containers/libpod/pkg/rootless"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
var (
|
||||
restoreDescription = `
|
||||
podman container restore
|
||||
|
||||
Restores a container from a checkpoint. The container name or ID can be used.
|
||||
`
|
||||
restoreFlags = []cli.Flag{
|
||||
cli.BoolFlag{
|
||||
Name: "keep, k",
|
||||
Usage: "keep all temporary checkpoint files",
|
||||
},
|
||||
}
|
||||
restoreCommand = cli.Command{
|
||||
Name: "restore",
|
||||
Usage: "Restores one or more containers from a checkpoint",
|
||||
Description: restoreDescription,
|
||||
Flags: restoreFlags,
|
||||
Action: restoreCmd,
|
||||
ArgsUsage: "CONTAINER-NAME [CONTAINER-NAME ...]",
|
||||
}
|
||||
)
|
||||
|
||||
func restoreCmd(c *cli.Context) error {
|
||||
if rootless.IsRootless() {
|
||||
return errors.New("restoring a container requires root")
|
||||
}
|
||||
|
||||
runtime, err := libpodruntime.GetRuntime(c)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "could not get runtime")
|
||||
}
|
||||
defer runtime.Shutdown(false)
|
||||
|
||||
keep := c.Bool("keep")
|
||||
args := c.Args()
|
||||
if len(args) < 1 {
|
||||
return errors.Errorf("you must provide at least one container name or id")
|
||||
}
|
||||
|
||||
var lastError error
|
||||
for _, arg := range args {
|
||||
ctr, err := runtime.LookupContainer(arg)
|
||||
if err != nil {
|
||||
if lastError != nil {
|
||||
fmt.Fprintln(os.Stderr, lastError)
|
||||
}
|
||||
lastError = errors.Wrapf(err, "error looking up container %q", arg)
|
||||
continue
|
||||
}
|
||||
if err = ctr.Restore(context.TODO(), keep); err != nil {
|
||||
if lastError != nil {
|
||||
fmt.Fprintln(os.Stderr, lastError)
|
||||
}
|
||||
lastError = errors.Wrapf(err, "failed to restore container %v", ctr.ID())
|
||||
} else {
|
||||
fmt.Println(ctr.ID())
|
||||
}
|
||||
}
|
||||
return lastError
|
||||
}
|
@ -87,6 +87,10 @@ __podman_complete_containers_all() {
|
||||
__podman_complete_containers "$@" --all
|
||||
}
|
||||
|
||||
__podman_complete_containers_created() {
|
||||
__podman_complete_containers "$@" --all --filter status=created
|
||||
}
|
||||
|
||||
__podman_complete_containers_running() {
|
||||
__podman_complete_containers "$@" --filter status=running
|
||||
}
|
||||
@ -710,6 +714,24 @@ _podman_container_attach() {
|
||||
_podman_attach
|
||||
}
|
||||
|
||||
_podman_container_checkpoint() {
|
||||
local options_with_args="
|
||||
--help -h
|
||||
"
|
||||
local boolean_options="
|
||||
--keep
|
||||
-k
|
||||
"
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__podman_complete_containers_running
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_podman_container_commit() {
|
||||
_podman_commit
|
||||
}
|
||||
@ -770,6 +792,24 @@ _podman_container_restart() {
|
||||
_podman_restart
|
||||
}
|
||||
|
||||
_podman_container_restore() {
|
||||
local options_with_args="
|
||||
--help -h
|
||||
"
|
||||
local boolean_options="
|
||||
--keep
|
||||
-k
|
||||
"
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__podman_complete_containers_created
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_podman_container_rm() {
|
||||
_podman_rm
|
||||
}
|
||||
@ -817,6 +857,7 @@ _podman_container() {
|
||||
"
|
||||
subcommands="
|
||||
attach
|
||||
checkpoint
|
||||
commit
|
||||
create
|
||||
diff
|
||||
@ -831,6 +872,7 @@ _podman_container() {
|
||||
port
|
||||
refresh
|
||||
restart
|
||||
restore
|
||||
rm
|
||||
run
|
||||
start
|
||||
|
30
docs/podman-container-checkpoint.1.md
Normal file
30
docs/podman-container-checkpoint.1.md
Normal file
@ -0,0 +1,30 @@
|
||||
% podman-container-checkpoint(1)
|
||||
|
||||
## NAME
|
||||
podman\-container\-checkpoint - Checkpoints one or more running containers
|
||||
|
||||
## SYNOPSIS
|
||||
**podman container checkpoint** [*options*] *container* ...
|
||||
|
||||
## DESCRIPTION
|
||||
Checkpoints all the processes in one or more containers. You may use container IDs or names as input.
|
||||
|
||||
## OPTIONS
|
||||
**-k**, **--keep**
|
||||
|
||||
Keep all temporary log and statistics files created by CRIU during checkpointing. If checkpointing
|
||||
fails, these files are always kept to aid debugging. If checkpointing succeeds, the files are
|
||||
normally not needed and are removed; with this option Podman keeps them
|
||||
for further analysis.
|
||||
|
||||
## EXAMPLE
|
||||
|
||||
podman container checkpoint mywebserver
|
||||
|
||||
podman container checkpoint 860a4b23
|
||||
|
||||
## SEE ALSO
|
||||
podman(1), podman-container-restore(1)
|
||||
|
||||
## HISTORY
|
||||
September 2018, Originally compiled by Adrian Reber <areber@redhat.com>
|
37
docs/podman-container-restore.1.md
Normal file
37
docs/podman-container-restore.1.md
Normal file
@ -0,0 +1,37 @@
|
||||
% podman-container-restore(1)
|
||||
|
||||
## NAME
|
||||
podman\-container\-restore - Restores one or more containers from a checkpoint
|
||||
|
||||
## SYNOPSIS
|
||||
**podman container restore** [*options*] *container* ...
|
||||
|
||||
## DESCRIPTION
|
||||
Restores a container from a checkpoint. You may use container IDs or names as input.
|
||||
|
||||
## OPTIONS
|
||||
**-k**, **--keep**
|
||||
|
||||
Keep all temporary log and statistics files created by CRIU during
|
||||
checkpointing as well as restoring. If restoring fails, these files are kept
|
||||
to aid debugging. If restoring succeeds, these files are normally not needed
|
||||
and are removed; with this option Podman keeps the
|
||||
files for further analysis. This includes the checkpoint directory with all
|
||||
files created during checkpointing. The size required by the checkpoint
|
||||
directory is roughly the same as the amount of memory required by the
|
||||
processes in the checkpointed container.
|
||||
|
||||
Without the **-k**, **--keep** option the checkpoint will be consumed and cannot be used
|
||||
again.
|
||||
|
||||
## EXAMPLE
|
||||
|
||||
podman container restore mywebserver
|
||||
|
||||
podman container restore 860a4b23
|
||||
|
||||
## SEE ALSO
|
||||
podman(1), podman-container-checkpoint(1)
|
||||
|
||||
## HISTORY
|
||||
September 2018, Originally compiled by Adrian Reber <areber@redhat.com>
|
@ -14,6 +14,7 @@ The container command allows you to manage containers
|
||||
| Command | Man Page | Description |
|
||||
| ------- | --------------------------------------------------- | ---------------------------------------------------------------------------- |
|
||||
| attach | [podman-attach(1)](podman-attach.1.md) | Attach to a running container. |
|
||||
| checkpoint | [podman-container-checkpoint(1)](podman-container-checkpoint.1.md) | Checkpoints one or more containers. |
|
||||
| cleanup | [podman-container-cleanup(1)](podman-container-cleanup.1.md) | Cleanup containers network and mountpoints. |
|
||||
| commit | [podman-commit(1)](podman-commit.1.md) | Create new image based on the changed container. |
|
||||
| create | [podman-create(1)](podman-create.1.md) | Create a new container. |
|
||||
@ -29,6 +30,7 @@ The container command allows you to manage containers
|
||||
| port | [podman-port(1)](podman-port.1.md) | List port mappings for the container. |
|
||||
| refresh | [podman-refresh(1)](podman-container-refresh.1.md) | Refresh the state of all containers |
|
||||
| restart | [podman-restart(1)](podman-restart.1.md) | Restart one or more containers. |
|
||||
| restore | [podman-container-restore(1)](podman-container-restore.1.md) | Restores one or more containers from a checkpoint. |
|
||||
| rm | [podman-rm(1)](podman-rm.1.md) | Remove one or more containers. |
|
||||
| run | [podman-run(1)](podman-run.1.md) | Run a command in a container. |
|
||||
| start | [podman-start(1)](podman-start.1.md) | Starts one or more containers. |
|
||||
|
@ -157,6 +157,28 @@ $ sudo podman top <container_id>
|
||||
101 31889 31873 0 09:21 ? 00:00:00 nginx: worker process
|
||||
```
|
||||
|
||||
### Checkpointing the container
|
||||
Checkpointing a container stops the container while writing the state of all processes in the container to disk.
|
||||
With this a container can later be restored and continue running at exactly the same point in time as the
|
||||
checkpoint. This capability requires CRIU 3.11 or later installed on the system.
|
||||
To checkpoint the container use:
|
||||
```console
|
||||
$ sudo podman container checkpoint <container_id>
|
||||
```
|
||||
|
||||
### Restoring the container
|
||||
Restoring a container is only possible for a previously checkpointed container. The restored container will
|
||||
continue to run at exactly the same point in time it was checkpointed.
|
||||
To restore the container use:
|
||||
```console
|
||||
$ sudo podman container restore <container_id>
|
||||
```
|
||||
|
||||
After being restored, the container will answer requests again as it did before checkpointing.
|
||||
```console
|
||||
# curl http://<IP_address>:8080
|
||||
```
|
||||
|
||||
### Stopping the container
|
||||
To stop the httpd container:
|
||||
```console
|
||||
|
@ -832,3 +832,33 @@ func (c *Container) Refresh(ctx context.Context) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Checkpoint checkpoints a container
|
||||
func (c *Container) Checkpoint(ctx context.Context, keep bool) error {
|
||||
logrus.Debugf("Trying to checkpoint container %s", c)
|
||||
if !c.batched {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
|
||||
if err := c.syncContainer(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return c.checkpoint(ctx, keep)
|
||||
}
|
||||
|
||||
// Restore restores a container
|
||||
func (c *Container) Restore(ctx context.Context, keep bool) (err error) {
|
||||
logrus.Debugf("Trying to restore container %s", c)
|
||||
if !c.batched {
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
|
||||
if err := c.syncContainer(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return c.restore(ctx, keep)
|
||||
}
|
||||
|
@ -129,6 +129,11 @@ func (c *Container) ControlSocketPath() string {
|
||||
return filepath.Join(c.bundlePath(), "ctl")
|
||||
}
|
||||
|
||||
// CheckpointPath returns the path to the directory containing the checkpoint
|
||||
func (c *Container) CheckpointPath() string {
|
||||
return filepath.Join(c.bundlePath(), "checkpoint")
|
||||
}
|
||||
|
||||
// AttachSocketPath retrieves the path of the container's attach socket
|
||||
func (c *Container) AttachSocketPath() string {
|
||||
return filepath.Join(c.runtime.ociRuntime.socketsDir, c.ID(), "attach")
|
||||
@ -523,7 +528,7 @@ func (c *Container) init(ctx context.Context) error {
|
||||
}
|
||||
|
||||
// With the spec complete, do an OCI create
|
||||
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent); err != nil {
|
||||
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, false); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -4,12 +4,18 @@ package libpod
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
cnitypes "github.com/containernetworking/cni/pkg/types/current"
|
||||
crioAnnotations "github.com/containers/libpod/pkg/annotations"
|
||||
"github.com/containers/libpod/pkg/chrootuser"
|
||||
"github.com/containers/libpod/pkg/rootless"
|
||||
@ -307,3 +313,155 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) {
|
||||
|
||||
if c.state.State != ContainerStateRunning {
|
||||
return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
|
||||
}
|
||||
if err := c.runtime.ociRuntime.checkpointContainer(c); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Save network.status. This is needed to restore the container with
|
||||
// the same IP. Currently limited to one IP address in a container
|
||||
// with one interface.
|
||||
formatJSON, err := json.MarshalIndent(c.state.NetworkStatus, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(c.bundlePath(), "network.status"), formatJSON, 0644); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
logrus.Debugf("Checkpointed container %s", c.ID())
|
||||
|
||||
c.state.State = ContainerStateStopped
|
||||
|
||||
// Cleanup Storage and Network
|
||||
if err := c.cleanup(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !keep {
|
||||
// Remove log file
|
||||
os.Remove(filepath.Join(c.bundlePath(), "dump.log"))
|
||||
// Remove statistic file
|
||||
os.Remove(filepath.Join(c.bundlePath(), "stats-dump"))
|
||||
}
|
||||
|
||||
return c.save()
|
||||
}
|
||||
|
||||
func (c *Container) restore(ctx context.Context, keep bool) (err error) {
|
||||
|
||||
if (c.state.State != ContainerStateConfigured) && (c.state.State != ContainerStateExited) {
|
||||
return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID())
|
||||
}
|
||||
|
||||
// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
|
||||
// no sense to try a restore. This is a minimal check if a checkpoint exist.
|
||||
if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
|
||||
return errors.Wrapf(err, "A complete checkpoint for this container cannot be found, cannot restore")
|
||||
}
|
||||
|
||||
// Read network configuration from checkpoint
|
||||
// Currently only one interface with one IP is supported.
|
||||
networkStatusFile, err := os.Open(filepath.Join(c.bundlePath(), "network.status"))
|
||||
if err == nil {
|
||||
// The file with the network.status does exist. Let's restore the
|
||||
// container with the same IP address as during checkpointing.
|
||||
defer networkStatusFile.Close()
|
||||
var networkStatus []*cnitypes.Result
|
||||
networkJSON, err := ioutil.ReadAll(networkStatusFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
json.Unmarshal(networkJSON, &networkStatus)
|
||||
// Take the first IP address
|
||||
var IP net.IP
|
||||
if len(networkStatus) > 0 {
|
||||
if len(networkStatus[0].IPs) > 0 {
|
||||
IP = networkStatus[0].IPs[0].Address.IP
|
||||
}
|
||||
}
|
||||
if IP != nil {
|
||||
env := fmt.Sprintf("IP=%s", IP)
|
||||
// Tell CNI which IP address we want.
|
||||
os.Setenv("CNI_ARGS", env)
|
||||
logrus.Debugf("Restoring container with %s", env)
|
||||
}
|
||||
}
|
||||
|
||||
if err := c.prepare(); err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
if err2 := c.cleanup(ctx); err2 != nil {
|
||||
logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// TODO: use existing way to request static IPs, once it is merged in ocicni
|
||||
// https://github.com/cri-o/ocicni/pull/23/
|
||||
|
||||
// CNI_ARGS was used to request a certain IP address. Unconditionally remove it.
|
||||
os.Unsetenv("CNI_ARGS")
|
||||
|
||||
// Read config
|
||||
jsonPath := filepath.Join(c.bundlePath(), "config.json")
|
||||
logrus.Debugf("generate.NewFromFile at %v", jsonPath)
|
||||
g, err := generate.NewFromFile(jsonPath)
|
||||
if err != nil {
|
||||
logrus.Debugf("generate.NewFromFile failed with %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// We want to have the same network namespace as before.
|
||||
if c.config.CreateNetNS {
|
||||
g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path())
|
||||
}
|
||||
|
||||
// Save the OCI spec to disk
|
||||
if err := c.saveSpec(g.Spec()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := c.makeBindMounts(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Cleanup for a working restore.
|
||||
c.removeConmonFiles()
|
||||
|
||||
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, true); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
logrus.Debugf("Restored container %s", c.ID())
|
||||
|
||||
c.state.State = ContainerStateRunning
|
||||
|
||||
if !keep {
|
||||
// Delete all checkpoint related files. At this point, in theory, all files
|
||||
// should exist. Still ignoring errors for now as the container should be
|
||||
// restored and running. Not erroring out just because some cleanup operation
|
||||
// failed. Starting with the checkpoint directory
|
||||
err = os.RemoveAll(c.CheckpointPath())
|
||||
if err != nil {
|
||||
logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
|
||||
}
|
||||
cleanup := [...]string{"restore.log", "dump.log", "stats-dump", "stats-restore", "network.status"}
|
||||
for _, delete := range cleanup {
|
||||
file := filepath.Join(c.bundlePath(), delete)
|
||||
err = os.Remove(file)
|
||||
if err != nil {
|
||||
logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return c.save()
|
||||
}
|
||||
|
@ -27,3 +27,11 @@ func (c *Container) cleanupNetwork() error {
|
||||
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
|
||||
return nil, ErrNotImplemented
|
||||
}
|
||||
|
||||
func (c *Container) checkpoint(ctx context.Context, keep bool) error {
|
||||
return ErrNotImplemented
|
||||
}
|
||||
|
||||
func (c *Container) restore(ctx context.Context, keep bool) error {
|
||||
return ErrNotImplemented
|
||||
}
|
||||
|
@ -227,7 +227,7 @@ func bindPorts(ports []ocicni.PortMapping) ([]*os.File, error) {
|
||||
return files, nil
|
||||
}
|
||||
|
||||
func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string) (err error) {
|
||||
func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) {
|
||||
var stderrBuf bytes.Buffer
|
||||
|
||||
runtimeDir, err := GetRootlessRuntimeDir()
|
||||
@ -289,6 +289,10 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string) (er
|
||||
args = append(args, "--syslog")
|
||||
}
|
||||
|
||||
if restoreContainer {
|
||||
args = append(args, "--restore", ctr.CheckpointPath())
|
||||
}
|
||||
|
||||
logrus.WithFields(logrus.Fields{
|
||||
"args": args,
|
||||
}).Debugf("running conmon: %s", r.conmonPath)
|
||||
@ -766,3 +770,15 @@ func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkpointContainer checkpoints the given container
|
||||
func (r *OCIRuntime) checkpointContainer(ctr *Container) error {
|
||||
// imagePath is used by CRIU to store the actual checkpoint files
|
||||
imagePath := ctr.CheckpointPath()
|
||||
// workPath will be used to store dump.log and stats-dump
|
||||
workPath := ctr.bundlePath()
|
||||
logrus.Debugf("Writing checkpoint to %s", imagePath)
|
||||
logrus.Debugf("Writing checkpoint logs to %s", workPath)
|
||||
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, "checkpoint",
|
||||
"--image-path", imagePath, "--work-path", workPath, ctr.ID())
|
||||
}
|
||||
|
@ -63,10 +63,10 @@ func newPipe() (parent *os.File, child *os.File, err error) {
|
||||
// CreateContainer creates a container in the OCI runtime
|
||||
// TODO terminal support for container
|
||||
// Presently just ignoring conmon opts related to it
|
||||
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) {
|
||||
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) {
|
||||
if ctr.state.UserNSRoot == "" {
|
||||
// no need of an intermediate mount ns
|
||||
return r.createOCIContainer(ctr, cgroupParent)
|
||||
return r.createOCIContainer(ctr, cgroupParent, restoreContainer)
|
||||
}
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
@ -103,7 +103,7 @@ func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err e
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = r.createOCIContainer(ctr, cgroupParent)
|
||||
err = r.createOCIContainer(ctr, cgroupParent, restoreContainer)
|
||||
}()
|
||||
wg.Wait()
|
||||
|
||||
|
@ -15,7 +15,7 @@ func newPipe() (parent *os.File, child *os.File, err error) {
|
||||
return nil, nil, ErrNotImplemented
|
||||
}
|
||||
|
||||
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) {
|
||||
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) {
|
||||
return ErrNotImplemented
|
||||
}
|
||||
|
||||
|
129
test/e2e/checkpoint_test.go
Normal file
129
test/e2e/checkpoint_test.go
Normal file
@ -0,0 +1,129 @@
|
||||
package integration
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("Podman checkpoint", func() {
|
||||
var (
|
||||
tempdir string
|
||||
err error
|
||||
podmanTest PodmanTest
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
tempdir, err = CreateTempDirInTempDir()
|
||||
if err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
podmanTest = PodmanCreate(tempdir)
|
||||
podmanTest.RestoreAllArtifacts()
|
||||
// At least CRIU 3.11 is needed
|
||||
skip, err := podmanTest.isCriuAtLeast(31100)
|
||||
if err != nil || skip {
|
||||
Skip("CRIU missing or too old.")
|
||||
}
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
podmanTest.Cleanup()
|
||||
f := CurrentGinkgoTestDescription()
|
||||
timedResult := fmt.Sprintf("Test: %s completed in %f seconds", f.TestText, f.Duration.Seconds())
|
||||
GinkgoWriter.Write([]byte(timedResult))
|
||||
})
|
||||
|
||||
It("podman checkpoint bogus container", func() {
|
||||
session := podmanTest.Podman([]string{"container", "checkpoint", "foobar"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session.ExitCode()).To(Not(Equal(0)))
|
||||
})
|
||||
|
||||
It("podman restore bogus container", func() {
|
||||
session := podmanTest.Podman([]string{"container", "restore", "foobar"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session.ExitCode()).To(Not(Equal(0)))
|
||||
})
|
||||
|
||||
It("podman checkpoint a running container by id", func() {
|
||||
// CRIU does not work with seccomp correctly on RHEL7
|
||||
session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "-d", ALPINE, "top"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session.ExitCode()).To(Equal(0))
|
||||
cid := session.OutputToString()
|
||||
|
||||
result := podmanTest.Podman([]string{"container", "checkpoint", cid})
|
||||
result.WaitWithDefaultTimeout()
|
||||
|
||||
Expect(result.ExitCode()).To(Equal(0))
|
||||
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
|
||||
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
|
||||
|
||||
result = podmanTest.Podman([]string{"container", "restore", cid})
|
||||
result.WaitWithDefaultTimeout()
|
||||
|
||||
Expect(result.ExitCode()).To(Equal(0))
|
||||
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
|
||||
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))
|
||||
})
|
||||
|
||||
It("podman checkpoint a running container by name", func() {
|
||||
session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "--name", "test_name", "-d", ALPINE, "top"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session.ExitCode()).To(Equal(0))
|
||||
|
||||
result := podmanTest.Podman([]string{"container", "checkpoint", "test_name"})
|
||||
result.WaitWithDefaultTimeout()
|
||||
|
||||
Expect(result.ExitCode()).To(Equal(0))
|
||||
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
|
||||
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
|
||||
|
||||
result = podmanTest.Podman([]string{"container", "restore", "test_name"})
|
||||
result.WaitWithDefaultTimeout()
|
||||
|
||||
Expect(result.ExitCode()).To(Equal(0))
|
||||
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
|
||||
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))
|
||||
})
|
||||
|
||||
It("podman pause a checkpointed container by id", func() {
|
||||
session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "-d", ALPINE, "top"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session.ExitCode()).To(Equal(0))
|
||||
cid := session.OutputToString()
|
||||
|
||||
result := podmanTest.Podman([]string{"container", "checkpoint", cid})
|
||||
result.WaitWithDefaultTimeout()
|
||||
|
||||
Expect(result.ExitCode()).To(Equal(0))
|
||||
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
|
||||
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
|
||||
|
||||
result = podmanTest.Podman([]string{"pause", cid})
|
||||
result.WaitWithDefaultTimeout()
|
||||
|
||||
Expect(result.ExitCode()).To(Equal(125))
|
||||
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
|
||||
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
|
||||
|
||||
result = podmanTest.Podman([]string{"container", "restore", cid})
|
||||
result.WaitWithDefaultTimeout()
|
||||
Expect(result.ExitCode()).To(Equal(0))
|
||||
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
|
||||
|
||||
result = podmanTest.Podman([]string{"rm", cid})
|
||||
result.WaitWithDefaultTimeout()
|
||||
Expect(result.ExitCode()).To(Equal(125))
|
||||
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
|
||||
|
||||
result = podmanTest.Podman([]string{"rm", "-f", cid})
|
||||
result.WaitWithDefaultTimeout()
|
||||
Expect(result.ExitCode()).To(Equal(0))
|
||||
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
|
||||
|
||||
})
|
||||
})
|
@ -2,6 +2,7 @@ package integration
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
@ -64,6 +65,7 @@ type PodmanTest struct {
|
||||
TempDir string
|
||||
CgroupManager string
|
||||
Host HostOS
|
||||
CriuBinary string
|
||||
}
|
||||
|
||||
// HostOS is a simple struct for the test os
|
||||
@ -164,6 +166,7 @@ func PodmanCreate(tempDir string) PodmanTest {
|
||||
runCBinary = "/usr/bin/runc"
|
||||
}
|
||||
|
||||
criuBinary := "/usr/sbin/criu"
|
||||
CNIConfigDir := "/etc/cni/net.d"
|
||||
|
||||
p := PodmanTest{
|
||||
@ -179,6 +182,7 @@ func PodmanCreate(tempDir string) PodmanTest {
|
||||
TempDir: tempDir,
|
||||
CgroupManager: cgroupManager,
|
||||
Host: host,
|
||||
CriuBinary: criuBinary,
|
||||
}
|
||||
|
||||
// Setup registries.conf ENV variable
|
||||
@ -678,6 +682,39 @@ func (p *PodmanTest) setRegistriesConfigEnv(b []byte) {
|
||||
ioutil.WriteFile(outfile, b, 0644)
|
||||
}
|
||||
|
||||
func (p *PodmanTest) isCriuAtLeast(version int) (bool, error) {
|
||||
cmd := exec.Command(p.CriuBinary, "-V")
|
||||
var out bytes.Buffer
|
||||
cmd.Stdout = &out
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
var x int
|
||||
var y int
|
||||
var z int
|
||||
|
||||
fmt.Sscanf(out.String(), "Version: %d.%d.%d", &x, &y, &z)
|
||||
|
||||
if strings.Contains(out.String(), "GitID") {
|
||||
// If CRIU is built from git it contains a git ID.
|
||||
// If that is the case, increase minor by one as this
|
||||
// could mean we are running a development version.
|
||||
y = y + 1
|
||||
}
|
||||
|
||||
parsed_version := x*10000 + y*100 + z
|
||||
|
||||
fmt.Println(parsed_version)
|
||||
|
||||
if parsed_version >= version {
|
||||
return false, nil
|
||||
} else {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
func resetRegistriesConfigEnv() {
|
||||
os.Setenv("REGISTRIES_CONFIG_PATH", "")
|
||||
}
|
||||
|
Reference in New Issue
Block a user