mirror of
https://github.com/containers/podman.git
synced 2025-06-22 18:08:11 +08:00
Merge pull request #469 from adrianreber/master
Add support to checkpoint/restore containers
This commit is contained in:
12
Dockerfile
12
Dockerfile
@ -18,6 +18,8 @@ RUN apt-get update && apt-get install -y \
|
|||||||
libaio-dev \
|
libaio-dev \
|
||||||
libcap-dev \
|
libcap-dev \
|
||||||
libfuse-dev \
|
libfuse-dev \
|
||||||
|
libnet-dev \
|
||||||
|
libnl-3-dev \
|
||||||
libostree-dev \
|
libostree-dev \
|
||||||
libprotobuf-dev \
|
libprotobuf-dev \
|
||||||
libprotobuf-c0-dev \
|
libprotobuf-c0-dev \
|
||||||
@ -110,6 +112,16 @@ RUN set -x \
|
|||||||
&& go get -u github.com/mailru/easyjson/... \
|
&& go get -u github.com/mailru/easyjson/... \
|
||||||
&& install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/
|
&& install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/
|
||||||
|
|
||||||
|
# Install criu
# CRIU is built from source; CRIU_COMMIT pins the exact revision so that image
# builds are reproducible instead of tracking the head of the default branch.
ENV CRIU_COMMIT 584cbe4643c3fc7dc901ff08bf923ca0fe7326f9
RUN set -x \
      && cd /tmp \
      && git clone https://github.com/checkpoint-restore/criu.git \
      && cd criu \
      && git checkout -q "$CRIU_COMMIT" \
      && make \
      && install -D -m 755 criu/criu /usr/sbin/ \
      && rm -rf /tmp/criu
|
||||||
|
|
||||||
# Install cni config
|
# Install cni config
|
||||||
#RUN make install.cni
|
#RUN make install.cni
|
||||||
RUN mkdir -p /etc/cni/net.d/
|
RUN mkdir -p /etc/cni/net.d/
|
||||||
|
73
cmd/podman/checkpoint.go
Normal file
73
cmd/podman/checkpoint.go
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/containers/libpod/cmd/podman/libpodruntime"
|
||||||
|
"github.com/containers/libpod/pkg/rootless"
|
||||||
|
"github.com/pkg/errors"
|
||||||
|
"github.com/urfave/cli"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
checkpointDescription = `
|
||||||
|
podman container checkpoint
|
||||||
|
|
||||||
|
Checkpoints one or more running containers. The container name or ID can be used.
|
||||||
|
`
|
||||||
|
checkpointFlags = []cli.Flag{
|
||||||
|
cli.BoolFlag{
|
||||||
|
Name: "keep, k",
|
||||||
|
Usage: "keep all temporary checkpoint files",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
checkpointCommand = cli.Command{
|
||||||
|
Name: "checkpoint",
|
||||||
|
Usage: "Checkpoints one or more containers",
|
||||||
|
Description: checkpointDescription,
|
||||||
|
Flags: checkpointFlags,
|
||||||
|
Action: checkpointCmd,
|
||||||
|
ArgsUsage: "CONTAINER-NAME [CONTAINER-NAME ...]",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
func checkpointCmd(c *cli.Context) error {
|
||||||
|
if rootless.IsRootless() {
|
||||||
|
return errors.New("checkpointing a container requires root")
|
||||||
|
}
|
||||||
|
|
||||||
|
runtime, err := libpodruntime.GetRuntime(c)
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrapf(err, "could not get runtime")
|
||||||
|
}
|
||||||
|
defer runtime.Shutdown(false)
|
||||||
|
|
||||||
|
keep := c.Bool("keep")
|
||||||
|
args := c.Args()
|
||||||
|
if len(args) < 1 {
|
||||||
|
return errors.Errorf("you must provide at least one container name or id")
|
||||||
|
}
|
||||||
|
|
||||||
|
var lastError error
|
||||||
|
for _, arg := range args {
|
||||||
|
ctr, err := runtime.LookupContainer(arg)
|
||||||
|
if err != nil {
|
||||||
|
if lastError != nil {
|
||||||
|
fmt.Fprintln(os.Stderr, lastError)
|
||||||
|
}
|
||||||
|
lastError = errors.Wrapf(err, "error looking up container %q", arg)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err = ctr.Checkpoint(context.TODO(), keep); err != nil {
|
||||||
|
if lastError != nil {
|
||||||
|
fmt.Fprintln(os.Stderr, lastError)
|
||||||
|
}
|
||||||
|
lastError = errors.Wrapf(err, "failed to checkpoint container %v", ctr.ID())
|
||||||
|
} else {
|
||||||
|
fmt.Println(ctr.ID())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return lastError
|
||||||
|
}
|
@ -7,6 +7,7 @@ import (
|
|||||||
var (
|
var (
|
||||||
subCommands = []cli.Command{
|
subCommands = []cli.Command{
|
||||||
attachCommand,
|
attachCommand,
|
||||||
|
checkpointCommand,
|
||||||
cleanupCommand,
|
cleanupCommand,
|
||||||
commitCommand,
|
commitCommand,
|
||||||
createCommand,
|
createCommand,
|
||||||
@ -23,6 +24,7 @@ var (
|
|||||||
// pruneCommand,
|
// pruneCommand,
|
||||||
refreshCommand,
|
refreshCommand,
|
||||||
restartCommand,
|
restartCommand,
|
||||||
|
restoreCommand,
|
||||||
rmCommand,
|
rmCommand,
|
||||||
runCommand,
|
runCommand,
|
||||||
runlabelCommand,
|
runlabelCommand,
|
||||||
|
73
cmd/podman/restore.go
Normal file
73
cmd/podman/restore.go
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/containers/libpod/cmd/podman/libpodruntime"
|
||||||
|
"github.com/containers/libpod/pkg/rootless"
|
||||||
|
"github.com/pkg/errors"
|
||||||
|
"github.com/urfave/cli"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
restoreDescription = `
|
||||||
|
podman container restore
|
||||||
|
|
||||||
|
Restores a container from a checkpoint. The container name or ID can be used.
|
||||||
|
`
|
||||||
|
restoreFlags = []cli.Flag{
|
||||||
|
cli.BoolFlag{
|
||||||
|
Name: "keep, k",
|
||||||
|
Usage: "keep all temporary checkpoint files",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
restoreCommand = cli.Command{
|
||||||
|
Name: "restore",
|
||||||
|
Usage: "Restores one or more containers from a checkpoint",
|
||||||
|
Description: restoreDescription,
|
||||||
|
Flags: restoreFlags,
|
||||||
|
Action: restoreCmd,
|
||||||
|
ArgsUsage: "CONTAINER-NAME [CONTAINER-NAME ...]",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
func restoreCmd(c *cli.Context) error {
|
||||||
|
if rootless.IsRootless() {
|
||||||
|
return errors.New("restoring a container requires root")
|
||||||
|
}
|
||||||
|
|
||||||
|
runtime, err := libpodruntime.GetRuntime(c)
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrapf(err, "could not get runtime")
|
||||||
|
}
|
||||||
|
defer runtime.Shutdown(false)
|
||||||
|
|
||||||
|
keep := c.Bool("keep")
|
||||||
|
args := c.Args()
|
||||||
|
if len(args) < 1 {
|
||||||
|
return errors.Errorf("you must provide at least one container name or id")
|
||||||
|
}
|
||||||
|
|
||||||
|
var lastError error
|
||||||
|
for _, arg := range args {
|
||||||
|
ctr, err := runtime.LookupContainer(arg)
|
||||||
|
if err != nil {
|
||||||
|
if lastError != nil {
|
||||||
|
fmt.Fprintln(os.Stderr, lastError)
|
||||||
|
}
|
||||||
|
lastError = errors.Wrapf(err, "error looking up container %q", arg)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err = ctr.Restore(context.TODO(), keep); err != nil {
|
||||||
|
if lastError != nil {
|
||||||
|
fmt.Fprintln(os.Stderr, lastError)
|
||||||
|
}
|
||||||
|
lastError = errors.Wrapf(err, "failed to restore container %v", ctr.ID())
|
||||||
|
} else {
|
||||||
|
fmt.Println(ctr.ID())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return lastError
|
||||||
|
}
|
@ -87,6 +87,10 @@ __podman_complete_containers_all() {
|
|||||||
__podman_complete_containers "$@" --all
|
__podman_complete_containers "$@" --all
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Completes container names/IDs, forwarding any caller-supplied arguments to
# __podman_complete_containers together with "--all --filter status=created".
__podman_complete_containers_created() {
	__podman_complete_containers "$@" --all --filter status=created
}
|
||||||
|
|
||||||
__podman_complete_containers_running() {
|
__podman_complete_containers_running() {
|
||||||
__podman_complete_containers "$@" --filter status=running
|
__podman_complete_containers "$@" --filter status=running
|
||||||
}
|
}
|
||||||
@ -710,6 +714,24 @@ _podman_container_attach() {
|
|||||||
_podman_attach
|
_podman_attach
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Completion for "podman container checkpoint".
# When the current word starts with "-", the known options are offered;
# otherwise running containers are offered as candidates.
_podman_container_checkpoint() {
	local options_with_args="
--help -h
"
	local boolean_options="
--keep
-k
"
	case "$cur" in
		-*)
			# Offer every known option that matches the current prefix.
			COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
			;;
		*)
			__podman_complete_containers_running
			;;
	esac
}
|
||||||
|
|
||||||
_podman_container_commit() {
|
_podman_container_commit() {
|
||||||
_podman_commit
|
_podman_commit
|
||||||
}
|
}
|
||||||
@ -770,6 +792,24 @@ _podman_container_restart() {
|
|||||||
_podman_restart
|
_podman_restart
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Completion for "podman container restore".
# When the current word starts with "-", the known options are offered;
# otherwise containers with status "created" are offered as candidates.
_podman_container_restore() {
	local options_with_args="
--help -h
"
	local boolean_options="
--keep
-k
"
	case "$cur" in
		-*)
			# Offer every known option that matches the current prefix.
			COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
			;;
		*)
			# NOTE(review): the e2e test expects a checkpointed container to
			# report status "Exited"; confirm that filtering on
			# status=created actually lists restorable containers.
			__podman_complete_containers_created
			;;
	esac
}
|
||||||
|
|
||||||
_podman_container_rm() {
|
_podman_container_rm() {
|
||||||
_podman_rm
|
_podman_rm
|
||||||
}
|
}
|
||||||
@ -817,6 +857,7 @@ _podman_container() {
|
|||||||
"
|
"
|
||||||
subcommands="
|
subcommands="
|
||||||
attach
|
attach
|
||||||
|
checkpoint
|
||||||
commit
|
commit
|
||||||
create
|
create
|
||||||
diff
|
diff
|
||||||
@ -831,6 +872,7 @@ _podman_container() {
|
|||||||
port
|
port
|
||||||
refresh
|
refresh
|
||||||
restart
|
restart
|
||||||
|
restore
|
||||||
rm
|
rm
|
||||||
run
|
run
|
||||||
start
|
start
|
||||||
|
30
docs/podman-container-checkpoint.1.md
Normal file
30
docs/podman-container-checkpoint.1.md
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
% podman-container-checkpoint(1)
|
||||||
|
|
||||||
|
## NAME
|
||||||
|
podman\-container\-checkpoint - Checkpoints one or more running containers
|
||||||
|
|
||||||
|
## SYNOPSIS
|
||||||
|
**podman container checkpoint** [*options*] *container* ...
|
||||||
|
|
||||||
|
## DESCRIPTION
|
||||||
|
Checkpoints all the processes in one or more containers. You may use container IDs or names as input.
|
||||||
|
|
||||||
|
## OPTIONS
|
||||||
|
**-k**, **--keep**
|
||||||
|
|
||||||
|
Keep all temporary log and statistics files created by CRIU during checkpointing. These files
|
||||||
|
are not deleted if checkpointing fails for further debugging. If checkpointing succeeds these
|
||||||
|
files are theoretically not needed, but if these files are needed Podman can keep the files
|
||||||
|
for further analysis.
|
||||||
|
|
||||||
|
## EXAMPLE
|
||||||
|
|
||||||
|
podman container checkpoint mywebserver
|
||||||
|
|
||||||
|
podman container checkpoint 860a4b23
|
||||||
|
|
||||||
|
## SEE ALSO
|
||||||
|
podman(1), podman-container-restore(1)
|
||||||
|
|
||||||
|
## HISTORY
|
||||||
|
September 2018, Originally compiled by Adrian Reber <areber@redhat.com>
|
37
docs/podman-container-restore.1.md
Normal file
37
docs/podman-container-restore.1.md
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
% podman-container-restore(1)
|
||||||
|
|
||||||
|
## NAME
|
||||||
|
podman\-container\-restore - Restores one or more containers from a checkpoint
|
||||||
|
|
||||||
|
## SYNOPSIS
|
||||||
|
**podman container restore** [*options*] *container* ...
|
||||||
|
|
||||||
|
## DESCRIPTION
|
||||||
|
Restores a container from a checkpoint. You may use container IDs or names as input.
|
||||||
|
|
||||||
|
## OPTIONS
|
||||||
|
**-k**, **--keep**
|
||||||
|
|
||||||
|
Keep all temporary log and statistics files created by CRIU during
|
||||||
|
checkpointing as well as restoring. These files are not deleted if restoring
|
||||||
|
fails for further debugging. If restoring succeeds these files are
|
||||||
|
theoretically not needed, but if these files are needed Podman can keep the
|
||||||
|
files for further analysis. This includes the checkpoint directory with all
|
||||||
|
files created during checkpointing. The size required by the checkpoint
|
||||||
|
directory is roughly the same as the amount of memory required by the
|
||||||
|
processes in the checkpointed container.
|
||||||
|
|
||||||
|
Without the **-k**, **--keep** option the checkpoint will be consumed and cannot be used
|
||||||
|
again.
|
||||||
|
|
||||||
|
## EXAMPLE
|
||||||
|
|
||||||
|
podman container restore mywebserver
|
||||||
|
|
||||||
|
podman container restore 860a4b23
|
||||||
|
|
||||||
|
## SEE ALSO
|
||||||
|
podman(1), podman-container-checkpoint(1)
|
||||||
|
|
||||||
|
## HISTORY
|
||||||
|
September 2018, Originally compiled by Adrian Reber <areber@redhat.com>
|
@ -14,6 +14,7 @@ The container command allows you to manage containers
|
|||||||
| Command | Man Page | Description |
|
| Command | Man Page | Description |
|
||||||
| ------- | --------------------------------------------------- | ---------------------------------------------------------------------------- |
|
| ------- | --------------------------------------------------- | ---------------------------------------------------------------------------- |
|
||||||
| attach | [podman-attach(1)](podman-attach.1.md) | Attach to a running container. |
|
| attach | [podman-attach(1)](podman-attach.1.md) | Attach to a running container. |
|
||||||
|
| checkpoint | [podman-container-checkpoint(1)](podman-container-checkpoint.1.md) | Checkpoints one or more containers. |
|
||||||
| cleanup | [podman-container-cleanup(1)](podman-container-cleanup.1.md) | Cleanup containers network and mountpoints. |
|
| cleanup | [podman-container-cleanup(1)](podman-container-cleanup.1.md) | Cleanup containers network and mountpoints. |
|
||||||
| commit | [podman-commit(1)](podman-commit.1.md) | Create new image based on the changed container. |
|
| commit | [podman-commit(1)](podman-commit.1.md) | Create new image based on the changed container. |
|
||||||
| create | [podman-create(1)](podman-create.1.md) | Create a new container. |
|
| create | [podman-create(1)](podman-create.1.md) | Create a new container. |
|
||||||
@ -29,6 +30,7 @@ The container command allows you to manage containers
|
|||||||
| port | [podman-port(1)](podman-port.1.md) | List port mappings for the container. |
|
| port | [podman-port(1)](podman-port.1.md) | List port mappings for the container. |
|
||||||
| refresh | [podman-refresh(1)](podman-container-refresh.1.md) | Refresh the state of all containers |
|
| refresh | [podman-refresh(1)](podman-container-refresh.1.md) | Refresh the state of all containers |
|
||||||
| restart | [podman-restart(1)](podman-restart.1.md) | Restart one or more containers. |
|
| restart | [podman-restart(1)](podman-restart.1.md) | Restart one or more containers. |
|
||||||
|
| restore | [podman-container-restore(1)](podman-container-restore.1.md) | Restores one or more containers from a checkpoint. |
|
||||||
| rm | [podman-rm(1)](podman-rm.1.md) | Remove one or more containers. |
|
| rm | [podman-rm(1)](podman-rm.1.md) | Remove one or more containers. |
|
||||||
| run | [podman-run(1)](podman-run.1.md) | Run a command in a container. |
|
| run | [podman-run(1)](podman-run.1.md) | Run a command in a container. |
|
||||||
| start | [podman-start(1)](podman-start.1.md) | Starts one or more containers. |
|
| start | [podman-start(1)](podman-start.1.md) | Starts one or more containers. |
|
||||||
|
@ -157,6 +157,28 @@ $ sudo podman top <container_id>
|
|||||||
101 31889 31873 0 09:21 ? 00:00:00 nginx: worker process
|
101 31889 31873 0 09:21 ? 00:00:00 nginx: worker process
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Checkpointing the container
|
||||||
|
Checkpointing a container stops the container while writing the state of all processes in the container to disk.
|
||||||
|
With this a container can later be restored and continue running at exactly the same point in time as the
|
||||||
|
checkpoint. This capability requires CRIU 3.11 or later installed on the system.
|
||||||
|
To checkpoint the container use:
|
||||||
|
```console
|
||||||
|
$ sudo podman container checkpoint <container_id>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restoring the container
|
||||||
|
Restoring a container is only possible for a previously checkpointed container. The restored container will
|
||||||
|
continue to run at exactly the same point in time it was checkpointed.
|
||||||
|
To restore the container use:
|
||||||
|
```console
|
||||||
|
$ sudo podman container restore <container_id>
|
||||||
|
```
|
||||||
|
|
||||||
|
After being restored, the container will answer requests again as it did before checkpointing.
|
||||||
|
```console
|
||||||
|
# curl http://<IP_address>:8080
|
||||||
|
```
|
||||||
|
|
||||||
### Stopping the container
|
### Stopping the container
|
||||||
To stop the httpd container:
|
To stop the httpd container:
|
||||||
```console
|
```console
|
||||||
|
@ -832,3 +832,33 @@ func (c *Container) Refresh(ctx context.Context) error {
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checkpoint checkpoints a container
|
||||||
|
func (c *Container) Checkpoint(ctx context.Context, keep bool) error {
|
||||||
|
logrus.Debugf("Trying to checkpoint container %s", c)
|
||||||
|
if !c.batched {
|
||||||
|
c.lock.Lock()
|
||||||
|
defer c.lock.Unlock()
|
||||||
|
|
||||||
|
if err := c.syncContainer(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.checkpoint(ctx, keep)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restore restores a container
|
||||||
|
func (c *Container) Restore(ctx context.Context, keep bool) (err error) {
|
||||||
|
logrus.Debugf("Trying to restore container %s", c)
|
||||||
|
if !c.batched {
|
||||||
|
c.lock.Lock()
|
||||||
|
defer c.lock.Unlock()
|
||||||
|
|
||||||
|
if err := c.syncContainer(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.restore(ctx, keep)
|
||||||
|
}
|
||||||
|
@ -129,6 +129,11 @@ func (c *Container) ControlSocketPath() string {
|
|||||||
return filepath.Join(c.bundlePath(), "ctl")
|
return filepath.Join(c.bundlePath(), "ctl")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CheckpointPath returns the path to the directory containing the checkpoint
|
||||||
|
func (c *Container) CheckpointPath() string {
|
||||||
|
return filepath.Join(c.bundlePath(), "checkpoint")
|
||||||
|
}
|
||||||
|
|
||||||
// AttachSocketPath retrieves the path of the container's attach socket
|
// AttachSocketPath retrieves the path of the container's attach socket
|
||||||
func (c *Container) AttachSocketPath() string {
|
func (c *Container) AttachSocketPath() string {
|
||||||
return filepath.Join(c.runtime.ociRuntime.socketsDir, c.ID(), "attach")
|
return filepath.Join(c.runtime.ociRuntime.socketsDir, c.ID(), "attach")
|
||||||
@ -523,7 +528,7 @@ func (c *Container) init(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// With the spec complete, do an OCI create
|
// With the spec complete, do an OCI create
|
||||||
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent); err != nil {
|
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, false); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,12 +4,18 @@ package libpod
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"net"
|
||||||
|
"os"
|
||||||
"path"
|
"path"
|
||||||
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
cnitypes "github.com/containernetworking/cni/pkg/types/current"
|
||||||
crioAnnotations "github.com/containers/libpod/pkg/annotations"
|
crioAnnotations "github.com/containers/libpod/pkg/annotations"
|
||||||
"github.com/containers/libpod/pkg/chrootuser"
|
"github.com/containers/libpod/pkg/chrootuser"
|
||||||
"github.com/containers/libpod/pkg/rootless"
|
"github.com/containers/libpod/pkg/rootless"
|
||||||
@ -307,3 +313,155 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) {
|
||||||
|
|
||||||
|
if c.state.State != ContainerStateRunning {
|
||||||
|
return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
|
||||||
|
}
|
||||||
|
if err := c.runtime.ociRuntime.checkpointContainer(c); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save network.status. This is needed to restore the container with
|
||||||
|
// the same IP. Currently limited to one IP address in a container
|
||||||
|
// with one interface.
|
||||||
|
formatJSON, err := json.MarshalIndent(c.state.NetworkStatus, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := ioutil.WriteFile(filepath.Join(c.bundlePath(), "network.status"), formatJSON, 0644); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
logrus.Debugf("Checkpointed container %s", c.ID())
|
||||||
|
|
||||||
|
c.state.State = ContainerStateStopped
|
||||||
|
|
||||||
|
// Cleanup Storage and Network
|
||||||
|
if err := c.cleanup(ctx); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if !keep {
|
||||||
|
// Remove log file
|
||||||
|
os.Remove(filepath.Join(c.bundlePath(), "dump.log"))
|
||||||
|
// Remove statistic file
|
||||||
|
os.Remove(filepath.Join(c.bundlePath(), "stats-dump"))
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.save()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Container) restore(ctx context.Context, keep bool) (err error) {
|
||||||
|
|
||||||
|
if (c.state.State != ContainerStateConfigured) && (c.state.State != ContainerStateExited) {
|
||||||
|
return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
|
||||||
|
// no sense to try a restore. This is a minimal check if a checkpoint exist.
|
||||||
|
if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
|
||||||
|
return errors.Wrapf(err, "A complete checkpoint for this container cannot be found, cannot restore")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read network configuration from checkpoint
|
||||||
|
// Currently only one interface with one IP is supported.
|
||||||
|
networkStatusFile, err := os.Open(filepath.Join(c.bundlePath(), "network.status"))
|
||||||
|
if err == nil {
|
||||||
|
// The file with the network.status does exist. Let's restore the
|
||||||
|
// container with the same IP address as during checkpointing.
|
||||||
|
defer networkStatusFile.Close()
|
||||||
|
var networkStatus []*cnitypes.Result
|
||||||
|
networkJSON, err := ioutil.ReadAll(networkStatusFile)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
json.Unmarshal(networkJSON, &networkStatus)
|
||||||
|
// Take the first IP address
|
||||||
|
var IP net.IP
|
||||||
|
if len(networkStatus) > 0 {
|
||||||
|
if len(networkStatus[0].IPs) > 0 {
|
||||||
|
IP = networkStatus[0].IPs[0].Address.IP
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if IP != nil {
|
||||||
|
env := fmt.Sprintf("IP=%s", IP)
|
||||||
|
// Tell CNI which IP address we want.
|
||||||
|
os.Setenv("CNI_ARGS", env)
|
||||||
|
logrus.Debugf("Restoring container with %s", env)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := c.prepare(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
if err2 := c.cleanup(ctx); err2 != nil {
|
||||||
|
logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// TODO: use existing way to request static IPs, once it is merged in ocicni
|
||||||
|
// https://github.com/cri-o/ocicni/pull/23/
|
||||||
|
|
||||||
|
// CNI_ARGS was used to request a certain IP address. Unconditionally remove it.
|
||||||
|
os.Unsetenv("CNI_ARGS")
|
||||||
|
|
||||||
|
// Read config
|
||||||
|
jsonPath := filepath.Join(c.bundlePath(), "config.json")
|
||||||
|
logrus.Debugf("generate.NewFromFile at %v", jsonPath)
|
||||||
|
g, err := generate.NewFromFile(jsonPath)
|
||||||
|
if err != nil {
|
||||||
|
logrus.Debugf("generate.NewFromFile failed with %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// We want to have the same network namespace as before.
|
||||||
|
if c.config.CreateNetNS {
|
||||||
|
g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save the OCI spec to disk
|
||||||
|
if err := c.saveSpec(g.Spec()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := c.makeBindMounts(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup for a working restore.
|
||||||
|
c.removeConmonFiles()
|
||||||
|
|
||||||
|
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, true); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
logrus.Debugf("Restored container %s", c.ID())
|
||||||
|
|
||||||
|
c.state.State = ContainerStateRunning
|
||||||
|
|
||||||
|
if !keep {
|
||||||
|
// Delete all checkpoint related files. At this point, in theory, all files
|
||||||
|
// should exist. Still ignoring errors for now as the container should be
|
||||||
|
// restored and running. Not erroring out just because some cleanup operation
|
||||||
|
// failed. Starting with the checkpoint directory
|
||||||
|
err = os.RemoveAll(c.CheckpointPath())
|
||||||
|
if err != nil {
|
||||||
|
logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
|
||||||
|
}
|
||||||
|
cleanup := [...]string{"restore.log", "dump.log", "stats-dump", "stats-restore", "network.status"}
|
||||||
|
for _, delete := range cleanup {
|
||||||
|
file := filepath.Join(c.bundlePath(), delete)
|
||||||
|
err = os.Remove(file)
|
||||||
|
if err != nil {
|
||||||
|
logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.save()
|
||||||
|
}
|
||||||
|
@ -27,3 +27,11 @@ func (c *Container) cleanupNetwork() error {
|
|||||||
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
|
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
|
||||||
return nil, ErrNotImplemented
|
return nil, ErrNotImplemented
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// checkpoint is a stub that always returns ErrNotImplemented.
// NOTE(review): this appears to be the variant built for platforms without
// checkpoint support — confirm against the file's build tags.
func (c *Container) checkpoint(ctx context.Context, keep bool) error {
	return ErrNotImplemented
}
|
||||||
|
|
||||||
|
// restore is a stub that always returns ErrNotImplemented.
// NOTE(review): this appears to be the variant built for platforms without
// restore support — confirm against the file's build tags.
func (c *Container) restore(ctx context.Context, keep bool) error {
	return ErrNotImplemented
}
|
||||||
|
@ -227,7 +227,7 @@ func bindPorts(ports []ocicni.PortMapping) ([]*os.File, error) {
|
|||||||
return files, nil
|
return files, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string) (err error) {
|
func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) {
|
||||||
var stderrBuf bytes.Buffer
|
var stderrBuf bytes.Buffer
|
||||||
|
|
||||||
runtimeDir, err := GetRootlessRuntimeDir()
|
runtimeDir, err := GetRootlessRuntimeDir()
|
||||||
@ -289,6 +289,10 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string) (er
|
|||||||
args = append(args, "--syslog")
|
args = append(args, "--syslog")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if restoreContainer {
|
||||||
|
args = append(args, "--restore", ctr.CheckpointPath())
|
||||||
|
}
|
||||||
|
|
||||||
logrus.WithFields(logrus.Fields{
|
logrus.WithFields(logrus.Fields{
|
||||||
"args": args,
|
"args": args,
|
||||||
}).Debugf("running conmon: %s", r.conmonPath)
|
}).Debugf("running conmon: %s", r.conmonPath)
|
||||||
@ -766,3 +770,15 @@ func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error {
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// checkpointContainer checkpoints the given container
|
||||||
|
func (r *OCIRuntime) checkpointContainer(ctr *Container) error {
|
||||||
|
// imagePath is used by CRIU to store the actual checkpoint files
|
||||||
|
imagePath := ctr.CheckpointPath()
|
||||||
|
// workPath will be used to store dump.log and stats-dump
|
||||||
|
workPath := ctr.bundlePath()
|
||||||
|
logrus.Debugf("Writing checkpoint to %s", imagePath)
|
||||||
|
logrus.Debugf("Writing checkpoint logs to %s", workPath)
|
||||||
|
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, "checkpoint",
|
||||||
|
"--image-path", imagePath, "--work-path", workPath, ctr.ID())
|
||||||
|
}
|
||||||
|
@ -63,10 +63,10 @@ func newPipe() (parent *os.File, child *os.File, err error) {
|
|||||||
// CreateContainer creates a container in the OCI runtime
|
// CreateContainer creates a container in the OCI runtime
|
||||||
// TODO terminal support for container
|
// TODO terminal support for container
|
||||||
// Presently just ignoring conmon opts related to it
|
// Presently just ignoring conmon opts related to it
|
||||||
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) {
|
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) {
|
||||||
if ctr.state.UserNSRoot == "" {
|
if ctr.state.UserNSRoot == "" {
|
||||||
// no need of an intermediate mount ns
|
// no need of an intermediate mount ns
|
||||||
return r.createOCIContainer(ctr, cgroupParent)
|
return r.createOCIContainer(ctr, cgroupParent, restoreContainer)
|
||||||
}
|
}
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
@ -103,7 +103,7 @@ func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err e
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
err = r.createOCIContainer(ctr, cgroupParent)
|
err = r.createOCIContainer(ctr, cgroupParent, restoreContainer)
|
||||||
}()
|
}()
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ func newPipe() (parent *os.File, child *os.File, err error) {
|
|||||||
return nil, nil, ErrNotImplemented
|
return nil, nil, ErrNotImplemented
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) {
|
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) {
|
||||||
return ErrNotImplemented
|
return ErrNotImplemented
|
||||||
}
|
}
|
||||||
|
|
||||||
|
129
test/e2e/checkpoint_test.go
Normal file
129
test/e2e/checkpoint_test.go
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
package integration
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = Describe("Podman checkpoint", func() {
|
||||||
|
var (
|
||||||
|
tempdir string
|
||||||
|
err error
|
||||||
|
podmanTest PodmanTest
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
tempdir, err = CreateTempDirInTempDir()
|
||||||
|
if err != nil {
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
podmanTest = PodmanCreate(tempdir)
|
||||||
|
podmanTest.RestoreAllArtifacts()
|
||||||
|
// At least CRIU 3.11 is needed
|
||||||
|
skip, err := podmanTest.isCriuAtLeast(31100)
|
||||||
|
if err != nil || skip {
|
||||||
|
Skip("CRIU missing or too old.")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
AfterEach(func() {
|
||||||
|
podmanTest.Cleanup()
|
||||||
|
f := CurrentGinkgoTestDescription()
|
||||||
|
timedResult := fmt.Sprintf("Test: %s completed in %f seconds", f.TestText, f.Duration.Seconds())
|
||||||
|
GinkgoWriter.Write([]byte(timedResult))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("podman checkpoint bogus container", func() {
|
||||||
|
session := podmanTest.Podman([]string{"container", "checkpoint", "foobar"})
|
||||||
|
session.WaitWithDefaultTimeout()
|
||||||
|
Expect(session.ExitCode()).To(Not(Equal(0)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("podman restore bogus container", func() {
|
||||||
|
session := podmanTest.Podman([]string{"container", "restore", "foobar"})
|
||||||
|
session.WaitWithDefaultTimeout()
|
||||||
|
Expect(session.ExitCode()).To(Not(Equal(0)))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("podman checkpoint a running container by id", func() {
|
||||||
|
// CRIU does not work with seccomp correctly on RHEL7
|
||||||
|
session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "-d", ALPINE, "top"})
|
||||||
|
session.WaitWithDefaultTimeout()
|
||||||
|
Expect(session.ExitCode()).To(Equal(0))
|
||||||
|
cid := session.OutputToString()
|
||||||
|
|
||||||
|
result := podmanTest.Podman([]string{"container", "checkpoint", cid})
|
||||||
|
result.WaitWithDefaultTimeout()
|
||||||
|
|
||||||
|
Expect(result.ExitCode()).To(Equal(0))
|
||||||
|
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
|
||||||
|
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
|
||||||
|
|
||||||
|
result = podmanTest.Podman([]string{"container", "restore", cid})
|
||||||
|
result.WaitWithDefaultTimeout()
|
||||||
|
|
||||||
|
Expect(result.ExitCode()).To(Equal(0))
|
||||||
|
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
|
||||||
|
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("podman checkpoint a running container by name", func() {
|
||||||
|
session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "--name", "test_name", "-d", ALPINE, "top"})
|
||||||
|
session.WaitWithDefaultTimeout()
|
||||||
|
Expect(session.ExitCode()).To(Equal(0))
|
||||||
|
|
||||||
|
result := podmanTest.Podman([]string{"container", "checkpoint", "test_name"})
|
||||||
|
result.WaitWithDefaultTimeout()
|
||||||
|
|
||||||
|
Expect(result.ExitCode()).To(Equal(0))
|
||||||
|
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
|
||||||
|
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
|
||||||
|
|
||||||
|
result = podmanTest.Podman([]string{"container", "restore", "test_name"})
|
||||||
|
result.WaitWithDefaultTimeout()
|
||||||
|
|
||||||
|
Expect(result.ExitCode()).To(Equal(0))
|
||||||
|
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
|
||||||
|
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("podman pause a checkpointed container by id", func() {
|
||||||
|
session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "-d", ALPINE, "top"})
|
||||||
|
session.WaitWithDefaultTimeout()
|
||||||
|
Expect(session.ExitCode()).To(Equal(0))
|
||||||
|
cid := session.OutputToString()
|
||||||
|
|
||||||
|
result := podmanTest.Podman([]string{"container", "checkpoint", cid})
|
||||||
|
result.WaitWithDefaultTimeout()
|
||||||
|
|
||||||
|
Expect(result.ExitCode()).To(Equal(0))
|
||||||
|
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
|
||||||
|
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
|
||||||
|
|
||||||
|
result = podmanTest.Podman([]string{"pause", cid})
|
||||||
|
result.WaitWithDefaultTimeout()
|
||||||
|
|
||||||
|
Expect(result.ExitCode()).To(Equal(125))
|
||||||
|
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
|
||||||
|
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
|
||||||
|
|
||||||
|
result = podmanTest.Podman([]string{"container", "restore", cid})
|
||||||
|
result.WaitWithDefaultTimeout()
|
||||||
|
Expect(result.ExitCode()).To(Equal(0))
|
||||||
|
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
|
||||||
|
|
||||||
|
result = podmanTest.Podman([]string{"rm", cid})
|
||||||
|
result.WaitWithDefaultTimeout()
|
||||||
|
Expect(result.ExitCode()).To(Equal(125))
|
||||||
|
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
|
||||||
|
|
||||||
|
result = podmanTest.Podman([]string{"rm", "-f", cid})
|
||||||
|
result.WaitWithDefaultTimeout()
|
||||||
|
Expect(result.ExitCode()).To(Equal(0))
|
||||||
|
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
|
||||||
|
|
||||||
|
})
|
||||||
|
})
|
@ -2,6 +2,7 @@ package integration
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
@ -64,6 +65,7 @@ type PodmanTest struct {
|
|||||||
TempDir string
|
TempDir string
|
||||||
CgroupManager string
|
CgroupManager string
|
||||||
Host HostOS
|
Host HostOS
|
||||||
|
CriuBinary string
|
||||||
}
|
}
|
||||||
|
|
||||||
// HostOS is a simple struct for the test os
|
// HostOS is a simple struct for the test os
|
||||||
@ -164,6 +166,7 @@ func PodmanCreate(tempDir string) PodmanTest {
|
|||||||
runCBinary = "/usr/bin/runc"
|
runCBinary = "/usr/bin/runc"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
criuBinary := "/usr/sbin/criu"
|
||||||
CNIConfigDir := "/etc/cni/net.d"
|
CNIConfigDir := "/etc/cni/net.d"
|
||||||
|
|
||||||
p := PodmanTest{
|
p := PodmanTest{
|
||||||
@ -179,6 +182,7 @@ func PodmanCreate(tempDir string) PodmanTest {
|
|||||||
TempDir: tempDir,
|
TempDir: tempDir,
|
||||||
CgroupManager: cgroupManager,
|
CgroupManager: cgroupManager,
|
||||||
Host: host,
|
Host: host,
|
||||||
|
CriuBinary: criuBinary,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setup registries.conf ENV variable
|
// Setup registries.conf ENV variable
|
||||||
@ -678,6 +682,39 @@ func (p *PodmanTest) setRegistriesConfigEnv(b []byte) {
|
|||||||
ioutil.WriteFile(outfile, b, 0644)
|
ioutil.WriteFile(outfile, b, 0644)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *PodmanTest) isCriuAtLeast(version int) (bool, error) {
|
||||||
|
cmd := exec.Command(p.CriuBinary, "-V")
|
||||||
|
var out bytes.Buffer
|
||||||
|
cmd.Stdout = &out
|
||||||
|
err := cmd.Run()
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var x int
|
||||||
|
var y int
|
||||||
|
var z int
|
||||||
|
|
||||||
|
fmt.Sscanf(out.String(), "Version: %d.%d.%d", &x, &y, &z)
|
||||||
|
|
||||||
|
if strings.Contains(out.String(), "GitID") {
|
||||||
|
// If CRIU is built from git it contains a git ID.
|
||||||
|
// If that is the case, increase minor by one as this
|
||||||
|
// could mean we are running a development version.
|
||||||
|
y = y + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
parsed_version := x*10000 + y*100 + z
|
||||||
|
|
||||||
|
fmt.Println(parsed_version)
|
||||||
|
|
||||||
|
if parsed_version >= version {
|
||||||
|
return false, nil
|
||||||
|
} else {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// resetRegistriesConfigEnv sets the REGISTRIES_CONFIG_PATH environment
// variable to the empty string. The error returned by os.Setenv is ignored.
func resetRegistriesConfigEnv() {
	const registriesConfigEnv = "REGISTRIES_CONFIG_PATH"
	os.Setenv(registriesConfigEnv, "")
}
|
||||||
|
Reference in New Issue
Block a user