Added option to keep containers running after checkpointing

CRIU supports leaving processes running after checkpointing:

  -R|--leave-running    leave tasks in running state after checkpoint

runc also supports leaving containers running after checkpointing:

   --leave-running      leave the process running after checkpointing

This commit brings support for leaving a container running after
checkpointing to Podman:

   --leave-running, -R  leave the container running after writing checkpoint to disk

Now it is possible to checkpoint a container at some point in time
without stopping the container. This can be used to roll back the
container to an earlier state:

$ podman run --tmpfs /tmp --name podman-criu-test -d docker://docker.io/yovfiatbeb/podman-criu-test
$ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample
3
$ podman container checkpoint -R -l
$ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample
4
$ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample
5
$ podman stop -l
$ podman container restore -l
$ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample
4

So after checkpointing, the container kept running and was stopped some
time later. Restoring the container brings it back to the state it had
at the time of the checkpoint.

Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
Adrian Reber
2018-11-20 15:34:15 +00:00
committed by Adrian Reber
parent ff47a4c2d5
commit b0572d6229
4 changed files with 27 additions and 10 deletions

View File

@ -23,6 +23,10 @@ var (
Name: "keep, k", Name: "keep, k",
Usage: "keep all temporary checkpoint files", Usage: "keep all temporary checkpoint files",
}, },
cli.BoolFlag{
Name: "leave-running, R",
Usage: "leave the container running after writing checkpoint to disk",
},
cli.BoolFlag{ cli.BoolFlag{
Name: "all, a", Name: "all, a",
Usage: "checkpoint all running containers", Usage: "checkpoint all running containers",
@ -51,7 +55,8 @@ func checkpointCmd(c *cli.Context) error {
defer runtime.Shutdown(false) defer runtime.Shutdown(false)
options := libpod.ContainerCheckpointOptions{ options := libpod.ContainerCheckpointOptions{
Keep: c.Bool("keep"), Keep: c.Bool("keep"),
KeepRunning: c.Bool("leave-running"),
} }
if err := checkAllAndLatest(c); err != nil { if err := checkAllAndLatest(c); err != nil {

View File

@ -833,7 +833,8 @@ func (c *Container) Refresh(ctx context.Context) error {
// ContainerCheckpointOptions is a struct used to pass the parameters // ContainerCheckpointOptions is a struct used to pass the parameters
// for checkpointing to corresponding functions // for checkpointing to corresponding functions
type ContainerCheckpointOptions struct { type ContainerCheckpointOptions struct {
Keep bool Keep bool
KeepRunning bool
} }
// Checkpoint checkpoints a container // Checkpoint checkpoints a container

View File

@ -440,7 +440,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
if c.state.State != ContainerStateRunning { if c.state.State != ContainerStateRunning {
return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
} }
if err := c.runtime.ociRuntime.checkpointContainer(c); err != nil { if err := c.runtime.ociRuntime.checkpointContainer(c, options); err != nil {
return err return err
} }
@ -457,11 +457,13 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
logrus.Debugf("Checkpointed container %s", c.ID()) logrus.Debugf("Checkpointed container %s", c.ID())
c.state.State = ContainerStateStopped if !options.KeepRunning {
c.state.State = ContainerStateStopped
// Cleanup Storage and Network // Cleanup Storage and Network
if err := c.cleanup(ctx); err != nil { if err := c.cleanup(ctx); err != nil {
return err return err
}
} }
if !options.Keep { if !options.Keep {

View File

@ -844,13 +844,22 @@ func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error {
} }
// checkpointContainer checkpoints the given container // checkpointContainer checkpoints the given container
func (r *OCIRuntime) checkpointContainer(ctr *Container) error { func (r *OCIRuntime) checkpointContainer(ctr *Container, options ContainerCheckpointOptions) error {
// imagePath is used by CRIU to store the actual checkpoint files // imagePath is used by CRIU to store the actual checkpoint files
imagePath := ctr.CheckpointPath() imagePath := ctr.CheckpointPath()
// workPath will be used to store dump.log and stats-dump // workPath will be used to store dump.log and stats-dump
workPath := ctr.bundlePath() workPath := ctr.bundlePath()
logrus.Debugf("Writing checkpoint to %s", imagePath) logrus.Debugf("Writing checkpoint to %s", imagePath)
logrus.Debugf("Writing checkpoint logs to %s", workPath) logrus.Debugf("Writing checkpoint logs to %s", workPath)
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, "checkpoint", args := []string{}
"--image-path", imagePath, "--work-path", workPath, ctr.ID()) args = append(args, "checkpoint")
args = append(args, "--image-path")
args = append(args, imagePath)
args = append(args, "--work-path")
args = append(args, workPath)
if options.KeepRunning {
args = append(args, "--leave-running")
}
args = append(args, ctr.ID())
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...)
} }