Merge pull request #1848 from adrianreber/master

Add tcp-established to checkpoint/restore
This commit is contained in:
OpenShift Merge Robot
2018-11-28 07:00:24 -08:00
committed by GitHub
16 changed files with 245 additions and 36 deletions

View File

@ -18,8 +18,8 @@ gce_instance:
env: env:
FEDORA_CNI_COMMIT: "412b6d31280682bb4fab4446f113c22ff1886554" FEDORA_CNI_COMMIT: "412b6d31280682bb4fab4446f113c22ff1886554"
CNI_COMMIT: "7480240de9749f9a0a5c8614b17f1f03e0c06ab9" CNI_COMMIT: "7480240de9749f9a0a5c8614b17f1f03e0c06ab9"
CRIO_COMMIT: "662dbb31b5d4f5ed54511a47cde7190c61c28677" CRIO_COMMIT: "7a283c391abb7bd25086a8ff91dbb36ebdd24466"
CRIU_COMMIT: "584cbe4643c3fc7dc901ff08bf923ca0fe7326f9" CRIU_COMMIT: "c74b83cd49c00589c0c0468ba5fe685b67fdbd0a"
RUNC_COMMIT: "78ef28e63bec2ee4c139b5e3e0d691eb9bdc748d" RUNC_COMMIT: "78ef28e63bec2ee4c139b5e3e0d691eb9bdc748d"
# File to update in home-dir with task-specific env. var values # File to update in home-dir with task-specific env. var values
ENVLIB: ".bash_profile" ENVLIB: ".bash_profile"

View File

@ -64,7 +64,7 @@ RUN set -x \
&& rm -rf "$GOPATH" && rm -rf "$GOPATH"
# Install conmon # Install conmon
ENV CRIO_COMMIT 662dbb31b5d4f5ed54511a47cde7190c61c28677 ENV CRIO_COMMIT 7a283c391abb7bd25086a8ff91dbb36ebdd24466
RUN set -x \ RUN set -x \
&& export GOPATH="$(mktemp -d)" \ && export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/kubernetes-sigs/cri-o.git "$GOPATH/src/github.com/kubernetes-sigs/cri-o.git" \ && git clone https://github.com/kubernetes-sigs/cri-o.git "$GOPATH/src/github.com/kubernetes-sigs/cri-o.git" \
@ -112,8 +112,7 @@ RUN set -x \
&& go get -u github.com/mailru/easyjson/... \ && go get -u github.com/mailru/easyjson/... \
&& install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/ && install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/
# Install criu # Install latest stable criu version
ENV CRIU_COMMIT 584cbe4643c3fc7dc901ff08bf923ca0fe7326f9
RUN set -x \ RUN set -x \
&& cd /tmp \ && cd /tmp \
&& git clone https://github.com/checkpoint-restore/criu.git \ && git clone https://github.com/checkpoint-restore/criu.git \

View File

@ -68,7 +68,7 @@ RUN set -x \
&& install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/ && install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/
# Install conmon # Install conmon
ENV CRIO_COMMIT 662dbb31b5d4f5ed54511a47cde7190c61c28677 ENV CRIO_COMMIT 7a283c391abb7bd25086a8ff91dbb36ebdd24466
RUN set -x \ RUN set -x \
&& export GOPATH="$(mktemp -d)" \ && export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/kubernetes-sigs/cri-o.git "$GOPATH/src/github.com/kubernetes-sigs/cri-o.git" \ && git clone https://github.com/kubernetes-sigs/cri-o.git "$GOPATH/src/github.com/kubernetes-sigs/cri-o.git" \

View File

@ -72,7 +72,7 @@ RUN set -x \
&& install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/ && install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/
# Install conmon # Install conmon
ENV CRIO_COMMIT 662dbb31b5d4f5ed54511a47cde7190c61c28677 ENV CRIO_COMMIT 7a283c391abb7bd25086a8ff91dbb36ebdd24466
RUN set -x \ RUN set -x \
&& export GOPATH="$(mktemp -d)" \ && export GOPATH="$(mktemp -d)" \
&& git clone https://github.com/kubernetes-sigs/cri-o.git "$GOPATH/src/github.com/kubernetes-sigs/cri-o.git" \ && git clone https://github.com/kubernetes-sigs/cri-o.git "$GOPATH/src/github.com/kubernetes-sigs/cri-o.git" \

View File

@ -27,6 +27,10 @@ var (
Name: "leave-running, R", Name: "leave-running, R",
Usage: "leave the container running after writing checkpoint to disk", Usage: "leave the container running after writing checkpoint to disk",
}, },
cli.BoolFlag{
Name: "tcp-established",
Usage: "checkpoint a container with established TCP connections",
},
cli.BoolFlag{ cli.BoolFlag{
Name: "all, a", Name: "all, a",
Usage: "checkpoint all running containers", Usage: "checkpoint all running containers",
@ -57,6 +61,7 @@ func checkpointCmd(c *cli.Context) error {
options := libpod.ContainerCheckpointOptions{ options := libpod.ContainerCheckpointOptions{
Keep: c.Bool("keep"), Keep: c.Bool("keep"),
KeepRunning: c.Bool("leave-running"), KeepRunning: c.Bool("leave-running"),
TCPEstablished: c.Bool("tcp-established"),
} }
if err := checkAllAndLatest(c); err != nil { if err := checkAllAndLatest(c); err != nil {

View File

@ -26,6 +26,10 @@ var (
// restore --all would make more sense if there would be // restore --all would make more sense if there would be
// dedicated state for container which are checkpointed. // dedicated state for container which are checkpointed.
// TODO: add ContainerStateCheckpointed // TODO: add ContainerStateCheckpointed
cli.BoolFlag{
	Name: "tcp-established",
	// This flag is read by restoreCmd, so the help text describes the
	// restore operation rather than repeating the checkpoint wording.
	Usage: "restore a container with established TCP connections",
},
cli.BoolFlag{ cli.BoolFlag{
Name: "all, a", Name: "all, a",
Usage: "restore all checkpointed containers", Usage: "restore all checkpointed containers",
@ -53,16 +57,19 @@ func restoreCmd(c *cli.Context) error {
} }
defer runtime.Shutdown(false) defer runtime.Shutdown(false)
keep := c.Bool("keep") options := libpod.ContainerCheckpointOptions{
Keep: c.Bool("keep"),
TCPEstablished: c.Bool("tcp-established"),
}
if err := checkAllAndLatest(c); err != nil { if err := checkAllAndLatest(c); err != nil {
return err return err
} }
containers, lastError := getAllOrLatestContainers(c, runtime, libpod.ContainerStateRunning, "checkpointed") containers, lastError := getAllOrLatestContainers(c, runtime, libpod.ContainerStateExited, "checkpointed")
for _, ctr := range containers { for _, ctr := range containers {
if err = ctr.Restore(context.TODO(), keep); err != nil { if err = ctr.Restore(context.TODO(), options); err != nil {
if lastError != nil { if lastError != nil {
fmt.Fprintln(os.Stderr, lastError) fmt.Fprintln(os.Stderr, lastError)
} }

View File

@ -716,16 +716,22 @@ _podman_container_attach() {
} }
_podman_container_checkpoint() { _podman_container_checkpoint() {
local options_with_args="
--help -h
"
local boolean_options=" local boolean_options="
--keep -a
--all
-h
--help
-k -k
--keep
-l
--latest
-R
--leave-running
--tcp-established
" "
case "$cur" in case "$cur" in
-*) -*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur")) COMPREPLY=($(compgen -W "$boolean_options" -- "$cur"))
;; ;;
*) *)
__podman_complete_containers_running __podman_complete_containers_running
@ -794,16 +800,20 @@ _podman_container_restart() {
} }
_podman_container_restore() { _podman_container_restore() {
local options_with_args="
--help -h
"
local boolean_options=" local boolean_options="
--keep -a
--all
-h
--help
-k -k
--keep
-l
--latest
--tcp-established
" "
case "$cur" in case "$cur" in
-*) -*)
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur")) COMPREPLY=($(compgen -W "$boolean_options" -- "$cur"))
;; ;;
*) *)
__podman_complete_containers_created __podman_complete_containers_created

View File

@ -29,6 +29,13 @@ Instead of providing the container name or ID, checkpoint the last created conta
Leave the container running after checkpointing instead of stopping it. Leave the container running after checkpointing instead of stopping it.
**--tcp-established**
Checkpoint a container with established TCP connections. If the checkpoint
image contains established TCP connections, this option is required during
restore. Defaults to not checkpointing containers with established TCP
connections.
## EXAMPLE ## EXAMPLE
podman container checkpoint mywebserver podman container checkpoint mywebserver

View File

@ -32,6 +32,14 @@ Restore all checkpointed containers.
Instead of providing the container name or ID, restore the last created container. Instead of providing the container name or ID, restore the last created container.
**--tcp-established**
Restore a container with established TCP connections. If the checkpoint image
contains established TCP connections, this option is required during restore.
If the checkpoint image does not contain established TCP connections this
option is ignored. Defaults to not restoring containers with established TCP
connections.
## EXAMPLE ## EXAMPLE
podman container restore mywebserver podman container restore mywebserver

View File

@ -833,10 +833,16 @@ func (c *Container) Refresh(ctx context.Context) error {
} }
// ContainerCheckpointOptions is a struct used to pass the parameters // ContainerCheckpointOptions is a struct used to pass the parameters
// for checkpointing to corresponding functions // for checkpointing (and restoring) to the corresponding functions
type ContainerCheckpointOptions struct { type ContainerCheckpointOptions struct {
// Keep tells the API to not delete checkpoint artifacts
Keep bool Keep bool
// KeepRunning tells the API to keep the container running
// after writing the checkpoint to disk
KeepRunning bool KeepRunning bool
// TCPEstablished tells the API to checkpoint a container
// even if it contains established TCP connections
TCPEstablished bool
} }
// Checkpoint checkpoints a container // Checkpoint checkpoints a container
@ -855,7 +861,7 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO
} }
// Restore restores a container // Restore restores a container
func (c *Container) Restore(ctx context.Context, keep bool) (err error) { func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOptions) (err error) {
logrus.Debugf("Trying to restore container %s", c) logrus.Debugf("Trying to restore container %s", c)
if !c.batched { if !c.batched {
c.lock.Lock() c.lock.Lock()
@ -866,5 +872,5 @@ func (c *Container) Restore(ctx context.Context, keep bool) (err error) {
} }
} }
return c.restore(ctx, keep) return c.restore(ctx, options)
} }

View File

@ -606,7 +606,7 @@ func (c *Container) init(ctx context.Context) error {
} }
// With the spec complete, do an OCI create // With the spec complete, do an OCI create
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, false); err != nil { if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, nil); err != nil {
return err return err
} }

View File

@ -514,7 +514,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
return c.save() return c.save()
} }
func (c *Container) restore(ctx context.Context, keep bool) (err error) { func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (err error) {
if !criu.CheckForCriu() { if !criu.CheckForCriu() {
return errors.Errorf("restoring a container requires at least CRIU %d", criu.MinCriuVersion) return errors.Errorf("restoring a container requires at least CRIU %d", criu.MinCriuVersion)
@ -602,7 +602,7 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) {
// Cleanup for a working restore. // Cleanup for a working restore.
c.removeConmonFiles() c.removeConmonFiles()
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, true); err != nil { if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, &options); err != nil {
return err return err
} }
@ -610,7 +610,7 @@ func (c *Container) restore(ctx context.Context, keep bool) (err error) {
c.state.State = ContainerStateRunning c.state.State = ContainerStateRunning
if !keep { if !options.Keep {
// Delete all checkpoint related files. At this point, in theory, all files // Delete all checkpoint related files. At this point, in theory, all files
// should exist. Still ignoring errors for now as the container should be // should exist. Still ignoring errors for now as the container should be
// restored and running. Not erroring out just because some cleanup operation // restored and running. Not erroring out just because some cleanup operation

View File

@ -227,7 +227,7 @@ func bindPorts(ports []ocicni.PortMapping) ([]*os.File, error) {
return files, nil return files, nil
} }
func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) { func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) {
var stderrBuf bytes.Buffer var stderrBuf bytes.Buffer
runtimeDir, err := util.GetRootlessRuntimeDir() runtimeDir, err := util.GetRootlessRuntimeDir()
@ -289,8 +289,11 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, res
args = append(args, "--syslog") args = append(args, "--syslog")
} }
if restoreContainer { if restoreOptions != nil {
args = append(args, "--restore", ctr.CheckpointPath()) args = append(args, "--restore", ctr.CheckpointPath())
if restoreOptions.TCPEstablished {
args = append(args, "--restore-arg", "--tcp-established")
}
} }
logrus.WithFields(logrus.Fields{ logrus.WithFields(logrus.Fields{
@ -866,6 +869,9 @@ func (r *OCIRuntime) checkpointContainer(ctr *Container, options ContainerCheckp
if options.KeepRunning { if options.KeepRunning {
args = append(args, "--leave-running") args = append(args, "--leave-running")
} }
if options.TCPEstablished {
args = append(args, "--tcp-established")
}
args = append(args, ctr.ID()) args = append(args, ctr.ID())
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...) return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...)
} }

View File

@ -65,10 +65,10 @@ func newPipe() (parent *os.File, child *os.File, err error) {
// CreateContainer creates a container in the OCI runtime // CreateContainer creates a container in the OCI runtime
// TODO terminal support for container // TODO terminal support for container
// Presently just ignoring conmon opts related to it // Presently just ignoring conmon opts related to it
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) { func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) {
if ctr.state.UserNSRoot == "" { if ctr.state.UserNSRoot == "" {
// no need of an intermediate mount ns // no need of an intermediate mount ns
return r.createOCIContainer(ctr, cgroupParent, restoreContainer) return r.createOCIContainer(ctr, cgroupParent, restoreOptions)
} }
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(1) wg.Add(1)
@ -106,7 +106,7 @@ func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restor
if err != nil { if err != nil {
return return
} }
err = r.createOCIContainer(ctr, cgroupParent, restoreContainer) err = r.createOCIContainer(ctr, cgroupParent, restoreOptions)
}() }()
wg.Wait() wg.Wait()

View File

@ -15,7 +15,7 @@ func newPipe() (parent *os.File, child *os.File, err error) {
return nil, nil, ErrNotImplemented return nil, nil, ErrNotImplemented
} }
func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) { func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) {
return ErrNotImplemented return ErrNotImplemented
} }

View File

@ -2,6 +2,7 @@ package integration
import ( import (
"fmt" "fmt"
"net"
"os" "os"
"github.com/containers/libpod/pkg/criu" "github.com/containers/libpod/pkg/criu"
@ -126,4 +127,164 @@ var _ = Describe("Podman checkpoint", func() {
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
}) })
// Starts two containers, checkpoints with "-l", checks that only the first
// one is still listed as running, then restores with "-l" and cleans up.
It("podman checkpoint latest running container", func() {
	// expectSuccess runs one podman invocation to completion and requires
	// an exit code of 0.
	expectSuccess := func(args ...string) {
		cmd := podmanTest.Podman(args)
		cmd.WaitWithDefaultTimeout()
		Expect(cmd.ExitCode()).To(Equal(0))
	}

	older := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "--name", "first", "-d", ALPINE, "top"})
	older.WaitWithDefaultTimeout()
	Expect(older.ExitCode()).To(Equal(0))

	newer := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "--name", "second", "-d", ALPINE, "top"})
	newer.WaitWithDefaultTimeout()
	Expect(newer.ExitCode()).To(Equal(0))

	// After checkpointing with "-l" exactly one container must remain running.
	expectSuccess("container", "checkpoint", "-l")
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))

	// The running list must still contain "first" but no longer "second".
	listing := podmanTest.Podman([]string{"ps", "-q", "--no-trunc"})
	listing.WaitWithDefaultTimeout()
	Expect(listing.ExitCode()).To(Equal(0))
	Expect(listing.LineInOutputContains(older.OutputToString())).To(BeTrue())
	Expect(listing.LineInOutputContains(newer.OutputToString())).To(BeFalse())

	// Restoring with "-l" must bring both containers back to running.
	expectSuccess("container", "restore", "-l")
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(2))
	Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))
	Expect(podmanTest.GetContainerStatus()).To(Not(ContainSubstring("Exited")))

	// Remove everything so no container is left running.
	expectSuccess("rm", "-fa")
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
})
// Starts two containers, checkpoints them all with "-a", checks that neither
// is listed as running, then restores them all with "-a" and cleans up.
It("podman checkpoint all running container", func() {
	// expectSuccess runs one podman invocation to completion and requires
	// an exit code of 0.
	expectSuccess := func(args ...string) {
		cmd := podmanTest.Podman(args)
		cmd.WaitWithDefaultTimeout()
		Expect(cmd.ExitCode()).To(Equal(0))
	}

	firstCtr := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "--name", "first", "-d", ALPINE, "top"})
	firstCtr.WaitWithDefaultTimeout()
	Expect(firstCtr.ExitCode()).To(Equal(0))

	secondCtr := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "--name", "second", "-d", ALPINE, "top"})
	secondCtr.WaitWithDefaultTimeout()
	Expect(secondCtr.ExitCode()).To(Equal(0))

	// After checkpointing with "-a" no container may remain running.
	expectSuccess("container", "checkpoint", "-a")
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))

	// Neither container ID may show up in the running list.
	listing := podmanTest.Podman([]string{"ps", "-q", "--no-trunc"})
	listing.WaitWithDefaultTimeout()
	Expect(listing.ExitCode()).To(Equal(0))
	Expect(listing.LineInOutputContains(firstCtr.OutputToString())).To(BeFalse())
	Expect(listing.LineInOutputContains(secondCtr.OutputToString())).To(BeFalse())

	// Restoring with "-a" must bring both containers back to running.
	expectSuccess("container", "restore", "-a")
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(2))
	Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))
	Expect(podmanTest.GetContainerStatus()).To(Not(ContainSubstring("Exited")))

	// Remove everything so no container is left running.
	expectSuccess("rm", "-fa")
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
})
// Exercises --tcp-established end to end: with an open TCP connection to the
// container, checkpoint and restore are each expected to fail (exit 125)
// without the flag and to succeed with it. Currently skipped in CI.
It("podman checkpoint container with established tcp connections", func() {
	Skip("Seems to not work (yet) in CI")
	podmanTest.RestoreArtifact(redis)
	session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "--network", "host", "-d", redis})
	session.WaitWithDefaultTimeout()
	Expect(session.ExitCode()).To(Equal(0))

	// Open a network connection to the redis server
	conn, err := net.Dial("tcp", "127.0.0.1:6379")
	// Report a dial failure as a failed spec; calling os.Exit here would
	// terminate the whole test binary and bypass the framework's reporting.
	Expect(err).To(BeNil())
	// Deferred so the connection is closed even if a later expectation
	// aborts the spec early.
	defer conn.Close()

	// This should fail as the container has established TCP connections
	result := podmanTest.Podman([]string{"container", "checkpoint", "-l"})
	result.WaitWithDefaultTimeout()
	Expect(result.ExitCode()).To(Equal(125))
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
	Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))

	// Now it should work thanks to "--tcp-established"
	result = podmanTest.Podman([]string{"container", "checkpoint", "-l", "--tcp-established"})
	result.WaitWithDefaultTimeout()
	Expect(result.ExitCode()).To(Equal(0))
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
	Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))

	// Restore should fail as the checkpoint image contains established TCP connections
	result = podmanTest.Podman([]string{"container", "restore", "-l"})
	result.WaitWithDefaultTimeout()
	Expect(result.ExitCode()).To(Equal(125))
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
	Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))

	// Now it should work thanks to "--tcp-established"
	result = podmanTest.Podman([]string{"container", "restore", "-l", "--tcp-established"})
	result.WaitWithDefaultTimeout()
	Expect(result.ExitCode()).To(Equal(0))
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
	Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))

	// Remove everything so no container is left running.
	result = podmanTest.Podman([]string{"rm", "-fa"})
	result.WaitWithDefaultTimeout()
	Expect(result.ExitCode()).To(Equal(0))
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
})
// Checkpoints a container with --leave-running, confirms it keeps running,
// stops it manually, and then restores it from the earlier checkpoint.
It("podman checkpoint with --leave-running", func() {
	// expectSuccess runs one podman invocation to completion and requires
	// an exit code of 0.
	expectSuccess := func(args ...string) {
		cmd := podmanTest.Podman(args)
		cmd.WaitWithDefaultTimeout()
		Expect(cmd.ExitCode()).To(Equal(0))
	}

	started := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "-d", ALPINE, "top"})
	started.WaitWithDefaultTimeout()
	Expect(started.ExitCode()).To(Equal(0))
	containerID := started.OutputToString()

	// Write the checkpoint while asking podman to keep the container alive.
	expectSuccess("container", "checkpoint", "--leave-running", containerID)

	// The container must still be up after the checkpoint.
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
	Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))

	// Stop it explicitly; it must then be reported as exited.
	expectSuccess("container", "stop", containerID)
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
	Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))

	// Bring the stopped container back from the previously written checkpoint.
	expectSuccess("container", "restore", containerID)
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
	Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))

	// Remove everything so no container is left running.
	expectSuccess("rm", "-fa")
	Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
})
}) })