Merge pull request #12672 from adrianreber/2021-12-21-check-for-memtrack

Error out early if system does not support pre-copy checkpointing
This commit is contained in:
OpenShift Merge Robot
2021-12-23 13:16:10 +01:00
committed by GitHub
11 changed files with 119 additions and 9 deletions

View File

@ -11,6 +11,7 @@ import (
"github.com/containers/podman/v3/cmd/podman/registry"
"github.com/containers/podman/v3/cmd/podman/utils"
"github.com/containers/podman/v3/cmd/podman/validate"
"github.com/containers/podman/v3/pkg/criu"
"github.com/containers/podman/v3/pkg/domain/entities"
"github.com/containers/podman/v3/pkg/rootless"
"github.com/containers/storage/pkg/archive"
@ -113,6 +114,9 @@ func checkpoint(cmd *cobra.Command, args []string) error {
if checkpointOptions.WithPrevious && checkpointOptions.PreCheckPoint {
return errors.Errorf("--with-previous can not be used with --pre-checkpoint")
}
if (checkpointOptions.WithPrevious || checkpointOptions.PreCheckPoint) && !criu.MemTrack() {
return errors.New("system (architecture/kernel/CRIU) does not support memory tracking")
}
responses, err := registry.ContainerEngine().ContainerCheckpoint(context.Background(), args, checkpointOptions)
if err != nil {
return err

View File

@ -70,6 +70,13 @@ Dump the *container's* memory information only, leaving the *container* running.
operations will supersede prior dumps. It only works on `runc 1.0-rc3` or `higher`.\
The default is **false**.
Checkpointing only the memory of the container and then, in a second
checkpoint, writing out only the memory pages that have changed since the first
checkpoint relies on the Linux kernel's soft-dirty bit. The soft-dirty bit is
not available on all systems, as it depends on the system architecture and the
configuration of the Linux kernel. Podman verifies whether the current system
supports this functionality and returns an error if it does not.
#### **--print-stats**
Print out statistics about checkpointing the container(s). The output is
@ -126,6 +133,11 @@ Check out the *container* with previous criu image files in pre-dump. It only wo
The default is **false**.\
*IMPORTANT: This OPTION is not available with __--pre-checkpoint__*.
This option requires that the option __--pre-checkpoint__ has been used before on the
same container. Without an existing pre-checkpoint, this option will fail.
See also __--pre-checkpoint__ for additional information about the availability
of this functionality on different systems.
## EXAMPLES
Make a checkpoint for the container "mywebserver".

3
go.mod
View File

@ -7,7 +7,7 @@ require (
github.com/blang/semver v3.5.1+incompatible
github.com/buger/goterm v0.0.0-20181115115552-c206103e1f37
github.com/checkpoint-restore/checkpointctl v0.0.0-20211204171957-54b4ebfdb681
github.com/checkpoint-restore/go-criu/v5 v5.2.0
github.com/checkpoint-restore/go-criu/v5 v5.3.0
github.com/container-orchestrated-devices/container-device-interface v0.0.0-20210325223243-f99e8b6c10b9
github.com/containernetworking/cni v1.0.1
github.com/containernetworking/plugins v1.0.1
@ -66,6 +66,7 @@ require (
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
golang.org/x/sys v0.0.0-20211205182925-97ca703d548d
google.golang.org/protobuf v1.27.1
gopkg.in/fsnotify.v1 v1.4.7 // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
k8s.io/api v0.22.4

3
go.sum
View File

@ -155,8 +155,9 @@ github.com/checkpoint-restore/checkpointctl v0.0.0-20211204171957-54b4ebfdb681 h
github.com/checkpoint-restore/checkpointctl v0.0.0-20211204171957-54b4ebfdb681/go.mod h1:67kWC1PXQLR3lM/mmNnu3Kzn7K4TSWZAGUuQP1JSngk=
github.com/checkpoint-restore/go-criu/v4 v4.1.0/go.mod h1:xUQBLp4RLc5zJtWY++yjOoMoB5lihDt7fai+75m+rGw=
github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M=
github.com/checkpoint-restore/go-criu/v5 v5.2.0 h1:QwsRK9EdBr2kQr44DqSdBrP4dULp2+4EkqounYQOnF8=
github.com/checkpoint-restore/go-criu/v5 v5.2.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
github.com/checkpoint-restore/go-criu/v5 v5.3.0 h1:wpFFOoomK3389ue2lAb0Boag6XPht5QYpipxmSNL4d8=
github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8=

View File

@ -1,7 +1,12 @@
// +build linux
package criu
import (
"github.com/checkpoint-restore/go-criu/v5"
"github.com/checkpoint-restore/go-criu/v5/rpc"
"google.golang.org/protobuf/proto"
)
// MinCriuVersion for Podman at least CRIU 3.11 is required
@ -21,3 +26,20 @@ func CheckForCriu(version int) bool {
}
return result
}
func MemTrack() bool {
features, err := criu.MakeCriu().FeatureCheck(
&rpc.CriuFeatures{
MemTrack: proto.Bool(true),
},
)
if err != nil {
return false
}
if features == nil || features.MemTrack == nil {
return false
}
return *features.MemTrack
}

View File

@ -0,0 +1,7 @@
// +build !linux
package criu
// MemTrack always reports false in this build: this variant of the function
// performs no CRIU feature check and unconditionally treats memory tracking
// as unavailable.
func MemTrack() bool {
	return false
}

View File

@ -939,6 +939,9 @@ var _ = Describe("Podman checkpoint", func() {
})
It("podman checkpoint container with --pre-checkpoint", func() {
if !criu.MemTrack() {
Skip("system (architecture/kernel/CRIU) does not support memory tracking")
}
if !strings.Contains(podmanTest.OCIRuntime, "runc") {
Skip("Test only works on runc 1.0-rc3 or higher.")
}
@ -972,6 +975,9 @@ var _ = Describe("Podman checkpoint", func() {
It("podman checkpoint container with --pre-checkpoint and export (migration)", func() {
SkipIfRemote("--import-previous is not yet supported on the remote client")
if !criu.MemTrack() {
Skip("system (architecture/kernel/CRIU) does not support memory tracking")
}
if !strings.Contains(podmanTest.OCIRuntime, "runc") {
Skip("Test only works on runc 1.0-rc3 or higher.")
}

View File

@ -2,6 +2,11 @@ SHELL = /bin/bash
GO ?= go
CC ?= gcc
COVERAGE_PATH ?= $(shell pwd)/.coverage
CRIU_FEATURE_MEM_TRACK = $(shell if criu check --feature mem_dirty_track > /dev/null; then echo 1; else echo 0; fi)
CRIU_FEATURE_LAZY_PAGES = $(shell if criu check --feature uffd-noncoop > /dev/null; then echo 1; else echo 0; fi)
CRIU_FEATURE_PIDFD_STORE = $(shell if criu check --feature pidfd_store > /dev/null; then echo 1; else echo 0; fi)
export CRIU_FEATURE_MEM_TRACK CRIU_FEATURE_LAZY_PAGES CRIU_FEATURE_PIDFD_STORE
all: build test phaul-test
@ -70,6 +75,8 @@ coverage: $(COVERAGE_BINARIES) $(TEST_PAYLOAD)
test/phaul/phaul.coverage -test.coverprofile=coverprofile.integration.$$RANDOM -test.outputdir=${COVERAGE_PATH} COVERAGE $$PID; \
pkill -9 piggie; \
}
echo "mode: set" > .coverage/coverage.out && cat .coverage/coverprofile* | \
grep -v mode: | sort -r | awk '{if($$1 != last) {print $$0;last=$$1}}' >> .coverage/coverage.out
clean:
@rm -f $(TEST_BINARIES) $(COVERAGE_BINARIES) codecov
@ -95,6 +102,6 @@ vendor:
codecov:
curl -Os https://uploader.codecov.io/latest/linux/codecov
chmod +x codecov
./codecov -f '.coverage/*'
./codecov -f '.coverage/coverage.out'
.PHONY: build test phaul-test test-bin clean lint vendor coverage codecov

View File

@ -0,0 +1,45 @@
package criu
import (
"fmt"
"github.com/checkpoint-restore/go-criu/v5/rpc"
)
// FeatureCheck asks CRIU which of the requested features are available.
//
// Feature checking in go-criu is based on the libcriu feature checking
// function. It allows the user to check if CRIU supports certain features.
// There are CRIU features which do not depend on the version of CRIU but on
// kernel features or architecture.
//
// One example is memory tracking. Memory tracking can be disabled
// in the kernel or there are architectures which do not support
// it (aarch64 for example). By using the feature check a libcriu
// user can easily query CRIU if a certain feature is available.
//
// The features which should be checked are marked in the features argument.
// Each member that is set to true will result in CRIU checking for the
// availability of that feature in the current combination of
// CRIU/kernel/architecture.
//
// On success the feature set carried by CRIU's response is returned, so that
// available features are reported as true and missing features as false.
// The features argument itself is only used as the request.
// NOTE(review): the returned pointer is whatever the response carries and is
// presumably nil if CRIU sends no feature information — callers should check.
//
// An error is returned if the RPC call fails or if CRIU answers with a
// response type other than FEATURE_CHECK.
func (c *Criu) FeatureCheck(features *rpc.CriuFeatures) (*rpc.CriuFeatures, error) {
	resp, err := c.doSwrkWithResp(
		rpc.CriuReqType_FEATURE_CHECK,
		nil,
		nil,
		features,
	)
	if err != nil {
		return nil, err
	}

	if resp.GetType() != rpc.CriuReqType_FEATURE_CHECK {
		return nil, fmt.Errorf("unexpected CRIU RPC response")
	}

	// Return what CRIU reported, not the request: echoing the request back
	// would report every requested feature as available.
	return resp.GetFeatures(), nil
}

View File

@ -87,19 +87,19 @@ func (c *Criu) sendAndRecv(reqB []byte) ([]byte, int, error) {
}
func (c *Criu) doSwrk(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify) error {
resp, err := c.doSwrkWithResp(reqType, opts, nfy)
resp, err := c.doSwrkWithResp(reqType, opts, nfy, nil)
if err != nil {
return err
}
respType := resp.GetType()
if respType != reqType {
return errors.New("unexpected responce")
return errors.New("unexpected CRIU RPC response")
}
return nil
}
func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify) (*rpc.CriuResp, error) {
func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify, features *rpc.CriuFeatures) (*rpc.CriuResp, error) {
var resp *rpc.CriuResp
req := rpc.CriuReq{
@ -111,6 +111,10 @@ func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy N
opts.NotifyScripts = proto.Bool(true)
}
if features != nil {
req.Features = features
}
if c.swrkCmd == nil {
err := c.Prepare()
if err != nil {
@ -209,7 +213,7 @@ func (c *Criu) StartPageServer(opts *rpc.CriuOpts) error {
// StartPageServerChld starts the page server and returns PID and port
func (c *Criu) StartPageServerChld(opts *rpc.CriuOpts) (int, int, error) {
resp, err := c.doSwrkWithResp(rpc.CriuReqType_PAGE_SERVER_CHLD, opts, nil)
resp, err := c.doSwrkWithResp(rpc.CriuReqType_PAGE_SERVER_CHLD, opts, nil, nil)
if err != nil {
return 0, 0, err
}
@ -220,7 +224,7 @@ func (c *Criu) StartPageServerChld(opts *rpc.CriuOpts) (int, int, error) {
// GetCriuVersion executes the VERSION RPC call and returns the version
// as an integer. Major * 10000 + Minor * 100 + SubLevel
func (c *Criu) GetCriuVersion() (int, error) {
resp, err := c.doSwrkWithResp(rpc.CriuReqType_VERSION, nil, nil)
resp, err := c.doSwrkWithResp(rpc.CriuReqType_VERSION, nil, nil, nil)
if err != nil {
return 0, err
}

3
vendor/modules.txt vendored
View File

@ -49,7 +49,7 @@ github.com/cespare/xxhash/v2
# github.com/checkpoint-restore/checkpointctl v0.0.0-20211204171957-54b4ebfdb681
## explicit
github.com/checkpoint-restore/checkpointctl/lib
# github.com/checkpoint-restore/go-criu/v5 v5.2.0
# github.com/checkpoint-restore/go-criu/v5 v5.3.0
## explicit
github.com/checkpoint-restore/go-criu/v5
github.com/checkpoint-restore/go-criu/v5/magic
@ -818,6 +818,7 @@ google.golang.org/grpc/stats
google.golang.org/grpc/status
google.golang.org/grpc/tap
# google.golang.org/protobuf v1.27.1
## explicit
google.golang.org/protobuf/encoding/prototext
google.golang.org/protobuf/encoding/protowire
google.golang.org/protobuf/internal/descfmt