Added optional container restore statistics

This adds the parameter '--print-stats' to 'podman container restore'.
With '--print-stats' Podman will measure how long Podman itself, the OCI
runtime and CRIU requires to restore a checkpoint and print out these
information. CRIU already creates process restore statistics which are
just read in addition to the added measurements. In contrast to just
printing out the ID of the restored container, Podman will now print
out JSON:

 # podman container restore --latest --print-stats
 {
     "podman_restore_duration": 305871,
     "container_statistics": [
         {
             "Id": "47b02e1d474b5d5fe917825e91ac653efa757c91e5a81a368d771a78f6b5ed20",
             "runtime_restore_duration": 140614,
             "criu_statistics": {
                 "forking_time": 5,
                 "restore_time": 67672,
                 "pages_restored": 14
             }
         }
     ]
 }

The output contains 'podman_restore_duration' which contains the
number of microseconds Podman required to restore the checkpoint. The
output also includes 'runtime_restore_duration' which is the time
the runtime needed to restore that specific container. Each container
also includes 'criu_statistics' which displays the timing information
collected by CRIU.

Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
Adrian Reber
2021-11-09 21:21:07 +00:00
parent 6202e8102b
commit 80e56fa12b
11 changed files with 186 additions and 83 deletions

View File

@ -1264,7 +1264,7 @@ func (c *Container) importPreCheckpoint(input string) error {
return nil
}
func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (retErr error) {
func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) {
minCriuVersion := func() int {
if options.Pod == "" {
return criu.MinCriuVersion
@ -1272,37 +1272,37 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
return criu.PodCriuVersion
}()
if err := c.checkpointRestoreSupported(minCriuVersion); err != nil {
return err
return nil, 0, err
}
if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) {
return errors.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
return nil, 0, errors.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
}
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID())
return nil, 0, errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID())
}
if options.ImportPrevious != "" {
if err := c.importPreCheckpoint(options.ImportPrevious); err != nil {
return err
return nil, 0, err
}
}
if options.TargetFile != "" {
if err := c.importCheckpoint(options.TargetFile); err != nil {
return err
return nil, 0, err
}
}
// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
// no sense to try a restore. This is a minimal check if a checkpoint exist.
if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) {
return errors.Wrapf(err, "a complete checkpoint for this container cannot be found, cannot restore")
return nil, 0, errors.Wrapf(err, "a complete checkpoint for this container cannot be found, cannot restore")
}
if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "restore.log", c.MountLabel()); err != nil {
return err
return nil, 0, err
}
// If a container is restored multiple times from an exported checkpoint with
@ -1339,7 +1339,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
// container with the same networks settings as during checkpointing.
aliases, err := c.GetAllNetworkAliases()
if err != nil {
return err
return nil, 0, err
}
netOpts := make(map[string]types.PerNetworkOptions, len(netStatus))
for network, status := range netStatus {
@ -1364,7 +1364,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
if perNetOpts.InterfaceName == "" {
eth, exists := c.state.NetInterfaceDescriptions.getInterfaceByName(network)
if !exists {
return errors.Errorf("no network interface name for container %s on network %s", c.config.ID, network)
return nil, 0, errors.Errorf("no network interface name for container %s on network %s", c.config.ID, network)
}
perNetOpts.InterfaceName = eth
}
@ -1382,7 +1382,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
}()
if err := c.prepare(); err != nil {
return err
return nil, 0, err
}
// Read config
@ -1391,7 +1391,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
g, err := generate.NewFromFile(jsonPath)
if err != nil {
logrus.Debugf("generate.NewFromFile failed with %v", err)
return err
return nil, 0, err
}
// Restoring from an import means that we are doing migration
@ -1407,7 +1407,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), netNSPath); err != nil {
return err
return nil, 0, err
}
}
@ -1416,23 +1416,23 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
// the ones from the infrastructure container.
pod, err := c.runtime.LookupPod(options.Pod)
if err != nil {
return errors.Wrapf(err, "pod %q cannot be retrieved", options.Pod)
return nil, 0, errors.Wrapf(err, "pod %q cannot be retrieved", options.Pod)
}
infraContainer, err := pod.InfraContainer()
if err != nil {
return errors.Wrapf(err, "cannot retrieved infra container from pod %q", options.Pod)
return nil, 0, errors.Wrapf(err, "cannot retrieved infra container from pod %q", options.Pod)
}
infraContainer.lock.Lock()
if err := infraContainer.syncContainer(); err != nil {
infraContainer.lock.Unlock()
return errors.Wrapf(err, "Error syncing infrastructure container %s status", infraContainer.ID())
return nil, 0, errors.Wrapf(err, "Error syncing infrastructure container %s status", infraContainer.ID())
}
if infraContainer.state.State != define.ContainerStateRunning {
if err := infraContainer.initAndStart(ctx); err != nil {
infraContainer.lock.Unlock()
return errors.Wrapf(err, "Error starting infrastructure container %s status", infraContainer.ID())
return nil, 0, errors.Wrapf(err, "Error starting infrastructure container %s status", infraContainer.ID())
}
}
infraContainer.lock.Unlock()
@ -1440,56 +1440,56 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
if c.config.IPCNsCtr != "" {
nsPath, err := infraContainer.namespacePath(IPCNS)
if err != nil {
return errors.Wrapf(err, "cannot retrieve IPC namespace path for Pod %q", options.Pod)
return nil, 0, errors.Wrapf(err, "cannot retrieve IPC namespace path for Pod %q", options.Pod)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil {
return err
return nil, 0, err
}
}
if c.config.NetNsCtr != "" {
nsPath, err := infraContainer.namespacePath(NetNS)
if err != nil {
return errors.Wrapf(err, "cannot retrieve network namespace path for Pod %q", options.Pod)
return nil, 0, errors.Wrapf(err, "cannot retrieve network namespace path for Pod %q", options.Pod)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil {
return err
return nil, 0, err
}
}
if c.config.PIDNsCtr != "" {
nsPath, err := infraContainer.namespacePath(PIDNS)
if err != nil {
return errors.Wrapf(err, "cannot retrieve PID namespace path for Pod %q", options.Pod)
return nil, 0, errors.Wrapf(err, "cannot retrieve PID namespace path for Pod %q", options.Pod)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil {
return err
return nil, 0, err
}
}
if c.config.UTSNsCtr != "" {
nsPath, err := infraContainer.namespacePath(UTSNS)
if err != nil {
return errors.Wrapf(err, "cannot retrieve UTS namespace path for Pod %q", options.Pod)
return nil, 0, errors.Wrapf(err, "cannot retrieve UTS namespace path for Pod %q", options.Pod)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil {
return err
return nil, 0, err
}
}
if c.config.CgroupNsCtr != "" {
nsPath, err := infraContainer.namespacePath(CgroupNS)
if err != nil {
return errors.Wrapf(err, "cannot retrieve Cgroup namespace path for Pod %q", options.Pod)
return nil, 0, errors.Wrapf(err, "cannot retrieve Cgroup namespace path for Pod %q", options.Pod)
}
if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil {
return err
return nil, 0, err
}
}
}
if err := c.makeBindMounts(); err != nil {
return err
return nil, 0, err
}
if options.TargetFile != "" {
@ -1511,12 +1511,12 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
// Cleanup for a working restore.
if err := c.removeConmonFiles(); err != nil {
return err
return nil, 0, err
}
// Save the OCI spec to disk
if err := c.saveSpec(g.Config); err != nil {
return err
return nil, 0, err
}
// When restoring from an imported archive, allow restoring the content of volumes.
@ -1527,24 +1527,24 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
volumeFile, err := os.Open(volumeFilePath)
if err != nil {
return errors.Wrapf(err, "failed to open volume file %s", volumeFilePath)
return nil, 0, errors.Wrapf(err, "failed to open volume file %s", volumeFilePath)
}
defer volumeFile.Close()
volume, err := c.runtime.GetVolume(v.Name)
if err != nil {
return errors.Wrapf(err, "failed to retrieve volume %s", v.Name)
return nil, 0, errors.Wrapf(err, "failed to retrieve volume %s", v.Name)
}
mountPoint, err := volume.MountPoint()
if err != nil {
return err
return nil, 0, err
}
if mountPoint == "" {
return errors.Wrapf(err, "unable to import volume %s as it is not mounted", volume.Name())
return nil, 0, errors.Wrapf(err, "unable to import volume %s as it is not mounted", volume.Name())
}
if err := archive.UntarUncompressed(volumeFile, mountPoint, nil); err != nil {
return errors.Wrapf(err, "Failed to extract volume %s to %s", volumeFilePath, mountPoint)
return nil, 0, errors.Wrapf(err, "Failed to extract volume %s to %s", volumeFilePath, mountPoint)
}
}
}
@ -1552,16 +1552,43 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
// Before actually restarting the container, apply the root file-system changes
if !options.IgnoreRootfs {
if err := crutils.CRApplyRootFsDiffTar(c.bundlePath(), c.state.Mountpoint); err != nil {
return err
return nil, 0, err
}
if err := crutils.CRRemoveDeletedFiles(c.ID(), c.bundlePath(), c.state.Mountpoint); err != nil {
return err
return nil, 0, err
}
}
if err := c.ociRuntime.CreateContainer(c, &options); err != nil {
return err
runtimeRestoreDuration, err = c.ociRuntime.CreateContainer(c, &options)
if err != nil {
return nil, 0, err
}
criuStatistics, err = func() (*define.CRIUCheckpointRestoreStatistics, error) {
if !options.PrintStats {
return nil, nil
}
statsDirectory, err := os.Open(c.bundlePath())
if err != nil {
return nil, errors.Wrapf(err, "Not able to open %q", c.bundlePath())
}
restoreStatistics, err := stats.CriuGetRestoreStats(statsDirectory)
if err != nil {
return nil, errors.Wrap(err, "Displaying restore statistics not possible")
}
return &define.CRIUCheckpointRestoreStatistics{
PagesCompared: restoreStatistics.GetPagesCompared(),
PagesSkippedCow: restoreStatistics.GetPagesSkippedCow(),
ForkingTime: restoreStatistics.GetForkingTime(),
RestoreTime: restoreStatistics.GetRestoreTime(),
PagesRestored: restoreStatistics.GetPagesRestored(),
}, nil
}()
if err != nil {
return nil, 0, err
}
logrus.Debugf("Restored container %s", c.ID())
@ -1600,7 +1627,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
}
}
return c.save()
return criuStatistics, runtimeRestoreDuration, c.save()
}
// Retrieves a container's "root" net namespace container dependency.