Add a MissingRuntime implementation

When a container is created with a given OCI runtime, but that
runtime is later uninstalled or removed from the configuration
file, Libpod presently reacts very poorly. The EvictContainer code
can potentially remove these containers, but we still can't see
them in `podman ps` (aside from the massive logrus.Errorf messages
they create).

Providing a minimal OCI runtime implementation for missing
runtimes allows us to behave better. We'll be able to retrieve
containers from the database, though we still pop up an error for
each missing runtime. For containers which are stopped, we can
remove them as normal.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
This commit is contained in:
Matthew Heon
2019-10-15 15:11:26 -04:00
parent 5f72e6ef2e
commit cab7bfbb21
7 changed files with 236 additions and 36 deletions

View File

@ -396,7 +396,11 @@ func (s *BoltState) getContainerFromDB(id []byte, ctr *Container, ctrsBkt *bolt.
ociRuntime, ok := s.runtime.ociRuntimes[runtimeName]
if !ok {
return errors.Wrapf(define.ErrOCIRuntimeUnavailable, "cannot find OCI runtime %q for container %s", ctr.config.OCIRuntime, ctr.ID())
// Use a MissingRuntime implementation
ociRuntime, err = getMissingRuntime(runtimeName, s.runtime)
if err != nil {
return err
}
}
ctr.ociRuntime = ociRuntime
}

View File

@ -656,7 +656,7 @@ func (c *Container) Sync() error {
(c.state.State != define.ContainerStateConfigured) &&
(c.state.State != define.ContainerStateExited) {
oldState := c.state.State
if err := c.ociRuntime.UpdateContainerStatus(c, true); err != nil {
if err := c.ociRuntime.UpdateContainerStatus(c); err != nil {
return err
}
// Only save back to DB if state changed

View File

@ -252,7 +252,7 @@ func (c *Container) waitForExitFileAndSync() error {
return err
}
if err := c.ociRuntime.UpdateContainerStatus(c, false); err != nil {
if err := c.checkExitFile(); err != nil {
return err
}
@ -386,10 +386,11 @@ func (c *Container) syncContainer() error {
(c.state.State != define.ContainerStateConfigured) &&
(c.state.State != define.ContainerStateExited) {
oldState := c.state.State
// TODO: optionally replace this with a stat for the exit file
if err := c.ociRuntime.UpdateContainerStatus(c, false); err != nil {
if err := c.checkExitFile(); err != nil {
return err
}
// Only save back to DB if state changed
if c.state.State != oldState {
// Check for a restart policy match
@ -1811,3 +1812,35 @@ func (c *Container) sortUserVolumes(ctrSpec *spec.Spec) ([]*ContainerNamedVolume
}
return namedUserVolumes, userMounts
}
// checkExitFile looks for the container's exit file and, if one is present,
// moves the in-memory state to Stopped and hands the file to handleExitFile.
//
// It is a no-op (returns nil) when the container is neither Running nor
// Paused, and when the exit file does not exist yet. Any other stat failure
// is returned wrapped with the container ID.
func (c *Container) checkExitFile() error {
	// Only a running or paused container can have newly exited.
	switch c.state.State {
	case define.ContainerStateRunning, define.ContainerStatePaused:
		// Keep going and look for the exit file.
	default:
		return nil
	}

	path, err := c.exitFilePath()
	if err != nil {
		return err
	}

	fileInfo, statErr := os.Stat(path)
	switch {
	case statErr == nil:
		// Exit file is present; handled below.
	case os.IsNotExist(statErr):
		// No exit file yet: the container is still running, not an error.
		return nil
	default:
		return errors.Wrapf(statErr, "error running stat on container %s exit file", c.ID())
	}

	// The exit file exists, so the container has stopped. Clear the PIDs
	// recorded in the state along with the state transition.
	c.state.State = define.ContainerStateStopped
	c.state.PID = 0
	c.state.ConmonPID = 0

	// handleExitFile fills in the remaining exit details from the file
	// (per the original author: stopped time and exit code).
	return c.handleExitFile(path, fileInfo)
}

View File

@ -26,7 +26,7 @@ type OCIRuntime interface {
// It includes a switch for whether to perform a hard query of the
// runtime. If unset, the exit file (if supported by the implementation)
// will be used.
UpdateContainerStatus(ctr *Container, useRuntime bool) error
UpdateContainerStatus(ctr *Container) error
// StartContainer starts the given container.
StartContainer(ctr *Container) error
// KillContainer sends the given signal to the given container.

View File

@ -216,8 +216,8 @@ func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *Conta
// If useRuntime is false, we will not directly hit runc to see the container's
// status, but will instead only check for the existence of the conmon exit file
// and update state to stopped if it exists.
func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container, useRuntime bool) error {
exitFile, err := ctr.exitFilePath()
func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error {
exitFile, err := r.ExitFilePath(ctr)
if err != nil {
return err
}
@ -227,33 +227,6 @@ func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container, useRuntime bool
return err
}
// If not using the OCI runtime, we don't need to do most of this.
if !useRuntime {
// If the container's not running, nothing to do.
if ctr.state.State != define.ContainerStateRunning && ctr.state.State != define.ContainerStatePaused {
return nil
}
// Check for the exit file conmon makes
info, err := os.Stat(exitFile)
if err != nil {
if os.IsNotExist(err) {
// Container is still running, no error
return nil
}
return errors.Wrapf(err, "error running stat on container %s exit file", ctr.ID())
}
// Alright, it exists. Transition to Stopped state.
ctr.state.State = define.ContainerStateStopped
ctr.state.PID = 0
ctr.state.ConmonPID = 0
// Read the exit file to get our stopped time and exit code.
return ctr.handleExitFile(exitFile, info)
}
// Store old state so we know if we were already stopped
oldState := ctr.state.State
@ -825,6 +798,7 @@ func (r *ConmonOCIRuntime) RuntimeInfo() (map[string]interface{}, error) {
"version": conmonVersion,
}
info["OCIRuntime"] = map[string]interface{}{
"name": r.name,
"path": r.path,
"package": runtimePackage,
"version": runtimeVersion,

189
libpod/oci_missing.go Normal file
View File

@ -0,0 +1,189 @@
package libpod
import (
"fmt"
"path/filepath"
"sync"
"github.com/containers/libpod/libpod/define"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
var (
	// missingRuntimesLock must be held while reading or writing
	// missingRuntimes.
	missingRuntimesLock sync.Mutex
	// missingRuntimes caches one MissingRuntime per runtime name, so that
	// each missing runtime is only created once. Creation logs an error
	// message that should not be repeated. The map is allocated lazily on
	// first use.
	missingRuntimes map[string]*MissingRuntime
)
// MissingRuntime is a stand-in OCI runtime used when the runtime requested by
// a container is missing (not installed or not in the configuration file).
// Its methods that would need a real runtime return an error instead.
type MissingRuntime struct {
// name is the name of the missing runtime. It is used in error messages
// and in the values reported by Name and RuntimeInfo.
name string
// exitsDir is the directory in which container exit files are looked up
// by ExitFilePath.
exitsDir string
}
// getMissingRuntime returns the MissingRuntime registered for name, creating
// and caching it on first request.
//
// The libpod Runtime is needed so the exits directory can be placed under its
// configured TmpDir. An error is logged the first time a given name is
// requested; later calls return the cached instance without logging again.
// The returned error is always nil.
func getMissingRuntime(name string, r *Runtime) (OCIRuntime, error) {
	missingRuntimesLock.Lock()
	defer missingRuntimesLock.Unlock()

	// Lazily allocate the cache on first use.
	if missingRuntimes == nil {
		missingRuntimes = make(map[string]*MissingRuntime)
	}

	if cached, found := missingRuntimes[name]; found {
		return cached, nil
	}

	// Complain exactly once per missing runtime.
	logrus.Errorf("OCI Runtime %s is in use by a container, but is not available (not in configuration file or not installed)", name)

	missing := &MissingRuntime{
		name:     name,
		exitsDir: filepath.Join(r.config.TmpDir, "exits"),
	}
	missingRuntimes[name] = missing

	return missing, nil
}
// Name returns the missing runtime's name followed by a marker noting that
// the runtime is missing or not available.
func (r *MissingRuntime) Name() string {
	return r.name + " (missing/not available)"
}
// Path returns a fixed placeholder string; no real path is available as the
// runtime is missing.
func (r *MissingRuntime) Path() string {
return "(missing/not available)"
}
// CreateContainer is not available as the runtime is missing.
// It ignores its arguments and always returns the error from printError.
func (r *MissingRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) error {
return r.printError()
}
// UpdateContainerStatus is not available as the runtime is missing.
// It ignores the container and always returns the error from printError.
func (r *MissingRuntime) UpdateContainerStatus(ctr *Container) error {
return r.printError()
}
// StartContainer is not available as the runtime is missing.
// It ignores the container and always returns the error from printError.
func (r *MissingRuntime) StartContainer(ctr *Container) error {
return r.printError()
}
// KillContainer is not available as the runtime is missing.
// It ignores its arguments and always returns the error from printError.
// TODO: We could attempt to unix.Kill() the PID as recorded in the state if we
// really want to smooth things out? Won't be perfect, but if the container has
// a PID namespace it could be enough?
func (r *MissingRuntime) KillContainer(ctr *Container, signal uint, all bool) error {
return r.printError()
}
// StopContainer is not available as the runtime is missing.
// It ignores its arguments and always returns the error from printError.
func (r *MissingRuntime) StopContainer(ctr *Container, timeout uint, all bool) error {
return r.printError()
}
// DeleteContainer is not available as the runtime is missing.
// It ignores the container and always returns the error from printError.
func (r *MissingRuntime) DeleteContainer(ctr *Container) error {
return r.printError()
}
// PauseContainer is not available as the runtime is missing.
// It ignores the container and always returns the error from printError.
func (r *MissingRuntime) PauseContainer(ctr *Container) error {
return r.printError()
}
// UnpauseContainer is not available as the runtime is missing.
// It ignores the container and always returns the error from printError.
func (r *MissingRuntime) UnpauseContainer(ctr *Container) error {
return r.printError()
}
// ExecContainer is not available as the runtime is missing.
// It ignores its arguments and always returns -1, a nil error channel, and
// the error from printError.
func (r *MissingRuntime) ExecContainer(ctr *Container, sessionID string, options *ExecOptions) (int, chan error, error) {
return -1, nil, r.printError()
}
// ExecStopContainer is not available as the runtime is missing.
// It ignores its arguments and always returns the error from printError.
// TODO: We can also investigate using unix.Kill() on the PID of the exec
// session here if we want to make stopping containers possible. Won't be
// perfect, though.
func (r *MissingRuntime) ExecStopContainer(ctr *Container, sessionID string, timeout uint) error {
return r.printError()
}
// ExecContainerCleanup is not available as the runtime is missing.
// It ignores its arguments and always returns the error from printError.
func (r *MissingRuntime) ExecContainerCleanup(ctr *Container, sessionID string) error {
return r.printError()
}
// CheckpointContainer is not available as the runtime is missing.
// It ignores its arguments and always returns the error from printError.
func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error {
return r.printError()
}
// SupportsCheckpoint always returns false, as checkpointing requires a
// working runtime.
func (r *MissingRuntime) SupportsCheckpoint() bool {
return false
}
// SupportsJSONErrors always returns false, as there is no runtime to give
// errors.
func (r *MissingRuntime) SupportsJSONErrors() bool {
return false
}
// SupportsNoCgroups always returns false, as there is no runtime to create
// containers.
func (r *MissingRuntime) SupportsNoCgroups() bool {
return false
}
// AttachSocketPath does not work as there is no runtime to attach to.
// It always returns an empty path and the error from printError.
// (Theoretically we could follow ExitFilePath and build a path anyway, but
// there is no guarantee the container is running and thus has an attach
// socket...)
func (r *MissingRuntime) AttachSocketPath(ctr *Container) (string, error) {
return "", r.printError()
}
// ExecAttachSocketPath does not work as there is no runtime to attach to.
// It always returns an empty path and the error from printError.
// (Again, we could follow ExitFilePath, but there is no guarantee there is an
// existing and running exec session.)
func (r *MissingRuntime) ExecAttachSocketPath(ctr *Container, sessionID string) (string, error) {
return "", r.printError()
}
// ExitFilePath returns the path of the exit file for the given container:
// the container's ID joined onto this runtime's exits directory.
//
// This mimics what ConmonOCIRuntime does, because the container may still be
// running happily (for example, the config file was modified to remove the
// runtime). We cannot find the runtime to act on the container, but Conmon
// should still place an exit file for it.
//
// A nil container yields an error wrapping define.ErrInvalidArg.
func (r *MissingRuntime) ExitFilePath(ctr *Container) (string, error) {
	if ctr != nil {
		return filepath.Join(r.exitsDir, ctr.ID()), nil
	}

	return "", errors.Wrapf(define.ErrInvalidArg, "must provide a valid container to get exit file path")
}
// RuntimeInfo returns information on the missing runtime.
//
// The result holds a single "OCIRuntime" entry whose "name" is the runtime's
// name; "path", "package" and "version" are all reported as "missing". The
// returned error is always nil.
func (r *MissingRuntime) RuntimeInfo() (map[string]interface{}, error) {
	// Placeholder for every detail that cannot be queried without a runtime.
	const unavailable = "missing"

	ociDetails := map[string]interface{}{
		"name":    r.name,
		"path":    unavailable,
		"package": unavailable,
		"version": unavailable,
	}

	return map[string]interface{}{"OCIRuntime": ociDetails}, nil
}
// printError returns an error indicating the runtime is missing: it wraps
// define.ErrOCIRuntimeNotFound with a message naming the runtime.
// Despite its name, it does not print or log anything itself.
func (r *MissingRuntime) printError() error {
return errors.Wrapf(define.ErrOCIRuntimeNotFound, "runtime %s is missing", r.name)
}

View File

@ -11,7 +11,7 @@ load helpers
BuildahVersion: *[0-9.]\\\+
Conmon:\\\s\\\+package:
Distribution:
OCIRuntime:\\\s\\\+package:
OCIRuntime:\\\s\\\+name:
os:
rootless:
registries: