mirror of
https://github.com/containers/podman.git
synced 2025-09-21 03:35:06 +08:00
Merge pull request #6520 from mheon/no_conmon_no_error
Ensure Conmon is alive before waiting for exit file
This commit is contained in:
@ -1209,13 +1209,35 @@ func (c *Container) stop(timeout uint) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Check if conmon is still alive.
|
||||
// If it is not, we won't be getting an exit file.
|
||||
conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := c.ociRuntime.StopContainer(c, timeout, all); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.newContainerEvent(events.Stop)
|
||||
|
||||
c.state.PID = 0
|
||||
c.state.ConmonPID = 0
|
||||
c.state.StoppedByUser = true
|
||||
|
||||
if !conmonAlive {
|
||||
// Conmon is dead, so we can't epect an exit code.
|
||||
c.state.ExitCode = -1
|
||||
c.state.FinishedTime = time.Now()
|
||||
c.state.State = define.ContainerStateStopped
|
||||
if err := c.save(); err != nil {
|
||||
logrus.Errorf("Error saving container %s status: %v", c.ID(), err)
|
||||
}
|
||||
|
||||
return errors.Wrapf(define.ErrConmonDead, "container %s conmon process missing, cannot retrieve exit code", c.ID())
|
||||
}
|
||||
|
||||
if err := c.save(); err != nil {
|
||||
return errors.Wrapf(err, "error saving container %s state after stopping", c.ID())
|
||||
}
|
||||
@ -1225,8 +1247,6 @@ func (c *Container) stop(timeout uint) error {
|
||||
return err
|
||||
}
|
||||
|
||||
c.newContainerEvent(events.Stop)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -141,6 +141,9 @@ var (
|
||||
// ErrConmonOutdated indicates the version of conmon found (whether via the configuration or $PATH)
|
||||
// is out of date for the current podman version
|
||||
ErrConmonOutdated = errors.New("outdated conmon version")
|
||||
// ErrConmonDead indicates that the container's conmon process has been
|
||||
// killed, preventing normal operation.
|
||||
ErrConmonDead = errors.New("conmon process killed")
|
||||
|
||||
// ErrImageInUse indicates the requested operation failed because the image was in use
|
||||
ErrImageInUse = errors.New("image is being used")
|
||||
|
@ -107,6 +107,13 @@ type OCIRuntime interface {
|
||||
// error.
|
||||
CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error
|
||||
|
||||
// CheckConmonRunning verifies that the given container's Conmon
|
||||
// instance is still running. Runtimes without Conmon, or systems where
|
||||
// the PID of conmon is not available, should mock this as True.
|
||||
// True indicates that Conmon for the instance is running, False
|
||||
// indicates it is not.
|
||||
CheckConmonRunning(ctr *Container) (bool, error)
|
||||
|
||||
// SupportsCheckpoint returns whether this OCI runtime
|
||||
// implementation supports the CheckpointContainer() operation.
|
||||
SupportsCheckpoint() bool
|
||||
|
@ -669,6 +669,31 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container
|
||||
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...)
|
||||
}
|
||||
|
||||
func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) {
|
||||
if ctr.state.ConmonPID == 0 {
|
||||
// If the container is running or paused, assume Conmon is
|
||||
// running. We didn't record Conmon PID on some old versions, so
|
||||
// that is likely what's going on...
|
||||
// Unusual enough that we should print a warning message though.
|
||||
if ctr.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
|
||||
logrus.Warnf("Conmon PID is not set, but container is running!")
|
||||
return true, nil
|
||||
}
|
||||
// Container's not running, so conmon PID being unset is
|
||||
// expected. Conmon is not running.
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// We have a conmon PID. Ping it with signal 0.
|
||||
if err := unix.Kill(ctr.state.ConmonPID, 0); err != nil {
|
||||
if err == unix.ESRCH {
|
||||
return false, nil
|
||||
}
|
||||
return false, errors.Wrapf(err, "error pinging container %s conmon with signal 0", ctr.ID())
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// SupportsCheckpoint checks if the OCI runtime supports checkpointing
|
||||
// containers.
|
||||
func (r *ConmonOCIRuntime) SupportsCheckpoint() bool {
|
||||
|
@ -163,6 +163,11 @@ func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCh
|
||||
return r.printError()
|
||||
}
|
||||
|
||||
// CheckConmonRunning is not available as the runtime is missing
|
||||
func (r *MissingRuntime) CheckConmonRunning(ctr *Container) (bool, error) {
|
||||
return false, r.printError()
|
||||
}
|
||||
|
||||
// SupportsCheckpoint returns false as checkpointing requires a working runtime
|
||||
func (r *MissingRuntime) SupportsCheckpoint() bool {
|
||||
return false
|
||||
|
@ -464,9 +464,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the container's in a good state to be removed
|
||||
// Check that the container's in a good state to be removed.
|
||||
if c.state.State == define.ContainerStateRunning {
|
||||
if err := c.stop(c.StopTimeout()); err != nil {
|
||||
// Ignore ErrConmonDead - we couldn't retrieve the container's
|
||||
// exit code properly, but it's still stopped.
|
||||
if err := c.stop(c.StopTimeout()); err != nil && errors.Cause(err) != define.ErrConmonDead {
|
||||
return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID())
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user