mirror of
https://github.com/containers/podman.git
synced 2025-05-20 16:47:39 +08:00
Merge pull request #8906 from vrothberg/fix-8501
container stop: release lock before calling the runtime
This commit is contained in:
@ -210,7 +210,13 @@ func (c *Container) Kill(signal uint) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Is killing a paused container OK?
|
// TODO: Is killing a paused container OK?
|
||||||
if c.state.State != define.ContainerStateRunning {
|
switch c.state.State {
|
||||||
|
case define.ContainerStateRunning, define.ContainerStateStopping:
|
||||||
|
// Note that killing containers in "stopping" state is okay.
|
||||||
|
// In that state, the Podman is waiting for the runtime to
|
||||||
|
// stop the container and if that is taking too long, a user
|
||||||
|
// may have decided to kill the container after all.
|
||||||
|
default:
|
||||||
return errors.Wrapf(define.ErrCtrStateInvalid, "can only kill running containers. %s is in state %s", c.ID(), c.state.State.String())
|
return errors.Wrapf(define.ErrCtrStateInvalid, "can only kill running containers. %s is in state %s", c.ID(), c.state.State.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -539,7 +545,7 @@ func (c *Container) Cleanup(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check if state is good
|
// Check if state is good
|
||||||
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
|
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStateExited) {
|
||||||
return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, refusing to clean up", c.ID())
|
return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is running or paused, refusing to clean up", c.ID())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -764,7 +764,7 @@ func (c *Container) isStopped() (bool, error) {
|
|||||||
return true, err
|
return true, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused), nil
|
return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// save container state to the database
|
// save container state to the database
|
||||||
@ -1290,10 +1290,49 @@ func (c *Container) stop(timeout uint) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set the container state to "stopping" and unlock the container
|
||||||
|
// before handing it over to conmon to unblock other commands. #8501
|
||||||
|
// demonstrates nicely that a high stop timeout will block even simple
|
||||||
|
// commands such as `podman ps` from progressing if the container lock
|
||||||
|
// is held when busy-waiting for the container to be stopped.
|
||||||
|
c.state.State = define.ContainerStateStopping
|
||||||
|
if err := c.save(); err != nil {
|
||||||
|
return errors.Wrapf(err, "error saving container %s state before stopping", c.ID())
|
||||||
|
}
|
||||||
|
if !c.batched {
|
||||||
|
c.lock.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
if err := c.ociRuntime.StopContainer(c, timeout, all); err != nil {
|
if err := c.ociRuntime.StopContainer(c, timeout, all); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !c.batched {
|
||||||
|
c.lock.Lock()
|
||||||
|
if err := c.syncContainer(); err != nil {
|
||||||
|
switch errors.Cause(err) {
|
||||||
|
// If the container has already been removed (e.g., via
|
||||||
|
// the cleanup process), there's nothing left to do.
|
||||||
|
case define.ErrNoSuchCtr, define.ErrCtrRemoved:
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since we're now subject to a race condition with other processes who
|
||||||
|
// may have altered the state (and other data), let's check if the
|
||||||
|
// state has changed. If so, we should return immediately and log a
|
||||||
|
// warning.
|
||||||
|
if c.state.State != define.ContainerStateStopping {
|
||||||
|
logrus.Warnf(
|
||||||
|
"Container %q state changed from %q to %q while waiting for it to be stopped: discontinuing stop procedure as another process interfered",
|
||||||
|
c.ID(), define.ContainerStateStopping, c.state.State,
|
||||||
|
)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
c.newContainerEvent(events.Stop)
|
c.newContainerEvent(events.Stop)
|
||||||
|
|
||||||
c.state.PID = 0
|
c.state.PID = 0
|
||||||
@ -2116,7 +2155,7 @@ func (c *Container) sortUserVolumes(ctrSpec *spec.Spec) ([]*ContainerNamedVolume
|
|||||||
// Check for an exit file, and handle one if present
|
// Check for an exit file, and handle one if present
|
||||||
func (c *Container) checkExitFile() error {
|
func (c *Container) checkExitFile() error {
|
||||||
// If the container's not running, nothing to do.
|
// If the container's not running, nothing to do.
|
||||||
if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
|
if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,6 +28,9 @@ const (
|
|||||||
// ContainerStateRemoving indicates the container is in the process of
|
// ContainerStateRemoving indicates the container is in the process of
|
||||||
// being removed.
|
// being removed.
|
||||||
ContainerStateRemoving ContainerStatus = iota
|
ContainerStateRemoving ContainerStatus = iota
|
||||||
|
// ContainerStateStopping indicates the container is in the process of
|
||||||
|
// being stopped.
|
||||||
|
ContainerStateStopping ContainerStatus = iota
|
||||||
)
|
)
|
||||||
|
|
||||||
// ContainerStatus returns a string representation for users
|
// ContainerStatus returns a string representation for users
|
||||||
@ -50,6 +53,8 @@ func (t ContainerStatus) String() string {
|
|||||||
return "exited"
|
return "exited"
|
||||||
case ContainerStateRemoving:
|
case ContainerStateRemoving:
|
||||||
return "removing"
|
return "removing"
|
||||||
|
case ContainerStateStopping:
|
||||||
|
return "stopping"
|
||||||
}
|
}
|
||||||
return "bad state"
|
return "bad state"
|
||||||
}
|
}
|
||||||
|
@ -110,6 +110,8 @@ func DeallocRootlessCNI(ctx context.Context, c *Container) error {
|
|||||||
logrus.Warn(err)
|
logrus.Warn(err)
|
||||||
}
|
}
|
||||||
logrus.Debugf("rootless CNI: removing infra container %q", infra.ID())
|
logrus.Debugf("rootless CNI: removing infra container %q", infra.ID())
|
||||||
|
infra.lock.Lock()
|
||||||
|
defer infra.lock.Unlock()
|
||||||
if err := c.runtime.removeContainer(ctx, infra, true, false, true); err != nil {
|
if err := c.runtime.removeContainer(ctx, infra, true, false, true); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -67,4 +67,32 @@ load helpers
|
|||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Regression test for #8501
|
||||||
|
@test "podman stop - unlock while waiting for timeout" {
|
||||||
|
# Test that the container state transitions to "stopping" and that other
|
||||||
|
# commands can get the container's lock. To do that, run a container that
|
||||||
|
# ingores SIGTERM such that the Podman would wait 20 seconds for the stop
|
||||||
|
# to finish. This gives us enough time to try some commands and inspect
|
||||||
|
# the container's status.
|
||||||
|
|
||||||
|
run_podman run --name stopme -d $IMAGE sh -c \
|
||||||
|
"trap 'echo Received SIGTERM, ignoring' SIGTERM; echo READY; while :; do sleep 1; done"
|
||||||
|
|
||||||
|
# Stop the container in the background
|
||||||
|
$PODMAN stop -t 20 stopme &
|
||||||
|
|
||||||
|
# Other commands can acquire the lock
|
||||||
|
run_podman ps -a
|
||||||
|
|
||||||
|
# The container state transitioned to "stopping"
|
||||||
|
run_podman inspect --format '{{.State.Status}}' stopme
|
||||||
|
is "$output" "stopping" "Status of container should be 'stopping'"
|
||||||
|
|
||||||
|
run_podman kill stopme
|
||||||
|
|
||||||
|
# Exit code should be 137 as it was killed
|
||||||
|
run_podman inspect --format '{{.State.ExitCode}}' stopme
|
||||||
|
is "$output" "137" "Exit code of killed container"
|
||||||
|
}
|
||||||
|
|
||||||
# vim: filetype=sh
|
# vim: filetype=sh
|
||||||
|
Reference in New Issue
Block a user