Merge pull request #17165 from vrothberg/fix-17142

StopContainer: return if cleanup process changed state
This commit is contained in:
OpenShift Merge Robot
2023-01-19 08:30:29 -05:00
committed by GitHub

View File

@ -400,12 +400,11 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
return nil return nil
} }
stopSignal := ctr.config.StopSignal
if stopSignal == 0 {
stopSignal = uint(syscall.SIGTERM)
}
if timeout > 0 { if timeout > 0 {
stopSignal := ctr.config.StopSignal
if stopSignal == 0 {
stopSignal = uint(syscall.SIGTERM)
}
if err := r.KillContainer(ctr, stopSignal, all); err != nil { if err := r.KillContainer(ctr, stopSignal, all); err != nil {
// Is the container gone? // Is the container gone?
// If so, it probably died between the first check and // If so, it probably died between the first check and
@ -428,21 +427,26 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
} }
} }
// If the timeout was set to 0 or if stopping the container with the
// specified signal did not work, use the big hammer with SIGKILL.
if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil { if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil {
// Ignore the error if KillContainer complains about it already // There's an inherent race with the cleanup process (see
// being stopped or exited. There's an inherent race with the // #16142, #17142). If the container has already been marked as
// cleanup process (see #16142). // stopped or exited by the cleanup process, we can return
if !(errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited)) { // immediately.
// If the PID is 0, then the container is already stopped. if errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
if ctr.state.PID == 0 { return nil
return nil
}
// Again, check if the container is gone. If it is, exit cleanly.
if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) {
return nil
}
return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err)
} }
// If the PID is 0, then the container is already stopped.
if ctr.state.PID == 0 {
return nil
}
// Again, check if the container is gone. If it is, exit cleanly.
if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) {
return nil
}
return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err)
} }
// Give runtime a few seconds to make it happen // Give runtime a few seconds to make it happen
@ -949,31 +953,20 @@ func waitContainerStop(ctr *Container, timeout time.Duration) error {
// Wait for a given PID to stop // Wait for a given PID to stop
func waitPidStop(pid int, timeout time.Duration) error { func waitPidStop(pid int, timeout time.Duration) error {
done := make(chan struct{}) timer := time.NewTimer(timeout)
chControl := make(chan struct{}) for {
go func() { select {
for { case <-timer.C:
select { return fmt.Errorf("given PID did not die within timeout")
case <-chControl: default:
return if err := unix.Kill(pid, 0); err != nil {
default: if err == unix.ESRCH {
if err := unix.Kill(pid, 0); err != nil { return nil
if err == unix.ESRCH {
close(done)
return
}
logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err)
} }
time.Sleep(100 * time.Millisecond) logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err)
} }
time.Sleep(10 * time.Millisecond)
} }
}()
select {
case <-done:
return nil
case <-time.After(timeout):
close(chControl)
return fmt.Errorf("given PIDs did not die within timeout")
} }
} }