Merge pull request #17165 from vrothberg/fix-17142

StopContainer: return if cleanup process changed state
This commit is contained in:
OpenShift Merge Robot
2023-01-19 08:30:29 -05:00
committed by GitHub

View File

@ -400,12 +400,11 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
return nil return nil
} }
if timeout > 0 {
stopSignal := ctr.config.StopSignal stopSignal := ctr.config.StopSignal
if stopSignal == 0 { if stopSignal == 0 {
stopSignal = uint(syscall.SIGTERM) stopSignal = uint(syscall.SIGTERM)
} }
if timeout > 0 {
if err := r.KillContainer(ctr, stopSignal, all); err != nil { if err := r.KillContainer(ctr, stopSignal, all); err != nil {
// Is the container gone? // Is the container gone?
// If so, it probably died between the first check and // If so, it probably died between the first check and
@ -428,11 +427,17 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
} }
} }
// If the timeout was set to 0 or if stopping the container with the
// specified signal did not work, use the big hammer with SIGKILL.
if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil { if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil {
// Ignore the error if KillContainer complains about it already // There's an inherent race with the cleanup process (see
// being stopped or exited. There's an inherent race with the // #16142, #17142). If the container has already been marked as
// cleanup process (see #16142). // stopped or exited by the cleanup process, we can return
if !(errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited)) { // immediately.
if errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
return nil
}
// If the PID is 0, then the container is already stopped. // If the PID is 0, then the container is already stopped.
if ctr.state.PID == 0 { if ctr.state.PID == 0 {
return nil return nil
@ -443,7 +448,6 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
} }
return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err) return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err)
} }
}
// Give runtime a few seconds to make it happen // Give runtime a few seconds to make it happen
if err := waitContainerStop(ctr, killContainerTimeout); err != nil { if err := waitContainerStop(ctr, killContainerTimeout); err != nil {
@ -949,32 +953,21 @@ func waitContainerStop(ctr *Container, timeout time.Duration) error {
// Wait for a given PID to stop // Wait for a given PID to stop
func waitPidStop(pid int, timeout time.Duration) error { func waitPidStop(pid int, timeout time.Duration) error {
done := make(chan struct{}) timer := time.NewTimer(timeout)
chControl := make(chan struct{})
go func() {
for { for {
select { select {
case <-chControl: case <-timer.C:
return return fmt.Errorf("given PID did not die within timeout")
default: default:
if err := unix.Kill(pid, 0); err != nil { if err := unix.Kill(pid, 0); err != nil {
if err == unix.ESRCH { if err == unix.ESRCH {
close(done) return nil
return
} }
logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err) logrus.Errorf("Pinging PID %d with signal 0: %v", pid, err)
} }
time.Sleep(100 * time.Millisecond) time.Sleep(10 * time.Millisecond)
} }
} }
}()
select {
case <-done:
return nil
case <-time.After(timeout):
close(chControl)
return fmt.Errorf("given PIDs did not die within timeout")
}
} }
func (r *ConmonOCIRuntime) getLogTag(ctr *Container) (string, error) { func (r *ConmonOCIRuntime) getLogTag(ctr *Container) (string, error) {