mirror of
https://github.com/containers/podman.git
synced 2025-10-15 10:16:28 +08:00
wait: fix handling of multiple conditions with exited
As it turns on things are not so simple after all... In podman-py it was reported[1] that waiting might hang, per our docs wait on multiple conditions should exit once the first one is hit and not all of them. However because the new wait logic never checked if the context was cancelled the goroutine kept running until conmon exited and because we used a waitgroup to wait for all of them to finish it blocked until that happened. First we can remove the waitgroup as we only need to wait for one of them anyway via the channel. While this alone fixes the hang it would still leak the other goroutine. As there is no way to cancel a goroutine all the code must check for a cancelled context in the wait loop to no leak. Fixes 8a943311db ("libpod: simplify WaitForExit()") [1] https://github.com/containers/podman-py/issues/425 Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
@ -9,7 +9,6 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/containers/common/pkg/resize"
|
||||
@ -596,7 +595,7 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
|
||||
|
||||
// we cannot wait locked as we would hold the lock forever, so we unlock and then lock again
|
||||
c.lock.Unlock()
|
||||
err := waitForConmonExit(conmonPID, conmonPidFd, pollInterval)
|
||||
err := waitForConmonExit(ctx, conmonPID, conmonPidFd, pollInterval)
|
||||
c.lock.Lock()
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err)
|
||||
@ -619,15 +618,24 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
|
||||
return c.runtime.state.GetContainerExitCode(id)
|
||||
}
|
||||
|
||||
func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) error {
|
||||
func waitForConmonExit(ctx context.Context, conmonPID, conmonPidFd int, pollInterval time.Duration) error {
|
||||
if conmonPidFd > -1 {
|
||||
for {
|
||||
fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
|
||||
if _, err := unix.Poll(fds, -1); err != nil {
|
||||
if n, err := unix.Poll(fds, int(pollInterval.Milliseconds())); err != nil {
|
||||
if err == unix.EINTR {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
} else if n == 0 {
|
||||
// n == 0 means timeout
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return define.ErrCanceled
|
||||
default:
|
||||
// context not done, wait again
|
||||
continue
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -640,7 +648,11 @@ func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) e
|
||||
}
|
||||
return err
|
||||
}
|
||||
time.Sleep(pollInterval)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return define.ErrCanceled
|
||||
case <-time.After(pollInterval):
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -695,22 +707,15 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
|
||||
}
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
if waitForExit {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
code, err := c.WaitForExit(ctx, waitTimeout)
|
||||
trySend(code, err)
|
||||
}()
|
||||
}
|
||||
|
||||
if len(wantedStates) > 0 || len(wantedHealthStates) > 0 {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
stoppedCount := 0
|
||||
for {
|
||||
if len(wantedStates) > 0 {
|
||||
@ -780,7 +785,6 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
|
||||
case <-ctx.Done():
|
||||
result = waitResult{-1, define.ErrCanceled}
|
||||
}
|
||||
wg.Wait()
|
||||
return result.code, result.err
|
||||
}
|
||||
|
||||
|
@ -108,4 +108,19 @@ var _ = Describe("Podman wait", func() {
|
||||
Expect(session).Should(ExitCleanly())
|
||||
Expect(session.OutputToStringArray()).To(Equal([]string{"0", "0", "0"}))
|
||||
})
|
||||
|
||||
It("podman wait on multiple conditions", func() {
|
||||
session := podmanTest.Podman([]string{"run", "-d", ALPINE, "sleep", "100"})
|
||||
session.Wait(20)
|
||||
Expect(session).Should(ExitCleanly())
|
||||
cid := session.OutputToString()
|
||||
|
||||
// condition should return once nay of the condition is met not all of them,
|
||||
// as the container is running this should return immediately
|
||||
// https://github.com/containers/podman-py/issues/425
|
||||
session = podmanTest.Podman([]string{"wait", "--condition", "running,exited", cid})
|
||||
session.Wait(20)
|
||||
Expect(session).Should(ExitCleanly())
|
||||
Expect(session.OutputToString()).To(Equal("-1"))
|
||||
})
|
||||
})
|
||||
|
Reference in New Issue
Block a user