libpod: wait for healthy on main thread

wait for the healthy status on the thread where the container lock is
held.  Otherwise, if it is performed from a go routine, a different
thread is used (since the runtime.LockOSThread() call doesn't have any
effect), causing pthread_mutex_unlock() to fail with EPERM.

Closes: https://github.com/containers/podman/issues/22651

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano
2024-05-09 16:08:55 +02:00
parent 87bd77538e
commit b06c58b4a5
5 changed files with 50 additions and 19 deletions

View File

@ -115,7 +115,10 @@ func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error)
}
// Start the container
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}
// Update updates the given container.
@ -194,6 +197,9 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
opts.Start = true
opts.Started = startedChan
// attach and start the container on a different thread. waitForHealthy must
// be done later, as it requires to run on the same thread that holds the lock
// for the container.
if err := c.ociRuntime.Attach(c, opts); err != nil {
attachChan <- err
}
@ -207,7 +213,7 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
c.newContainerEvent(events.Attach)
}
return attachChan, nil
return attachChan, c.waitForHealthy(ctx)
}
// RestartWithTimeout restarts a running container and takes a given timeout in uint

View File

@ -330,10 +330,10 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
return false, err
}
}
if err := c.start(ctx); err != nil {
if err := c.start(); err != nil {
return false, err
}
return true, nil
return true, c.waitForHealthy(ctx)
}
// Ensure that the container is in a specific state or state.
@ -1208,7 +1208,7 @@ func (c *Container) reinit(ctx context.Context, retainRetries bool) error {
// Initialize (if necessary) and start a container
// Performs all necessary steps to start a container that is not running
// Does not lock or check validity
// Does not lock or check validity, requires to run on the same thread that holds the lock for the container.
func (c *Container) initAndStart(ctx context.Context) (retErr error) {
// If we are ContainerStateUnknown, throw an error
if c.state.State == define.ContainerStateUnknown {
@ -1253,11 +1253,14 @@ func (c *Container) initAndStart(ctx context.Context) (retErr error) {
}
// Now start the container
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}
// Internal, non-locking function to start a container
func (c *Container) start(ctx context.Context) error {
func (c *Container) start() error {
if c.config.Spec.Process != nil {
logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
}
@ -1298,10 +1301,14 @@ func (c *Container) start(ctx context.Context) error {
c.newContainerEvent(events.Start)
if err := c.save(); err != nil {
return err
}
return c.save()
}
// waitForHealthy, when sdNotifyMode == SdNotifyModeHealthy, waits up to the DefaultWaitInterval
// for the container to get into the healthy state and reports the status to the notify socket.
// The function unlocks the container lock, so it must be called from the same thread that locks
// the container.
func (c *Container) waitForHealthy(ctx context.Context) error {
if c.config.SdNotifyMode != define.SdNotifyModeHealthy {
return nil
}
@ -1315,6 +1322,9 @@ func (c *Container) start(ctx context.Context) error {
}
if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) {
return nil
}
return err
}
@ -1566,6 +1576,7 @@ func (c *Container) unpause() error {
}
// Internal, non-locking function to restart a container
// It requires to run on the same thread that holds the lock.
func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retErr error) {
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateExited) {
return fmt.Errorf("unable to restart a container in a paused or unknown state: %w", define.ErrCtrStateInvalid)
@ -1613,7 +1624,10 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
return err
}
}
return c.start(ctx)
if err := c.start(); err != nil {
return err
}
return c.waitForHealthy(ctx)
}
// mountStorage sets up the container's root filesystem

View File

@ -3,7 +3,6 @@
package libpod
import (
"context"
"errors"
"fmt"
"io"
@ -32,6 +31,7 @@ const (
// Attach to the given container.
// Does not check if state is appropriate.
// started is only required if startContainer is true.
// It does not wait for the container to be healthy, it is the caller responsibility to do so.
func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
passthrough := c.LogDriver() == define.PassthroughLogging || c.LogDriver() == define.PassthroughTTYLogging
@ -86,7 +86,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
// If starting was requested, start the container and notify when that's
// done.
if params.Start {
if err := c.start(context.TODO()); err != nil {
if err := c.start(); err != nil {
return err
}
params.Started <- true

View File

@ -995,7 +995,10 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
return reports, fmt.Errorf("unable to start container %s: %w", ctr.ID(), err)
}
exitCode = ic.GetContainerExitCode(ctx, ctr.Container)
exitCode, err2 := ic.ContainerWaitForExitCode(ctx, ctr.Container)
if err2 != nil {
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err2)
}
reports = append(reports, &entities.ContainerStartReport{
Id: ctr.ID(),
RawInput: ctr.rawInput,
@ -1189,7 +1192,7 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
report.ExitCode = define.ExitCode(err)
return &report, err
}
report.ExitCode = ic.GetContainerExitCode(ctx, ctr)
report.ExitCode, _ = ic.ContainerWaitForExitCode(ctx, ctr)
if opts.Rm && !ctr.ShouldRestart(ctx) {
if err := removeContainer(ctr, false); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) ||
@ -1203,14 +1206,13 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
return &report, nil
}
func (ic *ContainerEngine) GetContainerExitCode(ctx context.Context, ctr *libpod.Container) int {
func (ic *ContainerEngine) ContainerWaitForExitCode(ctx context.Context, ctr *libpod.Container) (int, error) {
exitCode, err := ctr.Wait(ctx)
if err != nil {
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err)
intExitCode := int(define.ExecErrorCodeNotFound)
return intExitCode
return intExitCode, err
}
return int(exitCode)
return int(exitCode), nil
}
func (ic *ContainerEngine) ContainerLogs(ctx context.Context, namesOrIds []string, options entities.ContainerLogsOptions) error {

View File

@ -233,6 +233,15 @@ READY=1" "Container log after healthcheck run"
is "$output" "running" "make sure container is still running"
run_podman rm -f -t0 $ctr
ctr=$(random_string)
run_podman run --name $ctr \
--health-cmd="touch /terminate" \
--sdnotify=healthy \
$IMAGE sh -c 'while test \! -e /terminate; do sleep 0.1; done; echo finished'
is "$output" "finished" "make sure container exited successfully"
run_podman rm -f -t0 $ctr
_stop_socat
}