From f93cad508a3cb071a9895242f4607f1baad4a344 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Tue, 2 Sep 2025 20:39:36 -0400 Subject: [PATCH] Fix a locking bug in that could cause a double-unlock The `cleanupExecBundle` function was only meant to be called on a locked container, as it does some state mutation operations. It also has a timed wait (if the directory is busy and can't be removed yet, give it a few milliseconds) in which it deliberately yields the lock to not block the container for that time. The `healthCheckExec()` function calls `cleanupExecBundle` out of a `defer` block. This is after the `defer c.lock.Unlock()` so it fires afterwards when the function returns, so we're normally fine - the container is still locked when our defer runs. The problem is that `healthCheckExec()` also unlocks the container during the expensive exec operation, and can actually fail and return while not holding the lock - meaning our `defer` can fire on an unlocked container, leading to a potential double unlock in `cleanupExecBundle`. We could, potentially, re-lock the container after the exec occurs, but we're actually waiting for a `select` to trigger to end the function, so that's not a good solution. Instead, just re-lock (if necessary) in the defer, before invoking `cleanupExecBundle()`. The `defer c.lock.Unlock()` will fire right after and unlock after us. Fixes #26968 Signed-off-by: Matthew Heon --- libpod/container_exec.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/libpod/container_exec.go b/libpod/container_exec.go index b7397b71b2..5e8813ee5f 100644 --- a/libpod/container_exec.go +++ b/libpod/container_exec.go @@ -859,6 +859,19 @@ func (c *Container) healthCheckExec(config *ExecConfig, timeout time.Duration, s return -1, err } defer func() { + // cleanupExecBundle MUST be called with the parent container locked. + if !unlock && !c.batched { + c.lock.Lock() + unlock = true + + if err := c.syncContainer(); err != nil { + logrus.Errorf("Error syncing container %s state: %v", c.ID(), err) + // Normally we'd want to continue here, get rid of the exec directory. + // But the risk of proceeding into a function that can mutate state with a bad state is high. + // Lesser of two evils is to bail and leak a directory. + return + } + } if err := c.cleanupExecBundle(session.ID()); err != nil { logrus.Errorf("Container %s light exec session cleanup error: %v", c.ID(), err) } @@ -971,7 +984,8 @@ func (c *Container) exec(config *ExecConfig, streams *define.AttachStreams, resi return session.ExitCode, nil } -// cleanupExecBundle cleanups an exec session after its done +// cleanupExecBundle cleans up an exec session after completion. +// MUST BE CALLED with container `c` locked. // Please be careful when using this function since it might temporarily unlock // the container when os.RemoveAll($bundlePath) fails with ENOTEMPTY or EBUSY // errors.