libpod: fix broken saveContainerError()

We cannot unlock then lock again without syncing the state as this will then save a potentially old state causing very bad things, such as double netns cleanup issues. The fix here is simple move the saveContainerError() under the same lock. The comment about the re-lock is just wrong. Not doing this under the same lock would cause us to update the error after something else changed the container alreayd. Most likely this was caused by a misunderstanding on how go defer's work. Given they run Last In - First Out (LIFO) it is safe as long as out defer function is after the defer unlock() call. I think this issue is very bad and might have caused a variety of other weird flakes. As fact I am confident that this fixes the double cleanup errors. Fixes #21569 Also fixes the netns removal ENOENT issues seen in #19721. Signed-off-by: Paul Holzinger <pholzing@redhat.com>
2025-10-18 03:33:32 +08:00 · 2024-08-09 18:59:22 +02:00
parent 277e061878
commit f276d53532
1 changed files with 30 additions and 51 deletions
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@ -84,27 +84,20 @@ func (c *Container) Init(ctx context.Context, recursive bool) error {
 // running before starting the container. The recursive parameter, if set, will start all
 // dependencies before starting this container.
 func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error) {
-	defer func() {
-		if finalErr != nil {
-			// Have to re-lock.
-			// As this is the first defer, it's the last thing to
-			// happen in the function - so `defer c.lock.Unlock()`
-			// below already fired.
-			if !c.batched {
-				c.lock.Lock()
-				defer c.lock.Unlock()
-			}
-
-			if err := saveContainerError(c, finalErr); err != nil {
-				logrus.Debug(err)
-			}
-		}
-	}()
-
 	if !c.batched {
 		c.lock.Lock()
 		defer c.lock.Unlock()

+		// defer's are executed LIFO so we are locked here
+		// as long as we call this after the defer unlock()
+		defer func() {
+			if finalErr != nil {
+				if err := saveContainerError(c, finalErr); err != nil {
+					logrus.Debug(err)
+				}
+			}
+		}()
+
 		if err := c.syncContainer(); err != nil {
 			return err
 		}
@ -147,27 +140,20 @@ func (c *Container) Update(resources *spec.LinuxResources, restartPolicy *string
 // ordering of the two such that no output from the container is lost (e.g. the
 // Attach call occurs before Start).
 func (c *Container) Attach(ctx context.Context, streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize, start bool) (retChan <-chan error, finalErr error) {
-	defer func() {
-		if finalErr != nil {
-			// Have to re-lock.
-			// As this is the first defer, it's the last thing to
-			// happen in the function - so `defer c.lock.Unlock()`
-			// below already fired.
-			if !c.batched {
-				c.lock.Lock()
-				defer c.lock.Unlock()
-			}
-
-			if err := saveContainerError(c, finalErr); err != nil {
-				logrus.Debug(err)
-			}
-		}
-	}()
-
 	if !c.batched {
 		c.lock.Lock()
 		defer c.lock.Unlock()

+		// defer's are executed LIFO so we are locked here
+		// as long as we call this after the defer unlock()
+		defer func() {
+			if finalErr != nil {
+				if err := saveContainerError(c, finalErr); err != nil {
+					logrus.Debug(err)
+				}
+			}
+		}()
+
 		if err := c.syncContainer(); err != nil {
 			return nil, err
 		}
@ -270,27 +256,20 @@ func (c *Container) Stop() error {
 // manually. If timeout is 0, SIGKILL will be used immediately to kill the
 // container.
 func (c *Container) StopWithTimeout(timeout uint) (finalErr error) {
-	defer func() {
-		if finalErr != nil {
-			// Have to re-lock.
-			// As this is the first defer, it's the last thing to
-			// happen in the function - so `defer c.lock.Unlock()`
-			// below already fired.
-			if !c.batched {
-				c.lock.Lock()
-				defer c.lock.Unlock()
-			}
-
-			if err := saveContainerError(c, finalErr); err != nil {
-				logrus.Debug(err)
-			}
-		}
-	}()
-
 	if !c.batched {
 		c.lock.Lock()
 		defer c.lock.Unlock()

+		// defer's are executed LIFO so we are locked here
+		// as long as we call this after the defer unlock()
+		defer func() {
+			if finalErr != nil {
+				if err := saveContainerError(c, finalErr); err != nil {
+					logrus.Debug(err)
+				}
+			}
+		}()
+
 		if err := c.syncContainer(); err != nil {
 			return err
 		}