Fix a deadlock when removing pods

The infra container would try to remove its pod, even though the pod
was already in the process of being removed - oops. Add a check so we
don't try to remove the pod again when called by the `podman pod rm`
command.

Also, wire up noLockPod - the flag wasn't previously wired in at all,
which is concerning, and may well be related to the deadlock (see the
sketch below).

Finally, make a few minor fixes to un-break lint.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
Matthew Heon
2023-05-05 13:57:44 -04:00
parent 8cb5d39d43
commit ef1a22cdea
3 changed files with 12 additions and 6 deletions
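
For context, the hang is the classic self-deadlock: pod removal takes the
pod lock, then calls into container removal, which tries to take the same
non-reentrant lock again. A minimal standalone sketch of that pattern and
of the noLockPod-style fix - the types and names below are illustrative
stand-ins, not libpod's actual API:

package main

import (
	"fmt"
	"sync"
)

// pod stands in for a libpod pod; the mutex plays the role of the
// SHM-backed pod lock, which (like sync.Mutex) is not reentrant.
type pod struct {
	mu sync.Mutex
}

// removeContainer takes the pod lock unless the caller states that it
// already holds it.
func removeContainer(p *pod, noLockPod bool) {
	if !noLockPod {
		p.mu.Lock() // if the caller already held mu, this blocks forever
		defer p.mu.Unlock()
	}
	fmt.Println("container removed")
}

// removePod holds the pod lock for the whole teardown, so it must pass
// noLockPod=true when it re-enters container removal.
func removePod(p *pod) {
	p.mu.Lock()
	defer p.mu.Unlock()
	removeContainer(p, true)
}

func main() {
	removePod(&pod{})
}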


@@ -4,6 +4,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"io/fs"
 	"os"
 	"path"
 	"path/filepath"
@@ -690,8 +691,10 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 		retErr = fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock)
 		return
 	}
-	pod.lock.Lock()
-	defer pod.lock.Unlock()
+	if !noLockPod {
+		pod.lock.Lock()
+		defer pod.lock.Unlock()
+	}
 	if err := pod.updatePod(); err != nil {
 		retErr = err
 		return
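
This hunk is the "wire up noLockPod" part of the commit message: the flag
was apparently accepted but the pod lock was taken unconditionally, so a
caller already holding the pod lock would deadlock here no matter what it
passed. With the guard, pod removal can re-enter container removal safely.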
@@ -763,7 +766,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 			removedPods[depPod.ID()] = nil
 		}
 	}
-	if serviceForPod || c.config.IsInfra {
+	if (serviceForPod || c.config.IsInfra) && !removePod {
 		// We're going to remove the pod we are a part of.
 		// This will get rid of us as well, so we can just return
 		// immediately after.
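
This guard breaks the recursion behind the deadlock: a service or infra
container normally escalates its own removal to removal of the entire pod,
but when the call came from pod removal itself (removePod == true),
escalating again would try to remove a pod that is already mid-teardown.
The shape of the logic, with hypothetical names:

package main

import "fmt"

// removeContainer tears down one container; removePod reports whether the
// call was made from pod removal itself, in which case we must not
// re-enter pod removal. (Stand-in for the libpod logic, not its real API.)
func removeContainer(name string, isInfra, removePod bool) {
	if isInfra && !removePod {
		// A standalone removal of an infra container takes the whole pod
		// with it: hand off to pod removal, which removes us too.
		fmt.Printf("removing pod of infra container %s\n", name)
		return
	}
	fmt.Printf("removed container %s\n", name)
}

func main() {
	removeContainer("infra", true, false) // standalone: escalate to the pod
	removeContainer("infra", true, true)  // from pod removal: no escalation
}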
@@ -946,7 +949,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 	removedCtrs[c.ID()] = nil
 	// Deallocate the container's lock
-	if err := c.lock.Free(); err != nil {
+	if err := c.lock.Free(); err != nil && !errors.Is(err, fs.ErrNotExist) {
 		reportErrorf("freeing lock for container %s: %w", c.ID(), err)
 	}
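
Tolerating fs.ErrNotExist makes freeing the lock idempotent: if an earlier,
partially failed removal already released the container's lock, a retry
should keep going instead of reporting an error. The same pattern with a
file-backed lock as a stand-in (libpod's locks are SHM-backed; the helper
and path here are hypothetical):

package main

import (
	"errors"
	"fmt"
	"io/fs"
	"os"
)

// freeLock releases a file-backed lock, treating "already gone" as success.
func freeLock(path string) error {
	if err := os.Remove(path); err != nil && !errors.Is(err, fs.ErrNotExist) {
		return err
	}
	return nil
}

func main() {
	fmt.Println(freeLock("/tmp/no-such.lock")) // <nil>: a missing lock is fine
}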
@@ -978,6 +981,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 		}
 	}
+	//nolint:nakedret
 	return
 }
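
The //nolint:nakedret directive is one of the lint fixes: nakedret flags
naked returns in functions longer than its threshold (30 lines by default),
but removeContainer assigns its named results throughout the early-exit
paths above, so the naked return is kept and the warning suppressed.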


@@ -237,7 +237,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
 	}
 	// Finalize the removed containers list
-	for ctr, _ := range ctrsVisited {
+	for ctr := range ctrsVisited {
 		removedCtrs[ctr] = ctrErrors[ctr]
 	}
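Another lint fix: ranging over a map for its keys alone does not need the
blank value variable; gofmt -s and staticcheck (S1005) both flag the
redundant form. For example:

package main

import "fmt"

func main() {
	ctrsVisited := map[string]bool{"abc123": true}
	for ctr := range ctrsVisited { // not: for ctr, _ := range ctrsVisited
		fmt.Println(ctr)
	}
}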


@@ -376,6 +376,7 @@ func (ic *ContainerEngine) ContainerRestart(ctx context.Context, namesOrIds []st
 	return reports, nil
 }
+//nolint:unparam
 func (ic *ContainerEngine) removeContainer(ctx context.Context, ctr *libpod.Container, options entities.RmOptions) (map[string]error, map[string]error, error) {
 	var err error
 	ctrs := make(map[string]error)
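
//nolint:unparam silences the unparam linter, presumably because
removeContainer's widened signature now returns a value (evidently the
removed-pods map) that no caller consumes yet - see the blank identifier
in the next hunk.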
@@ -442,6 +443,7 @@ func (ic *ContainerEngine) ContainerRm(ctx context.Context, namesOrIds []string,
 	}
 	mapMutex.Unlock()
+	// TODO: We should report removed pods back somehow.
 	ctrs, _, err := ic.removeContainer(ctx, c, options)
 	mapMutex.Lock()
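
The blank identifier here discards the removed-pods map that
removeContainer now returns, which is exactly what the new TODO records.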
@@ -458,7 +460,7 @@ func (ic *ContainerEngine) ContainerRm(ctx context.Context, namesOrIds []string,
 	for ctr, err := range errMap {
 		if _, ok := ctrsMap[ctr.ID()]; ok {
-			logrus.Debugf("Multiple results for container %s - attempted multiple removals?", ctr)
+			logrus.Debugf("Multiple results for container %s - attempted multiple removals?", ctr.ID())
 		}
 		ctrsMap[ctr.ID()] = err
 	}
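
The old call passed the *libpod.Container itself to the %s verb, logging a
struct dump rather than the ID the message intends (and likely tripping the
printf lint check); ctr.ID() fixes both.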