Refactor graph traversal & use for pod stop

First, refactor our existing graph traversal code to improve code
sharing. There still isn't much sharing between inward traversal
(stop, remove) and outward traversal (start), but stop and remove
now share most of their code, which seems like a positive.
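
As a rough sketch of what a shared inward traversal can look like (not the
actual libpod code; node, traverseInward, and the callback signature are all
illustrative names), the idea is a depth-first walk that handles every
dependent of a container before the container itself, with the per-container
operation passed in as a callback:

package main

import "fmt"

// node is a hypothetical stand-in for a container in the pod dependency graph.
type node struct {
	id         string
	dependents []*node // containers that depend on this one
}

// traverseInward handles every dependent of n before n itself, so containers
// are processed before the containers they depend on. Stop and removal can
// share this walk by supplying different per-container operations.
func traverseInward(n *node, visited map[string]bool, op func(*node) error) error {
	if visited[n.id] {
		return nil
	}
	visited[n.id] = true
	for _, dep := range n.dependents {
		if err := traverseInward(dep, visited, op); err != nil {
			return err
		}
	}
	return op(n)
}

func main() {
	infra := &node{id: "infra"}
	app := &node{id: "app"}
	infra.dependents = []*node{app} // app depends on infra
	_ = traverseInward(infra, map[string]bool{}, func(n *node) error {
		fmt.Println("handling", n.id) // prints app, then infra
		return nil
	})
}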

Second, add a new graph-traversal function to stop containers.
We already had start and remove; stop uses the newly-refactored
inward-traversal code which it shares with removal.
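
Continuing the hypothetical example above (this fragment reuses its node and
traverseInward helpers, so it is not a complete program), graph-based stop
then becomes just another callback handed to the same walk, alongside removal:

// Stop and removal as two different callbacks over the same shared walk.
stopOp := func(n *node) error {
	fmt.Println("stopping", n.id) // real code would ask the OCI runtime to stop the container
	return nil
}
removeOp := func(n *node) error {
	fmt.Println("removing", n.id) // real code would remove state and storage
	return nil
}

// Stop the whole pod graph, then remove it; the infra container is handled last.
_ = traverseInward(infra, map[string]bool{}, stopOp)
_ = traverseInward(infra, map[string]bool{}, removeOp)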

Third, rework the shared stop/removal inward-traversal code to
add locking. This allows parallel execution of stop and removal,
which should improve the performance of `podman pod rm` and
keep the performance of `podman pod stop` at roughly its current
level.
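
A minimal sketch of per-container locking combined with parallel traversal,
assuming a hypothetical lockedNode type that carries its own mutex (this is
illustrative only, not the real locking scheme):

package main

import (
	"fmt"
	"sync"
)

// lockedNode is a hypothetical container node that carries its own lock,
// mirroring the idea that a container's lock is held only while operating
// on that specific container.
type lockedNode struct {
	id         string
	mu         sync.Mutex
	dependents []*lockedNode
	done       bool
}

// parallelInward handles all dependents of n concurrently, waits for them,
// then locks n and applies op exactly once. Error handling is omitted for brevity.
func parallelInward(n *lockedNode, op func(*lockedNode)) {
	var wg sync.WaitGroup
	for _, dep := range n.dependents {
		wg.Add(1)
		go func(d *lockedNode) {
			defer wg.Done()
			parallelInward(d, op)
		}(dep)
	}
	wg.Wait()

	n.mu.Lock()
	defer n.mu.Unlock()
	if n.done {
		return
	}
	n.done = true
	op(n)
}

func main() {
	infra := &lockedNode{id: "infra"}
	web := &lockedNode{id: "web"}
	db := &lockedNode{id: "db"}
	infra.dependents = []*lockedNode{web, db} // both depend on infra

	parallelInward(infra, func(n *lockedNode) {
		fmt.Println("stopping", n.id) // web and db may stop in parallel; infra is always last
	})
}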

Fourth and finally, use the new graph-based stop when possible
to solve unordered stop problems with pods - specifically, the
infra container stopping before application containers, leaving
those containers without a working network.
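
An over-simplified sketch of the ordering guarantee this buys for pods; it
captures only the infra-container-last property, not the full dependency
graph, and container, isInfra, and stopPod are made-up names:

package main

import "fmt"

// container is a hypothetical stand-in; isInfra marks the pod's infra container.
type container struct {
	name    string
	isInfra bool
}

// stopPod stops application containers first and the infra container last,
// so the pod's network stays up until everything else is down.
func stopPod(ctrs []*container) {
	var infra *container
	for _, c := range ctrs {
		if c.isInfra {
			infra = c
			continue
		}
		fmt.Println("stopping", c.name) // application containers still have a working network here
	}
	if infra != nil {
		fmt.Println("stopping", infra.name) // the pod network goes away only now
	}
}

func main() {
	stopPod([]*container{
		{name: "infra", isInfra: true},
		{name: "web"},
		{name: "db"},
	})
}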

Fixes https://issues.redhat.com/browse/RHEL-76827

Signed-off-by: Matt Heon <mheon@redhat.com>
commit 46d874aa52 (parent 06fa617f61)
Author: Matt Heon
Date: 2025-01-30 12:56:08 -05:00
Committed by: Matthew Heon
6 changed files with 324 additions and 151 deletions

@@ -2114,6 +2114,62 @@ func (c *Container) cleanupStorage() error {
	return cleanupErr
}

// fullCleanup performs all cleanup tasks, including handling restart policy.
func (c *Container) fullCleanup(ctx context.Context, onlyStopped bool) error {
	// Check if state is good
	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStateExited) {
		return fmt.Errorf("container %s is running or paused, refusing to clean up: %w", c.ID(), define.ErrCtrStateInvalid)
	}
	if onlyStopped && !c.ensureState(define.ContainerStateStopped) {
		return fmt.Errorf("container %s is not stopped and only cleanup for a stopped container was requested: %w", c.ID(), define.ErrCtrStateInvalid)
	}

	// if the container was not created in the oci runtime or was already cleaned up, then do nothing
	if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
		return nil
	}

	// Handle restart policy.
	// Returns a bool indicating whether we actually restarted.
	// If we did, don't proceed to cleanup - just exit.
	didRestart, err := c.handleRestartPolicy(ctx)
	if err != nil {
		return err
	}
	if didRestart {
		return nil
	}

	// If we didn't restart, we perform a normal cleanup

	// make sure all the container processes are terminated if we are running without a pid namespace.
	hasPidNs := false
	if c.config.Spec.Linux != nil {
		for _, i := range c.config.Spec.Linux.Namespaces {
			if i.Type == spec.PIDNamespace {
				hasPidNs = true
				break
			}
		}
	}
	if !hasPidNs {
		// do not fail on errors
		_ = c.ociRuntime.KillContainer(c, uint(unix.SIGKILL), true)
	}

	// Check for running exec sessions
	sessions, err := c.getActiveExecSessions()
	if err != nil {
		return err
	}
	if len(sessions) > 0 {
		return fmt.Errorf("container %s has active exec sessions, refusing to clean up: %w", c.ID(), define.ErrCtrStateInvalid)
	}

	defer c.newContainerEvent(events.Cleanup)
	return c.cleanup(ctx)
}

// Unmount the container and free its resources
func (c *Container) cleanup(ctx context.Context) error {
	var lastError error