Merge pull request #15757 from mheon/fix_15526

Introduce graph-based pod container removal
2025-07-15 03:02:52 +08:00 · 2022-09-15 21:01:23 +02:00
parent 50c538b3cc e19e0de5fa
commit df73f606ef
7 changed files with 216 additions and 52 deletions
--- a/libpod/container_graph.go
+++ b/libpod/container_graph.go
@ -281,3 +281,94 @@ func startNode(ctx context.Context, node *containerNode, setError bool, ctrError
 		startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart)
 	}
 }
+
+// Visit a node on the container graph and remove it, or set an error if it
+// failed to remove. Only intended for use in pod removal; do *not* use when
+// removing individual containers.
+// All containers are assumed to be *UNLOCKED* on running this function.
+// Container locks will be acquired as necessary.
+// Pod and infraID are optional. If a pod is given it must be *LOCKED*.
+func removeNode(ctx context.Context, node *containerNode, pod *Pod, force bool, timeout *uint, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, ctrNamedVolumes map[string]*ContainerNamedVolume) {
+	// If we already visited this node, we're done.
+	if ctrsVisited[node.id] {
+		return
+	}
+
+	// Someone who depends on us failed.
+	// Mark us as failed and recurse.
+	if setError {
+		ctrsVisited[node.id] = true
+		ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s could not be removed: %w", node.id, define.ErrCtrStateInvalid)
+
+		// Hit anyone who depends on us, set errors there as well.
+		for _, successor := range node.dependsOn {
+			removeNode(ctx, successor, pod, force, timeout, true, ctrErrors, ctrsVisited, ctrNamedVolumes)
+		}
+	}
+
+	// Does anyone still depend on us?
+	// Cannot remove if true. Once all our dependencies have been removed,
+	// we will be removed.
+	for _, dep := range node.dependedOn {
+		// The container that depends on us hasn't been removed yet.
+		// OK to continue on
+		if ok := ctrsVisited[dep.id]; !ok {
+			return
+		}
+	}
+
+	// Going to try to remove the node, mark us as visited
+	ctrsVisited[node.id] = true
+
+	ctrErrored := false
+
+	// Verify that all that depend on us are gone.
+	// Graph traversal should guarantee this is true, but this isn't that
+	// expensive, and it's better to be safe.
+	for _, dep := range node.dependedOn {
+		if _, err := node.container.runtime.GetContainer(dep.id); err == nil {
+			ctrErrored = true
+			ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s still exists: %w", node.id, define.ErrDepExists)
+		}
+	}
+
+	// Lock the container
+	node.container.lock.Lock()
+
+	// Gate all subsequent bits behind a ctrErrored check - we don't want to
+	// proceed if a previous step failed.
+	if !ctrErrored {
+		if err := node.container.syncContainer(); err != nil {
+			ctrErrored = true
+			ctrErrors[node.id] = err
+		}
+	}
+
+	if !ctrErrored {
+		for _, vol := range node.container.config.NamedVolumes {
+			ctrNamedVolumes[vol.Name] = vol
+		}
+
+		if pod != nil && pod.state.InfraContainerID == node.id {
+			pod.state.InfraContainerID = ""
+			if err := pod.save(); err != nil {
+				ctrErrored = true
+				ctrErrors[node.id] = fmt.Errorf("error removing infra container %s from pod %s: %w", node.id, pod.ID(), err)
+			}
+		}
+	}
+
+	if !ctrErrored {
+		if err := node.container.runtime.removeContainer(ctx, node.container, force, false, true, false, timeout); err != nil {
+			ctrErrored = true
+			ctrErrors[node.id] = err
+		}
+	}
+
+	node.container.lock.Unlock()
+
+	// Recurse to anyone who we depend on and remove them
+	for _, successor := range node.dependsOn {
+		removeNode(ctx, successor, pod, force, timeout, ctrErrored, ctrErrors, ctrsVisited, ctrNamedVolumes)
+	}
+}
--- a/libpod/pod_api.go
+++ b/libpod/pod_api.go
@ -40,7 +40,7 @@ func (p *Pod) startInitContainers(ctx context.Context) error {
 			icLock := initCon.lock
 			icLock.Lock()
 			var time *uint
-			if err := p.runtime.removeContainer(ctx, initCon, false, false, true, time); err != nil {
+			if err := p.runtime.removeContainer(ctx, initCon, false, false, true, false, time); err != nil {
 				icLock.Unlock()
 				return fmt.Errorf("failed to remove once init container %s: %w", initCon.ID(), err)
 			}
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@ -581,7 +581,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
 // be removed also if and only if the container is the sole user
 // Otherwise, RemoveContainer will return an error if the container is running
 func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool, removeVolume bool, timeout *uint) error {
-	return r.removeContainer(ctx, c, force, removeVolume, false, timeout)
+	return r.removeContainer(ctx, c, force, removeVolume, false, false, timeout)
 }

 // Internal function to remove a container.
@ -589,7 +589,9 @@ func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool,
 // removePod is used only when removing pods. It instructs Podman to ignore
 // infra container protections, and *not* remove from the database (as pod
 // remove will handle that).
-func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod bool, timeout *uint) error {
+// ignoreDeps is *DANGEROUS* and should not be used outside of a very specific
+// context (alternate pod removal code, where graph traversal is not possible).
+func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod, ignoreDeps bool, timeout *uint) error {
 	if !c.valid {
 		if ok, _ := r.state.HasContainer(c.ID()); !ok {
 			// Container probably already removed
@ -618,12 +620,13 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 	// pod.
 	var pod *Pod
 	runtime := c.runtime
-	if c.config.Pod != "" && !removePod {
+	if c.config.Pod != "" {
 		pod, err = r.state.Pod(c.config.Pod)
 		if err != nil {
 			return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), pod.ID(), err)
 		}

+		if !removePod {
 			// Lock the pod while we're removing container
 			if pod.config.LockID == c.config.LockID {
 				return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock)
@ -639,6 +642,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 				return fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID())
 			}
 		}
+	}

 	// For pod removal, the container is already locked by the caller
 	if !removePod {
@ -696,7 +700,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 	// Check that no other containers depend on the container.
 	// Only used if not removing a pod - pods guarantee that all
 	// deps will be evicted at the same time.
-	if !removePod {
+	if !ignoreDeps {
 		deps, err := r.state.ContainerInUse(c)
 		if err != nil {
 			return err
@ -777,7 +781,6 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 	if c.config.Pod != "" {
 		// If we're removing the pod, the container will be evicted
 		// from the state elsewhere
-		if !removePod {
 		if err := r.state.RemoveContainerFromPod(pod, c); err != nil {
 			if cleanupErr == nil {
 				cleanupErr = err
@ -785,7 +788,6 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 				logrus.Errorf("Removing container %s from database: %v", c.ID(), err)
 			}
 		}
-		}
 	} else {
 		if err := r.state.RemoveContainer(c); err != nil {
 			if cleanupErr == nil {
@ -872,7 +874,7 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol
 	if err == nil {
 		logrus.Infof("Container %s successfully retrieved from state, attempting normal removal", id)
 		// Assume force = true for the evict case
-		err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, timeout)
+		err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, false, timeout)
 		if !tmpCtr.valid {
 			// If the container is marked invalid, remove succeeded
 			// in kicking it out of the state - no need to continue.
@ -1034,7 +1036,7 @@ func (r *Runtime) RemoveDepend(ctx context.Context, rmCtr *Container, force bool
 	}

 	report := reports.RmReport{Id: rmCtr.ID(), RawInput: rmCtr.ID()}
-	report.Err = r.removeContainer(ctx, rmCtr, force, removeVolume, false, timeout)
+	report.Err = r.removeContainer(ctx, rmCtr, force, removeVolume, false, false, timeout)
 	return append(rmReports, &report), nil
 }

--- a/libpod/runtime_img.go
+++ b/libpod/runtime_img.go
@ -47,7 +47,7 @@ func (r *Runtime) RemoveContainersForImageCallback(ctx context.Context) libimage
 					return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err)
 				}
 			} else {
-				if err := r.removeContainer(ctx, ctr, true, false, false, timeout); err != nil {
+				if err := r.removeContainer(ctx, ctr, true, false, false, false, timeout); err != nil {
 					return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err)
 				}
 			}
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@ -17,6 +17,7 @@ import (
 	"github.com/containers/podman/v4/libpod/events"
 	"github.com/containers/podman/v4/pkg/rootless"
 	"github.com/containers/podman/v4/pkg/specgen"
+	"github.com/hashicorp/go-multierror"
 	"github.com/sirupsen/logrus"
 )

@ -191,29 +192,9 @@ func (r *Runtime) SavePod(pod *Pod) error {
 	return nil
 }

-func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error {
-	if err := p.updatePod(); err != nil {
-		return err
-	}
-
-	ctrs, err := r.state.PodContainers(p)
-	if err != nil {
-		return err
-	}
-	numCtrs := len(ctrs)
-
-	// If the only running container in the pod is the pause container, remove the pod and container unconditionally.
-	pauseCtrID := p.state.InfraContainerID
-	if numCtrs == 1 && ctrs[0].ID() == pauseCtrID {
-		removeCtrs = true
-		force = true
-	}
-	if !removeCtrs && numCtrs > 0 {
-		return fmt.Errorf("pod %s contains containers and cannot be removed: %w", p.ID(), define.ErrCtrExists)
-	}
-
-	ctrNamedVolumes := make(map[string]*ContainerNamedVolume)
-
+// DO NOT USE THIS FUNCTION DIRECTLY. Use removePod(), below. It will call
+// removeMalformedPod() if necessary.
+func (r *Runtime) removeMalformedPod(ctx context.Context, p *Pod, ctrs []*Container, force bool, timeout *uint, ctrNamedVolumes map[string]*ContainerNamedVolume) error {
 	var removalErr error
 	for _, ctr := range ctrs {
 		err := func() error {
@ -231,7 +212,7 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
 				ctrNamedVolumes[vol.Name] = vol
 			}

-			return r.removeContainer(ctx, ctr, force, false, true, timeout)
+			return r.removeContainer(ctx, ctr, force, false, true, true, timeout)
 		}()

 		if removalErr == nil {
@ -261,6 +242,69 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
 		return err
 	}

+	return nil
+}
+
+func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool, timeout *uint) error {
+	if err := p.updatePod(); err != nil {
+		return err
+	}
+
+	ctrs, err := r.state.PodContainers(p)
+	if err != nil {
+		return err
+	}
+	numCtrs := len(ctrs)
+
+	// If the only running container in the pod is the pause container, remove the pod and container unconditionally.
+	pauseCtrID := p.state.InfraContainerID
+	if numCtrs == 1 && ctrs[0].ID() == pauseCtrID {
+		removeCtrs = true
+		force = true
+	}
+	if !removeCtrs && numCtrs > 0 {
+		return fmt.Errorf("pod %s contains containers and cannot be removed: %w", p.ID(), define.ErrCtrExists)
+	}
+
+	var removalErr error
+	ctrNamedVolumes := make(map[string]*ContainerNamedVolume)
+
+	// Build a graph of all containers in the pod.
+	graph, err := BuildContainerGraph(ctrs)
+	if err != nil {
+		// We have to allow the pod to be removed.
+		// But let's only do it if force is set.
+		if !force {
+			return fmt.Errorf("cannot create container graph for pod %s: %w", p.ID(), err)
+		}
+
+		removalErr = fmt.Errorf("creating container graph for pod %s failed, fell back to loop removal: %w", p.ID(), err)
+
+		if err := r.removeMalformedPod(ctx, p, ctrs, force, timeout, ctrNamedVolumes); err != nil {
+			logrus.Errorf("Error creating container graph for pod %s: %v. Falling back to loop removal.", p.ID(), err)
+			return err
+		}
+	} else {
+		ctrErrors := make(map[string]error)
+		ctrsVisited := make(map[string]bool)
+
+		for _, node := range graph.notDependedOnNodes {
+			removeNode(ctx, node, p, force, timeout, false, ctrErrors, ctrsVisited, ctrNamedVolumes)
+		}
+
+		// This is gross, but I don't want to change the signature on
+		// removePod - especially since any change here eventually has
+		// to map down to one error unless we want to make a breaking
+		// API change.
+		if len(ctrErrors) > 0 {
+			var allErrs error
+			for id, err := range ctrErrors {
+				allErrs = multierror.Append(allErrs, fmt.Errorf("removing container %s from pod %s: %w", id, p.ID(), err))
+			}
+			return allErrs
+		}
+	}
+
 	for volName := range ctrNamedVolumes {
 		volume, err := r.state.Volume(volName)
 		if err != nil && !errors.Is(err, define.ErrNoSuchVolume) {
--- a/libpod/runtime_volume_linux.go
+++ b/libpod/runtime_volume_linux.go
@ -324,7 +324,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo

 			logrus.Debugf("Removing container %s (depends on volume %q)", ctr.ID(), v.Name())

-			if err := r.removeContainer(ctx, ctr, force, false, false, timeout); err != nil {
+			if err := r.removeContainer(ctx, ctr, force, false, false, false, timeout); err != nil {
 				return fmt.Errorf("removing container %s that depends on volume %s: %w", ctr.ID(), v.Name(), err)
 			}
 		}
--- a/test/e2e/pod_rm_test.go
+++ b/test/e2e/pod_rm_test.go
@ -318,4 +318,31 @@ var _ = Describe("Podman pod rm", func() {
 		result.WaitWithDefaultTimeout()
 		Expect(result).Should(Exit(0))
 	})
+
+	It("podman pod rm pod with infra container and running container", func() {
+		podName := "testPod"
+		ctrName := "testCtr"
+
+		ctrAndPod := podmanTest.Podman([]string{"run", "-d", "--pod", fmt.Sprintf("new:%s", podName), "--name", ctrName, ALPINE, "top"})
+		ctrAndPod.WaitWithDefaultTimeout()
+		Expect(ctrAndPod).Should(Exit(0))
+
+		removePod := podmanTest.Podman([]string{"pod", "rm", "-a"})
+		removePod.WaitWithDefaultTimeout()
+		Expect(removePod).Should(Not(Exit(0)))
+
+		ps := podmanTest.Podman([]string{"pod", "ps"})
+		ps.WaitWithDefaultTimeout()
+		Expect(ps).Should(Exit(0))
+		Expect(ps.OutputToString()).To(ContainSubstring(podName))
+
+		removePodForce := podmanTest.Podman([]string{"pod", "rm", "-af"})
+		removePodForce.WaitWithDefaultTimeout()
+		Expect(removePodForce).Should(Exit(0))
+
+		ps2 := podmanTest.Podman([]string{"pod", "ps"})
+		ps2.WaitWithDefaultTimeout()
+		Expect(ps2).Should(Exit(0))
+		Expect(ps2.OutputToString()).To(Not(ContainSubstring(podName)))
+	})
 })