Make RemoveContainer return containers and pods removed

This allows for accurate reporting of dependency removal, but the
work is still incomplete: pods can be removed, but do not report
the containers they removed as part of said removal. Will add
this in a subsequent commit.

Major note: I made ignoring no-such-container errors automatic
once it has been determined that a container did exist in the
first place. I can't think of any case where this would not be a
TOCTOU - IE, no reason not to ignore them. The `--ignore` option
to `podman rm` should still retain meaning as it will ignore
errors from containers that didn't exist in the first place.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
This commit is contained in:
Matthew Heon
2023-04-14 14:08:22 -04:00
parent e8d7456278
commit bc1a31ce6d
7 changed files with 134 additions and 66 deletions

View File

@ -359,7 +359,7 @@ func removeNode(ctx context.Context, node *containerNode, pod *Pod, force bool,
}
if !ctrErrored {
if err := node.container.runtime.removeContainer(ctx, node.container, force, false, true, false, false, false, timeout); err != nil {
if _, _, err := node.container.runtime.removeContainer(ctx, node.container, force, false, true, false, false, false, timeout); err != nil {
ctrErrored = true
ctrErrors[node.id] = err
}

View File

@ -40,7 +40,7 @@ func (p *Pod) startInitContainers(ctx context.Context) error {
icLock := initCon.lock
icLock.Lock()
var time *uint
if err := p.runtime.removeContainer(ctx, initCon, false, false, true, false, false, false, time); err != nil {
if _, _, err := p.runtime.removeContainer(ctx, initCon, false, false, true, false, false, false, time); err != nil {
icLock.Unlock()
return fmt.Errorf("failed to remove once init container %s: %w", initCon.ID(), err)
}

View File

@ -606,16 +606,22 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
// container will be removed also (iff the container is the sole user of the
// volumes). Timeout sets the stop timeout for the container if it is running.
func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool, removeVolume bool, timeout *uint) error {
// NOTE: container will be locked down the road.
return r.removeContainer(ctx, c, force, removeVolume, false, false, false, false, timeout)
// NOTE: container will be locked down the road. There is no unlocked
// version of removeContainer.
_, _, err := r.removeContainer(ctx, c, force, removeVolume, false, false, false, false, timeout)
return err
}
// RemoveContainerAndDependencies removes the given container and all its
// dependencies. This may include pods (if the container or any of its
// dependencies is an infra or service container, the associated pod(s) will also
// be removed). Otherwise, it functions identically to RemoveContainer.
func (r *Runtime) RemoveContainerAndDependencies(ctx context.Context, c *Container, force bool, removeVolume bool, timeout *uint) error {
// NOTE: container will be locked down the road.
// Returns two arrays: containers removed, and pods removed. These arrays are
// always returned, even if error is set, and indicate any containers that were
// successfully removed prior to the error.
func (r *Runtime) RemoveContainerAndDependencies(ctx context.Context, c *Container, force bool, removeVolume bool, timeout *uint) (map[string]error, map[string]error, error) {
// NOTE: container will be locked down the road. There is no unlocked
// version of removeContainer.
return r.removeContainer(ctx, c, force, removeVolume, false, false, true, false, timeout)
}
@ -629,17 +635,25 @@ func (r *Runtime) RemoveContainerAndDependencies(ctx context.Context, c *Contain
// removeDeps instructs Podman to remove dependency containers (and possible
// a dependency pod if an infra container is involved). removeDeps conflicts
// with removePod - pods have their own dependency management.
func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod, ignoreDeps, removeDeps, noLockPod bool, timeout *uint) error {
// noLockPod is used for recursive removeContainer calls when the pod is already
// locked.
// TODO: At this point we should just start accepting an options struct
func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod, ignoreDeps, removeDeps, noLockPod bool, timeout *uint) (removedCtrs map[string]error, removedPods map[string]error, retErr error) {
removedCtrs = make(map[string]error)
removedPods = make(map[string]error)
if !c.valid {
if ok, _ := r.state.HasContainer(c.ID()); !ok {
// Container probably already removed
// Or was never in the runtime to begin with
return nil
removedCtrs[c.ID()] = nil
return
}
}
if removePod && removeDeps {
return fmt.Errorf("cannot remove dependencies while also removing a pod: %w", define.ErrInvalidArg)
retErr = fmt.Errorf("cannot remove dependencies while also removing a pod: %w", define.ErrInvalidArg)
return
}
// We need to refresh container config from the DB, to ensure that any
@ -650,7 +664,8 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// exist once we're done.
newConf, err := r.state.GetContainerConfig(c.ID())
if err != nil {
return fmt.Errorf("retrieving container %s configuration from DB to remove: %w", c.ID(), err)
retErr = fmt.Errorf("retrieving container %s configuration from DB to remove: %w", c.ID(), err)
return
}
c.config = newConf
@ -665,23 +680,27 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
if c.config.Pod != "" {
pod, err = r.state.Pod(c.config.Pod)
if err != nil {
return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), pod.ID(), err)
retErr = err
return
}
if !removePod {
// Lock the pod while we're removing container
if pod.config.LockID == c.config.LockID {
return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock)
retErr = fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock)
return
}
pod.lock.Lock()
defer pod.lock.Unlock()
if err := pod.updatePod(); err != nil {
return err
retErr = err
return
}
infraID := pod.state.InfraContainerID
if c.ID() == infraID && !removeDeps {
return fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID())
retErr = fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID())
return
}
}
}
@ -699,12 +718,14 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
}
if !r.valid {
return define.ErrRuntimeStopped
retErr = define.ErrRuntimeStopped
return
}
// Update the container to get current state
if err := c.syncContainer(); err != nil {
return err
retErr = err
return
}
serviceForPod := false
@ -715,10 +736,12 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
if errors.Is(err, define.ErrNoSuchPod) {
continue
}
return err
retErr = err
return
}
if !removeDeps {
return fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ","))
retErr = fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ","))
return
}
// If we are the service container for the pod we are a
// member of: we need to remove that pod last, since
@ -729,8 +752,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
}
logrus.Infof("Removing pod %s as container %s is its service container", depPod.ID(), c.ID())
if err := r.RemovePod(ctx, depPod, true, force, timeout); err != nil {
return err
removedPods[depPod.ID()] = err
retErr = fmt.Errorf("error removing container %s dependency pods: %w", c.ID(), err)
return
}
removedPods[depPod.ID()] = nil
}
}
if serviceForPod || c.config.IsInfra {
@ -744,36 +770,44 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
logrus.Infof("Removing pod %s (dependency of container %s)", pod.ID(), c.ID())
if err := r.removePod(ctx, pod, true, force, timeout); err != nil {
return err
removedPods[pod.ID()] = err
retErr = fmt.Errorf("error removing container %s pod: %w", c.ID(), err)
return
}
return nil
removedPods[pod.ID()] = nil
return
}
// If we're not force-removing, we need to check if we're in a good
// state to remove.
if !force {
if err := c.checkReadyForRemoval(); err != nil {
return err
retErr = err
return
}
}
if c.state.State == define.ContainerStatePaused {
isV2, err := cgroups.IsCgroup2UnifiedMode()
if err != nil {
return err
retErr = err
return
}
// cgroups v1 and v2 handle signals on paused processes differently
if !isV2 {
if err := c.unpause(); err != nil {
return err
retErr = err
return
}
}
if err := c.ociRuntime.KillContainer(c, 9, false); err != nil {
return err
retErr = err
return
}
// Need to update container state to make sure we know it's stopped
if err := c.waitForExitFileAndSync(); err != nil {
return err
retErr = err
return
}
}
@ -783,22 +817,33 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
if !ignoreDeps {
deps, err := r.state.ContainerInUse(c)
if err != nil {
return err
retErr = err
return
}
if !removeDeps {
if len(deps) != 0 {
depsStr := strings.Join(deps, ", ")
return fmt.Errorf("container %s has dependent containers which must be removed before it: %s: %w", c.ID(), depsStr, define.ErrCtrExists)
retErr = fmt.Errorf("container %s has dependent containers which must be removed before it: %s: %w", c.ID(), depsStr, define.ErrCtrExists)
return
}
}
for _, depCtr := range deps {
dep, err := r.GetContainer(depCtr)
if err != nil {
return err
retErr = err
return
}
logrus.Infof("Removing container %s (dependency of container %s)", dep.ID(), c.ID())
if err := r.removeContainer(ctx, dep, force, removeVolume, false, false, true, true, timeout); err != nil {
return err
ctrs, pods, err := r.removeContainer(ctx, dep, force, removeVolume, false, false, true, true, timeout)
for rmCtr, err := range ctrs {
removedCtrs[rmCtr] = err
}
for rmPod, err := range pods {
removedPods[rmPod] = err
}
if err != nil {
retErr = err
return
}
}
}
@ -812,7 +857,8 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// Ignore ErrConmonDead - we couldn't retrieve the container's
// exit code properly, but it's still stopped.
if err := c.stop(time); err != nil && !errors.Is(err, define.ErrConmonDead) {
return fmt.Errorf("cannot remove container %s as it could not be stopped: %w", c.ID(), err)
retErr = fmt.Errorf("cannot remove container %s as it could not be stopped: %w", c.ID(), err)
return
}
// We unlocked as part of stop() above - there's a chance someone
@ -823,17 +869,19 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
if ok, _ := r.state.HasContainer(c.ID()); !ok {
// When the container has already been removed, the OCI runtime directory remain.
if err := c.cleanupRuntime(ctx); err != nil {
return fmt.Errorf("cleaning up container %s from OCI runtime: %w", c.ID(), err)
retErr = fmt.Errorf("cleaning up container %s from OCI runtime: %w", c.ID(), err)
return
}
return nil
// Do not add to removed containers, someone else
// removed it.
return
}
}
var cleanupErr error
reportErrorf := func(msg string, args ...any) {
err := fmt.Errorf(msg, args...) // Always use fmt.Errorf instead of just logrus.Errorf(…) because the format string probably contains %w
if cleanupErr == nil {
cleanupErr = err
if retErr == nil {
retErr = err
} else {
logrus.Errorf("%s", err.Error())
}
@ -887,6 +935,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
reportErrorf("removing container %s from database: %w", c.ID(), err)
}
}
removedCtrs[c.ID()] = nil
// Deallocate the container's lock
if err := c.lock.Free(); err != nil {
@ -899,7 +948,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
c.newContainerEvent(events.Remove)
if !removeVolume {
return cleanupErr
return
}
for _, v := range c.config.NamedVolumes {
@ -921,7 +970,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
}
}
return cleanupErr
return
}
// EvictContainer removes the given container partial or full ID or name, and
@ -960,7 +1009,7 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol
if err == nil {
logrus.Infof("Container %s successfully retrieved from state, attempting normal removal", id)
// Assume force = true for the evict case
err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, false, false, false, timeout)
_, _, err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, false, false, false, timeout)
if !tmpCtr.valid {
// If the container is marked invalid, remove succeeded
// in kicking it out of the state - no need to continue.

View File

@ -46,7 +46,7 @@ func (r *Runtime) RemoveContainersForImageCallback(ctx context.Context) libimage
return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err)
}
} else {
if err := r.removeContainer(ctx, ctr, true, false, false, false, false, false, timeout); err != nil {
if _, _, err := r.removeContainer(ctx, ctr, true, false, false, false, false, false, timeout); err != nil {
return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err)
}
}

View File

@ -142,7 +142,8 @@ func (r *Runtime) removeMalformedPod(ctx context.Context, p *Pod, ctrs []*Contai
ctrNamedVolumes[vol.Name] = vol
}
return r.removeContainer(ctx, ctr, force, false, true, true, false, false, timeout)
_, _, err := r.removeContainer(ctx, ctr, force, false, true, true, false, false, timeout)
return err
}()
if removalErr == nil {

View File

@ -388,7 +388,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo
logrus.Debugf("Removing container %s (depends on volume %q)", ctr.ID(), v.Name())
if err := r.removeContainer(ctx, ctr, force, false, false, false, false, false, timeout); err != nil {
if _, _, err := r.removeContainer(ctx, ctr, force, false, false, false, false, false, timeout); err != nil {
return fmt.Errorf("removing container %s that depends on volume %s: %w", ctr.ID(), v.Name(), err)
}
}

View File

@ -376,32 +376,24 @@ func (ic *ContainerEngine) ContainerRestart(ctx context.Context, namesOrIds []st
return reports, nil
}
func (ic *ContainerEngine) removeContainer(ctx context.Context, ctr *libpod.Container, options entities.RmOptions) error {
func (ic *ContainerEngine) removeContainer(ctx context.Context, ctr *libpod.Container, options entities.RmOptions) (map[string]error, map[string]error, error) {
var err error
ctrs := make(map[string]error)
pods := make(map[string]error)
if options.All || options.Depend {
err = ic.Libpod.RemoveContainerAndDependencies(ctx, ctr, options.Force, options.Volumes, options.Timeout)
ctrs, pods, err = ic.Libpod.RemoveContainerAndDependencies(ctx, ctr, options.Force, options.Volumes, options.Timeout)
} else {
err = ic.Libpod.RemoveContainer(ctx, ctr, options.Force, options.Volumes, options.Timeout)
}
ctrs[ctr.ID()] = err
if err == nil {
return nil
return ctrs, pods, nil
}
logrus.Debugf("Failed to remove container %s: %s", ctr.ID(), err.Error())
if errors.Is(err, define.ErrNoSuchCtr) {
// Ignore if the container does not exist (anymore) when either
// it has been requested by the user of if the container is a
// service one. Service containers are removed along with its
// pods which in turn are removed along with their infra
// container. Hence, there is an inherent race when removing
// infra containers with service containers in parallel.
if options.Ignore || ctr.IsService() {
logrus.Debugf("Ignoring error (--allow-missing): %v", err)
return nil
}
} else if errors.Is(err, define.ErrCtrRemoved) {
return nil
if errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved) {
return ctrs, pods, nil
}
return err
return ctrs, pods, err
}
func (ic *ContainerEngine) ContainerRm(ctx context.Context, namesOrIds []string, options entities.RmOptions) ([]*reports.RmReport, error) {
@ -440,18 +432,44 @@ func (ic *ContainerEngine) ContainerRm(ctx context.Context, namesOrIds []string,
}
}
ctrsMap := make(map[string]error)
mapMutex := sync.Mutex{}
errMap, err := parallelctr.ContainerOp(ctx, libpodContainers, func(c *libpod.Container) error {
return ic.removeContainer(ctx, c, options)
mapMutex.Lock()
if _, ok := ctrsMap[c.ID()]; ok {
return nil
}
mapMutex.Unlock()
ctrs, _, err := ic.removeContainer(ctx, c, options)
mapMutex.Lock()
defer mapMutex.Unlock()
for ctr, err := range ctrs {
ctrsMap[ctr] = err
}
return err
})
if err != nil {
return nil, err
}
for ctr, err := range errMap {
if _, ok := ctrsMap[ctr.ID()]; ok {
logrus.Debugf("Multiple results for container %s - attempted multiple removals?", ctr)
}
ctrsMap[ctr.ID()] = err
}
for ctr, err := range ctrsMap {
report := new(reports.RmReport)
report.Id = ctr.ID()
report.Err = err
report.RawInput = idToRawInput[ctr.ID()]
report.Id = ctr
if !(errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved)) {
report.Err = err
}
report.RawInput = idToRawInput[ctr]
rmReports = append(rmReports, report)
}
@ -945,7 +963,7 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
ExitCode: exitCode,
})
if ctr.AutoRemove() {
if err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
logrus.Errorf("Removing container %s: %v", ctr.ID(), err)
}
}
@ -981,7 +999,7 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
report.Err = fmt.Errorf("unable to start container %q: %w", ctr.ID(), err)
reports = append(reports, report)
if ctr.AutoRemove() {
if err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
logrus.Errorf("Removing container %s: %v", ctr.ID(), err)
}
}