podman rm: handle case where conmon was killed

When conmon was killed, podman rm -f currently fails, but running it again
then works, which doesn't really make sense. We should properly remove
the container even if conmon is dead.

In fact, the code already handles ErrConmonDead as a stop error when we
remove the container, but this error was never thrown anywhere. To fix
this, throw ErrConmonDead instead of ErrInternal, because it is not an
internal error if something else killed conmon.

With this we can correctly clean up and remove the container. The fact
that this works on the first try is important for quadlet units, as they
only run the ExecStopPost= command once to remove the container.

Fixes: #26640

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
Paul Holzinger
2025-07-15 15:54:08 +02:00
parent 8b1f06bf8f
commit c1cf4b0d89
3 changed files with 18 additions and 2 deletions

View File

@ -1544,7 +1544,7 @@ func (c *Container) waitForConmonToExitAndSave() error {
logrus.Errorf("Error cleaning up container %s after Conmon exited prematurely: %v", c.ID(), err) logrus.Errorf("Error cleaning up container %s after Conmon exited prematurely: %v", c.ID(), err)
} }
return fmt.Errorf("container %s conmon exited prematurely, exit code could not be retrieved: %w", c.ID(), define.ErrInternal) return fmt.Errorf("container %s conmon exited prematurely, exit code could not be retrieved: %w", c.ID(), define.ErrConmonDead)
} }
return c.save() return c.save()

View File

@ -1778,7 +1778,7 @@ search | $IMAGE |
# Unclear why `-t0` is required here, works locally without. # Unclear why `-t0` is required here, works locally without.
# But it shouldn't hurt and does make the test pass... # But it shouldn't hurt and does make the test pass...
PODMAN_TIMEOUT=5 run_podman 125 stop -t0 $cname PODMAN_TIMEOUT=5 run_podman 125 stop -t0 $cname
is "$output" "Error: container .* conmon exited prematurely, exit code could not be retrieved: internal libpod error" "correct error on missing conmon" is "$output" "Error: container .* conmon exited prematurely, exit code could not be retrieved: conmon process killed" "correct error on missing conmon"
# This should be safe because stop is guaranteed to call cleanup? # This should be safe because stop is guaranteed to call cleanup?
run_podman inspect --format "{{ .State.Status }}" $cname run_podman inspect --format "{{ .State.Status }}" $cname

View File

@ -202,4 +202,20 @@ function __run_healthcheck_container() {
die "Container never entered 'stopping' state" die "Container never entered 'stopping' state"
} }
# bats test_tags=ci:parallel
@test "podman rm after killed conmon" {
# Regression test: `podman rm -f` must remove the container on the first
# attempt even though its conmon process has been killed.
cname=c_$(safename)
# Start a long-running detached container so conmon is still alive when we
# look up its PID.
run_podman run -d --name $cname $IMAGE sleep 1000
run_podman inspect --format '{{ .State.ConmonPid }}' $cname
conmon_pid=$output
# SIGKILL conmon directly to simulate it being killed by something other
# than podman.
kill -9 ${conmon_pid}
# A single forced remove (no stop timeout) has to succeed here.
run_podman rm -f -t0 $cname
# Inspect is expected to fail with exit status 125 and report that the
# container no longer exists.
run_podman 125 container inspect $cname
assert "$output" =~ "no such container \"$cname\"" "Container should be removed"
}
# vim: filetype=sh # vim: filetype=sh