From def13bea775b94804b05902d80ed34665f7a814c Mon Sep 17 00:00:00 2001
From: Valentin Rothberg
Date: Tue, 11 Oct 2022 13:01:07 +0200
Subject: [PATCH] healthcheck: fix --on-failure=stop

Fix the "stop" on-failure action by not removing the transient systemd
timer and service during container stop. Removing the service will in
turn cause systemd to terminate the Podman process attempting to stop
the container and hence leave it in the "stopping" state.

Instead move the removal into the restart sequence.

Signed-off-by: Valentin Rothberg
---
 libpod/container_internal.go     | 5 +++++
 test/system/220-healthcheck.bats | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index bad68991bb..a0f86afb50 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1412,6 +1412,11 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
 		if err := c.stop(timeout); err != nil {
 			return err
 		}
+		if c.config.HealthCheckConfig != nil {
+			if err := c.removeTransientFiles(context.Background()); err != nil {
+				logrus.Error(err.Error())
+			}
+		}
 		// Old versions of conmon have a bug where they create the exit file before
 		// closing open file descriptors causing a race condition when restarting
 		// containers with open ports since we cannot bind the ports as they're not
diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats
index 4c3ddbebfb..fc8994d185 100644
--- a/test/system/220-healthcheck.bats
+++ b/test/system/220-healthcheck.bats
@@ -125,6 +125,8 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
             # kill and stop yield the container into a non-running state
             is "$output" ".* $policy" "container was stopped/killed"
             assert "$output" != "running $policy"
+            # also make sure that it's not stuck in the stopping state
+            assert "$output" != "stopping $policy"
         fi
 
         run_podman rm -f -t0 $ctr