healthcheck: fix --on-failure=stop

Fix the "stop" on-failure action by no longer removing the transient systemd timer and service during container stop. Removing the service causes systemd to terminate the Podman process that is attempting to stop the container, which leaves the container stuck in the "stopping" state. Instead, move the removal into the restart sequence.

Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
2025-08-06 19:44:14 +08:00 · 2022-10-11 13:01:07 +02:00
parent d752133e2d
commit def13bea77
2 changed files with 7 additions and 0 deletions
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1412,6 +1412,11 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
 		if err := c.stop(timeout); err != nil {
 			return err
 		}
+		if c.config.HealthCheckConfig != nil {
+			if err := c.removeTransientFiles(context.Background()); err != nil {
+				logrus.Error(err.Error())
+			}
+		}
 		// Old versions of conmon have a bug where they create the exit file before
 		// closing open file descriptors causing a race condition when restarting
 		// containers with open ports since we cannot bind the ports as they're not
--- a/test/system/220-healthcheck.bats
+++ b/test/system/220-healthcheck.bats
@@ -125,6 +125,8 @@ Log[-1].Output   | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
 	    # kill and stop yield the container into a non-running state
            is "$output" ".* $policy" "container was stopped/killed"
            assert "$output" != "running $policy"
+            # also make sure that it's not stuck in the stopping state
+            assert "$output" != "stopping $policy"
        fi

        run_podman rm -f -t0 $ctr