health checks: restart timers

Restart the health-check timers instead of starting them. This will suppress annoying errors stating that an already running timer cannot be started anymore. Also make sure that the transient units/timers are stopped and removed when stopping a container. Fixes: #15691 Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
2025-08-06 19:44:14 +08:00 · 2022-09-12 15:22:21 +02:00
parent 1635fe8620
commit c8c2aab50d
3 changed files with 9 additions and 5 deletions
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1286,6 +1286,12 @@ func (c *Container) stop(timeout uint) error {
 		c.lock.Unlock()
 	}
+	if c.config.HealthCheckConfig != nil {
+		if err := c.removeTransientFiles(context.Background()); err != nil {
+			logrus.Error(err.Error())
+		}
+	}
 	stopErr := c.ociRuntime.StopContainer(c, timeout, all)
 	if !c.batched {
--- a/libpod/healthcheck_linux.go
+++ b/libpod/healthcheck_linux.go
@@ -70,7 +70,7 @@ func (c *Container) startTimer() error {
 	startFile := fmt.Sprintf("%s.service", c.ID())
 	startChan := make(chan string)
-	if _, err := conn.StartUnitContext(context.Background(), startFile, "fail", startChan); err != nil {
+	if _, err := conn.RestartUnitContext(context.Background(), startFile, "fail", startChan); err != nil {
 		return err
 	}
 	if err := systemdOpSuccessful(startChan); err != nil {
--- a/test/system/220-healthcheck.bats
+++ b/test/system/220-healthcheck.bats
@@ -106,8 +106,7 @@ Log[-1].Output   | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
        # healthcheck should now fail, with exit status 1 and 'unhealthy' output
        run_podman 1 healthcheck run $ctr
-	# FIXME: #15691 - `healthcheck run` may emit an error log that the timer already exists
+        is "$output" "unhealthy" "output from 'podman healthcheck run'"
-        is "$output" ".*unhealthy.*" "output from 'podman healthcheck run'"
        run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}"
 	if [[ $policy == "restart" ]];then
@@ -118,8 +117,7 @@ Log[-1].Output   | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
            # Container is still running and health check still broken
            is "$output" "running $policy" "container continued running"
            run_podman 1 healthcheck run $ctr
-	    # FIXME: #15691 - `healthcheck run` may emit an error log that the timer already exists
+            is "$output" "unhealthy" "output from 'podman healthcheck run'"
-            is "$output" ".*unhealthy.*" "output from 'podman healthcheck run'"
 	else
 	    # kill and stop yield the container into a non-running state
            is "$output" ".* $policy" "container was stopped/killed"