From e2fa94e8ac288b1bda436a3f955aea8ec5ba518b Mon Sep 17 00:00:00 2001
From: Valentin Rothberg <vrothberg@redhat.com>
Date: Thu, 24 Nov 2022 10:22:22 +0100
Subject: [PATCH] container restart: clean up healthcheck state

When restarting a container, clean up the healthcheck state by removing
the old log on disk.  Carrying over the old state can lead to various
issues, for instance, a wrong failing streak and hence wrong
behaviour after the restart.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2144754
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
---
 libpod/container_internal.go     |  9 +++++++++
 test/system/220-healthcheck.bats | 30 ++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/libpod/container_internal.go b/libpod/container_internal.go
index 13feb2ffeb..c168144426 100644
--- a/libpod/container_internal.go
+++ b/libpod/container_internal.go
@@ -1077,6 +1077,15 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
 		c.state.RestartCount = 0
 	}
 
+	// bugzilla.redhat.com/show_bug.cgi?id=2144754:
+	// In case of a restart, make sure to remove the healthcheck log to
+	// have a clean state.
+	if path := c.healthCheckLogPath(); path != "" {
+		if err := os.Remove(path); err != nil && !errors.Is(err, os.ErrNotExist) {
+			logrus.Error(err)
+		}
+	}
+
 	if err := c.save(); err != nil {
 		return err
 	}
diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats
index 8df9923c22..b024fc6dbc 100644
--- a/test/system/220-healthcheck.bats
+++ b/test/system/220-healthcheck.bats
@@ -76,6 +76,34 @@ Log[-1].Output   | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
     run_podman rmi   healthcheck_i
 }
 
+@test "podman healthcheck - restart cleans up old state" {
+    ctr="healthcheck_c"
+    img="healthcheck_i"
+
+    _build_health_check_image $img cleanfile
+    run_podman run -d --name $ctr      \
+           --health-cmd /healthcheck   \
+           --health-retries=2          \
+           --health-interval=disable   \
+           $img
+
+    run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}"
+    is "$output" "0" "Failing streak of fresh container should be 0"
+
+    # Get the healthcheck to fail: after /uh-oh is created, a manual run exits 1 and reports "unhealthy"
+    run_podman exec $ctr touch /uh-oh
+    run_podman 1 healthcheck run $ctr
+    is "$output" "unhealthy" "output from 'podman healthcheck run'"
+    run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}"
+    is "$output" "1" "Failing streak after one failed healthcheck should be 1"
+    # A restart must discard the old healthcheck state, so the streak reads 0 again
+    run_podman container restart $ctr
+    run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}"
+    is "$output" "0" "Failing streak of restarted container should be 0 again"
+
+    run_podman rm -f -t0 $ctr
+}
+
 @test "podman healthcheck --health-on-failure" {
     run_podman 125 create --health-on-failure=kill $IMAGE
     is "$output" "Error: cannot set on-failure action to kill without a health check"
@@ -114,6 +142,8 @@ Log[-1].Output   | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
 	if [[ $policy == "restart" ]];then
 	    # Container has been restarted and health check works again
             is "$output" "running $policy" "container has been restarted"
+            run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}"
+            is "$output" "0" "Failing streak of restarted container should be 0 again"
             run_podman healthcheck run $ctr
         elif [[ $policy == "none" ]];then
             # Container is still running and health check still broken