From e2fa94e8ac288b1bda436a3f955aea8ec5ba518b Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Thu, 24 Nov 2022 10:22:22 +0100 Subject: [PATCH] container restart: clean up healthcheck state When restarting a container, clean up the healthcheck state by removing the old log on disk. Carrying over the old state can lead to various issues, for instance, a wrong failing streak and hence wrong behaviour after the restart. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2144754 Signed-off-by: Valentin Rothberg --- libpod/container_internal.go | 9 +++++++++ test/system/220-healthcheck.bats | 30 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 13feb2ffeb..c168144426 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -1077,6 +1077,15 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error { c.state.RestartCount = 0 } + // bugzilla.redhat.com/show_bug.cgi?id=2144754: + // In case of a restart, make sure to remove the healthcheck log to + // have a clean state. 
+ if path := c.healthCheckLogPath(); path != "" { + if err := os.Remove(path); err != nil && !errors.Is(err, os.ErrNotExist) { + logrus.Error(err) + } + } + if err := c.save(); err != nil { return err } diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats index 8df9923c22..b024fc6dbc 100644 --- a/test/system/220-healthcheck.bats +++ b/test/system/220-healthcheck.bats @@ -76,6 +76,34 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\" run_podman rmi healthcheck_i } +@test "podman healthcheck - restart cleans up old state" { + ctr="healthcheck_c" + img="healthcheck_i" + + _build_health_check_image $img cleanfile + run_podman run -d --name $ctr \ + --health-cmd /healthcheck \ + --health-retries=2 \ + --health-interval=disable \ + $img + + run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}" + is "$output" "0" "Failing streak of fresh container should be 0" + + # Get the healthcheck to fail + run_podman exec $ctr touch /uh-oh + run_podman 1 healthcheck run $ctr + is "$output" "unhealthy" "output from 'podman healthcheck run'" + run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}" + is "$output" "1" "Failing streak after one failed healthcheck should be 1" + + run_podman container restart $ctr + run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}" + is "$output" "0" "Failing streak of restarted container should be 0 again" + + run_podman rm -f -t0 $ctr +} + @test "podman healthcheck --health-on-failure" { run_podman 125 create --health-on-failure=kill $IMAGE is "$output" "Error: cannot set on-failure action to kill without a health check" @@ -114,6 +142,8 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\" if [[ $policy == "restart" ]];then # Container has been restarted and health check works again is "$output" "running $policy" "container has been restarted" + run_podman container inspect $ctr --format 
"{{.State.Healthcheck.FailingStreak}}" + is "$output" "0" "Failing streak of restarted container should be 0 again" run_podman healthcheck run $ctr elif [[ $policy == "none" ]];then # Container is still running and health check still broken