health checks: restart timers

Restart the health-check timers instead of starting them.  This will
suppress annoying errors stating that an already running timer cannot be
started anymore.

Also make sure that the transient units/timers are stopped and removed
when stopping a container.

Fixes: #15691
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
Valentin Rothberg
2022-09-12 15:22:21 +02:00
parent 1635fe8620
commit c8c2aab50d
3 changed files with 9 additions and 5 deletions

View File

@ -1286,6 +1286,12 @@ func (c *Container) stop(timeout uint) error {
c.lock.Unlock() c.lock.Unlock()
} }
if c.config.HealthCheckConfig != nil {
if err := c.removeTransientFiles(context.Background()); err != nil {
logrus.Error(err.Error())
}
}
stopErr := c.ociRuntime.StopContainer(c, timeout, all) stopErr := c.ociRuntime.StopContainer(c, timeout, all)
if !c.batched { if !c.batched {

View File

@ -70,7 +70,7 @@ func (c *Container) startTimer() error {
startFile := fmt.Sprintf("%s.service", c.ID()) startFile := fmt.Sprintf("%s.service", c.ID())
startChan := make(chan string) startChan := make(chan string)
if _, err := conn.StartUnitContext(context.Background(), startFile, "fail", startChan); err != nil { if _, err := conn.RestartUnitContext(context.Background(), startFile, "fail", startChan); err != nil {
return err return err
} }
if err := systemdOpSuccessful(startChan); err != nil { if err := systemdOpSuccessful(startChan); err != nil {

View File

@ -106,8 +106,7 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
# healthcheck should now fail, with exit status 1 and 'unhealthy' output # healthcheck should now fail, with exit status 1 and 'unhealthy' output
run_podman 1 healthcheck run $ctr run_podman 1 healthcheck run $ctr
# FIXME: #15691 - `healthcheck run` may emit an error log that the timer already exists is "$output" "unhealthy" "output from 'podman healthcheck run'"
is "$output" ".*unhealthy.*" "output from 'podman healthcheck run'"
run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}" run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}"
if [[ $policy == "restart" ]];then if [[ $policy == "restart" ]];then
@ -118,8 +117,7 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
# Container is still running and health check still broken # Container is still running and health check still broken
is "$output" "running $policy" "container continued running" is "$output" "running $policy" "container continued running"
run_podman 1 healthcheck run $ctr run_podman 1 healthcheck run $ctr
# FIXME: #15691 - `healthcheck run` may emit an error log that the timer already exists is "$output" "unhealthy" "output from 'podman healthcheck run'"
is "$output" ".*unhealthy.*" "output from 'podman healthcheck run'"
else else
# kill and stop yield the container into a non-running state # kill and stop yield the container into a non-running state
is "$output" ".* $policy" "container was stopped/killed" is "$output" ".* $policy" "container was stopped/killed"