healthcheck: fix --on-failure=stop

Fix the "stop" on-failure action by no longer removing the transient
systemd timer and service during container stop. Removing the service
causes systemd to terminate the Podman process that is attempting to
stop the container, which leaves the container stuck in the "stopping" state.

Instead, move the removal into the restart sequence, after the container has been stopped.

Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
Valentin Rothberg
2022-10-11 13:01:07 +02:00
parent d752133e2d
commit def13bea77
2 changed files with 7 additions and 0 deletions

View File

@ -1412,6 +1412,11 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
if err := c.stop(timeout); err != nil {
return err
}
if c.config.HealthCheckConfig != nil {
if err := c.removeTransientFiles(context.Background()); err != nil {
logrus.Error(err.Error())
}
}
// Old versions of conmon have a bug where they create the exit file before
// closing open file descriptors causing a race condition when restarting
// containers with open ports since we cannot bind the ports as they're not

View File

@ -125,6 +125,8 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
# kill and stop put the container into a non-running state
is "$output" ".* $policy" "container was stopped/killed"
assert "$output" != "running $policy"
# also make sure that it's not stuck in the stopping state
assert "$output" != "stopping $policy"
fi
run_podman rm -f -t0 $ctr