mirror of
https://github.com/containers/podman.git
synced 2025-06-19 16:33:24 +08:00
Merge pull request #17830 from vrothberg/fix-17777
fix --health-on-failure=restart in transient unit
This commit is contained in:
@ -229,6 +229,15 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
|
||||
}
|
||||
|
||||
func (c *Container) shouldRestart() bool {
|
||||
if c.config.HealthCheckOnFailureAction == define.HealthCheckOnFailureActionRestart {
|
||||
isUnhealthy, err := c.isUnhealthy()
|
||||
if err != nil {
|
||||
logrus.Errorf("Checking if container is unhealthy: %v", err)
|
||||
} else if isUnhealthy {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// If we did not get a restart policy match, return false
|
||||
// Do the same if we're not a policy that restarts.
|
||||
if !c.state.RestartPolicyMatch ||
|
||||
@ -268,6 +277,12 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
|
||||
return false, err
|
||||
}
|
||||
|
||||
if c.config.HealthCheckConfig != nil {
|
||||
if err := c.removeTransientFiles(ctx, c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
|
||||
// Is the container running again?
|
||||
// If so, we don't have to do anything
|
||||
if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
|
||||
@ -1429,6 +1444,7 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
|
||||
if err := c.stop(timeout); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if c.config.HealthCheckConfig != nil {
|
||||
if err := c.removeTransientFiles(context.Background(), c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
|
||||
logrus.Error(err.Error())
|
||||
|
@ -184,8 +184,12 @@ func (c *Container) processHealthCheckStatus(status string) error {
|
||||
}
|
||||
|
||||
case define.HealthCheckOnFailureActionRestart:
|
||||
if err := c.RestartWithTimeout(context.Background(), c.config.StopTimeout); err != nil {
|
||||
return fmt.Errorf("restarting container after health-check turned unhealthy: %w", err)
|
||||
// We let the cleanup process handle the restart. Otherwise
|
||||
// the container would be restarted in the context of a
|
||||
// transient systemd unit which may cause undesired side
|
||||
// effects.
|
||||
if err := c.Stop(); err != nil {
|
||||
return fmt.Errorf("restarting/stopping container after health-check turned unhealthy: %w", err)
|
||||
}
|
||||
|
||||
case define.HealthCheckOnFailureActionStop:
|
||||
@ -346,6 +350,18 @@ func (c *Container) updateHealthStatus(status string) error {
|
||||
return os.WriteFile(c.healthCheckLogPath(), newResults, 0700)
|
||||
}
|
||||
|
||||
// isUnhealthy returns if the current health check status in unhealthy.
|
||||
func (c *Container) isUnhealthy() (bool, error) {
|
||||
if !c.HasHealthCheck() {
|
||||
return false, nil
|
||||
}
|
||||
healthCheck, err := c.getHealthCheckLog()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return healthCheck.Status == define.HealthCheckUnhealthy, nil
|
||||
}
|
||||
|
||||
// UpdateHealthCheckLog parses the health check results and writes the log
|
||||
func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod bool) (string, error) {
|
||||
c.lock.Lock()
|
||||
|
@ -139,19 +139,22 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
|
||||
run_podman 1 healthcheck run $ctr
|
||||
is "$output" "unhealthy" "output from 'podman healthcheck run' (policy: $policy)"
|
||||
|
||||
run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}"
|
||||
if [[ $policy == "restart" ]];then
|
||||
# Container has been restarted and health check works again
|
||||
is "$output" "running $policy" "container has been restarted"
|
||||
# Make sure the container transitions back to running
|
||||
run_podman wait --condition=running $ctr
|
||||
run_podman inspect $ctr --format "{{.RestartCount}}"
|
||||
assert "${#lines[@]}" != 0 "Container has been restarted at least once"
|
||||
run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}"
|
||||
is "$output" "0" "Failing streak of restarted container should be 0 again"
|
||||
run_podman healthcheck run $ctr
|
||||
elif [[ $policy == "none" ]];then
|
||||
run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}"
|
||||
# Container is still running and health check still broken
|
||||
is "$output" "running $policy" "container continued running"
|
||||
run_podman 1 healthcheck run $ctr
|
||||
is "$output" "unhealthy" "output from 'podman healthcheck run' (policy: $policy)"
|
||||
else
|
||||
run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}"
|
||||
# kill and stop yield the container into a non-running state
|
||||
is "$output" ".* $policy" "container was stopped/killed (policy: $policy)"
|
||||
assert "$output" != "running $policy"
|
||||
@ -164,4 +167,39 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
|
||||
done
|
||||
}
|
||||
|
||||
@test "podman healthcheck --health-on-failure with interval" {
|
||||
ctr="healthcheck_c"
|
||||
|
||||
for policy in stop kill restart ;do
|
||||
t0=$(date --iso-8601=seconds)
|
||||
run_podman run -d --name $ctr \
|
||||
--health-cmd /bin/false \
|
||||
--health-retries=1 \
|
||||
--health-on-failure=$policy \
|
||||
--health-interval=1s \
|
||||
$IMAGE top
|
||||
|
||||
if [[ $policy == "restart" ]];then
|
||||
# Sleeping for 2 seconds makes the test much faster than using
|
||||
# podman-wait which would compete with the container getting
|
||||
# restarted.
|
||||
sleep 2
|
||||
# Make sure the container transitions back to running
|
||||
run_podman wait --condition=running $ctr
|
||||
run_podman inspect $ctr --format "{{.RestartCount}}"
|
||||
assert "${#lines[@]}" != 0 "Container has been restarted at least once"
|
||||
else
|
||||
# kill and stop yield the container into a non-running state
|
||||
run_podman wait $ctr
|
||||
run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}"
|
||||
is "$output" ".* $policy" "container was stopped/killed (policy: $policy)"
|
||||
assert "$output" != "running $policy"
|
||||
# also make sure that it's not stuck in the stopping state
|
||||
assert "$output" != "stopping $policy"
|
||||
fi
|
||||
|
||||
run_podman rm -f -t0 $ctr
|
||||
done
|
||||
}
|
||||
|
||||
# vim: filetype=sh
|
||||
|
Reference in New Issue
Block a user