mirror of
https://github.com/containers/podman.git
synced 2025-06-25 20:26:51 +08:00
Merge pull request #16084 from vrothberg/health-check-fix
health checks: make on-failure action retry aware
This commit is contained in:
@ -32,18 +32,19 @@ func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
hcStatus, err := checkHealthCheckCanBeRun(container)
|
hcStatus, err := checkHealthCheckCanBeRun(container)
|
||||||
if err == nil {
|
if err != nil {
|
||||||
hcStatus, err := container.runHealthCheck()
|
return hcStatus, err
|
||||||
if err := container.processHealthCheckStatus(hcStatus); err != nil {
|
}
|
||||||
return hcStatus, err
|
|
||||||
}
|
hcStatus, logStatus, err := container.runHealthCheck()
|
||||||
|
if err := container.processHealthCheckStatus(logStatus); err != nil {
|
||||||
return hcStatus, err
|
return hcStatus, err
|
||||||
}
|
}
|
||||||
return hcStatus, err
|
return hcStatus, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// runHealthCheck runs the health check as defined by the container
|
// runHealthCheck runs the health check as defined by the container
|
||||||
func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) {
|
func (c *Container) runHealthCheck() (define.HealthCheckStatus, string, error) {
|
||||||
var (
|
var (
|
||||||
newCommand []string
|
newCommand []string
|
||||||
returnCode int
|
returnCode int
|
||||||
@ -51,11 +52,11 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) {
|
|||||||
)
|
)
|
||||||
hcCommand := c.HealthCheckConfig().Test
|
hcCommand := c.HealthCheckConfig().Test
|
||||||
if len(hcCommand) < 1 {
|
if len(hcCommand) < 1 {
|
||||||
return define.HealthCheckNotDefined, fmt.Errorf("container %s has no defined healthcheck", c.ID())
|
return define.HealthCheckNotDefined, "", fmt.Errorf("container %s has no defined healthcheck", c.ID())
|
||||||
}
|
}
|
||||||
switch hcCommand[0] {
|
switch hcCommand[0] {
|
||||||
case "", define.HealthConfigTestNone:
|
case "", define.HealthConfigTestNone:
|
||||||
return define.HealthCheckNotDefined, fmt.Errorf("container %s has no defined healthcheck", c.ID())
|
return define.HealthCheckNotDefined, "", fmt.Errorf("container %s has no defined healthcheck", c.ID())
|
||||||
case define.HealthConfigTestCmd:
|
case define.HealthConfigTestCmd:
|
||||||
newCommand = hcCommand[1:]
|
newCommand = hcCommand[1:]
|
||||||
case define.HealthConfigTestCmdShell:
|
case define.HealthConfigTestCmdShell:
|
||||||
@ -66,11 +67,11 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) {
|
|||||||
newCommand = hcCommand
|
newCommand = hcCommand
|
||||||
}
|
}
|
||||||
if len(newCommand) < 1 || newCommand[0] == "" {
|
if len(newCommand) < 1 || newCommand[0] == "" {
|
||||||
return define.HealthCheckNotDefined, fmt.Errorf("container %s has no defined healthcheck", c.ID())
|
return define.HealthCheckNotDefined, "", fmt.Errorf("container %s has no defined healthcheck", c.ID())
|
||||||
}
|
}
|
||||||
rPipe, wPipe, err := os.Pipe()
|
rPipe, wPipe, err := os.Pipe()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return define.HealthCheckInternalError, fmt.Errorf("unable to create pipe for healthcheck session: %w", err)
|
return define.HealthCheckInternalError, "", fmt.Errorf("unable to create pipe for healthcheck session: %w", err)
|
||||||
}
|
}
|
||||||
defer wPipe.Close()
|
defer wPipe.Close()
|
||||||
defer rPipe.Close()
|
defer rPipe.Close()
|
||||||
@ -135,15 +136,16 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
|
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
|
||||||
if err := c.updateHealthCheckLog(hcl, inStartPeriod); err != nil {
|
logStatus, err := c.updateHealthCheckLog(hcl, inStartPeriod)
|
||||||
return hcResult, fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err)
|
if err != nil {
|
||||||
|
return hcResult, "", fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return hcResult, hcErr
|
return hcResult, logStatus, hcErr
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Container) processHealthCheckStatus(status define.HealthCheckStatus) error {
|
func (c *Container) processHealthCheckStatus(status string) error {
|
||||||
if status == define.HealthCheckSuccess {
|
if status != define.HealthCheckUnhealthy {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -211,10 +213,13 @@ func (c *Container) updateHealthStatus(status string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// updateHealthCheckLog parses the health check results and writes the log
|
// updateHealthCheckLog parses the health check results and writes the log
|
||||||
func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod bool) error {
|
func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod bool) (string, error) {
|
||||||
|
c.lock.Lock()
|
||||||
|
defer c.lock.Unlock()
|
||||||
|
|
||||||
healthCheck, err := c.getHealthCheckLog()
|
healthCheck, err := c.getHealthCheckLog()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return "", err
|
||||||
}
|
}
|
||||||
if hcl.ExitCode == 0 {
|
if hcl.ExitCode == 0 {
|
||||||
// set status to healthy, reset failing state to 0
|
// set status to healthy, reset failing state to 0
|
||||||
@ -239,9 +244,9 @@ func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPerio
|
|||||||
}
|
}
|
||||||
newResults, err := json.Marshal(healthCheck)
|
newResults, err := json.Marshal(healthCheck)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to marshall healthchecks for writing: %w", err)
|
return "", fmt.Errorf("unable to marshall healthchecks for writing: %w", err)
|
||||||
}
|
}
|
||||||
return os.WriteFile(c.healthCheckLogPath(), newResults, 0700)
|
return healthCheck.Status, os.WriteFile(c.healthCheckLogPath(), newResults, 0700)
|
||||||
}
|
}
|
||||||
|
|
||||||
// HealthCheckLogPath returns the path for where the health check log is
|
// HealthCheckLogPath returns the path for where the health check log is
|
||||||
|
@ -28,10 +28,11 @@ function _check_health {
|
|||||||
--health-cmd /healthcheck \
|
--health-cmd /healthcheck \
|
||||||
--health-interval 1s \
|
--health-interval 1s \
|
||||||
--health-retries 3 \
|
--health-retries 3 \
|
||||||
|
--health-on-failure=kill \
|
||||||
healthcheck_i
|
healthcheck_i
|
||||||
|
|
||||||
run_podman inspect healthcheck_c --format "{{.Config.HealthcheckOnFailureAction}}"
|
run_podman inspect healthcheck_c --format "{{.Config.HealthcheckOnFailureAction}}"
|
||||||
is "$output" "none" "default on-failure action is none"
|
is "$output" "kill" "on-failure action is set to kill"
|
||||||
|
|
||||||
# We can't check for 'starting' because a 1-second interval is too
|
# We can't check for 'starting' because a 1-second interval is too
|
||||||
# short; it could run healthcheck before we get to our first check.
|
# short; it could run healthcheck before we get to our first check.
|
||||||
@ -67,9 +68,8 @@ Log[-1].ExitCode | 1
|
|||||||
Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
|
Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
|
||||||
"
|
"
|
||||||
|
|
||||||
# healthcheck should now fail, with exit status 1 and 'unhealthy' output
|
# now the on-failure action should kick in and kill the container
|
||||||
run_podman 1 healthcheck run healthcheck_c
|
podman wait healthcheck_c
|
||||||
is "$output" "unhealthy" "output from 'podman healthcheck run'"
|
|
||||||
|
|
||||||
# Clean up
|
# Clean up
|
||||||
run_podman rm -t 0 -f healthcheck_c
|
run_podman rm -t 0 -f healthcheck_c
|
||||||
@ -95,6 +95,7 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
|
|||||||
# Run that healthcheck image.
|
# Run that healthcheck image.
|
||||||
run_podman run -d --name $ctr \
|
run_podman run -d --name $ctr \
|
||||||
--health-cmd /healthcheck \
|
--health-cmd /healthcheck \
|
||||||
|
--health-retries=1 \
|
||||||
--health-on-failure=$policy \
|
--health-on-failure=$policy \
|
||||||
$img
|
$img
|
||||||
|
|
||||||
|
@ -318,6 +318,7 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
|
|||||||
run_podman create --name $cname \
|
run_podman create --name $cname \
|
||||||
--health-cmd /healthcheck \
|
--health-cmd /healthcheck \
|
||||||
--health-on-failure=kill \
|
--health-on-failure=kill \
|
||||||
|
--health-retries=1 \
|
||||||
--restart=on-failure \
|
--restart=on-failure \
|
||||||
$img
|
$img
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user