Merge pull request #25423 from Honny1/hc-kill-status

Add stopped status for HealthCheck
This commit is contained in:
openshift-merge-bot[bot]
2025-03-04 13:17:58 +00:00
committed by GitHub
6 changed files with 45 additions and 44 deletions

View File

@ -35,7 +35,8 @@ func run(cmd *cobra.Command, args []string) error {
if err != nil {
return err
}
if response.Status == define.HealthCheckUnhealthy || response.Status == define.HealthCheckStarting {
switch response.Status {
case define.HealthCheckUnhealthy, define.HealthCheckStarting, define.HealthCheckStopped:
registry.SetExitCode(1)
fmt.Println(response.Status)
}

View File

@ -20,6 +20,9 @@ const (
HealthCheckStarting string = "starting"
// HealthCheckReset describes reset of HealthCheck logs
HealthCheckReset string = "reset"
// HealthCheckStopped describes the time when container was stopped during HealthCheck
// and HealthCheck was terminated
HealthCheckStopped string = "stopped"
)
// HealthCheckStatus represents the current state of a container
@ -49,6 +52,19 @@ const (
HealthCheckStartup HealthCheckStatus = iota
)
func (s HealthCheckStatus) String() string {
switch s {
case HealthCheckSuccess:
return HealthCheckHealthy
case HealthCheckStartup:
return HealthCheckStarting
case HealthCheckContainerStopped:
return HealthCheckStopped
default:
return HealthCheckUnhealthy
}
}
// Healthcheck defaults. These are used both in the cli as well in
// libpod and were moved from cmd/podman/common
const (

View File

@ -113,6 +113,14 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.
returnCode = 1
}
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
return define.HealthCheckInternalError, "", err
}
}
// Handle startup HC
if isStartup {
inStartPeriod = true
@ -124,6 +132,10 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.
}
}
if exitCode != 0 && c.ensureState(define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStateExited) {
hcResult = define.HealthCheckContainerStopped
}
timeEnd := time.Now()
if c.HealthCheckConfig().StartPeriod > 0 {
// there is a start-period we need to honor; we add startPeriod to container start time
@ -148,7 +160,7 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
healthCheckResult, err := c.updateHealthCheckLog(hcl, inStartPeriod, isStartup)
healthCheckResult, err := c.updateHealthCheckLog(hcl, hcResult, inStartPeriod, isStartup)
if err != nil {
return hcResult, "", fmt.Errorf("unable to update health check log %s for %s: %w", c.config.HealthLogDestination, c.ID(), err)
}
@ -216,17 +228,8 @@ func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) {
// Increment the current startup healthcheck success counter.
// Can stop the startup HC and start the regular HC if the startup HC has enough
// consecutive successes.
// NOTE: The caller must lock and sync the container.
func (c *Container) incrementStartupHCSuccessCounter(ctx context.Context) {
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
logrus.Errorf("Error syncing container %s state: %v", c.ID(), err)
return
}
}
// We don't have a startup HC, can't do anything
if c.config.StartupHealthCheckConfig == nil {
return
@ -299,17 +302,8 @@ func (c *Container) recreateHealthCheckTimer(ctx context.Context, isStartup bool
// Increment the current startup healthcheck failure counter.
// Can restart the container if the HC fails enough times consecutively.
// NOTE: The caller must lock and sync the container.
func (c *Container) incrementStartupHCFailureCounter(ctx context.Context) {
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
logrus.Errorf("Error syncing container %s state: %v", c.ID(), err)
return
}
}
// We don't have a startup HC, can't do anything
if c.config.StartupHealthCheckConfig == nil {
return
@ -371,10 +365,8 @@ func (c *Container) isUnhealthy() (bool, error) {
}
// UpdateHealthCheckLog parses the health check results and writes the log
func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod, isStartup bool) (define.HealthCheckResults, error) {
c.lock.Lock()
defer c.lock.Unlock()
// NOTE: The caller must lock the container.
func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, hcResult define.HealthCheckStatus, inStartPeriod, isStartup bool) (define.HealthCheckResults, error) {
// If we are playing a kube yaml then let's honor the start period time for
// both failing and succeeding cases to match kube behavior.
// So don't update the health check log till the start period is over
@ -394,7 +386,9 @@ func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPerio
if len(healthCheck.Status) < 1 {
healthCheck.Status = define.HealthCheckHealthy
}
if !inStartPeriod {
if hcResult == define.HealthCheckContainerStopped {
healthCheck.Status = define.HealthCheckStopped
} else if !inStartPeriod {
// increment failing streak
healthCheck.FailingStreak++
// if failing streak > retries, then status to unhealthy

View File

@ -31,14 +31,8 @@ func RunHealthCheck(w http.ResponseWriter, r *http.Request) {
utils.InternalServerError(w, err)
return
}
hcStatus := define.HealthCheckUnhealthy
if status == define.HealthCheckSuccess {
hcStatus = define.HealthCheckHealthy
} else if status == define.HealthCheckStartup {
hcStatus = define.HealthCheckStarting
}
report := define.HealthCheckResults{
Status: hcStatus,
Status: status.String(),
}
utils.WriteResponse(w, http.StatusOK, report)
}

View File

@ -14,14 +14,8 @@ func (ic *ContainerEngine) HealthCheckRun(ctx context.Context, nameOrID string,
if err != nil {
return nil, err
}
hcStatus := define.HealthCheckUnhealthy
if status == define.HealthCheckSuccess {
hcStatus = define.HealthCheckHealthy
} else if status == define.HealthCheckStartup {
hcStatus = define.HealthCheckStarting
}
report := define.HealthCheckResults{
Status: hcStatus,
Status: status.String(),
}
return &report, nil
}

View File

@ -469,13 +469,14 @@ function _check_health_log {
@test "podman healthcheck - stop container when healthcheck runs" {
ctr="c-h-$(safename)"
msg="hc-msg-$(random_string)"
hcStatus=$PODMAN_TMPDIR/hcStatus
run_podman run -d --name $ctr \
--health-cmd "sleep 20; echo $msg" \
$IMAGE /home/podman/pause
timeout --foreground -v --kill=10 60 \
$PODMAN healthcheck run $ctr &
$PODMAN healthcheck run $ctr &> $hcStatus &
hc_pid=$!
run_podman inspect $ctr --format "{{.State.Status}}"
@ -487,9 +488,10 @@ function _check_health_log {
rc=0
wait -n $hc_pid || rc=$?
assert $rc -eq 1 "exit status check of healthcheck command"
assert $(< $hcStatus) == "stopped" "Health status"
run_podman inspect $ctr --format "{{.State.Status}}"
assert "$output" == "exited" "Container is stopped"
run_podman inspect $ctr --format "{{.State.Status}}--{{.State.Health.Status}}--{{.State.Health.FailingStreak}}"
assert "$output" == "exited--stopped--0" "Container is stopped -- Health status -- failing streak"
run_podman inspect $ctr --format "{{.State.Health.Log}}"
assert "$output" !~ "$msg" "Health log message not found"