Merge pull request #25423 from Honny1/hc-kill-status

Add stopped status for HealthCheck
2025-08-06 11:32:07 +08:00 · 2025-03-04 13:17:58 +00:00
parent ebec104391 511d912685
commit 4ac061f383
6 changed files with 45 additions and 44 deletions
--- a/cmd/podman/healthcheck/run.go
+++ b/cmd/podman/healthcheck/run.go
@ -35,7 +35,8 @@ func run(cmd *cobra.Command, args []string) error {
 	if err != nil {
 		return err
 	}
-	if response.Status == define.HealthCheckUnhealthy || response.Status == define.HealthCheckStarting {
+	switch response.Status {
+	case define.HealthCheckUnhealthy, define.HealthCheckStarting, define.HealthCheckStopped:
 		registry.SetExitCode(1)
 		fmt.Println(response.Status)
 	}
--- a/libpod/define/healthchecks.go
+++ b/libpod/define/healthchecks.go
@ -20,6 +20,9 @@ const (
 	HealthCheckStarting string = "starting"
 	// HealthCheckReset describes reset of HealthCheck logs
 	HealthCheckReset string = "reset"
+	// HealthCheckStopped describes the time when container was stopped during HealthCheck
+	// and HealthCheck was terminated
+	HealthCheckStopped string = "stopped"
 )

 // HealthCheckStatus represents the current state of a container
@ -49,6 +52,19 @@ const (
 	HealthCheckStartup HealthCheckStatus = iota
 )

+func (s HealthCheckStatus) String() string {
+	switch s {
+	case HealthCheckSuccess:
+		return HealthCheckHealthy
+	case HealthCheckStartup:
+		return HealthCheckStarting
+	case HealthCheckContainerStopped:
+		return HealthCheckStopped
+	default:
+		return HealthCheckUnhealthy
+	}
+}
+
 // Healthcheck defaults.  These are used both in the cli as well in
 // libpod and were moved from cmd/podman/common
 const (
--- a/libpod/healthcheck.go
+++ b/libpod/healthcheck.go
@ -113,6 +113,14 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.
 		returnCode = 1
 	}

+	if !c.batched {
+		c.lock.Lock()
+		defer c.lock.Unlock()
+		if err := c.syncContainer(); err != nil {
+			return define.HealthCheckInternalError, "", err
+		}
+	}
+
 	// Handle startup HC
 	if isStartup {
 		inStartPeriod = true
@ -124,6 +132,10 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.
 		}
 	}

+	if exitCode != 0 && c.ensureState(define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStateExited) {
+		hcResult = define.HealthCheckContainerStopped
+	}
+
 	timeEnd := time.Now()
 	if c.HealthCheckConfig().StartPeriod > 0 {
 		// there is a start-period we need to honor; we add startPeriod to container start time
@ -148,7 +160,7 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.

 	hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)

-	healthCheckResult, err := c.updateHealthCheckLog(hcl, inStartPeriod, isStartup)
+	healthCheckResult, err := c.updateHealthCheckLog(hcl, hcResult, inStartPeriod, isStartup)
 	if err != nil {
 		return hcResult, "", fmt.Errorf("unable to update health check log %s for %s: %w", c.config.HealthLogDestination, c.ID(), err)
 	}
@ -216,17 +228,8 @@ func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) {
 // Increment the current startup healthcheck success counter.
 // Can stop the startup HC and start the regular HC if the startup HC has enough
 // consecutive successes.
+// NOTE: The caller must lock and sync the container.
 func (c *Container) incrementStartupHCSuccessCounter(ctx context.Context) {
-	if !c.batched {
-		c.lock.Lock()
-		defer c.lock.Unlock()
-
-		if err := c.syncContainer(); err != nil {
-			logrus.Errorf("Error syncing container %s state: %v", c.ID(), err)
-			return
-		}
-	}
-
 	// We don't have a startup HC, can't do anything
 	if c.config.StartupHealthCheckConfig == nil {
 		return
@ -299,17 +302,8 @@ func (c *Container) recreateHealthCheckTimer(ctx context.Context, isStartup bool

 // Increment the current startup healthcheck failure counter.
 // Can restart the container if the HC fails enough times consecutively.
+// NOTE: The caller must lock and sync the container.
 func (c *Container) incrementStartupHCFailureCounter(ctx context.Context) {
-	if !c.batched {
-		c.lock.Lock()
-		defer c.lock.Unlock()
-
-		if err := c.syncContainer(); err != nil {
-			logrus.Errorf("Error syncing container %s state: %v", c.ID(), err)
-			return
-		}
-	}
-
 	// We don't have a startup HC, can't do anything
 	if c.config.StartupHealthCheckConfig == nil {
 		return
@ -371,10 +365,8 @@ func (c *Container) isUnhealthy() (bool, error) {
 }

 // UpdateHealthCheckLog parses the health check results and writes the log
-func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod, isStartup bool) (define.HealthCheckResults, error) {
-	c.lock.Lock()
-	defer c.lock.Unlock()
-
+// NOTE: The caller must lock the container.
+func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, hcResult define.HealthCheckStatus, inStartPeriod, isStartup bool) (define.HealthCheckResults, error) {
 	// If we are playing a kube yaml then let's honor the start period time for
 	// both failing and succeeding cases to match kube behavior.
 	// So don't update the health check log till the start period is over
@ -394,7 +386,9 @@ func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPerio
 		if len(healthCheck.Status) < 1 {
 			healthCheck.Status = define.HealthCheckHealthy
 		}
-		if !inStartPeriod {
+		if hcResult == define.HealthCheckContainerStopped {
+			healthCheck.Status = define.HealthCheckStopped
+		} else if !inStartPeriod {
 			// increment failing streak
 			healthCheck.FailingStreak++
 			// if failing streak > retries, then status to unhealthy
--- a/pkg/api/handlers/libpod/healthcheck.go
+++ b/pkg/api/handlers/libpod/healthcheck.go
@ -31,14 +31,8 @@ func RunHealthCheck(w http.ResponseWriter, r *http.Request) {
 		utils.InternalServerError(w, err)
 		return
 	}
-	hcStatus := define.HealthCheckUnhealthy
-	if status == define.HealthCheckSuccess {
-		hcStatus = define.HealthCheckHealthy
-	} else if status == define.HealthCheckStartup {
-		hcStatus = define.HealthCheckStarting
-	}
 	report := define.HealthCheckResults{
-		Status: hcStatus,
+		Status: status.String(),
 	}
 	utils.WriteResponse(w, http.StatusOK, report)
 }
--- a/pkg/domain/infra/abi/healthcheck.go
+++ b/pkg/domain/infra/abi/healthcheck.go
@ -14,14 +14,8 @@ func (ic *ContainerEngine) HealthCheckRun(ctx context.Context, nameOrID string,
 	if err != nil {
 		return nil, err
 	}
-	hcStatus := define.HealthCheckUnhealthy
-	if status == define.HealthCheckSuccess {
-		hcStatus = define.HealthCheckHealthy
-	} else if status == define.HealthCheckStartup {
-		hcStatus = define.HealthCheckStarting
-	}
 	report := define.HealthCheckResults{
-		Status: hcStatus,
+		Status: status.String(),
 	}
 	return &report, nil
 }
--- a/test/system/220-healthcheck.bats
+++ b/test/system/220-healthcheck.bats
@ -469,13 +469,14 @@ function _check_health_log {
@test "podman healthcheck - stop container when healthcheck runs" {
    ctr="c-h-$(safename)"
    msg="hc-msg-$(random_string)"
+    hcStatus=$PODMAN_TMPDIR/hcStatus

    run_podman run -d --name $ctr             \
           --health-cmd "sleep 20; echo $msg" \
           $IMAGE /home/podman/pause

    timeout --foreground -v --kill=10 60 \
-        $PODMAN healthcheck run $ctr &
+        $PODMAN healthcheck run $ctr &> $hcStatus &
    hc_pid=$!

    run_podman inspect $ctr --format "{{.State.Status}}"
@ -487,9 +488,10 @@ function _check_health_log {
    rc=0
    wait -n $hc_pid || rc=$?
    assert $rc -eq 1 "exit status check of healthcheck command"
+    assert $(< $hcStatus) == "stopped" "Health status"

-    run_podman inspect $ctr --format "{{.State.Status}}"
-    assert "$output" == "exited" "Container is stopped"
+    run_podman inspect $ctr --format "{{.State.Status}}--{{.State.Health.Status}}--{{.State.Health.FailingStreak}}"
+    assert "$output" == "exited--stopped--0" "Container is stopped -- Health status -- failing streak"

    run_podman inspect $ctr --format "{{.State.Health.Log}}"
    assert "$output" !~ "$msg" "Health log message not found"