diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats index 983dc3211d..7e01a5545b 100644 --- a/test/system/220-healthcheck.bats +++ b/test/system/220-healthcheck.bats @@ -17,22 +17,34 @@ function _check_health { local since="$4" local hc_status="$5" + # Loop-wait (up to a few seconds) for healthcheck event (#20342) + local timeout=5 + while :; do + run_podman events --filter container=$ctrname --filter event=health_status \ + --since "$since" --stream=false --format "{{.HealthStatus}}" + # Output may be empty or multiple lines. + if [[ -n "$output" ]]; then + if [[ "${lines[-1]}" = "$hc_status" ]]; then + break + fi + fi + + timeout=$((timeout - 1)) + if [[ $timeout -eq 0 ]]; then + die "$testname - timed out waiting for '$hc_status' in podman events" + fi + sleep 1 + done + + # Got the desired status. Now verify all the healthcheck fields run_podman inspect --format "{{json .State.Healthcheck}}" $ctrname + defer-assertion-failures parse_table "$tests" | while read field expect;do actual=$(jq ".$field" <<<"$output") is "$actual" "$expect" "$testname - .State.Healthcheck.$field" done - - # Make sure we can read the healthcheck event in podman events (#20342) - run_podman events --filter container=$ctrname --filter event=health_status \ - --since "$since" --stream=false --format "{{.HealthStatus}}" - # Because the assert below would fail with "lines: bad array subscript" when - # there are no events lets special case this to provide a more meaningful error. - if [[ -z "$output" ]]; then - die "no healthcheck events" - fi - assert "${lines[-1]}" == "$hc_status" "$testname - podman events health status" + immediate-assertion-failures } @test "podman healthcheck" { @@ -50,7 +62,7 @@ function _check_health { run_podman inspect $ctrname --format "{{.Config.HealthcheckOnFailureAction}}" is "$output" "kill" "on-failure action is set to kill" - current_time=$(date --iso-8601=seconds) + current_time=$(date --iso-8601=ns) # We can't check for 'starting' because a 1-second interval is too # short; it could run healthcheck before we get to our first check. # @@ -65,10 +77,9 @@ Log[-1].ExitCode | 0 Log[-1].Output | \"Life is Good on stdout\\\nLife is Good on stderr\\\n\" " "$current_time" "healthy" - current_time=$(date --iso-8601=seconds) + current_time=$(date --iso-8601=ns) # Force a failure run_podman exec $ctrname touch /uh-oh - sleep 2 _check_health $ctrname "First failure" " Status | \"healthy\" @@ -81,10 +92,9 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\\\n\" # name so that the leak check below does not turn into a NOP without noticing. assert "$(systemctl list-units --type timer | grep $cid)" =~ "podman" "Healthcheck systemd unit exists" - current_time=$(date --iso-8601=seconds) + current_time=$(date --iso-8601=ns) # After three successive failures, container should no longer be healthy - sleep 5 - _check_health $ctrname "Three or more failures" " + _check_health $ctrname "Four or more failures" " Status | \"unhealthy\" FailingStreak | [3456] Log[-1].ExitCode | 1