mirror of
https://github.com/containers/podman.git
synced 2025-08-06 19:44:14 +08:00

GoLang sets unset values to the default value of the type. This means that the destination of the log is an empty string and the count and size are set to 0. However, this means that size and count are unbounded, and this is not the default behavior. Fixes: https://github.com/containers/podman/issues/25473 Fixes: https://issues.redhat.com/browse/RHEL-83262 Signed-off-by: Jan Rodák <hony.com@seznam.cz>
422 lines
16 KiB
Bash
422 lines
16 KiB
Bash
#!/usr/bin/env bats -*- bats -*-
|
|
#
|
|
# tests for podman healthcheck
|
|
#
|
|
#
|
|
|
|
load helpers
|
|
load helpers.systemd
|
|
|
|
# bats file_tags=ci:parallel
|
|
|
|
# Helper function: run 'podman inspect' and check various given fields
|
|
function _check_health {
|
|
local ctrname="$1"
|
|
local testname="$2"
|
|
local tests="$3"
|
|
local since="$4"
|
|
local hc_status="$5"
|
|
|
|
# Loop-wait (up to a few seconds) for healthcheck event (#20342)
|
|
# Allow a margin when running parallel, because of system load
|
|
local timeout=5
|
|
if [[ -n "$PARALLEL_JOBSLOT" ]]; then
|
|
timeout=$((timeout + 3))
|
|
fi
|
|
|
|
while :; do
|
|
run_podman events --filter container=$ctrname --filter event=health_status \
|
|
--since "$since" --stream=false --format "{{.HealthStatus}}"
|
|
# Output may be empty or multiple lines.
|
|
if [[ -n "$output" ]]; then
|
|
if [[ "${lines[-1]}" = "$hc_status" ]]; then
|
|
break
|
|
fi
|
|
fi
|
|
|
|
timeout=$((timeout - 1))
|
|
if [[ $timeout -eq 0 ]]; then
|
|
die "$testname - timed out waiting for '$hc_status' in podman events"
|
|
fi
|
|
sleep 1
|
|
done
|
|
|
|
# Got the desired status. Now verify all the healthcheck fields
|
|
run_podman inspect --format "{{json .State.Healthcheck}}" $ctrname
|
|
|
|
defer-assertion-failures
|
|
parse_table "$tests" | while read field expect;do
|
|
actual=$(jq ".$field" <<<"$output")
|
|
is "$actual" "$expect" "$testname - .State.Healthcheck.$field"
|
|
done
|
|
immediate-assertion-failures
|
|
}
|
|
|
|
@test "podman healthcheck" {
|
|
local ctrname="c-h-$(safename)"
|
|
run_podman run -d --name $ctrname \
|
|
--health-cmd /home/podman/healthcheck \
|
|
--health-interval 1s \
|
|
--health-retries 3 \
|
|
--health-on-failure=kill \
|
|
--health-startup-cmd /home/podman/healthcheck \
|
|
--health-startup-interval 1s \
|
|
$IMAGE /home/podman/pause
|
|
cid="$output"
|
|
|
|
run_podman inspect $ctrname --format "{{.Config.HealthcheckOnFailureAction}}"
|
|
is "$output" "kill" "on-failure action is set to kill"
|
|
|
|
run_podman inspect $ctrname --format "{{.Config.StartupHealthCheck.Test}}"
|
|
is "$output" "[CMD-SHELL /home/podman/healthcheck]" ".Config.StartupHealthCheck.Test"
|
|
|
|
current_time=$(date --iso-8601=ns)
|
|
# We can't check for 'starting' because a 1-second interval is too
|
|
# short; it could run healthcheck before we get to our first check.
|
|
#
|
|
# So, just force a healthcheck run, then confirm that it's running.
|
|
run_podman healthcheck run $ctrname
|
|
is "$output" "" "output from 'podman healthcheck run'"
|
|
|
|
_check_health $ctrname "All healthy" "
|
|
Status | \"healthy\"
|
|
FailingStreak | 0
|
|
Log[-1].ExitCode | 0
|
|
Log[-1].Output | \"Life is Good on stdout\\\nLife is Good on stderr\\\n\"
|
|
" "$current_time" "healthy"
|
|
|
|
current_time=$(date --iso-8601=ns)
|
|
# Force a failure
|
|
run_podman exec $ctrname touch /uh-oh
|
|
|
|
_check_health $ctrname "First failure" "
|
|
Status | \"healthy\"
|
|
FailingStreak | [123]
|
|
Log[-1].ExitCode | 1
|
|
Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\\\n\"
|
|
" "$current_time" "healthy"
|
|
|
|
# Check that we now we do have valid podman units with this
|
|
# name so that the leak check below does not turn into a NOP without noticing.
|
|
run -0 systemctl list-units
|
|
cidmatch=$(grep "$cid" <<<"$output")
|
|
echo "$cidmatch"
|
|
assert "$cidmatch" =~ " $cid-[0-9a-f]+\.timer *.*/podman healthcheck run $cid" \
|
|
"Healthcheck systemd unit exists"
|
|
|
|
current_time=$(date --iso-8601=ns)
|
|
# After three successive failures, container should no longer be healthy
|
|
_check_health $ctrname "Four or more failures" "
|
|
Status | \"unhealthy\"
|
|
FailingStreak | [3456]
|
|
Log[-1].ExitCode | 1
|
|
Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\\\n\"
|
|
" "$current_time" "unhealthy"
|
|
|
|
# now the on-failure should kick in and kill the container
|
|
run_podman wait $ctrname
|
|
|
|
# Clean up
|
|
run_podman rm -t 0 -f $ctrname
|
|
|
|
# Important check for https://github.com/containers/podman/issues/22884
|
|
# We never should leak the unit files, healthcheck uses the cid in name so just grep that.
|
|
# (Ignore .scope units, those are conmon and can linger for 5 minutes)
|
|
# (Ignore .mount, too. They are created/removed by systemd based on the actual real mounts
|
|
# on the host and that is async and might be slow enough in CI to cause failures.)
|
|
run -0 systemctl list-units --quiet "*$cid*"
|
|
except_scope_mount=$(grep -vF ".scope " <<<"$output" | { grep -vF ".mount" || true; } )
|
|
assert "$except_scope_mount" == "" "Healthcheck systemd unit cleanup: no units leaked"
|
|
}
|
|
|
|
@test "podman healthcheck - restart cleans up old state" {
|
|
ctr="c-h-$(safename)"
|
|
|
|
run_podman run -d --name $ctr \
|
|
--health-cmd /home/podman/healthcheck \
|
|
--health-retries=3 \
|
|
--health-interval=disable \
|
|
$IMAGE /home/podman/pause
|
|
|
|
run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}"
|
|
is "$output" "0" "Failing streak of fresh container should be 0"
|
|
|
|
# Get the healthcheck to fail
|
|
run_podman exec $ctr touch /uh-oh-only-once
|
|
run_podman 1 healthcheck run $ctr
|
|
is "$output" "unhealthy" "output from 'podman healthcheck run'"
|
|
run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}"
|
|
is "$output" "1" "Failing streak after one failed healthcheck should be 1"
|
|
|
|
run_podman container restart $ctr
|
|
run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}"
|
|
is "$output" "0" "Failing streak of restarted container should be 0 again"
|
|
|
|
run_podman rm -f -t0 $ctr
|
|
}
|
|
|
|
@test "podman wait --condition={healthy,unhealthy}" {
|
|
ctr="c-h-$(safename)"
|
|
|
|
wait_file="$PODMAN_TMPDIR/$(random_string).wait_for_me"
|
|
|
|
for condition in healthy unhealthy;do
|
|
rm -f $wait_file
|
|
run_podman run -d --name $ctr \
|
|
--health-cmd /home/podman/healthcheck \
|
|
--health-retries=1 \
|
|
--health-interval=disable \
|
|
$IMAGE /home/podman/pause
|
|
if [[ $condition == "unhealthy" ]];then
|
|
# create the uh-oh file to let the health check fail
|
|
run_podman exec $ctr touch /uh-oh
|
|
fi
|
|
|
|
# Wait for the container in the background and create the $wait_file to
|
|
# signal the specified wait condition was met.
|
|
(timeout --foreground -v --kill=5 10 $PODMAN wait --condition=$condition $ctr && touch $wait_file) &
|
|
|
|
# Sleep 1 second to make sure above commands are running
|
|
sleep 1
|
|
if [[ -f $wait_file ]]; then
|
|
die "the wait file should only be created after the container turned healthy"
|
|
fi
|
|
|
|
if [[ $condition == "healthy" ]];then
|
|
run_podman healthcheck run $ctr
|
|
else
|
|
run_podman 1 healthcheck run $ctr
|
|
fi
|
|
wait_for_file $wait_file
|
|
run_podman rm -f -t0 $ctr
|
|
done
|
|
}
|
|
|
|
@test "podman healthcheck --health-on-failure" {
|
|
run_podman 125 create --health-on-failure=kill $IMAGE
|
|
is "$output" "Error: cannot set on-failure action to kill without a health check"
|
|
|
|
ctr="c-h-$(safename)"
|
|
|
|
for policy in none kill restart stop;do
|
|
uhoh=/uh-oh
|
|
if [[ $policy != "none" ]];then
|
|
# only fail the first run
|
|
uhoh=/uh-oh-only-once
|
|
fi
|
|
|
|
# Run healthcheck image.
|
|
run_podman run -d --name $ctr \
|
|
--health-cmd /home/podman/healthcheck \
|
|
--health-retries=1 \
|
|
--health-on-failure=$policy \
|
|
--health-interval=disable \
|
|
$IMAGE /home/podman/pause
|
|
|
|
# healthcheck should succeed
|
|
run_podman healthcheck run $ctr
|
|
|
|
# Now cause the healthcheck to fail
|
|
run_podman exec $ctr touch $uhoh
|
|
|
|
# healthcheck should now fail, with exit status 1 and 'unhealthy' output
|
|
run_podman 1 healthcheck run $ctr
|
|
is "$output" "unhealthy" "output from 'podman healthcheck run' (policy: $policy)"
|
|
|
|
if [[ $policy == "restart" ]];then
|
|
# Make sure the container transitions back to running
|
|
run_podman wait --condition=running $ctr
|
|
run_podman inspect $ctr --format "{{.RestartCount}}"
|
|
assert "${#lines[@]}" != 0 "Container has been restarted at least once"
|
|
run_podman container inspect $ctr --format "{{.State.Healthcheck.FailingStreak}}"
|
|
is "$output" "0" "Failing streak of restarted container should be 0 again"
|
|
run_podman healthcheck run $ctr
|
|
elif [[ $policy == "none" ]];then
|
|
run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}"
|
|
# Container is still running and health check still broken
|
|
is "$output" "running $policy" "container continued running"
|
|
run_podman 1 healthcheck run $ctr
|
|
is "$output" "unhealthy" "output from 'podman healthcheck run' (policy: $policy)"
|
|
else
|
|
run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}"
|
|
# kill and stop yield the container into a non-running state
|
|
is "$output" ".* $policy" "container was stopped/killed (policy: $policy)"
|
|
assert "$output" != "running $policy"
|
|
# also make sure that it's not stuck in the stopping state
|
|
assert "$output" != "stopping $policy"
|
|
fi
|
|
|
|
run_podman rm -f -t0 $ctr
|
|
done
|
|
}
|
|
|
|
@test "podman healthcheck --health-on-failure with interval" {
|
|
ctr="c-h-$(safename)"
|
|
|
|
for policy in stop kill restart ;do
|
|
t0=$(date --iso-8601=seconds)
|
|
run_podman run -d --name $ctr \
|
|
--health-cmd /bin/false \
|
|
--health-retries=1 \
|
|
--health-on-failure=$policy \
|
|
--health-interval=1s \
|
|
$IMAGE top
|
|
|
|
if [[ $policy == "restart" ]];then
|
|
# Sleeping for 2 seconds makes the test much faster than using
|
|
# podman-wait which would compete with the container getting
|
|
# restarted.
|
|
sleep 2
|
|
# Make sure the container transitions back to running
|
|
run_podman wait --condition=running $ctr
|
|
run_podman inspect $ctr --format "{{.RestartCount}}"
|
|
assert "${#lines[@]}" != 0 "Container has been restarted at least once"
|
|
else
|
|
# kill and stop yield the container into a non-running state
|
|
run_podman wait $ctr
|
|
run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}"
|
|
is "$output" ".* $policy" "container was stopped/killed (policy: $policy)"
|
|
assert "$output" != "running $policy"
|
|
# also make sure that it's not stuck in the stopping state
|
|
assert "$output" != "stopping $policy"
|
|
fi
|
|
|
|
run_podman rm -f -t0 $ctr
|
|
done
|
|
}
|
|
|
|
function _create_container_with_health_log_settings {
|
|
local ctrname="$1"
|
|
local msg="$2"
|
|
local format="$3"
|
|
local flag="$4"
|
|
local expect="$5"
|
|
local expect_msg="$6"
|
|
|
|
run_podman run -d --name $ctrname \
|
|
--health-cmd "echo $msg" \
|
|
$flag \
|
|
$IMAGE /home/podman/pause
|
|
cid="$output"
|
|
|
|
run_podman inspect $ctrname --format $format
|
|
is "$output" "$expect" "$expect_msg"
|
|
|
|
output=$cid
|
|
}
|
|
|
|
function _check_health_log {
|
|
local ctrname="$1"
|
|
local expect_msg="$2"
|
|
local comparison=$3
|
|
local expect_count="$4"
|
|
|
|
run_podman inspect $ctrname --format "{{.State.Health.Log}}"
|
|
count=$(grep -co "$expect_msg" <<< "$output")
|
|
assert "$count" $comparison $expect_count "Number of matching health log messages"
|
|
}
|
|
|
|
@test "podman healthcheck --health-max-log-count values" {
|
|
# flag | expected value | op | log count
|
|
test="
|
|
| 5 | -eq | 5
|
|
--health-max-log-count 0 | 0 | -ge | 11
|
|
--health-max-log-count=0 | 0 | -ge | 11
|
|
--health-max-log-count 10 | 10 | -eq | 10
|
|
--health-max-log-count=10 | 10 | -eq | 10
|
|
"
|
|
|
|
while read flag value op logs_count ; do
|
|
local msg="healthmsg-$(random_string)"
|
|
local ctrname="c-h-$(safename)"
|
|
_create_container_with_health_log_settings $ctrname $msg "{{.Config.HealthMaxLogCount}}" $flag $value "HealthMaxLogCount"
|
|
|
|
for i in $(seq 1 $((logs_count + 5)));
|
|
do
|
|
run_podman healthcheck run $ctrname
|
|
is "$output" "" "unexpected output from podman healthcheck run (pass $i)"
|
|
done
|
|
|
|
_check_health_log $ctrname $msg $op $logs_count
|
|
|
|
run_podman rm -t 0 -f $ctrname
|
|
done < <(parse_table "$tests")
|
|
}
|
|
|
|
@test "podman healthcheck --health-max-log-size values" {
|
|
local s=$(printf "healthmsg-%1000s")
|
|
local long_msg=${s// /$(random_string)}
|
|
|
|
# flag | expected value | exp_msg
|
|
test="
|
|
| 500 | ${long_msg:0:500}}]\$
|
|
--health-max-log-size 0 | 0 | $long_msg}]\$
|
|
--health-max-log-size=0 | 0 | $long_msg}]\$
|
|
--health-max-log-size 10 | 10 | ${long_msg:0:10}}]\$
|
|
--health-max-log-size=10 | 10 | ${long_msg:0:10}}]\$
|
|
"
|
|
|
|
while read flag value exp_msg ; do
|
|
local ctrname="c-h-$(safename)"
|
|
_create_container_with_health_log_settings $ctrname $long_msg "{{.Config.HealthMaxLogSize}}" $flag $value "HealthMaxLogSize"
|
|
|
|
run_podman healthcheck run $ctrname
|
|
is "$output" "" "output from 'podman healthcheck run'"
|
|
|
|
_check_health_log $ctrname $exp_msg -eq 1
|
|
|
|
run_podman rm -t 0 -f $ctrname
|
|
done < <(parse_table "$tests")
|
|
}
|
|
|
|
@test "podman healthcheck --health-log-destination file" {
|
|
local TMP_DIR_HEALTHCHECK="$PODMAN_TMPDIR/healthcheck"
|
|
mkdir $TMP_DIR_HEALTHCHECK
|
|
local ctrname="c-h-$(safename)"
|
|
local msg="healthmsg-$(random_string)"
|
|
_create_container_with_health_log_settings $ctrname $msg "{{.Config.HealthLogDestination}}" "--health-log-destination $TMP_DIR_HEALTHCHECK" "$TMP_DIR_HEALTHCHECK" "HealthLogDestination"
|
|
cid="$output"
|
|
|
|
run_podman healthcheck run $ctrname
|
|
is "$output" "" "output from 'podman healthcheck run'"
|
|
|
|
healthcheck_log_path="${TMP_DIR_HEALTHCHECK}/${cid}-healthcheck.log"
|
|
# The healthcheck is triggered by the podman when the container is started, but its execution depends on systemd.
|
|
# And since `run_podman healthcheck run` is also run manually, it will result in two runs.
|
|
count=$(grep -co "$msg" $healthcheck_log_path)
|
|
assert "$count" -ge 1 "Number of matching health log messages"
|
|
|
|
run_podman rm -t 0 -f $ctrname
|
|
}
|
|
|
|
|
|
@test "podman healthcheck --health-log-destination journal" {
|
|
skip_if_remote "We cannot read journalctl over remote."
|
|
|
|
# We can't use journald on RHEL as rootless, either: rhbz#1895105
|
|
skip_if_journald_unavailable
|
|
|
|
local ctrname="c-h-$(safename)"
|
|
local msg="healthmsg-$(random_string)"
|
|
_create_container_with_health_log_settings $ctrname $msg "{{.Config.HealthLogDestination}}" "--health-log-destination events_logger" "events_logger" "HealthLogDestination"
|
|
cid="$output"
|
|
|
|
run_podman healthcheck run $ctrname
|
|
is "$output" "" "output from 'podman healthcheck run'"
|
|
|
|
cmd="journalctl --output cat --output-fields=PODMAN_HEALTH_LOG PODMAN_ID=$cid"
|
|
echo "$_LOG_PROMPT $cmd"
|
|
run $cmd
|
|
echo "$output"
|
|
assert "$status" -eq 0 "exit status of journalctl"
|
|
|
|
# The healthcheck is triggered by the podman when the container is started, but its execution depends on systemd.
|
|
# And since `run_podman healthcheck run` is also run manually, it will result in two runs.
|
|
count=$(grep -co "$msg" <<< "$output")
|
|
assert "$count" -ge 1 "Number of matching health log messages"
|
|
|
|
run_podman rm -t 0 -f $ctrname
|
|
}
|
|
|
|
# vim: filetype=sh
|