systests: fix race in quadlet tests

The new exit-code propagation test is racy: 'podman wait' can
fail if the service container has already been cleaned up by
systemd.

Solution: make the inspect and wait checks opportunistic, i.e.,
verify their output only when the commands succeed. If they fail, confirm that they
fail with ENOSUCHCONTAINER. This may silently lose us some
coverage ... but none of it is important. The important
test, systemctl final status, remains.
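
A minimal sketch of the pattern (illustrative, not the literal patch;
it relies on the system-test helpers' convention that a leading '?'
argument to run_podman means "any exit status is acceptable", letting
the caller branch on $status):

    # Opportunistic check: '?' keeps a nonzero status from aborting the test
    run_podman '?' wait $service_container
    if [[ $status -eq 0 ]]; then
        # Won the race: container still exists; verify its exit code
        is "$output" "$exit_code" "expected exit code"
    else
        # Lost the race: accept only "no such container" as the failure mode
        assert "$output" =~ "no container with name or ID" "unexpected error"
    fi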

Also, as drive-bys:
 - add a FIXME comment documenting another race condition
   that I'm not bothering to fix right now

 - give distinct names to unit files, for readability in
   test failures
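   (each iteration's files are now named after its test parameters,
   e.g. propagate-all-true-<random>.kube, instead of the shared
   basic_<random>.kube)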

Fixes: #18732

Signed-off-by: Ed Santiago <santiago@redhat.com>
@@ -91,6 +91,7 @@ function service_setup() {
     echo "$output"
     assert $status -eq 0 "Error starting systemd unit $service"
 
+    # FIXME FIXME FIXME: this is racy with short-lived containers!
     echo "$_LOG_PROMPT systemctl status $service"
     run systemctl status "$service"
     echo "$output"
@@ -638,15 +639,16 @@ EOF
 }
 
 @test "quadlet - exit-code propagation" {
-    local quadlet_file=$PODMAN_TMPDIR/basic_$(random_string).kube
-    local yaml_file=$PODMAN_TMPDIR/$(random_string).yaml
-
     exit_tests="
 all  | true  | 0   | inactive
 all  | false | 137 | failed
 none | false | 0   | inactive
 "
     while read exit_code_prop cmd exit_code service_state; do
+        local basename=propagate-${exit_code_prop}-${cmd}-$(random_string)
+        local quadlet_file=$PODMAN_TMPDIR/$basename.kube
+        local yaml_file=$PODMAN_TMPDIR/$basename.yaml
+
         cat > $yaml_file <<EOF
 apiVersion: v1
 kind: Pod
@@ -674,19 +676,42 @@ EOF
         run_quadlet "$quadlet_file"
         run systemctl status $QUADLET_SERVICE_NAME
 
-        yaml_sha=$(sha256sum $yaml_file)
-        service_container="${yaml_sha:0:12}-service"
-
         service_setup $QUADLET_SERVICE_NAME
 
         # Ensure we have output. Output is synced via sd-notify (socat in Exec)
         run journalctl "--since=$STARTED_TIME" --unit="$QUADLET_SERVICE_NAME"
         is "$output" '.*Started.*\.service.*'
-        run_podman container inspect --format '{{.KubeExitCodePropagation}}' $service_container
-        is "$output" "$exit_code_prop" "service container has the expected policy set in its annotations"
-        run_podman wait $service_container
-        is "$output" "$exit_code" "service container reflects expected exit code $exit_code"
+
+        yaml_sha=$(sha256sum $yaml_file)
+        service_container="${yaml_sha:0:12}-service"
+
+        # Opportunistic test: confirm that the Propagation field got set.
+        # This is racy, because the container is short-lived and quadlet
+        # cleans up on exit (via kube-down in ExecStopPost). So we use '?'
+        # and only check output if the inspect succeeds.
+        run_podman '?' container inspect --format '{{.KubeExitCodePropagation}}' $service_container
+        if [[ $status -eq 0 ]]; then
+            is "$output" "$exit_code_prop" \
+               "$basename: service container has the expected policy set in its annotations"
+        else
+            assert "$output" =~ "no such container $service_container" \
+                   "$basename: unexpected error from podman container inspect"
+        fi
+
+        # Container must stop of its own accord before we call service_cleanup(),
+        # otherwise the 'systemctl stop' there may affect the unit's status.
+        # Again, use '?' to handle the abovementioned race condition.
+        run_podman '?' wait $service_container
+        if [[ $status -eq 0 ]]; then
+            assert "$output" = "$exit_code" \
+                   "$basename: service container reflects expected exit code"
+        else
+            assert "$output" =~ "no container with name or ID" \
+                   "$basename: unexpected error from podman wait"
+        fi
+
+        # This is the actual propagation check
         service_cleanup $QUADLET_SERVICE_NAME $service_state
 
         run_podman ps -aq
         is "$output" "" "all containers are cleaned up even in case of errors"