From c0fb8fe594c5e288e927438983ed4696e4a6a6e1 Mon Sep 17 00:00:00 2001 From: Ed Santiago Date: Tue, 24 Oct 2023 09:10:57 -0600 Subject: [PATCH] CI: try to fix more networking flakes There's a whole slew of networking-related flakes whose common element seems to be improper use of curl. Fix those by: * add --retry --retry-connrefused; and/or * add -S ("show errors". Plain -s silences everything!); and/or * test exit status from curl; and/or * add wait_for_port after "podman run -d", to avoid races * log commands, to make debugging easier Important note: wait_for_port() was not working with rootless podman ports. Trivial proof: $ podman run -d --name foo -p 8192:80 \ quay.io/libpod/testimage:20221018 \ /bin/busybox-extras httpd -f -p 80 $ grep :2000 /proc/net/tcp [no results; 0x2000 is port 8192 in the hex form /proc/net/tcp uses] Solution: use ss tool; it seems to handle this just fine. There may be a better solution. Oh, also, add -t1 to a podman restart, to shave 18s from test run. Fixes: #20335 and, I think, a handful of others Signed-off-by: Ed Santiago --- test/e2e/pod_infra_container_test.go | 2 +- test/system/500-networking.bats | 45 +++++++++++++++++----------- test/system/700-play.bats | 7 +++-- test/system/helpers.network.bash | 7 +++++ 4 files changed, 39 insertions(+), 22 deletions(-) diff --git a/test/e2e/pod_infra_container_test.go b/test/e2e/pod_infra_container_test.go index e88442bf5f..e8aea0a5b2 100644 --- a/test/e2e/pod_infra_container_test.go +++ b/test/e2e/pod_infra_container_test.go @@ -95,7 +95,7 @@ var _ = Describe("Podman pod create", func() { session.WaitWithDefaultTimeout() Expect(session).Should(ExitCleanly()) - session = podmanTest.Podman([]string{"run", "--pod", podID, fedoraMinimal, "curl", "-s", "-f", "localhost:80"}) + session = podmanTest.Podman([]string{"run", "--pod", podID, fedoraMinimal, "curl", "-s", "--retry", "2", "--retry-connrefused", "-f", "localhost:80"}) session.WaitWithDefaultTimeout() Expect(session).Should(ExitCleanly()) diff --git a/test/system/500-networking.bats 
b/test/system/500-networking.bats index a34d854244..5a53169f0f 100644 --- a/test/system/500-networking.bats +++ b/test/system/500-networking.bats @@ -59,9 +59,9 @@ load helpers.network is "$output" "$random_2" "exec cat index2.txt" # Verify http contents: curl from localhost - run curl -s $SERVER/index.txt + run curl -s -S $SERVER/index.txt is "$output" "$random_1" "curl 127.0.0.1:/index.txt" - run curl -s $SERVER/index2.txt + run curl -s -S $SERVER/index2.txt is "$output" "$random_2" "curl 127.0.0.1:/index2.txt" # Verify http contents: wget from a second container @@ -303,7 +303,7 @@ load helpers.network mac1="$output" # Verify http contents: curl from localhost - run curl -s $SERVER/index.txt + run curl -s -S $SERVER/index.txt is "$output" "$random_1" "curl 127.0.0.1:/index.txt" # rootless cannot modify iptables @@ -369,7 +369,7 @@ load helpers.network is "$output" "$mac2" "MAC address changed after podman network reload ($netname2)" # check that we can still curl - run curl -s $SERVER/index.txt + run curl -s -S $SERVER/index.txt is "$output" "$random_1" "curl 127.0.0.1:/index.txt" # clean up the container @@ -481,9 +481,10 @@ load helpers.network $IMAGE /bin/busybox-extras httpd -f -p 80 cid=$output - # Verify http contents: curl from localhost - run curl --max-time 3 -s $SERVER/index.txt - is "$output" "$random_1" "curl 127.0.0.1:/index.txt" + # Verify http contents: curl from localhost. This is the first time + # connecting, so, allow retries until httpd starts. 
+ run curl --retry 2 --retry-connrefused -s $SERVER/index.txt + is "$output" "$random_1" "curl $SERVER/index.txt" run_podman inspect $cid --format "{{(index .NetworkSettings.Networks \"$netname\").IPAddress}}" ip="$output" @@ -505,8 +506,9 @@ load helpers.network run_podman exec $cid cat /etc/hosts assert "$output" !~ "$ip" "IP ($ip) should no longer be in /etc/hosts" - # check that we cannot curl (timeout after 3 sec) - run curl --max-time 3 -s $SERVER/index.txt + # check that we cannot curl (timeout after 3 sec). Fails with inconsistent + # curl exit codes, so, just check for nonzero. + run curl --max-time 3 -s -S $SERVER/index.txt assert $status -ne 0 \ "curl did not fail, it should have timed out or failed with non zero exit code" @@ -514,7 +516,7 @@ load helpers.network is "$output" "" "Output should be empty (no errors)" # curl should work again - run curl --max-time 3 -s $SERVER/index.txt + run curl --max-time 3 -s -S $SERVER/index.txt is "$output" "$random_1" "curl 127.0.0.1:/index.txt should work again" # check that we have a new ip and mac @@ -555,14 +557,14 @@ load helpers.network is "$output" "[${cid:0:12} $hostname]" "short container id and hostname in network2 aliases" # curl should work - run curl --max-time 3 -s $SERVER/index.txt + run curl --max-time 3 -s -S $SERVER/index.txt is "$output" "$random_1" "curl 127.0.0.1:/index.txt should work" # disconnect the first network run_podman network disconnect $netname $cid # curl should still work - run curl --max-time 3 -s $SERVER/index.txt + run curl --max-time 3 -s -S $SERVER/index.txt is "$output" "$random_1" "curl 127.0.0.1:/index.txt should still work" # clean up @@ -626,19 +628,26 @@ load helpers.network # Verify http contents again: curl from localhost # Use retry since it can take a moment until the new container is ready - run curl --retry 2 -s $SERVER/index.txt - is "$output" "$random_1" "curl 127.0.0.1:/index.txt after auto restart" + local curlcmd="curl --retry 2 --retry-connrefused -s 
$SERVER/index.txt" + echo "$_LOG_PROMPT $curlcmd" + run $curlcmd + echo "$output" + assert "$status" == 0 "curl exit status" + assert "$output" = "$random_1" "curl $SERVER/index.txt after auto restart" - run_podman 0+w restart $cid + run_podman 0+w restart -t1 $cid if ! is_remote; then - require_warning "StopSignal SIGTERM failed to stop container .* in 10 seconds, resorting to SIGKILL" \ + require_warning "StopSignal SIGTERM failed to stop container .* in 1 seconds, resorting to SIGKILL" \ "podman restart issues warning" fi # Verify http contents again: curl from localhost # Use retry since it can take a moment until the new container is ready - run curl --retry 2 -s $SERVER/index.txt - is "$output" "$random_1" "curl 127.0.0.1:/index.txt after podman restart" + echo "$_LOG_PROMPT $curlcmd" + run $curlcmd + echo "$output" + assert "$status" == 0 "curl exit status" + assert "$output" = "$random_1" "curl $SERVER/index.txt after podman restart" run_podman rm -t 0 -f $cid done diff --git a/test/system/700-play.bats b/test/system/700-play.bats index 40a701d8ee..66a0625999 100644 --- a/test/system/700-play.bats +++ b/test/system/700-play.bats @@ -485,9 +485,10 @@ _EOF SERVER=http://127.0.0.1:$HOST_PORT run_podman run -d --name myyaml -p "$HOST_PORT:80" \ - -v $PODMAN_TMPDIR/test.yaml:/var/www/testpod.yaml:Z \ - -w /var/www \ - $IMAGE /bin/busybox-extras httpd -f -p 80 + -v $PODMAN_TMPDIR/test.yaml:/var/www/testpod.yaml:Z \ + -w /var/www \ + $IMAGE /bin/busybox-extras httpd -f -p 80 + wait_for_port 127.0.0.1 $HOST_PORT run_podman kube play $SERVER/testpod.yaml run_podman inspect test_pod-test --format "{{.State.Running}}" diff --git a/test/system/helpers.network.bash b/test/system/helpers.network.bash index 0a90ddfc43..831d973afe 100644 --- a/test/system/helpers.network.bash +++ b/test/system/helpers.network.bash @@ -241,6 +241,13 @@ function port_is_bound() { local proto="tcp" fi + # /proc/net/tcp is insufficient: it does not show some rootless ports. 
+ # ss does, so check it first. + run ss -${proto:0:1}nlH sport = $port + if [[ -n "$output" ]]; then + return + fi + port=$(printf %04X ${port}) case "${address}" in *":"*)