From 0be44ccb20cfbd033e90790a4e1d1de1ff56106e Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Fri, 11 Apr 2025 14:23:18 +0200 Subject: [PATCH 1/4] test/system: add debug for healthcheck flake Seen it a few times, the exit code differs but we didn't print the command output so it is hard to tell why podman errors out here. This just adds the output and does not fix the flake. https://api.cirrus-ci.com/v1/artifact/task/5368521426731008/html/sys-podman-fedora-41-rootless-host-boltdb.log.html Signed-off-by: Paul Holzinger --- test/system/220-healthcheck.bats | 1 + 1 file changed, 1 insertion(+) diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats index e2ace7fa04..6d02946623 100644 --- a/test/system/220-healthcheck.bats +++ b/test/system/220-healthcheck.bats @@ -439,6 +439,7 @@ function _check_health_log { # Wait for background healthcheck to finish and make sure the exit status is 1 rc=0 wait -n $hc_pid || rc=$? + cat $hcStatus # just as debug in case the exit code check fails assert $rc -eq 1 "exit status check of healthcheck command" assert $(< $hcStatus) == "stopped" "Health status" From 0fb78905c1a6d1f2c64ad37d87940577ea72d8a7 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Fri, 11 Apr 2025 15:12:33 +0200 Subject: [PATCH 2/4] Revert "Instrument cleanup tracer to log weird volume removal flake" This reverts commit d633824a9527b9ec937cdfc8aacc890ec3249127. The issue has been fixed in commit 9a0c0b2eef and I have not seen it since so remove this special case. 
Signed-off-by: Paul Holzinger --- hack/podman_cleanup_tracer.bt | 14 -------------- test/system/600-completion.bats | 3 +-- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/hack/podman_cleanup_tracer.bt b/hack/podman_cleanup_tracer.bt index f2b480c056..42c46cde9a 100755 --- a/hack/podman_cleanup_tracer.bt +++ b/hack/podman_cleanup_tracer.bt @@ -149,17 +149,3 @@ tracepoint:syscalls:sys_enter_write $offset += $len } } - -// HACK: debug for https://github.com/containers/podman/issues/23913 -// The test uses "ebpf-debug-23913" volume name and because and volume rm -// will delete the path we can trap the process here to find out who actually -// deletes it. -tracepoint:syscalls:sys_enter_unlink* -/ strcontains(str(args.pathname), "ebpf-debug-23913") / -{ - printf("Special issue 23913 volume deleted by pid %d: ", pid); - // This can fail to open the file it is done in user space and - // thus racy if the process exits quickly. - cat("/proc/%d/cmdline", pid); - print(""); -} diff --git a/test/system/600-completion.bats b/test/system/600-completion.bats index fd108e445b..3773cf4056 100644 --- a/test/system/600-completion.bats +++ b/test/system/600-completion.bats @@ -270,8 +270,7 @@ function _check_no_suggestions() { random_image_name="i-$(safename)" random_image_tag=$(random_string 5) random_network_name="n-$(safename)" - # Do not change the suffix, it is special debug for #23913 - random_volume_name="v-$(safename)-ebpf-debug-23913" + random_volume_name="v-$(safename)" random_secret_name="s-$(safename)" random_secret_content=$(random_string 30) secret_file=$PODMAN_TMPDIR/$(random_string 10) From 0849cbcf83c355e8d36399e823e178f073750c70 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Fri, 11 Apr 2025 16:13:03 +0200 Subject: [PATCH 3/4] test/e2e: skip flaky restore into pod test It is failing a lot; on the issue (#24571) there is a 100% reproducer, so we don't need to gather more data: this is simply broken. 
Reduce our flakes by skipping this until the main issue gets resolved. Signed-off-by: Paul Holzinger --- test/e2e/checkpoint_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/test/e2e/checkpoint_test.go b/test/e2e/checkpoint_test.go index 0c4a0bb4a0..1498b4fa50 100644 --- a/test/e2e/checkpoint_test.go +++ b/test/e2e/checkpoint_test.go @@ -1131,6 +1131,7 @@ var _ = Describe("Podman checkpoint", func() { ) It(testName, func() { + Skip("FIXME: #24571 - not working an super flaky, don't waste CI time on it") podName := "test_pod" if err := criu.CheckForCriu(criu.PodCriuVersion); err != nil { From a4856a6224e630eacbc7642f17685361a7fde4d6 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Fri, 11 Apr 2025 16:21:03 +0200 Subject: [PATCH 4/4] test/system: add debug for /etc/hosts restore flake Somehow the files do not match sometimes. I would like to get data on what the /etc/hosts file on the host looks like, to see if this would explain anything. Signed-off-by: Paul Holzinger --- test/system/520-checkpoint.bats | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/system/520-checkpoint.bats b/test/system/520-checkpoint.bats index 22b834867e..5113f6785e 100644 --- a/test/system/520-checkpoint.bats +++ b/test/system/520-checkpoint.bats @@ -247,6 +247,11 @@ function setup() { run_podman inspect $cid --format "{{(index .NetworkSettings.Networks \"$netname\").MacAddress}}" mac1="$output" + # There is a weird flake, where the hosts content changed after restore and I don't know why. + # Because we start from a /etc/hosts base on the host print that. + echo "hosts file on the host" + cat /etc/hosts + run_podman exec $cid cat /etc/hosts /etc/resolv.conf pre_hosts_resolv_conf_output="$output"