system test parallelization: enable two-pass approach

For the past two months we've been splitting system tests
into two categories: those that CAN be run in parallel,
and those that CANNOT. Much work has been done to replace
hardcoded names (mycontainer, mypod) with safename().
Hundreds of test runs, in CI and on Ed's laptop, have
proven this approach viable.

make {local,remote}system now runs in two steps: first
the serial ones, then the parallel ones. hack/bats will
now recognize the 'ci:parallel' tag and add --jobs (nprocs).

This requires some tweaking of leak_check, because there
can be umpteen tests running (affecting image/container/pod/etc
state) when any given test completes.

Rules for enabling parallelization in tests:

   * use unique container/pod/volume/network names (safename)
   * do not run 'podman rm -a' or 'rmi -a'
   * never use the -l (--latest) option
   * do not run 'podman ps/images' and expect precise output

Signed-off-by: Ed Santiago <santiago@redhat.com>
This commit is contained in:
Ed Santiago
2024-09-17 09:19:52 -06:00
parent f4a08f46b7
commit d571ca6536
4 changed files with 47 additions and 25 deletions

View File

@ -688,7 +688,8 @@ localmachine:
localsystem:
# Wipe existing config, database, and cache: start with clean slate.
$(RM) -rf ${HOME}/.local/share/containers ${HOME}/.config/containers
if timeout -v 1 true; then PODMAN=$(CURDIR)/bin/podman QUADLET=$(CURDIR)/bin/quadlet bats -T test/system/; else echo "Skipping $@: 'timeout -v' unavailable'"; fi
PODMAN=$(CURDIR)/bin/podman QUADLET=$(CURDIR)/bin/quadlet bats -T --filter-tags '!ci:parallel' test/system/
PODMAN=$(CURDIR)/bin/podman QUADLET=$(CURDIR)/bin/quadlet bats -T --filter-tags ci:parallel -j $$(nproc) test/system/
.PHONY: remotesystem
remotesystem:
@ -717,7 +718,8 @@ remotesystem:
echo "Error: ./bin/podman system service did not come up" >&2;\
exit 1;\
fi;\
env PODMAN="$(CURDIR)/bin/podman-remote" bats -T test/system/ ;\
env PODMAN="$(CURDIR)/bin/podman-remote" bats -T --filter-tags '!ci:parallel' test/system/ ;\
env PODMAN="$(CURDIR)/bin/podman-remote" bats -T --filter-tags ci:parallel -j $$(nproc) test/system/ ;\
rc=$$?;\
kill %1;\
else \

View File

@ -83,7 +83,10 @@ for i;do
--rootless) TEST_ROOT= ;;
--remote) REMOTE=remote ;;
--ts|-T) bats_opts+=("-T") ;;
--tag=*) bats_filter=("--filter-tags" "$value") ;;
--tag=*) bats_filter=("--filter-tags" "$value")
if [[ "$value" = "ci:parallel" ]]; then
bats_opts+=("--jobs" $(nproc))
fi;;
*/*.bats) TESTS=$i ;;
*)
if [[ $i =~ : ]]; then
@ -130,7 +133,7 @@ export PODMAN_BATS_LEAK_CHECK=1
# Root
if [[ "$TEST_ROOT" ]]; then
echo "# bats ${bats_filter[*]} $TESTS"
echo "# bats ${bats_opts[*]} ${bats_filter[*]} $TESTS"
sudo --preserve-env=PODMAN \
--preserve-env=QUADLET \
--preserve-env=PODMAN_TEST_DEBUG \
@ -144,7 +147,7 @@ fi
# Rootless. (Only if we're not already root)
if [[ "$TEST_ROOTLESS" && "$(id -u)" != 0 ]]; then
echo "--------------------------------------------------"
echo "\$ bats ${bats_filter[*]} $TESTS"
echo "\$ bats ${bats_opts[*]} ${bats_filter[*]} $TESTS"
bats "${bats_opts[@]}" "${bats_filter[@]}" $TESTS
rc=$((rc | $?))
fi

View File

@ -222,22 +222,34 @@ function basic_teardown() {
# This must be directly after immediate-assertion-failures to capture the error code
local exit_code=$?
# Only checks for leaks on a successful run (BATS_TEST_COMPLETED is set 1),
# immediate-assertion-failures didn't fail (exit_code -eq 0)
# and PODMAN_BATS_LEAK_CHECK is set.
# As these podman commands are slow we do not want to do this by default
# and only provide this as opt in option. (#22909)
if [[ "$BATS_TEST_COMPLETED" -eq 1 ]] && [ $exit_code -eq 0 ] && [ -n "$PODMAN_BATS_LEAK_CHECK" ]; then
leak_check
exit_code=$((exit_code + $?))
# Leak check and state reset. Only run these when running tests serially!
# (For parallel tests, we run a leak check only at the very end of all tests)
if [[ -z "$PARALLEL_JOBSLOT" ]]; then
# Check for leaks, but only if:
# 1) test was successful (BATS_TEST_COMPLETED is set 1); and
# 2) immediate-assertion-failures didn't fail (exit_code -eq 0); and
# 3) PODMAN_BATS_LEAK_CHECK is set (usually only in cron).
# As these podman commands are slow we do not want to do this by default
# and only provide this as opt-in option. (#22909)
if [[ "$BATS_TEST_COMPLETED" -eq 1 ]] && [[ $exit_code -eq 0 ]] && [[ -n "$PODMAN_BATS_LEAK_CHECK" ]]; then
leak_check
exit_code=$((exit_code + $?))
fi
# Some error happened (either in teardown itself or the actual test failed)
# so do a full cleanup to ensure following tests start with a clean env.
if [ $exit_code -gt 0 ] || [ -z "$BATS_TEST_COMPLETED" ]; then
clean_setup
exit_code=$((exit_code + $?))
fi
fi
# Some error happened (either in teardown itself or the actual test failed)
# so do a full cleanup to ensure following tests start with a clean env.
if [ $exit_code -gt 0 ] || [ -z "$BATS_TEST_COMPLETED" ]; then
clean_setup
exit_code=$((exit_code + $?))
# Status file used in teardown_suite() to decide whether or not
# to check for leaks
if [[ "$BATS_TEST_COMPLETED" -ne 1 ]]; then
rm -f "$BATS_SUITE_TMPDIR/all-tests-passed"
fi
command rm -rf $PODMAN_TMPDIR
exit_code=$((exit_code + $?))
return $exit_code
@ -426,12 +438,12 @@ function clean_setup() {
else
# Always remove image that doesn't match by name
echo "# setup(): removing stray image $1" >&3
run_podman rmi --force "$1" >/dev/null 2>&1 || true
_run_podman_quiet rmi --force "$1"
# Tagged image will have same IID as our test image; don't rmi it.
if [[ $2 != "$PODMAN_TEST_IMAGE_ID" ]]; then
echo "# setup(): removing stray image $2" >&3
run_podman rmi --force "$2" >/dev/null 2>&1 || true
_run_podman_quiet rmi --force "$2"
fi
fi
done

View File

@ -32,6 +32,9 @@ function setup_suite() {
"Unable to set PODMAN_LOGIN_REGISTRY_PORT"
clean_setup
# Canary file. Will be removed if any individual test fails.
touch "$BATS_SUITE_TMPDIR/all-tests-passed"
}
# Run at the very end of all tests. Useful for cleanup of non-BATS tmpdirs.
@ -39,11 +42,13 @@ function teardown_suite() {
stop_registry
local exit_code=$?
# After all tests make sure there are no leaks and cleanup if there are
leak_check
if [ $? -gt 0 ]; then
exit_code=$((exit_code + 1))
clean_setup
# At end, if all tests have passed, check for leaks.
# Don't do this if there were errors: failing tests may not clean up.
if [[ -e "$BATS_SUITE_TMPDIR/all-tests-passed" ]]; then
leak_check
if [ $? -gt 0 ]; then
exit_code=$((exit_code + 1))
fi
fi
return $exit_code