mirror of
https://github.com/containers/podman.git
synced 2025-07-15 03:02:52 +08:00
Merge pull request #8749 from edsantiago/upgrade_test
podman upgrade tests
This commit is contained in:
33
.cirrus.yml
33
.cirrus.yml
@ -598,6 +598,38 @@ rootless_system_test_task:
|
||||
main_script: *main
|
||||
always: *logs_artifacts
|
||||
|
||||
# FIXME: we may want to consider running this from nightly cron instead of CI.
|
||||
# The tests are actually pretty quick (less than a minute) but they do rely
|
||||
# on pulling images from quay.io, which means we're subject to network flakes.
|
||||
#
|
||||
# FIXME: how does this env matrix work, anyway? Does it spin up multiple VMs?
|
||||
# We might just want to encode the version matrix in runner.sh instead
|
||||
upgrade_test_task:
|
||||
name: "Upgrade test: from $PODMAN_UPGRADE_FROM"
|
||||
alias: upgrade_test
|
||||
skip: *tags
|
||||
only_if: *not_docs
|
||||
depends_on:
|
||||
- local_system_test
|
||||
matrix:
|
||||
- env:
|
||||
PODMAN_UPGRADE_FROM: v1.9.0
|
||||
- env:
|
||||
PODMAN_UPGRADE_FROM: v2.0.6
|
||||
- env:
|
||||
PODMAN_UPGRADE_FROM: v2.1.1
|
||||
gce_instance: *standardvm
|
||||
env:
|
||||
TEST_FLAVOR: upgrade_test
|
||||
DISTRO_NV: ${FEDORA_NAME}
|
||||
VM_IMAGE_NAME: ${FEDORA_CACHE_IMAGE_NAME}
|
||||
# ID for re-use of build output
|
||||
_BUILD_CACHE_HANDLE: ${FEDORA_NAME}-build-${CIRRUS_BUILD_ID}
|
||||
clone_script: *noop
|
||||
gopath_cache: *ro_gopath_cache
|
||||
setup_script: *setup
|
||||
main_script: *main
|
||||
always: *logs_artifacts
|
||||
|
||||
# This task is critical. It updates the "last-used by" timestamp stored
|
||||
# in metadata for all VM images. This mechanism functions in tandem with
|
||||
@ -654,6 +686,7 @@ success_task:
|
||||
- local_system_test
|
||||
- remote_system_test
|
||||
- rootless_system_test
|
||||
- upgrade_test
|
||||
- meta
|
||||
container: *smallcontainer
|
||||
env:
|
||||
|
@ -70,6 +70,10 @@ function _run_sys() {
|
||||
dotest system
|
||||
}
|
||||
|
||||
# Run the BATS upgrade tests found under test/upgrade, sending both their
# stdout and stderr through logformatter.
function _run_upgrade_test() {
    bats test/upgrade 2>&1 | logformatter
}
|
||||
|
||||
function _run_bindings() {
|
||||
# shellcheck disable=SC2155
|
||||
export PATH=$PATH:$GOSRC/hack
|
||||
|
@ -200,6 +200,7 @@ case "$TEST_FLAVOR" in
|
||||
compose) ;&
|
||||
int) ;&
|
||||
sys) ;&
|
||||
upgrade_test) ;&
|
||||
bindings) ;&
|
||||
endpoint)
|
||||
# Use existing host bits when testing is to happen inside a container
|
||||
|
@ -154,7 +154,7 @@ function run_podman() {
|
||||
echo "$_LOG_PROMPT $PODMAN $*"
|
||||
# BATS hangs if a subprocess remains and keeps FD 3 open; this happens
|
||||
# if podman crashes unexpectedly without cleaning up subprocesses.
|
||||
run timeout --foreground -v --kill=10 $PODMAN_TIMEOUT $PODMAN "$@" 3>/dev/null
|
||||
run timeout --foreground -v --kill=10 $PODMAN_TIMEOUT $PODMAN $_PODMAN_TEST_OPTS "$@" 3>/dev/null
|
||||
# without "quotes", multiple lines are glommed together into one
|
||||
if [ -n "$output" ]; then
|
||||
echo "$output"
|
||||
|
87
test/upgrade/README.md
Normal file
87
test/upgrade/README.md
Normal file
@ -0,0 +1,87 @@
|
||||
Background
|
||||
==========
|
||||
|
||||
For years we've been needing a way to test podman upgrades; this
|
||||
became much more critical on December 7, 2020, when Matt disclosed
|
||||
a bug he had found over the weekend
|
||||
([#8613](https://github.com/containers/podman/issues/8613))
|
||||
in which reuse of a previously-defined field name would
|
||||
result in fatal JSON decode failures if current-podman were
|
||||
to try reading containers created with podman <= 1.8 (FIXME: confirm).
|
||||
|
||||
Upgrade testing is a daunting problem; but in the December 12
|
||||
Cabal meeting Dan suggested using podman-in-podman. This PR
|
||||
is the result of fleshing out that idea.
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
The BATS script in this directory fetches and runs an old-podman
|
||||
container image from quay.io/podman, uses it to create and run
|
||||
a number of containers, then uses new-podman to interact with
|
||||
those containers.
|
||||
|
||||
As of 2021-02-23 the available old-podman versions are:
|
||||
|
||||
```console
|
||||
$ ./bin/podman search --list-tags quay.io/podman/stable | awk '$2 ~ /^v/ { print $2}' | sort | column -c 75
|
||||
v1.4.2 v1.5.0 v1.6 v1.9.0 v2.0.2 v2.1.1
|
||||
v1.4.4 v1.5.1 v1.6.2 v1.9.1 v2.0.6 v2.2.1
|
||||
```
|
||||
|
||||
Test invocation is:
|
||||
```console
|
||||
$ sudo env PODMAN=bin/podman PODMAN_UPGRADE_FROM=v1.9.0 PODMAN_UPGRADE_TEST_DEBUG= bats test/upgrade
|
||||
```
|
||||
(Path assumes you're cd'ed to top-level podman repo). `PODMAN_UPGRADE_FROM`
|
||||
can be any of the versions above. `PODMAN_UPGRADE_TEST_DEBUG` is empty
|
||||
here, but listed so you can set it `=1` and leave the podman_parent
|
||||
container running. Interacting with this container is left as an
|
||||
exercise for the reader.
|
||||
|
||||
The script will pull the given podman image, invoke it with a scratch
|
||||
root directory, and have it do a small set of podman stuff (pull an
|
||||
image, create/run some containers). This podman process stays running
|
||||
because if it exits, it kills containers running inside the container.
|
||||
|
||||
We then invoke the current (host-installed) podman, using the same
|
||||
scratch root directory, and perform operations on those images and
|
||||
containers. Most of those operations are done in individual @tests.
|
||||
|
||||
The goal is to have this upgrade test run in CI, iterating over a
|
||||
loop of known old versions. This list would need to be hand-maintained
|
||||
and updated on new releases. There might also need to be extra
|
||||
configuration defined, such as per-version commands (see below).
|
||||
|
||||
Findings
|
||||
========
|
||||
|
||||
Well, first, `v1.6.2` won't work on default f32/f33: the image
|
||||
does not include `crun`, so it can't work at all:
|
||||
|
||||
ERRO[0000] oci runtime "runc" does not support CGroups V2: use system migrate to mitigate
|
||||
|
||||
I realize that it's kind of stupid not to test 1.6, since that's
|
||||
precisely the test that would've caught #8613 early, but I just
|
||||
don't think it's worth the hassle of setting up cgroupsv1 VMs.
|
||||
|
||||
For posterity, in an earlier incarnation of this script I tried
|
||||
booting f32 into cgroupsv1 and ran into the following warnings
|
||||
when running new-podman on old-containers:
|
||||
```
|
||||
ERRO[0000] error joining network namespace for container 322b66d94640e31b2e6921565445cf0dade4ec13cabc16ee5f29292bdc038341: error retrieving network namespace at /var/run/netns/cni-577e2289-2c05-2e28-3c3d-002a5596e7da: failed to Statfs "/var/run/netns/cni-577e2289
|
||||
```
|
||||
|
||||
Where To Go From Here
|
||||
=====================
|
||||
|
||||
* Tests are still (2021-02-23) incomplete, with several failing outright.
|
||||
See FIXMEs in the code.
|
||||
|
||||
* Figuring out how/if to run rootless. I think this is possible, perhaps
|
||||
even necessary, but will be tricky to get right because of home-directory
|
||||
mounting.
|
||||
|
||||
* Figuring out how/if to run variations with different config files
|
||||
(e.g. running OLD-PODMAN that creates a user libpod.conf, tweaking
|
||||
that in the test, then running NEW-PODMAN upgrade tests).
|
11
test/upgrade/helpers.bash
Normal file
11
test/upgrade/helpers.bash
Normal file
@ -0,0 +1,11 @@
|
||||
# -*- bash -*-
|
||||
|
||||
load "../system/helpers"
|
||||
|
||||
# Per-test setup hook for the upgrade tests: intentionally does nothing
# and returns success.
setup() {
    true
}
|
||||
|
||||
# Per-test teardown hook for the upgrade tests: intentionally does nothing
# and returns success.
teardown() {
    true
}
|
313
test/upgrade/test-upgrade.bats
Normal file
313
test/upgrade/test-upgrade.bats
Normal file
@ -0,0 +1,313 @@
|
||||
# -*- bats -*-
|
||||
|
||||
load helpers
|
||||
|
||||
# Create a var-lib-containers dir for this podman. We need to bind-mount
|
||||
# this into the container, and use --root and --runroot and --tmpdir
|
||||
# options both in the container podman and out here: that's the only
|
||||
# way to share image and container storage.
|
||||
# Create the shared scratch directory once. If $PODMAN_UPGRADE_WORKDIR is
# already set (e.g. inherited from the environment) it is reused as-is.
if [[ -z "${PODMAN_UPGRADE_WORKDIR}" ]]; then
    # Much as I'd love a descriptive name like "podman-upgrade-tests.XXXXX",
    # keep it short ("pu") because of the 100-character path length limit
    # for UNIX sockets (needed by conmon)
    #
    # Assignment is kept separate from 'export' so that a mktemp failure is
    # not masked: continuing with an empty workdir would make the 'touch'
    # below operate on /status.
    PODMAN_UPGRADE_WORKDIR=$(mktemp -d --tmpdir="${BATS_TMPDIR:-${TMPDIR:-/tmp}}" pu.XXXXXX) || {
        echo "mktemp failed: cannot create upgrade-test work directory" >&2
        exit 1
    }
    export PODMAN_UPGRADE_WORKDIR

    # Empty status file; later overwritten with "failed" or "OK"
    touch "$PODMAN_UPGRADE_WORKDIR/status"
fi
|
||||
|
||||
# Generate a set of random strings used for content verification
|
||||
# Generate the values only when RANDOM_STRING_1 is not already set; a
# process that inherits it from the environment skips this block and must
# therefore inherit every value defined here, HOST_PORT included.
if [[ -z "${RANDOM_STRING_1}" ]]; then
    export RANDOM_STRING_1=$(random_string 15)
    export LABEL_CREATED=$(random_string 16)
    export LABEL_FAILED=$(random_string 17)
    export LABEL_RUNNING=$(random_string 18)

    # FIXME: randomize this
    # Exported like the strings above: otherwise it would be unset in any
    # process that skips this block because RANDOM_STRING_1 was inherited.
    export HOST_PORT=34567
fi
|
||||
|
||||
# Print "<Version>-<first 8 characters of Git Commit>" as parsed from the
# output of "<command...> version", e.g. '3.0.0-dev-d1a26013'.
# Arguments: the podman command (plus any leading options) to query.
_podman_version_string() {
    # awk's substr() is 1-indexed; a start index of 0 yields only 7 chars
    # under POSIX semantics, so start at 1 to get the full 8.
    "$@" version | awk '/^Version:/ { V=$2 } /^Git Commit:/ { G=$3 } END { print V "-" substr(G,1,8) }'
}

# Version string of the podman we're actually testing, e.g. '3.0.0-dev-d1a26013'
# ($PODMAN is left unquoted, as in the rest of this file, so it is
# word-split into command and arguments.)
PODMAN_VERSION=$(_podman_version_string $PODMAN)
|
||||
|
||||
# Per-test setup:
#   - calls skip_if_rootless;
#   - requires $PODMAN_UPGRADE_FROM to be set;
#   - refuses to continue if the status file in the work directory
#     contains "failed";
#   - exports _PODMAN_TEST_OPTS so podman uses the root/runroot/tmpdir
#     under $PODMAN_UPGRADE_WORKDIR.
# NOTE(review): the bare 'false' statements abort setup only when errexit
# is in effect (presumably provided by bats - confirm).
setup() {
    skip_if_rootless

    # The podman-in-podman image (old podman)
    if [[ -z "$PODMAN_UPGRADE_FROM" ]]; then
        echo "# \$PODMAN_UPGRADE_FROM is undefined (should be e.g. v1.9.0)" >&3
        false
    fi

    # Path is quoted: unquoted, a work directory containing whitespace would
    # make the read fail and a "failed" status would go unnoticed.
    if [[ "$(< "$PODMAN_UPGRADE_WORKDIR/status")" == "failed" ]]; then
        # FIXME: exit instead?
        echo "*** setup failed - no point in running tests"
        false
    fi

    export _PODMAN_TEST_OPTS="--root=$PODMAN_UPGRADE_WORKDIR/root --runroot=$PODMAN_UPGRADE_WORKDIR/runroot --tmpdir=$PODMAN_UPGRADE_WORKDIR/tmp"
}
|
||||
|
||||
###############################################################################
|
||||
# BEGIN setup
|
||||
|
||||
@test "initial setup: start $PODMAN_UPGRADE_FROM containers" {
|
||||
echo failed >| $PODMAN_UPGRADE_WORKDIR/status
|
||||
|
||||
OLD_PODMAN=quay.io/podman/stable:$PODMAN_UPGRADE_FROM
|
||||
$PODMAN pull $OLD_PODMAN
|
||||
|
||||
# Shortcut name, because we're referencing it a lot
|
||||
pmroot=$PODMAN_UPGRADE_WORKDIR
|
||||
|
||||
# WWW content to share
|
||||
mkdir -p $pmroot/var/www
|
||||
echo $RANDOM_STRING_1 >$pmroot/var/www/index.txt
|
||||
|
||||
# podman tmpdir
|
||||
mkdir -p $pmroot/tmp
|
||||
|
||||
#
|
||||
# Script to run >>OLD<< podman commands.
|
||||
#
|
||||
# These commands will be run inside a podman container. The "podman"
|
||||
# command in this script will be the desired old-podman version.
|
||||
#
|
||||
pmscript=$pmroot/setup
|
||||
cat >| $pmscript <<EOF
|
||||
#!/bin/bash
|
||||
|
||||
# cgroup-manager=systemd does not work inside a container
|
||||
opts="--cgroup-manager=cgroupfs --events-backend=file $_PODMAN_TEST_OPTS"
|
||||
|
||||
set -ex
|
||||
|
||||
# Try try again, because network flakiness makes this a point of failure
|
||||
podman \$opts pull $IMAGE \
|
||||
|| (sleep 10; podman \$opts pull $IMAGE) \
|
||||
|| (sleep 30; podman \$opts pull $IMAGE)
|
||||
|
||||
|
||||
podman \$opts create --name mycreatedcontainer --label mylabel=$LABEL_CREATED \
|
||||
$IMAGE false
|
||||
|
||||
podman \$opts run --name mydonecontainer $IMAGE echo ++$RANDOM_STRING_1++
|
||||
|
||||
podman \$opts run --name myfailedcontainer --label mylabel=$LABEL_FAILED \
|
||||
$IMAGE sh -c 'exit 17' || true
|
||||
|
||||
# FIXME: add "-p $HOST_PORT:80"
|
||||
# ...I tried and tried, and could not get this to work. I could never
|
||||
# connect to the port from the host, nor even from the podman_parent
|
||||
# container; I could never see the port listed in 'ps' nor 'inspect'.
|
||||
# And, finally, I ended up in a state where the container wouldn't
|
||||
# even start, and via complicated 'podman logs' found out:
|
||||
# httpd: bind: Address in use
|
||||
# So I just give up for now.
|
||||
#
|
||||
podman \$opts run -d --name myrunningcontainer --label mylabel=$LABEL_RUNNING \
|
||||
-v $pmroot/var/www:/var/www \
|
||||
-w /var/www \
|
||||
$IMAGE /bin/busybox-extras httpd -f -p 80
|
||||
|
||||
echo READY
|
||||
while :;do
|
||||
if [ -e /stop ]; then
|
||||
echo STOPPING
|
||||
podman \$opts stop -t 0 myrunningcontainer || true
|
||||
podman \$opts rm -f myrunningcontainer || true
|
||||
exit 0
|
||||
fi
|
||||
sleep 0.5
|
||||
done
|
||||
EOF
|
||||
chmod 555 $pmscript
|
||||
|
||||
# Clean up vestiges of previous run
|
||||
$PODMAN rm -f podman_parent || true
|
||||
|
||||
# Not entirely a NOP! This is just so we get /run/crun created on a CI VM
|
||||
$PODMAN run --rm $OLD_PODMAN true
|
||||
|
||||
#
|
||||
# Use new-podman to run the above script under old-podman.
|
||||
#
|
||||
# DO NOT USE run_podman HERE! That would use $_PODMAN_TEST_OPTS
|
||||
# and would write into our shared test dir, which would then
|
||||
# pollute it for use by old-podman. We must keep that pristine
|
||||
# so old-podman is the first to write to it.
|
||||
#
|
||||
$PODMAN run -d --name podman_parent --pid=host \
|
||||
--privileged \
|
||||
--net=host \
|
||||
--cgroupns=host \
|
||||
-v /dev/fuse:/dev/fuse \
|
||||
-v /run/crun:/run/crun \
|
||||
-v $pmroot:$pmroot \
|
||||
$OLD_PODMAN $pmroot/setup
|
||||
|
||||
_PODMAN_TEST_OPTS= wait_for_ready podman_parent
|
||||
|
||||
echo OK >| $PODMAN_UPGRADE_WORKDIR/status
|
||||
}
|
||||
|
||||
# END setup
|
||||
###############################################################################
|
||||
# BEGIN actual tests
|
||||
|
||||
# This is a NOP; used only so the version string will show up in logs
|
||||
@test "upgrade: $PODMAN_UPGRADE_FROM -> $PODMAN_VERSION" {
|
||||
:
|
||||
}
|
||||
|
||||
@test "images" {
|
||||
run_podman images -a --format '{{.Names}}'
|
||||
is "$output" "\[$IMAGE\]" "podman images"
|
||||
}
|
||||
|
||||
@test "ps : one container running" {
|
||||
run_podman ps --format '{{.Image}}--{{.Names}}'
|
||||
is "$output" "$IMAGE--myrunningcontainer" "ps: one container running"
|
||||
}
|
||||
|
||||
@test "ps -a : shows all containers" {
|
||||
# IMPORTANT: we can't use --sort=created, because that requires #8427
|
||||
# on the *creating* podman end.
|
||||
run_podman ps -a \
|
||||
--format '{{.Names}}--{{.Status}}--{{.Ports}}--{{.Labels.mylabel}}' \
|
||||
--sort=names
|
||||
is "${lines[0]}" "mycreatedcontainer--Created----$LABEL_CREATED" "created"
|
||||
is "${lines[1]}" "mydonecontainer--Exited (0).*----<no value>" "done"
|
||||
is "${lines[2]}" "myfailedcontainer--Exited (17) .*----$LABEL_FAILED" "fail"
|
||||
is "${lines[3]}" "myrunningcontainer--Up .*----$LABEL_RUNNING" "running"
|
||||
|
||||
# For debugging: dump containers and IDs
|
||||
if [[ -n "$PODMAN_UPGRADE_TEST_DEBUG" ]]; then
|
||||
run_podman ps -a
|
||||
for l in "${lines[@]}"; do
|
||||
echo "# $l" >&3
|
||||
done
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
@test "inspect - all container status" {
|
||||
tests="
|
||||
running | running | 0
|
||||
created | configured | 0
|
||||
done | exited | 0
|
||||
failed | exited | 17
|
||||
"
|
||||
while read cname state exitstatus; do
|
||||
run_podman inspect --format '{{.State.Status}}--{{.State.ExitCode}}' my${cname}container
|
||||
is "$output" "$state--$exitstatus" "status of my${cname}container"
|
||||
done < <(parse_table "$tests")
|
||||
}
|
||||
|
||||
@test "logs" {
|
||||
run_podman logs mydonecontainer
|
||||
is "$output" "++$RANDOM_STRING_1++" "podman logs on stopped container"
|
||||
|
||||
# run_podman logs myrunningcontainer
|
||||
# is "$output" "READY" "podman logs on running container"
|
||||
}
|
||||
|
||||
@test "exec" {
|
||||
run_podman exec myrunningcontainer cat /var/www/index.txt
|
||||
is "$output" "$RANDOM_STRING_1" "exec into myrunningcontainer"
|
||||
}
|
||||
|
||||
@test "load" {
|
||||
# FIXME, is this really necessary?
|
||||
skip "TBI. Not sure if there's any point to this."
|
||||
}
|
||||
|
||||
@test "mount" {
|
||||
skip "TBI"
|
||||
}
|
||||
|
||||
@test "pods" {
|
||||
skip "TBI"
|
||||
}
|
||||
|
||||
# FIXME: commit? kill? network? pause? restart? top? volumes? What else?
|
||||
|
||||
|
||||
@test "start" {
|
||||
skip "FIXME: this leaves a mount behind: root/overlay/sha/merged"
|
||||
run_podman --cgroup-manager=cgroupfs start -a mydonecontainer
|
||||
is "$output" "++$RANDOM_STRING_1++" "start on already-run container"
|
||||
}
|
||||
|
||||
@test "rm a stopped container" {
|
||||
# FIXME FIXME FIXME!
|
||||
#
|
||||
# I have no idea what's going on here. For most of my testing in this
|
||||
# section, the code here was simply 'podman rm myfailedcontainer', and
|
||||
# it would succeed, but then way down, in 'cleanup' below, the 'rm -f'
|
||||
# step would fail:
|
||||
#
|
||||
# # podman rm -f podman_parent
|
||||
# error freeing lock for container <sha>: no such file or directory
|
||||
# ...where <sha> is the ID of the podman_parent container.
|
||||
#
|
||||
# I started playing with this section, by adding 'rm mydonecontainer',
|
||||
# and now it always fails, the same way, but with the container we're
|
||||
# removing right here:
|
||||
#
|
||||
# error freeing lock for container <sha>: no such file or directory
|
||||
# ...where <sha> is the ID of mydonecontainer.
|
||||
#
|
||||
# I don't know. I give up for now, and am skip'ing the whole thing.
|
||||
# If you want to play with it, try commenting out the 'myfailed' lines,
|
||||
# or just the 'mydone' ones, or, I don't know.
|
||||
skip "FIXME: error freeing lock for container <sha>: no such file or dir"
|
||||
|
||||
# For debugging, so we can see what 'error freeing lock' refers to
|
||||
run_podman ps -a
|
||||
|
||||
run_podman rm myfailedcontainer
|
||||
is "$output" "[0-9a-f]\\{64\\}" "podman rm myfailedcontainer"
|
||||
|
||||
run_podman rm mydonecontainer
|
||||
is "$output" "[0-9a-f]\\{64\\}" "podman rm mydonecontainer"
|
||||
}
|
||||
|
||||
|
||||
@test "stop and rm" {
|
||||
# About a ten-second pause, then:
|
||||
# Error: timed out waiting for file /tmp/pu.nf747w/tmp/exits/<sha>: internal libpod error
|
||||
# It doesn't seem to be a socket-length issue: the paths are ~80-88 chars.
|
||||
# Leaving podman_parent running, and exec'ing into it, it doesn't look
|
||||
# like the file is being written to the wrong place.
|
||||
skip "FIXME: this doesn't work: timed out waiting for file tmpdir/exits/sha"
|
||||
run_podman stop myrunningcontainer
|
||||
run_podman rm myrunningcontainer
|
||||
}
|
||||
|
||||
@test "clean up parent" {
|
||||
if [[ -n "$PODMAN_UPGRADE_TEST_DEBUG" ]]; then
|
||||
skip "workdir is $PODMAN_UPGRADE_WORKDIR"
|
||||
fi
|
||||
|
||||
# We're done with shared environment. By clearing this, we can now
|
||||
# use run_podman for actions on the podman_parent container
|
||||
unset _PODMAN_TEST_OPTS
|
||||
|
||||
# (Useful for debugging the 'rm -f' step below, which, when it fails, only
|
||||
# gives a container ID. This 'ps' confirms that the CID is podman_parent)
|
||||
run_podman ps -a
|
||||
|
||||
# Stop the container gracefully
|
||||
run_podman exec podman_parent touch /stop
|
||||
run_podman wait podman_parent
|
||||
|
||||
run_podman logs podman_parent
|
||||
run_podman rm -f podman_parent
|
||||
|
||||
# FIXME: why does this remain mounted?
|
||||
umount $PODMAN_UPGRADE_WORKDIR/root/overlay || true
|
||||
|
||||
rm -rf $PODMAN_UPGRADE_WORKDIR
|
||||
}
|
||||
|
||||
# FIXME: now clean up
|
Reference in New Issue
Block a user