From 5dfc44cad2feacb661be63eba326a6bc66a2e591 Mon Sep 17 00:00:00 2001 From: Nalin Dahyabhai Date: Wed, 14 Apr 2021 13:35:43 -0400 Subject: [PATCH 1/2] rmi: don't break when the image is missing a manifest In libpod/image.Image.Remove(), if the attempt to find the image's parent fails for any reason, log a warning and proceed as though it didn't have one instead of failing, which would leave us unable to remove the image without resetting everything. In libpod/Runtime.RemoveImage(), if we can't determine if an image has children, log a warning, and assume that it doesn't have any instead of failing, which would leave us unable to remove the image without resetting everything. In pkg/domain/infra/abi.ImageEngine.Remove(), when attempting to remove all images, if we encounter an error checking if a given image has children, log a warning, and assume that it doesn't have any instead of failing, which would leave us unable to remove the image without resetting everything. Signed-off-by: Nalin Dahyabhai --- libpod/image/image.go | 3 +- libpod/runtime_img.go | 3 +- pkg/domain/infra/abi/images.go | 4 +- test/system/330-corrupt-images.bats | 134 ++++++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 4 deletions(-) create mode 100644 test/system/330-corrupt-images.bats diff --git a/libpod/image/image.go b/libpod/image/image.go index cecd64eb7f..2ead63c6f8 100644 --- a/libpod/image/image.go +++ b/libpod/image/image.go @@ -623,7 +623,8 @@ func (i *Image) TopLayer() string { func (i *Image) Remove(ctx context.Context, force bool) error { parent, err := i.GetParent(ctx) if err != nil { - return err + logrus.Warnf("error determining parent of image: %v, ignoring the error", err) + parent = nil } if _, err := i.imageruntime.store.DeleteImage(i.ID(), true); err != nil { return err diff --git a/libpod/runtime_img.go b/libpod/runtime_img.go index e57890fa29..cb08a8c300 100644 --- a/libpod/runtime_img.go +++ b/libpod/runtime_img.go @@ -66,7 +66,8 @@ func (r *Runtime) RemoveImage(ctx context.Context, img *image.Image, force bool) hasChildren, err := img.IsParent(ctx) if err != nil { - return nil, err + logrus.Warnf("error determining if an image is a parent: %v, ignoring the error", err) + hasChildren = false } if (len(img.Names()) > 1 && !img.InputIsID()) || hasChildren { diff --git a/pkg/domain/infra/abi/images.go b/pkg/domain/infra/abi/images.go index 75ce518b6f..c5831ff989 100644 --- a/pkg/domain/infra/abi/images.go +++ b/pkg/domain/infra/abi/images.go @@ -657,8 +657,8 @@ func (ir *ImageEngine) Remove(ctx context.Context, images []string, opts entitie for _, img := range storageImages { isParent, err := img.IsParent(ctx) if err != nil { - rmErrors = append(rmErrors, err) - continue + logrus.Warnf("%v, ignoring the error", err) + isParent = false } // Skip parent images. if isParent { diff --git a/test/system/330-corrupt-images.bats b/test/system/330-corrupt-images.bats new file mode 100644 index 0000000000..9836de3630 --- /dev/null +++ b/test/system/330-corrupt-images.bats @@ -0,0 +1,134 @@ +#!/usr/bin/env bats -*- bats -*- +# +# All tests in here perform nasty manipulations on image storage. +# + +load helpers + +############################################################################### +# BEGIN setup/teardown + +# Create a scratch directory; this is what we'll use for image store and cache +if [ -z "${PODMAN_CORRUPT_TEST_WORKDIR}" ]; then + export PODMAN_CORRUPT_TEST_WORKDIR=$(mktemp -d --tmpdir=${BATS_TMPDIR:-${TMPDIR:-/tmp}} podman_corrupt_test.XXXXXX) +fi + +PODMAN_CORRUPT_TEST_IMAGE_FQIN=quay.io/libpod/alpine@sha256:634a8f35b5f16dcf4aaa0822adc0b1964bb786fca12f6831de8ddc45e5986a00 +PODMAN_CORRUPT_TEST_IMAGE_ID=961769676411f082461f9ef46626dd7a2d1e2b2a38e6a44364bcbecf51e66dd4 + +# All tests in this file (and ONLY in this file) run with a custom rootdir +function setup() { + skip_if_remote "none of these tests run under podman-remote" + _PODMAN_TEST_OPTS="--root ${PODMAN_CORRUPT_TEST_WORKDIR}/root" +} + +function teardown() { + # No other tests should ever run with this custom rootdir + unset _PODMAN_TEST_OPTS + + is_remote && return + + # Clean up + umount ${PODMAN_CORRUPT_TEST_WORKDIR}/root/overlay || true + if is_rootless; then + run_podman unshare rm -rf ${PODMAN_CORRUPT_TEST_WORKDIR}/root + else + rm -rf ${PODMAN_CORRUPT_TEST_WORKDIR}/root + fi +} + +# END setup/teardown +############################################################################### +# BEGIN primary test helper + +# This is our main action, invoked by every actual test. It: +# - creates a new empty rootdir +# - populates it with our crafted test image +# - removes [ manifest, blob ] +# - confirms that "podman images" throws an error +# - runs the specified command (rmi -a -f, prune, reset, etc) +# - confirms that it succeeds, and also emits expected warnings +function _corrupt_image_test() { + # Run this test twice: once removing manifest, once removing blob + for what_to_rm in manifest blob; do + # I have no idea, but this sometimes remains mounted + umount ${PODMAN_CORRUPT_TEST_WORKDIR}/root/overlay || true + # Start with a fresh storage root, load prefetched image into it. + /bin/rm -rf ${PODMAN_CORRUPT_TEST_WORKDIR}/root + mkdir -p ${PODMAN_CORRUPT_TEST_WORKDIR}/root + run_podman load -i ${PODMAN_CORRUPT_TEST_WORKDIR}/img.tar + # "podman load" restores it without a tag, which (a) causes rmi-by-name + # to fail, and (b) causes "podman images" to exit 0 instead of 125 + run_podman tag ${PODMAN_CORRUPT_TEST_IMAGE_ID} ${PODMAN_CORRUPT_TEST_IMAGE_FQIN} + + # shortcut variable name + local id=${PODMAN_CORRUPT_TEST_IMAGE_ID} + + case "$what_to_rm" in + manifest) rm_path=manifest ;; + blob) rm_path="=$(echo -n "sha256:$id" | base64 -w0)" ;; + *) die "Internal error: unknown action '$what_to_rm'" ;; + esac + + # Corruptify, and confirm that 'podman images' throws an error + rm -v ${PODMAN_CORRUPT_TEST_WORKDIR}/root/*-images/$id/${rm_path} + run_podman 125 images + is "$output" "Error: error retrieving label for image \"$id\": you may need to remove the image to resolve the error" + + # Run the requested command. Confirm it succeeds, with suitable warnings + run_podman $* + is "$output" ".*error determining parent of image" \ + "$* with missing $what_to_rm" + + run_podman images -a --noheading + is "$output" "" "podman images -a, after $*, is empty" + done +} + +# END primary test helper +############################################################################### +# BEGIN first "test" does a one-time pull of our desired image + +@test "podman corrupt images - initialize" { + # Pull once, save cached copy. + run_podman pull $PODMAN_CORRUPT_TEST_IMAGE_FQIN + run_podman save -o ${PODMAN_CORRUPT_TEST_WORKDIR}/img.tar \ + $PODMAN_CORRUPT_TEST_IMAGE_FQIN +} + +# END first "test" does a one-time pull of our desired image +############################################################################### +# BEGIN actual tests + +@test "podman corrupt images - rmi -f " { + _corrupt_image_test "rmi -f ${PODMAN_CORRUPT_TEST_IMAGE_ID}" +} + +@test "podman corrupt images - rmi -f " { + _corrupt_image_test "rmi -f ${PODMAN_CORRUPT_TEST_IMAGE_FQIN}" +} + +@test "podman corrupt images - rmi -f -a" { + _corrupt_image_test "rmi -f -a" +} + +@test "podman corrupt images - image prune" { + _corrupt_image_test "image prune -a -f" +} + +@test "podman corrupt images - system reset" { + _corrupt_image_test "image prune -a -f" +} + +# END actual tests +############################################################################### +# BEGIN final cleanup + +@test "podman corrupt images - cleanup" { + rm -rf ${PODMAN_CORRUPT_TEST_WORKDIR} +} + +# END final cleanup +############################################################################### + +# vim: filetype=sh From 6925683082d00f2b9679fc98adcdbf9ca65e4ec9 Mon Sep 17 00:00:00 2001 From: Nalin Dahyabhai Date: Thu, 29 Apr 2021 14:30:21 -0400 Subject: [PATCH 2/2] Post-pick adjustment In libpod/image/layerTree.toOCI(), if we can't read the image's configuration in OCI format, log the error and return a `nil` pointer instead of returning the error. 2.2's default logging level is "error" instead of "warn" used in the version the previous commit is backported from. The error that causes us to fail in 2.2 is also in a different location, so we need to add a similar change for it, and update tests to expect the accompanying error message. Signed-off-by: Nalin Dahyabhai --- libpod/image/layer_tree.go | 7 +++++-- test/system/330-corrupt-images.bats | 2 +- test/system/helpers.bash | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/libpod/image/layer_tree.go b/libpod/image/layer_tree.go index 18101575e6..cef8dadd67 100644 --- a/libpod/image/layer_tree.go +++ b/libpod/image/layer_tree.go @@ -5,6 +5,7 @@ import ( ociv1 "github.com/opencontainers/image-spec/specs-go/v1" "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) // layerTree is an internal representation of local layers. @@ -32,9 +33,11 @@ func (t *layerTree) toOCI(ctx context.Context, i *Image) (*ociv1.Image, error) { oci, exists := t.ociCache[i.ID()] if !exists { oci, err = i.ociv1Image(ctx) - if err == nil { - t.ociCache[i.ID()] = oci + if err != nil { + logrus.Errorf("%v, ignoring the error", err) + return nil, nil } + t.ociCache[i.ID()] = oci } return oci, err } diff --git a/test/system/330-corrupt-images.bats b/test/system/330-corrupt-images.bats index 9836de3630..049f84bb73 100644 --- a/test/system/330-corrupt-images.bats +++ b/test/system/330-corrupt-images.bats @@ -77,7 +77,7 @@ function _corrupt_image_test() { # Run the requested command. Confirm it succeeds, with suitable warnings run_podman $* - is "$output" ".*error determining parent of image" \ + is "$output" ".*file does not exist, ignoring the error" \ "$* with missing $what_to_rm" run_podman images -a --noheading diff --git a/test/system/helpers.bash b/test/system/helpers.bash index f5df85338e..ac9fa85090 100644 --- a/test/system/helpers.bash +++ b/test/system/helpers.bash @@ -149,7 +149,7 @@ function run_podman() { echo "$_LOG_PROMPT $PODMAN $*" # BATS hangs if a subprocess remains and keeps FD 3 open; this happens # if podman crashes unexpectedly without cleaning up subprocesses. - run timeout --foreground -v --kill=10 $PODMAN_TIMEOUT $PODMAN "$@" 3>/dev/null + run timeout --foreground -v --kill=10 $PODMAN_TIMEOUT $PODMAN $_PODMAN_TEST_OPTS "$@" 3>/dev/null # without "quotes", multiple lines are glommed together into one if [ -n "$output" ]; then echo "$output"