Cirrus: Print images that should be pruned

Over time, unless they're removed, the project could accumulate quite a
large collection of VM images.  While generally cheap (less than a penny
each, per month), these will become a significant cost item if not kept
in check.

Add a specialized container for handling image-pruning, but limit
it to only finding and printing (not actually deleting) images.

Also update the image-building workflow so that base-images used to
compose cache-images are also labeled with metadata.

N.B.: As an additional safeguard, the service account that
      executes the new container in production *DOES NOT*
      have access to delete images.  Deletion can be enabled
      by adding the GCE IAM role: CustomComputeImagePrune

Signed-off-by: Chris Evich <cevich@redhat.com>
This commit is contained in:
Chris Evich
2019-06-14 13:28:47 -04:00
parent 547cb4e55e
commit e8564dc44f
8 changed files with 207 additions and 39 deletions

View File

@ -261,7 +261,7 @@ meta_task:
cpu: 1
memory: 1
env:
env: &meta_env_vars
# Space-separated list of images used by this repository state
IMGNAMES: >-
${FEDORA_CACHE_IMAGE_NAME}
@ -277,6 +277,31 @@ meta_task:
timeout_in: 10m
# Cirrus-CI ignores entrypoint defined in image
script: '/usr/local/bin/entrypoint.sh |& ${TIMESTAMP}'
# Remove old and disused images based on labels set by meta_task
image_prune_task:
# Do not run this frequently: restricted to builds of the master branch
only_if: $CIRRUS_BRANCH == 'master'
# Listed dependency is the "meta" task (meta_task), which sets the labels
depends_on:
- "meta"
container:
image: "quay.io/libpod/imgprune:latest" # see contrib/imgprune
cpu: 1
memory: 1
env:
# Merge in every variable defined under the meta_env_vars anchor (meta_task's env)
<<: *meta_env_vars
# Encrypted service-account credentials; the entrypoint requires both to be set
GCPJSON: ENCRYPTED[4c11d8e09c904c30fc70eecb95c73dec0ddf19976f9b981a0f80f3f6599e8f990bcef93c253ac0277f200850d98528e7]
GCPNAME: ENCRYPTED[7f54557ba6e5a437f11283a53e71baec9ca546f48a9835538cc54d297f79968eb1337d4596a1025b14f9d1c5723fbd29]
timeout_in: 10m
# Entrypoint is invoked explicitly, same as in meta_task; stdout and stderr
# are both piped through ${TIMESTAMP}
script: '/usr/local/bin/entrypoint.sh |& ${TIMESTAMP}'

View File

@ -3,7 +3,8 @@
set -e
source $(dirname $0)/lib.sh
ENV_VARS='PACKER_BUILDS BUILT_IMAGE_SUFFIX UBUNTU_BASE_IMAGE FEDORA_BASE_IMAGE PRIOR_FEDORA_BASE_IMAGE SERVICE_ACCOUNT GCE_SSH_USERNAME GCP_PROJECT_ID PACKER_VER SCRIPT_BASE PACKER_BASE'
BASE_IMAGE_VARS='FEDORA_BASE_IMAGE PRIOR_FEDORA_BASE_IMAGE UBUNTU_BASE_IMAGE'
ENV_VARS="PACKER_BUILDS BUILT_IMAGE_SUFFIX $BASE_IMAGE_VARS SERVICE_ACCOUNT GCE_SSH_USERNAME GCP_PROJECT_ID PACKER_VER SCRIPT_BASE PACKER_BASE CIRRUS_BUILD_ID CIRRUS_CHANGE_IN_REPO"
req_env_var $ENV_VARS
# Must also be made available through make, into packer process
export $ENV_VARS
@ -24,6 +25,20 @@ then
fi
cd "$GOSRC/$PACKER_BASE"
# Add/update labels on base-images used in this build to prevent premature deletion
# NOTE(review): ARGS is not referenced anywhere in the visible part of this
# script; kept in case something outside this hunk relies on it - confirm.
ARGS="
"
for base_image_var in $BASE_IMAGE_VARS
do
    # See entrypoint.sh in contrib/imgts and contrib/imgprune
    # These updates can take a while, run them in the background, check later.
    # $base_image_var holds the NAME of a variable; its value (obtained via
    # indirect expansion) is the name of the image to label.
    gcloud compute images update "${!base_image_var}" \
        "--update-labels=last-used=$(date +%s)" \
        "--update-labels=build-id=$CIRRUS_BUILD_ID" \
        "--update-labels=repo-ref=$CIRRUS_CHANGE_IN_REPO" \
        "--update-labels=project=$GCP_PROJECT_ID" &
done
make libpod_images \
PACKER_BUILDS=$PACKER_BUILDS \
@ -33,9 +48,31 @@ make libpod_images \
PACKER_BASE=$PACKER_BASE \
BUILT_IMAGE_SUFFIX=$BUILT_IMAGE_SUFFIX
# Distinguish images produced on master from PR-produced images: only a
# build of the master branch gets a (non-empty) bucket-name suffix.
case "${CIRRUS_BRANCH:-}" in
    master) POST_MERGE_BUCKET_SUFFIX="-master" ;;
    *)      POST_MERGE_BUCKET_SUFFIX="" ;;
esac
# When successful, upload manifest of produced images using a filename unique
# to this build.
URI="gs://packer-import${POST_MERGE_BUCKET_SUFFIX}/manifest${BUILT_IMAGE_SUFFIX}.json"
gsutil cp packer-manifest.json "$URI"
# Ensure any background 'gcloud compute images update' processes finish
set +e  # need 'wait' exit code to avoid race
while [[ -n "$(jobs)" ]]
do
    wait -n
    RET=$?
    # 127: there was no job left to wait for, i.e. the last one finished
    # between the 'jobs' check and 'wait' (avoid TOCTOU race w/ jobs + wait).
    # N/B: No comment may follow a line-continuation backslash; '\ #...' is an
    # escaped space, not a continuation, and silently changes the condition.
    if [[ "$RET" -eq 127 ]] || [[ "$RET" -eq 0 ]]
    then
        continue
    fi
    die "$RET" "Required base-image metadata update failed"
done
echo "Finished. A JSON manifest of produced images is available at $URI"

View File

@ -0,0 +1,7 @@
# Container image for finding VM images that should be pruned.
# Layered on the imgts image; presumably that is the image built from
# contrib/imgts/Dockerfile, which installs google-cloud-sdk and copies
# lib_entrypoint.sh into /usr/local/bin/ - TODO confirm.
FROM libpod/imgts:latest
# Apply package updates, then drop the yum caches
RUN yum -y update && \
yum clean all
# Install this image's entrypoint at /usr/local/bin/entrypoint.sh.
# The source path assumes the build context is the repository root.
COPY /contrib/imgprune/entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod 755 /usr/local/bin/entrypoint.sh

View File

@ -0,0 +1,11 @@
![PODMAN logo](../../logo/podman-logo-source.svg)
A container image for maintaining the collection of
VM images used by CI/CD on this project and several others.
Acts upon metadata maintained by the imgts container.
Example build (from repository root):
```bash
sudo podman build -t $IMAGE_NAME -f contrib/imgprune/Dockerfile .
```

67
contrib/imgprune/entrypoint.sh Executable file
View File

@ -0,0 +1,67 @@
#!/bin/bash

# Search the project's VM images for pruning candidates, judged by the
# 'last-used' label, and print what would be deleted.  Nothing is actually
# deleted: the delete command is only echoed.
#
# Required environment: GCPJSON GCPNAME GCPPROJECT IMGNAMES
# Optional environment: IMAGE_BUILDER_CACHE_IMAGE_NAME (excluded from the search)

set -e

source /usr/local/bin/lib_entrypoint.sh

req_env_var GCPJSON GCPNAME GCPPROJECT IMGNAMES

gcloud_init

# For safety's sake + limit nr background processes
PRUNE_LIMIT=10
# A 'last-used' value at or beyond this moment is treated as invalid
THEFUTURE=$(date --date='+1 hour' +%s)
TOO_OLD='90 days ago'
THRESHOLD=$(date --date="$TOO_OLD" +%s)
# Format Ref: https://cloud.google.com/sdk/gcloud/reference/topic/formats
FORMAT='value[quote](name,selfLink,creationTimestamp,labels)'
PROJRE="/v1/projects/$GCPPROJECT/global/"
# NOTE(review): BASE_IMAGE_RE is not referenced anywhere below - confirm intent
BASE_IMAGE_RE='cloud-base'
# Only images created before this date are considered at all
RECENTLY=$(date --date='30 days ago' --iso-8601=date)
EXCLUDE="$IMGNAMES ${IMAGE_BUILDER_CACHE_IMAGE_NAME:-}" # whitespace separated values
# Filter Ref: https://cloud.google.com/sdk/gcloud/reference/topic/filters
FILTER="selfLink~$PROJRE AND creationTimestamp<$RECENTLY AND NOT name=($EXCLUDE)"
# Candidate names are collected in a file because the loop below runs on the
# receiving end of a pipeline, where variable assignments would be lost.
TODELETE=$(mktemp -p '' todelete.XXXXXX)

echo "Searching images for pruning candidates older than $TOO_OLD ($THRESHOLD):"
$GCLOUD compute images list --format="$FORMAT" --filter="$FILTER" | \
    while read -r name selfLink creationTimestamp labels
    do
        created_ymd=$(date --date="$creationTimestamp" --iso-8601=date)
        last_used=$(grep -E --only-matching --max-count=1 'last-used=[[:digit:]]+' <<< "$labels" || true)
        markmsgpfx="Marking $name (created $created_ymd) for deletion"
        if [[ -z "$last_used" ]]
        then # image pre-dates addition of tracking labels
            echo "$markmsgpfx: Missing 'last-used' metadata, labels: '$labels'"
            echo "$name" >> "$TODELETE"
            continue
        fi

        # Round-trip the label value through 'date': an unparsable value
        # results in an empty timestamp, which is caught just below.
        last_used_timestamp=$(date --date="@${last_used#last-used=}" +%s || true)
        if [[ -z "$last_used_timestamp" ]] || [[ "$last_used_timestamp" -ge "$THEFUTURE" ]]
        then
            echo "$markmsgpfx: Missing or invalid last-used timestamp: '$last_used_timestamp'"
            echo "$name" >> "$TODELETE"
            continue
        fi

        # Only format the date once the timestamp is known to be usable
        last_used_ymd=$(date --date="@$last_used_timestamp" --iso-8601=date)
        if [[ "$last_used_timestamp" -le "$THRESHOLD" ]]
        then
            echo "$markmsgpfx: Used over $TOO_OLD on $last_used_ymd"
            echo "$name" >> "$TODELETE"
            continue
        fi

        echo "NOT $markmsgpfx: last used on $last_used_ymd"
    done

echo "Pruning up to $PRUNE_LIMIT images that were marked for deletion:"
# The last $PRUNE_LIMIT marked names, processed in random order
mapfile -t prune_names < <(tail -n "$PRUNE_LIMIT" "$TODELETE" | sort --random-sort)
for image_name in "${prune_names[@]}"
do
    # This can take quite some time (minutes), run in parallel disconnected from terminal
    echo "TODO: Would have: $GCLOUD compute images delete $image_name &"
    sleep "$((1 + RANDOM / 1000))s" &  # Simulate background operation
done

wait || true  # Nothing to delete: No background jobs
rm -f -- "$TODELETE"

View File

@ -7,14 +7,14 @@ RUN yum -y update && \
yum -y install google-cloud-sdk && \
yum clean all
COPY /contrib/imgts/entrypoint.sh /usr/local/bin/entrypoint.sh
ENV GCPJSON="__unknown__" \
GCPNAME="__unknown__" \
GCPPROJECT="__unknown__" \
IMGNAMES="__unknown__" \
TIMESTAMP="__unknown__" \
BUILDID="__unknown__" \
REPOREF="__unknown__"
COPY ["/contrib/imgts/entrypoint.sh", "/contrib/imgts/lib_entrypoint.sh", "/usr/local/bin/"]
RUN chmod 755 /usr/local/bin/entrypoint.sh
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]

View File

@ -2,45 +2,22 @@
set -e
RED="\e[1;36;41m"
YEL="\e[1;33;44m"
NOR="\e[0m"
source /usr/local/bin/lib_entrypoint.sh
die() {
echo -e "$2" >&2
exit "$1"
}
req_env_var GCPJSON GCPNAME GCPPROJECT IMGNAMES BUILDID REPOREF
SENTINEL="__unknown__" # default set in dockerfile
gcloud_init
[[ "$GCPJSON" != "$SENTINEL" ]] || \
die 1 "Must specify service account JSON in \$GCPJSON"
[[ "$GCPNAME" != "$SENTINEL" ]] || \
die 2 "Must specify service account name in \$GCPNAME"
[[ "$GCPPROJECT" != "$SENTINEL" ]] || \
die 4 "Must specify GCP Project ID in \$GCPPROJECT"
[[ -n "$GCPPROJECT" ]] || \
die 5 "Must specify non-empty GCP Project ID in \$GCPPROJECT"
[[ "$IMGNAMES" != "$SENTINEL" ]] || \
die 6 "Must specify space separated list of GCE image names in \$IMGNAMES"
[[ "$BUILDID" != "$SENTINEL" ]] || \
die 7 "Must specify the number of current build in \$BUILDID"
[[ "$REPOREF" != "$SENTINEL" ]] || \
die 8 "Must specify a PR number or Branch name in \$REPOREF"
ARGS="
--update-labels=last-used=$(date +%s)
--update-labels=build-id=$BUILDID
--update-labels=repo-ref=$REPOREF
--update-labels=project=$GCPPROJECT
"
ARGS="--update-labels=last-used=$(date +%s)"
# optional
[[ -z "$BUILDID" ]] || ARGS="$ARGS --update-labels=build-id=$BUILDID"
[[ -z "$REPOREF" ]] || ARGS="$ARGS --update-labels=repo-ref=$REPOREF"
[[ -z "$GCPPROJECT" ]] || ARGS="$ARGS --update-labels=project=$GCPPROJECT"
gcloud config set account "$GCPNAME"
gcloud config set project "$GCPPROJECT"
echo "$GCPJSON" > /tmp/gcp.json
gcloud auth activate-service-account --key-file=/tmp/gcp.json || rm /tmp/gcp.json
for image in $IMGNAMES
do
gcloud compute images update "$image" $ARGS &
$GCLOUD compute images update "$image" $ARGS &
done
set +e # Actual update failures are only warnings
wait || die 0 "${RED}WARNING:$NOR ${YEL}Failed to update labels on one or more images:$NOR '$IMGNAMES'"
wait || echo "Warning: No \$IMGNAMES were specified."

View File

@ -0,0 +1,44 @@
#!/bin/bash

# Settings shared by the entrypoint scripts that source this file.

set -o errexit

# Terminal escape sequences used when printing messages; stored unexpanded,
# so they only take effect when printed with escape interpretation enabled.
NOR='\e[0m'
RED='\e[1;36;41m'
YEL='\e[1;33;44m'

# Placeholder value marking a variable nobody explicitly set
# (default set in dockerfile)
SENTINEL='__unknown__'

# gcloud command prefix with all input prompts disabled
# https://cloud.google.com/sdk/docs/scripting-gcloud
GCLOUD='gcloud --quiet'
# Print a highlighted message on stderr, then exit unless the status is zero.
# Globals:   RED, YEL, NOR (read) - escape sequences wrapped around the text
# Arguments: $1  - exit status; 0 means "print only": the function returns
#            $2  - message prefix, e.g. FATAL or WARNING
#            $3+ - message words; joined with single spaces
# Outputs:   "<prefix>: <message>" on stderr, backslash escapes interpreted
die() {
    local IFS=' '  # make "$*" join with a space whatever the caller's IFS is
    local status=$1
    local prefix=$2
    shift 2
    local message="$*"
    printf '%b\n' "${RED}${prefix}:${NOR} ${YEL}${message}${NOR}" >&2
    [[ "$status" -eq 0 ]] || exit "$status"
}
# Pass in a list of one or more envariable names; exit non-zero with
# helpful error message if any value is empty, or still holds the
# $SENTINEL placeholder.
# Globals:   SENTINEL (read)
# Arguments: names (not values) of the variables to check
# Exits:     via die; status 1 for an empty/unset variable, status 2 for
#            one still equal to $SENTINEL
req_env_var() {
    # Local, so a caller's own loop variable is never overwritten
    local _req_name
    for _req_name in "$@"; do
        if [[ -z "${!_req_name}" ]]; then
            die 1 FATAL "entrypoint.sh requires \$$_req_name to be non-empty."
        elif [[ "${!_req_name}" == "$SENTINEL" ]]; then
            die 2 FATAL "entrypoint.sh requires \$$_req_name to be explicitly set."
        fi
    done
}
# Activate the service account whose JSON key is held in $GCPJSON.
# Globals:   GCPJSON, GCPPROJECT, GCLOUD (read); TMPF (written)
# Exits:     status 5, via die, when the key cannot be written or gcloud
#            rejects it
# Side effects: turns off xtrace and errexit and does NOT restore them;
#            replaces any EXIT trap already installed.
gcloud_init() {
    # With xtrace off, the credentials in $GCPJSON are not echoed by tracing.
    # NOTE(review): errexit also stays off for the remainder of the calling
    # script; left as-is because callers may depend on it - confirm.
    set +xe
    # mktemp already produces an unpredictable name
    TMPF=$(mktemp -p '' .gcpjson.XXXXXXXXXX) || die 5 FATAL auth
    # Single-quoted so $TMPF is expanded (and quoted) when the trap fires
    trap 'rm -f -- "$TMPF"' EXIT
    # $GCLOUD is deliberately unquoted: it holds a command plus its options
    if ! printf '%s\n' "$GCPJSON" > "$TMPF" ||
       ! $GCLOUD auth activate-service-account --project "$GCPPROJECT" --key-file="$TMPF"
    then
        die 5 FATAL auth
    fi
    # Remove the key right away, not only when the script exits
    rm -f -- "$TMPF"
}