From 84e7ce82d9074a1de4cc3b2cf222dc5bd70bb764 Mon Sep 17 00:00:00 2001
From: Ed Santiago <santiago@redhat.com>
Date: Tue, 15 Mar 2022 11:25:36 -0600
Subject: [PATCH] Binary growth check, part 1 of 2

Add a CI check to prevent unwanted bloat in binary images,
by building a baseline (pre-PR) binary then comparing file
sizes post-PR.

We piggyback onto the existing 'Build Each Commit' CI check
because it gives us an easy way to run 'make' against the
parent commit.

This is part 1 of 2: adding the script, not the Makefile rule.
We can't add the Makefile rule now because the script it would
invoke does not exist in the parent commit.

Signed-off-by: Ed Santiago <santiago@redhat.com>
---
 hack/make-and-check-size | 131 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100755 hack/make-and-check-size

diff --git a/hack/make-and-check-size b/hack/make-and-check-size
new file mode 100755
index 0000000000..a6a77e8ca8
--- /dev/null
+++ b/hack/make-and-check-size
@@ -0,0 +1,131 @@
+#!/bin/bash
+#
+# make-and-check-size - wrapper around 'make' that also checks binary growth
+#
+# This script is intended to be run via 'git rebase -x', in a Makefile rule
+# such as:
+#
+#    build-all-new-commits:
+#       CONTEXT_DIR=$(shell mktemp -d --tmpdir make-size-check.XXXXXXX); \
+#       git rebase $(GIT_BASE_BRANCH)^ -x "hack/make-and-check-size $$CONTEXT_DIR"; \
+#       $(RM) -rf $$CONTEXT_DIR
+#
+# ...which has long been a part of our usual CI, one that makes sure that
+# each commit (in a multi-commit PR) can be compiled individually. By
+# adding the '^' to GIT_BASE_BRANCH we establish a baseline and store
+# the binary sizes of each file (podman, podman-remote) prior to our PR.
+#
+# CONTEXT_DIR is a temporary directory used to store the original size of
+# each binary file under bin/ (one small file per binary, named after it).
+#
+# *IMPORTANT NOTE*: this script will leave the git checkout in a funky state!
+# (because we rebase onto a nonterminal commit). I believe this is OK, since
+# this makefile target is used only in CI and only in a scratch VM. Running
+# this in a development environment would yield unpredictable results anyway,
+# by rebasing onto origin/main by default and by leaving an aborted rebase
+# on failure.
+#
+ME=$(basename -- "$0")
+
+###############################################################################
+# BEGIN end-user-customizable settings
+
+# Maximum allowable growth of any single binary, in bytes
+MAX_BIN_GROWTH=$((50 * 1024))
+
+# Github label which allows overriding this check
+OVERRIDE_LABEL=bloat_approved
+
+# END   end-user-customizable settings
+###############################################################################
+
+# bloat_approved - ask github whether this PR carries the $OVERRIDE_LABEL label.
+# Requires $CIRRUS_PR and $CIRRUS_REPO_CLONE_TOKEN in the environment.
+# Returns 0 if the label is present; nonzero if absent or if the query fails.
+function bloat_approved() {
+    # Argument is the actual size increase in this build.
+    # FIXME: 2022-03-21: this is not actually used atm, but Ed hopes some day
+    #        to implement a more robust size-override mechanism, such as by
+    #        requiring a MAX_BIN_GROWTH=nnn statement in github comments.
+    local actual_growth="$1" query result labels
+
+    if [[ -z "$CIRRUS_PR" ]]; then
+        echo "$ME: cannot query github: \$CIRRUS_PR is undefined" >&2
+        return 1
+    fi
+    if [[ -z "$CIRRUS_REPO_CLONE_TOKEN" ]]; then
+        echo "$ME: cannot query github: \$CIRRUS_REPO_CLONE_TOKEN is undefined" >&2
+        return 1
+    fi
+
+    query="{
+  \"query\": \"query {
+  repository(owner: \\\"containers\\\", name: \\\"podman\\\") {
+    pullRequest(number: $CIRRUS_PR) {
+      labels(first: 100) {
+        nodes {
+          name
+        }
+      }
+    }
+  }
+}\"
+}"
+
+    result=$(curl -s -H "Authorization: bearer $CIRRUS_REPO_CLONE_TOKEN" -H "Accept: application/vnd.github.antiope-preview+json" -H "Content-Type: application/json" -X POST --data @- https://api.github.com/graphql <<<"$query")
+
+    labels=$(jq -r '.data.repository.pullRequest.labels.nodes[].name' <<<"$result")
+    # jq -r emits one label per line: demand an exact, literal, whole-line match
+    grep -q -x -F -- "$OVERRIDE_LABEL" <<<"$labels"
+}
+
+# ACTUAL CODE BEGINS HERE
+set -e
+
+# Must be invoked with one argument, an existing context directory
+context_dir=${1?Missing CONTEXT-DIR argument}
+if [[ ! -d $context_dir ]]; then
+    echo "$ME: directory '$context_dir' does not exist" >&2
+    exit 1
+fi
+
+# This is the original (and primary) purpose of this check: if 'make' fails,
+# there is no point in continuing ('set -e' above aborts the script)
+echo
+echo "Building: $(git rev-parse HEAD)"
+make
+
+# Determine size of each built file.
+# - If this is our first time through, preserve that size in a tmpfile
+# - On all subsequent runs, compare built size to initial size
+for bin in bin/*;do
+    size=$(stat -c %s "$bin")
+
+    saved_size_file=$context_dir/$(basename -- "$bin")
+    if [[ -e $saved_size_file ]]; then
+        # Not the first time through: compare to original (baseline) size
+        size_orig=$(< "$saved_size_file")
+        delta_size=$(( size - size_orig ))
+
+        if (( delta_size > MAX_BIN_GROWTH )); then
+            separator=$(printf "%.0s*" {1..75})   # row of stars, for highlight
+            echo "$separator"
+            echo "* $bin grew by $delta_size bytes; max allowed is $MAX_BIN_GROWTH."
+            echo "*"
+            if bloat_approved "$delta_size"; then
+                echo "* Continuing due to '$OVERRIDE_LABEL' label"
+                echo "*"
+                echo "$separator"
+            else
+                echo "* Please investigate, and fix if possible."
+                echo "*"
+                echo "* A repo admin can override by setting the $OVERRIDE_LABEL label"
+                echo "$separator"
+                exit 1
+            fi
+        fi
+    else
+        # First time through: preserve original file size
+        echo "$size" >"$saved_size_file"
+    fi
+done