Cirrus: Detailed CPU/Memory/Time runner.sh stats

On several occasions, there have been questions about CPU/Memory/IO
trends in testing over time. Start collecting this data for all jobs,
using a common/stable format so that trending analysis can be performed
within/across multiple Cirrus-CI builds. This PR doesn't add any related
tooling, it simply arranges for the collection of the data.

Stats generation is done at the orchestration level to guarantee they
reflect everything happening inside `runner.sh`.  For example, the
container-based tests re-exec `runner.sh` inside a container, but
we're only interested in the top-level stats.

Update all tasks to include collection of the stats file.
Unfortunately, due to the way the Cirrus-CI YAML parser works,
it is *not* possible to alias the artifacts collection more clearly,
for example:

```yaml
    always:
        <<: *runner_stats
        <<: *logs_artifacts
```

Signed-off-by: Chris Evich <cevich@redhat.com>
This commit is contained in:
Chris Evich
2020-10-22 08:19:31 -04:00
parent abc77b4221
commit f44af20561
4 changed files with 36 additions and 1 deletions

View File

@ -17,6 +17,9 @@ env:
CIRRUS_SHELL: "/bin/bash" CIRRUS_SHELL: "/bin/bash"
# Save a little typing (path relative to $CIRRUS_WORKING_DIR) # Save a little typing (path relative to $CIRRUS_WORKING_DIR)
SCRIPT_BASE: "./contrib/cirrus" SCRIPT_BASE: "./contrib/cirrus"
# Runner statistics log file path/name
STATS_LOGFILE_SFX: 'runner_stats.log'
STATS_LOGFILE: '$GOSRC/${CIRRUS_TASK_NAME}-${STATS_LOGFILE_SFX}'
#### ####
#### Cache-image names to test with (double-quotes around names are critical) #### Cache-image names to test with (double-quotes around names are critical)
@ -75,7 +78,11 @@ ext_svc_check_task:
TEST_FLAVOR: ext_svc TEST_FLAVOR: ext_svc
CTR_FQIN: ${FEDORA_CONTAINER_FQIN} CTR_FQIN: ${FEDORA_CONTAINER_FQIN}
setup_script: &setup '$GOSRC/$SCRIPT_BASE/setup_environment.sh' setup_script: &setup '$GOSRC/$SCRIPT_BASE/setup_environment.sh'
main_script: &main '$GOSRC/$SCRIPT_BASE/runner.sh' main_script: &main '/usr/bin/time --verbose --output="$STATS_LOGFILE" $GOSRC/$SCRIPT_BASE/runner.sh'
always: &runner_stats
runner_stats_artifacts:
path: ./*-${STATS_LOGFILE_SFX}
type: text/plain
# Execute some quick checks to confirm this YAML file and all # Execute some quick checks to confirm this YAML file and all
@ -91,6 +98,7 @@ automation_task:
TEST_ENVIRON: container TEST_ENVIRON: container
setup_script: *setup setup_script: *setup
main_script: *main main_script: *main
always: *runner_stats
# This task use to be called 'gating', however that name is being # This task use to be called 'gating', however that name is being
@ -126,6 +134,7 @@ smoke_task:
make install.tools make install.tools
setup_script: *setup setup_script: *setup
main_script: *main main_script: *main
always: *runner_stats
# N/B: This task is critical. It builds all binaries and release archives # N/B: This task is critical. It builds all binaries and release archives
@ -186,6 +195,7 @@ build_task:
setup_script: *setup setup_script: *setup
main_script: *main main_script: *main
always: &binary_artifacts always: &binary_artifacts
<<: *runner_stats
gosrc_artifacts: gosrc_artifacts:
path: ./* # Grab everything in top-level $GOSRC path: ./* # Grab everything in top-level $GOSRC
type: application/octet-stream type: application/octet-stream
@ -220,6 +230,7 @@ validate_task:
clone_script: *noop clone_script: *noop
setup_script: *setup setup_script: *setup
main_script: *main main_script: *main
always: *runner_stats
# Exercise the "libpod" API with a small set of common # Exercise the "libpod" API with a small set of common
@ -239,6 +250,7 @@ bindings_task:
clone_script: *noop # Comes from cache clone_script: *noop # Comes from cache
setup_script: *setup setup_script: *setup
main_script: *main main_script: *main
always: *runner_stats
# Build the "libpod" API documentation `swagger.yaml` for eventual # Build the "libpod" API documentation `swagger.yaml` for eventual
@ -275,6 +287,7 @@ endpoint_task:
clone_script: *full_clone # build-cache not available to container tasks clone_script: *full_clone # build-cache not available to container tasks
setup_script: *setup setup_script: *setup
main_script: *main main_script: *main
always: *runner_stats
# Check that all included go modules from other sources match # Check that all included go modules from other sources match
@ -293,6 +306,7 @@ vendor_task:
clone_script: *full_clone # build-cache not available to container tasks clone_script: *full_clone # build-cache not available to container tasks
setup_script: *setup setup_script: *setup
main_script: *main main_script: *main
always: *runner_stats
# There are several other important variations of podman which # There are several other important variations of podman which
@ -394,6 +408,7 @@ docker-py_test_task:
clone_script: *noop # Comes from cache clone_script: *noop # Comes from cache
setup_script: *setup setup_script: *setup
main_script: *main main_script: *main
always: *runner_stats
# Does exactly what it says, execute the podman unit-tests on all primary # Does exactly what it says, execute the podman unit-tests on all primary
@ -412,6 +427,7 @@ unit_test_task:
gopath_cache: *ro_gopath_cache gopath_cache: *ro_gopath_cache
setup_script: *setup setup_script: *setup
main_script: *main main_script: *main
always: *runner_stats
apiv2_test_task: apiv2_test_task:
@ -428,6 +444,7 @@ apiv2_test_task:
setup_script: *setup setup_script: *setup
main_script: *main main_script: *main
always: &logs_artifacts always: &logs_artifacts
<<: *runner_stats
# Required for `contrib/cirrus/logformatter` to work properly # Required for `contrib/cirrus/logformatter` to work properly
html_artifacts: html_artifacts:
path: ./*.html path: ./*.html
@ -438,6 +455,7 @@ apiv2_test_task:
audit_log_script: '$SCRIPT_BASE/logcollector.sh audit' audit_log_script: '$SCRIPT_BASE/logcollector.sh audit'
journal_script: '$SCRIPT_BASE/logcollector.sh journal' journal_script: '$SCRIPT_BASE/logcollector.sh journal'
podman_system_info_script: '$SCRIPT_BASE/logcollector.sh podman' podman_system_info_script: '$SCRIPT_BASE/logcollector.sh podman'
time_script: '$SCRIPT_BASE/logcollector.sh time'
# Execute the podman integration tests on all primary platforms and release # Execute the podman integration tests on all primary platforms and release

2
.gitignore vendored
View File

@ -34,3 +34,5 @@ release.txt
/test/goecho/goecho /test/goecho/goecho
.vscode* .vscode*
result result
# Necessary to prevent hack/tree-status.sh false-positive
/*runner_stats.log

View File

@ -76,5 +76,10 @@ case $1 in
# Any not-present packages will be listed as such # Any not-present packages will be listed as such
$PKG_LST_CMD "${PKG_NAMES[@]}" | sort -u $PKG_LST_CMD "${PKG_NAMES[@]}" | sort -u
;; ;;
time)
# Assumed to be empty/undefined outside of Cirrus-CI (.cirrus.yml)
# shellcheck disable=SC2154
if [[ -r "$STATS_LOGFILE" ]]; then cat "$STATS_LOGFILE"; fi
;;
*) die "Warning, $(basename $0) doesn't know how to handle the parameter '$1'" *) die "Warning, $(basename $0) doesn't know how to handle the parameter '$1'"
esac esac

View File

@ -253,6 +253,16 @@ dotest() {
|& logformatter |& logformatter
} }
# Nearly every task in .cirrus.yml makes use of this shell script
# wrapped by /usr/bin/time to collect runtime statistics. Because the
# --output option is used to log stats to a file, every child-process
# inherits an open FD3 pointing at the log. However, some testing
# operations depend on making use of FD3, and so it must be explicitly
# closed here (and for all further child-processes).
# STATS_LOGFILE assumed empty/undefined outside of Cirrus-CI (.cirrus.yml)
# shellcheck disable=SC2154
exec 3<&-
msg "************************************************************" msg "************************************************************"
# Required to be defined by caller # Required to be defined by caller
# shellcheck disable=SC2154 # shellcheck disable=SC2154