Cirrus: Detailed CPU/Memory/Time runner.sh stats

On several occasions, there have been questions about CPU/Memory/IO
trends in testing over time. Start collecting this data for all jobs,
using a common/stable format so that trending analysis can be performed
within/across multiple Cirrus-CI builds. This PR doesn't add any related
tooling, it simply arranges for the collection of the data.

Stats generation is done at the orchestration level to guarantee they
reflect everything happening inside `runner.sh`.  For example, the
container-based tests re-exec `runner.sh` inside a container, but
we're only interested in the top-level stats.

Update all tasks to include collection of the stats file.
Unfortunately, due to the way the Cirrus-CI YAML parser works,
it is *not* possible to alias the artifacts collection more clearly,
for example:

```yaml
    always:
        <<: *runner_stats
        <<: *logs_artifacts
```

Signed-off-by: Chris Evich <cevich@redhat.com>
This commit is contained in:
Chris Evich
2020-10-22 08:19:31 -04:00
parent abc77b4221
commit f44af20561
4 changed files with 36 additions and 1 deletions

View File

@ -17,6 +17,9 @@ env:
CIRRUS_SHELL: "/bin/bash"
# Save a little typing (path relative to $CIRRUS_WORKING_DIR)
SCRIPT_BASE: "./contrib/cirrus"
# Runner statistics log file path/name
STATS_LOGFILE_SFX: 'runner_stats.log'
STATS_LOGFILE: '$GOSRC/${CIRRUS_TASK_NAME}-${STATS_LOGFILE_SFX}'
####
#### Cache-image names to test with (double-quotes around names are critical)
@ -75,7 +78,11 @@ ext_svc_check_task:
TEST_FLAVOR: ext_svc
CTR_FQIN: ${FEDORA_CONTAINER_FQIN}
setup_script: &setup '$GOSRC/$SCRIPT_BASE/setup_environment.sh'
main_script: &main '$GOSRC/$SCRIPT_BASE/runner.sh'
main_script: &main '/usr/bin/time --verbose --output="$STATS_LOGFILE" $GOSRC/$SCRIPT_BASE/runner.sh'
always: &runner_stats
runner_stats_artifacts:
path: ./*-${STATS_LOGFILE_SFX}
type: text/plain
# Execute some quick checks to confirm this YAML file and all
@ -91,6 +98,7 @@ automation_task:
TEST_ENVIRON: container
setup_script: *setup
main_script: *main
always: *runner_stats
# This task use to be called 'gating', however that name is being
@ -126,6 +134,7 @@ smoke_task:
make install.tools
setup_script: *setup
main_script: *main
always: *runner_stats
# N/B: This task is critical. It builds all binaries and release archives
@ -186,6 +195,7 @@ build_task:
setup_script: *setup
main_script: *main
always: &binary_artifacts
<<: *runner_stats
gosrc_artifacts:
path: ./* # Grab everything in top-level $GOSRC
type: application/octet-stream
@ -220,6 +230,7 @@ validate_task:
clone_script: *noop
setup_script: *setup
main_script: *main
always: *runner_stats
# Exercise the "libpod" API with a small set of common
@ -239,6 +250,7 @@ bindings_task:
clone_script: *noop # Comes from cache
setup_script: *setup
main_script: *main
always: *runner_stats
# Build the "libpod" API documentation `swagger.yaml` for eventual
@ -275,6 +287,7 @@ endpoint_task:
clone_script: *full_clone # build-cache not available to container tasks
setup_script: *setup
main_script: *main
always: *runner_stats
# Check that all included go modules from other sources match
@ -293,6 +306,7 @@ vendor_task:
clone_script: *full_clone # build-cache not available to container tasks
setup_script: *setup
main_script: *main
always: *runner_stats
# There are several other important variations of podman which
@ -394,6 +408,7 @@ docker-py_test_task:
clone_script: *noop # Comes from cache
setup_script: *setup
main_script: *main
always: *runner_stats
# Does exactly what it says, execute the podman unit-tests on all primary
@ -412,6 +427,7 @@ unit_test_task:
gopath_cache: *ro_gopath_cache
setup_script: *setup
main_script: *main
always: *runner_stats
apiv2_test_task:
@ -428,6 +444,7 @@ apiv2_test_task:
setup_script: *setup
main_script: *main
always: &logs_artifacts
<<: *runner_stats
# Required for `contrib/cirrus/logformatter` to work properly
html_artifacts:
path: ./*.html
@ -438,6 +455,7 @@ apiv2_test_task:
audit_log_script: '$SCRIPT_BASE/logcollector.sh audit'
journal_script: '$SCRIPT_BASE/logcollector.sh journal'
podman_system_info_script: '$SCRIPT_BASE/logcollector.sh podman'
time_script: '$SCRIPT_BASE/logcollector.sh time'
# Execute the podman integration tests on all primary platforms and release

2
.gitignore vendored
View File

@ -34,3 +34,5 @@ release.txt
/test/goecho/goecho
.vscode*
result
# Necessary to prevent hack/tree-status.sh false-positive
/*runner_stats.log

View File

@ -76,5 +76,10 @@ case $1 in
# Any not-present packages will be listed as such
$PKG_LST_CMD "${PKG_NAMES[@]}" | sort -u
;;
time)
# Assumed to be empty/undefined outside of Cirrus-CI (.cirrus.yml)
# shellcheck disable=SC2154
if [[ -r "$STATS_LOGFILE" ]]; then cat "$STATS_LOGFILE"; fi
;;
*) die "Warning, $(basename $0) doesn't know how to handle the parameter '$1'"
esac

View File

@ -253,6 +253,16 @@ dotest() {
|& logformatter
}
# Nearly every task in .cirrus.yml makes use of this shell script
# wrapped by /usr/bin/time to collect runtime statistics. Because the
# --output option is used to log stats to a file, every child-process
# inherits an open FD3 pointing at the log. However, some testing
# operations depend on making use of FD3, and so it must be explicitly
# closed here (and for all further child-processes).
# STATS_LOGFILE assumed empty/undefined outside of Cirrus-CI (.cirrus.yml)
# shellcheck disable=SC2154
exec 3<&-
msg "************************************************************"
# Required to be defined by caller
# shellcheck disable=SC2154