Merge pull request #16414 from cevich/fix_cirrus_cron_jobs

[CI:BUILD] Fix cirrus cirrus-cron GHA workflow scripts, add checks, and tests
This commit is contained in:
OpenShift Merge Robot
2022-11-10 17:16:38 +00:00
committed by GitHub
8 changed files with 158 additions and 24 deletions

View File

@@ -14,7 +14,9 @@ elif [[ -z "$NAME_ID_FILEPATH" ]]; then # output filepath
err $(printf "$_errfmt" "\$NAME_ID_FILEPATH") err $(printf "$_errfmt" "\$NAME_ID_FILEPATH")
fi fi
mkdir -p artifacts confirm_gha_environment
mkdir -p ./artifacts
cat > ./artifacts/query_raw.json << "EOF" cat > ./artifacts/query_raw.json << "EOF"
query { query {
ownerRepository(platform: "LINUX", owner: "@@OWNER@@", name: "@@REPO@@") { ownerRepository(platform: "LINUX", owner: "@@OWNER@@", name: "@@REPO@@") {
@@ -32,14 +34,11 @@ EOF
# https://cirrus-ci.com/explorer # https://cirrus-ci.com/explorer
owner=$(cut -d '/' -f 1 <<<"$GITHUB_REPOSITORY") owner=$(cut -d '/' -f 1 <<<"$GITHUB_REPOSITORY")
repo=$(cut -d '/' -f 2 <<<"$GITHUB_REPOSITORY") repo=$(cut -d '/' -f 2 <<<"$GITHUB_REPOSITORY")
sed -i -r -e "s/@@OWNER@@/$owner/g" -e "s/@@REPO@@/$repo/g" ./artifacts/query_raw.json sed -r -e "s/@@OWNER@@/$owner/g" -e "s/@@REPO@@/$repo/g" \
./artifacts/query_raw.json > ./artifacts/query.json
# Easier to debug in error-reply when query is compacted
tr -d '\n' < ./artifacts/query_raw.json | tr -s ' ' | tee ./artifacts/query.json | \
jq --indent 4 --color-output .
if grep -q '@@' ./artifacts/query.json; then if grep -q '@@' ./artifacts/query.json; then
err "Found unreplaced substitution token in raw query JSON" err "Found unreplaced substitution token in query JSON"
fi fi
# The query should never ever return an empty-list, unless there are no cirrus-cron # The query should never ever return an empty-list, unless there are no cirrus-cron
@@ -47,7 +46,7 @@ fi
# be running anyway. # be running anyway.
filt_head='.data.ownerRepository.cronSettings' filt_head='.data.ownerRepository.cronSettings'
gql $(./artifacts/query.json) "$filt_head" > ./artifacts/reply.json gql "$(<./artifacts/query.json)" "$filt_head" > ./artifacts/reply.json
# e.x. reply.json # e.x. reply.json
# { # {
# "data": { # "data": {
@@ -87,5 +86,6 @@ records=$(wc --words "$NAME_ID_FILEPATH" | cut -d ' ' -f 1)
failures=$((records/2)) failures=$((records/2))
# Set the output of this step. # Set the output of this step.
# Ref: https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-an-output-parameter # Ref: https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-an-output-parameter
# shellcheck disable=SC2154
echo "failures::$failures" >> $GITHUB_OUTPUT echo "failures::$failures" >> $GITHUB_OUTPUT
echo "Total failed Cirrus-CI cron builds: $failures" echo "Total failed Cirrus-CI cron builds: $failures"

View File

@@ -8,10 +8,28 @@ msg() {
# Must be called from top-level of script, not another function. # Must be called from top-level of script, not another function.
err() { err() {
# Ref: https://docs.github.com/en/free-pro-team@latest/actions/reference/workflow-commands-for-github-actions # Ref: https://docs.github.com/en/free-pro-team@latest/actions/reference/workflow-commands-for-github-actions
msg "::error file=${BASH_SOURCE[1]},line=${BASH_LINENO[0]}::$@" msg "::error file=${BASH_SOURCE[1]},line=${BASH_LINENO[0]}::$*"
exit 1 exit 1
} }
# Confirm the script is running inside a github-actions workflow, then
# change into the workflow's workspace directory.
# Globals:   GITHUB_OUTPUT, GITHUB_WORKFLOW, GITHUB_WORKSPACE (all read)
# Arguments: none
# Outputs:   On a failed check, an error annotation emitted through err()
# Returns:   0 with the working directory set to $GITHUB_WORKSPACE;
#            any failed check goes through err(), which exits non-zero.
confirm_gha_environment() {
    local _err_fmt
    _err_fmt="I don't seem to be running from a github-actions workflow"

    # These are all defined by github-actions
    # shellcheck disable=SC2154
    if [[ -z "$GITHUB_OUTPUT" ]]; then
        err "$_err_fmt, \$GITHUB_OUTPUT is empty"
    elif [[ -z "$GITHUB_WORKFLOW" ]]; then
        err "$_err_fmt, \$GITHUB_WORKFLOW is empty"
    elif [[ ! -d "$GITHUB_WORKSPACE" ]]; then
        err "$_err_fmt, \$GITHUB_WORKSPACE='$GITHUB_WORKSPACE' isn't a directory"
    fi

    # A directory can exist and still refuse entry (e.g. permissions).
    # Report that explicitly instead of relying on the caller's errexit
    # setting to notice a silent non-zero return.
    # shellcheck disable=SC2154
    cd "$GITHUB_WORKSPACE" || \
        err "$_err_fmt, unable to change into \$GITHUB_WORKSPACE='$GITHUB_WORKSPACE'"
}
# Using python3 here is a compromise for readability and # Using python3 here is a compromise for readability and
# properly handling quote, control and unicode character encoding. # properly handling quote, control and unicode character encoding.
escape_query() { escape_query() {
@@ -45,6 +63,8 @@ gql() {
msg "::error file=${BASH_SOURCE[1]},line=${BASH_LINENO[0]}::Invalid query JSON: $query" msg "::error file=${BASH_SOURCE[1]},line=${BASH_LINENO[0]}::Invalid query JSON: $query"
return 1 return 1
fi fi
# SECRET_CIRRUS_API_KEY is defined as a github secret
# shellcheck disable=SC2154
if output=$(curl \ if output=$(curl \
--request POST \ --request POST \
--silent \ --silent \

View File

@@ -9,14 +9,16 @@ set -eo pipefail
source $(dirname "${BASH_SOURCE[0]}")/lib.sh source $(dirname "${BASH_SOURCE[0]}")/lib.sh
_errfmt="Expecting %s value to not be empty" _errfmt="Expecting %s value to not be empty"
# NAME_ID_FILEPATH is defined by workflow YAML
# shellcheck disable=SC2154
if [[ -z "$GITHUB_REPOSITORY" ]]; then if [[ -z "$GITHUB_REPOSITORY" ]]; then
err $(printf "$_errfmt" "\$GITHUB_REPOSITORY") err $(printf "$_errfmt" "\$GITHUB_REPOSITORY")
elif [[ -z "$GITHUB_WORKFLOW" ]]; then
err $(printf "$_errfmt" "\$GITHUB_WORKFLOW")
elif [[ ! -r "$NAME_ID_FILEPATH" ]]; then elif [[ ! -r "$NAME_ID_FILEPATH" ]]; then
err "Expecting \$NAME_ID_FILEPATH value ($NAME_ID_FILEPATH) to be a readable file" err "Expecting \$NAME_ID_FILEPATH value ($NAME_ID_FILEPATH) to be a readable file"
fi fi
confirm_gha_environment
mkdir -p artifacts mkdir -p artifacts
( (
echo "Detected one or more Cirrus-CI cron-triggered jobs have failed recently:" echo "Detected one or more Cirrus-CI cron-triggered jobs have failed recently:"
@@ -27,6 +29,8 @@ mkdir -p artifacts
done < "$NAME_ID_FILEPATH" done < "$NAME_ID_FILEPATH"
echo "" echo ""
# Defined by github-actions
# shellcheck disable=SC2154
echo "# Source: ${GITHUB_WORKFLOW} workflow on ${GITHUB_REPOSITORY}." echo "# Source: ${GITHUB_WORKFLOW} workflow on ${GITHUB_REPOSITORY}."
# Separate content from sendgrid.com automatic footer. # Separate content from sendgrid.com automatic footer.
echo "" echo ""

View File

@@ -25,12 +25,16 @@ set -eo pipefail
source $(dirname "${BASH_SOURCE[0]}")/lib.sh source $(dirname "${BASH_SOURCE[0]}")/lib.sh
_errfmt="Expecting %s value to not be empty" _errfmt="Expecting %s value to not be empty"
# NAME_ID_FILEPATH is defined by workflow YAML
# shellcheck disable=SC2154
if [[ -z "$SECRET_CIRRUS_API_KEY" ]]; then if [[ -z "$SECRET_CIRRUS_API_KEY" ]]; then
err $(printf "$_errfmt" "\$SECRET_CIRRUS_API_KEY") err $(printf "$_errfmt" "\$SECRET_CIRRUS_API_KEY")
elif [[ ! -r "$NAME_ID_FILEPATH" ]]; then # output from cron_failures.sh elif [[ ! -r "$NAME_ID_FILEPATH" ]]; then # output from cron_failures.sh
err $(printf "Expecting %s value to be a readable file" "\$NAME_ID_FILEPATH") err $(printf "Expecting %s value to be a readable file" "\$NAME_ID_FILEPATH")
fi fi
confirm_gha_environment
mkdir -p artifacts mkdir -p artifacts
# If there are no tasks, don't fail reading the file # If there are no tasks, don't fail reading the file
truncate -s 0 ./artifacts/rerun_tids.txt truncate -s 0 ./artifacts/rerun_tids.txt
@@ -92,7 +96,9 @@ cat "$NAME_ID_FILEPATH" | \
# Check-value returned if the gql call was successful # Check-value returned if the gql call was successful
canary=$(uuidgen) canary=$(uuidgen)
# Ensure the trailing ',' is stripped from the end (would be invalid JSON) # Ensure the trailing ',' is stripped from the end (would be invalid JSON)
task_ids=$(printf '[%s]' $(printf '"%s",' ${rerun_tasks[@]} | head -c -1)) # Rely on shell word-splitting in this case.
# shellcheck disable=SC2048
task_ids=$(printf '[%s]' $(printf '"%s",' ${rerun_tasks[*]} | head -c -1))
rerun_m=" rerun_m="
mutation { mutation {
batchReRun(input: { batchReRun(input: {
@@ -105,8 +111,12 @@ cat "$NAME_ID_FILEPATH" | \
} }
" "
filter='.data.batchReRun.clientMutationId' filter='.data.batchReRun.clientMutationId'
result=$(gql "$rerun_m" "$filter") if [[ ! "$NAME" =~ "testing" ]]; then # see test.sh
if [[ $(jq -r -e "$filter"<<<"$result") != "$canary" ]]; then result=$(gql "$rerun_m" "$filter")
err "Attempt to re-run tasks for build $BID failed: ${rerun_tasks[@]}" if [[ $(jq -r -e "$filter"<<<"$result") != "$canary" ]]; then
err "Attempt to re-run tasks for build $BID failed: ${rerun_tasks[*]}"
fi
else
warn "Test-mode: Would have sent GraphQL request: '$rerun_m'"
fi fi
done done

View File

@@ -0,0 +1,89 @@
# This script attempts to confirm functional github action scripts.
# It expects to be called from Cirrus-CI, in a special execution
# environment. Any use outside this environment will probably fail.
set -eo pipefail
# Refuse to run unless PREBUILD evaluates to a non-zero number.
# Defined by setup_environment.sh
# shellcheck disable=SC2154
((PREBUILD)) || {
    echo "Not operating under expected environment"
    exit 1
}
# Assert that a file's content matches an extended regular expression.
# Arguments: $1 - extended regular expression (grep -E syntax)
#            $2 - path of the file to search
# Outputs:   nothing on a match; otherwise die() receives a message that
#            quotes both the expression and the file's content.
# Returns:   0 on a match; on no match the result is whatever die() does.
expect_regex() {
    local expected_regex
    local input_file
    expected_regex="$1"
    input_file="$2"
    # 'grep -E' replaces the obsolescent 'egrep'. The '--' and the quoting
    # keep expressions starting with '-' and paths containing whitespace
    # from being misread as options or split into several arguments.
    grep -E -q -- "$expected_regex" "$input_file" || \
        die "No match to '$expected_regex' in '$(<"$input_file")'"
}
# Prepare a stand-in github-actions environment: confirm the Cirrus-CI
# variables are set, then point the GITHUB_* variables and
# NAME_ID_FILEPATH at freshly created temporary files/directories.
req_env_vars CIRRUS_CI CIRRUS_REPO_FULL_NAME CIRRUS_WORKING_DIR CIRRUS_BUILD_ID

# Defined by the CI system
# NOTE(review): 'fail' is not defined anywhere in the visible source (the
# other error paths here call 'die') -- confirm the calling environment
# provides it.
# shellcheck disable=SC2154
cd "$CIRRUS_WORKING_DIR" || fail

header="Testing cirrus-cron github-action script:"
msg "$header cron_failures.sh"

base="$CIRRUS_WORKING_DIR/.github/actions/check_cirrus_cron"
# Don't care about mktemp return value
# shellcheck disable=SC2155
export GITHUB_OUTPUT=$(mktemp -p '' cron_failures_output_XXXX)
# CIRRUS_REPO_FULL_NAME checked above in req_env_vars
# shellcheck disable=SC2154
export GITHUB_REPOSITORY="$CIRRUS_REPO_FULL_NAME"
# shellcheck disable=SC2155
export GITHUB_WORKSPACE=$(mktemp -d -p '' cron_failures_workspace_XXXX)
export GITHUB_WORKFLOW="testing"
# shellcheck disable=SC2155
export NAME_ID_FILEPATH=$(mktemp -p '' cron_failures_data_XXXX)
# Single quotes postpone expansion until the trap fires, so each path is
# handed to rm as exactly one argument even if it contains whitespace.
trap 'rm -rf -- "$GITHUB_OUTPUT" "$GITHUB_WORKSPACE" "$NAME_ID_FILEPATH"' EXIT
#####

# Run cron_failures.sh from an unrelated directory (/tmp), capture its
# combined stdout+stderr, and check the output for the expected markers.
cd /tmp || fail
# Replace newlines and indentation to make egrep easier
if ! "$base/cron_failures.sh" |& \
        tr -s '[:space:]' ' ' > "$GITHUB_WORKSPACE/output"; then
    die "Failed: $base/cron_failures.sh with output '$(<"$GITHUB_WORKSPACE/output")'"
fi

expect_regex \
    'result.+data.+ownerRepository.+cronSettings.+endgroup' \
    "$GITHUB_WORKSPACE/output"
#####

# Run make_email_body.sh and check that the e-mail body it writes starts
# with the expected notification text.
msg "$header make_email_body.sh"
# It's possible no cirrus-cron jobs actually failed
echo '' >> "$NAME_ID_FILEPATH"
# Don't need to test stdout/stderr of this
if ! "$base/make_email_body.sh"; then
    die "make_email_body.sh failed"
fi

expect_regex \
    '^Detected.+Cirrus-CI.+failed.*' \
    "$GITHUB_WORKSPACE/artifacts/email_body.txt"
#####

# Run rerun_failed_tasks.sh with a placeholder API key and a single record
# named 'testing', then check its output for the expected progress text.
msg "$header rerun_failed_tasks.sh"
export SECRET_CIRRUS_API_KEY=testing-nottherightkey
# test.sh is sensitive to the 'testing' name. Var. defined by cirrus-ci
# shellcheck disable=SC2154
echo "testing $CIRRUS_BUILD_ID" > "$NAME_ID_FILEPATH"
if ! "$base/rerun_failed_tasks.sh" |& \
        tr -s '[:space:]' ' ' > "$GITHUB_WORKSPACE/rerun_output"; then
    die "rerun_failed_tasks.sh failed"
fi

expect_regex \
    "Posting GraphQL Query.+$CIRRUS_BUILD_ID.+Selecting.+re-run" \
    "$GITHUB_WORKSPACE/rerun_output"

View File

@@ -32,7 +32,7 @@ jobs:
cron_failures: cron_failures:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 - uses: actions/checkout@v3
with: with:
persist-credentials: false persist-credentials: false
@@ -47,7 +47,7 @@ jobs:
- if: steps.cron.outputs.failures > 0 - if: steps.cron.outputs.failures > 0
name: Send failure notification e-mail name: Send failure notification e-mail
# Ref: https://github.com/dawidd6/action-send-mail # Ref: https://github.com/dawidd6/action-send-mail
uses: dawidd6/action-send-mail@a80d851dc950256421f1d1d735a2dc1ef314ac8f # v2.2.2 uses: dawidd6/action-send-mail@v3.7.1
with: with:
server_address: ${{secrets.ACTION_MAIL_SERVER}} server_address: ${{secrets.ACTION_MAIL_SERVER}}
server_port: 465 server_port: 465
@@ -59,14 +59,14 @@ jobs:
body: file://./artifacts/email_body.txt body: file://./artifacts/email_body.txt
- if: always() - if: always()
uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 uses: actions/upload-artifact@v3
with: with:
name: ${{ github.job }}_artifacts name: ${{ github.job }}_artifacts
path: artifacts/* path: artifacts/*
- if: failure() - if: failure()
name: Send error notification e-mail name: Send error notification e-mail
uses: dawidd6/action-send-mail@a80d851dc950256421f1d1d735a2dc1ef314ac8f # v2.2.2 uses: dawidd6/action-send-mail@v3.7.1
with: with:
server_address: ${{secrets.ACTION_MAIL_SERVER}} server_address: ${{secrets.ACTION_MAIL_SERVER}}
server_port: 465 server_port: 465

View File

@@ -14,6 +14,7 @@ on:
# Debug: Allow triggering job manually in github-actions WebUI # Debug: Allow triggering job manually in github-actions WebUI
workflow_dispatch: {} workflow_dispatch: {}
env: env:
# Debug-mode can reveal secrets, only enable by a secret value. # Debug-mode can reveal secrets, only enable by a secret value.
# Ref: https://help.github.com/en/actions/configuring-and-managing-workflows/managing-a-workflow-run#enabling-step-debug-logging # Ref: https://help.github.com/en/actions/configuring-and-managing-workflows/managing-a-workflow-run#enabling-step-debug-logging
@@ -28,10 +29,10 @@ permissions:
contents: read contents: read
jobs: jobs:
cron_failures: cron_rerun:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@629c2de402a417ea7690ca6ce3f33229e27606a5 # v2 - uses: actions/checkout@v3
with: with:
persist-credentials: false persist-credentials: false
@@ -42,14 +43,15 @@ jobs:
- if: steps.cron.outputs.failures > 0 - if: steps.cron.outputs.failures > 0
shell: bash shell: bash
run: './.github/actions/check_cirrus_cron/rerun_failed_tasks.sh' run: './.github/actions/check_cirrus_cron/rerun_failed_tasks.sh'
uses: actions/upload-artifact@82c141cc518b40d92cc801eee768e7aafc9c2fa2 # v2
- uses: actions/upload-artifact@82c141cc518b40d92cc801eee768e7aafc9c2fa2 # v2
with: with:
name: ${{ github.job }}_artifacts name: ${{ github.job }}_artifacts
path: artifacts/* path: artifacts/*
- if: failure() - if: failure()
name: Send error notification e-mail name: Send error notification e-mail
uses: dawidd6/action-send-mail@a80d851dc950256421f1d1d735a2dc1ef314ac8f # v2.2.2 uses: dawidd6/action-send-mail@v3.7.1
with: with:
server_address: ${{secrets.ACTION_MAIL_SERVER}} server_address: ${{secrets.ACTION_MAIL_SERVER}}
server_port: 465 server_port: 465

View File

@@ -9,8 +9,10 @@ set -eo pipefail
# prevent wasting time on tests that can't succeed due to some # prevent wasting time on tests that can't succeed due to some
# outage, failure, or missed expectation. # outage, failure, or missed expectation.
set -a
source /etc/automation_environment source /etc/automation_environment
source $AUTOMATION_LIB_PATH/common_lib.sh source $AUTOMATION_LIB_PATH/common_lib.sh
set +a
req_env_vars CI DEST_BRANCH IMAGE_SUFFIX TEST_FLAVOR TEST_ENVIRON \ req_env_vars CI DEST_BRANCH IMAGE_SUFFIX TEST_FLAVOR TEST_ENVIRON \
PODBIN_NAME PRIV_NAME DISTRO_NV AUTOMATION_LIB_PATH \ PODBIN_NAME PRIV_NAME DISTRO_NV AUTOMATION_LIB_PATH \
@@ -21,6 +23,7 @@ req_env_vars CI DEST_BRANCH IMAGE_SUFFIX TEST_FLAVOR TEST_ENVIRON \
# shellcheck disable=SC2154 # shellcheck disable=SC2154
cd $CIRRUS_WORKING_DIR cd $CIRRUS_WORKING_DIR
msg "Checking Cirrus YAML"
# Defined by CI config. # Defined by CI config.
# shellcheck disable=SC2154 # shellcheck disable=SC2154
showrun $SCRIPT_BASE/cirrus_yaml_test.py showrun $SCRIPT_BASE/cirrus_yaml_test.py
@@ -28,13 +31,19 @@ showrun $SCRIPT_BASE/cirrus_yaml_test.py
# Defined by CI config. # Defined by CI config.
# shellcheck disable=SC2154 # shellcheck disable=SC2154
if [[ "${DISTRO_NV}" =~ fedora ]]; then if [[ "${DISTRO_NV}" =~ fedora ]]; then
msg "Checking shell scripts"
showrun ooe.sh dnf install -y ShellCheck # small/quick addition showrun ooe.sh dnf install -y ShellCheck # small/quick addition
showrun shellcheck --color=always --format=tty \ showrun shellcheck --color=always --format=tty \
--shell=bash --external-sources \ --shell=bash --external-sources \
--enable add-default-case,avoid-nullary-conditions,check-unassigned-uppercase \ --enable add-default-case,avoid-nullary-conditions,check-unassigned-uppercase \
--exclude SC2046,SC2034,SC2090,SC2064 \ --exclude SC2046,SC2034,SC2090,SC2064 \
--wiki-link-count=0 --severity=warning \ --wiki-link-count=0 --severity=warning \
$SCRIPT_BASE/*.sh hack/get_ci_vm.sh $SCRIPT_BASE/*.sh \
./.github/actions/check_cirrus_cron/* \
hack/get_ci_vm.sh
export PREBUILD=1
showrun bash ${CIRRUS_WORKING_DIR}/.github/actions/check_cirrus_cron/test.sh
fi fi
msg "Checking 3rd party network service connectivity" msg "Checking 3rd party network service connectivity"