Merge pull request #20136 from cevich/credential_scanning_config

[CI:DOCS] Implement secrets/credential scanning
This commit is contained in:
Chris Evich
2023-10-30 11:43:01 -04:00
committed by GitHub
6 changed files with 398 additions and 2 deletions

View File

@ -333,6 +333,7 @@ swagger_task:
<<: *stdenvars
TEST_FLAVOR: swagger
CTR_FQIN: 'quay.io/libpod/gcsupld:${IMAGE_SUFFIX}'
# N/B: Do not modify below items w/o update to references in .gitleaks.toml
GCPJSON: ENCRYPTED[927dc01e755eaddb4242b0845cf86c9098d1e3dffac38c70aefb1487fd8b4fe6dd6ae627b3bffafaba70e2c63172664e]
GCPNAME: ENCRYPTED[c145e9c16b6fb88d476944a454bf4c1ccc84bb4ecaca73bdd28bdacef0dfa7959ebc8171a27b2e4064d66093b2cdba49]
GCPPROJECT: 'libpod-218412'
@ -1028,6 +1029,7 @@ meta_task:
${WINDOWS_AMI}
BUILDID: "${CIRRUS_BUILD_ID}"
REPOREF: "${CIRRUS_REPO_NAME}"
# N/B: Do not modify below items w/o update to references in .gitleaks.toml
AWSINI: ENCRYPTED[21b2db557171b11eb5abdbccae593f48c9caeba86dfcc4d4ff109edee9b4656ab6720a110dadfcd51e88cc59a71cc7af]
GCPJSON: ENCRYPTED[3a198350077849c8df14b723c0f4c9fece9ebe6408d35982e7adf2105a33f8e0e166ed3ed614875a0887e1af2b8775f4]
GCPNAME: ENCRYPTED[2f9738ef295a706f66a13891b40e8eaa92a89e0e87faf8bed66c41eca72bf76cfd190a6f2d0e8444c631fdf15ed32ef6]

196
.github/workflows/scan-secrets.yml vendored Normal file
View File

@ -0,0 +1,196 @@
---
name: Scan for secret leaks and changes
on:
# Block PR modification of workflow
pull_request_target:
push:
workflow_dispatch:
# N/B: Default write-all permission for pull_request_target
permissions: read-all
env:
# How far back in history to go when scanning a branch/tag
# This is most significant when scanning vs new release-branches
# with commit IDs that may differ from those encoded in the
# .gitleaks/baseline.json data (which always comes from
# the default branch).
# TODO: Is there any way to not hard-code this?
# N/B: This value is reused by Cirrus-CI, see contrib/cirrus/prebuild.sh
brdepth: 50
# GitLeaks container image to use.
# N/B: Updating this is hard to test, esp. care must be exercised re: new leak-ignore behaviors
# (example ref: 'Check for inline scan overrides' step below). Also b/c this workflow is not
# intended to be used with the 'pull_request' trigger - as doing so defeats gitleaks scan
# result trustworthiness.
# N/B: This value is reused by Cirrus-CI, see contrib/cirrus/prebuild.sh
glfqin: ghcr.io/gitleaks/gitleaks@sha256:e5f6d1a62786affd1abd882ecc73e9353ce78adea1650711f6e351767723712d # v8.18.0
# General arguments to pass for all execution contexts
# Ref: https://github.com/gitleaks/gitleaks#usage
# N/B: This value is reused by Cirrus-CI, see contrib/cirrus/prebuild.sh
glargs: >-
--exit-code=1
--no-banner
--verbose
--log-level=debug
--source=/subject
--config=/default/.gitleaks.toml
--report-path=/report/gitleaks-report.json
--baseline-path=/default/.gitleaks/baseline.json
# Where to send notification e-mail
RCPTCSV: podman-monitor@lists.podman.io
jobs:
scan-secrets:
runs-on: ubuntu-latest
env:
# Reduce duplication & command-line length
gitlogcmd: "git -C ${{ github.workspace }}/_subject log -p -U0"
steps:
- name: Define git log command and options for re-use
id: gitlog
shell: bash
run: |
set -exuo pipefail
if [[ "${{ github.base_ref }}" == "" ]]; then # It's a branch/tag
echo "range=-${{ env.brdepth }}" >> $GITHUB_OUTPUT
else # It's a PR
echo "range=${{ github.event.pull_request.head.sha }}~${{ github.event.pull_request.commits }}..HEAD" >> $GITHUB_OUTPUT
fi
# On a large repo, there's no need to check out the entire thing. For PRs
# the depth can be limited to one-greater than the number of PR commits.
# Unfortunately, GHA is incapable of performing simple math in-line.
- name: Do some simple math for PR clone depth
  if: github.base_ref != ''
  id: one_more_commit
  shell: bash
  run: |
    # Same strict-mode preamble as the other scripted steps in this job.
    set -euo pipefail
    # Publishes the step output 'depth' = (number of commits in the PR) + 1,
    # consumed as the fetch-depth of the PR checkout.
    echo "depth=$((${{ github.event.pull_request.commits }} + 1))" >> "$GITHUB_OUTPUT"
- name: Show important context details
shell: bash
run: |
set -euo pipefail
echo "The workspace path is '${{ github.workspace }}'"
echo "The github.base_ref value is '${{ github.base_ref }}'"
echo "The branch scan depth value is '${{ env.brdepth }}'"
echo "The PR clone depth value is '${{ steps.one_more_commit.outputs.depth }}'"
echo "The gitlogcmd value is '${{ env.gitlogcmd }}'"
echo "The gitlog range value is '${{ steps.gitlog.outputs.range }}'"
echo "The GitLeaks container FQIN is '${{ env.glfqin }}'"
echo "::group::The trigger event JSON"
jq --color-output --indent 2 --sort-keys . $GITHUB_EVENT_PATH
echo "::endgroup::"
# N/B: Use "_" prefixed paths to (somewhat) guard against clashes. GHA has some
# non-POLA behaviors WRT `${{ github.workspace }}` + checkout action.
- name: Checkout PR
if: github.base_ref != ''
uses: actions/checkout@v4
with:
persist-credentials: false
path: _subject
ref: ${{ github.event.pull_request.head.sha }}
fetch-depth: ${{ steps.one_more_commit.outputs.depth }}
- name: Checkout Branch/Tag
if: github.base_ref == ''
uses: actions/checkout@v4
with:
persist-credentials: false
path: _subject
fetch-depth: ${{ env.brdepth }}
# Trusted source of gitleaks config.
- name: Checkout default branch
uses: actions/checkout@v4
with:
persist-credentials: false
ref: ${{ github.event.repository.default_branch }}
path: _default
fetch-depth: 1
- name: Create report directory
shell: bash
run: |
set -exuo pipefail
mkdir ${{ github.workspace }}/_report
touch ${{ github.workspace }}/_report/gitleaks-report.json
# A force-push to a PR can obscure Cirrus-CI logs, but not GHA logs
- name: Show content being scanned
shell: bash
run: |
set -exuo pipefail
${{ env.gitlogcmd }} ${{ steps.gitlog.outputs.range }}
# Unfortunately gitleaks provides several in-built ways to
# completely bypass an alert within PR-level commits. Assume
# it's not possible to detect these with gitleaks-config rules.
- name: Check for inline scan overrides
  if: github.base_ref != '' # A PR
  shell: bash
  env:
    # Workaround erroneously detecting the string in this file
    _rx1: "gitleaks"
    _rx2: ":"
    _rx3: "allow"
  run: |
    set -euo pipefail
    # The override marker is assembled at runtime from the three env
    # fragments so its literal text never appears in this file.
    verboten_rx="${_rx1}${_rx2}${_rx3}"
    # git-log -G only lists commits whose patch adds or removes a line
    # matching the regex; empty output means no override was introduced.
    verboten=$(set -x ; ${{ env.gitlogcmd }} "-G$verboten_rx" ${{ steps.gitlog.outputs.range }})
    if [[ -n "$verboten" ]]; then
      # A workflow command must be terminated by a newline, otherwise the
      # first line of the grep output below is fused into the annotation.
      printf '::error::%s\n' 'Found comment(s) utilizing detection override(s) (see job log for details)'
      # Hack: Grep will never colorize an end of a line match
      echo "$verboten" | grep --color=always -E "($verboten_rx)|$"
      exit 1
    fi
    # An ignore-file shipped by the scanned (untrusted) checkout is rejected outright.
    if [[ -r "${{ github.workspace }}/_subject/.gitleaksignore" ]]; then
      printf '::error::%s\n' 'Detected a .gitleaksignore file from untrusted source.'
      exit 1
    fi
- name: Scan for secrets
shell: bash
# gitleaks entrypoint runs as gitleaks user (UID/GID 1000)
run: |
set -exuo pipefail
# Careful: Changes need coordination with contrib/cirrus/prebuild.sh
podman run --rm \
--security-opt=label=disable \
--userns=keep-id:uid=1000,gid=1000 \
-v ${{ github.workspace }}/_default:/default:ro \
-v ${{ github.workspace }}/_subject:/subject:ro \
-v ${{ github.workspace }}/_report:/report:rw \
$glfqin \
detect $glargs --log-opts=${{ steps.gitlog.outputs.range }}
- name: Collect scan report artifact
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v3
with:
name: gitleaks-report
path: ${{ github.workspace }}/_report/gitleaks-report.json
# Nobody monitors the actions-tab for failures, and may not see this
# fail on push to a nefarious PR. Send an e-mail alert to unmask
# this activity or some other general job failure.
- if: failure() && !contains(github.event.pull_request.labels.*.name,'BypassLeakNotification')
name: Send leak detection notification e-mail
uses: dawidd6/action-send-mail@v3.8.0
with:
server_address: ${{secrets.ACTION_MAIL_SERVER}}
server_port: 465
username: ${{secrets.ACTION_MAIL_USERNAME}}
password: ${{secrets.ACTION_MAIL_PASSWORD}}
subject: Addition|Change|Use of sensitive ${{github.repository}}-CI value
to: ${{env.RCPTCSV}}
from: ${{secrets.ACTION_MAIL_SENDER}}
body: "Please investigate: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}"

55
.gitleaks.toml Normal file
View File

@ -0,0 +1,55 @@
# Options Ref:
# https://github.com/gitleaks/gitleaks#configuration
[extend]
# useDefault will extend the base configuration with the default gitleaks config:
# https://github.com/zricethezav/gitleaks/blob/master/config/gitleaks.toml
useDefault = true
# Files exempt from scanning: the scanner's own configuration, its
# baseline data, and the scanning workflow itself.
[allowlist]
description = "Global allow list"
# Each entry is a regular expression matched against the file path,
# so literal dots are escaped.
paths = [
  '''^\.gitleaks\.toml''',
  '''^\.gitleaks/baseline\.json''',
  '''^\.github/workflows/scan-secrets\.yml'''
]
# Any ENCRYPTED[blahblahblah] entries are only valid from a Cirrus-CI
# execution context, but may appear in any file loadable by a starlark
# script. Though the actual strings are repo-specific and useless elsewhere.
# This check is here simply to raise red-flags on new definitions or movements
# of existing values. Operationally, the actual leak-risk is only present
# after Cirrus-CI decodes the value.
[[rules]]
id = "cirrus-ci_config-secrets"
description = "Cirrus-CI Configuration Secret ID"
regex = '''ENCRYPTED[\[][a-fA-F0-9]+\]'''
# Any *_credentials items in .cirrus.yml should not appear in any other context.
# Though Cirrus-CI restricts decoding of these values, this check is here to
# raise red-flags on any new definitions or movements.
[[rules]]
id = "cirrus-ci_cloud-credentials"
description = "Cirrus-CI Cloud service-account credentials"
regex = '''(gcp|aws)_credentials'''
# Changes to the scanning workflow trigger could be used to superficially mask a negative result.
# NOTE(review): the global allowlist also lists this workflow's path - confirm
# that this rule can still fire for that file.
[[rules]]
id = "scan-secrets_trigger"
description = "PR trigger change to secret-scanning workflow"
# Matches the plain 'pull_request:' trigger key.
regex = '''pull_request:'''
# 'path' is a regular expression; dots are escaped so only the literal
# file name matches.
path = '''\.github/workflows/scan-secrets\.yml'''
##### Podman Repo. specific items #####
[[rules]]
id = "podman_envar_credentials"
# From .cirrus.yml
description = "Service-account and other credentials with limited/specific and restricted access."
regex = '''GCPJSON|GCPNAME|AWSINI|GCPPROJECT'''
[[rules]]
id = "podman_github-action_secrets"
description = "Managed secrets for github-action workflows."
# From https://github.com/containers/podman/settings/secrets/actions
regex = '''SECRET_CIRRUS_API_KEY|ACTION_MAIL_.+|AZ_.+|MACOS_.+|QUAY_PODMAN_.+'''

111
.gitleaks/README.md Normal file
View File

@ -0,0 +1,111 @@
## Secret Scanning
### Overview
During the course of submitting a pull-request, it's possible that a
malicious-actor may try to reference and exfiltrate sensitive CI
values in their commits. This activity can be obscured relatively
easily via multiple methods.
Secret-scanning is an automated process for examining commits for
any mention, addition, or changes to potentially sensitive values.
It's **not** a perfect security solution, but can help thwart naive
attempts and innocent accidents alike.
### Mechanism
Whenever a PR is pushed to, an automated process will scan all
of its commits. This happens from an execution context outside
of the influence from any changes in the PR. When a detection is
made, the automated job will fail, and an e-mail notification will
be sent for review. The email is necessary because these jobs
aren't otherwise monitored, and a malicious-actor may attempt
to cover their tracks.
### Notifications
Scan failure notification e-mails are necessary because the
detection jobs may not be closely monitored. This makes it
more likely malicious-actions may go unnoticed. However,
project maintainers may bypass notification by adding a
`BypassLeakNotification` label to a PR that would otherwise trigger one.
### Configuration
The meaning of a "sensitive value" is very repository specific.
So addition of any new sensitive values to automation must be
reflected in the GitLeaks configuration. This change must
land on the repository's main branch before detection will be
active.
Additionally, there are sets of known sensitive values (e.g.
ssh keys) which could be added or referenced in the future.
To help account for this, GitLeaks is configured to read an
upstream source of well-known, common patterns that are
automatically included in all scans. These can **NOT** be
individually overridden by the repository configuration.
### Baseline
At times, there may be deliberate use/change of sensitive
values. This is accounted for by referencing a "baseline" of
known detections. For GitLeaks, new possible baseline items are
present within the detection report JSON (artifact) produced
with every automated execution.
Baseline items may also be produced locally, by running the
GitLeaks container (from the repo. root) using a command similar
to:
```
$ git_log_options="-50"
$ podman run --rm \
--security-opt=label=disable \
--userns=keep-id:uid=1000,gid=1000 \
-v $PWD:/subject:ro \
-v $PWD:/default:ro \
ghcr.io/gitleaks/gitleaks:latest \
detect \
--log-opts="$git_log_options" \
--source=/subject \
--config=/default/.gitleaks.toml \
--report-path=/dev/stdout \
--baseline-path=/default/.gitleaks/baseline.json
```
You may then copy-paste the necessary JSON list items into
the baseline file. In order to be effective on current or
future pull-requests, changes to the baseline or configuration
**MUST** be merged onto the `main` branch.
### Important notes
* The `.gitleaks.toml` file **must** be in the root of the repo.
due to it being used by third-party tooling.
* The scan will still run on PRs with a 'BypassLeakNotification' label.
This is intended to help in scanning test-cases and where updates
to the baseline are needed.
* Detection rules can be fairly complex, but they don't need to be.
When adding new rules, please be mindful whether or not they **REALLY**
are only valid from specific files. Also be mindful of potentially
generic names, for example 'debug' or 'base64' that may match
code comments.
* Baseline items are commit-specific! Meaning, if the commit which
caused the alert changes, the baseline-detection item will no-longer
match. Fixing this will likely generate additional e-mail notification
spam, so please be mindful of your merge and change sequence, as well
as content.
* There is an additional execution of the GitLeaks scanner during the
"pre-build" phase of Cirrus-CI. Results from this run are **NOT**
to be trusted in any way. This check is only present to catch
potential configuration or base-line data breaking changes.
* Scans **are** configured to execute against all branches and new
tags. Because the configuration and baseline data is always sourced
from `main`, this check is necessary to alert on changed, non-conforming
commits. Just remember to make any needed corrections on the `main`
branch configuration or baseline to mitigate.

1
.gitleaks/baseline.json Normal file
View File

@ -0,0 +1 @@
[]

View File

@ -33,9 +33,21 @@ if grep -n ^$'\t' test/system/*; then
die "Found leading tabs in system tests. Use spaces to indent, not tabs."
fi
# Defined by CI config.
# Lookup 'env' dict. string value from key specified as argument from YAML file.
#
# Arguments:
#   $1 - name of a key inside the top-level 'env' mapping of
#        .github/workflows/scan-secrets.yml (under $CIRRUS_WORKING_DIR)
# Output:
#   The value stored under that key, printed on stdout.
get_env_key() {
    local yaml
    local script
    yaml="$CIRRUS_WORKING_DIR/.github/workflows/scan-secrets.yml"
    # The file path and key are handed to python through argv instead of being
    # interpolated into the source text, so quotes or backslashes in either
    # value cannot break (or change the meaning of) the script.
    script="import sys; from yaml import safe_load; print(safe_load(open(sys.argv[1]))['env'][sys.argv[2]])"
    python -c "$script" "$yaml" "$1"
}
# Only need to check CI-stuffs on a single build-task, there's only ever
# one prior-fedora task so use that one.
# Envars all defined by CI config.
# shellcheck disable=SC2154
if [[ "${DISTRO_NV}" =~ fedora ]]; then
if [[ "${DISTRO_NV}" == "$PRIOR_FEDORA_NAME" ]]; then
msg "Checking shell scripts"
showrun ooe.sh dnf install -y ShellCheck # small/quick addition
showrun shellcheck --format=tty \
@ -59,6 +71,25 @@ if [[ "${DISTRO_NV}" =~ fedora ]]; then
export PREBUILD=1
showrun bash ${CIRRUS_WORKING_DIR}/.github/actions/check_cirrus_cron/test.sh
fi
# Note: This may detect leaks, but should not be considered authoritative
# since any PR could modify the contents or arguments. This check is
# simply here to catch potential configuration or baseline-data breaking changes.
msg "Checking GitLeaks functions with current CLI args, configuration, and baseline JSON"
brdepth=$(get_env_key 'brdepth')
glfqin=$(get_env_key 'glfqin')
glargs=$(get_env_key 'glargs')
showrun podman run --rm \
--security-opt=label=disable \
--userns=keep-id:uid=1000,gid=1000 \
-v $CIRRUS_WORKING_DIR:/subject:ro \
-v $CIRRUS_WORKING_DIR:/default:ro \
--tmpfs /report:rw,size=256k,mode=1777 \
$glfqin \
detect \
--log-opts=-$brdepth \
$glargs
fi
msg "Checking 3rd party network service connectivity"