From ac5b8a478acba647d6c8a7e6630f179ae2684c03 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 24 Mar 2026 08:56:04 -0700 Subject: [PATCH] ci: add supply chain audit workflow for PR scanning (#2816) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scans every PR diff for patterns associated with supply chain attacks: CRITICAL (blocks merge): - .pth files (auto-execute on Python startup — litellm attack vector) - base64 decode + exec/eval combo (obfuscated payload execution) - subprocess with encoded/obfuscated commands WARNING (comment only, no block): - base64 encode/decode alone (legitimate uses: images, JWT, etc.) - exec/eval alone - Outbound POST/PUT requests - setup.py/sitecustomize.py/usercustomize.py changes - marshal.loads/pickle.loads/compile() Posts a detailed comment on the PR with matched lines and context. Excludes lockfiles (uv.lock, package-lock.json) from scanning. Motivated by the litellm 1.82.7/1.82.8 credential stealer attack (BerriAI/litellm#24512). 
---
 .github/workflows/supply-chain-audit.yml | 191 +++++++++++++++++++++++
 1 file changed, 191 insertions(+)
 create mode 100644 .github/workflows/supply-chain-audit.yml

diff --git a/.github/workflows/supply-chain-audit.yml b/.github/workflows/supply-chain-audit.yml
new file mode 100644
index 000000000..b94e1dda4
--- /dev/null
+++ b/.github/workflows/supply-chain-audit.yml
@@ -0,0 +1,191 @@
+# Supply Chain Audit
+#
+# Scans the lines added by a pull request for patterns associated with supply
+# chain attacks. CRITICAL findings fail the job; WARNING findings are only
+# reported. The report is written to the job summary and posted as a PR
+# comment when the token permits it.
+name: Supply Chain Audit
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+permissions:
+  pull-requests: write
+  contents: read
+
+jobs:
+  scan:
+    name: Scan PR for supply chain risks
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # Full history, so the merge base of base and head can be resolved.
+          fetch-depth: 0
+
+      - name: Scan diff for suspicious patterns
+        id: scan
+        env:
+          # Event data is passed through the environment rather than being
+          # interpolated into the script text.
+          BASE_SHA: ${{ github.event.pull_request.base.sha }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          set -euo pipefail
+
+          # Three-dot range: diff against the merge base, so only changes made
+          # by the PR are scanned, not commits that landed on the base branch.
+          RANGE="${BASE_SHA}...${HEAD_SHA}"
+
+          # No "|| true" on these: if git cannot produce the diff the job must
+          # fail instead of reporting a clean scan.
+          CHANGED_FILES=$(git diff --name-only "$RANGE")
+          DIFF=$(git diff "$RANGE" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock')
+
+          # Added lines only. "grep -n" prefixes each line with "<n>:", so the
+          # comment-line filter below has to anchor on that prefix.
+          ADDED=$(printf '%s\n' "$DIFF" | grep -n '^+' || true)
+          COMMENT_LINE='^[0-9]+:\+[[:space:]]*#'
+
+          FINDINGS=""
+          CRITICAL=false
+
+          # Append one markdown section to FINDINGS.
+          #   $1 heading, $2 explanation, $3 label, $4 matched lines or files
+          add_finding() {
+            local section
+            printf -v section '\n### %s\n%s\n\n**%s**\n```\n%s\n```\n' "$1" "$2" "$3" "$4"
+            FINDINGS+="$section"
+          }
+
+          # --- .pth files (auto-execute on Python startup) ---
+          PTH_FILES=$(printf '%s\n' "$CHANGED_FILES" | grep '\.pth$' || true)
+          if [ -n "$PTH_FILES" ]; then
+            CRITICAL=true
+            add_finding \
+              '🚨 CRITICAL: .pth file added or modified' \
+              'Python `.pth` files in `site-packages/` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512).' \
+              'Files:' \
+              "$PTH_FILES"
+          fi
+
+          # --- base64 + exec/eval combo (the litellm attack pattern) ---
+          B64_EXEC_HITS=$(printf '%s\n' "$ADDED" | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
+          if [ -n "$B64_EXEC_HITS" ]; then
+            CRITICAL=true
+            add_finding \
+              '🚨 CRITICAL: base64 decode + exec/eval combo' \
+              'This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads.' \
+              'Matches:' \
+              "$B64_EXEC_HITS"
+          fi
+
+          # --- base64 decode/encode (alone — legitimate uses exist) ---
+          B64_HITS=$(printf '%s\n' "$ADDED" | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' | head -20 || true)
+          if [ -n "$B64_HITS" ]; then
+            add_finding \
+              '⚠️ WARNING: base64 encoding/decoding detected' \
+              'Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate.' \
+              'Matches (first 20):' \
+              "$B64_HITS"
+          fi
+
+          # --- exec/eval with string arguments ---
+          EXEC_HITS=$(printf '%s\n' "$ADDED" | grep -E '(exec|eval)\s*\(' | grep -vE "$COMMENT_LINE" | grep -v 'test_\|mock\|assert\|# ' | head -20 || true)
+          if [ -n "$EXEC_HITS" ]; then
+            add_finding \
+              '⚠️ WARNING: exec() or eval() usage' \
+              'Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches.' \
+              'Matches (first 20):' \
+              "$EXEC_HITS"
+          fi
+
+          # --- subprocess with encoded/obfuscated commands ---
+          PROC_HITS=$(printf '%s\n' "$ADDED" | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|decode|encode|\\x|chr\(' | head -10 || true)
+          if [ -n "$PROC_HITS" ]; then
+            CRITICAL=true
+            add_finding \
+              '🚨 CRITICAL: subprocess with encoded/obfuscated command' \
+              'Subprocess calls with encoded arguments are a strong indicator of payload execution.' \
+              'Matches:' \
+              "$PROC_HITS"
+          fi
+
+          # --- Outbound network calls (POST/PUT) ---
+          EXFIL_HITS=$(printf '%s\n' "$ADDED" | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' | grep -vE "$COMMENT_LINE" | grep -v 'test_\|mock\|assert' | head -10 || true)
+          if [ -n "$EXFIL_HITS" ]; then
+            add_finding \
+              '⚠️ WARNING: Outbound network calls (POST/PUT)' \
+              'Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate.' \
+              'Matches (first 10):' \
+              "$EXFIL_HITS"
+          fi
+
+          # --- setup.py / setup.cfg install hooks ---
+          SETUP_HITS=$(printf '%s\n' "$CHANGED_FILES" | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true)
+          if [ -n "$SETUP_HITS" ]; then
+            add_finding \
+              '⚠️ WARNING: Install hook files modified' \
+              'These files can execute code during package installation or interpreter startup.' \
+              'Files:' \
+              "$SETUP_HITS"
+          fi
+
+          # --- Compile/marshal/pickle (code object injection) ---
+          MARSHAL_HITS=$(printf '%s\n' "$ADDED" | grep -iE 'marshal\.loads|pickle\.loads|compile\(' | grep -vE "$COMMENT_LINE" | grep -v 'test_\|re\.compile\|ast\.compile' | head -10 || true)
+          if [ -n "$MARSHAL_HITS" ]; then
+            add_finding \
+              '⚠️ WARNING: marshal/pickle/compile usage' \
+              'These can deserialize or construct executable code objects.' \
+              'Matches:' \
+              "$MARSHAL_HITS"
+          fi
+
+          # --- Output results ---
+          if [ -n "$FINDINGS" ]; then
+            # Findings go to a file (multiline step outputs are fragile).
+            printf '%s\n' "$FINDINGS" > "$RUNNER_TEMP/findings.md"
+            echo "found=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "found=false" >> "$GITHUB_OUTPUT"
+          fi
+          echo "critical=${CRITICAL}" >> "$GITHUB_OUTPUT"
+
+      - name: Post warning comment
+        if: steps.scan.outputs.found == 'true'
+        # Pull requests from forks get a read-only GITHUB_TOKEN, so posting the
+        # comment can fail there. That must not fail a warning-only scan or
+        # skip the gate step below.
+        continue-on-error: true
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          CRITICAL: ${{ steps.scan.outputs.critical }}
+        run: |
+          set -euo pipefail
+
+          SEVERITY="⚠️ Supply Chain Risk Detected"
+          if [ "$CRITICAL" = "true" ]; then
+            SEVERITY="🚨 CRITICAL Supply Chain Risk Detected"
+          fi
+
+          REPORT="$RUNNER_TEMP/supply-chain-report.md"
+          {
+            printf '## %s\n\n' "$SEVERITY"
+            printf '%s\n' 'This PR contains patterns commonly associated with supply chain attacks. This does **not** mean the PR is malicious — but these patterns require careful human review before merging.'
+            cat "$RUNNER_TEMP/findings.md"
+            printf '\n---\n%s\n' '*Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*'
+          } > "$REPORT"
+
+          # The job summary is always writable, so the report survives even
+          # when the comment cannot be posted.
+          cat "$REPORT" >> "$GITHUB_STEP_SUMMARY"
+          gh pr comment "$PR_NUMBER" --body-file "$REPORT"
+
+      - name: Fail on critical findings
+        if: steps.scan.outputs.critical == 'true'
+        run: |
+          echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment or the job summary for details."
+          exit 1