---
# Supply Chain Audit
#
# Scans the changes introduced by a pull request for patterns commonly
# associated with supply chain attacks (auto-executed .pth files, base64
# combined with exec/eval, obfuscated subprocess calls, ...).
#
# Outputs of the `scan` step:
#   found    - 'true' when at least one pattern matched
#   critical - 'true' when at least one CRITICAL pattern matched
#
# Findings are written to the job summary and posted as a PR comment.
# Only CRITICAL findings fail the job.
name: Supply Chain Audit

on:
  pull_request:
    types: [opened, synchronize, reopened]

permissions:
  pull-requests: write
  contents: read

jobs:
  scan:
    name: Scan PR for supply chain risks
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # The scan diffs the PR base and head commits, so full history is
          # requested instead of the default shallow clone.
          fetch-depth: 0

      - name: Scan diff for suspicious patterns
        id: scan
        env:
          # Context values are passed through the environment rather than
          # being interpolated into the script text.
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          set -euo pipefail

          # Three-dot range: diff from the merge base to the PR head, so that
          # commits landing on the base branch after the PR was opened are
          # not attributed to the PR.
          RANGE="${BASE_SHA}...${HEAD_SHA}"

          # Full patch with lock files excluded. A git failure aborts the
          # step (set -e) instead of silently producing an empty scan.
          DIFF=$(git diff "$RANGE" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock')
          CHANGED_FILES=$(git diff --name-only "$RANGE")

          # Added lines only, each prefixed by grep -n with its line number
          # inside the patch, i.e. "N:+<content>". This also keeps the
          # "+++ b/<path>" header lines of the patch.
          ADDED=$(printf '%s\n' "$DIFF" | grep -n '^+' || true)

          # Matches an added line whose content is a '#' comment. The anchor
          # has to skip the "N:" prefix produced by grep -n above.
          COMMENT_LINE='^[0-9]+:\+[[:space:]]*#'

          FINDINGS=""
          CRITICAL=false

          # Append one markdown section to FINDINGS.
          #   $1 heading   $2 explanation   $3 list label   $4 matches
          add_finding() {
            local section
            printf -v section '\n### %s\n\n%s\n\n**%s**\n```\n%s\n```\n' "$1" "$2" "$3" "$4"
            FINDINGS+="$section"
          }

          # NOTE: the keyword exclusions used below (test_, mock, assert,
          # '# ') are noise-reduction heuristics and are only applied to
          # WARNING-level checks; CRITICAL checks are never filtered.

          # --- .pth files (auto-execute on Python startup) ---
          PTH_FILES=$(printf '%s\n' "$CHANGED_FILES" | grep '\.pth$' || true)
          if [ -n "$PTH_FILES" ]; then
            CRITICAL=true
            add_finding \
              '🚨 CRITICAL: .pth file added or modified' \
              'Python `.pth` files in `site-packages/` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512).' \
              'Files:' \
              "$PTH_FILES"
          fi

          # --- base64 + exec/eval combo (the litellm attack pattern) ---
          B64_EXEC_HITS=$(printf '%s\n' "$ADDED" \
            | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' \
            | grep -iE 'exec\(|eval\(' \
            | head -10 || true)
          if [ -n "$B64_EXEC_HITS" ]; then
            CRITICAL=true
            add_finding \
              '🚨 CRITICAL: base64 decode + exec/eval combo' \
              'This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads.' \
              'Matches:' \
              "$B64_EXEC_HITS"
          fi

          # --- base64 decode/encode (alone — legitimate uses exist) ---
          B64_HITS=$(printf '%s\n' "$ADDED" \
            | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' \
            | head -20 || true)
          if [ -n "$B64_HITS" ]; then
            add_finding \
              '⚠️ WARNING: base64 encoding/decoding detected' \
              'Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate.' \
              'Matches (first 20):' \
              "$B64_HITS"
          fi

          # --- exec/eval with string arguments ---
          EXEC_HITS=$(printf '%s\n' "$ADDED" \
            | grep -E '(exec|eval)\s*\(' \
            | grep -vE "$COMMENT_LINE" \
            | grep -v 'test_\|mock\|assert\|# ' \
            | head -20 || true)
          if [ -n "$EXEC_HITS" ]; then
            add_finding \
              '⚠️ WARNING: exec() or eval() usage' \
              'Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches.' \
              'Matches (first 20):' \
              "$EXEC_HITS"
          fi

          # --- subprocess with encoded/obfuscated commands ---
          PROC_HITS=$(printf '%s\n' "$ADDED" \
            | grep -E 'subprocess\.(Popen|call|run)\s*\(' \
            | grep -iE 'base64|decode|encode|\\x|chr\(' \
            | head -10 || true)
          if [ -n "$PROC_HITS" ]; then
            CRITICAL=true
            add_finding \
              '🚨 CRITICAL: subprocess with encoded/obfuscated command' \
              'Subprocess calls with encoded arguments are a strong indicator of payload execution.' \
              'Matches:' \
              "$PROC_HITS"
          fi

          # --- Network calls to non-standard domains ---
          EXFIL_HITS=$(printf '%s\n' "$ADDED" \
            | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' \
            | grep -vE "$COMMENT_LINE" \
            | grep -v 'test_\|mock\|assert' \
            | head -10 || true)
          if [ -n "$EXFIL_HITS" ]; then
            add_finding \
              '⚠️ WARNING: Outbound network calls (POST/PUT)' \
              'Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate.' \
              'Matches (first 10):' \
              "$EXFIL_HITS"
          fi

          # --- setup.py / setup.cfg install hooks ---
          SETUP_HITS=$(printf '%s\n' "$CHANGED_FILES" \
            | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true)
          if [ -n "$SETUP_HITS" ]; then
            add_finding \
              '⚠️ WARNING: Install hook files modified' \
              'These files can execute code during package installation or interpreter startup.' \
              'Files:' \
              "$SETUP_HITS"
          fi

          # --- Compile/marshal/pickle (code object injection) ---
          MARSHAL_HITS=$(printf '%s\n' "$ADDED" \
            | grep -iE 'marshal\.loads|pickle\.loads|compile\(' \
            | grep -vE "$COMMENT_LINE" \
            | grep -v 'test_\|re\.compile\|ast\.compile' \
            | head -10 || true)
          if [ -n "$MARSHAL_HITS" ]; then
            add_finding \
              '⚠️ WARNING: marshal/pickle/compile usage' \
              'These can deserialize or construct executable code objects.' \
              'Matches:' \
              "$MARSHAL_HITS"
          fi

          # --- Output results ---
          if [ -n "$FINDINGS" ]; then
            # Findings travel to the next step through a file; they are also
            # appended to the job summary so they stay visible even when no
            # PR comment can be posted.
            printf '%s\n' "$FINDINGS" > /tmp/findings.md
            cat /tmp/findings.md >> "$GITHUB_STEP_SUMMARY"
            echo "found=true" >> "$GITHUB_OUTPUT"
          else
            echo "found=false" >> "$GITHUB_OUTPUT"
          fi
          # CRITICAL is only ever set together with a finding.
          echo "critical=${CRITICAL}" >> "$GITHUB_OUTPUT"

      - name: Post warning comment
        if: steps.scan.outputs.found == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          CRITICAL: ${{ steps.scan.outputs.critical }}
        run: |
          SEVERITY="⚠️ Supply Chain Risk Detected"
          if [ "$CRITICAL" = "true" ]; then
            SEVERITY="🚨 CRITICAL Supply Chain Risk Detected"
          fi

          # Assemble the comment in a file and hand it to gh via --body-file.
          BODY_FILE="$(mktemp)"
          {
            printf '## %s\n\n' "$SEVERITY"
            printf '%s\n\n' 'This PR contains patterns commonly associated with supply chain attacks. This does **not** mean the PR is malicious — but these patterns require careful human review before merging.'
            cat /tmp/findings.md
            printf '\n---\n%s\n%s\n' \
              '*Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml).' \
              'If this is a false positive, a maintainer can approve after manual review.*'
          } > "$BODY_FILE"

          # Posting can fail (for example when the token is read-only). That
          # must not turn a warning-level result into a failed job, nor skip
          # the critical gate below; the findings remain in the job summary.
          if ! gh pr comment "$PR_NUMBER" --body-file "$BODY_FILE"; then
            echo "::warning::Could not post the PR comment. See the job summary for the findings."
          fi

      - name: Fail on critical findings
        if: steps.scan.outputs.critical == 'true'
        run: |
          echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment or the job summary for details."
          exit 1