name: Secret Scan

# Re-scan whenever a pull request is created, reopened, or receives new commits.
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened

# Token scope: read the repository contents, write comments on pull requests.
permissions:
  contents: read
  pull-requests: write

jobs:
  # Single job: diff the pull request against its base branch and search the
  # added lines for credential-shaped strings.
  scan:
    name: Scan for secrets
    runs-on: ubuntu-latest
    # The steps below run inside this container image.
    container: catthehacker/ubuntu:act-22.04
    steps:
      - uses: actions/checkout@v4
        with:
          # 0 requests full history; the scan step diffs against the base
          # branch with the three-dot form, which needs the shared commits.
          fetch-depth: 0

      - name: Fetch base branch
        env:
          # Hand the branch name to the shell as data. Interpolating the
          # expression directly into the command would let an unusual branch
          # name be parsed as shell syntax.
          BASE_REF: ${{ github.base_ref }}
        run: git fetch origin "$BASE_REF"

      - name: Scan diff for secrets
        id: scan
        # The script relies on bash features (pipefail, here-strings), so request
        # bash explicitly instead of depending on the default shell.
        shell: bash
        env:
          # Supplied through the environment so the branch name is treated as
          # data and never spliced into the script text.
          BASE_REF: ${{ github.base_ref }}
        run: |
          set -euo pipefail

          # Reads a unified diff on stdin and prints only the added lines
          # (drops deletions, context lines, and the "+++" file headers).
          # Each printed line keeps its leading "+" marker.
          added_lines() {
            grep '^+' | grep -v '^+++' || true
          }

          # git diff runs as its own command: if it fails (for example because
          # the base ref is missing) the step stops here instead of treating
          # the error as an empty diff with nothing to report.
          RAW_DIFF=$(git diff "origin/${BASE_REF}...HEAD" -- \
            ':!*.lock' ':!uv.lock' ':!package-lock.json' ':!yarn.lock')
          DIFF=$(added_lines <<<"$RAW_DIFF")

          FINDINGS=""
          CRITICAL=false

          # check LABEL PATTERN [CRITICAL]
          #   Appends a finding for LABEL when the PCRE PATTERN matches any
          #   added line. Passing CRITICAL=true also marks the scan critical.
          #   Aborts the step if grep itself fails (exit status above 1).
          check() {
            local label="$1"
            local pattern="$2"
            local critical="${3:-false}"
            local status=0
            # -e: a pattern starting with "-" (the private key header) would
            #     otherwise be parsed as a grep option and never searched for.
            # -q: only the exit status is needed, so matched secret text is
            #     never stored or printed.
            grep -qP -e "$pattern" <<<"$DIFF" || status=$?
            if [ "$status" -gt 1 ]; then
              echo "::error::grep failed while checking: ${label}"
              exit "$status"
            fi
            if [ "$status" -eq 0 ]; then
              FINDINGS="${FINDINGS}\n- **${label}**: pattern matched"
              if [ "$critical" = "true" ]; then
                CRITICAL=true
              fi
            fi
          }

          # AWS keys — critical
          check "AWS Access Key" 'AKIA[0-9A-Z]{16}' true

          # Private key headers — critical. The algorithm prefix is optional so
          # plain PKCS#8 ("BEGIN PRIVATE KEY") and encrypted keys are caught too.
          check "Private Key Header" '-----BEGIN ((RSA|EC|DSA|OPENSSH|PGP|ENCRYPTED) )?PRIVATE KEY' true

          # OpenAI / Anthropic style keys
          check "OpenAI-style API key (sk-)" 'sk-[a-zA-Z0-9]{20,}' false

          # GitHub tokens
          check "GitHub personal access token (ghp_)" 'ghp_[a-zA-Z0-9]{36}' true
          check "GitHub fine-grained PAT (github_pat_)" 'github_pat_[a-zA-Z0-9_]{1,}' true

          # Slack tokens
          check "Slack bot token (xoxb-)" 'xoxb-[0-9A-Za-z\-]{10,}' true
          check "Slack user token (xoxp-)" 'xoxp-[0-9A-Za-z\-]{10,}' true

          # Generic assignment patterns — exclude obvious placeholders.
          # The '"'"' runs splice a literal single quote into the single-quoted
          # regex, so the character classes match either quote style.
          GENERIC=$(grep -iP -e '(api_key|apikey|api-key|secret_key|access_token|auth_token)\s*[=:]\s*['"'"'"][^'"'"'"]{20,}['"'"'"]' <<<"$DIFF" \
            | grep -ivP -e '(fake|mock|test|placeholder|example|dummy|your[_-]|xxx|<|>|\{\{)' || true)
          if [ -n "$GENERIC" ]; then
            FINDINGS="${FINDINGS}\n- **Generic credential assignment**: possible hardcoded secret"
          fi

          # .env additions with long values
          RAW_ENV_DIFF=$(git diff "origin/${BASE_REF}...HEAD" -- '*.env' '**/.env' '.env*')
          ENV_DIFF=$(added_lines <<<"$RAW_ENV_DIFF")
          # Every line still starts with the diff's "+" marker, so the variable
          # name is anchored after it; without the \+ nothing could ever match.
          ENV_MATCHES=$(grep -P -e '^\+[A-Z_][A-Z0-9_]*=.{16,}' <<<"$ENV_DIFF" \
            | grep -ivP -e '(fake|mock|test|placeholder|example|dummy|your[_-]|xxx)' || true)
          if [ -n "$ENV_MATCHES" ]; then
            FINDINGS="${FINDINGS}\n- **.env file**: lines with potentially real secret values detected"
          fi

          # Write outputs
          if [ -n "$FINDINGS" ]; then
            echo "found=true" >> "$GITHUB_OUTPUT"
          else
            echo "found=false" >> "$GITHUB_OUTPUT"
          fi

          if [ "$CRITICAL" = "true" ]; then
            echo "critical=true" >> "$GITHUB_OUTPUT"
          else
            echo "critical=false" >> "$GITHUB_OUTPUT"
          fi

          # Store findings in a file to use in comment step.
          # %b expands the "\n" separators accumulated in FINDINGS.
          printf "%b" "$FINDINGS" > /tmp/secret-findings.txt

      - name: Post PR comment with findings
        if: steps.scan.outputs.found == 'true' && github.event_name == 'pull_request'
        # Commenting is best-effort: on pull requests from forks the token is
        # typically read-only and the comment call fails. Without this, that
        # failure would fail the job on a mere warning and skip the severity
        # steps that follow.
        continue-on-error: true
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Tells gh which repository to act on, so it does not have to infer
          # it from the local git remote.
          GH_REPO: ${{ github.repository }}
          # Expression values are passed as environment variables rather than
          # interpolated into the script text.
          PR_NUMBER: ${{ github.event.pull_request.number }}
          IS_CRITICAL: ${{ steps.scan.outputs.critical }}
        run: |
          # Findings were written by the scan step, one markdown bullet per hit.
          FINDINGS=$(cat /tmp/secret-findings.txt)
          SEVERITY="warning"
          if [ "$IS_CRITICAL" = "true" ]; then
            SEVERITY="CRITICAL"
          fi

          BODY="## Secret Scan — ${SEVERITY} findings

          The automated secret scanner detected potential secrets in the diff for this PR.

          ### Findings
          ${FINDINGS}

          ### What to do
          1. Remove any real credentials from the diff immediately.
          2. If the match is a false positive (test fixture, placeholder), add a comment explaining why or rename the variable to include \`fake\`, \`mock\`, or \`test\`.
          3. Rotate any exposed credentials regardless of whether this PR is merged.

          ---
          *Automated scan by [secret-scan](/.github/workflows/secret-scan.yml)*"

          gh pr comment "$PR_NUMBER" --body "$BODY"

      - name: Fail on critical secrets
        # Runs only when the scan step flagged at least one critical pattern.
        if: ${{ steps.scan.outputs.critical == 'true' }}
        run: |
          # Emit an error annotation, then fail the job.
          echo "::error::Critical secrets detected in diff (private keys, AWS keys, or GitHub tokens). Remove them before merging."
          exit 1

      - name: Warn on non-critical findings
        # Findings exist but none were critical: annotate without failing.
        if: ${{ steps.scan.outputs.critical == 'false' && steps.scan.outputs.found == 'true' }}
        run: |
          echo "::warning::Potential secrets detected in diff. Review the PR comment for details."