#!/bin/bash
#
# Pre-commit hook for detecting secret leaks in commits
#
# This hook scans staged changes for potential secret leaks including:
#   - Private keys (PEM, OpenSSH formats)
#   - API keys (OpenAI, Anthropic, HuggingFace, etc.)
#   - Token file paths in prompts/conversations
#   - Environment variable names in sensitive contexts
#   - AWS credentials, database connection strings, etc.
#
# Installation:
#   git config core.hooksPath .githooks
#
# To bypass this hook temporarily:
#   git commit --no-verify
#
# Exit status:
#   0 - nothing blocking was found (warnings may have been printed)
#   1 - at least one CRITICAL finding; the commit is blocked
#

set -euo pipefail

# Colors for output (expanded by `echo -e`)
readonly RED='\033[0;31m'
readonly YELLOW='\033[1;33m'
readonly GREEN='\033[0;32m'
readonly NC='\033[0m' # No Color

# Counters for statistics
CRITICAL_FOUND=0
WARNING_FOUND=0
BLOCK_COMMIT=0

# Array to store findings
FINDINGS=()

# Populated by main(): staged path list and the added lines of the staged diff.
STAGED_FILES=""
STAGED_DIFF=""

# ============================================================================
# PATTERN DEFINITIONS
# ============================================================================
# All patterns are POSIX extended regular expressions and are matched
# case-insensitively (see scan_with_pattern). Only POSIX character classes
# are used: escapes such as \d or \w are literal characters inside a bracket
# expression and must not be used there.

# Reusable fragments for the "name = value" style patterns below.
readonly OPT_QUOTE="[\"']?"        # optional single or double quote
readonly OPT_SPACE='[[:space:]]*'  # optional whitespace

# Critical patterns - will block commit
CRITICAL_PATTERNS=(
  # Private Keys
  '-----BEGIN ((RSA|DSA|EC|OPENSSH|PGP|SSH2) )?PRIVATE KEY( BLOCK)?-----'
  '-----BEGIN ENCRYPTED PRIVATE KEY-----'
  '-----BEGIN CERTIFICATE-----'

  # API Keys - Common prefixes
  'sk-[a-zA-Z0-9]{20,}'    # OpenAI, Anthropic
  'gsk_[a-zA-Z0-9]{20,}'   # Groq
  'hf_[a-zA-Z0-9]{20,}'    # HuggingFace
  'nvapi-[a-zA-Z0-9]{20,}' # NVIDIA
  'AIza[0-9A-Za-z_-]{35}'  # Google/Gemini
  'sk_[a-zA-Z0-9]{20,}'    # Replicate
  'xai-[a-zA-Z0-9]{20,}'   # xAI
  'pplx-[a-zA-Z0-9]{20,}'  # Perplexity
  'anthropic-api-key'      # Anthropic literal
  'claude-api-key'         # Claude literal

  # AWS Credentials
  'AKIA[0-9A-Z]{16}' # AWS Access Key ID
  'ASIA[0-9A-Z]{16}' # AWS Temporary Access Key
  'aws(.{0,20})?(secret(.{0,20})?)?key'
  'aws(.{0,20})?(access(.{0,20})?)?id'

  # Database Connection Strings (with credentials)
  'mongodb(\+srv)?://[^:]+:[^@]+@'
  'postgres(ql)?://[^:]+:[^@]+@'
  'mysql://[^:]+:[^@]+@'
  'redis://:[^@]+@'
)

# Warning patterns - will warn but not block
WARNING_PATTERNS=(
  # Token file paths in prompts or conversation contexts
  '(prompt|conversation|context|message).*~/\.hermes/\.env'
  '(prompt|conversation|context|message).*~/\.tokens/'
  '(prompt|conversation|context|message).*~/\.env'
  '(prompt|conversation|context|message).*~/\.netrc'
  '(prompt|conversation|context|message).*~/\.ssh/'
  '(prompt|conversation|context|message).*~/\.aws/'
  '(prompt|conversation|context|message).*~/\.config/'

  # Environment variable names in prompts (suspicious)
  '(prompt|conversation|context|message).*(OPENAI_API_KEY|ANTHROPIC_API_KEY|HF_TOKEN|HF_API_TOKEN)'
  '(prompt|conversation|context|message).*(AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AZURE_.*_KEY)'
  '(prompt|conversation|context|message).*(DATABASE_URL|DB_PASSWORD|SECRET_KEY)'
  '(prompt|conversation|context|message).*(GITHUB_TOKEN|GITLAB_TOKEN|DOCKER_.*_TOKEN)'

  # GitHub tokens
  'gh[pousr]_[A-Za-z0-9_]{36}'
  'github[_-]?pat[_-]?[a-zA-Z0-9]{22,}'

  # Generic high-entropy strings that look like secrets
  "api[_-]?key${OPT_QUOTE}${OPT_SPACE}[:=]${OPT_SPACE}${OPT_QUOTE}[a-zA-Z0-9]{32,}"
  "secret${OPT_QUOTE}${OPT_SPACE}[:=]${OPT_SPACE}${OPT_QUOTE}[a-zA-Z0-9]{32,}"
  "password${OPT_QUOTE}${OPT_SPACE}[:=]${OPT_SPACE}${OPT_QUOTE}[a-zA-Z0-9]{16,}"
  "token${OPT_QUOTE}${OPT_SPACE}[:=]${OPT_SPACE}${OPT_QUOTE}[a-zA-Z0-9]{32,}"

  # JWT tokens (3 base64 sections separated by dots)
  'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*'

  # Slack tokens
  'xox[baprs]-[0-9]{10,13}-[0-9]{10,13}([a-zA-Z0-9-]*)?'

  # Discord tokens
  '[MN][A-Za-z0-9]{23}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27}'

  # Stripe keys
  'sk_live_[0-9a-zA-Z]{24,}'
  'pk_live_[0-9a-zA-Z]{24,}'

  # Twilio
  'SK[0-9a-fA-F]{32}'

  # SendGrid
  'SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}'

  # Heroku
  '[hH][eE][rR][oO][kK][uU].*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}'
)

# File patterns to scan (relevant to prompts, conversations, config).
# NOTE: nothing in this script reads this array; the diff scan covers every
# staged file regardless of name. It is kept as-is rather than silently
# narrowing what gets scanned.
# shellcheck disable=SC2034
SCAN_FILE_PATTERNS=(
  '\.(py|js|ts|jsx|tsx|json|yaml|yml|toml|md|txt|sh|bash|zsh|fish)$'
  '(prompt|conversation|chat|message|llm|ai)_'
  '_log\.txt$'
  '\.log$'
  'prompt'
  'conversation'
)

# ============================================================================
# SCANNING FUNCTIONS
# ============================================================================

#######################################
# Print up to five lines of the given text that match a pattern.
# Arguments:
#   $1 - extended regular expression (matched case-insensitively)
#   $2 - text to search, one candidate per line
#   $3 - severity label; accepted for caller compatibility, not used here
# Outputs:
#   Matching lines on stdout; a diagnostic on stderr if grep itself fails
#   (for example on an invalid pattern).
# Returns:
#   0 if at least one line matched, 1 otherwise.
#######################################
scan_with_pattern() {
  local pattern="$1"
  local content="$2"
  local matches
  local rc=0

  # -e : several patterns start with '-----'; without -e grep would parse
  #      them as (unknown) options and the pattern would never be applied.
  # -a : treat the input as text so a stray non-text byte cannot replace the
  #      matching lines with a "Binary file matches" notice.
  # -m : stop after five matching lines; no `| head` means no SIGPIPE
  #      interaction with `set -o pipefail`.
  matches=$(grep -a -i -E -m 5 -e "$pattern" <<<"$content") || rc=$?

  # grep exits 1 for "no match" and >1 for real errors; surface the latter
  # instead of silently treating a broken pattern as a clean scan.
  if ((rc > 1)); then
    printf 'secret-scan: grep failed (exit %s) for pattern: %s\n' \
      "$rc" "$pattern" >&2
  fi

  if [[ -n "$matches" ]]; then
    printf '%s\n' "$matches"
    return 0
  fi
  return 1
}

#######################################
# Run a list of patterns against the staged diff and report every hit.
# Globals:
#   STAGED_DIFF (read); FINDINGS, CRITICAL_FOUND, WARNING_FOUND,
#   BLOCK_COMMIT (written)
# Arguments:
#   $1     - severity: "CRITICAL" (blocks the commit) or "WARNING"
#   $2...  - patterns to apply
#######################################
scan_patterns() {
  local severity="$1"
  shift
  local pattern hits

  for pattern in "$@"; do
    hits=$(scan_with_pattern "$pattern" "$STAGED_DIFF" "$severity" || true)
    if [[ -z "$hits" ]]; then
      continue
    fi

    FINDINGS+=("[$severity] Pattern matched: $pattern")
    FINDINGS+=("Matches:")
    FINDINGS+=("$hits")
    FINDINGS+=("")

    if [[ "$severity" == "CRITICAL" ]]; then
      CRITICAL_FOUND=$((CRITICAL_FOUND + 1))
      BLOCK_COMMIT=1
      echo -e "${RED}✗ CRITICAL: Found potential secret!${NC}"
    else
      WARNING_FOUND=$((WARNING_FOUND + 1))
      echo -e "${YELLOW}⚠ WARNING: Found suspicious pattern${NC}"
    fi
    printf ' Pattern: %s\n' "$pattern"
    echo " Matches:"
    sed 's/^/ /' <<<"$hits"
    echo ""
  done
}

#######################################
# Flag staged paths whose names alone indicate sensitive content.
# Globals:
#   STAGED_FILES (read); FINDINGS, CRITICAL_FOUND, WARNING_FOUND,
#   BLOCK_COMMIT (written)
#######################################
check_staged_paths() {
  local env_files cred_files key_files

  # .env files at any depth (should be in .gitignore but double-check)
  env_files=$(grep -E '(^|/)\.env' <<<"$STAGED_FILES" \
    | grep -vE '\.env\.example|\.envrc' || true)
  if [[ -n "$env_files" ]]; then
    echo -e "${RED}✗ CRITICAL: Attempting to commit .env file(s):${NC}"
    sed 's/^/ /' <<<"$env_files"
    FINDINGS+=("[CRITICAL] .env file(s) staged for commit:")
    FINDINGS+=("$env_files")
    CRITICAL_FOUND=$((CRITICAL_FOUND + 1))
    BLOCK_COMMIT=1
    echo ""
  fi

  # Credential files
  cred_files=$(grep -E '(credentials|secrets|tokens)\.?(json|yaml|yml|txt)?$' \
    <<<"$STAGED_FILES" | grep -vE 'test_|_test\.|example' || true)
  if [[ -n "$cred_files" ]]; then
    echo -e "${YELLOW}⚠ WARNING: Potential credential file(s) detected:${NC}"
    sed 's/^/ /' <<<"$cred_files"
    FINDINGS+=("[WARNING] Potential credential files staged:")
    FINDINGS+=("$cred_files")
    WARNING_FOUND=$((WARNING_FOUND + 1))
    echo ""
  fi

  # Private key files
  key_files=$(grep -E '\.(pem|key|ppk|p12|pfx)$' <<<"$STAGED_FILES" \
    | grep -vE 'test_|example' || true)
  if [[ -n "$key_files" ]]; then
    echo -e "${RED}✗ CRITICAL: Private key file(s) detected:${NC}"
    sed 's/^/ /' <<<"$key_files"
    FINDINGS+=("[CRITICAL] Private key files staged for commit:")
    FINDINGS+=("$key_files")
    CRITICAL_FOUND=$((CRITICAL_FOUND + 1))
    BLOCK_COMMIT=1
    echo ""
  fi
}

#######################################
# Scan the staged version of prompt/conversation files for embedded secrets
# and home-directory paths.
# Globals:
#   STAGED_FILES (read); FINDINGS, WARNING_FOUND (written)
#######################################
scan_prompt_files() {
  local prompt_files file file_content

  prompt_files=$(grep -iE '(prompt|conversation|chat|message)' \
    <<<"$STAGED_FILES" | grep -vE 'test_|\.pyc' || true)
  if [[ -z "$prompt_files" ]]; then
    return 0
  fi

  echo "Scanning prompt/conversation files for embedded secrets..."
  # Read one path per line so names containing spaces stay intact.
  while IFS= read -r file; do
    if [[ -z "$file" ]]; then
      continue
    fi

    # Inspect the staged blob (":path"), i.e. what will actually be
    # committed, not the working-tree copy, which may differ after a
    # partial `git add`. NUL bytes are dropped because a shell variable
    # cannot hold them.
    file_content=$(git show ":$file" 2>/dev/null | tr -d '\000' || true)
    if [[ -z "$file_content" ]]; then
      continue
    fi

    # Here-strings instead of `echo | grep -q`: with pipefail, grep -q
    # exiting early can kill the writer and turn a match into a failure.
    # Check for common secret patterns in prompts
    if grep -qiE \
      '(api[_-]?key|secret[_-]?key|password|token)[[:space:]]*[:=][[:space:]]*[^[:space:]]{8,}' \
      <<<"$file_content"; then
      echo -e "${YELLOW}⚠ WARNING: Potential secret in prompt file: $file${NC}"
      FINDINGS+=("[WARNING] Potential secret in: $file")
      WARNING_FOUND=$((WARNING_FOUND + 1))
    fi

    # Check for file paths in home directory
    if grep -qE '~/\.[[:alnum:]_]+' <<<"$file_content"; then
      echo -e "${YELLOW}⚠ WARNING: Home directory path in prompt file: $file${NC}"
      FINDINGS+=("[WARNING] Home directory path in: $file")
      WARNING_FOUND=$((WARNING_FOUND + 1))
    fi
  done <<<"$prompt_files"
  echo ""
}

#######################################
# Print the scan summary and decide whether the commit may proceed.
# Globals:
#   CRITICAL_FOUND, WARNING_FOUND, BLOCK_COMMIT, FINDINGS (read)
# Returns:
#   0 if the commit may proceed, 1 if it must be blocked.
#######################################
summarize_and_decide() {
  echo "============================================"
  echo " SCAN SUMMARY"
  echo "============================================"
  echo ""

  if ((CRITICAL_FOUND > 0)); then
    echo -e "${RED}✗ $CRITICAL_FOUND CRITICAL finding(s) detected${NC}"
  fi
  if ((WARNING_FOUND > 0)); then
    echo -e "${YELLOW}⚠ $WARNING_FOUND WARNING(s) detected${NC}"
  fi

  if ((BLOCK_COMMIT == 0 && WARNING_FOUND == 0 && CRITICAL_FOUND == 0)); then
    echo -e "${GREEN}✓ No potential secret leaks detected${NC}"
    echo ""
    return 0
  fi
  echo ""

  # If blocking issues found
  if ((BLOCK_COMMIT == 1)); then
    echo -e "${RED}╔════════════════════════════════════════════════════════════╗${NC}"
    echo -e "${RED}║ COMMIT BLOCKED: Potential secrets detected! ║${NC}"
    echo -e "${RED}╚════════════════════════════════════════════════════════════╝${NC}"
    echo ""
    echo "The following issues must be resolved before committing:"
    echo ""
    # `|| true`: a non-matching grep must not abort the report under set -e.
    printf '%s\n' "${FINDINGS[@]}" | grep -E '^\[CRITICAL\]' || true
    echo ""
    echo "Recommendations:"
    echo " 1. Remove secrets from your code"
    echo " 2. Use environment variables or a secrets manager"
    echo " 3. Add sensitive files to .gitignore"
    echo " 4. Rotate any exposed credentials immediately"
    echo ""
    echo "If you are CERTAIN this is a false positive, you can bypass:"
    echo " git commit --no-verify"
    echo ""
    echo "⚠️ WARNING: Bypassing should be done with extreme caution!"
    echo ""
    return 1
  fi

  # If only warnings
  if ((WARNING_FOUND > 0)); then
    echo -e "${YELLOW}⚠ WARNINGS found but commit will proceed${NC}"
    echo ""
    echo "Please review the warnings above and ensure no sensitive data"
    echo "is being included in prompts or configuration files."
    echo ""
    echo "To cancel this commit, press Ctrl+C within 3 seconds..."
    sleep 3
  fi

  echo ""
  echo -e "${GREEN}✓ Proceeding with commit${NC}"
  return 0
}

# ============================================================================
# MAIN SCANNING LOGIC
# ============================================================================

#######################################
# Entry point: collect the staged changes, run every check, print the
# summary.
# Globals:
#   STAGED_FILES, STAGED_DIFF (written)
# Returns:
#   0 if the commit may proceed, 1 if it must be blocked.
#######################################
main() {
  local total_files

  # Get list of staged files (excluding deleted). core.quotePath=false keeps
  # non-ASCII names unescaped so they can be handed back to git later.
  STAGED_FILES=$(git -c core.quotePath=false diff --cached --name-only \
    --diff-filter=ACMR 2>/dev/null || true)
  if [[ -z "$STAGED_FILES" ]]; then
    echo -e "${GREEN}✓ No files staged for commit${NC}"
    return 0
  fi

  # Added lines of the staged diff. Only the "+++ <path>" file headers are
  # dropped, so added lines whose own text begins with '+' are still scanned.
  STAGED_DIFF=$(git diff --cached --no-color -U0 2>/dev/null \
    | grep -E '^\+' | grep -vE '^\+\+\+ ' || true)
  if [[ -z "$STAGED_DIFF" ]]; then
    echo -e "${GREEN}✓ No new content to scan${NC}"
    return 0
  fi

  echo "🔍 Scanning for secret leaks in staged files..."
  echo ""

  echo "Files being scanned:"
  # Here-string rather than `echo | head`: a very long file list must not
  # abort the hook through SIGPIPE + pipefail.
  head -n 20 <<<"$STAGED_FILES"
  total_files=$(grep -c '' <<<"$STAGED_FILES")
  if ((total_files > 20)); then
    echo " ... and $((total_files - 20)) more files"
  fi
  echo ""

  echo "Scanning for CRITICAL patterns (will block commit)..."
  scan_patterns "CRITICAL" "${CRITICAL_PATTERNS[@]}"

  echo "Scanning for WARNING patterns (will warn but not block)..."
  scan_patterns "WARNING" "${WARNING_PATTERNS[@]}"

  echo "Performing file-specific checks..."
  check_staged_paths
  scan_prompt_files

  summarize_and_decide
}

# main's return value becomes the hook's exit status (non-zero blocks the
# commit); under `set -e` a non-zero return terminates the script here.
main "$@"