Add pre-receive hook to prevent merging code with Python syntax errors.

Features:
- Checks all Python files (.py) in each push using python -m py_compile
- Special protection for critical files:
  - run_agent.py
  - model_tools.py
  - hermes-agent/tools/nexus_architect.py
  - cli.py, batch_runner.py, hermes_state.py
- Clear error messages showing file and line number
- Rejects pushes containing syntax errors

Files added:
- .githooks/pre-receive (Bash implementation)
- .githooks/pre-receive.py (Python implementation)
- docs/GITEA_SYNTAX_GUARD.md (installation guide)
- .githooks/pre-commit (existing secret detection hook)

Closes #82
349 lines
12 KiB
Bash
349 lines
12 KiB
Bash
#!/bin/bash
#
# Pre-commit hook for detecting secret leaks in commits
# This hook scans staged files for potential secret leaks including:
#   - Private keys (PEM, OpenSSH formats)
#   - API keys (OpenAI, Anthropic, HuggingFace, etc.)
#   - Token file paths in prompts/conversations
#   - Environment variable names in sensitive contexts
#   - AWS credentials, database connection strings, etc.
#
# Installation:
#   git config core.hooksPath .githooks
#
# To bypass this hook temporarily:
#   git commit --no-verify
#

# Abort on unhandled errors, unset variables, and failures anywhere in a
# pipeline. Checks that are allowed to "fail" (e.g. grep finding nothing)
# must therefore be guarded with '|| true' or used inside a condition.
set -euo pipefail

# Colors for output (escape sequences, rendered via 'echo -e')
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

# Counters for statistics
CRITICAL_FOUND=0
WARNING_FOUND=0
BLOCK_COMMIT=0 # set to 1 by any check that must abort the commit

# Array to store findings
FINDINGS=()

# Get list of staged files (excluding deleted)
STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACMR 2>/dev/null || true)

if [ -z "$STAGED_FILES" ]; then
  echo -e "${GREEN}✓ No files staged for commit${NC}"
  exit 0
fi

# Get the diff content of staged files (added lines only).
# Every added line starts with '+'. Only the '+++ <path>' file headers are
# dropped, so added content that itself begins with '+' is still scanned.
# '|| true' covers the case where no line survives the filters.
STAGED_DIFF=$(git diff --cached --no-color -U0 2>/dev/null \
  | grep -E '^\+' \
  | grep -vE '^\+\+\+ ' || true)

if [ -z "$STAGED_DIFF" ]; then
  echo -e "${GREEN}✓ No new content to scan${NC}"
  exit 0
fi

echo "🔍 Scanning for secret leaks in staged files..."
echo ""

# ============================================================================
# PATTERN DEFINITIONS
# ============================================================================

# Critical patterns - will block commit.
# Each entry is an extended regular expression; the scanner applies them
# case-insensitively (grep -iE) to the added lines of the staged diff.
CRITICAL_PATTERNS=(
  # Private Keys
  # The optional algorithm prefix is followed by a mandatory "PRIVATE KEY",
  # so public keys, PGP signatures and PGP messages no longer block a commit.
  '-----BEGIN (RSA |DSA |EC |OPENSSH |PGP |SSH2 )?PRIVATE KEY( BLOCK)?-----'
  '-----BEGIN ENCRYPTED PRIVATE KEY-----'
  '-----BEGIN CERTIFICATE-----'

  # API Keys - Common prefixes
  'sk-[a-zA-Z0-9]{20,}'        # "sk-" followed by 20+ alphanumerics
  'sk-ant-[a-zA-Z0-9_-]{20,}'  # Anthropic-style keys ('-' and '_' in the body)
  'sk-proj-[a-zA-Z0-9_-]{20,}' # OpenAI project-style keys
  'gsk_[a-zA-Z0-9]{20,}'       # Groq
  'hf_[a-zA-Z0-9]{20,}'        # HuggingFace
  'nvapi-[a-zA-Z0-9]{20,}'     # NVIDIA
  'AIza[0-9A-Za-z_-]{35}'      # Google/Gemini
  'sk_[a-zA-Z0-9]{20,}'        # Replicate
  'xai-[a-zA-Z0-9]{20,}'       # xAI
  'pplx-[a-zA-Z0-9]{20,}'      # Perplexity
  'anthropic-api-key'          # Anthropic literal
  'claude-api-key'             # Claude literal

  # AWS Credentials
  'AKIA[0-9A-Z]{16}' # AWS Access Key ID
  'ASIA[0-9A-Z]{16}' # AWS Temporary Access Key
  'aws(.{0,20})?(secret(.{0,20})?)?key'
  'aws(.{0,20})?(access(.{0,20})?)?id'

  # Database Connection Strings (with credentials)
  # The mongodb entry covers both mongodb:// and mongodb+srv://.
  'mongodb(\+srv)?://[^:]+:[^@]+@'
  'postgres(ql)?://[^:]+:[^@]+@'
  'mysql://[^:]+:[^@]+@'
  'redis://:[^@]+@'
)

# Warning patterns - will warn but not block.
# Each entry is an extended regular expression applied with grep -iE, so only
# POSIX ERE constructs are used: [[:space:]] rather than \s, and explicit
# ranges inside brackets (\d and \w are literal characters inside [...]).
WARNING_PATTERNS=(
  # Token file paths in prompts or conversation contexts
  '(prompt|conversation|context|message).*~/\.hermes/\.env'
  '(prompt|conversation|context|message).*~/\.tokens/'
  '(prompt|conversation|context|message).*~/\.env'
  '(prompt|conversation|context|message).*~/\.netrc'
  '(prompt|conversation|context|message).*~/\.ssh/'
  '(prompt|conversation|context|message).*~/\.aws/'
  '(prompt|conversation|context|message).*~/\.config/'

  # Environment variable names in prompts (suspicious)
  '(prompt|conversation|context|message).*(OPENAI_API_KEY|ANTHROPIC_API_KEY|HF_TOKEN|HF_API_TOKEN)'
  '(prompt|conversation|context|message).*(AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AZURE_.*_KEY)'
  '(prompt|conversation|context|message).*(DATABASE_URL|DB_PASSWORD|SECRET_KEY)'
  '(prompt|conversation|context|message).*(GITHUB_TOKEN|GITLAB_TOKEN|DOCKER_.*_TOKEN)'

  # GitHub tokens
  'gh[pousr]_[A-Za-z0-9_]{36}'
  'github[_-]?pat[_-]?[a-zA-Z0-9]{22,}'

  # Generic high-entropy strings that look like secrets.
  # Double-quoted so the optional quote class ["'] can be written without
  # fragile single-quote splicing; \" is the only escape needed.
  "api[_-]?key[\"']?[[:space:]]*[:=][[:space:]]*[\"']?[a-zA-Z0-9]{32,}"
  "secret[\"']?[[:space:]]*[:=][[:space:]]*[\"']?[a-zA-Z0-9]{32,}"
  "password[\"']?[[:space:]]*[:=][[:space:]]*[\"']?[a-zA-Z0-9]{16,}"
  "token[\"']?[[:space:]]*[:=][[:space:]]*[\"']?[a-zA-Z0-9]{32,}"

  # JWT tokens (3 base64 sections separated by dots)
  'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*'

  # Slack tokens
  'xox[baprs]-[0-9]{10,13}-[0-9]{10,13}([a-zA-Z0-9-]*)?'

  # Discord tokens
  '[MN][A-Za-z0-9]{23}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27}'

  # Stripe keys
  'sk_live_[0-9a-zA-Z]{24,}'
  'pk_live_[0-9a-zA-Z]{24,}'

  # Twilio
  'SK[0-9a-fA-F]{32}'

  # SendGrid
  'SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}'

  # Heroku
  '[hH][eE][rR][oO][kK][uU].*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}'
)

# File patterns to scan (relevant to prompts, conversations, config).
# Each entry is an extended regular expression meant to be matched against a
# file path.
# NOTE(review): nothing in the visible script references this array —
# presumably it was meant to restrict which staged files get scanned; confirm
# before relying on it.
SCAN_FILE_PATTERNS=(
  '\.(py|js|ts|jsx|tsx|json|yaml|yml|toml|md|txt|sh|bash|zsh|fish)$'
  '(prompt|conversation|chat|message|llm|ai)_'
  '_log\.txt$'
  '\.log$'
  'prompt'
  'conversation'
)

# ============================================================================
# SCANNING FUNCTIONS
# ============================================================================

#######################################
# Print up to five lines of the given text that match a pattern.
# Matching is case-insensitive and uses extended regular expressions.
# Arguments:
#   $1 - extended regular expression to search for
#   $2 - text to search, one candidate per line
#   $3 - severity label; accepted for call-site compatibility, not used here
# Outputs:
#   the matching lines (at most 5) on stdout
# Returns:
#   0 if at least one line matched, 1 otherwise
#######################################
scan_with_pattern() {
  local pattern="$1"
  local content="$2"
  local matches

  # '-e' marks the next argument as the pattern, so expressions that begin
  # with '-' (such as '-----BEGIN ...') are not parsed as grep options.
  # '|| true' absorbs both grep's "no match" status and the SIGPIPE status a
  # producer can report under 'pipefail' once head has read its five lines.
  matches=$(printf '%s\n' "$content" \
    | grep -iE -e "$pattern" 2>/dev/null \
    | head -n 5 || true)

  if [ -n "$matches" ]; then
    printf '%s\n' "$matches"
    return 0
  fi
  return 1
}

# ============================================================================
# MAIN SCANNING LOGIC
# ============================================================================

#######################################
# Print the first 20 entries of a newline-separated file list, followed by an
# "... and N more files" line when the list is longer than that.
# Arguments:
#   $1 - newline-separated list of file paths
# Outputs:
#   the (possibly truncated) list on stdout
#######################################
list_scanned_files() {
  local file_list="$1"
  local total

  # Here-strings instead of 'echo ... | head -20': with 'set -eo pipefail' a
  # producer killed by SIGPIPE (head stops reading after 20 lines) would make
  # the pipeline fail and abort the whole hook on very long lists.
  total=$(wc -l <<<"$file_list")
  sed -n '1,20p' <<<"$file_list"
  if ((total > 20)); then
    echo " ... and $((total - 20)) more files"
  fi
}

echo "Files being scanned:"
list_scanned_files "$STAGED_FILES"
echo ""

# Scan for critical patterns.
# Every pattern that matches at least one added line counts as one critical
# finding and marks the commit as blocked.
echo "Scanning for CRITICAL patterns (will block commit)..."
for pattern in "${CRITICAL_PATTERNS[@]}"; do
  # scan_with_pattern returns 1 when nothing matches; '|| true' keeps that
  # from terminating the hook under 'set -e'.
  result=$(scan_with_pattern "$pattern" "$STAGED_DIFF" "CRITICAL" || true)
  if [ -n "$result" ]; then
    CRITICAL_FOUND=$((CRITICAL_FOUND + 1))
    BLOCK_COMMIT=1
    FINDINGS+=("[CRITICAL] Pattern matched: $pattern")
    FINDINGS+=("Matches:")
    FINDINGS+=("$result")
    FINDINGS+=("")
    echo -e "${RED}✗ CRITICAL: Found potential secret!${NC}"
    echo " Pattern: $pattern"
    echo " Matches:"
    echo "$result" | sed 's/^/ /'
    echo ""
  fi
done

# Scan for warning patterns.
# Every pattern that matches at least one added line counts as one warning;
# warnings are reported but do not set BLOCK_COMMIT.
echo "Scanning for WARNING patterns (will warn but not block)..."
for pattern in "${WARNING_PATTERNS[@]}"; do
  # scan_with_pattern returns 1 when nothing matches; '|| true' keeps that
  # from terminating the hook under 'set -e'.
  result=$(scan_with_pattern "$pattern" "$STAGED_DIFF" "WARNING" || true)
  if [ -n "$result" ]; then
    WARNING_FOUND=$((WARNING_FOUND + 1))
    FINDINGS+=("[WARNING] Pattern matched: $pattern")
    FINDINGS+=("Matches:")
    FINDINGS+=("$result")
    FINDINGS+=("")
    echo -e "${YELLOW}⚠ WARNING: Found suspicious pattern${NC}"
    echo " Pattern: $pattern"
    echo " Matches:"
    echo "$result" | sed 's/^/ /'
    echo ""
  fi
done

# ============================================================================
# FILE-SPECIFIC SCANS
# ============================================================================

# The filters below read a newline-separated path list on stdin and print the
# paths they consider sensitive. Like grep, they return non-zero when nothing
# is selected, so callers running under 'set -e' append '|| true'.

# Dotenv files in any directory: ".env" alone or followed by '.', '_' or '-'
# and a suffix (.env.local, .env-prod, ...). Anchoring on the basename keeps
# unrelated names such as .envrc from matching; .env.example is allowed.
filter_env_files() {
  grep -E '(^|/)\.env([._-][^/]*)?$' | grep -vF '.env.example'
}

# Files whose name ends in credentials/secrets/tokens (optionally with a
# json/yaml/yml/txt extension), skipping tests and examples.
filter_credential_files() {
  grep -E '(credentials|secrets|tokens)\.?(json|yaml|yml|txt)?$' \
    | grep -v 'test_' \
    | grep -v '_test\.' \
    | grep -v 'example'
}

# Private-key and keystore file extensions, skipping tests and examples.
filter_key_files() {
  grep -E '\.(pem|key|ppk|p12|pfx)$' | grep -v 'test_' | grep -v 'example'
}

echo "Performing file-specific checks..."

# Check for .env files being committed (should be in .gitignore but double-check)
ENV_FILES=$(filter_env_files <<<"$STAGED_FILES" || true)
if [ -n "$ENV_FILES" ]; then
  echo -e "${RED}✗ CRITICAL: Attempting to commit .env file(s):${NC}"
  echo "$ENV_FILES" | sed 's/^/ /'
  FINDINGS+=("[CRITICAL] .env file(s) staged for commit:")
  FINDINGS+=("$ENV_FILES")
  # Count the finding so the summary's totals agree with what was reported.
  CRITICAL_FOUND=$((CRITICAL_FOUND + 1))
  BLOCK_COMMIT=1
  echo ""
fi

# Check for credential files
CRED_FILES=$(filter_credential_files <<<"$STAGED_FILES" || true)
if [ -n "$CRED_FILES" ]; then
  echo -e "${YELLOW}⚠ WARNING: Potential credential file(s) detected:${NC}"
  echo "$CRED_FILES" | sed 's/^/ /'
  FINDINGS+=("[WARNING] Potential credential files staged:")
  FINDINGS+=("$CRED_FILES")
  # Without this the summary could print "No potential secret leaks detected"
  # immediately after this warning.
  WARNING_FOUND=$((WARNING_FOUND + 1))
  echo ""
fi

# Check for private key files
KEY_FILES=$(filter_key_files <<<"$STAGED_FILES" || true)
if [ -n "$KEY_FILES" ]; then
  echo -e "${RED}✗ CRITICAL: Private key file(s) detected:${NC}"
  echo "$KEY_FILES" | sed 's/^/ /'
  FINDINGS+=("[CRITICAL] Private key files staged for commit:")
  FINDINGS+=("$KEY_FILES")
  CRITICAL_FOUND=$((CRITICAL_FOUND + 1))
  BLOCK_COMMIT=1
  echo ""
fi

# ============================================================================
# PROMPT/CONVERSATION SPECIFIC SCANS
# ============================================================================

# Look for prompts that might contain sensitive data: staged paths whose name
# mentions prompt/conversation/chat/message, excluding tests and .pyc files.
PROMPT_FILES=$(grep -iE '(prompt|conversation|chat|message)' <<<"$STAGED_FILES" \
  | grep -v 'test_' \
  | grep -vF '.pyc' || true)

if [ -n "$PROMPT_FILES" ]; then
  echo "Scanning prompt/conversation files for embedded secrets..."

  # Read one path per line so names containing spaces or glob characters are
  # neither split nor expanded. The loop is fed by a here-string (not a pipe)
  # so updates to FINDINGS and WARNING_FOUND happen in the current shell.
  while IFS= read -r file; do
    [ -n "$file" ] || continue

    # Inspect the staged blob (":<path>" names the index entry) rather than
    # the working-tree file, which may differ from what will be committed.
    file_content=$(git show ":${file}" 2>/dev/null || true)
    [ -n "$file_content" ] || continue

    # Here-strings rather than 'echo ... | grep -q': with 'pipefail', grep -q
    # exiting at the first match can SIGPIPE the producer, making the
    # pipeline report failure and hiding the match.

    # Check for common secret patterns in prompts
    if grep -qiE '(api[_-]?key|secret[_-]?key|password|token)[[:space:]]*[:=][[:space:]]*[^[:space:]]{8,}' <<<"$file_content"; then
      echo -e "${YELLOW}⚠ WARNING: Potential secret in prompt file: $file${NC}"
      FINDINGS+=("[WARNING] Potential secret in: $file")
      # Count it so the summary reflects this warning.
      WARNING_FOUND=$((WARNING_FOUND + 1))
    fi

    # Check for file paths in home directory
    if grep -qE '~/\.[[:alnum:]_]+' <<<"$file_content"; then
      echo -e "${YELLOW}⚠ WARNING: Home directory path in prompt file: $file${NC}"
      FINDINGS+=("[WARNING] Home directory path in: $file")
      WARNING_FOUND=$((WARNING_FOUND + 1))
    fi
  done <<<"$PROMPT_FILES"
  echo ""
fi

# ============================================================================
# SUMMARY AND DECISION
# ============================================================================

echo "============================================"
echo " SCAN SUMMARY"
echo "============================================"
echo ""

# Some checks record a "[CRITICAL] ..." or "[WARNING] ..." entry in FINDINGS
# without touching the counters. Raise each counter to at least the number of
# recorded entries so the summary cannot claim a clean scan right after a
# warning was printed.
# The ${FINDINGS[@]+...} form keeps an empty array from tripping 'set -u' on
# bash versions older than 4.4.
critical_entries=0
warning_entries=0
for finding in ${FINDINGS[@]+"${FINDINGS[@]}"}; do
  case "$finding" in
    '[CRITICAL]'*) critical_entries=$((critical_entries + 1)) ;;
    '[WARNING]'*) warning_entries=$((warning_entries + 1)) ;;
  esac
done
if [ "$critical_entries" -gt "$CRITICAL_FOUND" ]; then
  CRITICAL_FOUND=$critical_entries
fi
if [ "$warning_entries" -gt "$WARNING_FOUND" ]; then
  WARNING_FOUND=$warning_entries
fi

if [ "$CRITICAL_FOUND" -gt 0 ]; then
  echo -e "${RED}✗ $CRITICAL_FOUND CRITICAL finding(s) detected${NC}"
fi

if [ "$WARNING_FOUND" -gt 0 ]; then
  echo -e "${YELLOW}⚠ $WARNING_FOUND WARNING(s) detected${NC}"
fi

# Nothing found at all: report success and let the commit through.
if [ "$BLOCK_COMMIT" -eq 0 ] && [ "$WARNING_FOUND" -eq 0 ] && [ "$CRITICAL_FOUND" -eq 0 ]; then
  echo -e "${GREEN}✓ No potential secret leaks detected${NC}"
  echo ""
  exit 0
fi

echo ""

# If blocking issues found
if [ "$BLOCK_COMMIT" -eq 1 ]; then
  echo -e "${RED}╔════════════════════════════════════════════════════════════╗${NC}"
  echo -e "${RED}║ COMMIT BLOCKED: Potential secrets detected! ║${NC}"
  echo -e "${RED}╚════════════════════════════════════════════════════════════╝${NC}"
  echo ""
  echo "The following issues must be resolved before committing:"
  echo ""
  # Show only the headline of each critical finding. '|| true' keeps a
  # no-match grep from ending the hook before the guidance below is printed.
  printf '%s\n' ${FINDINGS[@]+"${FINDINGS[@]}"} | grep -E '^\[CRITICAL\]' || true
  echo ""
  echo "Recommendations:"
  echo " 1. Remove secrets from your code"
  echo " 2. Use environment variables or a secrets manager"
  echo " 3. Add sensitive files to .gitignore"
  echo " 4. Rotate any exposed credentials immediately"
  echo ""
  echo "If you are CERTAIN this is a false positive, you can bypass:"
  echo " git commit --no-verify"
  echo ""
  echo "⚠️ WARNING: Bypassing should be done with extreme caution!"
  echo ""
  # A non-zero exit status from the hook is what makes git abort the commit.
  exit 1
fi

# If only warnings
if [ "$WARNING_FOUND" -gt 0 ]; then
  echo -e "${YELLOW}⚠ WARNINGS found but commit will proceed${NC}"
  echo ""
  echo "Please review the warnings above and ensure no sensitive data"
  echo "is being included in prompts or configuration files."
  echo ""
  echo "To cancel this commit, press Ctrl+C within 3 seconds..."
  # Grace period that gives the user a chance to interrupt.
  sleep 3
fi

echo ""
echo -e "${GREEN}✓ Proceeding with commit${NC}"
exit 0