Files
hermes-agent/.githooks/pre-commit
Allegro 91e6540a23 feat: implement Syntax Guard as Gitea pre-receive hook
Add pre-receive hook to prevent merging code with Python syntax errors.

Features:
- Checks all Python files (.py) in each push using python -m py_compile
- Special protection for critical files:
  - run_agent.py
  - model_tools.py
  - hermes-agent/tools/nexus_architect.py
  - cli.py, batch_runner.py, hermes_state.py
- Clear error messages showing file and line number
- Rejects pushes containing syntax errors

Files added:
- .githooks/pre-receive (Bash implementation)
- .githooks/pre-receive.py (Python implementation)
- docs/GITEA_SYNTAX_GUARD.md (installation guide)
- .githooks/pre-commit (existing secret detection hook)

Closes #82
2026-04-05 06:12:37 +00:00

349 lines
12 KiB
Bash

#!/bin/bash
#
# Pre-commit hook for detecting secret leaks in commits
# This hook scans staged files for potential secret leaks including:
# - Private keys (PEM, OpenSSH formats)
# - API keys (OpenAI, Anthropic, HuggingFace, etc.)
# - Token file paths in prompts/conversations
# - Environment variable names in sensitive contexts
# - AWS credentials, database connection strings, etc.
#
# Installation:
# git config core.hooksPath .githooks
#
# To bypass this hook temporarily:
# git commit --no-verify
#
# Strict mode: stop on unhandled errors, unset variables and failed pipeline
# stages. Commands whose failure is expected are guarded with `|| true`.
set -euo pipefail

# ANSI escape sequences; rendered by `echo -e` below and in later sections.
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

# Counters for statistics
CRITICAL_FOUND=0
WARNING_FOUND=0
BLOCK_COMMIT=0

# Array to store findings
FINDINGS=()

# Staged paths, one per line (Added/Copied/Modified/Renamed; deletions are
# excluded). A git failure is deliberately treated like "nothing staged".
STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACMR 2>/dev/null || true)
if [ -z "$STAGED_FILES" ]; then
  echo -e "${GREEN}✓ No files staged for commit${NC}"
  exit 0
fi

# Added lines of the staged diff, each still carrying its leading '+'.
# The awk filter only prints '+' lines that sit inside a hunk (after an '@@'
# header), so the '+++ b/<path>' file headers are skipped by position rather
# than by text. That keeps added lines whose own content begins with '+'
# (e.g. Markdown '+ item' bullets), which a '^\+[^+]' text filter would drop.
# Empty added lines carry nothing to scan and are left out.
STAGED_DIFF=$(
  git diff --cached --no-color --no-ext-diff -U0 2>/dev/null \
    | awk '
        /^diff /          { in_hunk = 0; next }
        /^@@ /            { in_hunk = 1; next }
        in_hunk && /^[+]./ { print }
      ' || true
)
if [ -z "$STAGED_DIFF" ]; then
  # Do not exit here: binary or empty files (a .p12 key store, an empty or
  # renamed .env) produce no added text lines but must still go through the
  # file-name checks further down.
  echo -e "${GREEN}✓ No new content to scan${NC}"
fi

echo "🔍 Scanning for secret leaks in staged files..."
echo ""
# ============================================================================
# PATTERN DEFINITIONS
# ============================================================================
# Critical patterns - will block commit
CRITICAL_PATTERNS=(
# Private Keys
'-----BEGIN (RSA |DSA |EC |OPENSSH |PGP |SSH2 |PRIVATE KEY-----)'
'-----BEGIN ENCRYPTED PRIVATE KEY-----'
'-----BEGIN CERTIFICATE-----'
# API Keys - Common prefixes
'sk-[a-zA-Z0-9]{20,}' # OpenAI, Anthropic
'gsk_[a-zA-Z0-9]{20,}' # Groq
'hf_[a-zA-Z0-9]{20,}' # HuggingFace
'nvapi-[a-zA-Z0-9]{20,}' # NVIDIA
'AIza[0-9A-Za-z_-]{35}' # Google/Gemini
'sk_[a-zA-Z0-9]{20,}' # Replicate
'xai-[a-zA-Z0-9]{20,}' # xAI
'pplx-[a-zA-Z0-9]{20,}' # Perplexity
'anthropic-api-key' # Anthropic literal
'claude-api-key' # Claude literal
# AWS Credentials
'AKIA[0-9A-Z]{16}' # AWS Access Key ID
'ASIA[0-9A-Z]{16}' # AWS Temporary Access Key
'aws(.{0,20})?(secret(.{0,20})?)?key'
'aws(.{0,20})?(access(.{0,20})?)?id'
# Database Connection Strings (with credentials)
'mongodb(\+srv)?://[^:]+:[^@]+@'
'postgres(ql)?://[^:]+:[^@]+@'
'mysql://[^:]+:[^@]+@'
'redis://:[^@]+@'
'mongodb://[^:]+:[^@]+@'
)
# Warning patterns - will warn but not block
WARNING_PATTERNS=(
# Token file paths in prompts or conversation contexts
'(prompt|conversation|context|message).*~/\.hermes/\.env'
'(prompt|conversation|context|message).*~/\.tokens/'
'(prompt|conversation|context|message).*~/.env'
'(prompt|conversation|context|message).*~/.netrc'
'(prompt|conversation|context|message).*~/.ssh/'
'(prompt|conversation|context|message).*~/.aws/'
'(prompt|conversation|context|message).*~/.config/'
# Environment variable names in prompts (suspicious)
'(prompt|conversation|context|message).*(OPENAI_API_KEY|ANTHROPIC_API_KEY|HF_TOKEN|HF_API_TOKEN)'
'(prompt|conversation|context|message).*(AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AZURE_.*_KEY)'
'(prompt|conversation|context|message).*(DATABASE_URL|DB_PASSWORD|SECRET_KEY)'
'(prompt|conversation|context|message).*(GITHUB_TOKEN|GITLAB_TOKEN|DOCKER_.*_TOKEN)'
# GitHub tokens
'gh[pousr]_[A-Za-z0-9_]{36}'
'github[_-]?pat[_-]?[a-zA-Z0-9]{22,}'
# Generic high-entropy strings that look like secrets
'api[_-]?key["'\''']?\s*[:=]\s*["'\''']?[a-zA-Z0-9]{32,}'
'secret["'\''']?\s*[:=]\s*["'\''']?[a-zA-Z0-9]{32,}'
'password["'\''']?\s*[:=]\s*["'\''']?[a-zA-Z0-9]{16,}'
'token["'\''']?\s*[:=]\s*["'\''']?[a-zA-Z0-9]{32,}'
# JWT tokens (3 base64 sections separated by dots)
'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*'
# Slack tokens
'xox[baprs]-[0-9]{10,13}-[0-9]{10,13}([a-zA-Z0-9-]*)?'
# Discord tokens
'[MN][A-Za-z\d]{23}\.[\w-]{6}\.[\w-]{27}'
# Stripe keys
'sk_live_[0-9a-zA-Z]{24,}'
'pk_live_[0-9a-zA-Z]{24,}'
# Twilio
'SK[0-9a-fA-F]{32}'
# SendGrid
'SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}'
# Heroku
'[hH][eE][rR][oO][kK][uU].*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}'
)
# File patterns to scan (relevant to prompts, conversations, config)
SCAN_FILE_PATTERNS=(
'\.(py|js|ts|jsx|tsx|json|yaml|yml|toml|md|txt|sh|bash|zsh|fish)$'
'(prompt|conversation|chat|message|llm|ai)_'
'_log\.txt$'
'\.log$'
'prompt'
'conversation'
)
# ============================================================================
# SCANNING FUNCTIONS
# ============================================================================
#######################################
# Print up to five lines of a text that match a pattern.
# Matching is case-insensitive and uses extended regular expressions.
# Arguments:
#   $1 - extended regular expression
#   $2 - text to search (may span many lines)
#   $3 - severity label; accepted so existing call sites keep working,
#        not used by the function itself
# Outputs:
#   Matching lines on stdout; a diagnostic on stderr if grep itself fails.
# Returns:
#   0 if at least one line matched, 1 otherwise (including grep errors).
#######################################
scan_with_pattern() {
  local pattern="$1"
  local content="$2"
  local matches
  local status=0

  # -e is required: several patterns begin with '-' ('-----BEGIN ...') and
  # would otherwise be parsed by grep as command-line options, so the scan
  # would fail without ever matching. The here-string avoids an
  # `echo | grep` pipeline, which mangles option-like text and can break
  # under `set -o pipefail`.
  matches=$(grep -iE -m 5 -e "$pattern" <<<"$content") || status=$?

  # grep exits 1 for "no match"; anything higher is a real error (for
  # example an invalid regex) and must not pass unnoticed.
  if [ "$status" -gt 1 ]; then
    printf 'scan_with_pattern: grep failed (exit %s) for pattern: %s\n' \
      "$status" "$pattern" >&2
    return 1
  fi

  if [ -n "$matches" ]; then
    printf '%s\n' "$matches"
    return 0
  fi
  return 1
}
# ============================================================================
# MAIN SCANNING LOGIC
# ============================================================================

#######################################
# Run every given pattern against the staged diff and record the hits.
# Globals:
#   STAGED_DIFF (read), NC (read), FINDINGS (appended),
#   CRITICAL_FOUND / BLOCK_COMMIT (written when severity is CRITICAL),
#   WARNING_FOUND (written for any other severity)
# Arguments:
#   $1 - severity label, CRITICAL or WARNING
#   $2 - color escape used for the headline
#   $3 - headline printed for each pattern that matched
#   $4... - extended regular expressions to try
# Outputs:
#   One report per matching pattern on stdout.
#######################################
run_pattern_scan() {
  local severity=$1
  local color=$2
  local headline=$3
  shift 3

  local pattern result
  for pattern in "$@"; do
    # scan_with_pattern returns non-zero for "no match"; that is expected.
    result=$(scan_with_pattern "$pattern" "$STAGED_DIFF" "$severity" || true)
    [ -n "$result" ] || continue

    if [ "$severity" = "CRITICAL" ]; then
      CRITICAL_FOUND=$((CRITICAL_FOUND + 1))
      BLOCK_COMMIT=1
    else
      WARNING_FOUND=$((WARNING_FOUND + 1))
    fi
    FINDINGS+=("[$severity] Pattern matched: $pattern" "Matches:" "$result" "")

    echo -e "${color}${headline}${NC}"
    echo " Pattern: $pattern"
    echo " Matches:"
    sed 's/^/ /' <<<"$result"
    echo ""
  done
}

# Show at most 20 staged paths. Here-strings are used instead of
# `echo | head`: with `pipefail` and `set -e` active, head closing the pipe
# early on a very long list could abort the hook via SIGPIPE.
echo "Files being scanned:"
head -20 <<<"$STAGED_FILES"
staged_count=$(($(wc -l <<<"$STAGED_FILES")))
if [ "$staged_count" -gt 20 ]; then
  echo " ... and $((staged_count - 20)) more files"
fi
echo ""

# Scan for critical patterns
echo "Scanning for CRITICAL patterns (will block commit)..."
run_pattern_scan CRITICAL "$RED" "✗ CRITICAL: Found potential secret!" \
  "${CRITICAL_PATTERNS[@]}"

# Scan for warning patterns
echo "Scanning for WARNING patterns (will warn but not block)..."
run_pattern_scan WARNING "$YELLOW" "⚠ WARNING: Found suspicious pattern" \
  "${WARNING_PATTERNS[@]}"
# ============================================================================
# FILE-SPECIFIC SCANS
# ============================================================================

#######################################
# Filter a list of paths down to dotenv files.
# A path qualifies when its basename starts with ".env", in any directory
# ("services/api/.env.local" counts, not only files at the repository root).
# Basenames starting with ".envrc" or ".env.example" are let through.
# Inputs:  paths on stdin, one per line
# Outputs: qualifying paths on stdout
# Returns: non-zero when nothing qualifies (callers guard with `|| true`)
#######################################
filter_env_files() {
  grep -E '(^|/)\.env[^/]*$' \
    | grep -vE '(^|/)\.env(rc|\.example)[^/]*$'
}

echo "Performing file-specific checks..."

# Check for .env files being committed (should be in .gitignore but double-check)
ENV_FILES=$(filter_env_files <<<"$STAGED_FILES" || true)
if [ -n "$ENV_FILES" ]; then
  echo -e "${RED}✗ CRITICAL: Attempting to commit .env file(s):${NC}"
  sed 's/^/ /' <<<"$ENV_FILES"
  FINDINGS+=("[CRITICAL] .env file(s) staged for commit:")
  FINDINGS+=("$ENV_FILES")
  BLOCK_COMMIT=1
  echo ""
fi

# Check for credential files (warning only); test fixtures and examples are
# skipped by name.
CRED_FILES=$(
  grep -E '(credentials|secrets|tokens)\.?(json|yaml|yml|txt)?$' <<<"$STAGED_FILES" \
    | grep -v 'test_' \
    | grep -v '_test\.' \
    | grep -v 'example' || true
)
if [ -n "$CRED_FILES" ]; then
  echo -e "${YELLOW}⚠ WARNING: Potential credential file(s) detected:${NC}"
  sed 's/^/ /' <<<"$CRED_FILES"
  FINDINGS+=("[WARNING] Potential credential files staged:")
  FINDINGS+=("$CRED_FILES")
  echo ""
fi

# Check for private key files by extension; blocks the commit.
KEY_FILES=$(
  grep -E '\.(pem|key|ppk|p12|pfx)$' <<<"$STAGED_FILES" \
    | grep -v 'test_' \
    | grep -v 'example' || true
)
if [ -n "$KEY_FILES" ]; then
  echo -e "${RED}✗ CRITICAL: Private key file(s) detected:${NC}"
  sed 's/^/ /' <<<"$KEY_FILES"
  FINDINGS+=("[CRITICAL] Private key files staged for commit:")
  FINDINGS+=("$KEY_FILES")
  BLOCK_COMMIT=1
  echo ""
fi
# ============================================================================
# PROMPT/CONVERSATION SPECIFIC SCANS
# ============================================================================

# Both predicates feed grep from a here-string. With `set -o pipefail`, an
# `echo "$big" | grep -q` pipeline can report "no match" although grep did
# match: grep -q exits at the first hit, echo is killed by SIGPIPE, and the
# pipeline status becomes non-zero.

# True (0) if the text in $1 has a key/secret/password/token assignment whose
# value is at least 8 non-blank characters long.
prompt_content_has_secret() {
  grep -qiE '(api[_-]?key|secret[_-]?key|password|token)\s*[:=]\s*\S{8,}' <<<"$1"
}

# True (0) if the text in $1 mentions a dot-path under the home directory,
# such as ~/.ssh.
prompt_content_has_home_path() {
  grep -qE '~/\.\w+' <<<"$1"
}

# Look for prompts that might contain sensitive data
PROMPT_FILES=$(
  grep -iE '(prompt|conversation|chat|message)' <<<"$STAGED_FILES" \
    | grep -v 'test_' \
    | grep -v '\.pyc' || true
)
if [ -n "$PROMPT_FILES" ]; then
  echo "Scanning prompt/conversation files for embedded secrets..."
  # Read one path per line so names containing spaces or glob characters
  # stay intact (a plain `for file in $PROMPT_FILES` would split them).
  while IFS= read -r file; do
    [ -n "$file" ] || continue
    # Inspect the staged blob (":<path>" names the index entry), i.e. what
    # will actually be committed, not the working-tree copy, which may differ
    # after a partial `git add`.
    file_content=$(git show ":${file}" 2>/dev/null || true)
    [ -n "$file_content" ] || continue

    # Check for common secret patterns in prompts
    if prompt_content_has_secret "$file_content"; then
      echo -e "${YELLOW}⚠ WARNING: Potential secret in prompt file: $file${NC}"
      FINDINGS+=("[WARNING] Potential secret in: $file")
    fi
    # Check for file paths in home directory
    if prompt_content_has_home_path "$file_content"; then
      echo -e "${YELLOW}⚠ WARNING: Home directory path in prompt file: $file${NC}"
      FINDINGS+=("[WARNING] Home directory path in: $file")
    fi
  done <<<"$PROMPT_FILES"
  echo ""
fi
# ============================================================================
# SUMMARY AND DECISION
# ============================================================================

# Totals are derived from FINDINGS, which every check appends to, instead of
# from CRITICAL_FOUND / WARNING_FOUND, which only the pattern loops increment.
# Otherwise a commit that triggered only file-level warnings would be summed
# up as "No potential secret leaks detected", and file-level criticals would
# be missing from the count line.
critical_total=0
warning_total=0
# The ${arr[@]+...} form keeps `set -u` from tripping on an empty array in
# older bash releases.
for entry in ${FINDINGS[@]+"${FINDINGS[@]}"}; do
  case "$entry" in
    '[CRITICAL]'*) critical_total=$((critical_total + 1)) ;;
    '[WARNING]'*) warning_total=$((warning_total + 1)) ;;
  esac
done

echo "============================================"
echo " SCAN SUMMARY"
echo "============================================"
echo ""
if [ "$critical_total" -gt 0 ]; then
  echo -e "${RED}$critical_total CRITICAL finding(s) detected${NC}"
fi
if [ "$warning_total" -gt 0 ]; then
  echo -e "${YELLOW}$warning_total WARNING(s) detected${NC}"
fi

# Clean run: nothing blocked and nothing recorded.
if [ "$BLOCK_COMMIT" -eq 0 ] && [ "$warning_total" -eq 0 ] && [ "$critical_total" -eq 0 ]; then
  echo -e "${GREEN}✓ No potential secret leaks detected${NC}"
  echo ""
  exit 0
fi
echo ""

# If blocking issues found
if [ "$BLOCK_COMMIT" -eq 1 ]; then
  echo -e "${RED}╔════════════════════════════════════════════════════════════╗${NC}"
  echo -e "${RED}║ COMMIT BLOCKED: Potential secrets detected! ║${NC}"
  echo -e "${RED}╚════════════════════════════════════════════════════════════╝${NC}"
  echo ""
  echo "The following issues must be resolved before committing:"
  echo ""
  # List only the headline of each critical finding.
  for entry in ${FINDINGS[@]+"${FINDINGS[@]}"}; do
    case "$entry" in
      '[CRITICAL]'*) printf '%s\n' "$entry" ;;
    esac
  done
  echo ""
  echo "Recommendations:"
  echo " 1. Remove secrets from your code"
  echo " 2. Use environment variables or a secrets manager"
  echo " 3. Add sensitive files to .gitignore"
  echo " 4. Rotate any exposed credentials immediately"
  echo ""
  echo "If you are CERTAIN this is a false positive, you can bypass:"
  echo " git commit --no-verify"
  echo ""
  echo "⚠️ WARNING: Bypassing should be done with extreme caution!"
  echo ""
  # A non-zero exit status makes git abort the commit.
  exit 1
fi

# If only warnings
if [ "$warning_total" -gt 0 ]; then
  echo -e "${YELLOW}⚠ WARNINGS found but commit will proceed${NC}"
  echo ""
  echo "Please review the warnings above and ensure no sensitive data"
  echo "is being included in prompts or configuration files."
  echo ""
  echo "To cancel this commit, press Ctrl+C within 3 seconds..."
  # Grace period so the author can abort by hand.
  sleep 3
fi
echo ""
echo -e "${GREEN}✓ Proceeding with commit${NC}"
exit 0