diff --git a/.githooks/pre-commit b/.githooks/pre-commit new file mode 100644 index 000000000..cb26e7696 --- /dev/null +++ b/.githooks/pre-commit @@ -0,0 +1,348 @@ +#!/bin/bash +# +# Pre-commit hook for detecting secret leaks in commits +# This hook scans staged files for potential secret leaks including: +# - Private keys (PEM, OpenSSH formats) +# - API keys (OpenAI, Anthropic, HuggingFace, etc.) +# - Token file paths in prompts/conversations +# - Environment variable names in sensitive contexts +# - AWS credentials, database connection strings, etc. +# +# Installation: +# git config core.hooksPath .githooks +# +# To bypass this hook temporarily: +# git commit --no-verify +# + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +YELLOW='\033[1;33m' +GREEN='\033[0;32m' +NC='\033[0m' # No Color + +# Counters for statistics +CRITICAL_FOUND=0 +WARNING_FOUND=0 +BLOCK_COMMIT=0 + +# Array to store findings +FINDINGS=() + +# Get list of staged files (excluding deleted) +STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACMR 2>/dev/null || true) + +if [ -z "$STAGED_FILES" ]; then + echo -e "${GREEN}✓ No files staged for commit${NC}" + exit 0 +fi + +# Get the diff content of staged files (new/changed lines only, starting with +) +STAGED_DIFF=$(git diff --cached --no-color -U0 2>/dev/null | grep -E '^\+[^+]' || true) + +if [ -z "$STAGED_DIFF" ]; then + echo -e "${GREEN}✓ No new content to scan${NC}" + exit 0 +fi + +echo "🔍 Scanning for secret leaks in staged files..." 
echo ""

# ============================================================================
# PATTERN DEFINITIONS
# ============================================================================
#
# Every entry is a POSIX extended regular expression. scan_with_pattern
# applies it case-insensitively (grep -iE) to the added lines of the staged
# diff. Only portable ERE syntax is used: no \d or \w, and [[:space:]] instead
# of \s, so the patterns behave the same with GNU and BSD grep.

# Critical patterns - will block commit
CRITICAL_PATTERNS=(
    # Private Keys
    '-----BEGIN (RSA |DSA |EC |OPENSSH |PGP |SSH2 |PRIVATE KEY-----)'
    '-----BEGIN ENCRYPTED PRIVATE KEY-----'
    '-----BEGIN CERTIFICATE-----'

    # API Keys - Common prefixes
    'sk-[a-zA-Z0-9]{20,}'       # OpenAI, Anthropic
    'gsk_[a-zA-Z0-9]{20,}'      # Groq
    'hf_[a-zA-Z0-9]{20,}'       # HuggingFace
    'nvapi-[a-zA-Z0-9]{20,}'    # NVIDIA
    'AIza[0-9A-Za-z_-]{35}'     # Google/Gemini
    'sk_[a-zA-Z0-9]{20,}'       # Replicate
    'xai-[a-zA-Z0-9]{20,}'      # xAI
    'pplx-[a-zA-Z0-9]{20,}'     # Perplexity
    'anthropic-api-key'         # Anthropic literal
    'claude-api-key'            # Claude literal

    # AWS Credentials
    'AKIA[0-9A-Z]{16}'          # AWS Access Key ID
    'ASIA[0-9A-Z]{16}'          # AWS Temporary Access Key
    'aws(.{0,20})?(secret(.{0,20})?)?key'
    'aws(.{0,20})?(access(.{0,20})?)?id'

    # Database Connection Strings (with credentials)
    # (the mongodb entry also covers plain mongodb://, so it is listed once)
    'mongodb(\+srv)?://[^:]+:[^@]+@'
    'postgres(ql)?://[^:]+:[^@]+@'
    'mysql://[^:]+:[^@]+@'
    'redis://:[^@]+@'
)

# Warning patterns - will warn but not block
WARNING_PATTERNS=(
    # Token file paths in prompts or conversation contexts
    '(prompt|conversation|context|message).*~/\.hermes/\.env'
    '(prompt|conversation|context|message).*~/\.tokens/'
    '(prompt|conversation|context|message).*~/\.env'
    '(prompt|conversation|context|message).*~/\.netrc'
    '(prompt|conversation|context|message).*~/\.ssh/'
    '(prompt|conversation|context|message).*~/\.aws/'
    '(prompt|conversation|context|message).*~/\.config/'

    # Environment variable names in prompts (suspicious)
    '(prompt|conversation|context|message).*(OPENAI_API_KEY|ANTHROPIC_API_KEY|HF_TOKEN|HF_API_TOKEN)'
    '(prompt|conversation|context|message).*(AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AZURE_.*_KEY)'
    '(prompt|conversation|context|message).*(DATABASE_URL|DB_PASSWORD|SECRET_KEY)'
    '(prompt|conversation|context|message).*(GITHUB_TOKEN|GITLAB_TOKEN|DOCKER_.*_TOKEN)'

    # GitHub tokens
    'gh[pousr]_[A-Za-z0-9_]{36}'
    'github[_-]?pat[_-]?[a-zA-Z0-9]{22,}'

    # Generic high-entropy strings that look like secrets.
    # Double-quoted so the ["'] bracket expression can hold both quote
    # characters without shell-quoting gymnastics.
    "api[_-]?key[\"']?[[:space:]]*[:=][[:space:]]*[\"']?[a-zA-Z0-9]{32,}"
    "secret[\"']?[[:space:]]*[:=][[:space:]]*[\"']?[a-zA-Z0-9]{32,}"
    "password[\"']?[[:space:]]*[:=][[:space:]]*[\"']?[a-zA-Z0-9]{16,}"
    "token[\"']?[[:space:]]*[:=][[:space:]]*[\"']?[a-zA-Z0-9]{32,}"

    # JWT tokens (3 base64 sections separated by dots)
    'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*'

    # Slack tokens
    'xox[baprs]-[0-9]{10,13}-[0-9]{10,13}([a-zA-Z0-9-]*)?'

    # Discord tokens
    '[MN][A-Za-z0-9]{23}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27}'

    # Stripe keys
    'sk_live_[0-9a-zA-Z]{24,}'
    'pk_live_[0-9a-zA-Z]{24,}'

    # Twilio
    'SK[0-9a-fA-F]{32}'

    # SendGrid
    'SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}'

    # Heroku
    '[hH][eE][rR][oO][kK][uU].*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}'
)

# File patterns to scan (relevant to prompts, conversations, config)
# NOTE(review): no scanning code visible in this script references this array.
SCAN_FILE_PATTERNS=(
    '\.(py|js|ts|jsx|tsx|json|yaml|yml|toml|md|txt|sh|bash|zsh|fish)$'
    '(prompt|conversation|chat|message|llm|ai)_'
    '_log\.txt$'
    '\.log$'
    'prompt'
    'conversation'
)

# ============================================================================
# SCANNING FUNCTIONS
# ============================================================================

#######################################
# Search a block of text for one pattern.
# Arguments:
#   $1 - extended regular expression (matched case-insensitively)
#   $2 - text to search, one candidate per line
#   $3 - severity label; accepted for call-site compatibility, not used here
# Outputs:
#   Up to five matching lines on stdout.
# Returns:
#   0 if at least one line matched, 1 otherwise.
#######################################
scan_with_pattern() {
    local pattern="$1"
    local content="$2"
    local matches

    # -e keeps patterns that start with '-' (e.g. "-----BEGIN ...") from being
    # parsed as grep options. -m 5 caps the output inside grep itself, so no
    # pipe is closed early under `set -o pipefail`. grep's stderr is discarded
    # on purpose: a pattern that fails to compile counts as "no match".
    matches=$(grep -iE -m 5 -e "$pattern" <<<"$content" 2>/dev/null || true)

    if [ -n "$matches" ]; then
        printf '%s\n' "$matches"
        return 0
    fi
    return 1
}

# ============================================================================
# MAIN SCANNING LOGIC
#
# ============================================================================

# List at most the first 20 staged paths, then say how many were left out.
# Here-strings are used instead of `echo ... | head` so that no writer is cut
# off by an early-exiting reader while `set -o pipefail` is active.
echo "Files being scanned:"
head -n 20 <<<"$STAGED_FILES"
staged_total=$(wc -l <<<"$STAGED_FILES")
if (( staged_total > 20 )); then
    echo " ... and $(( staged_total - 20 )) more files"
fi
echo ""

#######################################
# Run a list of patterns against the staged diff and record every hit.
# Globals:
#   STAGED_DIFF (read); FINDINGS, CRITICAL_FOUND, WARNING_FOUND,
#   BLOCK_COMMIT (written); RED, YELLOW, NC (read)
# Arguments:
#   $1   - severity label: "CRITICAL" blocks the commit, anything else warns
#   $2.. - patterns to try, one per argument
# Outputs:
#   A report for each pattern that matched, on stdout.
#######################################
report_pattern_hits() {
    local severity="$1"
    shift
    local pattern result

    for pattern in "$@"; do
        result=$(scan_with_pattern "$pattern" "$STAGED_DIFF" "$severity" || true)
        [ -n "$result" ] || continue

        FINDINGS+=("[$severity] Pattern matched: $pattern")
        FINDINGS+=("Matches:")
        FINDINGS+=("$result")
        FINDINGS+=("")

        if [ "$severity" = "CRITICAL" ]; then
            CRITICAL_FOUND=$((CRITICAL_FOUND + 1))
            BLOCK_COMMIT=1
            echo -e "${RED}✗ CRITICAL: Found potential secret!${NC}"
        else
            WARNING_FOUND=$((WARNING_FOUND + 1))
            echo -e "${YELLOW}⚠ WARNING: Found suspicious pattern${NC}"
        fi
        echo " Pattern: $pattern"
        echo " Matches:"
        sed 's/^/ /' <<<"$result"
        echo ""
    done
}

# Scan for critical patterns
echo "Scanning for CRITICAL patterns (will block commit)..."
report_pattern_hits "CRITICAL" "${CRITICAL_PATTERNS[@]}"

# Scan for warning patterns
echo "Scanning for WARNING patterns (will warn but not block)..."
report_pattern_hits "WARNING" "${WARNING_PATTERNS[@]}"

# ============================================================================
# FILE-SPECIFIC SCANS
# ============================================================================

echo "Performing file-specific checks..."
# Check for .env files being committed (should be in .gitignore but double-check)
# Matches ".env" and ".env.<suffix>" in any directory, not only at the
# repository root. ".env.example" is allowed; ".envrc" does not match.
ENV_FILES=$(grep -E '(^|/)\.env(\.[^/]+)?$' <<<"$STAGED_FILES" | grep -vF '.env.example' || true)
if [ -n "$ENV_FILES" ]; then
    echo -e "${RED}✗ CRITICAL: Attempting to commit .env file(s):${NC}"
    sed 's/^/ /' <<<"$ENV_FILES"
    FINDINGS+=("[CRITICAL] .env file(s) staged for commit:")
    FINDINGS+=("$ENV_FILES")
    # Count the finding so the summary reports it.
    CRITICAL_FOUND=$((CRITICAL_FOUND + 1))
    BLOCK_COMMIT=1
    echo ""
fi

# Check for credential files
CRED_FILES=$(grep -E '(credentials|secrets|tokens)\.?(json|yaml|yml|txt)?$' <<<"$STAGED_FILES" | grep -v 'test_' | grep -v '_test\.' | grep -v 'example' || true)
if [ -n "$CRED_FILES" ]; then
    echo -e "${YELLOW}⚠ WARNING: Potential credential file(s) detected:${NC}"
    sed 's/^/ /' <<<"$CRED_FILES"
    FINDINGS+=("[WARNING] Potential credential files staged:")
    FINDINGS+=("$CRED_FILES")
    # Without this the summary could claim that nothing was detected.
    WARNING_FOUND=$((WARNING_FOUND + 1))
    echo ""
fi

# Check for private key files
KEY_FILES=$(grep -E '\.(pem|key|ppk|p12|pfx)$' <<<"$STAGED_FILES" | grep -v 'test_' | grep -v 'example' || true)
if [ -n "$KEY_FILES" ]; then
    echo -e "${RED}✗ CRITICAL: Private key file(s) detected:${NC}"
    sed 's/^/ /' <<<"$KEY_FILES"
    FINDINGS+=("[CRITICAL] Private key files staged for commit:")
    FINDINGS+=("$KEY_FILES")
    CRITICAL_FOUND=$((CRITICAL_FOUND + 1))
    BLOCK_COMMIT=1
    echo ""
fi

# ============================================================================
# PROMPT/CONVERSATION SPECIFIC SCANS
# ============================================================================

# Look for prompts that might contain sensitive data
PROMPT_FILES=$(grep -iE '(prompt|conversation|chat|message)' <<<"$STAGED_FILES" | grep -v 'test_' | grep -vF '.pyc' || true)
if [ -n "$PROMPT_FILES" ]; then
    echo "Scanning prompt/conversation files for embedded secrets..."

    # Read one path per line so names containing spaces stay intact.
    while IFS= read -r file; do
        [ -n "$file" ] || continue

        # Inspect the staged blob (":<path>"), i.e. what will actually be
        # committed, rather than the working-tree copy. grep runs without -q
        # so it consumes all of its input; an early exit would make the
        # pipeline fail under `set -o pipefail` even though a match was found.

        # Check for common secret patterns in prompts
        if git show ":$file" 2>/dev/null \
            | grep -iE '(api[_-]?key|secret[_-]?key|password|token)[[:space:]]*[:=][[:space:]]*[^[:space:]]{8,}' >/dev/null; then
            echo -e "${YELLOW}⚠ WARNING: Potential secret in prompt file: $file${NC}"
            FINDINGS+=("[WARNING] Potential secret in: $file")
            WARNING_FOUND=$((WARNING_FOUND + 1))
        fi

        # Check for file paths in home directory
        if git show ":$file" 2>/dev/null \
            | grep -E '~/\.[[:alnum:]_]+' >/dev/null; then
            echo -e "${YELLOW}⚠ WARNING: Home directory path in prompt file: $file${NC}"
            FINDINGS+=("[WARNING] Home directory path in: $file")
            WARNING_FOUND=$((WARNING_FOUND + 1))
        fi
    done <<<"$PROMPT_FILES"
    echo ""
fi

# ============================================================================
# SUMMARY AND DECISION
# ============================================================================

echo "============================================"
echo " SCAN SUMMARY"
echo "============================================"
echo ""

if (( CRITICAL_FOUND > 0 )); then
    echo -e "${RED}✗ $CRITICAL_FOUND CRITICAL finding(s) detected${NC}"
fi

if (( WARNING_FOUND > 0 )); then
    echo -e "${YELLOW}⚠ $WARNING_FOUND WARNING(s) detected${NC}"
fi

if (( BLOCK_COMMIT == 0 && WARNING_FOUND == 0 && CRITICAL_FOUND == 0 )); then
    echo -e "${GREEN}✓ No potential secret leaks detected${NC}"
    echo ""
    exit 0
fi

echo ""

# If blocking issues found
if (( BLOCK_COMMIT == 1 )); then
    echo -e "${RED}╔════════════════════════════════════════════════════════════╗${NC}"
    echo -e "${RED}║ COMMIT BLOCKED: Potential secrets detected! ║${NC}"
    echo -e "${RED}╚════════════════════════════════════════════════════════════╝${NC}"
    echo ""
    echo "The following issues must be resolved before committing:"
    echo ""
    # `|| true`: a non-matching grep must not abort the report under `set -e`.
    printf '%s\n' "${FINDINGS[@]}" | grep -E '^\[CRITICAL\]' || true
    echo ""
    echo "Recommendations:"
    echo " 1. Remove secrets from your code"
    echo " 2. Use environment variables or a secrets manager"
    echo " 3. Add sensitive files to .gitignore"
    echo " 4. Rotate any exposed credentials immediately"
    echo ""
    echo "If you are CERTAIN this is a false positive, you can bypass:"
    echo " git commit --no-verify"
    echo ""
    echo "⚠️ WARNING: Bypassing should be done with extreme caution!"
    echo ""
    exit 1
fi

# If only warnings
if (( WARNING_FOUND > 0 )); then
    echo -e "${YELLOW}⚠ WARNINGS found but commit will proceed${NC}"
    echo ""
    echo "Please review the warnings above and ensure no sensitive data"
    echo "is being included in prompts or configuration files."
    echo ""
    echo "To cancel this commit, press Ctrl+C within 3 seconds..."
    sleep 3
fi

echo ""
echo -e "${GREEN}✓ Proceeding with commit${NC}"
exit 0
diff --git a/.githooks/pre-receive b/.githooks/pre-receive
new file mode 100755
index 000000000..aa7ecb019
--- /dev/null
+++ b/.githooks/pre-receive
@@ -0,0 +1,216 @@
#!/bin/bash
#
# Pre-receive hook for Gitea - Python Syntax Guard
#
# This hook validates Python files for syntax errors before allowing pushes.
# It uses `python -m py_compile` to check files for syntax errors.
#
# Installation in Gitea:
#   1. Go to Repository Settings → Git Hooks
#   2. Edit the "pre-receive" hook
#   3. Copy the contents of this file
#   4.
Save and enable +# +# Or for system-wide Gitea hooks, place in: +# /path/to/gitea-repositories/.git/hooks/pre-receive +# +# Features: +# - Checks all Python files (.py) in the push +# - Focuses on critical files: run_agent.py, model_tools.py, nexus_architect.py +# - Provides detailed error messages with line numbers +# - Rejects pushes containing syntax errors +# + +set -euo pipefail + +# Colors for output (may not work in all Gitea environments) +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Exit codes +EXIT_SUCCESS=0 +EXIT_SYNTAX_ERROR=1 +EXIT_INTERNAL_ERROR=2 + +# Temporary directory for file extraction +TEMP_DIR=$(mktemp -d) +trap "rm -rf $TEMP_DIR" EXIT + +# Counters +ERRORS_FOUND=0 +FILES_CHECKED=0 +CRITICAL_FILES_CHECKED=0 + +# Critical files that must always be checked +CRITICAL_FILES=( + "run_agent.py" + "model_tools.py" + "hermes-agent/tools/nexus_architect.py" + "cli.py" + "batch_runner.py" + "hermes_state.py" +) + +# ============================================================================ +# HELPER FUNCTIONS +# ============================================================================ + +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Extract file content from git object +get_file_content() { + local ref="$1" + git show "$ref" 2>/dev/null || echo "" +} + +# Check if file is a Python file +is_python_file() { + local filename="$1" + [[ "$filename" == *.py ]] +} + +# Check if file is in the critical list +is_critical_file() { + local filename="$1" + for critical in "${CRITICAL_FILES[@]}"; do + if [[ "$filename" == *"$critical" ]]; then + return 0 + fi + done + return 1 +} + +# Check Python file for syntax errors +check_syntax() { + local filename="$1" + local content="$2" + local ref="$3" + + # Write content to temp file + local temp_file="$TEMP_DIR/$(basename "$filename")" + echo "$content" > 
"$temp_file" + + # Run py_compile + local output + if ! output=$(python3 -m py_compile "$temp_file" 2>&1); then + echo "SYNTAX_ERROR" + echo "$output" + return 1 + fi + + echo "OK" + return 0 +} + +# ============================================================================ +# MAIN PROCESSING +# ============================================================================ + +echo "========================================" +echo " Python Syntax Guard - Pre-receive" +echo "========================================" +echo "" + +# Read refs from stdin (provided by Git) +# Format: +while read -r oldrev newrev refname; do + # Skip if this is a branch deletion (newrev is all zeros) + if [[ "$newrev" == "0000000000000000000000000000000000000000" ]]; then + log_info "Branch deletion detected, skipping syntax check" + continue + fi + + # If this is a new branch (oldrev is all zeros), check all files + if [[ "$oldrev" == "0000000000000000000000000000000000000000" ]]; then + # List all files in the new commit + files=$(git ls-tree --name-only -r "$newrev" 2>/dev/null || echo "") + else + # Get list of changed files between old and new + files=$(git diff --name-only "$oldrev" "$newrev" 2>/dev/null || echo "") + fi + + # Process each file + while IFS= read -r file; do + [ -z "$file" ] && continue + + # Only check Python files + if ! is_python_file "$file"; then + continue + fi + + FILES_CHECKED=$((FILES_CHECKED + 1)) + + # Check if critical file + local is_critical=false + if is_critical_file "$file"; then + is_critical=true + CRITICAL_FILES_CHECKED=$((CRITICAL_FILES_CHECKED + 1)) + fi + + # Get file content at the new revision + content=$(git show "$newrev:$file" 2>/dev/null || echo "") + + if [ -z "$content" ]; then + # File might have been deleted + continue + fi + + # Check syntax + result=$(check_syntax "$file" "$content" "$newrev") + status=$? 
+ + if [ $status -ne 0 ]; then + ERRORS_FOUND=$((ERRORS_FOUND + 1)) + log_error "Syntax error in: $file" + + if [ "$is_critical" = true ]; then + echo " ^^^ CRITICAL FILE - This file is essential for system operation" + fi + + # Display the py_compile error + echo "" + echo "$result" | grep -v "^SYNTAX_ERROR$" | sed 's/^/ /' + echo "" + else + if [ "$is_critical" = true ]; then + log_info "✓ Critical file OK: $file" + fi + fi + + done <<< "$files" +done + +echo "" +echo "========================================" +echo " SUMMARY" +echo "========================================" +echo "Files checked: $FILES_CHECKED" +echo "Critical files checked: $CRITICAL_FILES_CHECKED" +echo "Errors found: $ERRORS_FOUND" +echo "" + +# Exit with appropriate code +if [ $ERRORS_FOUND -gt 0 ]; then + log_error "╔════════════════════════════════════════════════════════════╗" + log_error "║ PUSH REJECTED: Syntax errors detected! ║" + log_error "║ ║" + log_error "║ Please fix the syntax errors above before pushing again. ║" + log_error "╚════════════════════════════════════════════════════════════╝" + echo "" + exit $EXIT_SYNTAX_ERROR +fi + +log_info "✓ All Python files passed syntax check" +exit $EXIT_SUCCESS diff --git a/.githooks/pre-receive.py b/.githooks/pre-receive.py new file mode 100755 index 000000000..2d20814d9 --- /dev/null +++ b/.githooks/pre-receive.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 +""" +Pre-receive hook for Gitea - Python Syntax Guard (Python Implementation) + +This hook validates Python files for syntax errors before allowing pushes. +It uses the `py_compile` module to check files for syntax errors. + +Installation in Gitea: + 1. Go to Repository Settings → Git Hooks + 2. Edit the "pre-receive" hook + 3. Copy the contents of this file + 4. 
Save and enable + +Or for command-line usage: + chmod +x .githooks/pre-receive.py + cp .githooks/pre-receive.py .git/hooks/pre-receive + +Features: + - Checks all Python files (.py) in the push + - Focuses on critical files: run_agent.py, model_tools.py, nexus_architect.py + - Provides detailed error messages with line numbers + - Rejects pushes containing syntax errors +""" + +import sys +import subprocess +import tempfile +import os +import py_compile +from pathlib import Path +from typing import List, Tuple, Optional + +# Exit codes +EXIT_SUCCESS = 0 +EXIT_SYNTAX_ERROR = 1 +EXIT_INTERNAL_ERROR = 2 + +# Critical files that must always be checked +CRITICAL_FILES = [ + "run_agent.py", + "model_tools.py", + "hermes-agent/tools/nexus_architect.py", + "cli.py", + "batch_runner.py", + "hermes_state.py", + "hermes_tools/nexus_think.py", +] + +# ANSI color codes +RED = '\033[0;31m' +GREEN = '\033[0;32m' +YELLOW = '\033[1;33m' +NC = '\033[0m' # No Color + + +def log_info(msg: str): + print(f"{GREEN}[INFO]{NC} {msg}") + + +def log_warn(msg: str): + print(f"{YELLOW}[WARN]{NC} {msg}") + + +def log_error(msg: str): + print(f"{RED}[ERROR]{NC} {msg}") + + +def is_python_file(filename: str) -> bool: + """Check if file is a Python file.""" + return filename.endswith('.py') + + +def is_critical_file(filename: str) -> bool: + """Check if file is in the critical list.""" + return any(critical in filename for critical in CRITICAL_FILES) + + +def check_syntax(filepath: str, content: bytes) -> Tuple[bool, Optional[str]]: + """ + Check Python file for syntax errors using py_compile. 
+ + Returns: + Tuple of (is_valid, error_message) + """ + try: + # Write content to temp file + with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False) as f: + f.write(content) + temp_path = f.name + + try: + # Try to compile + py_compile.compile(temp_path, doraise=True) + return True, None + except py_compile.PyCompileError as e: + return False, str(e) + finally: + os.unlink(temp_path) + + except Exception as e: + return False, f"Internal error: {e}" + + +def get_changed_files(oldrev: str, newrev: str) -> List[str]: + """Get list of changed files between two revisions.""" + try: + if oldrev == "0000000000000000000000000000000000000000": + # New branch - get all files + result = subprocess.run( + ['git', 'ls-tree', '--name-only', '-r', newrev], + capture_output=True, + text=True, + check=True + ) + else: + # Existing branch - get changed files + result = subprocess.run( + ['git', 'diff', '--name-only', oldrev, newrev], + capture_output=True, + text=True, + check=True + ) + return [f for f in result.stdout.strip().split('\n') if f] + except subprocess.CalledProcessError: + return [] + + +def get_file_content(rev: str, filepath: str) -> Optional[bytes]: + """Get file content at a specific revision.""" + try: + result = subprocess.run( + ['git', 'show', f'{rev}:{filepath}'], + capture_output=True, + check=True + ) + return result.stdout + except subprocess.CalledProcessError: + return None + + +def main(): + """Main entry point.""" + print("========================================") + print(" Python Syntax Guard - Pre-receive") + print("========================================") + print() + + errors_found = 0 + files_checked = 0 + critical_files_checked = 0 + + # Read refs from stdin (provided by Git) + # Format: + for line in sys.stdin: + line = line.strip() + if not line: + continue + + parts = line.split() + if len(parts) != 3: + continue + + oldrev, newrev, refname = parts + + # Skip if this is a branch deletion + if newrev == 
"0000000000000000000000000000000000000000": + log_info("Branch deletion detected, skipping syntax check") + continue + + # Get list of files to check + files = get_changed_files(oldrev, newrev) + + for filepath in files: + if not is_python_file(filepath): + continue + + files_checked += 1 + + is_critical = is_critical_file(filepath) + if is_critical: + critical_files_checked += 1 + + # Get file content + content = get_file_content(newrev, filepath) + if content is None: + # File might have been deleted + continue + + # Check syntax + is_valid, error_msg = check_syntax(filepath, content) + + if not is_valid: + errors_found += 1 + log_error(f"Syntax error in: {filepath}") + + if is_critical: + print(f" ^^^ CRITICAL FILE - This file is essential for system operation") + + print() + print(f" {error_msg}") + print() + else: + if is_critical: + log_info(f"✓ Critical file OK: {filepath}") + + # Summary + print() + print("========================================") + print(" SUMMARY") + print("========================================") + print(f"Files checked: {files_checked}") + print(f"Critical files checked: {critical_files_checked}") + print(f"Errors found: {errors_found}") + print() + + if errors_found > 0: + log_error("╔════════════════════════════════════════════════════════════╗") + log_error("║ PUSH REJECTED: Syntax errors detected! ║") + log_error("║ ║") + log_error("║ Please fix the syntax errors above before pushing again. 
║") + log_error("╚════════════════════════════════════════════════════════════╝") + print() + return EXIT_SYNTAX_ERROR + + log_info("✓ All Python files passed syntax check") + return EXIT_SUCCESS + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/docs/GITEA_SYNTAX_GUARD.md b/docs/GITEA_SYNTAX_GUARD.md new file mode 100644 index 000000000..93b9fd77b --- /dev/null +++ b/docs/GITEA_SYNTAX_GUARD.md @@ -0,0 +1,209 @@ +# Gitea Syntax Guard - Pre-receive Hook + +This document describes how to install and configure the Python Syntax Guard pre-receive hook in Gitea to prevent merging code with syntax errors. + +## Overview + +The Syntax Guard is a pre-receive hook that validates Python files for syntax errors before allowing pushes to the repository. It uses Python's built-in `py_compile` module to check files. + +### Features + +- **Automatic Syntax Checking**: Checks all Python files (.py) in each push +- **Critical File Protection**: Special attention to essential files: + - `run_agent.py` - Main agent runner + - `model_tools.py` - Tool orchestration layer + - `hermes-agent/tools/nexus_architect.py` - Nexus architect tool + - `cli.py` - Command-line interface + - `batch_runner.py` - Batch processing + - `hermes_state.py` - State management +- **Clear Error Messages**: Shows exact file and line number of syntax errors +- **Push Rejection**: Blocks pushes containing syntax errors + +## Installation Methods + +### Method 1: Gitea Web Interface (Recommended) + +1. Navigate to your repository in Gitea +2. Go to **Settings** → **Git Hooks** +3. Find the **pre-receive** hook and click **Edit** +4. Copy the contents of `.githooks/pre-receive` (Bash version) or `.githooks/pre-receive.py` (Python version) +5. Paste into the Gitea hook editor +6. 
Click **Update Hook**

### Method 2: Server-Side Installation

If you have server access to the Gitea installation:

```bash
# Locate the repository on the Gitea server
# Usually in: /var/lib/gitea/repositories/<owner>/<repo>.git/hooks/

# Copy the hook
cp /path/to/hermes-agent/.githooks/pre-receive \
   /var/lib/gitea/repositories/Timmy_Foundation/hermes-agent.git/hooks/pre-receive

# Make it executable
chmod +x /var/lib/gitea/repositories/Timmy_Foundation/hermes-agent.git/hooks/pre-receive
```

### Method 3: Repository-Level Git Hook (for local testing)

```bash
# From the repository root
cp .githooks/pre-receive .git/hooks/pre-receive
chmod +x .git/hooks/pre-receive

# Or use the Python version
cp .githooks/pre-receive.py .git/hooks/pre-receive
chmod +x .git/hooks/pre-receive
```

## Configuration

### Customizing Critical Files

Edit the `CRITICAL_FILES` array in the hook to add or remove files:

**Bash version:**
```bash
CRITICAL_FILES=(
    "run_agent.py"
    "model_tools.py"
    "hermes-agent/tools/nexus_architect.py"
    # Add your files here
)
```

**Python version:**
```python
CRITICAL_FILES = [
    "run_agent.py",
    "model_tools.py",
    "hermes-agent/tools/nexus_architect.py",
    # Add your files here
]
```

### Environment Variables

The following environment variables are intended to configure the hook:

- `PYTHON_CMD`: Path to Python executable (default: `python3`)
- `SYNTAX_GUARD_STRICT`: Set to `1` to fail on warnings (default: `0`)

**Note:** neither `.githooks/pre-receive` nor `.githooks/pre-receive.py`, as shipped alongside this document, reads these variables; the Bash hook invokes `python3` directly. Confirm that your copy of the hook implements them before relying on them. Otherwise, change the interpreter path in the hook itself (see "Python Not Found" below).

## Testing the Hook

### Local Testing

1. Create a test branch:
   ```bash
   git checkout -b test/syntax-guard
   ```

2. Create a file with intentional syntax error:
   ```bash
   echo 'def broken_function(' > broken_test.py
   git add broken_test.py
   git commit -m "Test syntax error"
   ```

3. Try to push (should be rejected):
   ```bash
   git push origin test/syntax-guard
   ```

4.
You should see output like:
   ```
   [ERROR] Syntax error in: broken_test.py
   File "broken_test.py", line 1
   def broken_function(
   ^
   SyntaxError: unexpected EOF while parsing
   ```
   The exact `SyntaxError` wording depends on the Python version installed on the server.

### Clean Up Test

```bash
git checkout main
git branch -D test/syntax-guard
git push origin --delete test/syntax-guard  # if it somehow got through
```

## Troubleshooting

### Hook Not Running

1. Check hook permissions:
   ```bash
   ls -la .git/hooks/pre-receive
   # Should show executable permission (-rwxr-xr-x)
   ```

2. Verify Git hook path:
   ```bash
   git config core.hooksPath
   # Should be .git/hooks or empty
   ```

### Python Not Found

If Gitea reports "python3: command not found":

1. Check Python path on Gitea server:
   ```bash
   which python3
   which python
   ```

2. Update the hook to use the correct path:
   ```bash
   # In the hook, change:
   python3 -m py_compile ...
   # To:
   /usr/bin/python3 -m py_compile ...
   ```

### Bypassing the Hook (Emergency Only)

**⚠️ WARNING: Only use in emergencies with team approval!**

`git push --no-verify` only skips client-side hooks. It does not bypass a pre-receive hook, because that hook runs on the server. To disable the check temporarily, an administrator must:

1. Go to Gitea repository settings
2. Disable the pre-receive hook
3. Push your changes
4. Re-enable the hook immediately

## How It Works

1. **Hook Invocation**: Git calls the pre-receive hook before accepting a push
2. **File Discovery**: Git passes one line per pushed ref on stdin (old revision, new revision, ref name). The hook derives the file list from those revisions with `git diff --name-only`, or with `git ls-tree` for a newly created branch
3. **Python Detection**: Filters for .py files only
4. **Syntax Check**: Extracts each file and compiles it with `py_compile` (`python3 -m py_compile` in the Bash version)
5. **Error Reporting**: Collects all errors and displays them
6.
**Decision**: Exits with code 1 to reject or 0 to accept

## Performance Considerations

- For pushes to an existing branch, the hook only checks changed files, not the entire repository
- When a new branch is pushed, the hook checks every Python file in the pushed commit, because there is no previous revision to compare against
- Syntax checking is fast (typically <100ms per file)
- Large pushes (100+ files) may take a few seconds

## Security Notes

- The hook runs on the Gitea server with the server's Python
- No code is executed, only syntax-checked
- Temporary files are created in a secure temp directory and cleaned up

## Support

For issues or questions:
1. Check Gitea logs: `/var/log/gitea/gitea.log`
2. Test the hook locally first
3. Review the hook script for your specific environment

## Related Files

- `.githooks/pre-receive` - Bash implementation
- `.githooks/pre-receive.py` - Python implementation
- `.githooks/pre-commit` - Client-side secret detection hook