hermes-agent/scripts/forge_health_check.py

#!/usr/bin/env python3
"""Forge Health Check — Build verification and artifact integrity scanner.

Scans wizard environments for:
- Missing source files (.pyc without .py) — Allegro finding: GOFAI source files gone
- Burn script accumulation in /root or wizard directories
- World-readable sensitive files (keystores, tokens, configs)
- Missing required environment variables

Usage:
    python scripts/forge_health_check.py /root/wizards
    python scripts/forge_health_check.py /root/wizards --json
    python scripts/forge_health_check.py /root/wizards --fix-permissions
"""

from __future__ import annotations

import argparse
import json
import os
import stat
import sys
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Iterable


# Substrings that mark a filename as sensitive when they occur anywhere in
# the lower-cased name (see _is_sensitive_filename).
SENSITIVE_FILE_PATTERNS = (
    "keystore",
    "password",
    "private",
    "apikey",
    "api_key",
    "credentials",
)

# Lower-cased filenames starting with any of these are treated as sensitive.
SENSITIVE_NAME_PREFIXES = (
    "key_",
    "keys_",
    "token_",
    "tokens_",
    "secret_",
    "secrets_",
    ".env",
    "env.",
)

# Lower-cased filenames ending with any of these are treated as sensitive.
SENSITIVE_NAME_SUFFIXES = (
    "_key",
    "_keys",
    "_token",
    "_tokens",
    "_secret",
    "_secrets",
    ".key",
    ".env",
    ".token",
    ".secret",
)

# Mode applied to sensitive files when permission fixing is requested, and
# quoted in the manual "chmod" suggestion otherwise.
SENSIBLE_PERMISSIONS = 0o600  # owner read/write only

# Environment variables that scan_environment_variables reports as a warning
# when they are unset or empty.
REQUIRED_ENV_VARS = (
    "GITEA_URL",
    "GITEA_TOKEN",
    "GITEA_USER",
)

# Substrings that flag a file directly inside the scan root as a burn script
# or temporary artifact (matched against the lower-cased filename).
# NOTE(review): these are plain substring matches, so "char" also matches
# names such as "charset.py" or "characters.json" — confirm this is intended.
BURN_SCRIPT_PATTERNS = (
    "burn",
    "ignite",
    "inferno",
    "scorch",
    "char",
    "blaze",
    "ember",
)


@dataclass
class HealthFinding:
    """A single issue discovered by one of the scanners in this module."""

    # Grouping key used by print_report; the scanners in this file emit
    # "artifact_integrity", "deployment_hygiene", "security" and
    # "configuration".
    category: str
    # Only "critical" causes HealthReport.add to mark the report as failed.
    severity: str  # critical, warning, info
    # Filesystem path of the offending file, or "$NAME" for an environment
    # variable finding.
    path: str
    # Human-readable description of the problem.
    message: str
    # Optional remediation hint; print_report omits it when empty.
    suggestion: str = ""


@dataclass
class HealthReport:
    """Accumulated outcome of one health-check run against ``target``.

    ``passed`` starts out true and is cleared for good as soon as a finding
    whose severity is ``"critical"`` is recorded; warnings and informational
    findings are collected without affecting it.
    """

    target: str
    findings: list[HealthFinding] = field(default_factory=list)
    passed: bool = True

    def add(self, finding: HealthFinding) -> None:
        """Record *finding*, failing the report when it is critical."""
        self.findings.append(finding)
        if finding.severity != "critical":
            return
        self.passed = False


def scan_orphaned_bytecode(root: Path, report: HealthReport) -> None:
    """Detect .pyc files without corresponding .py source files.

    Two on-disk layouts are recognised for every ``*.pyc`` found below
    *root*:

    * a source file sitting next to the bytecode with the same name
      (``pkg/mod.pyc`` -> ``pkg/mod.py``);
    * the ``__pycache__`` layout, where ``pkg/__pycache__/mod.<tag>.pyc``
      (optionally ``mod.<tag>.opt-N.pyc``) belongs to ``pkg/mod.py``.

    A bytecode file for which neither candidate exists is recorded on
    *report* as a critical ``artifact_integrity`` finding.

    Args:
        root: Directory tree to scan recursively.
        report: Report that receives one finding per orphaned file.
    """
    for pyc in root.rglob("*.pyc"):
        candidates = [pyc.with_suffix(".py")]
        # Any file inside __pycache__ maps back to <parent>/<module>.py, not
        # only dunder modules such as __init__; the module name is everything
        # before the first dot (the rest is the interpreter tag).
        if pyc.parent.name == "__pycache__":
            module_name = pyc.stem.split(".")[0]
            candidates.append(pyc.parent.parent / f"{module_name}.py")

        if any(candidate.exists() for candidate in candidates):
            continue

        report.add(
            HealthFinding(
                category="artifact_integrity",
                severity="critical",
                path=str(pyc),
                message=f"Compiled bytecode without source: {pyc}",
                suggestion="Restore missing .py source file from version control or backup",
            )
        )


def scan_burn_script_clutter(root: Path, report: HealthReport) -> None:
    """Detect burn scripts and other temporary artifacts outside proper staging.

    Only regular files located directly inside *root* are inspected
    (sub-directories are not descended into). A file is flagged with a
    ``deployment_hygiene`` warning when its lower-cased name contains any of
    the substrings in ``BURN_SCRIPT_PATTERNS``.

    Args:
        root: Directory whose immediate children are checked. When it is not
            a directory (a regular file or a missing path) there is nothing
            to list and the function returns without adding findings.
        report: Report that receives one finding per flagged file.
    """
    # iterdir() raises on anything that is not a directory; a file target is
    # a legitimate (if unusual) CLI argument, so treat it as "no clutter".
    if not root.is_dir():
        return

    # Sorted so that the order of findings does not depend on the
    # filesystem's directory enumeration order.
    for path in sorted(root.iterdir()):
        if not path.is_file():
            continue
        lower = path.name.lower()
        if any(pat in lower for pat in BURN_SCRIPT_PATTERNS):
            report.add(
                HealthFinding(
                    category="deployment_hygiene",
                    severity="warning",
                    path=str(path),
                    message=f"Burn script or temporary artifact in production path: {path.name}",
                    suggestion="Archive to a burn/ or tmp/ directory, or remove if no longer needed",
                )
            )


def _is_sensitive_filename(name: str) -> bool:
    """Return True when *name* looks like a file that may hold secrets.

    Matching is case-insensitive. ``.env.example`` is explicitly exempt;
    otherwise a name is sensitive when it contains one of
    ``SENSITIVE_FILE_PATTERNS``, starts with one of
    ``SENSITIVE_NAME_PREFIXES`` or ends with one of
    ``SENSITIVE_NAME_SUFFIXES``.
    """
    lowered = name.lower()
    if lowered == ".env.example":
        return False
    return (
        any(fragment in lowered for fragment in SENSITIVE_FILE_PATTERNS)
        or lowered.startswith(SENSITIVE_NAME_PREFIXES)
        or lowered.endswith(SENSITIVE_NAME_SUFFIXES)
    )


def scan_sensitive_file_permissions(root: Path, report: HealthReport, fix: bool = False) -> None:
    """Detect sensitive files that are readable by group or other.

    Every regular file below *root* whose name is judged sensitive by
    ``_is_sensitive_filename`` is checked; files readable by group or other
    are recorded on *report* as critical ``security`` findings. Files that
    live inside a directory named ``tests`` (at any depth below *root*) are
    skipped.

    Args:
        root: Directory tree to scan recursively.
        report: Report that receives one finding per exposed file.
        fix: When true, try to chmod each exposed file to
            ``SENSIBLE_PERMISSIONS``. The finding is still recorded; its
            suggestion states whether the fix was applied.
    """
    for fpath in root.rglob("*"):
        if not fpath.is_file():
            continue
        # Skip test files — real secrets should never live in tests/.
        # Whole path components below root are compared, so look-alikes such
        # as "tests_backup/" or a top-level "tests.key" are still scanned and
        # the check does not depend on the path separator or on where root
        # itself is located.
        if "tests" in fpath.relative_to(root).parts[:-1]:
            continue
        if not _is_sensitive_filename(fpath.name):
            continue

        try:
            mode = fpath.stat().st_mode
        except OSError:
            # File vanished or is not stat-able; nothing to report on.
            continue

        # Readable by group or other
        if not (mode & stat.S_IRGRP or mode & stat.S_IROTH):
            continue

        was_fixed = False
        if fix:
            try:
                fpath.chmod(SENSIBLE_PERMISSIONS)
            except OSError:
                # Best effort: the finding below then carries the manual
                # chmod suggestion instead of the "Fixed" note.
                was_fixed = False
            else:
                was_fixed = True

        report.add(
            HealthFinding(
                category="security",
                severity="critical",
                path=str(fpath),
                message=(
                    f"Sensitive file world-readable: {fpath.name} "
                    f"(mode={oct(mode & 0o777)})"
                ),
                suggestion=(
                    f"Fixed permissions to {oct(SENSIBLE_PERMISSIONS)}"
                    if was_fixed
                    else f"Run 'chmod {oct(SENSIBLE_PERMISSIONS)[2:]} {fpath}'"
                ),
            )
        )


def scan_environment_variables(report: HealthReport) -> None:
    """Add a ``configuration`` warning for each unset or empty required variable.

    The variables checked are those listed in ``REQUIRED_ENV_VARS``; each
    finding uses ``$NAME`` as its path.
    """
    unset = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]
    for name in unset:
        finding = HealthFinding(
            category="configuration",
            severity="warning",
            path="$" + name,
            message=f"Required environment variable {name} is missing or empty",
            suggestion="Export the variable in your shell profile or secrets manager",
        )
        report.add(finding)


def run_health_check(target: Path, fix_permissions: bool = False) -> HealthReport:
    """Run every scanner against *target* and return the combined report.

    The filesystem scanners only run when *target* exists; the environment
    variable check always runs. ``fix_permissions`` is forwarded to the
    sensitive-file scanner as its ``fix`` flag.
    """
    report = HealthReport(target=str(target.resolve()))

    if target.exists():
        for scanner in (scan_orphaned_bytecode, scan_burn_script_clutter):
            scanner(target, report)
        scan_sensitive_file_permissions(target, report, fix=fix_permissions)

    scan_environment_variables(report)
    return report


def print_report(report: HealthReport) -> None:
    """Print a human-readable summary of *report* to standard output.

    A three-line header (status, target, finding count) is followed by one
    section per category, in the order each category first appears among
    the findings. Suggestions are printed only when non-empty.
    """
    verdict = "FAIL" if not report.passed else "PASS"
    print(f"Forge Health Check: {verdict}")
    print(f"Target: {report.target}")
    print(f"Findings: {len(report.findings)}\n")

    # Bucket findings per category; dict insertion order keeps first-seen order.
    grouped: dict[str, list[HealthFinding]] = {}
    for finding in report.findings:
        bucket = grouped.get(finding.category)
        if bucket is None:
            bucket = grouped[finding.category] = []
        bucket.append(finding)

    for category in grouped:
        print(f"[{category.upper()}]")
        for finding in grouped[category]:
            print(f"  {finding.severity.upper()}: {finding.message}")
            if not finding.suggestion:
                continue
            print(f"    -> {finding.suggestion}")
        print()


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Parses *argv* (argparse falls back to ``sys.argv`` when it is None),
    runs the health check and prints the report either as JSON or as text.

    Returns:
        0 when the report passed, 1 otherwise.
    """
    cli = argparse.ArgumentParser(description="Forge Health Check")
    cli.add_argument("target", nargs="?", default="/root/wizards", help="Root path to scan")
    cli.add_argument("--json", action="store_true", help="Output JSON report")
    cli.add_argument("--fix-permissions", action="store_true", help="Auto-fix file permissions")
    options = cli.parse_args(argv)

    report = run_health_check(Path(options.target), fix_permissions=options.fix_permissions)

    if not options.json:
        print_report(report)
    else:
        print(json.dumps(asdict(report), indent=2))

    if report.passed:
        return 0
    return 1


if __name__ == "__main__":
    # Use main()'s return value as the process exit status
    # (0 when the report passed, 1 otherwise).
    raise SystemExit(main())