[BEZALEL] Add forge health check — artifact integrity and security scanner

Adds scripts/forge_health_check.py to scan wizard environments for:
- Missing .py source files with orphaned .pyc bytecode (GOFAI artifact integrity)
- Burn script clutter in production paths
- World-readable sensitive files (keystores, tokens, .env)
- Missing required environment variables

Includes full test suite in tests/test_forge_health_check.py covering orphaned bytecode detection, burn script clutter, permission auto-fix, and environment variable validation.

Addresses Allegro formalization audit findings:
- GOFAI source files missing (only .pyc remains)
- Nostr keystore world-readable
- eg burn scripts cluttering /root

/assign @bezalel
2026-04-06 22:37:32 +00:00
parent 4532c123a0
commit 89730e8e90
2 changed files with 436 additions and 0 deletions
--- a/scripts/forge_health_check.py
+++ b/scripts/forge_health_check.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+"""Forge Health Check — Build verification and artifact integrity scanner.
+
+Scans wizard environments for:
+- Missing source files (.pyc without .py) — Allegro finding: GOFAI source files gone
+- Burn script accumulation in /root or wizard directories
+- World-readable sensitive files (keystores, tokens, configs)
+- Missing required environment variables
+
+Usage:
+    python scripts/forge_health_check.py /root/wizards
+    python scripts/forge_health_check.py /root/wizards --json
+    python scripts/forge_health_check.py /root/wizards --fix-permissions
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import stat
+import sys
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Iterable
+
+
+SENSITIVE_FILE_PATTERNS = (  # substrings: a hit anywhere in the lowercased filename marks it sensitive
+    "keystore",
+    "password",
+    "private",
+    "apikey",
+    "api_key",
+    "credentials",
+)
+
+SENSITIVE_NAME_PREFIXES = (  # compared with str.startswith against the lowercased filename
+    "key_",
+    "keys_",
+    "token_",
+    "tokens_",
+    "secret_",
+    "secrets_",
+    ".env",
+    "env.",
+)
+
+SENSITIVE_NAME_SUFFIXES = (  # compared with str.endswith against the lowercased filename
+    "_key",
+    "_keys",
+    "_token",
+    "_tokens",
+    "_secret",
+    "_secrets",
+    ".key",
+    ".env",
+    ".token",
+    ".secret",
+)
+
+SENSIBLE_PERMISSIONS = 0o600  # owner read/write only; the mode applied when permissions are auto-fixed
+
+REQUIRED_ENV_VARS = (  # each must be set and non-empty, otherwise a warning finding is reported
+    "GITEA_URL",
+    "GITEA_TOKEN",
+    "GITEA_USER",
+)
+
+BURN_SCRIPT_PATTERNS = (  # substrings matched against lowercased names of files directly under the target
+    "burn",
+    "ignite",
+    "inferno",
+    "scorch",
+    "char",  # NOTE(review): substring match also hits names such as "charts" or "character" — confirm intended
+    "blaze",
+    "ember",  # NOTE(review): also a substring of "member" and "december" — confirm intended
+)
+
+
+@dataclass
+class HealthFinding:  # one issue discovered by a scan; serialized through asdict() for --json output
+    category: str  # values used in this file: artifact_integrity, deployment_hygiene, security, configuration
+    severity: str  # critical, warning, info
+    path: str  # path of the offending file, or "$NAME" for an environment-variable finding
+    message: str  # human-readable description of the problem
+    suggestion: str = ""  # optional remediation hint; print_report omits it when empty
+
+
+@dataclass
+class HealthReport:  # aggregate result of the scans run against one target
+    target: str  # label of the scanned target (run_health_check stores the resolved path)
+    findings: list[HealthFinding] = field(default_factory=list)  # default_factory gives each report its own list
+    passed: bool = True  # flips to False once any critical finding is added
+
+    def add(self, finding: HealthFinding) -> None:  # record a finding and update the pass/fail state
+        self.findings.append(finding)
+        if finding.severity == "critical":  # only critical findings fail the report
+            self.passed = False
+
+
+def scan_orphaned_bytecode(root: Path, report: HealthReport) -> None:
+    """Add a critical finding for every .pyc under root (recursive) whose .py source is missing."""
+    for pyc in root.rglob("*.pyc"):
+        py = pyc.with_suffix(".py")  # legacy layout: module.pyc sits next to module.py
+        if not py.exists():
+            # PEP 3147 layout: __pycache__/<module>.<tag>.pyc belongs to ../<module>.py
+            if pyc.parent.name == "__pycache__":
+                stem = pyc.stem.split(".")[0]
+                py = pyc.parent.parent / f"{stem}.py"
+            if not py.exists():
+                report.add(
+                    HealthFinding(
+                        category="artifact_integrity",
+                        severity="critical",
+                        path=str(pyc),
+                        message=f"Compiled bytecode without source: {pyc}",
+                        suggestion="Restore missing .py source file from version control or backup",
+                    )
+                )
+
+
+def scan_burn_script_clutter(root: Path, report: HealthReport) -> None:
+    """Add a warning for each file directly under root whose lowercased name contains a BURN_SCRIPT_PATTERNS entry."""
+    for path in root.iterdir():  # non-recursive: only direct children of root are inspected
+        if not path.is_file():
+            continue
+        lower = path.name.lower()
+        if any(pat in lower for pat in BURN_SCRIPT_PATTERNS):  # substring match, not whole-word
+            report.add(
+                HealthFinding(
+                    category="deployment_hygiene",
+                    severity="warning",
+                    path=str(path),
+                    message=f"Burn script or temporary artifact in production path: {path.name}",
+                    suggestion="Archive to a burn/ or tmp/ directory, or remove if no longer needed",
+                )
+            )
+
+
+def _is_sensitive_filename(name: str) -> bool:
+    """Return True if name looks like a secrets file; .env.example and test_*.py modules never do."""
+    lower = name.lower()
+    # Test modules are source code named after what they exercise (e.g.
+    # test_api_key_providers.py), so a secret-like word in the name is not a signal.
+    if lower == ".env.example" or (lower.startswith("test_") and lower.endswith(".py")):
+        return False
+    if any(pat in lower for pat in SENSITIVE_FILE_PATTERNS):
+        return True
+    if any(lower.startswith(pref) for pref in SENSITIVE_NAME_PREFIXES):
+        return True
+    return any(lower.endswith(suff) for suff in SENSITIVE_NAME_SUFFIXES)
+
+
+def scan_sensitive_file_permissions(root: Path, report: HealthReport, fix: bool = False) -> None:
+    """Report sensitive files readable by group or others; fix=True also chmods them to SENSIBLE_PERMISSIONS."""
+    for fpath in root.rglob("*"):
+        if not fpath.is_file():
+            continue
+        # Skip test files — real secrets should never live in tests/ (directories below root only)
+        if "tests" in fpath.relative_to(root).parts[:-1]:
+            continue
+        if not _is_sensitive_filename(fpath.name):
+            continue
+
+        try:
+            mode = fpath.stat().st_mode
+        except OSError:  # stat failed (e.g. the file vanished mid-scan); skip it
+            continue
+
+        # Readable by group or other
+        if mode & stat.S_IRGRP or mode & stat.S_IROTH:
+            was_fixed = False
+            if fix:
+                try:
+                    fpath.chmod(SENSIBLE_PERMISSIONS)
+                    was_fixed = True
+                except OSError:  # best effort: fall through to the manual chmod suggestion
+                    pass
+
+            report.add(  # added as critical even when the fix succeeded; only the suggestion text differs
+                HealthFinding(
+                    category="security",
+                    severity="critical",
+                    path=str(fpath),
+                    message=(
+                        f"Sensitive file world-readable: {fpath.name} "
+                        f"(mode={oct(mode & 0o777)})"
+                    ),
+                    suggestion=(
+                        f"Fixed permissions to {oct(SENSIBLE_PERMISSIONS)}"
+                        if was_fixed
+                        else f"Run 'chmod {oct(SENSIBLE_PERMISSIONS)[2:]} {fpath}'"
+                    ),
+                )
+            )
+
+
+def scan_environment_variables(report: HealthReport) -> None:
+    """Add a warning finding for each REQUIRED_ENV_VARS entry that is unset or empty in os.environ."""
+    for var in REQUIRED_ENV_VARS:
+        if not os.environ.get(var):  # None (unset) and "" (empty) are both treated as missing
+            report.add(
+                HealthFinding(
+                    category="configuration",
+                    severity="warning",
+                    path="$" + var,
+                    message=f"Required environment variable {var} is missing or empty",
+                    suggestion="Export the variable in your shell profile or secrets manager",
+                )
+            )
+
+
+def run_health_check(target: Path, fix_permissions: bool = False) -> HealthReport:  # run all scans, return the report
+    report = HealthReport(target=str(target.resolve()))
+    if target.is_dir():  # filesystem scans need a directory: iterdir() raises NotADirectoryError on a plain file
+        scan_orphaned_bytecode(target, report)
+        scan_burn_script_clutter(target, report)
+        scan_sensitive_file_permissions(target, report, fix=fix_permissions)
+    scan_environment_variables(report)  # environment check runs whether or not the target exists
+    return report
+
+
+def print_report(report: HealthReport) -> None:  # print a human-readable report to stdout, grouped by category
+    status = "PASS" if report.passed else "FAIL"
+    print(f"Forge Health Check: {status}")
+    print(f"Target: {report.target}")
+    print(f"Findings: {len(report.findings)}\n")
+
+    by_category: dict[str, list[HealthFinding]] = {}  # categories keep the order in which they first appear
+    for f in report.findings:
+        by_category.setdefault(f.category, []).append(f)
+
+    for category, findings in by_category.items():
+        print(f"[{category.upper()}]")
+        for f in findings:
+            print(f"  {f.severity.upper()}: {f.message}")
+            if f.suggestion:  # an empty suggestion prints nothing
+                print(f"    -> {f.suggestion}")
+        print()  # blank line between category sections
+
+
+def main(argv: list[str] | None = None) -> int:  # CLI entry point; the return value is the process exit code
+    parser = argparse.ArgumentParser(description="Forge Health Check")
+    parser.add_argument("target", nargs="?", default="/root/wizards", help="Root path to scan")
+    parser.add_argument("--json", action="store_true", help="Output JSON report")
+    parser.add_argument("--fix-permissions", action="store_true", help="Auto-fix file permissions")
+    args = parser.parse_args(argv)  # argv=None makes argparse read the process command line
+
+    target = Path(args.target)
+    report = run_health_check(target, fix_permissions=args.fix_permissions)
+
+    if args.json:
+        print(json.dumps(asdict(report), indent=2))  # asdict also converts the nested HealthFinding entries
+    else:
+        print_report(report)
+
+    return 0 if report.passed else 1  # 1 when the report recorded any critical finding
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # propagate main()'s return value as the exit status
--- a/tests/test_forge_health_check.py
+++ b/tests/test_forge_health_check.py
@@ -0,0 +1,175 @@
+"""Tests for scripts/forge_health_check.py"""
+
+import os
+import stat
+from pathlib import Path
+
+# Import the script as a module
+import sys
+sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
+
+from forge_health_check import (
+    HealthFinding,
+    HealthReport,
+    _is_sensitive_filename,
+    run_health_check,
+    scan_burn_script_clutter,
+    scan_orphaned_bytecode,
+    scan_sensitive_file_permissions,
+    scan_environment_variables,
+)
+
+
+class TestIsSensitiveFilename:  # unit tests for the _is_sensitive_filename heuristic
+    def test_keystore_is_sensitive(self) -> None:
+        assert _is_sensitive_filename("keystore.json") is True
+
+    def test_env_example_is_not_sensitive(self) -> None:
+        assert _is_sensitive_filename(".env.example") is False
+
+    def test_env_file_is_sensitive(self) -> None:
+        assert _is_sensitive_filename(".env") is True
+        assert _is_sensitive_filename("production.env") is True
+
+    def test_test_file_with_key_is_not_sensitive(self) -> None:
+        assert _is_sensitive_filename("test_interrupt_key_match.py") is False
+        assert _is_sensitive_filename("test_api_key_providers.py") is False  # name contains the "api_key" pattern
+
+
+class TestScanOrphanedBytecode:  # NOTE(review): no case covers __pycache__ bytecode whose source exists one level up
+    def test_detects_pyc_without_py(self, tmp_path: Path) -> None:
+        pyc = tmp_path / "module.pyc"
+        pyc.write_bytes(b"\x00")
+        report = HealthReport(target=str(tmp_path))
+        scan_orphaned_bytecode(tmp_path, report)
+        assert len(report.findings) == 1
+        assert report.findings[0].category == "artifact_integrity"
+        assert report.findings[0].severity == "critical"
+
+    def test_ignores_pyc_with_py(self, tmp_path: Path) -> None:
+        (tmp_path / "module.py").write_text("pass")  # sibling source present, so no finding expected
+        pyc = tmp_path / "module.pyc"
+        pyc.write_bytes(b"\x00")
+        report = HealthReport(target=str(tmp_path))
+        scan_orphaned_bytecode(tmp_path, report)
+        assert len(report.findings) == 0
+
+    def test_detects_pycache_orphan(self, tmp_path: Path) -> None:
+        pycache = tmp_path / "__pycache__"
+        pycache.mkdir()
+        pyc = pycache / "module.cpython-312.pyc"  # tagged bytecode name; no module.py exists in tmp_path
+        pyc.write_bytes(b"\x00")
+        report = HealthReport(target=str(tmp_path))
+        scan_orphaned_bytecode(tmp_path, report)
+        assert len(report.findings) == 1
+        assert "__pycache__" in report.findings[0].path
+
+
+class TestScanBurnScriptClutter:  # tests for scan_burn_script_clutter on files directly under the target
+    def test_detects_burn_script(self, tmp_path: Path) -> None:
+        (tmp_path / "burn_test.sh").write_text("#!/bin/bash")  # name contains the "burn" pattern
+        report = HealthReport(target=str(tmp_path))
+        scan_burn_script_clutter(tmp_path, report)
+        assert len(report.findings) == 1
+        assert report.findings[0].category == "deployment_hygiene"
+        assert report.findings[0].severity == "warning"
+
+    def test_ignores_regular_files(self, tmp_path: Path) -> None:
+        (tmp_path / "deploy.sh").write_text("#!/bin/bash")  # contains none of the burn patterns
+        report = HealthReport(target=str(tmp_path))
+        scan_burn_script_clutter(tmp_path, report)
+        assert len(report.findings) == 0
+
+
+class TestScanSensitiveFilePermissions:  # tests for scan_sensitive_file_permissions, with and without fix=True
+    def test_detects_world_readable_keystore(self, tmp_path: Path) -> None:
+        ks = tmp_path / "keystore.json"
+        ks.write_text("{}")
+        ks.chmod(0o644)  # group- and other-readable
+        report = HealthReport(target=str(tmp_path))
+        scan_sensitive_file_permissions(tmp_path, report)
+        assert len(report.findings) == 1
+        assert report.findings[0].category == "security"
+        assert report.findings[0].severity == "critical"
+        assert "644" in report.findings[0].message  # message embeds the original octal mode
+
+    def test_auto_fixes_permissions(self, tmp_path: Path) -> None:
+        ks = tmp_path / "keystore.json"
+        ks.write_text("{}")
+        ks.chmod(0o644)
+        report = HealthReport(target=str(tmp_path))
+        scan_sensitive_file_permissions(tmp_path, report, fix=True)
+        assert len(report.findings) == 1  # the finding is still recorded after a successful fix
+        assert ks.stat().st_mode & 0o777 == 0o600
+
+    def test_ignores_safe_permissions(self, tmp_path: Path) -> None:
+        ks = tmp_path / "keystore.json"
+        ks.write_text("{}")
+        ks.chmod(0o600)  # owner-only, so nothing to report
+        report = HealthReport(target=str(tmp_path))
+        scan_sensitive_file_permissions(tmp_path, report)
+        assert len(report.findings) == 0
+
+    def test_ignores_env_example(self, tmp_path: Path) -> None:
+        env = tmp_path / ".env.example"
+        env.write_text("# example")
+        env.chmod(0o644)
+        report = HealthReport(target=str(tmp_path))
+        scan_sensitive_file_permissions(tmp_path, report)
+        assert len(report.findings) == 0
+
+    def test_ignores_test_directory(self, tmp_path: Path) -> None:
+        tests_dir = tmp_path / "tests"
+        tests_dir.mkdir()
+        ks = tests_dir / "keystore.json"  # sensitive name, but located under a tests/ directory
+        ks.write_text("{}")
+        ks.chmod(0o644)
+        report = HealthReport(target=str(tmp_path))
+        scan_sensitive_file_permissions(tmp_path, report)
+        assert len(report.findings) == 0
+
+
+class TestScanEnvironmentVariables:  # tests for scan_environment_variables using monkeypatched os.environ
+    def test_reports_missing_env_var(self, monkeypatch) -> None:
+        monkeypatch.delenv("GITEA_TOKEN", raising=False)  # raising=False: fine if it was never set
+        report = HealthReport(target=".")
+        scan_environment_variables(report)
+        missing = [f for f in report.findings if f.path == "$GITEA_TOKEN"]  # other variables may also be unset
+        assert len(missing) == 1
+        assert missing[0].severity == "warning"
+
+    def test_passes_when_env_vars_present(self, monkeypatch) -> None:
+        for var in ("GITEA_URL", "GITEA_TOKEN", "GITEA_USER"):
+            monkeypatch.setenv(var, "present")
+        report = HealthReport(target=".")
+        scan_environment_variables(report)
+        assert len(report.findings) == 0
+
+
+class TestRunHealthCheck:  # end-to-end tests of run_health_check against a temporary directory
+    def test_full_run(self, tmp_path: Path, monkeypatch) -> None:
+        monkeypatch.setenv("GITEA_URL", "https://example.com")
+        monkeypatch.setenv("GITEA_TOKEN", "secret")
+        monkeypatch.setenv("GITEA_USER", "bezalel")
+
+        (tmp_path / "orphan.pyc").write_bytes(b"\x00")  # triggers artifact_integrity
+        (tmp_path / "burn_it.sh").write_text("#!/bin/bash")  # triggers deployment_hygiene
+        ks = tmp_path / "keystore.json"
+        ks.write_text("{}")
+        ks.chmod(0o644)  # triggers security
+
+        report = run_health_check(tmp_path)
+        assert not report.passed
+        categories = {f.category for f in report.findings}
+        assert "artifact_integrity" in categories
+        assert "deployment_hygiene" in categories
+        assert "security" in categories
+
+    def test_clean_run_passes(self, tmp_path: Path, monkeypatch) -> None:
+        for var in ("GITEA_URL", "GITEA_TOKEN", "GITEA_USER"):
+            monkeypatch.setenv(var, "present")
+
+        (tmp_path / "module.py").write_text("pass")  # source only, no bytecode
+        report = run_health_check(tmp_path)
+        assert report.passed
+        assert len(report.findings) == 0