[BEZALEL] Add forge health check — artifact integrity and security scanner
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Failing after 7s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Failing after 0s
Tests / test (pull_request) Failing after 2s

Adds scripts/forge_health_check.py to scan wizard environments for:
- Missing .py source files with orphaned .pyc bytecode (GOFAI artifact integrity)
- Burn script clutter in production paths
- World-readable sensitive files (keystores, tokens, .env)
- Missing required environment variables

Includes full test suite in tests/test_forge_health_check.py covering
orphaned bytecode detection, burn script clutter, permission auto-fix,
and environment variable validation.

Addresses Allegro formalization audit findings:
- GOFAI source files missing (only .pyc remains)
- Nostr keystore world-readable
- eg burn scripts cluttering /root

/assign @bezalel
This commit is contained in:
2026-04-06 22:37:32 +00:00
parent 4532c123a0
commit 89730e8e90
2 changed files with 436 additions and 0 deletions

261
scripts/forge_health_check.py Executable file
View File

@@ -0,0 +1,261 @@
#!/usr/bin/env python3
"""Forge Health Check — Build verification and artifact integrity scanner.
Scans wizard environments for:
- Missing source files (.pyc without .py) — Allegro finding: GOFAI source files gone
- Burn script accumulation in /root or wizard directories
- World-readable sensitive files (keystores, tokens, configs)
- Missing required environment variables
Usage:
python scripts/forge_health_check.py /root/wizards
python scripts/forge_health_check.py /root/wizards --json
python scripts/forge_health_check.py /root/wizards --fix-permissions
"""
from __future__ import annotations
import argparse
import json
import os
import stat
import sys
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Iterable
SENSITIVE_FILE_PATTERNS = (
"keystore",
"password",
"private",
"apikey",
"api_key",
"credentials",
)
SENSITIVE_NAME_PREFIXES = (
"key_",
"keys_",
"token_",
"tokens_",
"secret_",
"secrets_",
".env",
"env.",
)
SENSITIVE_NAME_SUFFIXES = (
"_key",
"_keys",
"_token",
"_tokens",
"_secret",
"_secrets",
".key",
".env",
".token",
".secret",
)
SENSIBLE_PERMISSIONS = 0o600 # owner read/write only
REQUIRED_ENV_VARS = (
"GITEA_URL",
"GITEA_TOKEN",
"GITEA_USER",
)
BURN_SCRIPT_PATTERNS = (
"burn",
"ignite",
"inferno",
"scorch",
"char",
"blaze",
"ember",
)
@dataclass
class HealthFinding:
category: str
severity: str # critical, warning, info
path: str
message: str
suggestion: str = ""
@dataclass
class HealthReport:
target: str
findings: list[HealthFinding] = field(default_factory=list)
passed: bool = True
def add(self, finding: HealthFinding) -> None:
self.findings.append(finding)
if finding.severity == "critical":
self.passed = False
def scan_orphaned_bytecode(root: Path, report: HealthReport) -> None:
"""Detect .pyc files without corresponding .py source files."""
for pyc in root.rglob("*.pyc"):
py = pyc.with_suffix(".py")
if not py.exists():
# Also check __pycache__ naming convention
if pyc.name.startswith("__") and pyc.parent.name == "__pycache__":
stem = pyc.stem.split(".")[0]
py = pyc.parent.parent / f"{stem}.py"
if not py.exists():
report.add(
HealthFinding(
category="artifact_integrity",
severity="critical",
path=str(pyc),
message=f"Compiled bytecode without source: {pyc}",
suggestion="Restore missing .py source file from version control or backup",
)
)
def scan_burn_script_clutter(root: Path, report: HealthReport) -> None:
"""Detect burn scripts and other temporary artifacts outside proper staging."""
for path in root.iterdir():
if not path.is_file():
continue
lower = path.name.lower()
if any(pat in lower for pat in BURN_SCRIPT_PATTERNS):
report.add(
HealthFinding(
category="deployment_hygiene",
severity="warning",
path=str(path),
message=f"Burn script or temporary artifact in production path: {path.name}",
suggestion="Archive to a burn/ or tmp/ directory, or remove if no longer needed",
)
)
def _is_sensitive_filename(name: str) -> bool:
"""Check if a filename indicates it may contain secrets."""
lower = name.lower()
if lower == ".env.example":
return False
if any(pat in lower for pat in SENSITIVE_FILE_PATTERNS):
return True
if any(lower.startswith(pref) for pref in SENSITIVE_NAME_PREFIXES):
return True
if any(lower.endswith(suff) for suff in SENSITIVE_NAME_SUFFIXES):
return True
return False
def scan_sensitive_file_permissions(root: Path, report: HealthReport, fix: bool = False) -> None:
"""Detect world-readable sensitive files."""
for fpath in root.rglob("*"):
if not fpath.is_file():
continue
# Skip test files — real secrets should never live in tests/
if "/tests/" in str(fpath) or str(fpath).startswith(str(root / "tests")):
continue
if not _is_sensitive_filename(fpath.name):
continue
try:
mode = fpath.stat().st_mode
except OSError:
continue
# Readable by group or other
if mode & stat.S_IRGRP or mode & stat.S_IROTH:
was_fixed = False
if fix:
try:
fpath.chmod(SENSIBLE_PERMISSIONS)
was_fixed = True
except OSError:
pass
report.add(
HealthFinding(
category="security",
severity="critical",
path=str(fpath),
message=(
f"Sensitive file world-readable: {fpath.name} "
f"(mode={oct(mode & 0o777)})"
),
suggestion=(
f"Fixed permissions to {oct(SENSIBLE_PERMISSIONS)}"
if was_fixed
else f"Run 'chmod {oct(SENSIBLE_PERMISSIONS)[2:]} {fpath}'"
),
)
)
def scan_environment_variables(report: HealthReport) -> None:
"""Check for required environment variables."""
for var in REQUIRED_ENV_VARS:
if not os.environ.get(var):
report.add(
HealthFinding(
category="configuration",
severity="warning",
path="$" + var,
message=f"Required environment variable {var} is missing or empty",
suggestion="Export the variable in your shell profile or secrets manager",
)
)
def run_health_check(target: Path, fix_permissions: bool = False) -> HealthReport:
report = HealthReport(target=str(target.resolve()))
if target.exists():
scan_orphaned_bytecode(target, report)
scan_burn_script_clutter(target, report)
scan_sensitive_file_permissions(target, report, fix=fix_permissions)
scan_environment_variables(report)
return report
def print_report(report: HealthReport) -> None:
status = "PASS" if report.passed else "FAIL"
print(f"Forge Health Check: {status}")
print(f"Target: {report.target}")
print(f"Findings: {len(report.findings)}\n")
by_category: dict[str, list[HealthFinding]] = {}
for f in report.findings:
by_category.setdefault(f.category, []).append(f)
for category, findings in by_category.items():
print(f"[{category.upper()}]")
for f in findings:
print(f" {f.severity.upper()}: {f.message}")
if f.suggestion:
print(f" -> {f.suggestion}")
print()
def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(description="Forge Health Check")
parser.add_argument("target", nargs="?", default="/root/wizards", help="Root path to scan")
parser.add_argument("--json", action="store_true", help="Output JSON report")
parser.add_argument("--fix-permissions", action="store_true", help="Auto-fix file permissions")
args = parser.parse_args(argv)
target = Path(args.target)
report = run_health_check(target, fix_permissions=args.fix_permissions)
if args.json:
print(json.dumps(asdict(report), indent=2))
else:
print_report(report)
return 0 if report.passed else 1
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,175 @@
"""Tests for scripts/forge_health_check.py"""
import os
import stat
from pathlib import Path
# Import the script as a module
import sys
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
from forge_health_check import (
HealthFinding,
HealthReport,
_is_sensitive_filename,
run_health_check,
scan_burn_script_clutter,
scan_orphaned_bytecode,
scan_sensitive_file_permissions,
scan_environment_variables,
)
class TestIsSensitiveFilename:
def test_keystore_is_sensitive(self) -> None:
assert _is_sensitive_filename("keystore.json") is True
def test_env_example_is_not_sensitive(self) -> None:
assert _is_sensitive_filename(".env.example") is False
def test_env_file_is_sensitive(self) -> None:
assert _is_sensitive_filename(".env") is True
assert _is_sensitive_filename("production.env") is True
def test_test_file_with_key_is_not_sensitive(self) -> None:
assert _is_sensitive_filename("test_interrupt_key_match.py") is False
assert _is_sensitive_filename("test_api_key_providers.py") is False
class TestScanOrphanedBytecode:
def test_detects_pyc_without_py(self, tmp_path: Path) -> None:
pyc = tmp_path / "module.pyc"
pyc.write_bytes(b"\x00")
report = HealthReport(target=str(tmp_path))
scan_orphaned_bytecode(tmp_path, report)
assert len(report.findings) == 1
assert report.findings[0].category == "artifact_integrity"
assert report.findings[0].severity == "critical"
def test_ignores_pyc_with_py(self, tmp_path: Path) -> None:
(tmp_path / "module.py").write_text("pass")
pyc = tmp_path / "module.pyc"
pyc.write_bytes(b"\x00")
report = HealthReport(target=str(tmp_path))
scan_orphaned_bytecode(tmp_path, report)
assert len(report.findings) == 0
def test_detects_pycache_orphan(self, tmp_path: Path) -> None:
pycache = tmp_path / "__pycache__"
pycache.mkdir()
pyc = pycache / "module.cpython-312.pyc"
pyc.write_bytes(b"\x00")
report = HealthReport(target=str(tmp_path))
scan_orphaned_bytecode(tmp_path, report)
assert len(report.findings) == 1
assert "__pycache__" in report.findings[0].path
class TestScanBurnScriptClutter:
def test_detects_burn_script(self, tmp_path: Path) -> None:
(tmp_path / "burn_test.sh").write_text("#!/bin/bash")
report = HealthReport(target=str(tmp_path))
scan_burn_script_clutter(tmp_path, report)
assert len(report.findings) == 1
assert report.findings[0].category == "deployment_hygiene"
assert report.findings[0].severity == "warning"
def test_ignores_regular_files(self, tmp_path: Path) -> None:
(tmp_path / "deploy.sh").write_text("#!/bin/bash")
report = HealthReport(target=str(tmp_path))
scan_burn_script_clutter(tmp_path, report)
assert len(report.findings) == 0
class TestScanSensitiveFilePermissions:
def test_detects_world_readable_keystore(self, tmp_path: Path) -> None:
ks = tmp_path / "keystore.json"
ks.write_text("{}")
ks.chmod(0o644)
report = HealthReport(target=str(tmp_path))
scan_sensitive_file_permissions(tmp_path, report)
assert len(report.findings) == 1
assert report.findings[0].category == "security"
assert report.findings[0].severity == "critical"
assert "644" in report.findings[0].message
def test_auto_fixes_permissions(self, tmp_path: Path) -> None:
ks = tmp_path / "keystore.json"
ks.write_text("{}")
ks.chmod(0o644)
report = HealthReport(target=str(tmp_path))
scan_sensitive_file_permissions(tmp_path, report, fix=True)
assert len(report.findings) == 1
assert ks.stat().st_mode & 0o777 == 0o600
def test_ignores_safe_permissions(self, tmp_path: Path) -> None:
ks = tmp_path / "keystore.json"
ks.write_text("{}")
ks.chmod(0o600)
report = HealthReport(target=str(tmp_path))
scan_sensitive_file_permissions(tmp_path, report)
assert len(report.findings) == 0
def test_ignores_env_example(self, tmp_path: Path) -> None:
env = tmp_path / ".env.example"
env.write_text("# example")
env.chmod(0o644)
report = HealthReport(target=str(tmp_path))
scan_sensitive_file_permissions(tmp_path, report)
assert len(report.findings) == 0
def test_ignores_test_directory(self, tmp_path: Path) -> None:
tests_dir = tmp_path / "tests"
tests_dir.mkdir()
ks = tests_dir / "keystore.json"
ks.write_text("{}")
ks.chmod(0o644)
report = HealthReport(target=str(tmp_path))
scan_sensitive_file_permissions(tmp_path, report)
assert len(report.findings) == 0
class TestScanEnvironmentVariables:
def test_reports_missing_env_var(self, monkeypatch) -> None:
monkeypatch.delenv("GITEA_TOKEN", raising=False)
report = HealthReport(target=".")
scan_environment_variables(report)
missing = [f for f in report.findings if f.path == "$GITEA_TOKEN"]
assert len(missing) == 1
assert missing[0].severity == "warning"
def test_passes_when_env_vars_present(self, monkeypatch) -> None:
for var in ("GITEA_URL", "GITEA_TOKEN", "GITEA_USER"):
monkeypatch.setenv(var, "present")
report = HealthReport(target=".")
scan_environment_variables(report)
assert len(report.findings) == 0
class TestRunHealthCheck:
def test_full_run(self, tmp_path: Path, monkeypatch) -> None:
monkeypatch.setenv("GITEA_URL", "https://example.com")
monkeypatch.setenv("GITEA_TOKEN", "secret")
monkeypatch.setenv("GITEA_USER", "bezalel")
(tmp_path / "orphan.pyc").write_bytes(b"\x00")
(tmp_path / "burn_it.sh").write_text("#!/bin/bash")
ks = tmp_path / "keystore.json"
ks.write_text("{}")
ks.chmod(0o644)
report = run_health_check(tmp_path)
assert not report.passed
categories = {f.category for f in report.findings}
assert "artifact_integrity" in categories
assert "deployment_hygiene" in categories
assert "security" in categories
def test_clean_run_passes(self, tmp_path: Path, monkeypatch) -> None:
for var in ("GITEA_URL", "GITEA_TOKEN", "GITEA_USER"):
monkeypatch.setenv(var, "present")
(tmp_path / "module.py").write_text("pass")
report = run_health_check(tmp_path)
assert report.passed
assert len(report.findings) == 0