Merge branch 'main' into burn/20260409-1247-self-healing-safe

2026-04-10 09:37:36 +00:00
parent 5a649966ab fa9e83ac95
commit 71bf82d9fb
5 changed files with 726 additions and 77 deletions
--- a/scripts/architecture_linter.py
+++ b/scripts/architecture_linter.py
@@ -9,7 +9,7 @@ import re
 SOVEREIGN_RULES = [
    (r"https?://(api\.openai\.com|api\.anthropic\.com)", "CRITICAL: External cloud API detected. Use local custom_provider instead."),
    (r"provider: (openai|anthropic)", "WARNING: Direct cloud provider used. Ensure fallback_model is configured."),
-    (r"api_key: ['"][^'"\s]{10,}['"]", "SECURITY: Hardcoded API key detected. Use environment variables.")
+    (r"api_key:\s*['\"][A-Za-z0-9_\-]{16,}['\"]", "SECURITY: Hardcoded API key detected. Use environment variables.")
 ]

 def lint_file(path):
--- a/scripts/architecture_linter_v2.py
+++ b/scripts/architecture_linter_v2.py
@@ -5,122 +5,233 @@ Part of the Gemini Sovereign Governance System.

 Enforces architectural boundaries, security, and documentation standards
 across the Timmy Foundation fleet.
+
+Refs: #437 — repo-aware, test-backed, CI-enforced.
 """

+import argparse
 import os
 import re
 import sys
-import argparse
 from pathlib import Path

 # --- CONFIGURATION ---
+
 SOVEREIGN_KEYWORDS = ["mempalace", "sovereign_store", "tirith", "bezalel", "nexus"]
-IP_REGEX = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
-API_KEY_REGEX = r'(?:api_key|secret|token|password|auth_token)\s*[:=]\s*["\'][a-zA-Z0-9_\-]{20,}["\']'
+
+# Public IP addresses: the lookahead skips 127.x.x.x, 10.x.x.x, 172.16-31.x.x, 192.168.x.x (0.x.x.x is filtered in check_hardcoded_ips)
+IP_REGEX = r'\b(?!(?:127|10|192\.168|172\.(?:1[6-9]|2\d|3[01]))\.)' \
+           r'(?:\d{1,3}\.){3}\d{1,3}\b'
+
+# API key / secret patterns — catches sk-, sk-ant-, AKIA, ghp_, glpat- prefixes and generic key/secret/token assignments.
+API_KEY_PATTERNS = [
+    r'sk-[A-Za-z0-9]{20,}',               # OpenAI-style
+    r'sk-ant-[A-Za-z0-9\-]{20,}',          # Anthropic
+    r'AKIA[A-Z0-9]{16}',                    # AWS access key
+    r'ghp_[A-Za-z0-9]{36}',                # GitHub PAT
+    r'glpat-[A-Za-z0-9\-]{20,}',           # GitLab PAT
+    r'(?:api[_-]?key|secret|token)\s*[:=]\s*["\'][A-Za-z0-9_\-]{16,}["\']',
+]
+
+# Sovereignty rules (carried from v1)
+SOVEREIGN_RULES = [
+    (r'https?://api\.openai\.com', 'External cloud API: api.openai.com. Use local custom_provider.'),
+    (r'https?://api\.anthropic\.com', 'External cloud API: api.anthropic.com. Use local custom_provider.'),
+    (r'provider:\s*(?:openai|anthropic)\b', 'Direct cloud provider. Ensure fallback_model is configured.'),
+]
+
+# File extensions to scan
+SCAN_EXTENSIONS = {'.py', '.ts', '.tsx', '.js', '.yaml', '.yml', '.json', '.env', '.sh', '.cfg', '.toml'}
+SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', '.eggs'}
+
+
+class LinterResult:
+    """Structured result container for programmatic access."""
+
+    def __init__(self, repo_path: str, repo_name: str):
+        self.repo_path = repo_path
+        self.repo_name = repo_name
+        self.errors: list[str] = []
+        self.warnings: list[str] = []
+
+    @property
+    def passed(self) -> bool:
+        return len(self.errors) == 0
+
+    @property
+    def violation_count(self) -> int:
+        return len(self.errors)
+
+    def summary(self) -> str:
+        lines = [f"--- Architecture Linter v2: {self.repo_name} ---"]
+        for w in self.warnings:
+            lines.append(f"  [W] {w}")
+        for e in self.errors:
+            lines.append(f"  [E] {e}")
+        status = "PASSED" if self.passed else f"FAILED ({self.violation_count} violations)"
+        lines.append(f"\nResult: {status}")
+        return '\n'.join(lines)
+

 class Linter:
    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path).resolve()
+        if not self.repo_path.is_dir():
+            raise FileNotFoundError(f"Repository path does not exist: {self.repo_path}")
        self.repo_name = self.repo_path.name
-        self.errors = []
+        self.result = LinterResult(str(self.repo_path), self.repo_name)

-    def log_error(self, message: str, file: str = None, line: int = None):
-        loc = f"{file}:{line}" if file and line else (file if file else "General")
-        self.errors.append(f"[{loc}] {message}")
+    # --- helpers ---
+
+    def _scan_files(self, extensions=None):
+        """Yield (Path, content) for files whose suffix or whole name is in *extensions*."""
+        exts = extensions or SCAN_EXTENSIONS
+        for root, dirs, files in os.walk(self.repo_path):
+            dirs[:] = [d for d in dirs if d not in SKIP_DIRS]  # prune in place so os.walk skips them
+            for fname in files:
+                # Path('.env').suffix is '' (a leading dot is not a suffix), so a bare
+                # dotfile such as '.env' has to be matched by its full name as well.
+                if fname != '.env.example' and (Path(fname).suffix in exts or fname in exts):
+                    fpath = Path(root) / fname
+                    try:
+                        content = fpath.read_text(errors='ignore')
+                    except Exception:
+                        continue  # unreadable file: best-effort scan, skip it
+                    yield fpath, content
+
+    def _line_no(self, content: str, offset: int) -> int:
+        return content.count('\n', 0, offset) + 1
+
+    # --- checks ---

    def check_sidecar_boundary(self):
-        """Rule 1: No sovereign code in hermes-agent (sidecar boundary)"""
-        if self.repo_name == "hermes-agent":
-            for root, _, files in os.walk(self.repo_path):
-                if "node_modules" in root or ".git" in root:
-                    continue
-                for file in files:
-                    if file.endswith((".py", ".ts", ".js", ".tsx")):
-                        path = Path(root) / file
-                        content = path.read_text(errors="ignore")
-                        for kw in SOVEREIGN_KEYWORDS:
-                            if kw in content.lower():
-                                # Exception: imports or comments might be okay, but we're strict for now
-                                self.log_error(f"Sovereign keyword '{kw}' found in hermes-agent. Violates sidecar boundary.", str(path.relative_to(self.repo_path)))
+        """No sovereign code in hermes-agent (sidecar boundary)."""
+        if self.repo_name != 'hermes-agent':
+            return
+        for fpath, content in self._scan_files():
+            for kw in SOVEREIGN_KEYWORDS:
+                if kw in content.lower():
+                    rel = str(fpath.relative_to(self.repo_path))
+                    self.result.errors.append(
+                        f"Sovereign keyword '{kw}' in hermes-agent violates sidecar boundary. [{rel}]"
+                    )

    def check_hardcoded_ips(self):
-        """Rule 2: No hardcoded IPs (use domain names)"""
-        for root, _, files in os.walk(self.repo_path):
-            if "node_modules" in root or ".git" in root:
-                continue
-            for file in files:
-                if file.endswith((".py", ".ts", ".js", ".tsx", ".yaml", ".yml", ".json")):
-                    path = Path(root) / file
-                    content = path.read_text(errors="ignore")
-                    matches = re.finditer(IP_REGEX, content)
-                    for match in matches:
-                        ip = match.group()
-                        if ip in ["127.0.0.1", "0.0.0.0"]:
-                            continue
-                        line_no = content.count('\n', 0, match.start()) + 1
-                        self.log_error(f"Hardcoded IP address '{ip}' found. Use domain names or environment variables.", str(path.relative_to(self.repo_path)), line_no)
+        """No hardcoded public IPs — use DNS or env vars."""
+        for fpath, content in self._scan_files():
+            for m in re.finditer(IP_REGEX, content):
+                ip = m.group()
+                # private ranges are already excluded by the IP_REGEX lookahead; here skip 0.x.x.x (e.g. 0.0.0.0)
+                if ip.startswith('0.'):
+                    continue
+                line = self._line_no(content, m.start())
+                rel = str(fpath.relative_to(self.repo_path))
+                self.result.errors.append(
+                    f"Hardcoded IP '{ip}'. Use DNS or env vars. [{rel}:{line}]"
+                )

    def check_api_keys(self):
-        """Rule 3: No cloud API keys committed to repos"""
-        for root, _, files in os.walk(self.repo_path):
-            if "node_modules" in root or ".git" in root:
-                continue
-            for file in files:
-                if file.endswith((".py", ".ts", ".js", ".tsx", ".yaml", ".yml", ".json", ".env")):
-                    if file == ".env.example":
-                        continue
-                    path = Path(root) / file
-                    content = path.read_text(errors="ignore")
-                    matches = re.finditer(API_KEY_REGEX, content, re.IGNORECASE)
-                    for match in matches:
-                        line_no = content.count('\n', 0, match.start()) + 1
-                        self.log_error("Potential API key or secret found in code.", str(path.relative_to(self.repo_path)), line_no)
+        """No cloud API keys / secrets committed."""
+        for fpath, content in self._scan_files():
+            for pattern in API_KEY_PATTERNS:
+                for m in re.finditer(pattern, content, re.IGNORECASE):
+                    line = self._line_no(content, m.start())
+                    rel = str(fpath.relative_to(self.repo_path))
+                    self.result.errors.append(
+                        f"Potential secret / API key detected. [{rel}:{line}]"
+                    )
+
+    def check_sovereignty_rules(self):
+        """V1 sovereignty rules: no direct cloud API endpoints or providers."""
+        for fpath, content in self._scan_files({'.py', '.ts', '.tsx', '.js', '.yaml', '.yml'}):
+            for pattern, msg in SOVEREIGN_RULES:
+                for m in re.finditer(pattern, content):
+                    line = self._line_no(content, m.start())
+                    rel = str(fpath.relative_to(self.repo_path))
+                    self.result.errors.append(f"{msg} [{rel}:{line}]")

    def check_soul_canonical(self):
-        """Rule 4: SOUL.md exists and is canonical in exactly one location"""
-        soul_path = self.repo_path / "SOUL.md"
-        if self.repo_name == "timmy-config":
+        """SOUL.md must exist exactly in timmy-config root."""
+        soul_path = self.repo_path / 'SOUL.md'
+        if self.repo_name == 'timmy-config':
            if not soul_path.exists():
-                self.log_error("SOUL.md is missing from the canonical location (timmy-config root).")
+                self.result.errors.append(
+                    'SOUL.md missing from canonical location (timmy-config root).'
+                )
        else:
            if soul_path.exists():
-                self.log_error("SOUL.md found in non-canonical repo. It should only live in timmy-config.")
+                self.result.errors.append(
+                    'SOUL.md found in non-canonical repo. Must live only in timmy-config.'
+                )

    def check_readme(self):
-        """Rule 5: Every repo has a README with current truth"""
-        readme_path = self.repo_path / "README.md"
-        if not readme_path.exists():
-            self.log_error("README.md is missing.")
+        """Every repo must have a substantive README."""
+        readme = self.repo_path / 'README.md'
+        if not readme.exists():
+            self.result.errors.append('README.md is missing.')
        else:
-            content = readme_path.read_text(errors="ignore")
+            content = readme.read_text(errors='ignore')
            if len(content.strip()) < 50:
-                self.log_error("README.md is too short or empty. Provide current truth about the repo.")
+                self.result.warnings.append(
+                    'README.md is very short (<50 chars). Provide current truth about the repo.'
+                )

-    def run(self):
-        print(f"--- Gemini Linter: Auditing {self.repo_name} ---")
+    # --- runner ---
+
+    def run(self) -> LinterResult:
+        """Execute all checks and return the result."""
        self.check_sidecar_boundary()
        self.check_hardcoded_ips()
        self.check_api_keys()
+        self.check_sovereignty_rules()
        self.check_soul_canonical()
        self.check_readme()
+        return self.result

-        if self.errors:
-            print(f"\n[FAILURE] Found {len(self.errors)} architectural violations:")
-            for err in self.errors:
-                print(f"  - {err}")
-            return False
-        else:
-            print("\n[SUCCESS] Architecture is sound. Sovereignty maintained.")
-            return True

 def main():
-    parser = argparse.ArgumentParser(description="Gemini Architecture Linter v2")
-    parser.add_argument("repo_path", nargs="?", default=".", help="Path to the repository to lint")
+    parser = argparse.ArgumentParser(
+        description='Gemini Architecture Linter v2 — repo-aware sovereignty gate.'
+    )
+    parser.add_argument(
+        'repo_path', nargs='?', default='.',
+        help='Path to the repository to lint (default: cwd).',
+    )
+    parser.add_argument(
+        '--repo', dest='repo_flag', default=None,
+        help='Explicit repo path (alias for positional arg).',
+    )
+    parser.add_argument(
+        '--json', dest='json_output', action='store_true',
+        help='Emit machine-readable JSON instead of human text.',
+    )
    args = parser.parse_args()

-    linter = Linter(args.repo_path)
-    success = linter.run()
-    sys.exit(0 if success else 1)
+    path = args.repo_flag if args.repo_flag else args.repo_path

-if __name__ == "__main__":
+    try:
+        linter = Linter(path)
+    except FileNotFoundError as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        sys.exit(2)
+
+    result = linter.run()
+
+    if args.json_output:
+        import json as _json
+        out = {
+            'repo': result.repo_name,
+            'passed': result.passed,
+            'violation_count': result.violation_count,
+            'errors': result.errors,
+            'warnings': result.warnings,
+        }
+        print(_json.dumps(out, indent=2))
+    else:
+        print(result.summary())
+
+    sys.exit(0 if result.passed else 1)
+
+
+if __name__ == '__main__':
    main()