2026-04-10 09:36:23 +00:00
4 changed files with 462 additions and 77 deletions
--- a/.gitea/workflows/architecture-lint.yml
+++ b/.gitea/workflows/architecture-lint.yml
@@ -0,0 +1,41 @@
+# architecture-lint.yml — CI gate for the Architecture Linter v2
+# Refs: #437 — repo-aware, test-backed, CI-enforced.
+#
+# Runs on every PR to main/master and on every push to main.  Compile-checks
+# the linter, runs the linter tests, and finally lints the repo itself.
+
+name: Architecture Lint
+
+on:
+  pull_request:
+    branches: [main, master]
+  push:
+    branches: [main]
+
+jobs:
+  linter-tests:
+    name: Linter Tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install test deps
+        run: pip install pytest
+      - name: Compile-check linter
+        run: python3 -m py_compile scripts/architecture_linter_v2.py
+      - name: Run linter tests
+        run: python3 -m pytest tests/test_linter.py -v
+
+  lint-repo:
+    name: Lint Repository
+    runs-on: ubuntu-latest
+    needs: linter-tests
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Run architecture linter
+        run: python3 scripts/architecture_linter_v2.py .
--- a/scripts/architecture_linter.py
+++ b/scripts/architecture_linter.py
@@ -9,7 +9,7 @@ import re
 SOVEREIGN_RULES = [
    (r"https?://(api\.openai\.com|api\.anthropic\.com)", "CRITICAL: External cloud API detected. Use local custom_provider instead."),
    (r"provider: (openai|anthropic)", "WARNING: Direct cloud provider used. Ensure fallback_model is configured."),
-    (r"api_key: ['"][^'"\s]{10,}['"]", "SECURITY: Hardcoded API key detected. Use environment variables.")
+    (r"api_key:\s*['\"][A-Za-z0-9_\-]{16,}['\"]", "SECURITY: Hardcoded API key detected. Use environment variables.")
 ]

 def lint_file(path):
--- a/scripts/architecture_linter_v2.py
+++ b/scripts/architecture_linter_v2.py
@@ -5,122 +5,233 @@ Part of the Gemini Sovereign Governance System.

 Enforces architectural boundaries, security, and documentation standards
 across the Timmy Foundation fleet.
+
+Refs: #437 — repo-aware, test-backed, CI-enforced.
 """

+import argparse
 import os
 import re
 import sys
-import argparse
 from pathlib import Path

 # --- CONFIGURATION ---
+
 SOVEREIGN_KEYWORDS = ["mempalace", "sovereign_store", "tirith", "bezalel", "nexus"]
-IP_REGEX = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
-API_KEY_REGEX = r'(?:api_key|secret|token|password|auth_token)\s*[:=]\s*["\'][a-zA-Z0-9_\-]{20,}["\']'
+
+# IP addresses (lookahead skips 127.x, 10.x, 172.16-31.x and 192.168.x; 0.x is filtered in check_hardcoded_ips)
+IP_REGEX = r'\b(?!(?:127|10|192\.168|172\.(?:1[6-9]|2\d|3[01]))\.)' \
+           r'(?:\d{1,3}\.){3}\d{1,3}\b'
+
+# API key / secret patterns — catches sk- (OpenAI), sk-ant- (Anthropic), AKIA (AWS), ghp_/glpat- tokens, and generic key assignments.
+API_KEY_PATTERNS = [
+    r'sk-[A-Za-z0-9]{20,}',               # OpenAI-style
+    r'sk-ant-[A-Za-z0-9\-]{20,}',          # Anthropic
+    r'AKIA[A-Z0-9]{16}',                    # AWS access key
+    r'ghp_[A-Za-z0-9]{36}',                # GitHub PAT
+    r'glpat-[A-Za-z0-9\-]{20,}',           # GitLab PAT
+    r'(?:api[_-]?key|secret|token)\s*[:=]\s*["\'][A-Za-z0-9_\-]{16,}["\']',
+]
+
+# Sovereignty rules (carried from v1)
+SOVEREIGN_RULES = [
+    (r'https?://api\.openai\.com', 'External cloud API: api.openai.com. Use local custom_provider.'),
+    (r'https?://api\.anthropic\.com', 'External cloud API: api.anthropic.com. Use local custom_provider.'),
+    (r'provider:\s*(?:openai|anthropic)\b', 'Direct cloud provider. Ensure fallback_model is configured.'),
+]
+
+# File extensions to scan
+SCAN_EXTENSIONS = {'.py', '.ts', '.tsx', '.js', '.yaml', '.yml', '.json', '.env', '.sh', '.cfg', '.toml'}
+SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', '.eggs'}
+
+
+class LinterResult:
+    """Collects the errors and warnings produced by one lint run."""
+
+    def __init__(self, repo_path: str, repo_name: str):
+        self.repo_path = repo_path
+        self.repo_name = repo_name
+        self.errors: list[str] = []    # violations; any entry fails the run
+        self.warnings: list[str] = []  # advisory only; never fail the run
+
+    @property
+    def passed(self) -> bool:
+        return not self.errors
+
+    @property
+    def violation_count(self) -> int:
+        return len(self.errors)
+
+    def summary(self) -> str:
+        """Render the human-readable report: header, warnings, errors, verdict."""
+        verdict = "PASSED" if self.passed else f"FAILED ({self.violation_count} violations)"
+        return '\n'.join([
+            f"--- Architecture Linter v2: {self.repo_name} ---",
+            *(f"  [W] {w}" for w in self.warnings),
+            *(f"  [E] {e}" for e in self.errors),
+            f"\nResult: {verdict}",
+        ])
+

 class Linter:
    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path).resolve()
+        if not self.repo_path.is_dir():
+            raise FileNotFoundError(f"Repository path does not exist: {self.repo_path}")
        self.repo_name = self.repo_path.name
-        self.errors = []
+        self.result = LinterResult(str(self.repo_path), self.repo_name)

-    def log_error(self, message: str, file: str = None, line: int = None):
-        loc = f"{file}:{line}" if file and line else (file if file else "General")
-        self.errors.append(f"[{loc}] {message}")
+    # --- helpers ---
+
+    def _scan_files(self, extensions=None):
+        """Yield (Path, content) for files matching *extensions*."""
+        exts = extensions or SCAN_EXTENSIONS
+        for root, dirs, files in os.walk(self.repo_path):
+            dirs[:] = [d for d in dirs if d not in SKIP_DIRS]  # prune in place so os.walk skips them
+            for fname in files:
+                # A dotfile such as ".env" has an empty Path.suffix, so also match the
+                # bare file name; otherwise the '.env' entry could never select a file.
+                if fname != '.env.example' and (Path(fname).suffix in exts or fname in exts):
+                    fpath = Path(root) / fname
+                    try:
+                        content = fpath.read_text(errors='ignore')
+                    except Exception:
+                        continue  # unreadable file: best-effort scan, skip it
+                    yield fpath, content
+
+    def _line_no(self, content: str, offset: int) -> int:
+        return 1 + content.count('\n', 0, offset)
+
+    # --- checks ---

    def check_sidecar_boundary(self):
-        """Rule 1: No sovereign code in hermes-agent (sidecar boundary)"""
-        if self.repo_name == "hermes-agent":
-            for root, _, files in os.walk(self.repo_path):
-                if "node_modules" in root or ".git" in root:
-                    continue
-                for file in files:
-                    if file.endswith((".py", ".ts", ".js", ".tsx")):
-                        path = Path(root) / file
-                        content = path.read_text(errors="ignore")
-                        for kw in SOVEREIGN_KEYWORDS:
-                            if kw in content.lower():
-                                # Exception: imports or comments might be okay, but we're strict for now
-                                self.log_error(f"Sovereign keyword '{kw}' found in hermes-agent. Violates sidecar boundary.", str(path.relative_to(self.repo_path)))
+        """No sovereign code in hermes-agent (sidecar boundary)."""
+        if self.repo_name != 'hermes-agent':
+            return
+        for fpath, content in self._scan_files():
+            lowered = content.lower()  # lowercase once per file rather than once per keyword
+            for kw in SOVEREIGN_KEYWORDS:
+                if kw in lowered:
+                    self.result.errors.append(
+                        f"Sovereign keyword '{kw}' in hermes-agent violates sidecar boundary. [{fpath.relative_to(self.repo_path)}]"
+                    )

    def check_hardcoded_ips(self):
-        """Rule 2: No hardcoded IPs (use domain names)"""
-        for root, _, files in os.walk(self.repo_path):
-            if "node_modules" in root or ".git" in root:
-                continue
-            for file in files:
-                if file.endswith((".py", ".ts", ".js", ".tsx", ".yaml", ".yml", ".json")):
-                    path = Path(root) / file
-                    content = path.read_text(errors="ignore")
-                    matches = re.finditer(IP_REGEX, content)
-                    for match in matches:
-                        ip = match.group()
-                        if ip in ["127.0.0.1", "0.0.0.0"]:
-                            continue
-                        line_no = content.count('\n', 0, match.start()) + 1
-                        self.log_error(f"Hardcoded IP address '{ip}' found. Use domain names or environment variables.", str(path.relative_to(self.repo_path)), line_no)
+        """No hardcoded public IPs — use DNS or env vars."""
+        for fpath, content in self._scan_files():
+            rel = str(fpath.relative_to(self.repo_path))
+            for m in re.finditer(IP_REGEX, content):
+                ip = m.group()
+                # Private/loopback ranges are excluded by the regex lookahead; also skip
+                # 0.x.x.x and dotted numbers with an octet > 255 (not a valid IPv4 address).
+                if ip.startswith('0.') or any(int(octet) > 255 for octet in ip.split('.')):
+                    continue
+                self.result.errors.append(
+                    f"Hardcoded IP '{ip}'. Use DNS or env vars. [{rel}:{self._line_no(content, m.start())}]"
+                )

    def check_api_keys(self):
-        """Rule 3: No cloud API keys committed to repos"""
-        for root, _, files in os.walk(self.repo_path):
-            if "node_modules" in root or ".git" in root:
-                continue
-            for file in files:
-                if file.endswith((".py", ".ts", ".js", ".tsx", ".yaml", ".yml", ".json", ".env")):
-                    if file == ".env.example":
-                        continue
-                    path = Path(root) / file
-                    content = path.read_text(errors="ignore")
-                    matches = re.finditer(API_KEY_REGEX, content, re.IGNORECASE)
-                    for match in matches:
-                        line_no = content.count('\n', 0, match.start()) + 1
-                        self.log_error("Potential API key or secret found in code.", str(path.relative_to(self.repo_path)), line_no)
+        """No cloud API keys / secrets committed (one report per file line)."""
+        for fpath, content in self._scan_files():
+            rel = str(fpath.relative_to(self.repo_path))
+            # Patterns overlap (e.g. a quoted sk- key assigned to api_key), so collect
+            # distinct line numbers to avoid reporting the same secret several times.
+            hit_lines = {self._line_no(content, m.start()) for pattern in API_KEY_PATTERNS
+                         for m in re.finditer(pattern, content, re.IGNORECASE)}
+            for line in sorted(hit_lines):
+                self.result.errors.append(f"Potential secret / API key detected. [{rel}:{line}]")
+
+    def check_sovereignty_rules(self):
+        """Flag direct cloud API endpoints and providers (rules carried over from v1)."""
+        code_and_config = {'.py', '.ts', '.tsx', '.js', '.yaml', '.yml'}
+        for fpath, content in self._scan_files(code_and_config):
+            rel = str(fpath.relative_to(self.repo_path))
+            for pattern, msg in SOVEREIGN_RULES:
+                for hit in re.finditer(pattern, content):
+                    self.result.errors.append(f"{msg} [{rel}:{self._line_no(content, hit.start())}]")

    def check_soul_canonical(self):
-        """Rule 4: SOUL.md exists and is canonical in exactly one location"""
-        soul_path = self.repo_path / "SOUL.md"
-        if self.repo_name == "timmy-config":
+        """SOUL.md must exist exactly in timmy-config root."""
+        soul_path = self.repo_path / 'SOUL.md'
+        if self.repo_name == 'timmy-config':
            if not soul_path.exists():
-                self.log_error("SOUL.md is missing from the canonical location (timmy-config root).")
+                self.result.errors.append(
+                    'SOUL.md missing from canonical location (timmy-config root).'
+                )
        else:
            if soul_path.exists():
-                self.log_error("SOUL.md found in non-canonical repo. It should only live in timmy-config.")
+                self.result.errors.append(
+                    'SOUL.md found in non-canonical repo. Must live only in timmy-config.'
+                )

    def check_readme(self):
-        """Rule 5: Every repo has a README with current truth"""
-        readme_path = self.repo_path / "README.md"
-        if not readme_path.exists():
-            self.log_error("README.md is missing.")
+        """Every repo must have a substantive README."""
+        readme = self.repo_path / 'README.md'
+        if not readme.exists():
+            self.result.errors.append('README.md is missing.')
        else:
-            content = readme_path.read_text(errors="ignore")
+            content = readme.read_text(errors='ignore')
            if len(content.strip()) < 50:
-                self.log_error("README.md is too short or empty. Provide current truth about the repo.")
+                self.result.warnings.append(
+                    'README.md is very short (<50 chars). Provide current truth about the repo.'
+                )

-    def run(self):
-        print(f"--- Gemini Linter: Auditing {self.repo_name} ---")
+    # --- runner ---
+
+    def run(self) -> LinterResult:
+        """Execute all checks and return the result."""
        self.check_sidecar_boundary()
        self.check_hardcoded_ips()
        self.check_api_keys()
+        self.check_sovereignty_rules()
        self.check_soul_canonical()
        self.check_readme()
+        return self.result

-        if self.errors:
-            print(f"\n[FAILURE] Found {len(self.errors)} architectural violations:")
-            for err in self.errors:
-                print(f"  - {err}")
-            return False
-        else:
-            print("\n[SUCCESS] Architecture is sound. Sovereignty maintained.")
-            return True

 def main():
-    parser = argparse.ArgumentParser(description="Gemini Architecture Linter v2")
-    parser.add_argument("repo_path", nargs="?", default=".", help="Path to the repository to lint")
+    parser = argparse.ArgumentParser(
+        description='Gemini Architecture Linter v2 — repo-aware sovereignty gate.'
+    )
+    parser.add_argument(
+        'repo_path', nargs='?', default='.',
+        help='Path to the repository to lint (default: cwd).',
+    )
+    parser.add_argument(
+        '--repo', dest='repo_flag', default=None,
+        help='Explicit repo path (alias for positional arg).',
+    )
+    parser.add_argument(
+        '--json', dest='json_output', action='store_true',
+        help='Emit machine-readable JSON instead of human text.',
+    )
    args = parser.parse_args()

-    linter = Linter(args.repo_path)
-    success = linter.run()
-    sys.exit(0 if success else 1)
+    path = args.repo_flag if args.repo_flag else args.repo_path

-if __name__ == "__main__":
+    try:
+        linter = Linter(path)
+    except FileNotFoundError as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        sys.exit(2)
+
+    result = linter.run()
+
+    if args.json_output:
+        import json as _json
+        out = {
+            'repo': result.repo_name,
+            'passed': result.passed,
+            'violation_count': result.violation_count,
+            'errors': result.errors,
+            'warnings': result.warnings,
+        }
+        print(_json.dumps(out, indent=2))
+    else:
+        print(result.summary())
+
+    sys.exit(0 if result.passed else 1)
+
+
+if __name__ == '__main__':
    main()
--- a/tests/test_linter.py
+++ b/tests/test_linter.py
@@ -0,0 +1,233 @@
+"""Tests for Architecture Linter v2.
+
+Validates that the linter correctly detects violations and passes clean repos.
+Refs: #437 — test-backed linter.
+"""
+
+import json
+import sys
+import tempfile
+from pathlib import Path
+
+# Add scripts/ to path
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
+
+from architecture_linter_v2 import Linter, LinterResult
+
+
+# ── helpers ───────────────────────────────────────────────────────────
+
+def _make_repo(tmpdir: str, files: dict[str, str], name: str = "test-repo") -> Path:
+    """Create a fake repo with given files and return its path."""
+    repo = Path(tmpdir) / name
+    repo.mkdir()
+    for relpath, content in files.items():
+        p = repo / relpath
+        p.parent.mkdir(parents=True, exist_ok=True)
+        p.write_text(content)
+    return repo
+
+
+def _run(tmpdir, files, name="test-repo"):
+    repo = _make_repo(tmpdir, files, name)
+    return Linter(str(repo)).run()
+
+
+# ── clean repo passes ─────────────────────────────────────────────────
+
+def test_clean_repo_passes():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# Test Repo\n\nThis is a clean test repo with sufficient content to pass.",
+            "main.py": "print('hello world')\n",
+        })
+        assert result.passed, f"Expected pass but got: {result.errors}"
+        assert result.violation_count == 0
+
+
+# ── missing README ────────────────────────────────────────────────────
+
+def test_missing_readme_fails():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {"main.py": "x = 1\n"})
+        assert not result.passed
+        assert any("README" in e for e in result.errors)
+
+
+def test_short_readme_warns():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {"README.md": "hi\n"})
+        # Warnings don't fail the build
+        assert result.passed
+        assert any("short" in w.lower() for w in result.warnings)
+
+
+# ── hardcoded IPs ─────────────────────────────────────────────────────
+
+def test_hardcoded_public_ip_detected():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# R\n\nGood repo.",
+            "server.py": "HOST = '203.0.113.42'\n",
+        })
+        assert not result.passed
+        assert any("203.0.113.42" in e for e in result.errors)
+
+
+def test_localhost_ip_ignored():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# R\n\nGood repo.",
+            "server.py": "HOST = '127.0.0.1'\n",
+        })
+        ip_errors = [e for e in result.errors if "IP" in e]
+        assert len(ip_errors) == 0
+
+
+# ── API keys ──────────────────────────────────────────────────────────
+
+def test_openai_key_detected():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# R\n\nGood repo.",
+            "config.py": 'key = "sk-abcdefghijklmnopqrstuvwx"\n',
+        })
+        assert not result.passed
+        assert any("secret" in e.lower() or "key" in e.lower() for e in result.errors)
+
+
+def test_aws_key_detected():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# R\n\nGood repo.",
+            "deploy.yaml": 'aws_key: AKIAIOSFODNN7EXAMPLE\n',
+        })
+        assert not result.passed
+        assert any("secret" in e.lower() for e in result.errors)
+
+
+def test_env_example_skipped():
+    # Use a value that WOULD match API_KEY_PATTERNS so the skip itself is what is tested.
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# R\n\nGood repo.",
+            ".env.example": 'OPENAI_KEY=sk-abcdefghijklmnopqrstuvwx\n',
+        })
+        assert not [e for e in result.errors if "secret" in e.lower()]
+
+
+# ── sovereignty rules (v1 cloud API checks) ───────────────────────────
+
+def test_openai_url_detected():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# R\n\nGood repo.",
+            "app.py": 'url = "https://api.openai.com/v1/chat"\n',
+        })
+        assert not result.passed
+        assert any("openai" in e.lower() for e in result.errors)
+
+
+def test_cloud_provider_detected():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# R\n\nGood repo.",
+            "config.yaml": "provider: openai\n",
+        })
+        assert not result.passed
+        assert any("provider" in e.lower() for e in result.errors)
+
+
+# ── sidecar boundary ──────────────────────────────────────────────────
+
+def test_sovereign_keyword_in_hermes_agent_fails():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# R\n\nGood repo.",
+            "index.py": "import mempalace\n",
+        }, name="hermes-agent")
+        assert not result.passed
+        assert any("sidecar" in e.lower() or "mempalace" in e.lower() for e in result.errors)
+
+
+def test_sovereign_keyword_in_other_repo_ok():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# R\n\nGood repo.",
+            "index.py": "import mempalace\n",
+        }, name="some-other-repo")
+        sidecar_errors = [e for e in result.errors if "sidecar" in e.lower()]
+        assert len(sidecar_errors) == 0
+
+
+# ── SOUL.md canonical location ────────────────────────────────────────
+
+def test_soul_md_required_in_timmy_config():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# timmy-config\n\nConfig repo.",
+        }, name="timmy-config")
+        assert not result.passed
+        assert any("SOUL.md" in e for e in result.errors)
+
+
+def test_soul_md_present_in_timmy_config_ok():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# timmy-config\n\nConfig repo.",
+            "SOUL.md": "# Soul\n\nCanonical identity document.",
+        }, name="timmy-config")
+        soul_errors = [e for e in result.errors if "SOUL" in e]
+        assert len(soul_errors) == 0
+
+
+def test_soul_md_in_wrong_repo_fails():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {
+            "README.md": "# R\n\nGood repo.",
+            "SOUL.md": "# Soul\n\nShould not be here.",
+        }, name="other-repo")
+        assert any("canonical" in e.lower() for e in result.errors)
+
+
+# ── LinterResult structure ────────────────────────────────────────────
+
+def test_result_summary_is_string():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {"README.md": "# OK repo with enough text here\n"})
+        assert isinstance(result.summary(), str)
+        assert "PASSED" in result.summary() or "FAILED" in result.summary()
+
+
+def test_result_repo_name():
+    with tempfile.TemporaryDirectory() as tmp:
+        result = _run(tmp, {"README.md": "# OK\n"}, name="my-repo")
+        assert result.repo_name == "my-repo"
+
+
+# ── invalid path ──────────────────────────────────────────────────────
+
+def test_invalid_path_raises():
+    try:
+        Linter("/nonexistent/path/xyz")
+        assert False, "Should have raised FileNotFoundError"
+    except FileNotFoundError:
+        pass
+
+
+# ── skip dirs ──────────────────────────────────────────────────────────
+
+def test_git_dir_skipped():
+    with tempfile.TemporaryDirectory() as tmp:
+        repo = _make_repo(tmp, {
+            "README.md": "# R\n\nGood repo.",
+            "main.py": "x = 1\n",
+        })
+        # Create a .git/ dir with a bad file
+        git_dir = repo / ".git"
+        git_dir.mkdir()
+        (git_dir / "bad.py").write_text("HOST = '203.0.113.1'\n")
+
+        result = Linter(str(repo)).run()
+        git_errors = [e for e in result.errors if ".git" in e]
+        assert len(git_errors) == 0