2026-04-10 09:36:23 +00:00
4 changed files with 462 additions and 77 deletions
--- a/.gitea/workflows/architecture-lint.yml
+++ b/.gitea/workflows/architecture-lint.yml
@@ -0,0 +1,41 @@
 # architecture-lint.yml — CI gate for the Architecture Linter v2
 # Refs: #437 — repo-aware, test-backed, CI-enforced.
 #
 # Runs on every PR targeting main or master, and on every push to main.
 # Validates Python syntax, then runs the linter tests, and finally lints
 # the repo itself.
 name: Architecture Lint
 # Triggers: pull requests targeting main/master, and pushes to main.
 on:
  pull_request:
    branches: [main, master]
  push:
    branches: [main]
 jobs:
  # Job 1: verify the linter itself (syntax check + its pytest suite).
  linter-tests:
    name: Linter Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: "3.11"
      - name: Install test deps
        run: pip install pytest
      # Byte-compile first so a syntax error fails fast, before pytest runs.
      - name: Compile-check linter
        run: python3 -m py_compile scripts/architecture_linter_v2.py
      - name: Run linter tests
        run: python3 -m pytest tests/test_linter.py -v
  # Job 2: run the linter against this repository's own checkout.
  lint-repo:
    name: Lint Repository
    runs-on: ubuntu-latest
    # Only runs after the linter-tests job has succeeded.
    needs: linter-tests
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      # NOTE(review): "." scans the whole checkout, including
      # tests/test_linter.py, whose fixtures embed sample keys, IPs and
      # cloud API URLs — confirm this job can pass on this repository.
      - name: Run architecture linter
        run: python3 scripts/architecture_linter_v2.py .
--- a/scripts/architecture_linter.py
+++ b/scripts/architecture_linter.py
@@ -9,7 +9,7 @@ import re
 SOVEREIGN_RULES = [
    (r"https?://(api\.openai\.com|api\.anthropic\.com)", "CRITICAL: External cloud API detected. Use local custom_provider instead."),
    (r"provider: (openai|anthropic)", "WARNING: Direct cloud provider used. Ensure fallback_model is configured."),
-    (r"api_key: ['"][^'"\s]{10,}['"]", "SECURITY: Hardcoded API key detected. Use environment variables.")
+    (r"api_key:\s*['\"][A-Za-z0-9_\-]{16,}['\"]", "SECURITY: Hardcoded API key detected. Use environment variables.")
 ]
 def lint_file(path):
--- a/scripts/architecture_linter_v2.py
+++ b/scripts/architecture_linter_v2.py
@@ -5,122 +5,233 @@ Part of the Gemini Sovereign Governance System.
 Enforces architectural boundaries, security, and documentation standards
 across the Timmy Foundation fleet.
 Refs: #437 — repo-aware, test-backed, CI-enforced.
 """
 import argparse
 import os
 import re
 import sys
 import argparse
 from pathlib import Path
 # --- CONFIGURATION ---
 SOVEREIGN_KEYWORDS = ["mempalace", "sovereign_store", "tirith", "bezalel", "nexus"]
-IP_REGEX = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
+
-API_KEY_REGEX = r'(?:api_key|secret|token|password|auth_token)\s*[:=]\s*["\'][a-zA-Z0-9_\-]{20,}["\']'
+# IP addresses (skip 127.0.0.1, 0.0.0.0, 10.x.x.x, 172.16-31.x.x, 192.168.x.x)
 IP_REGEX = r'\b(?!(?:127|10|192\.168|172\.(?:1[6-9]|2\d|3[01]))\.)' \
           r'(?:\d{1,3}\.){3}\d{1,3}\b'
 # API key / secret patterns — catches sk-, sk-ant-, AKIA, ghp_, glpat-, and generic api_key/secret/token assignments.
 API_KEY_PATTERNS = [
    r'sk-[A-Za-z0-9]{20,}',               # OpenAI-style
    r'sk-ant-[A-Za-z0-9\-]{20,}',          # Anthropic
    r'AKIA[A-Z0-9]{16}',                    # AWS access key
    r'ghp_[A-Za-z0-9]{36}',                # GitHub PAT
    r'glpat-[A-Za-z0-9\-]{20,}',           # GitLab PAT
    r'(?:api[_-]?key|secret|token)\s*[:=]\s*["\'][A-Za-z0-9_\-]{16,}["\']',
 ]
 # Sovereignty rules (carried from v1)
 SOVEREIGN_RULES = [
    (r'https?://api\.openai\.com', 'External cloud API: api.openai.com. Use local custom_provider.'),
    (r'https?://api\.anthropic\.com', 'External cloud API: api.anthropic.com. Use local custom_provider.'),
    (r'provider:\s*(?:openai|anthropic)\b', 'Direct cloud provider. Ensure fallback_model is configured.'),
 ]
 # File extensions to scan
 SCAN_EXTENSIONS = {'.py', '.ts', '.tsx', '.js', '.yaml', '.yml', '.json', '.env', '.sh', '.cfg', '.toml'}
 SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', '.eggs'}
 class LinterResult:
    """Outcome of a single linter run: blocking errors plus advisory warnings."""

    def __init__(self, repo_path: str, repo_name: str):
        self.repo_path = repo_path
        self.repo_name = repo_name
        # Warnings are informational; only errors decide pass/fail.
        self.warnings: list[str] = []
        self.errors: list[str] = []

    @property
    def violation_count(self) -> int:
        """Number of recorded errors (warnings are not counted)."""
        return len(self.errors)

    @property
    def passed(self) -> bool:
        """True when no errors were recorded."""
        return not self.errors

    def summary(self) -> str:
        """Render a human-readable report: header, warnings, errors, verdict."""
        if self.passed:
            verdict = "PASSED"
        else:
            verdict = f"FAILED ({self.violation_count} violations)"
        report = [
            f"--- Architecture Linter v2: {self.repo_name} ---",
            *(f"  [W] {note}" for note in self.warnings),
            *(f"  [E] {problem}" for problem in self.errors),
            f"\nResult: {verdict}",
        ]
        return '\n'.join(report)
 class Linter:
    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path).resolve()
        if not self.repo_path.is_dir():
            raise FileNotFoundError(f"Repository path does not exist: {self.repo_path}")
        self.repo_name = self.repo_path.name
-        self.errors = []
+        self.result = LinterResult(str(self.repo_path), self.repo_name)
-    def log_error(self, message: str, file: str = None, line: int = None):
+    # --- helpers ---
-        loc = f"{file}:{line}" if file and line else (file if file else "General")
+
-        self.errors.append(f"[{loc}] {message}")
+    def _scan_files(self, extensions=None):
        """Yield (Path, content) for files matching *extensions*."""
        exts = extensions or SCAN_EXTENSIONS
        for root, dirs, files in os.walk(self.repo_path):
            dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
            for fname in files:
                if Path(fname).suffix in exts:
                    if fname == '.env.example':
                        continue
                    fpath = Path(root) / fname
                    try:
                        content = fpath.read_text(errors='ignore')
                    except Exception:
                        continue
                    yield fpath, content
    def _line_no(self, content: str, offset: int) -> int:
        return content.count('\n', 0, offset) + 1
    # --- checks ---
    def check_sidecar_boundary(self):
-        """Rule 1: No sovereign code in hermes-agent (sidecar boundary)"""
+        """No sovereign code in hermes-agent (sidecar boundary)."""
-        if self.repo_name == "hermes-agent":
+        if self.repo_name != 'hermes-agent':
-            for root, _, files in os.walk(self.repo_path):
+            return
-                if "node_modules" in root or ".git" in root:
+        for fpath, content in self._scan_files():
-                    continue
+            for kw in SOVEREIGN_KEYWORDS:
-                for file in files:
+                if kw in content.lower():
-                    if file.endswith((".py", ".ts", ".js", ".tsx")):
+                    rel = str(fpath.relative_to(self.repo_path))
-                        path = Path(root) / file
+                    self.result.errors.append(
-                        content = path.read_text(errors="ignore")
+                        f"Sovereign keyword '{kw}' in hermes-agent violates sidecar boundary. [{rel}]"
-                        for kw in SOVEREIGN_KEYWORDS:
+                    )
                            if kw in content.lower():
                                # Exception: imports or comments might be okay, but we're strict for now
                                self.log_error(f"Sovereign keyword '{kw}' found in hermes-agent. Violates sidecar boundary.", str(path.relative_to(self.repo_path)))
    def check_hardcoded_ips(self):
-        """Rule 2: No hardcoded IPs (use domain names)"""
+        """No hardcoded public IPs — use DNS or env vars."""
-        for root, _, files in os.walk(self.repo_path):
+        for fpath, content in self._scan_files():
-            if "node_modules" in root or ".git" in root:
+            for m in re.finditer(IP_REGEX, content):
-                continue
+                ip = m.group()
-            for file in files:
+                # skip private ranges already handled by lookahead, and 0.0.0.0
-                if file.endswith((".py", ".ts", ".js", ".tsx", ".yaml", ".yml", ".json")):
+                if ip.startswith('0.'):
-                    path = Path(root) / file
+                    continue
-                    content = path.read_text(errors="ignore")
+                line = self._line_no(content, m.start())
-                    matches = re.finditer(IP_REGEX, content)
+                rel = str(fpath.relative_to(self.repo_path))
-                    for match in matches:
+                self.result.errors.append(
-                        ip = match.group()
+                    f"Hardcoded IP '{ip}'. Use DNS or env vars. [{rel}:{line}]"
-                        if ip in ["127.0.0.1", "0.0.0.0"]:
+                )
                            continue
                        line_no = content.count('\n', 0, match.start()) + 1
                        self.log_error(f"Hardcoded IP address '{ip}' found. Use domain names or environment variables.", str(path.relative_to(self.repo_path)), line_no)
    def check_api_keys(self):
-        """Rule 3: No cloud API keys committed to repos"""
+        """No cloud API keys / secrets committed."""
-        for root, _, files in os.walk(self.repo_path):
+        for fpath, content in self._scan_files():
-            if "node_modules" in root or ".git" in root:
+            for pattern in API_KEY_PATTERNS:
-                continue
+                for m in re.finditer(pattern, content, re.IGNORECASE):
-            for file in files:
+                    line = self._line_no(content, m.start())
-                if file.endswith((".py", ".ts", ".js", ".tsx", ".yaml", ".yml", ".json", ".env")):
+                    rel = str(fpath.relative_to(self.repo_path))
-                    if file == ".env.example":
+                    self.result.errors.append(
-                        continue
+                        f"Potential secret / API key detected. [{rel}:{line}]"
-                    path = Path(root) / file
+                    )
-                    content = path.read_text(errors="ignore")
+
-                    matches = re.finditer(API_KEY_REGEX, content, re.IGNORECASE)
+    def check_sovereignty_rules(self):
-                    for match in matches:
+        """V1 sovereignty rules: no direct cloud API endpoints or providers."""
-                        line_no = content.count('\n', 0, match.start()) + 1
+        for fpath, content in self._scan_files({'.py', '.ts', '.tsx', '.js', '.yaml', '.yml'}):
-                        self.log_error("Potential API key or secret found in code.", str(path.relative_to(self.repo_path)), line_no)
+            for pattern, msg in SOVEREIGN_RULES:
                for m in re.finditer(pattern, content):
                    line = self._line_no(content, m.start())
                    rel = str(fpath.relative_to(self.repo_path))
                    self.result.errors.append(f"{msg} [{rel}:{line}]")
    def check_soul_canonical(self):
-        """Rule 4: SOUL.md exists and is canonical in exactly one location"""
+        """SOUL.md must exist exactly in timmy-config root."""
-        soul_path = self.repo_path / "SOUL.md"
+        soul_path = self.repo_path / 'SOUL.md'
-        if self.repo_name == "timmy-config":
+        if self.repo_name == 'timmy-config':
            if not soul_path.exists():
-                self.log_error("SOUL.md is missing from the canonical location (timmy-config root).")
+                self.result.errors.append(
                    'SOUL.md missing from canonical location (timmy-config root).'
                )
        else:
            if soul_path.exists():
-                self.log_error("SOUL.md found in non-canonical repo. It should only live in timmy-config.")
+                self.result.errors.append(
                    'SOUL.md found in non-canonical repo. Must live only in timmy-config.'
                )
    def check_readme(self):
-        """Rule 5: Every repo has a README with current truth"""
+        """Every repo must have a substantive README."""
-        readme_path = self.repo_path / "README.md"
+        readme = self.repo_path / 'README.md'
-        if not readme_path.exists():
+        if not readme.exists():
-            self.log_error("README.md is missing.")
+            self.result.errors.append('README.md is missing.')
        else:
-            content = readme_path.read_text(errors="ignore")
+            content = readme.read_text(errors='ignore')
            if len(content.strip()) < 50:
-                self.log_error("README.md is too short or empty. Provide current truth about the repo.")
+                self.result.warnings.append(
                    'README.md is very short (<50 chars). Provide current truth about the repo.'
                )
-    def run(self):
+    # --- runner ---
-        print(f"--- Gemini Linter: Auditing {self.repo_name} ---")
+
    def run(self) -> LinterResult:
        """Execute all checks and return the result."""
        self.check_sidecar_boundary()
        self.check_hardcoded_ips()
        self.check_api_keys()
        self.check_sovereignty_rules()
        self.check_soul_canonical()
        self.check_readme()
        return self.result
        if self.errors:
            print(f"\n[FAILURE] Found {len(self.errors)} architectural violations:")
            for err in self.errors:
                print(f"  - {err}")
            return False
        else:
            print("\n[SUCCESS] Architecture is sound. Sovereignty maintained.")
            return True
 def main():
    """CLI entry point: lint a repository and exit with a status code.

    Exit codes: 0 when the run passed, 1 when violations were found,
    2 when the repository path does not exist.
    """
    parser = argparse.ArgumentParser(
        description='Gemini Architecture Linter v2 — repo-aware sovereignty gate.'
    )
    parser.add_argument(
        'repo_path', nargs='?', default='.',
        help='Path to the repository to lint (default: cwd).',
    )
    parser.add_argument(
        '--repo', dest='repo_flag', default=None,
        help='Explicit repo path (alias for positional arg).',
    )
    parser.add_argument(
        '--json', dest='json_output', action='store_true',
        help='Emit machine-readable JSON instead of human text.',
    )
    args = parser.parse_args()

    # --repo, when given, takes precedence over the positional path.
    path = args.repo_flag if args.repo_flag else args.repo_path

    try:
        linter = Linter(path)
    except FileNotFoundError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        sys.exit(2)

    result = linter.run()

    if args.json_output:
        import json as _json
        out = {
            'repo': result.repo_name,
            'passed': result.passed,
            'violation_count': result.violation_count,
            'errors': result.errors,
            'warnings': result.warnings,
        }
        print(_json.dumps(out, indent=2))
    else:
        print(result.summary())

    sys.exit(0 if result.passed else 1)
 if __name__ == '__main__':
    main()
--- a/tests/test_linter.py
+++ b/tests/test_linter.py
@@ -0,0 +1,233 @@
 """Tests for Architecture Linter v2.
 Validates that the linter correctly detects violations and passes clean repos.
 Refs: #437 — test-backed linter.
 """
 import json
 import sys
 import tempfile
 from pathlib import Path
 # Add scripts/ to path
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
 from architecture_linter_v2 import Linter, LinterResult
 # ── helpers ───────────────────────────────────────────────────────────
 def _make_repo(tmpdir: str, files: dict[str, str], name: str = "test-repo") -> Path:
    """Create a fake repo with given files and return its path."""
    repo = Path(tmpdir) / name
    repo.mkdir()
    for relpath, content in files.items():
        p = repo / relpath
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(content)
    return repo
 def _run(tmpdir, files, name="test-repo"):
    """Create a fake repo from *files* and return the linter result for it."""
    repo_dir = str(_make_repo(tmpdir, files, name))
    linter = Linter(repo_dir)
    return linter.run()
 # ── clean repo passes ─────────────────────────────────────────────────
 def test_clean_repo_passes():
    """A repo with a substantive README and harmless code yields no errors."""
    files = {
        "README.md": "# Test Repo\n\nThis is a clean test repo with sufficient content to pass.",
        "main.py": "print('hello world')\n",
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files)
        assert outcome.passed, f"Expected pass but got: {outcome.errors}"
        assert outcome.violation_count == 0
 # ── missing README ────────────────────────────────────────────────────
 def test_missing_readme_fails():
    """A repo without README.md fails with an error that mentions README."""
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, {"main.py": "x = 1\n"})
        assert not outcome.passed
        readme_errors = [err for err in outcome.errors if "README" in err]
        assert readme_errors
 def test_short_readme_warns():
    """A near-empty README produces a warning but does not fail the run."""
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, {"README.md": "hi\n"})
        # Warnings alone must not fail the build.
        assert outcome.passed
        short_notes = [note for note in outcome.warnings if "short" in note.lower()]
        assert short_notes
 # ── hardcoded IPs ─────────────────────────────────────────────────────
 def test_hardcoded_public_ip_detected():
    """A public IP literal in source is reported and fails the run."""
    files = {
        "README.md": "# R\n\nGood repo.",
        "server.py": "HOST = '203.0.113.42'\n",
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files)
        assert not outcome.passed
        assert any("203.0.113.42" in err for err in outcome.errors)
 def test_localhost_ip_ignored():
    """The loopback address 127.0.0.1 must not produce an IP error."""
    files = {
        "README.md": "# R\n\nGood repo.",
        "server.py": "HOST = '127.0.0.1'\n",
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files)
        assert not any("IP" in err for err in outcome.errors)
 # ── API keys ──────────────────────────────────────────────────────────
 def test_openai_key_detected():
    """An sk- style key in source is reported as a secret and fails the run."""
    files = {
        "README.md": "# R\n\nGood repo.",
        "config.py": 'key = "sk-abcdefghijklmnopqrstuvwx"\n',
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files)
        assert not outcome.passed
        lowered = [err.lower() for err in outcome.errors]
        assert any("secret" in text or "key" in text for text in lowered)
 def test_aws_key_detected():
    """An AKIA-prefixed AWS access key in YAML is reported as a secret."""
    files = {
        "README.md": "# R\n\nGood repo.",
        "deploy.yaml": 'aws_key: AKIAIOSFODNN7EXAMPLE\n',
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files)
        assert not outcome.passed
        assert any("secret" in err.lower() for err in outcome.errors)
 def test_env_example_skipped():
    """Secrets inside .env.example must not be reported.

    The fixture uses a key long enough to match the sk- pattern, so the
    test only passes when the file is genuinely excluded from scanning.
    """
    with tempfile.TemporaryDirectory() as tmp:
        result = _run(tmp, {
            "README.md": "# R\n\nGood repo.",
            ".env.example": 'OPENAI_KEY=sk-abcdefghijklmnopqrstuvwx\n',
        })
        secret_errors = [e for e in result.errors if "secret" in e.lower()]
        assert len(secret_errors) == 0
 # ── sovereignty rules (v1 cloud API checks) ───────────────────────────
 def test_openai_url_detected():
    """A direct api.openai.com URL in source fails the run."""
    files = {
        "README.md": "# R\n\nGood repo.",
        "app.py": 'url = "https://api.openai.com/v1/chat"\n',
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files)
        assert not outcome.passed
        assert any("openai" in err.lower() for err in outcome.errors)
 def test_cloud_provider_detected():
    """A 'provider: openai' setting in YAML fails the run."""
    files = {
        "README.md": "# R\n\nGood repo.",
        "config.yaml": "provider: openai\n",
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files)
        assert not outcome.passed
        assert any("provider" in err.lower() for err in outcome.errors)
 # ── sidecar boundary ──────────────────────────────────────────────────
 def test_sovereign_keyword_in_hermes_agent_fails():
    """A sovereign keyword inside a repo named hermes-agent fails the run."""
    files = {
        "README.md": "# R\n\nGood repo.",
        "index.py": "import mempalace\n",
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files, name="hermes-agent")
        assert not outcome.passed
        lowered = [err.lower() for err in outcome.errors]
        assert any("sidecar" in text or "mempalace" in text for text in lowered)
 def test_sovereign_keyword_in_other_repo_ok():
    """The sidecar rule does not fire for repos other than hermes-agent."""
    files = {
        "README.md": "# R\n\nGood repo.",
        "index.py": "import mempalace\n",
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files, name="some-other-repo")
        assert not any("sidecar" in err.lower() for err in outcome.errors)
 # ── SOUL.md canonical location ────────────────────────────────────────
 def test_soul_md_required_in_timmy_config():
    """timmy-config without SOUL.md fails with an error naming SOUL.md."""
    files = {"README.md": "# timmy-config\n\nConfig repo."}
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files, name="timmy-config")
        assert not outcome.passed
        assert any("SOUL.md" in err for err in outcome.errors)
 def test_soul_md_present_in_timmy_config_ok():
    """timmy-config with SOUL.md at its root raises no SOUL-related error."""
    files = {
        "README.md": "# timmy-config\n\nConfig repo.",
        "SOUL.md": "# Soul\n\nCanonical identity document.",
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files, name="timmy-config")
        assert not any("SOUL" in err for err in outcome.errors)
 def test_soul_md_in_wrong_repo_fails():
    """SOUL.md in any repo other than timmy-config is reported as non-canonical."""
    files = {
        "README.md": "# R\n\nGood repo.",
        "SOUL.md": "# Soul\n\nShould not be here.",
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, files, name="other-repo")
        canonical_errors = [err for err in outcome.errors if "canonical" in err.lower()]
        assert canonical_errors
 # ── LinterResult structure ────────────────────────────────────────────
 def test_result_summary_is_string():
    """summary() returns text that contains a PASSED or FAILED verdict."""
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, {"README.md": "# OK repo with enough text here\n"})
        assert isinstance(outcome.summary(), str)
        report = outcome.summary()
        assert "PASSED" in report or "FAILED" in report
 def test_result_repo_name():
    """The result carries the repo directory's name."""
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, {"README.md": "# OK\n"}, name="my-repo")
        expected_name = "my-repo"
        assert outcome.repo_name == expected_name
 # ── invalid path ──────────────────────────────────────────────────────
 def test_invalid_path_raises():
    """Constructing a Linter on a nonexistent path raises FileNotFoundError."""
    try:
        Linter("/nonexistent/path/xyz")
    except FileNotFoundError:
        return
    assert False, "Should have raised FileNotFoundError"
 # ── skip dirs ──────────────────────────────────────────────────────────
 def test_git_dir_skipped():
    """Files under .git/ are never scanned, even when they contain violations."""
    files = {
        "README.md": "# R\n\nGood repo.",
        "main.py": "x = 1\n",
    }
    with tempfile.TemporaryDirectory() as workdir:
        repo_root = _make_repo(workdir, files)
        # Plant a violating file inside .git/; it must be ignored.
        hidden = repo_root / ".git"
        hidden.mkdir()
        planted = hidden / "bad.py"
        planted.write_text("HOST = '203.0.113.1'\n")
        outcome = Linter(str(repo_root)).run()
        assert not any(".git" in err for err in outcome.errors)