timmy-config/scripts/architecture_linter_v2.py

#!/usr/bin/env python3
"""
[ARCH] Architecture Linter v2
Part of the Gemini Sovereign Governance System.

Enforces architectural boundaries, security, and documentation standards
across the Timmy Foundation fleet.

Refs: #437 — repo-aware, test-backed, CI-enforced.
"""

import argparse
import os
import re
import sys
from pathlib import Path

# --- CONFIGURATION ---

# Substrings that must not appear in the hermes-agent repo; matched
# case-insensitively against file contents by Linter.check_sidecar_boundary.
SOVEREIGN_KEYWORDS = ["mempalace", "sovereign_store", "tirith", "bezalel", "nexus"]

# Dotted-quad matcher for hardcoded IPs. The negative lookahead rejects matches
# that begin with 127., 10., 192.168. or 172.16-31. (loopback / private ranges).
# Addresses starting with "0." (e.g. 0.0.0.0) are NOT excluded here; they are
# filtered separately in Linter.check_hardcoded_ips.
IP_REGEX = r'\b(?!(?:127|10|192\.168|172\.(?:1[6-9]|2\d|3[01]))\.)' \
           r'(?:\d{1,3}\.){3}\d{1,3}\b'

# API key / secret patterns. Linter.check_api_keys applies each one with
# re.IGNORECASE, so the letter classes below effectively match either case.
API_KEY_PATTERNS = [
    r'sk-[A-Za-z0-9]{20,}',               # OpenAI-style: "sk-" + 20 or more alphanumerics
    r'sk-ant-[A-Za-z0-9\-]{20,}',          # Anthropic: "sk-ant-" + 20 or more alphanumerics/hyphens
    r'AKIA[A-Z0-9]{16}',                    # AWS access key ID
    r'ghp_[A-Za-z0-9]{36}',                # GitHub personal access token
    r'glpat-[A-Za-z0-9\-]{20,}',           # GitLab personal access token
    # Generic assignment: api_key / api-key / apikey / secret / token followed
    # by ':' or '=' and a quoted value of at least 16 [A-Za-z0-9_-] characters.
    r'(?:api[_-]?key|secret|token)\s*[:=]\s*["\'][A-Za-z0-9_\-]{16,}["\']',
]

# Sovereignty rules (carried from v1): (regex, message) pairs consumed by
# Linter.check_sovereignty_rules; every match is reported as an error.
SOVEREIGN_RULES = [
    (r'https?://api\.openai\.com', 'External cloud API: api.openai.com. Use local custom_provider.'),
    (r'https?://api\.anthropic\.com', 'External cloud API: api.anthropic.com. Use local custom_provider.'),
    (r'provider:\s*(?:openai|anthropic)\b', 'Direct cloud provider. Ensure fallback_model is configured.'),
]

# File suffixes eligible for scanning (the default set for Linter._scan_files).
SCAN_EXTENSIONS = {'.py', '.ts', '.tsx', '.js', '.yaml', '.yml', '.json', '.env', '.sh', '.cfg', '.toml'}
# Directory names pruned from the os.walk traversal in Linter._scan_files.
SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', '.eggs'}


class LinterResult:
    """Outcome of one lint run: repo identity plus the collected messages.

    Attributes are plain lists so checks can append to them directly;
    only ``errors`` influence the pass/fail verdict.
    """

    def __init__(self, repo_path: str, repo_name: str):
        # Messages start empty and are appended to by the individual checks.
        self.warnings: list[str] = []
        self.errors: list[str] = []
        self.repo_name = repo_name
        self.repo_path = repo_path

    @property
    def passed(self) -> bool:
        """True when no errors were recorded (warnings never fail a run)."""
        return not self.errors

    @property
    def violation_count(self) -> int:
        """Number of recorded errors."""
        return len(self.errors)

    def summary(self) -> str:
        """Render the human-readable report: header, warnings, errors, verdict."""
        header = f"--- Architecture Linter v2: {self.repo_name} ---"
        warning_lines = [f"  [W] {text}" for text in self.warnings]
        error_lines = [f"  [E] {text}" for text in self.errors]
        if self.passed:
            verdict = "PASSED"
        else:
            verdict = f"FAILED ({self.violation_count} violations)"
        # The verdict line carries its own leading newline, which yields one
        # blank separator line once everything is joined.
        report = [header, *warning_lines, *error_lines, f"\nResult: {verdict}"]
        return '\n'.join(report)


class Linter:
    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path).resolve()
        if not self.repo_path.is_dir():
            raise FileNotFoundError(f"Repository path does not exist: {self.repo_path}")
        self.repo_name = self.repo_path.name
        self.result = LinterResult(str(self.repo_path), self.repo_name)

    # --- helpers ---

    def _scan_files(self, extensions=None):
        """Yield (Path, content) for files matching *extensions*."""
        exts = extensions or SCAN_EXTENSIONS
        for root, dirs, files in os.walk(self.repo_path):
            dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
            for fname in files:
                if Path(fname).suffix in exts:
                    if fname == '.env.example':
                        continue
                    fpath = Path(root) / fname
                    try:
                        content = fpath.read_text(errors='ignore')
                    except Exception:
                        continue
                    yield fpath, content

    def _line_no(self, content: str, offset: int) -> int:
        return content.count('\n', 0, offset) + 1

    # --- checks ---

    def check_sidecar_boundary(self):
        """No sovereign code in hermes-agent (sidecar boundary)."""
        if self.repo_name != 'hermes-agent':
            return
        for fpath, content in self._scan_files():
            for kw in SOVEREIGN_KEYWORDS:
                if kw in content.lower():
                    rel = str(fpath.relative_to(self.repo_path))
                    self.result.errors.append(
                        f"Sovereign keyword '{kw}' in hermes-agent violates sidecar boundary. [{rel}]"
                    )

    def check_hardcoded_ips(self):
        """No hardcoded public IPs — use DNS or env vars."""
        for fpath, content in self._scan_files():
            for m in re.finditer(IP_REGEX, content):
                ip = m.group()
                # skip private ranges already handled by lookahead, and 0.0.0.0
                if ip.startswith('0.'):
                    continue
                line = self._line_no(content, m.start())
                rel = str(fpath.relative_to(self.repo_path))
                self.result.errors.append(
                    f"Hardcoded IP '{ip}'. Use DNS or env vars. [{rel}:{line}]"
                )

    def check_api_keys(self):
        """No cloud API keys / secrets committed."""
        for fpath, content in self._scan_files():
            for pattern in API_KEY_PATTERNS:
                for m in re.finditer(pattern, content, re.IGNORECASE):
                    line = self._line_no(content, m.start())
                    rel = str(fpath.relative_to(self.repo_path))
                    self.result.errors.append(
                        f"Potential secret / API key detected. [{rel}:{line}]"
                    )

    def check_sovereignty_rules(self):
        """V1 sovereignty rules: no direct cloud API endpoints or providers."""
        for fpath, content in self._scan_files({'.py', '.ts', '.tsx', '.js', '.yaml', '.yml'}):
            for pattern, msg in SOVEREIGN_RULES:
                for m in re.finditer(pattern, content):
                    line = self._line_no(content, m.start())
                    rel = str(fpath.relative_to(self.repo_path))
                    self.result.errors.append(f"{msg} [{rel}:{line}]")

    def check_soul_canonical(self):
        """SOUL.md must exist exactly in timmy-config root."""
        soul_path = self.repo_path / 'SOUL.md'
        if self.repo_name == 'timmy-config':
            if not soul_path.exists():
                self.result.errors.append(
                    'SOUL.md missing from canonical location (timmy-config root).'
                )
        else:
            if soul_path.exists():
                self.result.errors.append(
                    'SOUL.md found in non-canonical repo. Must live only in timmy-config.'
                )

    def check_readme(self):
        """Every repo must have a substantive README."""
        readme = self.repo_path / 'README.md'
        if not readme.exists():
            self.result.errors.append('README.md is missing.')
        else:
            content = readme.read_text(errors='ignore')
            if len(content.strip()) < 50:
                self.result.warnings.append(
                    'README.md is very short (<50 chars). Provide current truth about the repo.'
                )

    # --- runner ---

    def run(self) -> LinterResult:
        """Execute all checks and return the result."""
        self.check_sidecar_boundary()
        self.check_hardcoded_ips()
        self.check_api_keys()
        self.check_sovereignty_rules()
        self.check_soul_canonical()
        self.check_readme()
        return self.result


def main():
    """Command-line entry point: lint one repository and exit with a status.

    Exit status is 0 when the result has no errors, 1 when it has at least
    one, and 2 when Linter rejects the path with FileNotFoundError.
    """
    cli = argparse.ArgumentParser(
        description='Gemini Architecture Linter v2 — repo-aware sovereignty gate.',
    )
    cli.add_argument(
        'repo_path',
        nargs='?',
        default='.',
        help='Path to the repository to lint (default: cwd).',
    )
    cli.add_argument(
        '--repo',
        dest='repo_flag',
        default=None,
        help='Explicit repo path (alias for positional arg).',
    )
    cli.add_argument(
        '--json',
        dest='json_output',
        action='store_true',
        help='Emit machine-readable JSON instead of human text.',
    )
    opts = cli.parse_args()

    # A non-empty --repo value takes precedence over the positional argument.
    target = opts.repo_flag or opts.repo_path

    try:
        linter = Linter(target)
    except FileNotFoundError as err:
        print(f"ERROR: {err}", file=sys.stderr)
        sys.exit(2)

    outcome = linter.run()

    if not opts.json_output:
        print(outcome.summary())
    else:
        # Imported lazily: only the --json path needs it.
        import json as _json
        payload = {
            'repo': outcome.repo_name,
            'passed': outcome.passed,
            'violation_count': outcome.violation_count,
            'errors': outcome.errors,
            'warnings': outcome.warnings,
        }
        print(_json.dumps(payload, indent=2))

    exit_code = 0 if outcome.passed else 1
    sys.exit(exit_code)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()