# architecture-lint.yml — CI gate for the Architecture Linter v2
# Refs: #437 — repo-aware, test-backed, CI-enforced.
#
# Runs on every pull request targeting main or master and on every push to
# main. Compile-checks the linter, runs the linter's own tests, and then lints
# the repository itself.
#
# NOTE(review): tests/test_linter.py embeds violation fixtures (a public IP,
# an sk- style key, an api.openai.com URL) as string literals in a .py file,
# which the self-lint job appears to scan — confirm that job can pass.

name: Architecture Lint

on:
  pull_request:
    branches: [main, master]
  push:
    branches: [main]

jobs:
  # Gate 1: the linter must compile and its test suite must pass.
  linter-tests:
    name: Linter Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install test deps
        run: pip install pytest
      - name: Compile-check linter
        run: python3 -m py_compile scripts/architecture_linter_v2.py
      - name: Run linter tests
        run: python3 -m pytest tests/test_linter.py -v

  # Gate 2: lint this repository; runs only after linter-tests succeeds.
  lint-repo:
    name: Lint Repository
    runs-on: ubuntu-latest
    needs: linter-tests
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Run architecture linter
        run: python3 scripts/architecture_linter_v2.py .
"""Gemini Architecture Linter v2.

Part of the Gemini Sovereign Governance System.
Enforces architectural boundaries, security, and documentation standards
across the Timmy Foundation fleet.

Refs: #437 — repo-aware, test-backed, CI-enforced.
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path

# --- CONFIGURATION ---

# Identifiers that must not appear in hermes-agent sources (sidecar boundary).
SOVEREIGN_KEYWORDS = ["mempalace", "sovereign_store", "tirith", "bezalel", "nexus"]

# Dotted quads. The negative lookahead rejects 127.x, 10.x, 192.168.x and
# 172.16-31.x up front; 0.x addresses and out-of-range octets are filtered
# in Linter.check_hardcoded_ips.
IP_REGEX = r'\b(?!(?:127|10|192\.168|172\.(?:1[6-9]|2\d|3[01]))\.)' \
           r'(?:\d{1,3}\.){3}\d{1,3}\b'

# API key / secret patterns: vendor-prefixed tokens plus a generic
# `name = "value"` assignment form.
API_KEY_PATTERNS = [
    r'sk-[A-Za-z0-9]{20,}',            # OpenAI-style
    r'sk-ant-[A-Za-z0-9\-]{20,}',      # Anthropic
    r'AKIA[A-Z0-9]{16}',               # AWS access key
    r'ghp_[A-Za-z0-9]{36}',            # GitHub PAT
    r'glpat-[A-Za-z0-9\-]{20,}',       # GitLab PAT
    r'(?:api[_-]?key|secret|token)\s*[:=]\s*["\'][A-Za-z0-9_\-]{16,}["\']',
]

# Sovereignty rules (carried from v1): (regex, message) pairs.
SOVEREIGN_RULES = [
    (r'https?://api\.openai\.com', 'External cloud API: api.openai.com. Use local custom_provider.'),
    (r'https?://api\.anthropic\.com', 'External cloud API: api.anthropic.com. Use local custom_provider.'),
    (r'provider:\s*(?:openai|anthropic)\b', 'Direct cloud provider. Ensure fallback_model is configured.'),
]

# File extensions to scan, directories never descended into, and file names
# that are always ignored (templates that intentionally hold placeholders).
SCAN_EXTENSIONS = {'.py', '.ts', '.tsx', '.js', '.yaml', '.yml', '.json', '.env', '.sh', '.cfg', '.toml'}
SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', '.eggs'}
SKIP_FILES = {'.env.example'}


class LinterResult:
    """Structured result container for programmatic access.

    Attributes are plain lists so callers (tests, JSON output) can inspect
    them directly: ``errors`` fail the run, ``warnings`` do not.
    """

    def __init__(self, repo_path: str, repo_name: str):
        self.repo_path = repo_path
        self.repo_name = repo_name
        self.errors: list[str] = []
        self.warnings: list[str] = []

    @property
    def passed(self) -> bool:
        """True when no errors were recorded (warnings are ignored)."""
        return not self.errors

    @property
    def violation_count(self) -> int:
        """Number of recorded errors."""
        return len(self.errors)

    def to_dict(self) -> dict:
        """Return the machine-readable form emitted by ``--json``."""
        return {
            'repo': self.repo_name,
            'passed': self.passed,
            'violation_count': self.violation_count,
            'errors': self.errors,
            'warnings': self.warnings,
        }

    def summary(self) -> str:
        """Return the human-readable report: header, warnings, errors, verdict."""
        lines = [f"--- Architecture Linter v2: {self.repo_name} ---"]
        lines.extend(f" [W] {w}" for w in self.warnings)
        lines.extend(f" [E] {e}" for e in self.errors)
        status = "PASSED" if self.passed else f"FAILED ({self.violation_count} violations)"
        lines.append(f"\nResult: {status}")
        return '\n'.join(lines)


class Linter:
    """Run the architecture checks against one repository checkout."""

    def __init__(self, repo_path: str):
        """Resolve *repo_path* and prepare an empty result.

        Raises:
            FileNotFoundError: if the resolved path is not a directory.
        """
        self.repo_path = Path(repo_path).resolve()
        if not self.repo_path.is_dir():
            raise FileNotFoundError(f"Repository path does not exist: {self.repo_path}")
        self.repo_name = self.repo_path.name
        self.result = LinterResult(str(self.repo_path), self.repo_name)

    # --- helpers ---

    @staticmethod
    def _is_scannable(fname: str, exts) -> bool:
        """Return True when *fname* should be scanned for the given extensions."""
        if fname in SKIP_FILES:
            return False
        if Path(fname).suffix in exts:
            return True
        # Path('.env').suffix is '' and Path('.env.local').suffix is '.local',
        # so dotenv files would never match on suffix alone.
        return '.env' in exts and (fname == '.env' or fname.startswith('.env.'))

    def _scan_files(self, extensions=None):
        """Yield (Path, content) for files matching *extensions*.

        Directories in SKIP_DIRS are pruned, unreadable files are skipped, and
        both directories and files are visited in sorted order so reports are
        stable between runs.
        """
        exts = extensions or SCAN_EXTENSIONS
        for root, dirs, files in os.walk(self.repo_path):
            # In-place assignment is what makes os.walk honour the pruning.
            dirs[:] = sorted(d for d in dirs if d not in SKIP_DIRS)
            for fname in sorted(files):
                if not self._is_scannable(fname, exts):
                    continue
                fpath = Path(root) / fname
                try:
                    content = fpath.read_text(encoding='utf-8', errors='ignore')
                except OSError:
                    # Best effort: broken symlinks, permission errors, etc.
                    continue
                yield fpath, content

    def _line_no(self, content: str, offset: int) -> int:
        """Return the 1-based line number of character *offset* in *content*."""
        return content.count('\n', 0, offset) + 1

    def _rel(self, fpath: Path) -> str:
        """Return *fpath* relative to the repository root, as a string."""
        return str(fpath.relative_to(self.repo_path))

    # --- checks ---

    def check_sidecar_boundary(self):
        """No sovereign code in hermes-agent (sidecar boundary)."""
        if self.repo_name != 'hermes-agent':
            return
        for fpath, content in self._scan_files():
            lowered = content.lower()  # hoisted: one pass per file, not per keyword
            rel = self._rel(fpath)
            for kw in SOVEREIGN_KEYWORDS:
                if kw in lowered:
                    self.result.errors.append(
                        f"Sovereign keyword '{kw}' in hermes-agent violates sidecar boundary. [{rel}]"
                    )

    def check_hardcoded_ips(self):
        """No hardcoded public IPs — use DNS or env vars."""
        for fpath, content in self._scan_files():
            rel = self._rel(fpath)
            for m in re.finditer(IP_REGEX, content):
                ip = m.group()
                # Private ranges are already excluded by the lookahead; also
                # skip 0.x and strings that cannot be IPv4 (octet > 255).
                if ip.startswith('0.') or any(int(octet) > 255 for octet in ip.split('.')):
                    continue
                line = self._line_no(content, m.start())
                self.result.errors.append(
                    f"Hardcoded IP '{ip}'. Use DNS or env vars. [{rel}:{line}]"
                )

    def check_api_keys(self):
        """No cloud API keys / secrets committed.

        A line matched by several patterns (e.g. ``api_key = "sk-..."``) is
        reported once so ``violation_count`` reflects distinct findings.
        """
        for fpath, content in self._scan_files():
            flagged_lines = set()
            for pattern in API_KEY_PATTERNS:
                for m in re.finditer(pattern, content, re.IGNORECASE):
                    flagged_lines.add(self._line_no(content, m.start()))
            if not flagged_lines:
                continue
            rel = self._rel(fpath)
            for line in sorted(flagged_lines):
                self.result.errors.append(
                    f"Potential secret / API key detected. [{rel}:{line}]"
                )

    def check_sovereignty_rules(self):
        """V1 sovereignty rules: no direct cloud API endpoints or providers."""
        for fpath, content in self._scan_files({'.py', '.ts', '.tsx', '.js', '.yaml', '.yml'}):
            rel = self._rel(fpath)
            for pattern, msg in SOVEREIGN_RULES:
                for m in re.finditer(pattern, content):
                    line = self._line_no(content, m.start())
                    self.result.errors.append(f"{msg} [{rel}:{line}]")

    def check_soul_canonical(self):
        """SOUL.md must exist in the timmy-config root and in no other repo."""
        soul_path = self.repo_path / 'SOUL.md'
        if self.repo_name == 'timmy-config':
            if not soul_path.exists():
                self.result.errors.append(
                    'SOUL.md missing from canonical location (timmy-config root).'
                )
        elif soul_path.exists():
            self.result.errors.append(
                'SOUL.md found in non-canonical repo. Must live only in timmy-config.'
            )

    def check_readme(self):
        """Every repo must have a README.md; a very short one only warns."""
        readme = self.repo_path / 'README.md'
        # is_file(): a directory named README.md counts as missing, not a crash.
        if not readme.is_file():
            self.result.errors.append('README.md is missing.')
            return
        content = readme.read_text(encoding='utf-8', errors='ignore')
        if len(content.strip()) < 50:
            self.result.warnings.append(
                'README.md is very short (<50 chars). Provide current truth about the repo.'
            )

    # --- runner ---

    def run(self) -> LinterResult:
        """Execute all checks and return the result."""
        self.check_sidecar_boundary()
        self.check_hardcoded_ips()
        self.check_api_keys()
        self.check_sovereignty_rules()
        self.check_soul_canonical()
        self.check_readme()
        return self.result


def main():
    """CLI entry point.

    Exit codes: 0 when the repo passes, 1 when violations were found,
    2 when the repository path does not exist.
    """
    parser = argparse.ArgumentParser(
        description='Gemini Architecture Linter v2 — repo-aware sovereignty gate.'
    )
    parser.add_argument(
        'repo_path', nargs='?', default='.',
        help='Path to the repository to lint (default: cwd).',
    )
    parser.add_argument(
        '--repo', dest='repo_flag', default=None,
        help='Explicit repo path (alias for positional arg).',
    )
    parser.add_argument(
        '--json', dest='json_output', action='store_true',
        help='Emit machine-readable JSON instead of human text.',
    )
    args = parser.parse_args()

    # --repo wins over the positional argument when both are given.
    path = args.repo_flag or args.repo_path

    try:
        linter = Linter(path)
    except FileNotFoundError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        sys.exit(2)

    result = linter.run()

    if args.json_output:
        print(json.dumps(result.to_dict(), indent=2))
    else:
        print(result.summary())

    sys.exit(0 if result.passed else 1)
def _make_repo(tmpdir: str, files: dict[str, str], name: str = "test-repo") -> Path:
    """Create a fake repo under *tmpdir* and return its path.

    Args:
        tmpdir: Existing directory that will contain the repo.
        files: Mapping of repo-relative path to file content; parent
            directories are created as needed.
        name: Directory name of the repo (the linter keys some rules on it).

    Returns:
        Path to the newly created repo directory.
    """
    repo = Path(tmpdir) / name
    repo.mkdir()
    for relpath, content in files.items():
        target = repo / relpath
        target.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8 keeps fixtures identical regardless of the locale.
        target.write_text(content, encoding="utf-8")
    return repo
def test_env_example_skipped():
    """A key-shaped value inside .env.example must not be reported.

    The fixture value is long enough to match the ``sk-`` pattern, so the
    test fails if .env.example is ever scanned. It is assembled at runtime
    so this test file does not itself contain a key-shaped literal.
    """
    key_like = "sk-" + "a" * 24
    with tempfile.TemporaryDirectory() as tmp:
        result = _run(tmp, {
            "README.md": "# R\n\nGood repo.",
            ".env.example": f"OPENAI_KEY={key_like}\n",
        })
        secret_errors = [e for e in result.errors if "secret" in e.lower()]
        assert not secret_errors, f"Unexpected secret findings: {secret_errors}"
def test_cloud_provider_detected():
    """A YAML config that names a cloud provider directly must fail the lint."""
    fixture = {
        "README.md": "# R\n\nGood repo.",
        "config.yaml": "provider: openai\n",
    }
    with tempfile.TemporaryDirectory() as workdir:
        outcome = _run(workdir, fixture)
    provider_hits = [msg for msg in outcome.errors if "provider" in msg.lower()]
    assert not outcome.passed
    assert provider_hits
def test_invalid_path_raises():
    """Constructing a Linter on a missing directory raises FileNotFoundError."""
    try:
        Linter("/nonexistent/path/xyz")
    except FileNotFoundError:
        return
    # Raise explicitly: `assert False` is removed under `python -O`, which
    # would let this test pass without the exception ever being raised.
    raise AssertionError("Should have raised FileNotFoundError")