- Fix broken API_KEY_REGEX in linter_v2.py (was invalid regex causing runtime crash)
- Fix syntax error in architecture_linter.py (malformed character class)
- Add --repo flag and --json output to linter_v2
- Add LinterResult class for structured programmatic access
- Port v1 sovereignty rules (cloud API endpoint/provider checks) into v2
- Skip .git, node_modules, __pycache__ dirs; skip .env.example files
- Add tests/test_linter.py (19 tests covering all checks)
- Add .gitea/workflows/architecture-lint.yml for CI enforcement
- All files pass python3 -m py_compile

Refs: #437
238 lines
8.5 KiB
Python
238 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
[ARCH] Architecture Linter v2
|
|
Part of the Gemini Sovereign Governance System.
|
|
|
|
Enforces architectural boundaries, security, and documentation standards
|
|
across the Timmy Foundation fleet.
|
|
|
|
Refs: #437 — repo-aware, test-backed, CI-enforced.
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# --- CONFIGURATION ---

# Substrings that Linter.check_sidecar_boundary looks for (against lowercased
# file content) when the linted repo is named "hermes-agent".
SOVEREIGN_KEYWORDS = ["mempalace", "sovereign_store", "tirith", "bezalel", "nexus"]

# Dotted-quad IP addresses. The negative lookahead skips anything starting with
# 127., 10., 192.168. or 172.16-31. Addresses starting with "0." (such as
# 0.0.0.0) are NOT excluded by this pattern; Linter.check_hardcoded_ips filters
# those after matching.
IP_REGEX = r'\b(?!(?:127|10|192\.168|172\.(?:1[6-9]|2\d|3[01]))\.)' \
           r'(?:\d{1,3}\.){3}\d{1,3}\b'

# API key / secret patterns — catches openai-, sk-, anthropic-, AKIA, etc.
# Linter.check_api_keys applies every pattern with re.IGNORECASE.
API_KEY_PATTERNS = [
    r'sk-[A-Za-z0-9]{20,}',  # OpenAI-style
    r'sk-ant-[A-Za-z0-9\-]{20,}',  # Anthropic
    r'AKIA[A-Z0-9]{16}',  # AWS access key
    r'ghp_[A-Za-z0-9]{36}',  # GitHub PAT
    r'glpat-[A-Za-z0-9\-]{20,}',  # GitLab PAT
    # Generic quoted assignment: api_key / api-key / apikey / secret / token,
    # followed by ":" or "=" and a quoted value of 16+ key-like characters.
    r'(?:api[_-]?key|secret|token)\s*[:=]\s*["\'][A-Za-z0-9_\-]{16,}["\']',
]

# Sovereignty rules (carried from v1).
# Each entry is a (regex, error message) pair consumed by
# Linter.check_sovereignty_rules; the patterns are matched case-sensitively.
SOVEREIGN_RULES = [
    (r'https?://api\.openai\.com', 'External cloud API: api.openai.com. Use local custom_provider.'),
    (r'https?://api\.anthropic\.com', 'External cloud API: api.anthropic.com. Use local custom_provider.'),
    (r'provider:\s*(?:openai|anthropic)\b', 'Direct cloud provider. Ensure fallback_model is configured.'),
]

# File extensions to scan (the default set used by Linter._scan_files).
SCAN_EXTENSIONS = {'.py', '.ts', '.tsx', '.js', '.yaml', '.yml', '.json', '.env', '.sh', '.cfg', '.toml'}
# Directory names pruned from the walk in Linter._scan_files.
SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', '.eggs'}
|
|
|
|
|
|
class LinterResult:
    """Accumulates the findings of one lint run over a single repository.

    ``errors`` are violations that fail the run; ``warnings`` are reported
    but never affect ``passed``.
    """

    def __init__(self, repo_path: str, repo_name: str):
        self.repo_path = repo_path
        self.repo_name = repo_name
        self.errors: list[str] = []
        self.warnings: list[str] = []

    @property
    def passed(self) -> bool:
        """True when no error has been recorded."""
        return not self.errors

    @property
    def violation_count(self) -> int:
        """Number of recorded errors (warnings are not counted)."""
        return len(self.errors)

    def summary(self) -> str:
        """Render a human-readable report: header, warnings, errors, verdict."""
        if self.passed:
            verdict = "PASSED"
        else:
            verdict = f"FAILED ({self.violation_count} violations)"

        report = [f"--- Architecture Linter v2: {self.repo_name} ---"]
        report.extend(f" [W] {warning}" for warning in self.warnings)
        report.extend(f" [E] {error}" for error in self.errors)
        report.append(f"\nResult: {verdict}")
        return '\n'.join(report)
|
|
|
|
|
|
class Linter:
    """Run the architecture checks against one repository checkout.

    Each ``check_*`` method appends its findings to ``self.result``; ``run``
    executes all of them and returns that result object.
    """

    def __init__(self, repo_path: str):
        """Resolve *repo_path* and prepare an empty result.

        Raises:
            FileNotFoundError: if the resolved path is not an existing directory.
        """
        self.repo_path = Path(repo_path).resolve()
        if not self.repo_path.is_dir():
            raise FileNotFoundError(f"Repository path does not exist: {self.repo_path}")
        # The directory name doubles as the repo identity for repo-specific rules.
        self.repo_name = self.repo_path.name
        self.result = LinterResult(str(self.repo_path), self.repo_name)

    # --- helpers ---

    @staticmethod
    def _is_scannable(fname: str, exts) -> bool:
        """Return True when the file named *fname* should be scanned.

        *exts* is a collection of dotted extensions such as ``{'.py', '.env'}``.
        ``.env.example`` is always skipped.
        """
        if fname == '.env.example':
            return False
        if Path(fname).suffix in exts:
            return True
        # pathlib reports an empty suffix for dotfiles ('.env' -> ''), so a
        # bare '.env' would never match on suffix alone; compare the whole
        # name for such files.
        return fname.startswith('.') and fname in exts

    def _scan_files(self, extensions=None):
        """Yield (Path, content) for files matching *extensions*.

        Falls back to SCAN_EXTENSIONS when *extensions* is empty or None.
        Directories listed in SKIP_DIRS are pruned, unreadable files are
        skipped, and entries are visited in sorted order so the report is
        deterministic across runs and platforms.
        """
        exts = extensions or SCAN_EXTENSIONS
        for root, dirs, files in os.walk(self.repo_path):
            # In-place assignment is what makes os.walk skip the pruned dirs.
            dirs[:] = sorted(d for d in dirs if d not in SKIP_DIRS)
            for fname in sorted(files):
                if not self._is_scannable(fname, exts):
                    continue
                fpath = Path(root) / fname
                try:
                    # Explicit encoding: do not depend on the platform locale.
                    content = fpath.read_text(encoding='utf-8', errors='ignore')
                except OSError:
                    # Best effort: broken symlinks, permission errors, etc.
                    continue
                yield fpath, content

    def _line_no(self, content: str, offset: int) -> int:
        """Return the 1-based line number of character *offset* in *content*."""
        return content.count('\n', 0, offset) + 1

    # --- checks ---

    def check_sidecar_boundary(self):
        """No sovereign code in hermes-agent (sidecar boundary).

        Only applies when the repo is named ``hermes-agent``; one error is
        recorded per (file, keyword) pair.
        """
        if self.repo_name != 'hermes-agent':
            return
        for fpath, content in self._scan_files():
            lowered = content.lower()  # lowercase once, not once per keyword
            for kw in SOVEREIGN_KEYWORDS:
                if kw in lowered:
                    rel = str(fpath.relative_to(self.repo_path))
                    self.result.errors.append(
                        f"Sovereign keyword '{kw}' in hermes-agent violates sidecar boundary. [{rel}]"
                    )

    def check_hardcoded_ips(self):
        """No hardcoded public IPs — use DNS or env vars."""
        for fpath, content in self._scan_files():
            rel = str(fpath.relative_to(self.repo_path))
            for m in re.finditer(IP_REGEX, content):
                ip = m.group()
                # Private ranges are excluded by the regex lookahead; 0.x.x.x
                # (e.g. 0.0.0.0) is filtered here.
                if ip.startswith('0.'):
                    continue
                # A dotted quad with an octet above 255 is not an address.
                if any(int(octet) > 255 for octet in ip.split('.')):
                    continue
                line = self._line_no(content, m.start())
                self.result.errors.append(
                    f"Hardcoded IP '{ip}'. Use DNS or env vars. [{rel}:{line}]"
                )

    def check_api_keys(self):
        """No cloud API keys / secrets committed.

        Each offending line is reported once, even when several patterns
        match the same secret (e.g. a vendor prefix inside ``token = "..."``).
        """
        for fpath, content in self._scan_files():
            rel = str(fpath.relative_to(self.repo_path))
            flagged_lines = set()
            for pattern in API_KEY_PATTERNS:
                for m in re.finditer(pattern, content, re.IGNORECASE):
                    line = self._line_no(content, m.start())
                    if line in flagged_lines:
                        # The message carries only the location, so a second
                        # hit on the same line would be an exact duplicate.
                        continue
                    flagged_lines.add(line)
                    self.result.errors.append(
                        f"Potential secret / API key detected. [{rel}:{line}]"
                    )

    def check_sovereignty_rules(self):
        """V1 sovereignty rules: no direct cloud API endpoints or providers.

        Only source and YAML files are scanned.
        """
        for fpath, content in self._scan_files({'.py', '.ts', '.tsx', '.js', '.yaml', '.yml'}):
            rel = str(fpath.relative_to(self.repo_path))
            for pattern, msg in SOVEREIGN_RULES:
                for m in re.finditer(pattern, content):
                    line = self._line_no(content, m.start())
                    self.result.errors.append(f"{msg} [{rel}:{line}]")

    def check_soul_canonical(self):
        """SOUL.md must exist exactly in timmy-config root."""
        soul_path = self.repo_path / 'SOUL.md'
        if self.repo_name == 'timmy-config':
            if not soul_path.exists():
                self.result.errors.append(
                    'SOUL.md missing from canonical location (timmy-config root).'
                )
        elif soul_path.exists():
            self.result.errors.append(
                'SOUL.md found in non-canonical repo. Must live only in timmy-config.'
            )

    def check_readme(self):
        """Every repo must have a substantive README.

        A missing README.md is an error; one shorter than 50 characters
        (after stripping whitespace) is a warning.
        """
        readme = self.repo_path / 'README.md'
        # is_file() rather than exists(): a directory named README.md is not
        # a README and must not crash the read below.
        if not readme.is_file():
            self.result.errors.append('README.md is missing.')
            return
        content = readme.read_text(encoding='utf-8', errors='ignore')
        if len(content.strip()) < 50:
            self.result.warnings.append(
                'README.md is very short (<50 chars). Provide current truth about the repo.'
            )

    # --- runner ---

    def run(self) -> "LinterResult":
        """Execute all checks and return the result."""
        self.check_sidecar_boundary()
        self.check_hardcoded_ips()
        self.check_api_keys()
        self.check_sovereignty_rules()
        self.check_soul_canonical()
        self.check_readme()
        return self.result
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Lints the repository given by ``--repo`` (preferred when supplied) or the
    positional path, prints either the text summary or a JSON document, and
    exits with 0 when the lint passed, 1 when violations were found, or 2
    when the repository path does not exist.
    """
    cli = argparse.ArgumentParser(
        description='Gemini Architecture Linter v2 — repo-aware sovereignty gate.'
    )
    cli.add_argument(
        'repo_path',
        nargs='?',
        default='.',
        help='Path to the repository to lint (default: cwd).',
    )
    cli.add_argument(
        '--repo',
        dest='repo_flag',
        default=None,
        help='Explicit repo path (alias for positional arg).',
    )
    cli.add_argument(
        '--json',
        dest='json_output',
        action='store_true',
        help='Emit machine-readable JSON instead of human text.',
    )
    opts = cli.parse_args()

    # The flag wins over the positional argument whenever it is non-empty.
    target = opts.repo_flag or opts.repo_path

    try:
        linter = Linter(target)
    except FileNotFoundError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        sys.exit(2)

    outcome = linter.run()

    if not opts.json_output:
        print(outcome.summary())
    else:
        import json as _json

        payload = {
            'repo': outcome.repo_name,
            'passed': outcome.passed,
            'violation_count': outcome.violation_count,
            'errors': outcome.errors,
            'warnings': outcome.warnings,
        }
        print(_json.dumps(payload, indent=2))

    sys.exit(0 if outcome.passed else 1)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|