fix: architecture_linter_v2 — repo-aware, test-backed, CI-enforced (#437) #453

Merged
Rockachopa merged 2 commits from burn/20260409-1926-linter-v2 into main 2026-04-10 09:36:23 +00:00
4 changed files with 462 additions and 77 deletions

View File

@@ -0,0 +1,41 @@
# architecture-lint.yml — CI gate for the Architecture Linter v2
# Refs: #437 — repo-aware, test-backed, CI-enforced.
#
# Runs on every PR to main. Validates Python syntax, then runs
# linter tests and finally lints the repo itself.
name: Architecture Lint
on:
pull_request:
branches: [main, master]
push:
branches: [main]
jobs:
linter-tests:
name: Linter Tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install test deps
run: pip install pytest
- name: Compile-check linter
run: python3 -m py_compile scripts/architecture_linter_v2.py
- name: Run linter tests
run: python3 -m pytest tests/test_linter.py -v
lint-repo:
name: Lint Repository
runs-on: ubuntu-latest
needs: linter-tests
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Run architecture linter
run: python3 scripts/architecture_linter_v2.py .

View File

@@ -9,7 +9,7 @@ import re
SOVEREIGN_RULES = [ SOVEREIGN_RULES = [
(r"https?://(api\.openai\.com|api\.anthropic\.com)", "CRITICAL: External cloud API detected. Use local custom_provider instead."), (r"https?://(api\.openai\.com|api\.anthropic\.com)", "CRITICAL: External cloud API detected. Use local custom_provider instead."),
(r"provider: (openai|anthropic)", "WARNING: Direct cloud provider used. Ensure fallback_model is configured."), (r"provider: (openai|anthropic)", "WARNING: Direct cloud provider used. Ensure fallback_model is configured."),
(r"api_key: ['"][^'"\s]{10,}['"]", "SECURITY: Hardcoded API key detected. Use environment variables.") (r"api_key:\s*['\"][A-Za-z0-9_\-]{16,}['\"]", "SECURITY: Hardcoded API key detected. Use environment variables.")
] ]
def lint_file(path): def lint_file(path):

View File

@@ -5,122 +5,233 @@ Part of the Gemini Sovereign Governance System.
Enforces architectural boundaries, security, and documentation standards Enforces architectural boundaries, security, and documentation standards
across the Timmy Foundation fleet. across the Timmy Foundation fleet.
Refs: #437 — repo-aware, test-backed, CI-enforced.
""" """
import argparse
import os import os
import re import re
import sys import sys
import argparse
from pathlib import Path from pathlib import Path
# --- CONFIGURATION --- # --- CONFIGURATION ---
SOVEREIGN_KEYWORDS = ["mempalace", "sovereign_store", "tirith", "bezalel", "nexus"] SOVEREIGN_KEYWORDS = ["mempalace", "sovereign_store", "tirith", "bezalel", "nexus"]
IP_REGEX = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
API_KEY_REGEX = r'(?:api_key|secret|token|password|auth_token)\s*[:=]\s*["\'][a-zA-Z0-9_\-]{20,}["\']' # IP addresses (skip 127.0.0.1, 0.0.0.0, 10.x.x.x, 172.16-31.x.x, 192.168.x.x)
IP_REGEX = r'\b(?!(?:127|10|192\.168|172\.(?:1[6-9]|2\d|3[01]))\.)' \
r'(?:\d{1,3}\.){3}\d{1,3}\b'
# API key / secret patterns — catches openai-, sk-, anthropic-, AKIA, etc.
API_KEY_PATTERNS = [
r'sk-[A-Za-z0-9]{20,}', # OpenAI-style
r'sk-ant-[A-Za-z0-9\-]{20,}', # Anthropic
r'AKIA[A-Z0-9]{16}', # AWS access key
r'ghp_[A-Za-z0-9]{36}', # GitHub PAT
r'glpat-[A-Za-z0-9\-]{20,}', # GitLab PAT
r'(?:api[_-]?key|secret|token)\s*[:=]\s*["\'][A-Za-z0-9_\-]{16,}["\']',
]
# Sovereignty rules (carried from v1)
SOVEREIGN_RULES = [
(r'https?://api\.openai\.com', 'External cloud API: api.openai.com. Use local custom_provider.'),
(r'https?://api\.anthropic\.com', 'External cloud API: api.anthropic.com. Use local custom_provider.'),
(r'provider:\s*(?:openai|anthropic)\b', 'Direct cloud provider. Ensure fallback_model is configured.'),
]
# File extensions to scan
SCAN_EXTENSIONS = {'.py', '.ts', '.tsx', '.js', '.yaml', '.yml', '.json', '.env', '.sh', '.cfg', '.toml'}
SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', '.eggs'}
class LinterResult:
"""Structured result container for programmatic access."""
def __init__(self, repo_path: str, repo_name: str):
self.repo_path = repo_path
self.repo_name = repo_name
self.errors: list[str] = []
self.warnings: list[str] = []
@property
def passed(self) -> bool:
return len(self.errors) == 0
@property
def violation_count(self) -> int:
return len(self.errors)
def summary(self) -> str:
lines = [f"--- Architecture Linter v2: {self.repo_name} ---"]
for w in self.warnings:
lines.append(f" [W] {w}")
for e in self.errors:
lines.append(f" [E] {e}")
status = "PASSED" if self.passed else f"FAILED ({self.violation_count} violations)"
lines.append(f"\nResult: {status}")
return '\n'.join(lines)
class Linter: class Linter:
def __init__(self, repo_path: str): def __init__(self, repo_path: str):
self.repo_path = Path(repo_path).resolve() self.repo_path = Path(repo_path).resolve()
if not self.repo_path.is_dir():
raise FileNotFoundError(f"Repository path does not exist: {self.repo_path}")
self.repo_name = self.repo_path.name self.repo_name = self.repo_path.name
self.errors = [] self.result = LinterResult(str(self.repo_path), self.repo_name)
def log_error(self, message: str, file: str = None, line: int = None): # --- helpers ---
loc = f"{file}:{line}" if file and line else (file if file else "General")
self.errors.append(f"[{loc}] {message}") def _scan_files(self, extensions=None):
"""Yield (Path, content) for files matching *extensions*."""
exts = extensions or SCAN_EXTENSIONS
for root, dirs, files in os.walk(self.repo_path):
dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
for fname in files:
if Path(fname).suffix in exts:
if fname == '.env.example':
continue
fpath = Path(root) / fname
try:
content = fpath.read_text(errors='ignore')
except Exception:
continue
yield fpath, content
def _line_no(self, content: str, offset: int) -> int:
return content.count('\n', 0, offset) + 1
# --- checks ---
def check_sidecar_boundary(self): def check_sidecar_boundary(self):
"""Rule 1: No sovereign code in hermes-agent (sidecar boundary)""" """No sovereign code in hermes-agent (sidecar boundary)."""
if self.repo_name == "hermes-agent": if self.repo_name != 'hermes-agent':
for root, _, files in os.walk(self.repo_path): return
if "node_modules" in root or ".git" in root: for fpath, content in self._scan_files():
continue for kw in SOVEREIGN_KEYWORDS:
for file in files: if kw in content.lower():
if file.endswith((".py", ".ts", ".js", ".tsx")): rel = str(fpath.relative_to(self.repo_path))
path = Path(root) / file self.result.errors.append(
content = path.read_text(errors="ignore") f"Sovereign keyword '{kw}' in hermes-agent violates sidecar boundary. [{rel}]"
for kw in SOVEREIGN_KEYWORDS: )
if kw in content.lower():
# Exception: imports or comments might be okay, but we're strict for now
self.log_error(f"Sovereign keyword '{kw}' found in hermes-agent. Violates sidecar boundary.", str(path.relative_to(self.repo_path)))
def check_hardcoded_ips(self): def check_hardcoded_ips(self):
"""Rule 2: No hardcoded IPs (use domain names)""" """No hardcoded public IPs use DNS or env vars."""
for root, _, files in os.walk(self.repo_path): for fpath, content in self._scan_files():
if "node_modules" in root or ".git" in root: for m in re.finditer(IP_REGEX, content):
continue ip = m.group()
for file in files: # skip private ranges already handled by lookahead, and 0.0.0.0
if file.endswith((".py", ".ts", ".js", ".tsx", ".yaml", ".yml", ".json")): if ip.startswith('0.'):
path = Path(root) / file continue
content = path.read_text(errors="ignore") line = self._line_no(content, m.start())
matches = re.finditer(IP_REGEX, content) rel = str(fpath.relative_to(self.repo_path))
for match in matches: self.result.errors.append(
ip = match.group() f"Hardcoded IP '{ip}'. Use DNS or env vars. [{rel}:{line}]"
if ip in ["127.0.0.1", "0.0.0.0"]: )
continue
line_no = content.count('\n', 0, match.start()) + 1
self.log_error(f"Hardcoded IP address '{ip}' found. Use domain names or environment variables.", str(path.relative_to(self.repo_path)), line_no)
def check_api_keys(self): def check_api_keys(self):
"""Rule 3: No cloud API keys committed to repos""" """No cloud API keys / secrets committed."""
for root, _, files in os.walk(self.repo_path): for fpath, content in self._scan_files():
if "node_modules" in root or ".git" in root: for pattern in API_KEY_PATTERNS:
continue for m in re.finditer(pattern, content, re.IGNORECASE):
for file in files: line = self._line_no(content, m.start())
if file.endswith((".py", ".ts", ".js", ".tsx", ".yaml", ".yml", ".json", ".env")): rel = str(fpath.relative_to(self.repo_path))
if file == ".env.example": self.result.errors.append(
continue f"Potential secret / API key detected. [{rel}:{line}]"
path = Path(root) / file )
content = path.read_text(errors="ignore")
matches = re.finditer(API_KEY_REGEX, content, re.IGNORECASE) def check_sovereignty_rules(self):
for match in matches: """V1 sovereignty rules: no direct cloud API endpoints or providers."""
line_no = content.count('\n', 0, match.start()) + 1 for fpath, content in self._scan_files({'.py', '.ts', '.tsx', '.js', '.yaml', '.yml'}):
self.log_error("Potential API key or secret found in code.", str(path.relative_to(self.repo_path)), line_no) for pattern, msg in SOVEREIGN_RULES:
for m in re.finditer(pattern, content):
line = self._line_no(content, m.start())
rel = str(fpath.relative_to(self.repo_path))
self.result.errors.append(f"{msg} [{rel}:{line}]")
def check_soul_canonical(self): def check_soul_canonical(self):
"""Rule 4: SOUL.md exists and is canonical in exactly one location""" """SOUL.md must exist exactly in timmy-config root."""
soul_path = self.repo_path / "SOUL.md" soul_path = self.repo_path / 'SOUL.md'
if self.repo_name == "timmy-config": if self.repo_name == 'timmy-config':
if not soul_path.exists(): if not soul_path.exists():
self.log_error("SOUL.md is missing from the canonical location (timmy-config root).") self.result.errors.append(
'SOUL.md missing from canonical location (timmy-config root).'
)
else: else:
if soul_path.exists(): if soul_path.exists():
self.log_error("SOUL.md found in non-canonical repo. It should only live in timmy-config.") self.result.errors.append(
'SOUL.md found in non-canonical repo. Must live only in timmy-config.'
)
def check_readme(self): def check_readme(self):
"""Rule 5: Every repo has a README with current truth""" """Every repo must have a substantive README."""
readme_path = self.repo_path / "README.md" readme = self.repo_path / 'README.md'
if not readme_path.exists(): if not readme.exists():
self.log_error("README.md is missing.") self.result.errors.append('README.md is missing.')
else: else:
content = readme_path.read_text(errors="ignore") content = readme.read_text(errors='ignore')
if len(content.strip()) < 50: if len(content.strip()) < 50:
self.log_error("README.md is too short or empty. Provide current truth about the repo.") self.result.warnings.append(
'README.md is very short (<50 chars). Provide current truth about the repo.'
)
def run(self): # --- runner ---
print(f"--- Gemini Linter: Auditing {self.repo_name} ---")
def run(self) -> LinterResult:
"""Execute all checks and return the result."""
self.check_sidecar_boundary() self.check_sidecar_boundary()
self.check_hardcoded_ips() self.check_hardcoded_ips()
self.check_api_keys() self.check_api_keys()
self.check_sovereignty_rules()
self.check_soul_canonical() self.check_soul_canonical()
self.check_readme() self.check_readme()
return self.result
if self.errors:
print(f"\n[FAILURE] Found {len(self.errors)} architectural violations:")
for err in self.errors:
print(f" - {err}")
return False
else:
print("\n[SUCCESS] Architecture is sound. Sovereignty maintained.")
return True
def main(): def main():
parser = argparse.ArgumentParser(description="Gemini Architecture Linter v2") parser = argparse.ArgumentParser(
parser.add_argument("repo_path", nargs="?", default=".", help="Path to the repository to lint") description='Gemini Architecture Linter v2 — repo-aware sovereignty gate.'
)
parser.add_argument(
'repo_path', nargs='?', default='.',
help='Path to the repository to lint (default: cwd).',
)
parser.add_argument(
'--repo', dest='repo_flag', default=None,
help='Explicit repo path (alias for positional arg).',
)
parser.add_argument(
'--json', dest='json_output', action='store_true',
help='Emit machine-readable JSON instead of human text.',
)
args = parser.parse_args() args = parser.parse_args()
linter = Linter(args.repo_path) path = args.repo_flag if args.repo_flag else args.repo_path
success = linter.run()
sys.exit(0 if success else 1)
if __name__ == "__main__": try:
linter = Linter(path)
except FileNotFoundError as exc:
print(f"ERROR: {exc}", file=sys.stderr)
sys.exit(2)
result = linter.run()
if args.json_output:
import json as _json
out = {
'repo': result.repo_name,
'passed': result.passed,
'violation_count': result.violation_count,
'errors': result.errors,
'warnings': result.warnings,
}
print(_json.dumps(out, indent=2))
else:
print(result.summary())
sys.exit(0 if result.passed else 1)
if __name__ == '__main__':
main() main()

233
tests/test_linter.py Normal file
View File

@@ -0,0 +1,233 @@
"""Tests for Architecture Linter v2.
Validates that the linter correctly detects violations and passes clean repos.
Refs: #437 — test-backed linter.
"""
import json
import sys
import tempfile
from pathlib import Path
# Add scripts/ to path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
from architecture_linter_v2 import Linter, LinterResult
# ── helpers ───────────────────────────────────────────────────────────
def _make_repo(tmpdir: str, files: dict[str, str], name: str = "test-repo") -> Path:
"""Create a fake repo with given files and return its path."""
repo = Path(tmpdir) / name
repo.mkdir()
for relpath, content in files.items():
p = repo / relpath
p.parent.mkdir(parents=True, exist_ok=True)
p.write_text(content)
return repo
def _run(tmpdir, files, name="test-repo"):
repo = _make_repo(tmpdir, files, name)
return Linter(str(repo)).run()
# ── clean repo passes ─────────────────────────────────────────────────
def test_clean_repo_passes():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# Test Repo\n\nThis is a clean test repo with sufficient content to pass.",
"main.py": "print('hello world')\n",
})
assert result.passed, f"Expected pass but got: {result.errors}"
assert result.violation_count == 0
# ── missing README ────────────────────────────────────────────────────
def test_missing_readme_fails():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {"main.py": "x = 1\n"})
assert not result.passed
assert any("README" in e for e in result.errors)
def test_short_readme_warns():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {"README.md": "hi\n"})
# Warnings don't fail the build
assert result.passed
assert any("short" in w.lower() for w in result.warnings)
# ── hardcoded IPs ─────────────────────────────────────────────────────
def test_hardcoded_public_ip_detected():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# R\n\nGood repo.",
"server.py": "HOST = '203.0.113.42'\n",
})
assert not result.passed
assert any("203.0.113.42" in e for e in result.errors)
def test_localhost_ip_ignored():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# R\n\nGood repo.",
"server.py": "HOST = '127.0.0.1'\n",
})
ip_errors = [e for e in result.errors if "IP" in e]
assert len(ip_errors) == 0
# ── API keys ──────────────────────────────────────────────────────────
def test_openai_key_detected():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# R\n\nGood repo.",
"config.py": 'key = "sk-abcdefghijklmnopqrstuvwx"\n',
})
assert not result.passed
assert any("secret" in e.lower() or "key" in e.lower() for e in result.errors)
def test_aws_key_detected():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# R\n\nGood repo.",
"deploy.yaml": 'aws_key: AKIAIOSFODNN7EXAMPLE\n',
})
assert not result.passed
assert any("secret" in e.lower() for e in result.errors)
def test_env_example_skipped():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# R\n\nGood repo.",
".env.example": 'OPENAI_KEY=sk-placeholder\n',
})
secret_errors = [e for e in result.errors if "secret" in e.lower()]
assert len(secret_errors) == 0
# ── sovereignty rules (v1 cloud API checks) ───────────────────────────
def test_openai_url_detected():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# R\n\nGood repo.",
"app.py": 'url = "https://api.openai.com/v1/chat"\n',
})
assert not result.passed
assert any("openai" in e.lower() for e in result.errors)
def test_cloud_provider_detected():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# R\n\nGood repo.",
"config.yaml": "provider: openai\n",
})
assert not result.passed
assert any("provider" in e.lower() for e in result.errors)
# ── sidecar boundary ──────────────────────────────────────────────────
def test_sovereign_keyword_in_hermes_agent_fails():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# R\n\nGood repo.",
"index.py": "import mempalace\n",
}, name="hermes-agent")
assert not result.passed
assert any("sidecar" in e.lower() or "mempalace" in e.lower() for e in result.errors)
def test_sovereign_keyword_in_other_repo_ok():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# R\n\nGood repo.",
"index.py": "import mempalace\n",
}, name="some-other-repo")
sidecar_errors = [e for e in result.errors if "sidecar" in e.lower()]
assert len(sidecar_errors) == 0
# ── SOUL.md canonical location ────────────────────────────────────────
def test_soul_md_required_in_timmy_config():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# timmy-config\n\nConfig repo.",
}, name="timmy-config")
assert not result.passed
assert any("SOUL.md" in e for e in result.errors)
def test_soul_md_present_in_timmy_config_ok():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# timmy-config\n\nConfig repo.",
"SOUL.md": "# Soul\n\nCanonical identity document.",
}, name="timmy-config")
soul_errors = [e for e in result.errors if "SOUL" in e]
assert len(soul_errors) == 0
def test_soul_md_in_wrong_repo_fails():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {
"README.md": "# R\n\nGood repo.",
"SOUL.md": "# Soul\n\nShould not be here.",
}, name="other-repo")
assert any("canonical" in e.lower() for e in result.errors)
# ── LinterResult structure ────────────────────────────────────────────
def test_result_summary_is_string():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {"README.md": "# OK repo with enough text here\n"})
assert isinstance(result.summary(), str)
assert "PASSED" in result.summary() or "FAILED" in result.summary()
def test_result_repo_name():
with tempfile.TemporaryDirectory() as tmp:
result = _run(tmp, {"README.md": "# OK\n"}, name="my-repo")
assert result.repo_name == "my-repo"
# ── invalid path ──────────────────────────────────────────────────────
def test_invalid_path_raises():
try:
Linter("/nonexistent/path/xyz")
assert False, "Should have raised FileNotFoundError"
except FileNotFoundError:
pass
# ── skip dirs ──────────────────────────────────────────────────────────
def test_git_dir_skipped():
with tempfile.TemporaryDirectory() as tmp:
repo = _make_repo(tmp, {
"README.md": "# R\n\nGood repo.",
"main.py": "x = 1\n",
})
# Create a .git/ dir with a bad file
git_dir = repo / ".git"
git_dir.mkdir()
(git_dir / "bad.py").write_text("HOST = '203.0.113.1'\n")
result = Linter(str(repo)).run()
git_errors = [e for e in result.errors if ".git" in e]
assert len(git_errors) == 0