Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
62925c294d |
15
.github/workflows/tests.yml
vendored
15
.github/workflows/tests.yml
vendored
@@ -47,6 +47,21 @@ jobs:
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
|
||||
lint-paths:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Check for hardcoded ~/.hermes paths
|
||||
run: python3 scripts/lint_hardcoded_paths.py
|
||||
|
||||
e2e:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
[
|
||||
{
|
||||
"id": "img_001",
|
||||
"name": "red_circle",
|
||||
"path": "benchmarks/test_images/red_circle.png",
|
||||
"description": "A red circle on a white background",
|
||||
"expected_answer_contains": ["red", "circle"],
|
||||
"category": "shape_color"
|
||||
},
|
||||
{
|
||||
"id": "img_002",
|
||||
"name": "blue_square",
|
||||
"path": "benchmarks/test_images/blue_square.png",
|
||||
"description": "A blue square on a white background",
|
||||
"expected_answer_contains": ["blue", "square"],
|
||||
"category": "shape_color"
|
||||
},
|
||||
{
|
||||
"id": "img_003",
|
||||
"name": "green_triangle",
|
||||
"path": "benchmarks/test_images/green_triangle.png",
|
||||
"description": "A green triangle on a white background",
|
||||
"expected_answer_contains": ["green", "triangle"],
|
||||
"category": "shape_color"
|
||||
},
|
||||
{
|
||||
"id": "img_004",
|
||||
"name": "text_hello",
|
||||
"path": "benchmarks/test_images/text_hello.png",
|
||||
"description": "An image containing the text 'Hello World'",
|
||||
"expected_answer_contains": ["hello", "world"],
|
||||
"category": "ocr"
|
||||
},
|
||||
{
|
||||
"id": "img_005",
|
||||
"name": "mixed_shapes",
|
||||
"path": "benchmarks/test_images/mixed_shapes.png",
|
||||
"description": "Multiple colored shapes: red circle, blue square, yellow star",
|
||||
"expected_answer_contains": ["red", "blue", "yellow"],
|
||||
"category": "counting"
|
||||
}
|
||||
]
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 779 B |
Binary file not shown.
|
Before Width: | Height: | Size: 1.3 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 1.2 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 1.4 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 3.3 KiB |
@@ -1,204 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Vision benchmark — test model image understanding with local test images.
|
||||
|
||||
Uses locally-stored test images (not external URLs) for reliable CI.
|
||||
|
||||
Usage:
|
||||
python3 benchmarks/vision_benchmark.py --model hermes3
|
||||
python3 benchmarks/vision_benchmark.py --model qwen2.5 --json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Directory containing this benchmark script; image paths from the dataset
# are resolved relative to its parent (the repository root).
BENCHMARK_DIR = Path(__file__).resolve().parent
TEST_IMAGES_FILE = BENCHMARK_DIR / "test_images.json"
|
||||
|
||||
|
||||
def load_test_dataset() -> List[Dict[str, Any]]:
    """Read and parse the test-image dataset from TEST_IMAGES_FILE.

    Raises FileNotFoundError when the dataset file is absent.
    """
    if not TEST_IMAGES_FILE.exists():
        raise FileNotFoundError(f"Test dataset not found: {TEST_IMAGES_FILE}")
    with TEST_IMAGES_FILE.open() as fh:
        return json.load(fh)
|
||||
|
||||
|
||||
def encode_image_base64(image_path: str) -> str:
    """Return the contents of *image_path* as a base64 ASCII string."""
    raw = Path(image_path).read_bytes()
    return base64.b64encode(raw).decode()
|
||||
|
||||
|
||||
def verify_images_exist(dataset: List[Dict[str, Any]]) -> List[str]:
    """Return the relative paths of dataset images that are missing on disk."""
    return [
        entry["path"]
        for entry in dataset
        if not (BENCHMARK_DIR.parent / entry["path"]).exists()
    ]
|
||||
|
||||
|
||||
def run_vision_test(
    image_path: str,
    prompt: str,
    base_url: str = "http://localhost:11434/v1",
    model: str = "",
    api_key: str = "",
    timeout: int = 30,
) -> Dict[str, Any]:
    """Send one image + prompt to an OpenAI-compatible chat endpoint.

    The image is inlined as a base64 data URL. Returns a dict with a
    ``success`` flag, the response text, latency in milliseconds and,
    on failure, an ``error`` string.
    """
    import urllib.request

    img_b64 = encode_image_base64(image_path)

    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{img_b64}"},
            },
        ],
    }
    payload = {
        "model": model or "",
        "messages": [user_message],
        "max_tokens": 200,
    }

    request_headers = {"Content-Type": "application/json"}
    if api_key:
        request_headers["Authorization"] = f"Bearer {api_key}"

    endpoint = f"{base_url.rstrip('/')}/chat/completions"
    started = time.monotonic()

    try:
        request = urllib.request.Request(
            endpoint,
            data=json.dumps(payload).encode(),
            headers=request_headers,
            method="POST",
        )
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            data = json.loads(resp.read())
        content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
        return {
            "success": True,
            "response": content,
            "latency_ms": int((time.monotonic() - started) * 1000),
            "model": data.get("model", model),
        }
    except Exception as exc:
        # Network, HTTP and JSON errors are all reported uniformly so the
        # benchmark loop can keep going after a single failed request.
        return {
            "success": False,
            "response": "",
            "latency_ms": int((time.monotonic() - started) * 1000),
            "error": str(exc),
        }
|
||||
|
||||
|
||||
def evaluate_response(response: str, expected: List[str]) -> bool:
    """True when every expected keyword appears (case-insensitively) in *response*."""
    haystack = response.lower()
    for keyword in expected:
        if keyword.lower() not in haystack:
            return False
    return True
|
||||
|
||||
|
||||
def run_benchmark(
    base_url: str = "http://localhost:11434/v1",
    model: str = "",
) -> Dict[str, Any]:
    """Run the full vision benchmark against one model.

    Returns a summary dict with pass counts, success rate and per-test
    results, or a dict with an ``error`` key when test images are missing.
    """
    dataset = load_test_dataset()

    # Fail fast if any referenced image is absent from the checkout.
    missing = verify_images_exist(dataset)
    if missing:
        return {"error": f"Missing test images: {missing}", "passed": 0, "total": len(dataset)}

    results = []
    passed = 0

    for item in dataset:
        image_path = str(BENCHMARK_DIR.parent / item["path"])
        # Mention text as well as shapes/colors: the "ocr" category expects
        # transcribed words, which the old shapes-only prompt never elicited.
        # (Also dropped the pointless f-string prefix on a literal with no
        # placeholders.)
        prompt = "What do you see in this image? Describe the shapes, colors, and any text."

        result = run_vision_test(image_path, prompt, base_url=base_url, model=model)
        result["test_id"] = item["id"]
        result["test_name"] = item["name"]
        result["category"] = item["category"]

        if result["success"]:
            result["correct"] = evaluate_response(result["response"], item["expected_answer_contains"])
            if result["correct"]:
                passed += 1
        else:
            result["correct"] = False

        results.append(result)

    return {
        "model": model,
        "base_url": base_url,
        "passed": passed,
        "total": len(dataset),
        # Guard against an empty dataset to avoid ZeroDivisionError.
        "success_rate": passed / len(dataset) if dataset else 0,
        "results": results,
    }
|
||||
|
||||
|
||||
def format_report(benchmark: Dict[str, Any]) -> str:
    """Render benchmark results as a human-readable text report."""
    if "error" in benchmark:
        return f"ERROR: {benchmark['error']}"

    out = [
        "Vision Benchmark Results",
        "=" * 40,
        f"Model: {benchmark.get('model', 'unknown')}",
        f"Passed: {benchmark['passed']}/{benchmark['total']} ({benchmark['success_rate']:.0%})",
        "",
    ]

    for entry in benchmark.get("results", []):
        status = "\u2705" if entry.get("correct") else "\u274c"
        out.append(
            f" {status} {entry.get('test_name', '?')} ({entry.get('category', '?')}) — {entry.get('latency_ms', 0)}ms"
        )
        if not entry.get("success"):
            out.append(f" Error: {entry.get('error', 'unknown')}")
        elif not entry.get("correct"):
            out.append(f" Got: {entry.get('response', '')[:100]}")

    return "\n".join(out)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse args, run the benchmark, print the results.

    Returns 0 when the success rate is at least 80%, otherwise 1.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Vision benchmark")
    parser.add_argument("--base-url", default="http://localhost:11434/v1")
    parser.add_argument("--model", default="")
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args()

    benchmark = run_benchmark(base_url=args.base_url, model=args.model)

    if args.json:
        output = json.dumps(benchmark, indent=2)
    else:
        output = format_report(benchmark)
    print(output)

    return 0 if benchmark.get("success_rate", 0) >= 0.8 else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate the benchmark's pass/fail status as the process exit code.
    sys.exit(main())
|
||||
146
scripts/lint_hardcoded_paths.py
Normal file
146
scripts/lint_hardcoded_paths.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Lint for hardcoded ~/.hermes paths.
|
||||
|
||||
Detects patterns that break profile isolation by hardcoding ~/.hermes
|
||||
instead of using get_hermes_home() from hermes_constants.
|
||||
|
||||
Usage:
|
||||
python3 scripts/lint_hardcoded_paths.py # check all
|
||||
python3 scripts/lint_hardcoded_paths.py --fix # suggest fixes
|
||||
python3 scripts/lint_hardcoded_paths.py --json # JSON output
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass, asdict
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
|
||||
# Patterns that indicate hardcoded ~/.hermes paths
|
||||
_PATTERNS = [
|
||||
(r'Path\.home\(\)\s*/\s*[\"\']\.hermes[\"\']', "Path.home() / '.hermes'"),
|
||||
(r'Path\.home\(\)\s*/\s*\"\.hermes\"', 'Path.home() / ".hermes"'),
|
||||
(r'[\"\']~[/\\]\.hermes[/\\]', "hardcoded ~/.hermes string"),
|
||||
(r'os\.path\.expanduser\([\"\']~[/\\]\.hermes', "expanduser('~/.hermes')"),
|
||||
(r'os\.path\.join\(.*expanduser.*\.hermes', "os.path.join with expanduser"),
|
||||
(r'HOME[\"\']\s*\+\s*[\"\'][/\\]\.hermes', "$HOME + .hermes concatenation"),
|
||||
]
|
||||
|
||||
# Files to skip
|
||||
_SKIP_DIRS = {
|
||||
".git", "__pycache__", ".venv", "venv", "node_modules",
|
||||
".mypy_cache", ".pytest_cache", "dist", "build",
|
||||
}
|
||||
_SKIP_FILES = {
|
||||
"hermes_constants.py", # source of truth
|
||||
}
|
||||
_SKIP_EXTENSIONS = {".md", ".rst", ".txt", ".json", ".yaml", ".yml", ".toml"}
|
||||
|
||||
|
||||
@dataclass
class Finding:
    # One hardcoded-path hit: where it was found and which pattern fired.
    file: str  # path relative to REPO_ROOT
    line: int  # 1-based line number within the file
    pattern: str  # human-readable description of the matched pattern
    content: str  # offending line, stripped and truncated by scan_file
    severity: str = "error"
|
||||
|
||||
|
||||
def scan_file(filepath: Path) -> List[Finding]:
    """Scan one file and return a Finding per line with a hardcoded path.

    Unreadable files are silently skipped (empty list), and at most one
    Finding is reported per line.
    """
    findings: List[Finding] = []

    try:
        content = filepath.read_text(encoding="utf-8", errors="replace")
    except Exception:
        # Permission errors, broken symlinks, etc. — skip quietly.
        return findings

    # Compile each pattern once per file instead of paying the re-cache
    # lookup on every (line, pattern) pair.
    compiled = [(re.compile(pattern), description) for pattern, description in _PATTERNS]

    for line_num, line in enumerate(content.split("\n"), 1):
        # Skip comments and docstrings (rough heuristic: only catches lines
        # that *start* with a comment or triple-quote marker).
        stripped = line.strip()
        if stripped.startswith(("#", '"""', "'''")):
            continue

        for regex, description in compiled:
            if regex.search(line):
                findings.append(Finding(
                    file=str(filepath.relative_to(REPO_ROOT)),
                    line=line_num,
                    pattern=description,
                    content=stripped[:120],
                ))
                break  # One finding per line

    return findings
|
||||
|
||||
|
||||
def scan_repo(root: Path | None = None) -> List[Finding]:
    """Scan every Python file under *root* (default: REPO_ROOT).

    Skips vendored/cache directories and the known-good files listed in
    _SKIP_FILES.
    """
    # Annotation fixed: the default is None, so the parameter is Optional.
    root = root or REPO_ROOT
    findings: List[Finding] = []

    for path in root.rglob("*.py"):
        rel_parts = path.relative_to(root).parts
        # Skip vendored / generated / cache directories anywhere in the path.
        if any(part in _SKIP_DIRS for part in rel_parts):
            continue
        # Skip the source-of-truth module that legitimately defines the path.
        if path.name in _SKIP_FILES:
            continue
        # (A former check of path.suffix against _SKIP_EXTENSIONS was dead
        # code here: rglob("*.py") never yields those suffixes.)

        findings.extend(scan_file(path))

    return findings
|
||||
|
||||
|
||||
def format_findings(findings: List[Finding]) -> str:
    """Render findings as a human-readable pass/fail report."""
    if not findings:
        return "OK: No hardcoded ~/.hermes paths found."

    report = [
        f"FAIL: Found {len(findings)} hardcoded ~/.hermes path(s):",
        "",
    ]
    for finding in findings:
        report.extend([
            f" {finding.file}:{finding.line} [{finding.severity}]",
            f" Pattern: {finding.pattern}",
            f" Line: {finding.content}",
            "",
        ])
    report.append("Fix: Use get_hermes_home() from hermes_constants instead.")
    return "\n".join(report)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: scan the repo and report any hardcoded paths.

    Returns 1 when findings exist, 0 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Lint for hardcoded ~/.hermes paths")
    parser.add_argument("--json", action="store_true", help="JSON output")
    parser.add_argument("--fix", action="store_true", help="Show fix suggestions")
    args = parser.parse_args()

    findings = scan_repo()

    if args.json:
        print(json.dumps([asdict(f) for f in findings], indent=2))
    else:
        print(format_findings(findings))
        if args.fix and findings:
            print("\nSuggested fix pattern:")
            print(" from hermes_constants import get_hermes_home")
            print(" hermes_home = get_hermes_home()")

    return 1 if findings else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit non-zero when findings exist so the CI job fails.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user