Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
62925c294d |
15
.github/workflows/tests.yml
vendored
15
.github/workflows/tests.yml
vendored
@@ -47,6 +47,21 @@ jobs:
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
|
||||
lint-paths:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Check for hardcoded ~/.hermes paths
|
||||
run: python3 scripts/lint_hardcoded_paths.py
|
||||
|
||||
e2e:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
[
|
||||
{
|
||||
"id": "img_001",
|
||||
"name": "red_circle",
|
||||
"path": "benchmarks/test_images/red_circle.png",
|
||||
"description": "A red circle on a white background",
|
||||
"expected_answer_contains": ["red", "circle"],
|
||||
"category": "shape_color"
|
||||
},
|
||||
{
|
||||
"id": "img_002",
|
||||
"name": "blue_square",
|
||||
"path": "benchmarks/test_images/blue_square.png",
|
||||
"description": "A blue square on a white background",
|
||||
"expected_answer_contains": ["blue", "square"],
|
||||
"category": "shape_color"
|
||||
},
|
||||
{
|
||||
"id": "img_003",
|
||||
"name": "green_triangle",
|
||||
"path": "benchmarks/test_images/green_triangle.png",
|
||||
"description": "A green triangle on a white background",
|
||||
"expected_answer_contains": ["green", "triangle"],
|
||||
"category": "shape_color"
|
||||
},
|
||||
{
|
||||
"id": "img_004",
|
||||
"name": "text_hello",
|
||||
"path": "benchmarks/test_images/text_hello.png",
|
||||
"description": "An image containing the text 'Hello World'",
|
||||
"expected_answer_contains": ["hello", "world"],
|
||||
"category": "ocr"
|
||||
},
|
||||
{
|
||||
"id": "img_005",
|
||||
"name": "mixed_shapes",
|
||||
"path": "benchmarks/test_images/mixed_shapes.png",
|
||||
"description": "Multiple colored shapes: red circle, blue square, yellow star",
|
||||
"expected_answer_contains": ["red", "blue", "yellow"],
|
||||
"category": "counting"
|
||||
}
|
||||
]
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 779 B |
Binary file not shown.
|
Before Width: | Height: | Size: 1.3 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 1.2 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 1.4 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 3.3 KiB |
@@ -1,204 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Vision benchmark — test model image understanding with local test images.
|
||||
|
||||
Uses locally-stored test images (not external URLs) for reliable CI.
|
||||
|
||||
Usage:
|
||||
python3 benchmarks/vision_benchmark.py --model hermes3
|
||||
python3 benchmarks/vision_benchmark.py --model qwen2.5 --json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Directory containing this benchmark script; image paths from the dataset
# are resolved relative to its parent (the repository root).
BENCHMARK_DIR = Path(__file__).resolve().parent
TEST_IMAGES_FILE = BENCHMARK_DIR / "test_images.json"
|
||||
|
||||
|
||||
def load_test_dataset() -> List[Dict[str, Any]]:
    """Read and parse the test-image dataset from TEST_IMAGES_FILE.

    Raises FileNotFoundError when the dataset file is absent.
    """
    if not TEST_IMAGES_FILE.exists():
        raise FileNotFoundError(f"Test dataset not found: {TEST_IMAGES_FILE}")
    with TEST_IMAGES_FILE.open() as fh:
        return json.load(fh)
|
||||
|
||||
|
||||
def encode_image_base64(image_path: str) -> str:
    """Return the contents of *image_path* as a base64 ASCII string."""
    raw = Path(image_path).read_bytes()
    return base64.b64encode(raw).decode()
|
||||
|
||||
|
||||
def verify_images_exist(dataset: List[Dict[str, Any]]) -> List[str]:
    """Return the relative paths of dataset images that are missing on disk."""
    return [
        entry["path"]
        for entry in dataset
        if not (BENCHMARK_DIR.parent / entry["path"]).exists()
    ]
|
||||
|
||||
|
||||
def run_vision_test(
    image_path: str,
    prompt: str,
    base_url: str = "http://localhost:11434/v1",
    model: str = "",
    api_key: str = "",
    timeout: int = 30,
) -> Dict[str, Any]:
    """Send one image + prompt to an OpenAI-compatible chat endpoint.

    The image is inlined as a base64 data URL. Returns a dict with a
    ``success`` flag, the response text, latency in milliseconds and,
    on failure, an ``error`` string.
    """
    import urllib.request

    img_b64 = encode_image_base64(image_path)

    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{img_b64}"},
            },
        ],
    }
    payload = {
        "model": model or "",
        "messages": [user_message],
        "max_tokens": 200,
    }

    request_headers = {"Content-Type": "application/json"}
    if api_key:
        request_headers["Authorization"] = f"Bearer {api_key}"

    endpoint = f"{base_url.rstrip('/')}/chat/completions"
    started = time.monotonic()

    try:
        request = urllib.request.Request(
            endpoint,
            data=json.dumps(payload).encode(),
            headers=request_headers,
            method="POST",
        )
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            data = json.loads(resp.read())
        content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
        return {
            "success": True,
            "response": content,
            "latency_ms": int((time.monotonic() - started) * 1000),
            "model": data.get("model", model),
        }
    except Exception as exc:
        # Network, HTTP and JSON errors are all reported uniformly so the
        # benchmark loop can keep going after a single failed request.
        return {
            "success": False,
            "response": "",
            "latency_ms": int((time.monotonic() - started) * 1000),
            "error": str(exc),
        }
|
||||
|
||||
|
||||
def evaluate_response(response: str, expected: List[str]) -> bool:
    """True when every expected keyword appears (case-insensitively) in *response*."""
    haystack = response.lower()
    for keyword in expected:
        if keyword.lower() not in haystack:
            return False
    return True
|
||||
|
||||
|
||||
def run_benchmark(
    base_url: str = "http://localhost:11434/v1",
    model: str = "",
) -> Dict[str, Any]:
    """Run the full vision benchmark against one model.

    Returns a summary dict with pass counts, success rate and per-test
    results, or a dict with an ``error`` key when test images are missing.
    """
    dataset = load_test_dataset()

    # Fail fast if any referenced image is absent from the checkout.
    missing = verify_images_exist(dataset)
    if missing:
        return {"error": f"Missing test images: {missing}", "passed": 0, "total": len(dataset)}

    results = []
    passed = 0

    for item in dataset:
        image_path = str(BENCHMARK_DIR.parent / item["path"])
        # Mention text as well as shapes/colors: the "ocr" category expects
        # transcribed words, which the old shapes-only prompt never elicited.
        # (Also dropped the pointless f-string prefix on a literal with no
        # placeholders.)
        prompt = "What do you see in this image? Describe the shapes, colors, and any text."

        result = run_vision_test(image_path, prompt, base_url=base_url, model=model)
        result["test_id"] = item["id"]
        result["test_name"] = item["name"]
        result["category"] = item["category"]

        if result["success"]:
            result["correct"] = evaluate_response(result["response"], item["expected_answer_contains"])
            if result["correct"]:
                passed += 1
        else:
            result["correct"] = False

        results.append(result)

    return {
        "model": model,
        "base_url": base_url,
        "passed": passed,
        "total": len(dataset),
        # Guard against an empty dataset to avoid ZeroDivisionError.
        "success_rate": passed / len(dataset) if dataset else 0,
        "results": results,
    }
|
||||
|
||||
|
||||
def format_report(benchmark: Dict[str, Any]) -> str:
    """Render benchmark results as a human-readable text report."""
    if "error" in benchmark:
        return f"ERROR: {benchmark['error']}"

    out = [
        "Vision Benchmark Results",
        "=" * 40,
        f"Model: {benchmark.get('model', 'unknown')}",
        f"Passed: {benchmark['passed']}/{benchmark['total']} ({benchmark['success_rate']:.0%})",
        "",
    ]

    for entry in benchmark.get("results", []):
        status = "\u2705" if entry.get("correct") else "\u274c"
        out.append(
            f" {status} {entry.get('test_name', '?')} ({entry.get('category', '?')}) — {entry.get('latency_ms', 0)}ms"
        )
        if not entry.get("success"):
            out.append(f" Error: {entry.get('error', 'unknown')}")
        elif not entry.get("correct"):
            out.append(f" Got: {entry.get('response', '')[:100]}")

    return "\n".join(out)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse args, run the benchmark, print the results.

    Returns 0 when the success rate is at least 80%, otherwise 1.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Vision benchmark")
    parser.add_argument("--base-url", default="http://localhost:11434/v1")
    parser.add_argument("--model", default="")
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args()

    benchmark = run_benchmark(base_url=args.base_url, model=args.model)

    if args.json:
        output = json.dumps(benchmark, indent=2)
    else:
        output = format_report(benchmark)
    print(output)

    return 0 if benchmark.get("success_rate", 0) >= 0.8 else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate the benchmark's pass/fail status as the process exit code.
    sys.exit(main())
|
||||
146
scripts/lint_hardcoded_paths.py
Normal file
146
scripts/lint_hardcoded_paths.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Lint for hardcoded ~/.hermes paths.
|
||||
|
||||
Detects patterns that break profile isolation by hardcoding ~/.hermes
|
||||
instead of using get_hermes_home() from hermes_constants.
|
||||
|
||||
Usage:
|
||||
python3 scripts/lint_hardcoded_paths.py # check all
|
||||
python3 scripts/lint_hardcoded_paths.py --fix # suggest fixes
|
||||
python3 scripts/lint_hardcoded_paths.py --json # JSON output
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass, asdict
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
|
||||
# Patterns that indicate hardcoded ~/.hermes paths
|
||||
_PATTERNS = [
|
||||
(r'Path\.home\(\)\s*/\s*[\"\']\.hermes[\"\']', "Path.home() / '.hermes'"),
|
||||
(r'Path\.home\(\)\s*/\s*\"\.hermes\"', 'Path.home() / ".hermes"'),
|
||||
(r'[\"\']~[/\\]\.hermes[/\\]', "hardcoded ~/.hermes string"),
|
||||
(r'os\.path\.expanduser\([\"\']~[/\\]\.hermes', "expanduser('~/.hermes')"),
|
||||
(r'os\.path\.join\(.*expanduser.*\.hermes', "os.path.join with expanduser"),
|
||||
(r'HOME[\"\']\s*\+\s*[\"\'][/\\]\.hermes', "$HOME + .hermes concatenation"),
|
||||
]
|
||||
|
||||
# Files to skip
|
||||
_SKIP_DIRS = {
|
||||
".git", "__pycache__", ".venv", "venv", "node_modules",
|
||||
".mypy_cache", ".pytest_cache", "dist", "build",
|
||||
}
|
||||
_SKIP_FILES = {
|
||||
"hermes_constants.py", # source of truth
|
||||
}
|
||||
_SKIP_EXTENSIONS = {".md", ".rst", ".txt", ".json", ".yaml", ".yml", ".toml"}
|
||||
|
||||
|
||||
@dataclass
class Finding:
    # One hardcoded-path hit: where it was found and which pattern fired.
    file: str  # path relative to REPO_ROOT
    line: int  # 1-based line number within the file
    pattern: str  # human-readable description of the matched pattern
    content: str  # offending line, stripped and truncated by scan_file
    severity: str = "error"
|
||||
|
||||
|
||||
def scan_file(filepath: Path) -> List[Finding]:
    """Scan one file and return a Finding per line with a hardcoded path.

    Unreadable files are silently skipped (empty list), and at most one
    Finding is reported per line.
    """
    findings: List[Finding] = []

    try:
        content = filepath.read_text(encoding="utf-8", errors="replace")
    except Exception:
        # Permission errors, broken symlinks, etc. — skip quietly.
        return findings

    # Compile each pattern once per file instead of paying the re-cache
    # lookup on every (line, pattern) pair.
    compiled = [(re.compile(pattern), description) for pattern, description in _PATTERNS]

    for line_num, line in enumerate(content.split("\n"), 1):
        # Skip comments and docstrings (rough heuristic: only catches lines
        # that *start* with a comment or triple-quote marker).
        stripped = line.strip()
        if stripped.startswith(("#", '"""', "'''")):
            continue

        for regex, description in compiled:
            if regex.search(line):
                findings.append(Finding(
                    file=str(filepath.relative_to(REPO_ROOT)),
                    line=line_num,
                    pattern=description,
                    content=stripped[:120],
                ))
                break  # One finding per line

    return findings
|
||||
|
||||
|
||||
def scan_repo(root: Path | None = None) -> List[Finding]:
    """Scan every Python file under *root* (default: REPO_ROOT).

    Skips vendored/cache directories and the known-good files listed in
    _SKIP_FILES.
    """
    # Annotation fixed: the default is None, so the parameter is Optional.
    root = root or REPO_ROOT
    findings: List[Finding] = []

    for path in root.rglob("*.py"):
        rel_parts = path.relative_to(root).parts
        # Skip vendored / generated / cache directories anywhere in the path.
        if any(part in _SKIP_DIRS for part in rel_parts):
            continue
        # Skip the source-of-truth module that legitimately defines the path.
        if path.name in _SKIP_FILES:
            continue
        # (A former check of path.suffix against _SKIP_EXTENSIONS was dead
        # code here: rglob("*.py") never yields those suffixes.)

        findings.extend(scan_file(path))

    return findings
|
||||
|
||||
|
||||
def format_findings(findings: List[Finding]) -> str:
    """Render findings as a human-readable pass/fail report."""
    if not findings:
        return "OK: No hardcoded ~/.hermes paths found."

    report = [
        f"FAIL: Found {len(findings)} hardcoded ~/.hermes path(s):",
        "",
    ]
    for finding in findings:
        report.extend([
            f" {finding.file}:{finding.line} [{finding.severity}]",
            f" Pattern: {finding.pattern}",
            f" Line: {finding.content}",
            "",
        ])
    report.append("Fix: Use get_hermes_home() from hermes_constants instead.")
    return "\n".join(report)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: scan the repo and report any hardcoded paths.

    Returns 1 when findings exist, 0 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Lint for hardcoded ~/.hermes paths")
    parser.add_argument("--json", action="store_true", help="JSON output")
    parser.add_argument("--fix", action="store_true", help="Show fix suggestions")
    args = parser.parse_args()

    findings = scan_repo()

    if args.json:
        print(json.dumps([asdict(f) for f in findings], indent=2))
    else:
        print(format_findings(findings))
        if args.fix and findings:
            print("\nSuggested fix pattern:")
            print(" from hermes_constants import get_hermes_home")
            print(" hermes_home = get_hermes_home()")

    return 1 if findings else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit non-zero when findings exist so the CI job fails.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user