22 lines
734 B
Python
22 lines
734 B
Python
|
|
import json
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
|
||
|
|
# JSON file holding the benchmark items; the dataset test below parses it
# and reads each item's "category" key.
DATASET = Path("benchmarks/test_images.json")
|
||
|
|
# Benchmark report file; the report test below checks that it exists and
# contains a few expected strings.
REPORT = Path("metrics/vision-benchmark-smoke-2026-04-22.md")
|
||
|
|
|
||
|
|
|
||
|
|
def test_benchmark_dataset_is_issue_sized_and_category_complete() -> None:
    """Verify the dataset has at least 50 entries and covers every required category."""
    required = {"screenshot", "diagram", "photo", "ocr", "chart", "document"}

    raw = DATASET.read_text(encoding="utf-8")
    entries = json.loads(raw)
    assert len(entries) >= 50

    # Collect the distinct categories actually present in the dataset.
    seen = set()
    for entry in entries:
        seen.add(entry["category"])
    assert required.issubset(seen)
|
||
|
|
|
||
|
|
|
||
|
|
def test_metrics_report_exists_with_recommendation() -> None:
    """Verify the report file exists and contains each expected string."""
    assert REPORT.exists(), "missing benchmark report under metrics/"

    body = REPORT.read_text(encoding="utf-8")
    # Checked in this order so the first missing string is the one reported.
    for needle in ("Recommendation", "Gemma 4", "Gemini"):
        assert needle in body
|