From f77ce4dff2f282c5352ca7a8ba8acddd354b61bd Mon Sep 17 00:00:00 2001
From: Alexander Whitestone
Date: Wed, 22 Apr 2026 12:07:52 -0400
Subject: [PATCH] wip: add regression tests for vision benchmark artifacts

---
Review note: the blob id on the "index" line below predates the edits made
to the test body in review; regenerate this patch with git format-patch
before relying on that id (for example for a 3-way merge).

 tests/test_vision_benchmark_artifacts.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 tests/test_vision_benchmark_artifacts.py

diff --git a/tests/test_vision_benchmark_artifacts.py b/tests/test_vision_benchmark_artifacts.py
new file mode 100644
index 000000000..f6d6bce76
--- /dev/null
+++ b/tests/test_vision_benchmark_artifacts.py
@@ -0,0 +1,29 @@
+"""Regression tests for the committed vision benchmark artifacts."""
+
+import json
+from pathlib import Path
+
+
+# Resolve artifacts from the repository root (the parent of tests/) so the
+# tests do not depend on the directory pytest is launched from.
+REPO_ROOT = Path(__file__).resolve().parents[1]
+DATASET = REPO_ROOT / "benchmarks" / "test_images.json"
+REPORT = REPO_ROOT / "metrics" / "vision-benchmark-smoke-2026-04-22.md"
+
+
+def test_benchmark_dataset_is_issue_sized_and_category_complete() -> None:
+    """Check the dataset has at least 50 items and all six categories."""
+    assert DATASET.exists(), "missing benchmark dataset under benchmarks/"
+    items = json.loads(DATASET.read_text(encoding="utf-8"))
+    assert len(items) >= 50
+    categories = {item["category"] for item in items}
+    assert {"screenshot", "diagram", "photo", "ocr", "chart", "document"}.issubset(categories)
+
+
+def test_metrics_report_exists_with_recommendation() -> None:
+    """Check the report exists and contains the three expected strings."""
+    assert REPORT.exists(), "missing benchmark report under metrics/"
+    text = REPORT.read_text(encoding="utf-8")
+    assert "Recommendation" in text
+    assert "Gemma 4" in text
+    assert "Gemini" in text