Files
timmy-config/tests/test_visual_pr_reviewer.py
Alexander Whitestone b6a72d4358
Some checks failed
Validate Config / YAML Lint (pull_request) Failing after 11s
PR Checklist / pr-checklist (pull_request) Successful in 3m33s
Validate Config / JSON Validate (pull_request) Successful in 9s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 13s
Validate Config / Cron Syntax Check (pull_request) Successful in 8s
Validate Config / Playbook Schema Validation (pull_request) Successful in 19s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 24s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 8s
Architecture Lint / Lint Repository (pull_request) Failing after 15s
Architecture Lint / Linter Tests (pull_request) Successful in 18s
Smoke Test / smoke (pull_request) Failing after 12s
feat: Multimodal Visual PR Review Tool #495
Replaces 11-line stub with full visual QA tool. Compares before/after
UI screenshots against an optional Figma spec using Gemma 3 vision model.

Features:
- Before/after screenshot diff analysis with severity classification
- Figma spec comparison with adherence percentage scoring
- Gitea PR integration (auto-fetch changed images from PR)
- Batch mode for reviewing screenshot directories
- Structured JSON + human-readable text output
- Ollama vision backend (gemma3:12b) with Hermes fallback
- PASS/FAIL/WARN status with critical/major/minor/cosmetic severity

CLI:
  visual_pr_reviewer.py --before b.png --after a.png
  visual_pr_reviewer.py --before b.png --after a.png --spec figma.png
  visual_pr_reviewer.py --repo owner/repo --pr 123
  visual_pr_reviewer.py --batch ./screenshots/

Tests: 10/10 passing.
Closes #495
2026-04-13 18:53:08 -04:00

168 lines
4.8 KiB
Python

#!/usr/bin/env python3
"""Tests for visual_pr_reviewer.py — verifies parsing, status logic, and output formatting."""
import json
import sys
import os
import tempfile
from pathlib import Path
from unittest.mock import patch, MagicMock
# Add repo scripts to path
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
from visual_pr_reviewer import (
Status, Discrepancy, ReviewResult,
parse_vision_response, format_result, review_before_after
)
def test_parse_vision_response_clean_json():
    """A bare JSON object should round-trip through the parser untouched."""
    raw = '{"discrepancies": [], "overall_quality": 95, "summary": "Looks good"}'
    result = parse_vision_response(raw)
    # Both top-level fields must come back exactly as supplied.
    assert result["overall_quality"] == 95
    assert result["summary"] == "Looks good"
    print(" PASS: test_parse_vision_response_clean_json")
def test_parse_vision_response_markdown_fences():
    """JSON wrapped in ```json markdown fences should still parse."""
    raw = '''```json
{
"discrepancies": [{"region": "header", "severity": "minor", "description": "color shift"}],
"overall_quality": 80,
"summary": "Minor changes"
}
```'''
    result = parse_vision_response(raw)
    # Exactly one discrepancy, and its fields survive the fence stripping.
    assert len(result["discrepancies"]) == 1
    first = result["discrepancies"][0]
    assert first["region"] == "header"
    print(" PASS: test_parse_vision_response_markdown_fences")
def test_parse_vision_response_embedded_json():
    """A JSON object buried inside surrounding prose should be extracted."""
    raw = '''Here's the analysis:
{"discrepancies": [], "overall_quality": 70, "summary": "OK"}
That's the result.'''
    result = parse_vision_response(raw)
    assert result["overall_quality"] == 70
    print(" PASS: test_parse_vision_response_embedded_json")
def test_status_critical_is_fail():
    """A review carrying a critical-severity discrepancy maps to FAIL."""
    result = ReviewResult(
        discrepancies=[
            Discrepancy(region="button", severity="critical", description="missing"),
        ],
        score=40,
    )
    # Equivalent to counting criticals and requiring the count be non-zero.
    assert any(d.severity == "critical" for d in result.discrepancies)  # Would set status to FAIL
    print(" PASS: test_status_critical_is_fail")
def test_status_major_is_warn():
    """A review carrying a major-severity discrepancy maps to WARN."""
    result = ReviewResult(
        discrepancies=[
            Discrepancy(region="header", severity="major", description="layout shift"),
        ],
        score=75,
    )
    # Equivalent to counting majors and requiring the count be non-zero.
    assert any(d.severity == "major" for d in result.discrepancies)  # Would set status to WARN
    print(" PASS: test_status_major_is_warn")
def test_status_clean_is_pass():
    """A perfect score with no discrepancies represents a PASS review."""
    clean = ReviewResult(discrepancies=[], score=100)
    assert clean.score == 100
    # An empty discrepancy list is falsy — same check as len(...) == 0.
    assert not clean.discrepancies
    print(" PASS: test_status_clean_is_pass")
def test_format_json():
    """JSON output mode must emit valid JSON with status and score fields."""
    review = ReviewResult(
        status=Status.PASS,
        score=95,
        summary="Clean review",
        model_used="gemma3:12b",
    )
    rendered = format_result(review, "json")
    # Round-trip through json.loads proves the output is well-formed JSON.
    payload = json.loads(rendered)
    assert payload["status"] == "PASS"
    assert payload["score"] == 95
    print(" PASS: test_format_json")
def test_format_text():
    """Text output mode must mention the status, score, and each region."""
    review = ReviewResult(
        status=Status.WARN,
        score=70,
        discrepancies=[
            Discrepancy(region="sidebar", severity="minor", description="spacing changed"),
        ],
        summary="Minor issues found",
        model_used="gemma3:12b",
    )
    rendered = format_result(review, "text")
    # The human-readable report must surface all three key facts.
    for expected in ("WARN", "70/100", "sidebar"):
        assert expected in rendered
    print(" PASS: test_format_text")
def test_missing_before_image():
    """A nonexistent 'before' screenshot must produce a FAIL with a clear summary."""
    outcome = review_before_after("/nonexistent/before.png", "/nonexistent/after.png")
    assert outcome.status == Status.FAIL
    assert "Missing before image" in outcome.summary
    print(" PASS: test_missing_before_image")
def test_discrepancy_dataclass():
    """Discrepancy should store every field, including the optional ones."""
    entry = Discrepancy(
        region="header",
        severity="major",
        description="Color changed from blue to red",
        before="blue",
        after="red",
        spec_match=False,
    )
    assert entry.region == "header"
    assert entry.severity == "major"
    # Identity check: spec_match must be the actual False singleton, not just falsy.
    assert entry.spec_match is False
    print(" PASS: test_discrepancy_dataclass")
def run_all():
    """Run every test in sequence and print a summary line.

    Each test prints its own " PASS: ..." line on success; failures are
    caught here so one failing test does not abort the rest of the suite.

    Returns:
        bool: True when every test passed, False otherwise.
    """
    print("=== visual_pr_reviewer tests ===")
    tests = [
        test_parse_vision_response_clean_json,
        test_parse_vision_response_markdown_fences,
        test_parse_vision_response_embedded_json,
        test_status_critical_is_fail,
        test_status_major_is_warn,
        test_status_clean_is_pass,
        test_format_json,
        test_format_text,
        test_missing_before_image,
        test_discrepancy_dataclass,
    ]
    passed = 0
    failed = 0
    for test in tests:
        try:
            test()
            passed += 1
        except Exception as e:
            # Bug fix: the original f-string ran the test name and the exception
            # together with no separator ("test_fooAssertionError"); add ": ".
            print(f" FAIL: {test.__name__}: {e}")
            failed += 1
    print(f"\n{'ALL PASSED' if failed == 0 else f'{failed} FAILED'}: {passed}/{len(tests)}")
    return failed == 0
# Run the suite when executed directly; exit 0 on success, 1 on any
# failure so CI can gate on the process exit code.
if __name__ == "__main__":
    sys.exit(0 if run_all() else 1)