Some checks failed
Architecture Lint / Lint Repository (push) Has been cancelled
Architecture Lint / Linter Tests (push) Has been cancelled
Smoke Test / smoke (push) Has been cancelled
Validate Config / Python Syntax & Import Check (push) Has been cancelled
Validate Config / Python Test Suite (push) Has been cancelled
Validate Config / Shell Script Lint (push) Has been cancelled
Validate Config / Cron Syntax Check (push) Has been cancelled
Validate Config / Deploy Script Dry Run (push) Has been cancelled
Validate Config / Playbook Schema Validation (push) Has been cancelled
Validate Config / YAML Lint (push) Has been cancelled
Validate Config / JSON Validate (push) Has been cancelled
Merge PR #531
885 lines
32 KiB
Python
885 lines
32 KiB
Python
#!/usr/bin/env python3
"""
foundation_accessibility_audit.py — Multimodal Visual Accessibility Audit.

Analyzes web pages for WCAG 2.1 AA compliance using both programmatic checks
and vision model analysis. Screenshots pages, checks contrast ratios, detects
layout issues, validates alt text, and produces structured audit reports.

Usage:
    # Audit a single page
    python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org

    # Audit multiple pages
    python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org --pages /about /donate /blog

    # With vision model analysis (Gemma 3)
    python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org --vision

    # Programmatic-only is the default when --vision is omitted (no vision model needed)
    python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org

    # Output as text report
    python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org --format text

WCAG 2.1 AA Checks:
    1.4.3 Contrast (Minimum) — text vs background ratio >= 4.5:1
    1.4.6 Contrast (Enhanced) — ratio >= 7:1 for AAA
    1.4.11 Non-text Contrast — UI components >= 3:1
    1.3.1 Info and Relationships — heading hierarchy, landmarks
    1.1.1 Non-text Content — alt text on images
    2.4.1 Bypass Blocks — skip navigation link
    2.4.2 Page Titled — meaningful <title>
    2.4.6 Headings and Labels — descriptive headings
    4.1.2 Name, Role, Value — ARIA labels on interactive elements

Refs: timmy-config#492, WCAG 2.1 AA
"""
|
|
|
|
from __future__ import annotations

import argparse
import base64
import colorsys  # NOTE(review): appears unused in this file — confirm before removing
import json
import os
import re
import subprocess
import sys
import tempfile
import urllib.error
import urllib.request
from dataclasses import dataclass, field, asdict
from enum import Enum
from pathlib import Path
from typing import Optional
from html.parser import HTMLParser


# === Configuration ===

# Base URL of the Ollama server used for optional vision-model analysis.
OLLAMA_BASE = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
# Multimodal model tag sent to Ollama when --vision is enabled.
VISION_MODEL = os.environ.get("VISUAL_REVIEW_MODEL", "gemma3:12b")

# Site paths audited when --pages is not supplied on the command line.
DEFAULT_PAGES = ["/", "/about", "/donate", "/blog", "/contact"]
|
|
|
|
|
|
class Severity(str, Enum):
    """Severity level of an accessibility violation (str-valued so it
    serializes naturally to JSON)."""
    CRITICAL = "critical"  # Blocks access entirely
    MAJOR = "major"        # Significant barrier
    MINOR = "minor"        # Inconvenience
    PASS = "pass"          # NOTE(review): appears unused in this file — confirm
|
|
|
|
|
|
@dataclass
class A11yViolation:
    """A single accessibility violation found by one of the checks."""
    criterion: str                          # WCAG criterion (e.g. "1.4.3")
    criterion_name: str                     # Human-readable name
    severity: Severity = Severity.MINOR     # How serious the barrier is
    element: str = ""                       # CSS selector or element description
    description: str = ""                   # What's wrong
    fix: str = ""                           # Suggested fix
    source: str = ""                        # "programmatic" or "vision"
|
|
|
|
|
|
@dataclass
class A11yPageResult:
    """Audit result for a single page."""
    url: str = ""
    title: str = ""     # Text of the page's <title>, if any
    score: int = 100    # 0-100; deductions applied per violation severity
    violations: list[A11yViolation] = field(default_factory=list)
    passed_checks: list[str] = field(default_factory=list)  # "criterion name" strings
    summary: str = ""   # One-line human-readable result
|
|
|
|
|
|
@dataclass
class A11yAuditReport:
    """Complete audit report aggregated across all audited pages."""
    site: str = ""              # Base URL of the audited site
    pages_audited: int = 0
    overall_score: int = 100    # Mean of the per-page scores (integer division)
    total_violations: int = 0
    critical_violations: int = 0
    major_violations: int = 0
    page_results: list[A11yPageResult] = field(default_factory=list)
    summary: str = ""           # One-line human-readable rollup
|
|
|
|
|
|
# === HTML Parser for Programmatic Checks ===
|
|
|
|
class A11yHTMLParser(HTMLParser):
    """Extract accessibility-relevant elements from HTML.

    While the document streams through, collects the page title, the
    <html> lang attribute, images, headings, links, form inputs, and
    landmark elements so that the programmatic WCAG checks can inspect
    them afterwards. Also sets `skip_nav` when a skip-navigation link is
    detected.
    """

    def __init__(self):
        super().__init__()
        self.title = ""
        self.images = []     # [{"src": ..., "alt": ..., "role": ...}]
        self.headings = []   # [{"level": int, "text": ...}]
        self.links = []      # [{"href": ..., "text": ..., "aria_label": ...}]
        self.inputs = []     # [{"tag": ..., "type": ..., "id": ..., ...}]
        self.landmarks = []  # [{"tag": ..., "role": ...}]
        self.skip_nav = False
        self.lang = ""
        self.in_title = False
        self.in_heading = False
        self.heading_level = 0
        self.heading_text = ""
        self.current_text = ""

    def handle_starttag(self, tag, attrs):
        attr_dict = dict(attrs)

        if tag == "title":
            self.in_title = True
        elif tag == "html":
            self.lang = attr_dict.get("lang", "")
        elif tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
            self.in_heading = True
            self.heading_level = int(tag[1])
            self.heading_text = ""
        elif tag == "img":
            self.images.append({
                "src": attr_dict.get("src", ""),
                "alt": attr_dict.get("alt"),  # None means attribute absent
                "role": attr_dict.get("role", ""),
            })
        elif tag == "a":
            self.links.append({
                "href": attr_dict.get("href", ""),
                "text": "",
                "aria_label": attr_dict.get("aria-label", ""),
            })
            # BUG FIX: skip-link detection previously lived in a later
            # `elif tag == "a" and ...` branch that was unreachable (this
            # branch already consumed every <a>), so href/class-based skip
            # links were never recognized. Detect them here instead.
            # (`or ""` guards valueless attributes, which parse as None.)
            if ("skip" in (attr_dict.get("href") or "").lower()
                    or "skip" in (attr_dict.get("class") or "").lower()):
                self.skip_nav = True
        elif tag in ("input", "select", "textarea"):
            self.inputs.append({
                "tag": tag,
                "type": attr_dict.get("type", "text"),
                "id": attr_dict.get("id", ""),
                "aria_label": attr_dict.get("aria-label", ""),
                "aria_labelledby": attr_dict.get("aria-labelledby", ""),
            })
        elif tag in ("main", "nav", "header", "footer", "aside", "section", "form"):
            self.landmarks.append({"tag": tag, "role": attr_dict.get("role", "")})

        # ARIA roles count as landmarks / skip links regardless of tag name.
        role = attr_dict.get("role", "")
        if role in ("navigation", "main", "banner", "contentinfo", "complementary", "search"):
            self.landmarks.append({"tag": tag, "role": role})
        if role == "link" and "skip" in ((attr_dict.get("aria-label") or "") + (attr_dict.get("href") or "")).lower():
            self.skip_nav = True

    def handle_endtag(self, tag):
        if tag == "title":
            self.in_title = False
        elif tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
            self.headings.append({"level": self.heading_level, "text": self.heading_text.strip()})
            self.in_heading = False
        elif tag == "a" and self.links:
            # Text accumulated since the last reset becomes the link's label.
            self.links[-1]["text"] = self.current_text.strip()
            self.current_text = ""

    def handle_data(self, data):
        if self.in_title:
            self.title += data
        if self.in_heading:
            self.heading_text += data
        self.current_text += data
|
|
|
|
|
|
# === Color/Contrast Utilities ===
|
|
|
|
def parse_color(color_str: str) -> Optional[tuple]:
    """Parse a CSS color string into an (r, g, b) tuple of 0-255 ints.

    Handles a small set of named colors, #RGB / #RRGGBB hex notation,
    and rgb()/rgba() functional notation (alpha is discarded). Returns
    None for anything unrecognized, including "transparent".
    """
    if not color_str:
        return None

    value = color_str.strip().lower()

    # Named colors (subset)
    named_colors = {
        "white": (255, 255, 255), "black": (0, 0, 0),
        "red": (255, 0, 0), "green": (0, 128, 0), "blue": (0, 0, 255),
        "gray": (128, 128, 128), "grey": (128, 128, 128),
        "silver": (192, 192, 192), "yellow": (255, 255, 0),
        "orange": (255, 165, 0), "purple": (128, 0, 128),
        "transparent": None,
    }
    if value in named_colors:
        return named_colors[value]

    # Hex: #RGB is expanded to #RRGGBB by doubling each digit.
    if value.startswith("#"):
        digits = value[1:]
        if len(digits) == 3:
            digits = "".join(ch + ch for ch in digits)
        if len(digits) == 6:
            try:
                return tuple(int(digits[pos:pos + 2], 16) for pos in (0, 2, 4))
            except ValueError:
                return None

    # Functional notation: rgb(r, g, b) then rgba(r, g, b, a).
    for pattern in (r"rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)",
                    r"rgba\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*[\d.]+\s*\)"):
        m = re.match(pattern, value)
        if m:
            return tuple(int(g) for g in m.groups())

    return None
|
|
|
|
|
|
def relative_luminance(rgb: tuple) -> float:
    """Relative luminance of an sRGB color per WCAG 2.1.

    Each channel is normalized to [0, 1] and linearized (sRGB gamma
    removed) before applying the standard Rec. 709 channel weights.
    """
    def channel(value):
        normalized = value / 255.0
        if normalized <= 0.04045:
            return normalized / 12.92
        return ((normalized + 0.055) / 1.055) ** 2.4

    weights = (0.2126, 0.7152, 0.0722)
    return sum(w * channel(c) for w, c in zip(weights, rgb))
|
|
|
|
|
|
def contrast_ratio(color1: tuple, color2: tuple) -> float:
    """WCAG 2.1 contrast ratio between two colors (1.0 to 21.0).

    Order of the arguments does not matter: the lighter luminance is
    always placed in the numerator.
    """
    lum_a = relative_luminance(color1)
    lum_b = relative_luminance(color2)
    if lum_a < lum_b:
        lum_a, lum_b = lum_b, lum_a
    return (lum_a + 0.05) / (lum_b + 0.05)
|
|
|
|
|
|
# === Programmatic Checks ===
|
|
|
|
def check_page_title(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 2.4.2 — Page Titled.

    Reports a major violation when the page has no title at all, and a
    minor one when the title is suspiciously short (under 5 characters).
    """
    title = parser.title.strip()
    if not title:
        return [A11yViolation(
            criterion="2.4.2", criterion_name="Page Titled",
            severity=Severity.MAJOR,
            element="<title>",
            description="Page has no title or title is empty.",
            fix="Add a meaningful <title> that describes the page purpose.",
            source="programmatic"
        )]
    if len(title) < 5:
        return [A11yViolation(
            criterion="2.4.2", criterion_name="Page Titled",
            severity=Severity.MINOR,
            element=f"<title>{title}</title>",
            description=f"Page title is very short: '{title}'",
            fix="Use a more descriptive title.",
            source="programmatic"
        )]
    return []
|
|
|
|
|
|
def check_lang_attribute(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 3.1.1 — Language of Page (lang attribute on <html>)."""
    if parser.lang:
        return []
    return [A11yViolation(
        criterion="3.1.1", criterion_name="Language of Page",
        severity=Severity.MAJOR,
        element="<html>",
        description="Missing lang attribute on <html> element.",
        fix="Add lang=\"en\" (or appropriate language code) to <html>.",
        source="programmatic"
    )]
|
|
|
|
|
|
def check_images_alt_text(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 1.1.1 — Non-text Content.

    Flags images with a missing alt attribute (critical) and images with
    empty alt text (minor — needs human confirmation that the image is
    decorative). Images explicitly marked role="presentation" or
    role="none" are exempt.
    """
    violations = []
    for img in parser.images:
        # Decorative images are exempt from the alt-text requirement.
        if img.get("role") in ("presentation", "none"):
            continue
        alt = img.get("alt")  # None means the attribute was absent entirely
        src = img.get("src", "unknown")
        if alt is None:
            violations.append(A11yViolation(
                criterion="1.1.1", criterion_name="Non-text Content",
                severity=Severity.CRITICAL,
                element=f"<img src=\"{src[:80]}\">",
                description="Image missing alt attribute.",
                fix="Add descriptive alt text, or alt=\"\" with role=\"presentation\" for decorative images.",
                source="programmatic"
            ))
        elif alt.strip() == "":
            # NOTE: the role re-check that previously guarded this branch
            # was dead code — presentation/none images are skipped above.
            violations.append(A11yViolation(
                criterion="1.1.1", criterion_name="Non-text Content",
                severity=Severity.MINOR,
                element=f"<img src=\"{src[:80]}\" alt=\"\">",
                description="Empty alt text — ensure this image is decorative.",
                fix="If decorative, add role=\"presentation\". If meaningful, add descriptive alt text.",
                source="programmatic"
            ))
    return violations
|
|
|
|
|
|
def check_heading_hierarchy(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 1.3.1 — Info and Relationships (heading hierarchy).

    Checks that the page has headings at all, exactly one <h1>, and no
    skipped levels (e.g. an <h2> jumping straight to an <h4>).
    """
    violations = []
    if not parser.headings:
        violations.append(A11yViolation(
            criterion="1.3.1", criterion_name="Info and Relationships",
            severity=Severity.MAJOR,
            element="document",
            description="No headings found on page.",
            fix="Add proper heading hierarchy starting with <h1>.",
            source="programmatic"
        ))
        # No headings means the remaining checks have nothing to inspect.
        return violations

    # Check for H1 — there should be exactly one.
    h1s = [h for h in parser.headings if h["level"] == 1]
    if not h1s:
        violations.append(A11yViolation(
            criterion="1.3.1", criterion_name="Info and Relationships",
            severity=Severity.MAJOR,
            element="document",
            description="No <h1> heading found.",
            fix="Add a single <h1> as the main page heading.",
            source="programmatic"
        ))
    elif len(h1s) > 1:
        violations.append(A11yViolation(
            criterion="1.3.1", criterion_name="Info and Relationships",
            severity=Severity.MINOR,
            element="document",
            description=f"Multiple <h1> headings found ({len(h1s)}).",
            fix="Use a single <h1> per page for the main heading.",
            source="programmatic"
        ))

    # Check hierarchy skips in document order. prev_level > 0 means a
    # jump before the first heading is never counted as a skip.
    prev_level = 0
    for h in parser.headings:
        level = h["level"]
        if level > prev_level + 1 and prev_level > 0:
            violations.append(A11yViolation(
                criterion="1.3.1", criterion_name="Info and Relationships",
                severity=Severity.MINOR,
                element=f"<h{level}>{h['text'][:50]}</h{level}>",
                description=f"Heading level skipped: h{prev_level} → h{level}",
                fix=f"Use <h{prev_level + 1}> instead, or fill the gap.",
                source="programmatic"
            ))
        prev_level = level

    return violations
|
|
|
|
|
|
def check_landmarks(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 1.3.1 — Landmarks and structure.

    Requires a main landmark (major violation if missing) and a nav
    landmark (minor), satisfied either by the native element or by the
    equivalent ARIA role.
    """
    found = []
    roles = {lm.get("role", "") for lm in parser.landmarks}
    tags = {lm.get("tag", "") for lm in parser.landmarks}

    if "main" not in roles and "main" not in tags:
        found.append(A11yViolation(
            criterion="1.3.1", criterion_name="Info and Relationships",
            severity=Severity.MAJOR,
            element="document",
            description="No <main> landmark found.",
            fix="Wrap the main content in a <main> element.",
            source="programmatic"
        ))

    if "navigation" not in roles and "nav" not in tags:
        found.append(A11yViolation(
            criterion="1.3.1", criterion_name="Info and Relationships",
            severity=Severity.MINOR,
            element="document",
            description="No <nav> landmark found.",
            fix="Wrap navigation in a <nav> element.",
            source="programmatic"
        ))

    return found
|
|
|
|
|
|
def check_skip_nav(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 2.4.1 — Bypass Blocks (skip-navigation link)."""
    if parser.skip_nav:
        return []
    # Parser flag missed; fall back to scanning link text for "skip".
    if any("skip" in link.get("text", "").lower() for link in parser.links):
        return []
    return [A11yViolation(
        criterion="2.4.1", criterion_name="Bypass Blocks",
        severity=Severity.MAJOR,
        element="document",
        description="No skip navigation link found.",
        fix="Add a 'Skip to main content' link as the first focusable element.",
        source="programmatic"
    )]
|
|
|
|
|
|
def check_form_labels(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 4.1.2 — Name, Role, Value (form inputs).

    NOTE(review): an id alone is treated as "labelled" on the assumption
    that a matching <label for=...> exists somewhere on the page — the
    parser does not verify that label is actually present.
    """
    issues = []
    exempt_types = ("hidden", "submit", "button", "reset", "image")
    for control in parser.inputs:
        if control["type"] in exempt_types:
            continue
        labelled = control.get("aria_label") or control.get("aria_labelledby") or control.get("id")
        if labelled:
            continue
        issues.append(A11yViolation(
            criterion="4.1.2", criterion_name="Name, Role, Value",
            severity=Severity.MAJOR,
            element=f"<{control['tag']} type=\"{control['type']}\">",
            description="Form input has no associated label or aria-label.",
            fix="Add a <label for=\"...\"> or aria-label attribute.",
            source="programmatic"
        ))
    return issues
|
|
|
|
|
|
def check_link_text(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 2.4.4 — Link Purpose.

    Flags links with no accessible name (major) and links whose name is
    a generic phrase such as "click here" (minor).
    """
    violations = []
    for link in parser.links:
        # BUG FIX: strip the visible text BEFORE falling back to the
        # aria-label. Previously whitespace-only text was truthy, so the
        # aria-label was ignored and the link was misreported as having
        # no accessible text. (`or ""` guards None attribute values.)
        text = ((link.get("text") or "").strip()
                or (link.get("aria_label") or "").strip()).lower()
        href = link.get("href", "")
        if not text:
            violations.append(A11yViolation(
                criterion="2.4.4", criterion_name="Link Purpose",
                severity=Severity.MAJOR,
                element=f"<a href=\"{href[:60]}\">",
                description="Link has no accessible text.",
                fix="Add visible text content or aria-label to the link.",
                source="programmatic"
            ))
        elif text in ("click here", "read more", "here", "more", "link"):
            violations.append(A11yViolation(
                criterion="2.4.4", criterion_name="Link Purpose",
                severity=Severity.MINOR,
                element=f"<a href=\"{href[:60]}\">{text}</a>",
                description=f"Non-descriptive link text: '{text}'",
                fix="Use descriptive text that explains the link destination.",
                source="programmatic"
            ))
    return violations
|
|
|
|
|
|
def run_programmatic_checks(html: str) -> list[A11yViolation]:
    """Run every programmatic accessibility check on raw HTML content."""
    parser = A11yHTMLParser()
    try:
        parser.feed(html)
    except Exception:
        # Malformed HTML must not abort the audit; check whatever parsed.
        pass

    checks = (
        check_page_title,
        check_lang_attribute,
        check_images_alt_text,
        check_heading_hierarchy,
        check_landmarks,
        check_skip_nav,
        check_form_labels,
        check_link_text,
    )
    results: list[A11yViolation] = []
    for check in checks:
        results.extend(check(parser))
    return results
|
|
|
|
|
|
# === Vision Model Checks ===
|
|
|
|
A11Y_VISION_PROMPT = """You are a WCAG 2.1 AA accessibility auditor. Analyze this screenshot of a web page.
|
|
|
|
Check for these specific issues:
|
|
|
|
1. COLOR CONTRAST: Are text colors sufficiently different from their backgrounds?
|
|
- Normal text needs 4.5:1 contrast ratio
|
|
- Large text (18pt+) needs 3:1
|
|
- UI components need 3:1
|
|
List any text or UI elements where contrast looks insufficient.
|
|
|
|
2. FONT LEGIBILITY: Is text readable?
|
|
- Font size >= 12px for body text
|
|
- Line height >= 1.5 for body text
|
|
- No text in images (should be real text)
|
|
|
|
3. LAYOUT ISSUES: Is the layout accessible?
|
|
- Touch targets >= 44x44px
|
|
- Content not cut off or overlapping
|
|
- Logical reading order visible
|
|
- No horizontal scrolling at standard widths
|
|
|
|
4. FOCUS INDICATORS: Can you see which element has focus?
|
|
- Interactive elements should have visible focus rings
|
|
|
|
5. COLOR ALONE: Is information conveyed only by color?
|
|
- Errors/warnings should not rely solely on red/green
|
|
|
|
Respond as JSON:
|
|
{
|
|
"violations": [
|
|
{
|
|
"criterion": "1.4.3",
|
|
"criterion_name": "Contrast (Minimum)",
|
|
"severity": "critical|major|minor",
|
|
"element": "description of element",
|
|
"description": "what's wrong",
|
|
"fix": "how to fix"
|
|
}
|
|
],
|
|
"passed_checks": ["list of things that look good"],
|
|
"overall_score": 0-100,
|
|
"summary": "brief summary"
|
|
}"""
|
|
|
|
|
|
def run_vision_check(screenshot_path: str, model: str = VISION_MODEL) -> list[A11yViolation]:
    """Run a vision-model accessibility check on a screenshot.

    Sends the PNG at `screenshot_path` to the Ollama chat API and parses
    the model's JSON reply into A11yViolation records. Returns [] on any
    failure (network error, model error, unparseable response).
    """
    try:
        b64 = base64.b64encode(Path(screenshot_path).read_bytes()).decode()
        # BUG FIX: Ollama's native /api/chat expects base64 images in the
        # message's "images" list. The previous OpenAI-style image_url
        # content parts are only understood by the /v1 compatibility
        # endpoint, so the screenshot was never delivered to the model.
        payload = json.dumps({
            "model": model,
            "messages": [{
                "role": "user",
                "content": A11Y_VISION_PROMPT,
                "images": [b64],
            }],
            "stream": False,
            "options": {"temperature": 0.1}
        }).encode()

        req = urllib.request.Request(
            f"{OLLAMA_BASE}/api/chat",
            data=payload,
            headers={"Content-Type": "application/json"}
        )
        with urllib.request.urlopen(req, timeout=120) as resp:
            result = json.loads(resp.read())
        content = result.get("message", {}).get("content", "")

        # Parse the model's (possibly fenced/noisy) JSON response.
        parsed = _parse_json_response(content)
        violations = []
        for v in parsed.get("violations", []):
            # Tolerate off-schema severity strings from the model instead
            # of letting one bad record discard all of them.
            try:
                sev = Severity(v.get("severity", "minor"))
            except ValueError:
                sev = Severity.MINOR
            violations.append(A11yViolation(
                criterion=v.get("criterion", ""),
                criterion_name=v.get("criterion_name", ""),
                severity=sev,
                element=v.get("element", ""),
                description=v.get("description", ""),
                fix=v.get("fix", ""),
                source="vision"
            ))
        return violations

    except Exception as e:
        print(f" Vision check failed: {e}", file=sys.stderr)
        return []
|
|
|
|
|
|
def _parse_json_response(text: str) -> dict:
|
|
"""Extract JSON from potentially messy vision response."""
|
|
cleaned = text.strip()
|
|
if cleaned.startswith("```"):
|
|
lines = cleaned.split("\n")[1:]
|
|
if lines and lines[-1].strip() == "```":
|
|
lines = lines[:-1]
|
|
cleaned = "\n".join(lines)
|
|
try:
|
|
return json.loads(cleaned)
|
|
except json.JSONDecodeError:
|
|
start = cleaned.find("{")
|
|
end = cleaned.rfind("}")
|
|
if start >= 0 and end > start:
|
|
try:
|
|
return json.loads(cleaned[start:end + 1])
|
|
except json.JSONDecodeError:
|
|
pass
|
|
return {}
|
|
|
|
|
|
# === Page Fetching ===
|
|
|
|
def fetch_page(url: str) -> Optional[str]:
    """Fetch a page's HTML, or None on any failure (logged to stderr)."""
    try:
        request = urllib.request.Request(url, headers={"User-Agent": "A11yAudit/1.0"})
        with urllib.request.urlopen(request, timeout=30) as response:
            return response.read().decode("utf-8", errors="replace")
    except Exception as exc:
        print(f" Failed to fetch {url}: {exc}", file=sys.stderr)
        return None
|
|
|
|
|
|
def take_screenshot(url: str, output_path: str, width: int = 1280, height: int = 900) -> bool:
    """Take a screenshot of `url`, trying Playwright then wkhtmltoimage.

    Returns True if a screenshot file was written to `output_path`,
    False if both capture backends are unavailable or fail.
    """
    # SECURITY FIX: the URL and output path used to be f-string-interpolated
    # into the `python3 -c` script source, so quotes/braces in either could
    # inject arbitrary code into the subprocess. Pass them via argv instead.
    playwright_script = """
import sys
from playwright.sync_api import sync_playwright
url, out, width, height = sys.argv[1], sys.argv[2], int(sys.argv[3]), int(sys.argv[4])
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    page = browser.new_page(viewport={"width": width, "height": height})
    page.goto(url, wait_until="networkidle", timeout=30000)
    page.screenshot(path=out, full_page=True)
    browser.close()
"""
    try:
        result = subprocess.run(
            ["python3", "-c", playwright_script, url, output_path, str(width), str(height)],
            capture_output=True, text=True, timeout=60
        )
        if result.returncode == 0 and Path(output_path).exists():
            return True
    except Exception:
        pass  # Playwright missing or capture failed — try the fallback.

    # Fallback: wkhtmltoimage (argument list, shell=False — no injection risk).
    try:
        result = subprocess.run(
            ["wkhtmltoimage", "--width", str(width), "--quality", "90", url, output_path],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode == 0 and Path(output_path).exists():
            return True
    except Exception:
        pass

    return False
|
|
|
|
|
|
# === Audit Logic ===
|
|
|
|
def audit_page(url: str, use_vision: bool = False, model: str = VISION_MODEL) -> A11yPageResult:
    """Run a full accessibility audit on a single page.

    Fetches the page, runs all programmatic checks, optionally runs the
    vision-model check on a screenshot, then scores the page: each
    critical violation costs 25 points, major 10, minor 3 (floored at 0).
    """
    result = A11yPageResult(url=url)

    # Fetch HTML; an unreachable page scores 0 with no checks run.
    html = fetch_page(url)
    if not html:
        result.summary = f"Failed to fetch {url}"
        result.score = 0
        return result

    # Extract title directly via regex (independent of the HTML parser).
    title_match = re.search(r"<title[^>]*>(.*?)</title>", html, re.IGNORECASE | re.DOTALL)
    result.title = title_match.group(1).strip() if title_match else ""

    # Run programmatic checks
    prog_violations = run_programmatic_checks(html)
    result.violations.extend(prog_violations)

    # Track passed checks: any checked criterion with no violation passes.
    criteria_checked = {
        "2.4.2": "Page Titled",
        "3.1.1": "Language of Page",
        "1.1.1": "Non-text Content",
        "1.3.1": "Info and Relationships",
        "2.4.1": "Bypass Blocks",
        "4.1.2": "Name, Role, Value",
        "2.4.4": "Link Purpose",
    }
    violated_criteria = {v.criterion for v in result.violations}
    for criterion, name in criteria_checked.items():
        if criterion not in violated_criteria:
            result.passed_checks.append(f"{criterion} {name}")

    # Vision check (optional). The NamedTemporaryFile only reserves a
    # path (delete=False); the screenshot tool writes to it, and the
    # finally block guarantees cleanup.
    if use_vision:
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            screenshot_path = tmp.name
        try:
            print(f" Taking screenshot of {url}...", file=sys.stderr)
            if take_screenshot(url, screenshot_path):
                print(f" Running vision analysis...", file=sys.stderr)
                vision_violations = run_vision_check(screenshot_path, model)
                result.violations.extend(vision_violations)
                result.passed_checks.append("Vision model analysis completed")
            else:
                # Best-effort: a missing screenshot backend is not a failure.
                result.passed_checks.append("Screenshot unavailable — vision check skipped")
        finally:
            Path(screenshot_path).unlink(missing_ok=True)

    # Calculate score from severity-weighted deductions, floored at 0.
    criticals = sum(1 for v in result.violations if v.severity == Severity.CRITICAL)
    majors = sum(1 for v in result.violations if v.severity == Severity.MAJOR)
    minors = sum(1 for v in result.violations if v.severity == Severity.MINOR)
    result.score = max(0, 100 - (criticals * 25) - (majors * 10) - (minors * 3))

    # Summary
    if not result.violations:
        result.summary = f"All programmatic checks passed for {url}"
    else:
        result.summary = (
            f"{len(result.violations)} issue(s) found: "
            f"{criticals} critical, {majors} major, {minors} minor"
        )

    return result
|
|
|
|
|
|
def audit_site(base_url: str, pages: list[str], use_vision: bool = False,
               model: str = VISION_MODEL) -> A11yAuditReport:
    """Audit several pages of a site and aggregate into one report.

    Entries in `pages` that start with "http" are used verbatim; bare
    paths are appended to `base_url`.
    """
    report = A11yAuditReport(site=base_url)

    for path in pages:
        target = path if path.startswith("http") else base_url.rstrip("/") + path
        print(f"Auditing: {target}", file=sys.stderr)
        report.page_results.append(audit_page(target, use_vision, model))

    def count_severity(severity: Severity) -> int:
        # Count violations of one severity across all audited pages.
        return sum(
            1
            for page in report.page_results
            for violation in page.violations
            if violation.severity == severity
        )

    report.pages_audited = len(report.page_results)
    report.total_violations = sum(len(page.violations) for page in report.page_results)
    report.critical_violations = count_severity(Severity.CRITICAL)
    report.major_violations = count_severity(Severity.MAJOR)

    if report.page_results:
        report.overall_score = sum(p.score for p in report.page_results) // len(report.page_results)

    report.summary = (
        f"Audited {report.pages_audited} pages. "
        f"Overall score: {report.overall_score}/100. "
        f"{report.total_violations} total issues: "
        f"{report.critical_violations} critical, {report.major_violations} major."
    )

    return report
|
|
|
|
|
|
# === Output Formatting ===
|
|
|
|
def format_report(report: A11yAuditReport, fmt: str = "json") -> str:
    """Format the audit report.

    Args:
        report: Aggregated results produced by audit_site().
        fmt: "json" for machine-readable output, "text" for a
            human-readable console report.

    Returns:
        The formatted report as a single string.

    Raises:
        ValueError: If fmt is neither "json" nor "text".
    """
    if fmt == "json":
        data = {
            "site": report.site,
            "pages_audited": report.pages_audited,
            "overall_score": report.overall_score,
            "total_violations": report.total_violations,
            "critical_violations": report.critical_violations,
            "major_violations": report.major_violations,
            "summary": report.summary,
            "pages": []
        }
        for page in report.page_results:
            page_data = {
                "url": page.url,
                "title": page.title,
                "score": page.score,
                "violations": [asdict(v) for v in page.violations],
                "passed_checks": page.passed_checks,
                "summary": page.summary,
            }
            # Convert severity enum to string (asdict leaves Enum members as-is).
            for v in page_data["violations"]:
                if hasattr(v["severity"], "value"):
                    v["severity"] = v["severity"].value
            data["pages"].append(page_data)
        return json.dumps(data, indent=2)

    elif fmt == "text":
        lines = []
        lines.append("=" * 60)
        lines.append(" WEB ACCESSIBILITY AUDIT REPORT")
        lines.append("=" * 60)
        lines.append(f" Site: {report.site}")
        lines.append(f" Pages audited: {report.pages_audited}")
        lines.append(f" Overall score: {report.overall_score}/100")
        lines.append(f" Issues: {report.total_violations} total "
                     f"({report.critical_violations} critical, {report.major_violations} major)")
        lines.append("")

        for page in report.page_results:
            lines.append(f" ── {page.url} ──")
            lines.append(f" Title: {page.title}")
            lines.append(f" Score: {page.score}/100")
            lines.append("")

            if page.violations:
                lines.append(f" Violations ({len(page.violations)}):")
                for v in page.violations:
                    # Unknown severities (e.g. "pass") fall back to the white dot.
                    sev_icon = {"critical": "🔴", "major": "🟡", "minor": "🔵"}.get(
                        v.severity.value if hasattr(v.severity, "value") else str(v.severity), "⚪"
                    )
                    lines.append(f" {sev_icon} [{v.criterion}] {v.criterion_name}")
                    lines.append(f" Element: {v.element}")
                    lines.append(f" Issue: {v.description}")
                    lines.append(f" Fix: {v.fix}")
                    lines.append(f" Source: {v.source}")
                    lines.append("")
            else:
                lines.append(" ✓ No violations found")
                lines.append("")

            if page.passed_checks:
                lines.append(f" Passed: {', '.join(page.passed_checks)}")
                lines.append("")

        lines.append("=" * 60)
        lines.append(f" Summary: {report.summary}")
        lines.append("=" * 60)
        return "\n".join(lines)

    else:
        raise ValueError(f"Unknown format: {fmt}")
|
|
|
|
|
|
# === CLI ===
|
|
|
|
def main():
    """CLI entry point: parse arguments, run the audit, emit the report.

    Exits with status 1 when any critical violation is found, so CI jobs
    can gate on the audit result.
    """
    parser = argparse.ArgumentParser(
        description="Visual Accessibility Audit — WCAG 2.1 AA compliance checker",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --url https://timmyfoundation.org
  %(prog)s --url https://timmyfoundation.org --pages /about /donate
  %(prog)s --url https://timmyfoundation.org --vision
  %(prog)s --url https://timmyfoundation.org --format text
"""
    )
    parser.add_argument("--url", required=True, help="Base URL to audit")
    parser.add_argument("--pages", nargs="*", default=DEFAULT_PAGES,
                        help="Paths to audit (default: / /about /donate /blog /contact)")
    parser.add_argument("--vision", action="store_true",
                        help="Include vision model analysis (requires Ollama)")
    parser.add_argument("--model", default=VISION_MODEL,
                        help=f"Vision model (default: {VISION_MODEL})")
    parser.add_argument("--format", choices=["json", "text"], default="json",
                        help="Output format")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")

    args = parser.parse_args()

    report = audit_site(args.url, args.pages, use_vision=args.vision, model=args.model)
    output = format_report(report, args.format)

    # Write the report to the requested file, or to stdout; progress
    # messages go to stderr so stdout stays machine-parseable.
    if args.output:
        Path(args.output).write_text(output)
        print(f"Report written to {args.output}", file=sys.stderr)
    else:
        print(output)

    # Exit code: non-zero if critical violations
    if report.critical_violations > 0:
        sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":  # script entry point — no side effects on import
    main()
|