Some checks failed
Architecture Lint / Lint Repository (push) Has been cancelled
Architecture Lint / Linter Tests (push) Has been cancelled
Smoke Test / smoke (push) Has been cancelled
Validate Config / Python Syntax & Import Check (push) Has been cancelled
Validate Config / Python Test Suite (push) Has been cancelled
Validate Config / Shell Script Lint (push) Has been cancelled
Validate Config / Cron Syntax Check (push) Has been cancelled
Validate Config / Deploy Script Dry Run (push) Has been cancelled
Validate Config / Playbook Schema Validation (push) Has been cancelled
Validate Config / YAML Lint (push) Has been cancelled
Validate Config / JSON Validate (push) Has been cancelled
Merge PR #531
885 lines
32 KiB
Python
885 lines
32 KiB
Python
#!/usr/bin/env python3
"""
foundation_accessibility_audit.py — Multimodal Visual Accessibility Audit.

Analyzes web pages for WCAG 2.1 AA compliance using both programmatic checks
and vision model analysis. Screenshots pages, checks contrast ratios, detects
layout issues, validates alt text, and produces structured audit reports.

Usage:
    # Audit a single page
    python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org

    # Audit multiple pages
    python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org --pages /about /donate /blog

    # With vision model analysis (Gemma 3)
    python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org --vision

    # Programmatic-only is the default when --vision is omitted (no vision model needed)
    python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org

    # Output as text report
    python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org --format text

WCAG 2.1 AA Checks:
    1.4.3 Contrast (Minimum) — text vs background ratio >= 4.5:1
    1.4.6 Contrast (Enhanced) — ratio >= 7:1 for AAA
    1.4.11 Non-text Contrast — UI components >= 3:1
    1.3.1 Info and Relationships — heading hierarchy, landmarks
    1.1.1 Non-text Content — alt text on images
    2.4.1 Bypass Blocks — skip navigation link
    2.4.2 Page Titled — meaningful <title>
    2.4.6 Headings and Labels — descriptive headings
    4.1.2 Name, Role, Value — ARIA labels on interactive elements

Refs: timmy-config#492, WCAG 2.1 AA
"""
|
|
|
|
from __future__ import annotations

import argparse
import base64
import colorsys  # NOTE(review): appears unused in this file — confirm before removing
import json
import os
import re
import subprocess
import sys
import tempfile
import urllib.error
import urllib.request
from dataclasses import dataclass, field, asdict
from enum import Enum
from pathlib import Path
from typing import Optional
from html.parser import HTMLParser


# === Configuration ===

# Base URL of the Ollama server used for optional vision-model analysis.
OLLAMA_BASE = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
# Multimodal model tag sent to Ollama when --vision is enabled.
VISION_MODEL = os.environ.get("VISUAL_REVIEW_MODEL", "gemma3:12b")

# Site paths audited when --pages is not supplied on the command line.
DEFAULT_PAGES = ["/", "/about", "/donate", "/blog", "/contact"]
|
|
|
|
|
|
class Severity(str, Enum):
    """Severity level of an accessibility violation (str-valued so it
    serializes naturally to JSON)."""
    CRITICAL = "critical"  # Blocks access entirely
    MAJOR = "major"        # Significant barrier
    MINOR = "minor"        # Inconvenience
    PASS = "pass"          # NOTE(review): appears unused in this file — confirm
|
|
|
|
|
|
@dataclass
class A11yViolation:
    """A single accessibility violation found by one of the checks."""
    criterion: str                          # WCAG criterion (e.g. "1.4.3")
    criterion_name: str                     # Human-readable name
    severity: Severity = Severity.MINOR     # How serious the barrier is
    element: str = ""                       # CSS selector or element description
    description: str = ""                   # What's wrong
    fix: str = ""                           # Suggested fix
    source: str = ""                        # "programmatic" or "vision"
|
|
|
|
|
|
@dataclass
class A11yPageResult:
    """Audit result for a single page."""
    url: str = ""
    title: str = ""     # Text of the page's <title>, if any
    score: int = 100    # 0-100; deductions applied per violation severity
    violations: list[A11yViolation] = field(default_factory=list)
    passed_checks: list[str] = field(default_factory=list)  # "criterion name" strings
    summary: str = ""   # One-line human-readable result
|
|
|
|
|
|
@dataclass
class A11yAuditReport:
    """Complete audit report aggregated across all audited pages."""
    site: str = ""              # Base URL of the audited site
    pages_audited: int = 0
    overall_score: int = 100    # Mean of the per-page scores (integer division)
    total_violations: int = 0
    critical_violations: int = 0
    major_violations: int = 0
    page_results: list[A11yPageResult] = field(default_factory=list)
    summary: str = ""           # One-line human-readable rollup
|
|
|
|
|
|
# === HTML Parser for Programmatic Checks ===
|
|
|
|
class A11yHTMLParser(HTMLParser):
    """Extract accessibility-relevant elements from HTML.

    While the document streams through, collects the page title, the
    <html> lang attribute, images, headings, links, form inputs, and
    landmark elements so that the programmatic WCAG checks can inspect
    them afterwards. Also sets `skip_nav` when a skip-navigation link is
    detected.
    """

    def __init__(self):
        super().__init__()
        self.title = ""
        self.images = []     # [{"src": ..., "alt": ..., "role": ...}]
        self.headings = []   # [{"level": int, "text": ...}]
        self.links = []      # [{"href": ..., "text": ..., "aria_label": ...}]
        self.inputs = []     # [{"tag": ..., "type": ..., "id": ..., ...}]
        self.landmarks = []  # [{"tag": ..., "role": ...}]
        self.skip_nav = False
        self.lang = ""
        self.in_title = False
        self.in_heading = False
        self.heading_level = 0
        self.heading_text = ""
        self.current_text = ""

    def handle_starttag(self, tag, attrs):
        attr_dict = dict(attrs)

        if tag == "title":
            self.in_title = True
        elif tag == "html":
            self.lang = attr_dict.get("lang", "")
        elif tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
            self.in_heading = True
            self.heading_level = int(tag[1])
            self.heading_text = ""
        elif tag == "img":
            self.images.append({
                "src": attr_dict.get("src", ""),
                "alt": attr_dict.get("alt"),  # None means attribute absent
                "role": attr_dict.get("role", ""),
            })
        elif tag == "a":
            self.links.append({
                "href": attr_dict.get("href", ""),
                "text": "",
                "aria_label": attr_dict.get("aria-label", ""),
            })
            # BUG FIX: skip-link detection previously lived in a later
            # `elif tag == "a" and ...` branch that was unreachable (this
            # branch already consumed every <a>), so href/class-based skip
            # links were never recognized. Detect them here instead.
            # (`or ""` guards valueless attributes, which parse as None.)
            if ("skip" in (attr_dict.get("href") or "").lower()
                    or "skip" in (attr_dict.get("class") or "").lower()):
                self.skip_nav = True
        elif tag in ("input", "select", "textarea"):
            self.inputs.append({
                "tag": tag,
                "type": attr_dict.get("type", "text"),
                "id": attr_dict.get("id", ""),
                "aria_label": attr_dict.get("aria-label", ""),
                "aria_labelledby": attr_dict.get("aria-labelledby", ""),
            })
        elif tag in ("main", "nav", "header", "footer", "aside", "section", "form"):
            self.landmarks.append({"tag": tag, "role": attr_dict.get("role", "")})

        # ARIA roles count as landmarks / skip links regardless of tag name.
        role = attr_dict.get("role", "")
        if role in ("navigation", "main", "banner", "contentinfo", "complementary", "search"):
            self.landmarks.append({"tag": tag, "role": role})
        if role == "link" and "skip" in ((attr_dict.get("aria-label") or "") + (attr_dict.get("href") or "")).lower():
            self.skip_nav = True

    def handle_endtag(self, tag):
        if tag == "title":
            self.in_title = False
        elif tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
            self.headings.append({"level": self.heading_level, "text": self.heading_text.strip()})
            self.in_heading = False
        elif tag == "a" and self.links:
            # Text accumulated since the last reset becomes the link's label.
            self.links[-1]["text"] = self.current_text.strip()
            self.current_text = ""

    def handle_data(self, data):
        if self.in_title:
            self.title += data
        if self.in_heading:
            self.heading_text += data
        self.current_text += data
|
|
|
|
|
|
# === Color/Contrast Utilities ===
|
|
|
|
def parse_color(color_str: str) -> Optional[tuple]:
    """Parse a CSS color string into an (r, g, b) tuple of 0-255 ints.

    Handles a small set of named colors, #RGB / #RRGGBB hex notation,
    and rgb()/rgba() functional notation (alpha is discarded). Returns
    None for anything unrecognized, including "transparent".
    """
    if not color_str:
        return None

    value = color_str.strip().lower()

    # Named colors (subset)
    named_colors = {
        "white": (255, 255, 255), "black": (0, 0, 0),
        "red": (255, 0, 0), "green": (0, 128, 0), "blue": (0, 0, 255),
        "gray": (128, 128, 128), "grey": (128, 128, 128),
        "silver": (192, 192, 192), "yellow": (255, 255, 0),
        "orange": (255, 165, 0), "purple": (128, 0, 128),
        "transparent": None,
    }
    if value in named_colors:
        return named_colors[value]

    # Hex: #RGB is expanded to #RRGGBB by doubling each digit.
    if value.startswith("#"):
        digits = value[1:]
        if len(digits) == 3:
            digits = "".join(ch + ch for ch in digits)
        if len(digits) == 6:
            try:
                return tuple(int(digits[pos:pos + 2], 16) for pos in (0, 2, 4))
            except ValueError:
                return None

    # Functional notation: rgb(r, g, b) then rgba(r, g, b, a).
    for pattern in (r"rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)",
                    r"rgba\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*[\d.]+\s*\)"):
        m = re.match(pattern, value)
        if m:
            return tuple(int(g) for g in m.groups())

    return None
|
|
|
|
|
|
def relative_luminance(rgb: tuple) -> float:
    """Relative luminance of an sRGB color per WCAG 2.1.

    Each channel is normalized to [0, 1] and linearized (sRGB gamma
    removed) before applying the standard Rec. 709 channel weights.
    """
    def channel(value):
        normalized = value / 255.0
        if normalized <= 0.04045:
            return normalized / 12.92
        return ((normalized + 0.055) / 1.055) ** 2.4

    weights = (0.2126, 0.7152, 0.0722)
    return sum(w * channel(c) for w, c in zip(weights, rgb))
|
|
|
|
|
|
def contrast_ratio(color1: tuple, color2: tuple) -> float:
    """WCAG 2.1 contrast ratio between two colors (1.0 to 21.0).

    Order of the arguments does not matter: the lighter luminance is
    always placed in the numerator.
    """
    lum_a = relative_luminance(color1)
    lum_b = relative_luminance(color2)
    if lum_a < lum_b:
        lum_a, lum_b = lum_b, lum_a
    return (lum_a + 0.05) / (lum_b + 0.05)
|
|
|
|
|
|
# === Programmatic Checks ===
|
|
|
|
def check_page_title(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 2.4.2 — Page Titled.

    Reports a major violation when the page has no title at all, and a
    minor one when the title is suspiciously short (under 5 characters).
    """
    title = parser.title.strip()
    if not title:
        return [A11yViolation(
            criterion="2.4.2", criterion_name="Page Titled",
            severity=Severity.MAJOR,
            element="<title>",
            description="Page has no title or title is empty.",
            fix="Add a meaningful <title> that describes the page purpose.",
            source="programmatic"
        )]
    if len(title) < 5:
        return [A11yViolation(
            criterion="2.4.2", criterion_name="Page Titled",
            severity=Severity.MINOR,
            element=f"<title>{title}</title>",
            description=f"Page title is very short: '{title}'",
            fix="Use a more descriptive title.",
            source="programmatic"
        )]
    return []
|
|
|
|
|
|
def check_lang_attribute(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 3.1.1 — Language of Page (lang attribute on <html>)."""
    if parser.lang:
        return []
    return [A11yViolation(
        criterion="3.1.1", criterion_name="Language of Page",
        severity=Severity.MAJOR,
        element="<html>",
        description="Missing lang attribute on <html> element.",
        fix="Add lang=\"en\" (or appropriate language code) to <html>.",
        source="programmatic"
    )]
|
|
|
|
|
|
def check_images_alt_text(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 1.1.1 — Non-text Content.

    Flags images with a missing alt attribute (critical) and images with
    empty alt text (minor — needs human confirmation that the image is
    decorative). Images explicitly marked role="presentation" or
    role="none" are exempt.
    """
    violations = []
    for img in parser.images:
        # Decorative images are exempt from the alt-text requirement.
        if img.get("role") in ("presentation", "none"):
            continue
        alt = img.get("alt")  # None means the attribute was absent entirely
        src = img.get("src", "unknown")
        if alt is None:
            violations.append(A11yViolation(
                criterion="1.1.1", criterion_name="Non-text Content",
                severity=Severity.CRITICAL,
                element=f"<img src=\"{src[:80]}\">",
                description="Image missing alt attribute.",
                fix="Add descriptive alt text, or alt=\"\" with role=\"presentation\" for decorative images.",
                source="programmatic"
            ))
        elif alt.strip() == "":
            # NOTE: the role re-check that previously guarded this branch
            # was dead code — presentation/none images are skipped above.
            violations.append(A11yViolation(
                criterion="1.1.1", criterion_name="Non-text Content",
                severity=Severity.MINOR,
                element=f"<img src=\"{src[:80]}\" alt=\"\">",
                description="Empty alt text — ensure this image is decorative.",
                fix="If decorative, add role=\"presentation\". If meaningful, add descriptive alt text.",
                source="programmatic"
            ))
    return violations
|
|
|
|
|
|
def check_heading_hierarchy(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 1.3.1 — Info and Relationships (heading hierarchy).

    Checks that the page has headings at all, exactly one <h1>, and no
    skipped levels (e.g. an <h2> jumping straight to an <h4>).
    """
    violations = []
    if not parser.headings:
        violations.append(A11yViolation(
            criterion="1.3.1", criterion_name="Info and Relationships",
            severity=Severity.MAJOR,
            element="document",
            description="No headings found on page.",
            fix="Add proper heading hierarchy starting with <h1>.",
            source="programmatic"
        ))
        # No headings means the remaining checks have nothing to inspect.
        return violations

    # Check for H1 — there should be exactly one.
    h1s = [h for h in parser.headings if h["level"] == 1]
    if not h1s:
        violations.append(A11yViolation(
            criterion="1.3.1", criterion_name="Info and Relationships",
            severity=Severity.MAJOR,
            element="document",
            description="No <h1> heading found.",
            fix="Add a single <h1> as the main page heading.",
            source="programmatic"
        ))
    elif len(h1s) > 1:
        violations.append(A11yViolation(
            criterion="1.3.1", criterion_name="Info and Relationships",
            severity=Severity.MINOR,
            element="document",
            description=f"Multiple <h1> headings found ({len(h1s)}).",
            fix="Use a single <h1> per page for the main heading.",
            source="programmatic"
        ))

    # Check hierarchy skips in document order. prev_level > 0 means a
    # jump before the first heading is never counted as a skip.
    prev_level = 0
    for h in parser.headings:
        level = h["level"]
        if level > prev_level + 1 and prev_level > 0:
            violations.append(A11yViolation(
                criterion="1.3.1", criterion_name="Info and Relationships",
                severity=Severity.MINOR,
                element=f"<h{level}>{h['text'][:50]}</h{level}>",
                description=f"Heading level skipped: h{prev_level} → h{level}",
                fix=f"Use <h{prev_level + 1}> instead, or fill the gap.",
                source="programmatic"
            ))
        prev_level = level

    return violations
|
|
|
|
|
|
def check_landmarks(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 1.3.1 — Landmarks and structure.

    Requires a main landmark (major violation if missing) and a nav
    landmark (minor), satisfied either by the native element or by the
    equivalent ARIA role.
    """
    found = []
    roles = {lm.get("role", "") for lm in parser.landmarks}
    tags = {lm.get("tag", "") for lm in parser.landmarks}

    if "main" not in roles and "main" not in tags:
        found.append(A11yViolation(
            criterion="1.3.1", criterion_name="Info and Relationships",
            severity=Severity.MAJOR,
            element="document",
            description="No <main> landmark found.",
            fix="Wrap the main content in a <main> element.",
            source="programmatic"
        ))

    if "navigation" not in roles and "nav" not in tags:
        found.append(A11yViolation(
            criterion="1.3.1", criterion_name="Info and Relationships",
            severity=Severity.MINOR,
            element="document",
            description="No <nav> landmark found.",
            fix="Wrap navigation in a <nav> element.",
            source="programmatic"
        ))

    return found
|
|
|
|
|
|
def check_skip_nav(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 2.4.1 — Bypass Blocks (skip-navigation link)."""
    if parser.skip_nav:
        return []
    # Parser flag missed; fall back to scanning link text for "skip".
    if any("skip" in link.get("text", "").lower() for link in parser.links):
        return []
    return [A11yViolation(
        criterion="2.4.1", criterion_name="Bypass Blocks",
        severity=Severity.MAJOR,
        element="document",
        description="No skip navigation link found.",
        fix="Add a 'Skip to main content' link as the first focusable element.",
        source="programmatic"
    )]
|
|
|
|
|
|
def check_form_labels(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 4.1.2 — Name, Role, Value (form inputs).

    NOTE(review): an id alone is treated as "labelled" on the assumption
    that a matching <label for=...> exists somewhere on the page — the
    parser does not verify that label is actually present.
    """
    issues = []
    exempt_types = ("hidden", "submit", "button", "reset", "image")
    for control in parser.inputs:
        if control["type"] in exempt_types:
            continue
        labelled = control.get("aria_label") or control.get("aria_labelledby") or control.get("id")
        if labelled:
            continue
        issues.append(A11yViolation(
            criterion="4.1.2", criterion_name="Name, Role, Value",
            severity=Severity.MAJOR,
            element=f"<{control['tag']} type=\"{control['type']}\">",
            description="Form input has no associated label or aria-label.",
            fix="Add a <label for=\"...\"> or aria-label attribute.",
            source="programmatic"
        ))
    return issues
|
|
|
|
|
|
def check_link_text(parser: A11yHTMLParser) -> list[A11yViolation]:
    """WCAG 2.4.4 — Link Purpose.

    Flags links with no accessible name (major) and links whose name is
    a generic phrase such as "click here" (minor).
    """
    violations = []
    for link in parser.links:
        # BUG FIX: strip the visible text BEFORE falling back to the
        # aria-label. Previously whitespace-only text was truthy, so the
        # aria-label was ignored and the link was misreported as having
        # no accessible text. (`or ""` guards None attribute values.)
        text = ((link.get("text") or "").strip()
                or (link.get("aria_label") or "").strip()).lower()
        href = link.get("href", "")
        if not text:
            violations.append(A11yViolation(
                criterion="2.4.4", criterion_name="Link Purpose",
                severity=Severity.MAJOR,
                element=f"<a href=\"{href[:60]}\">",
                description="Link has no accessible text.",
                fix="Add visible text content or aria-label to the link.",
                source="programmatic"
            ))
        elif text in ("click here", "read more", "here", "more", "link"):
            violations.append(A11yViolation(
                criterion="2.4.4", criterion_name="Link Purpose",
                severity=Severity.MINOR,
                element=f"<a href=\"{href[:60]}\">{text}</a>",
                description=f"Non-descriptive link text: '{text}'",
                fix="Use descriptive text that explains the link destination.",
                source="programmatic"
            ))
    return violations
|
|
|
|
|
|
def run_programmatic_checks(html: str) -> list[A11yViolation]:
    """Run every programmatic accessibility check on raw HTML content."""
    parser = A11yHTMLParser()
    try:
        parser.feed(html)
    except Exception:
        # Malformed HTML must not abort the audit; check whatever parsed.
        pass

    checks = (
        check_page_title,
        check_lang_attribute,
        check_images_alt_text,
        check_heading_hierarchy,
        check_landmarks,
        check_skip_nav,
        check_form_labels,
        check_link_text,
    )
    results: list[A11yViolation] = []
    for check in checks:
        results.extend(check(parser))
    return results
|
|
|
|
|
|
# === Vision Model Checks ===
|
|
|
|
A11Y_VISION_PROMPT = """You are a WCAG 2.1 AA accessibility auditor. Analyze this screenshot of a web page.
|
|
|
|
Check for these specific issues:
|
|
|
|
1. COLOR CONTRAST: Are text colors sufficiently different from their backgrounds?
|
|
- Normal text needs 4.5:1 contrast ratio
|
|
- Large text (18pt+) needs 3:1
|
|
- UI components need 3:1
|
|
List any text or UI elements where contrast looks insufficient.
|
|
|
|
2. FONT LEGIBILITY: Is text readable?
|
|
- Font size >= 12px for body text
|
|
- Line height >= 1.5 for body text
|
|
- No text in images (should be real text)
|
|
|
|
3. LAYOUT ISSUES: Is the layout accessible?
|
|
- Touch targets >= 44x44px
|
|
- Content not cut off or overlapping
|
|
- Logical reading order visible
|
|
- No horizontal scrolling at standard widths
|
|
|
|
4. FOCUS INDICATORS: Can you see which element has focus?
|
|
- Interactive elements should have visible focus rings
|
|
|
|
5. COLOR ALONE: Is information conveyed only by color?
|
|
- Errors/warnings should not rely solely on red/green
|
|
|
|
Respond as JSON:
|
|
{
|
|
"violations": [
|
|
{
|
|
"criterion": "1.4.3",
|
|
"criterion_name": "Contrast (Minimum)",
|
|
"severity": "critical|major|minor",
|
|
"element": "description of element",
|
|
"description": "what's wrong",
|
|
"fix": "how to fix"
|
|
}
|
|
],
|
|
"passed_checks": ["list of things that look good"],
|
|
"overall_score": 0-100,
|
|
"summary": "brief summary"
|
|
}"""
|
|
|
|
|
|
def run_vision_check(screenshot_path: str, model: str = VISION_MODEL) -> list[A11yViolation]:
    """Run a vision-model accessibility check on a screenshot.

    Sends the PNG at `screenshot_path` to the Ollama chat API and parses
    the model's JSON reply into A11yViolation records. Returns [] on any
    failure (network error, model error, unparseable response).
    """
    try:
        b64 = base64.b64encode(Path(screenshot_path).read_bytes()).decode()
        # BUG FIX: Ollama's native /api/chat expects base64 images in the
        # message's "images" list. The previous OpenAI-style image_url
        # content parts are only understood by the /v1 compatibility
        # endpoint, so the screenshot was never delivered to the model.
        payload = json.dumps({
            "model": model,
            "messages": [{
                "role": "user",
                "content": A11Y_VISION_PROMPT,
                "images": [b64],
            }],
            "stream": False,
            "options": {"temperature": 0.1}
        }).encode()

        req = urllib.request.Request(
            f"{OLLAMA_BASE}/api/chat",
            data=payload,
            headers={"Content-Type": "application/json"}
        )
        with urllib.request.urlopen(req, timeout=120) as resp:
            result = json.loads(resp.read())
        content = result.get("message", {}).get("content", "")

        # Parse the model's (possibly fenced/noisy) JSON response.
        parsed = _parse_json_response(content)
        violations = []
        for v in parsed.get("violations", []):
            # Tolerate off-schema severity strings from the model instead
            # of letting one bad record discard all of them.
            try:
                sev = Severity(v.get("severity", "minor"))
            except ValueError:
                sev = Severity.MINOR
            violations.append(A11yViolation(
                criterion=v.get("criterion", ""),
                criterion_name=v.get("criterion_name", ""),
                severity=sev,
                element=v.get("element", ""),
                description=v.get("description", ""),
                fix=v.get("fix", ""),
                source="vision"
            ))
        return violations

    except Exception as e:
        print(f" Vision check failed: {e}", file=sys.stderr)
        return []
|
|
|
|
|
|
def _parse_json_response(text: str) -> dict:
|
|
"""Extract JSON from potentially messy vision response."""
|
|
cleaned = text.strip()
|
|
if cleaned.startswith("```"):
|
|
lines = cleaned.split("\n")[1:]
|
|
if lines and lines[-1].strip() == "```":
|
|
lines = lines[:-1]
|
|
cleaned = "\n".join(lines)
|
|
try:
|
|
return json.loads(cleaned)
|
|
except json.JSONDecodeError:
|
|
start = cleaned.find("{")
|
|
end = cleaned.rfind("}")
|
|
if start >= 0 and end > start:
|
|
try:
|
|
return json.loads(cleaned[start:end + 1])
|
|
except json.JSONDecodeError:
|
|
pass
|
|
return {}
|
|
|
|
|
|
# === Page Fetching ===
|
|
|
|
def fetch_page(url: str) -> Optional[str]:
    """Fetch a page's HTML, or None on any failure (logged to stderr)."""
    try:
        request = urllib.request.Request(url, headers={"User-Agent": "A11yAudit/1.0"})
        with urllib.request.urlopen(request, timeout=30) as response:
            return response.read().decode("utf-8", errors="replace")
    except Exception as exc:
        print(f" Failed to fetch {url}: {exc}", file=sys.stderr)
        return None
|
|
|
|
|
|
def take_screenshot(url: str, output_path: str, width: int = 1280, height: int = 900) -> bool:
    """Take a screenshot of `url`, trying Playwright then wkhtmltoimage.

    Returns True if a screenshot file was written to `output_path`,
    False if both capture backends are unavailable or fail.
    """
    # SECURITY FIX: the URL and output path used to be f-string-interpolated
    # into the `python3 -c` script source, so quotes/braces in either could
    # inject arbitrary code into the subprocess. Pass them via argv instead.
    playwright_script = """
import sys
from playwright.sync_api import sync_playwright
url, out, width, height = sys.argv[1], sys.argv[2], int(sys.argv[3]), int(sys.argv[4])
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    page = browser.new_page(viewport={"width": width, "height": height})
    page.goto(url, wait_until="networkidle", timeout=30000)
    page.screenshot(path=out, full_page=True)
    browser.close()
"""
    try:
        result = subprocess.run(
            ["python3", "-c", playwright_script, url, output_path, str(width), str(height)],
            capture_output=True, text=True, timeout=60
        )
        if result.returncode == 0 and Path(output_path).exists():
            return True
    except Exception:
        pass  # Playwright missing or capture failed — try the fallback.

    # Fallback: wkhtmltoimage (argument list, shell=False — no injection risk).
    try:
        result = subprocess.run(
            ["wkhtmltoimage", "--width", str(width), "--quality", "90", url, output_path],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode == 0 and Path(output_path).exists():
            return True
    except Exception:
        pass

    return False
|
|
|
|
|
|
# === Audit Logic ===
|
|
|
|
def audit_page(url: str, use_vision: bool = False, model: str = VISION_MODEL) -> A11yPageResult:
    """Run a full accessibility audit on a single page.

    Fetches the page, runs all programmatic checks, optionally runs the
    vision-model check on a screenshot, then scores the page: each
    critical violation costs 25 points, major 10, minor 3 (floored at 0).
    """
    result = A11yPageResult(url=url)

    # Fetch HTML; an unreachable page scores 0 with no checks run.
    html = fetch_page(url)
    if not html:
        result.summary = f"Failed to fetch {url}"
        result.score = 0
        return result

    # Extract title directly via regex (independent of the HTML parser).
    title_match = re.search(r"<title[^>]*>(.*?)</title>", html, re.IGNORECASE | re.DOTALL)
    result.title = title_match.group(1).strip() if title_match else ""

    # Run programmatic checks
    prog_violations = run_programmatic_checks(html)
    result.violations.extend(prog_violations)

    # Track passed checks: any checked criterion with no violation passes.
    criteria_checked = {
        "2.4.2": "Page Titled",
        "3.1.1": "Language of Page",
        "1.1.1": "Non-text Content",
        "1.3.1": "Info and Relationships",
        "2.4.1": "Bypass Blocks",
        "4.1.2": "Name, Role, Value",
        "2.4.4": "Link Purpose",
    }
    violated_criteria = {v.criterion for v in result.violations}
    for criterion, name in criteria_checked.items():
        if criterion not in violated_criteria:
            result.passed_checks.append(f"{criterion} {name}")

    # Vision check (optional). The NamedTemporaryFile only reserves a
    # path (delete=False); the screenshot tool writes to it, and the
    # finally block guarantees cleanup.
    if use_vision:
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            screenshot_path = tmp.name
        try:
            print(f" Taking screenshot of {url}...", file=sys.stderr)
            if take_screenshot(url, screenshot_path):
                print(f" Running vision analysis...", file=sys.stderr)
                vision_violations = run_vision_check(screenshot_path, model)
                result.violations.extend(vision_violations)
                result.passed_checks.append("Vision model analysis completed")
            else:
                # Best-effort: a missing screenshot backend is not a failure.
                result.passed_checks.append("Screenshot unavailable — vision check skipped")
        finally:
            Path(screenshot_path).unlink(missing_ok=True)

    # Calculate score from severity-weighted deductions, floored at 0.
    criticals = sum(1 for v in result.violations if v.severity == Severity.CRITICAL)
    majors = sum(1 for v in result.violations if v.severity == Severity.MAJOR)
    minors = sum(1 for v in result.violations if v.severity == Severity.MINOR)
    result.score = max(0, 100 - (criticals * 25) - (majors * 10) - (minors * 3))

    # Summary
    if not result.violations:
        result.summary = f"All programmatic checks passed for {url}"
    else:
        result.summary = (
            f"{len(result.violations)} issue(s) found: "
            f"{criticals} critical, {majors} major, {minors} minor"
        )

    return result
|
|
|
|
|
|
def audit_site(base_url: str, pages: list[str], use_vision: bool = False,
               model: str = VISION_MODEL) -> A11yAuditReport:
    """Audit several pages of a site and aggregate into one report.

    Entries in `pages` that start with "http" are used verbatim; bare
    paths are appended to `base_url`.
    """
    report = A11yAuditReport(site=base_url)

    for path in pages:
        target = path if path.startswith("http") else base_url.rstrip("/") + path
        print(f"Auditing: {target}", file=sys.stderr)
        report.page_results.append(audit_page(target, use_vision, model))

    def count_severity(severity: Severity) -> int:
        # Count violations of one severity across all audited pages.
        return sum(
            1
            for page in report.page_results
            for violation in page.violations
            if violation.severity == severity
        )

    report.pages_audited = len(report.page_results)
    report.total_violations = sum(len(page.violations) for page in report.page_results)
    report.critical_violations = count_severity(Severity.CRITICAL)
    report.major_violations = count_severity(Severity.MAJOR)

    if report.page_results:
        report.overall_score = sum(p.score for p in report.page_results) // len(report.page_results)

    report.summary = (
        f"Audited {report.pages_audited} pages. "
        f"Overall score: {report.overall_score}/100. "
        f"{report.total_violations} total issues: "
        f"{report.critical_violations} critical, {report.major_violations} major."
    )

    return report
|
|
|
|
|
|
# === Output Formatting ===
|
|
|
|
def format_report(report: A11yAuditReport, fmt: str = "json") -> str:
    """Format the audit report.

    Args:
        report: Aggregated results produced by audit_site().
        fmt: "json" for machine-readable output, "text" for a
            human-readable console report.

    Returns:
        The formatted report as a single string.

    Raises:
        ValueError: If fmt is neither "json" nor "text".
    """
    if fmt == "json":
        data = {
            "site": report.site,
            "pages_audited": report.pages_audited,
            "overall_score": report.overall_score,
            "total_violations": report.total_violations,
            "critical_violations": report.critical_violations,
            "major_violations": report.major_violations,
            "summary": report.summary,
            "pages": []
        }
        for page in report.page_results:
            page_data = {
                "url": page.url,
                "title": page.title,
                "score": page.score,
                "violations": [asdict(v) for v in page.violations],
                "passed_checks": page.passed_checks,
                "summary": page.summary,
            }
            # Convert severity enum to string (asdict leaves Enum members as-is).
            for v in page_data["violations"]:
                if hasattr(v["severity"], "value"):
                    v["severity"] = v["severity"].value
            data["pages"].append(page_data)
        return json.dumps(data, indent=2)

    elif fmt == "text":
        lines = []
        lines.append("=" * 60)
        lines.append(" WEB ACCESSIBILITY AUDIT REPORT")
        lines.append("=" * 60)
        lines.append(f" Site: {report.site}")
        lines.append(f" Pages audited: {report.pages_audited}")
        lines.append(f" Overall score: {report.overall_score}/100")
        lines.append(f" Issues: {report.total_violations} total "
                     f"({report.critical_violations} critical, {report.major_violations} major)")
        lines.append("")

        for page in report.page_results:
            lines.append(f" ── {page.url} ──")
            lines.append(f" Title: {page.title}")
            lines.append(f" Score: {page.score}/100")
            lines.append("")

            if page.violations:
                lines.append(f" Violations ({len(page.violations)}):")
                for v in page.violations:
                    # Unknown severities (e.g. "pass") fall back to the white dot.
                    sev_icon = {"critical": "🔴", "major": "🟡", "minor": "🔵"}.get(
                        v.severity.value if hasattr(v.severity, "value") else str(v.severity), "⚪"
                    )
                    lines.append(f" {sev_icon} [{v.criterion}] {v.criterion_name}")
                    lines.append(f" Element: {v.element}")
                    lines.append(f" Issue: {v.description}")
                    lines.append(f" Fix: {v.fix}")
                    lines.append(f" Source: {v.source}")
                    lines.append("")
            else:
                lines.append(" ✓ No violations found")
                lines.append("")

            if page.passed_checks:
                lines.append(f" Passed: {', '.join(page.passed_checks)}")
                lines.append("")

        lines.append("=" * 60)
        lines.append(f" Summary: {report.summary}")
        lines.append("=" * 60)
        return "\n".join(lines)

    else:
        raise ValueError(f"Unknown format: {fmt}")
|
|
|
|
|
|
# === CLI ===
|
|
|
|
def main():
    """CLI entry point: parse arguments, run the audit, emit the report.

    Exits with status 1 when any critical violation is found, so CI jobs
    can gate on the audit result.
    """
    parser = argparse.ArgumentParser(
        description="Visual Accessibility Audit — WCAG 2.1 AA compliance checker",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --url https://timmyfoundation.org
  %(prog)s --url https://timmyfoundation.org --pages /about /donate
  %(prog)s --url https://timmyfoundation.org --vision
  %(prog)s --url https://timmyfoundation.org --format text
"""
    )
    parser.add_argument("--url", required=True, help="Base URL to audit")
    parser.add_argument("--pages", nargs="*", default=DEFAULT_PAGES,
                        help="Paths to audit (default: / /about /donate /blog /contact)")
    parser.add_argument("--vision", action="store_true",
                        help="Include vision model analysis (requires Ollama)")
    parser.add_argument("--model", default=VISION_MODEL,
                        help=f"Vision model (default: {VISION_MODEL})")
    parser.add_argument("--format", choices=["json", "text"], default="json",
                        help="Output format")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")

    args = parser.parse_args()

    report = audit_site(args.url, args.pages, use_vision=args.vision, model=args.model)
    output = format_report(report, args.format)

    # Write the report to the requested file, or to stdout; progress
    # messages go to stderr so stdout stays machine-parseable.
    if args.output:
        Path(args.output).write_text(output)
        print(f"Report written to {args.output}", file=sys.stderr)
    else:
        print(output)

    # Exit code: non-zero if critical violations
    if report.critical_violations > 0:
        sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":  # script entry point — no side effects on import
    main()
|