#!/usr/bin/env python3
"""
foundation_accessibility_audit.py — Multimodal Visual Accessibility Audit.
Analyzes web pages for WCAG 2.1 AA compliance using both programmatic checks
and vision model analysis. Screenshots pages, checks contrast ratios, detects
layout issues, validates alt text, and produces structured audit reports.
Usage:
# Audit a single page
python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org
# Audit multiple pages
python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org --pages /about /donate /blog
# With vision model analysis (Gemma 3)
python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org --vision
# Programmatic-only (no vision model needed)
python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org --programmatic
# Output as text report
python scripts/foundation_accessibility_audit.py --url https://timmyfoundation.org --format text
WCAG 2.1 AA Checks:
1.4.3 Contrast (Minimum) — text vs background ratio >= 4.5:1
1.4.6 Contrast (Enhanced) — ratio >= 7:1 for AAA
1.4.11 Non-text Contrast — UI components >= 3:1
1.3.1 Info and Relationships — heading hierarchy, landmarks
1.1.1 Non-text Content — alt text on images
2.4.1 Bypass Blocks — skip navigation link
2.4.2 Page Titled — meaningful
2.4.6 Headings and Labels — descriptive headings
4.1.2 Name, Role, Value — ARIA labels on interactive elements
Refs: timmy-config#492, WCAG 2.1 AA
"""
from __future__ import annotations
import argparse
import base64
import colorsys
import json
import os
import re
import subprocess
import sys
import tempfile
import urllib.error
import urllib.request
from dataclasses import dataclass, field, asdict
from enum import Enum
from pathlib import Path
from typing import Optional
from html.parser import HTMLParser
# === Configuration ===
OLLAMA_BASE = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
VISION_MODEL = os.environ.get("VISUAL_REVIEW_MODEL", "gemma3:12b")
DEFAULT_PAGES = ["/", "/about", "/donate", "/blog", "/contact"]
class Severity(str, Enum):
CRITICAL = "critical" # Blocks access entirely
MAJOR = "major" # Significant barrier
MINOR = "minor" # Inconvenience
PASS = "pass"
@dataclass
class A11yViolation:
"""A single accessibility violation."""
criterion: str # WCAG criterion (e.g. "1.4.3")
criterion_name: str # Human-readable name
severity: Severity = Severity.MINOR
element: str = "" # CSS selector or element description
description: str = "" # What's wrong
fix: str = "" # Suggested fix
source: str = "" # "programmatic" or "vision"
@dataclass
class A11yPageResult:
"""Audit result for a single page."""
url: str = ""
title: str = ""
score: int = 100
violations: list[A11yViolation] = field(default_factory=list)
passed_checks: list[str] = field(default_factory=list)
summary: str = ""
@dataclass
class A11yAuditReport:
"""Complete audit report across all pages."""
site: str = ""
pages_audited: int = 0
overall_score: int = 100
total_violations: int = 0
critical_violations: int = 0
major_violations: int = 0
page_results: list[A11yPageResult] = field(default_factory=list)
summary: str = ""
# === HTML Parser for Programmatic Checks ===
class A11yHTMLParser(HTMLParser):
"""Extract accessibility-relevant elements from HTML."""
def __init__(self):
super().__init__()
self.title = ""
self.images = [] # [{"src": ..., "alt": ...}]
self.headings = [] # [{"level": int, "text": ...}]
self.links = [] # [{"text": ..., "href": ...}]
self.inputs = [] # [{"type": ..., "label": ..., "id": ...}]
self.landmarks = [] # [{"tag": ..., "role": ...}]
self.skip_nav = False
self.lang = ""
self.in_title = False
self.in_heading = False
self.heading_level = 0
self.heading_text = ""
self.current_text = ""
def handle_starttag(self, tag, attrs):
attr_dict = dict(attrs)
if tag == "title":
self.in_title = True
elif tag == "html":
self.lang = attr_dict.get("lang", "")
elif tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
self.in_heading = True
self.heading_level = int(tag[1])
self.heading_text = ""
elif tag == "img":
self.images.append({
"src": attr_dict.get("src", ""),
"alt": attr_dict.get("alt"),
"role": attr_dict.get("role", ""),
})
elif tag == "a":
self.links.append({
"href": attr_dict.get("href", ""),
"text": "",
"aria_label": attr_dict.get("aria-label", ""),
})
elif tag in ("input", "select", "textarea"):
self.inputs.append({
"tag": tag,
"type": attr_dict.get("type", "text"),
"id": attr_dict.get("id", ""),
"aria_label": attr_dict.get("aria-label", ""),
"aria_labelledby": attr_dict.get("aria-labelledby", ""),
})
elif tag in ("main", "nav", "header", "footer", "aside", "section", "form"):
self.landmarks.append({"tag": tag, "role": attr_dict.get("role", "")})
elif tag == "a" and ("skip" in attr_dict.get("href", "").lower() or
"skip" in attr_dict.get("class", "").lower()):
self.skip_nav = True
role = attr_dict.get("role", "")
if role in ("navigation", "main", "banner", "contentinfo", "complementary", "search"):
self.landmarks.append({"tag": tag, "role": role})
if role == "link" and "skip" in (attr_dict.get("aria-label", "") + attr_dict.get("href", "")).lower():
self.skip_nav = True
def handle_endtag(self, tag):
if tag == "title":
self.in_title = False
elif tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
self.headings.append({"level": self.heading_level, "text": self.heading_text.strip()})
self.in_heading = False
elif tag == "a" and self.links:
self.links[-1]["text"] = self.current_text.strip()
self.current_text = ""
def handle_data(self, data):
if self.in_title:
self.title += data
if self.in_heading:
self.heading_text += data
self.current_text += data
# === Color/Contrast Utilities ===
def parse_color(color_str: str) -> Optional[tuple]:
"""Parse CSS color string to (r, g, b) tuple (0-255)."""
if not color_str:
return None
color_str = color_str.strip().lower()
# Named colors (subset)
named = {
"white": (255, 255, 255), "black": (0, 0, 0),
"red": (255, 0, 0), "green": (0, 128, 0), "blue": (0, 0, 255),
"gray": (128, 128, 128), "grey": (128, 128, 128),
"silver": (192, 192, 192), "yellow": (255, 255, 0),
"orange": (255, 165, 0), "purple": (128, 0, 128),
"transparent": None,
}
if color_str in named:
return named[color_str]
# #RRGGBB or #RGB
if color_str.startswith("#"):
hex_str = color_str[1:]
if len(hex_str) == 3:
hex_str = "".join(c * 2 for c in hex_str)
if len(hex_str) == 6:
try:
return tuple(int(hex_str[i:i+2], 16) for i in (0, 2, 4))
except ValueError:
return None
# rgb(r, g, b)
match = re.match(r"rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)", color_str)
if match:
return tuple(int(match.group(i)) for i in (1, 2, 3))
# rgba(r, g, b, a)
match = re.match(r"rgba\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*[\d.]+\s*\)", color_str)
if match:
return tuple(int(match.group(i)) for i in (1, 2, 3))
return None
def relative_luminance(rgb: tuple) -> float:
"""Calculate relative luminance per WCAG 2.1 (sRGB)."""
def linearize(c):
c = c / 255.0
return c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4
r, g, b = [linearize(c) for c in rgb]
return 0.2126 * r + 0.7152 * g + 0.0722 * b
def contrast_ratio(color1: tuple, color2: tuple) -> float:
"""Calculate contrast ratio between two colors per WCAG 2.1."""
l1 = relative_luminance(color1)
l2 = relative_luminance(color2)
lighter = max(l1, l2)
darker = min(l1, l2)
return (lighter + 0.05) / (darker + 0.05)
# === Programmatic Checks ===
def check_page_title(parser: A11yHTMLParser) -> list[A11yViolation]:
"""WCAG 2.4.2 — Page Titled."""
violations = []
title = parser.title.strip()
if not title:
violations.append(A11yViolation(
criterion="2.4.2", criterion_name="Page Titled",
severity=Severity.MAJOR,
element="",
description="Page has no title or title is empty.",
fix="Add a meaningful that describes the page purpose.",
source="programmatic"
))
elif len(title) < 5:
violations.append(A11yViolation(
criterion="2.4.2", criterion_name="Page Titled",
severity=Severity.MINOR,
element=f"{title}",
description=f"Page title is very short: '{title}'",
fix="Use a more descriptive title.",
source="programmatic"
))
return violations
def check_lang_attribute(parser: A11yHTMLParser) -> list[A11yViolation]:
"""WCAG 3.1.1 — Language of Page."""
violations = []
if not parser.lang:
violations.append(A11yViolation(
criterion="3.1.1", criterion_name="Language of Page",
severity=Severity.MAJOR,
element="",
description="Missing lang attribute on element.",
fix="Add lang=\"en\" (or appropriate language code) to .",
source="programmatic"
))
return violations
def check_images_alt_text(parser: A11yHTMLParser) -> list[A11yViolation]:
"""WCAG 1.1.1 — Non-text Content."""
violations = []
for img in parser.images:
if img.get("role") == "presentation" or img.get("role") == "none":
continue # Decorative images are exempt
alt = img.get("alt")
src = img.get("src", "unknown")
if alt is None:
violations.append(A11yViolation(
criterion="1.1.1", criterion_name="Non-text Content",
severity=Severity.CRITICAL,
element=f"",
description="Image missing alt attribute.",
fix="Add descriptive alt text, or alt=\"\" with role=\"presentation\" for decorative images.",
source="programmatic"
))
elif alt.strip() == "":
# Empty alt is OK only for decorative images
if img.get("role") not in ("presentation", "none"):
violations.append(A11yViolation(
criterion="1.1.1", criterion_name="Non-text Content",
severity=Severity.MINOR,
element=f"",
description="Empty alt text — ensure this image is decorative.",
fix="If decorative, add role=\"presentation\". If meaningful, add descriptive alt text.",
source="programmatic"
))
return violations
def check_heading_hierarchy(parser: A11yHTMLParser) -> list[A11yViolation]:
"""WCAG 1.3.1 — Info and Relationships (heading hierarchy)."""
violations = []
if not parser.headings:
violations.append(A11yViolation(
criterion="1.3.1", criterion_name="Info and Relationships",
severity=Severity.MAJOR,
element="document",
description="No headings found on page.",
fix="Add proper heading hierarchy starting with
.",
source="programmatic"
))
return violations
# Check for H1
h1s = [h for h in parser.headings if h["level"] == 1]
if not h1s:
violations.append(A11yViolation(
criterion="1.3.1", criterion_name="Info and Relationships",
severity=Severity.MAJOR,
element="document",
description="No
heading found.",
fix="Add a single
as the main page heading.",
source="programmatic"
))
elif len(h1s) > 1:
violations.append(A11yViolation(
criterion="1.3.1", criterion_name="Info and Relationships",
severity=Severity.MINOR,
element="document",
description=f"Multiple
headings found ({len(h1s)}).",
fix="Use a single
per page for the main heading.",
source="programmatic"
))
# Check hierarchy skips
prev_level = 0
for h in parser.headings:
level = h["level"]
if level > prev_level + 1 and prev_level > 0:
violations.append(A11yViolation(
criterion="1.3.1", criterion_name="Info and Relationships",
severity=Severity.MINOR,
element=f"{h['text'][:50]}",
description=f"Heading level skipped: h{prev_level} → h{level}",
fix=f"Use instead, or fill the gap.",
source="programmatic"
))
prev_level = level
return violations
def check_landmarks(parser: A11yHTMLParser) -> list[A11yViolation]:
"""WCAG 1.3.1 — Landmarks and structure."""
violations = []
roles = {lm.get("role", "") for lm in parser.landmarks}
tags = {lm.get("tag", "") for lm in parser.landmarks}
has_main = "main" in roles or "main" in tags
has_nav = "navigation" in roles or "nav" in tags
if not has_main:
violations.append(A11yViolation(
criterion="1.3.1", criterion_name="Info and Relationships",
severity=Severity.MAJOR,
element="document",
description="No landmark found.",
fix="Wrap the main content in a element.",
source="programmatic"
))
if not has_nav:
violations.append(A11yViolation(
criterion="1.3.1", criterion_name="Info and Relationships",
severity=Severity.MINOR,
element="document",
description="No