Compare commits

...

1 Commits

Author SHA1 Message Date
Alexander Whitestone
e394c85c0b feat: Visual Smoke Test for The Nexus #490
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 9s
Validate Config / YAML Lint (pull_request) Failing after 14s
Smoke Test / smoke (pull_request) Failing after 18s
Validate Config / JSON Validate (pull_request) Successful in 15s
Validate Config / Shell Script Lint (pull_request) Failing after 51s
Validate Config / Cron Syntax Check (pull_request) Successful in 12s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m18s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Deploy Script Dry Run (pull_request) Successful in 12s
Validate Config / Playbook Schema Validation (pull_request) Successful in 21s
PR Checklist / pr-checklist (pull_request) Successful in 3m48s
Architecture Lint / Lint Repository (pull_request) Failing after 11s
Replaces 17-line stub with full visual smoke test suite.

Checks:
1. Page loads (HTTP 200)
2. HTML content (Three.js, canvas, title, no errors)
3. Screenshot capture (Playwright → wkhtmltoimage fallback)
4. Vision model analysis (optional, Gemma 3 layout verification)
5. Baseline comparison (file size + pixel diff via ImageMagick)

Features:
- Three screenshot backends (Playwright, wkhtmltoimage, browserless)
- Vision model checks: layout, Three.js render, navigation, text, errors
- Baseline regression detection (file size + pixel-level diff)
- JSON + text output formats
- CI-safe (programmatic-only mode, no vision dependency)
- Exit code 1 on failure, 0 on pass/warn

Tests: 10/10 passing.
Closes #490
2026-04-13 22:00:10 -04:00
2 changed files with 701 additions and 16 deletions

View File

@@ -1,20 +1,582 @@
import json
from hermes_tools import browser_navigate, browser_vision
#!/usr/bin/env python3
"""
nexus_smoke_test.py — Visual Smoke Test for The Nexus.

Takes screenshots of The Nexus landing page, verifies layout consistency
using both programmatic checks (DOM structure, element presence) and
optional vision model analysis (visual regression detection).
The Nexus is the Three.js 3D world frontend at nexus.alexanderwhitestone.com.
This test ensures the landing page renders correctly on every push.
Usage:
# Full smoke test (programmatic + optional vision)
python scripts/nexus_smoke_test.py
# Programmatic only (no vision model needed, CI-safe)
python scripts/nexus_smoke_test.py --programmatic
# With vision model regression check
python scripts/nexus_smoke_test.py --vision
# Against a specific URL
python scripts/nexus_smoke_test.py --url https://nexus.alexanderwhitestone.com
# With baseline comparison
python scripts/nexus_smoke_test.py --baseline screenshots/nexus-baseline.png
Checks:
1. Page loads without errors (HTTP 200, no console errors)
2. Key elements present (Three.js canvas, title, navigation)
3. No 404/error messages visible
4. JavaScript bundle loaded (window.__nexus or scene exists)
5. Screenshot captured successfully
6. Vision model layout verification (optional)
7. Baseline comparison for visual regression (optional)
Refs: timmy-config#490
"""
from __future__ import annotations
import argparse
import base64
import json
import os
import re
import subprocess
import sys
import tempfile
import urllib.error
import urllib.request
from dataclasses import dataclass, field, asdict
from enum import Enum
from pathlib import Path
from typing import Optional
# === Configuration ===
# Page under test; taken from the NEXUS_URL environment variable when set.
DEFAULT_URL = os.environ.get("NEXUS_URL", "https://nexus.alexanderwhitestone.com")
# Base URL the vision check posts its `/api/chat` request to (OLLAMA_BASE_URL).
OLLAMA_BASE = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
# Default model name sent with the vision request (VISUAL_REVIEW_MODEL).
VISION_MODEL = os.environ.get("VISUAL_REVIEW_MODEL", "gemma3:12b")
class Severity(str, Enum):
    """Outcome level of a check or of the whole run.

    Members are also plain ``str`` instances, so they compare equal to
    their lowercase values (``Severity.PASS == "pass"``).
    """

    PASS = "pass"
    WARN = "warn"
    FAIL = "fail"
@dataclass
class SmokeCheck:
    """Outcome of one named check within a smoke-test run."""

    # Human-readable label shown in reports (e.g. "Page Loads").
    name: str
    # Starts as PASS; the check functions downgrade it to WARN or FAIL.
    status: Severity = Severity.PASS
    # One-line description of the outcome.
    message: str = ""
    # Optional extra context printed beneath the message in text output.
    details: str = ""
@dataclass
class SmokeResult:
    """Aggregate outcome of a smoke-test run against one URL."""

    # URL that was tested.
    url: str = ""
    # Overall status derived from the individual checks.
    status: Severity = Severity.PASS
    # Individual check outcomes, in execution order.
    checks: list[SmokeCheck] = field(default_factory=list)
    # Path of the screenshot file produced for this run.
    screenshot_path: str = ""
    # One-line summary with check / failure / warning counts.
    summary: str = ""
    # Wall-clock duration of the run in milliseconds.
    duration_ms: int = 0
# === HTTP/Network Checks ===
def check_page_loads(url: str) -> SmokeCheck:
    """Request *url* and report whether it answers with HTTP 200.

    Returns a ``SmokeCheck`` named "Page Loads": PASS for status 200,
    WARN for any other status that ``urlopen`` returns without raising,
    and FAIL when the request raises (HTTP error or any other exception).
    """
    outcome = SmokeCheck(name="Page Loads")
    try:
        # Building the Request stays inside the try: a malformed URL raises here.
        request = urllib.request.Request(url, headers={"User-Agent": "NexusSmokeTest/1.0"})
        with urllib.request.urlopen(request, timeout=15) as response:
            code = response.status
    except urllib.error.HTTPError as err:
        outcome.status = Severity.FAIL
        outcome.message = f"HTTP {err.code}: {err.reason}"
    except Exception as err:
        outcome.status = Severity.FAIL
        outcome.message = f"Connection failed: {err}"
    else:
        if code == 200:
            outcome.status = Severity.PASS
            outcome.message = f"HTTP {code}"
        else:
            outcome.status = Severity.WARN
            outcome.message = f"HTTP {code} (expected 200)"
    return outcome
def check_html_content(url: str) -> tuple[SmokeCheck, str]:
    """Fetch the page HTML and run string-level sanity checks on it.

    Checks performed (all plain substring / regex tests on the raw HTML):
    a Three.js reference, a ``<canvas>`` tag, a ``<title>`` mentioning
    "nexus" or "tower", error-looking text in the first or last 500
    characters, and the presence of at least one ``<script>`` tag.

    Returns:
        ``(check, html)`` where ``check`` is the "HTML Content" result and
        ``html`` is the decoded page body ("" when the fetch failed).
        The check is FAIL when the fetch fails or more than two issues are
        found, WARN for one or two issues, PASS otherwise.
    """
    check = SmokeCheck(name="HTML Content")
    html = ""
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "NexusSmokeTest/1.0"})
        with urllib.request.urlopen(req, timeout=15) as resp:
            html = resp.read().decode("utf-8", errors="replace")
    except Exception as e:
        check.status = Severity.FAIL
        check.message = f"Failed to fetch: {e}"
        return check, html

    html_lower = html.lower()  # computed once; every test below is case-insensitive
    issues = []
    details = []

    # Any casing of "three", "THREE" or "threejs" contains "three" once lowercased.
    if "three" not in html_lower:
        issues.append("No Three.js reference found")

    if "<canvas" not in html_lower:
        issues.append("No <canvas> element found")

    title_match = re.search(r"<title[^>]*>(.*?)</title>", html, re.IGNORECASE | re.DOTALL)
    if title_match:
        title = title_match.group(1).strip()
        details.append(f"Title: {title}")
        title_lower = title.lower()
        if "nexus" not in title_lower and "tower" not in title_lower:
            issues.append(f"Title doesn't reference Nexus: '{title}'")
    else:
        issues.append("No <title> element")

    # Only the head and tail of the document are scanned for error text.
    head, tail = html_lower[:500], html_lower[-500:]
    for pattern in ("404", "not found", "error", "500 internal", "connection refused"):
        if pattern in head or pattern in tail:
            issues.append(f"Possible error message in HTML: '{pattern}'")

    script_count = html_lower.count("<script")
    if script_count == 0:
        issues.append("No <script> tags found")
    else:
        details.append(f"Scripts: {script_count}")

    # Joining avoids a dangling " | " prefix when the title is missing.
    check.details = " | ".join(details)

    if issues:
        check.status = Severity.FAIL if len(issues) > 2 else Severity.WARN
        check.message = "; ".join(issues)
    else:
        check.status = Severity.PASS
        check.message = "HTML structure looks correct"
    return check, html
# === Screenshot Capture ===
# Child-process script for the Playwright backend. The URL, output path and
# viewport size arrive via sys.argv instead of being interpolated into the
# source, so quotes or backslashes in them cannot break or inject code.
_PLAYWRIGHT_SCRIPT = """
import json
import sys
try:
    from playwright.sync_api import sync_playwright
except ImportError:
    sys.exit(2)
url, output_path = sys.argv[1], sys.argv[2]
width, height = int(sys.argv[3]), int(sys.argv[4])
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    page = browser.new_page(viewport={"width": width, "height": height})
    errors = []
    page.on("pageerror", lambda e: errors.append(str(e)))
    page.on("console", lambda m: errors.append(f"console.{m.type}: {m.text}") if m.type == "error" else None)
    page.goto(url, wait_until="networkidle", timeout=30000)
    page.wait_for_timeout(3000)  # Wait for Three.js to render
    page.screenshot(path=output_path, full_page=False)
    # Check for Three.js scene
    has_canvas = page.evaluate("() => !!document.querySelector('canvas')")
    has_three = page.evaluate("() => typeof THREE !== 'undefined' || !!document.querySelector('canvas')")
    title = page.title()
    browser.close()
print(json.dumps({"has_canvas": has_canvas, "has_three": has_three, "title": title, "errors": errors[:5]}))
"""


def take_screenshot(url: str, output_path: str, width: int = 1280, height: int = 720) -> SmokeCheck:
    """Capture a screenshot of *url* into *output_path*.

    Backends are tried in order until one writes a file larger than 1000
    bytes: Playwright (run in a child interpreter), the ``wkhtmltoimage``
    binary, then a browserless service when ``BROWSERLESS_URL`` is set.

    Args:
        url: Page to capture.
        output_path: Destination image path.
        width: Viewport width in pixels.
        height: Viewport height in pixels (used by the Playwright backend).

    Returns:
        A "Screenshot Capture" check: PASS on success, WARN when Playwright
        captured the page but reported JS errors, or WARN when no backend
        produced a screenshot. In the last case ``details`` lists why each
        attempted backend failed, followed by an install hint.
    """
    check = SmokeCheck(name="Screenshot Capture")
    target = Path(output_path)
    notes = []  # per-backend failure reasons, surfaced if every backend fails

    def _captured() -> bool:
        return target.exists() and target.stat().st_size > 1000

    # --- Backend 1: Playwright ---
    try:
        result = subprocess.run(
            # sys.executable: same interpreter/venv as this script, where
            # Playwright is most likely to be installed.
            [sys.executable, "-c", _PLAYWRIGHT_SCRIPT, url, output_path, str(width), str(height)],
            capture_output=True, text=True, timeout=60,
        )
    except Exception as e:
        notes.append(f"Playwright error: {e}")
    else:
        if result.returncode == 0:
            info = {}
            # The child prints one JSON object; use the first line that looks like one.
            for line in result.stdout.strip().split("\n"):
                if line.startswith("{"):
                    try:
                        info = json.loads(line)
                    except json.JSONDecodeError:
                        info = {}
                    break
            errors = info.get("errors") or []
            extras = []
            if info.get("has_canvas"):
                extras.append("canvas present")
            if errors:
                extras.append(f"{len(errors)} JS errors")
            check.details = "; ".join(extras) if extras else "Playwright capture"
            if errors:
                check.status = Severity.WARN
                check.message = f"JS errors detected: {errors[0][:100]}"
            else:
                check.message = "Screenshot captured via Playwright"
            if _captured():
                return check
            notes.append("Playwright produced no usable screenshot")
        elif result.returncode == 2:
            # Exit code 2 is the child's signal that the import failed.
            notes.append("Playwright not installed")
        else:
            notes.append(f"Playwright failed: {result.stderr[:200]}")

    # --- Backend 2: wkhtmltoimage ---
    try:
        result = subprocess.run(
            ["wkhtmltoimage", "--width", str(width), "--quality", "90", url, output_path],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode == 0 and _captured():
            check.status = Severity.PASS
            check.message = "Screenshot captured via wkhtmltoimage"
            check.details = ""
            return check
        notes.append(f"wkhtmltoimage failed (exit {result.returncode})")
    except Exception as e:
        notes.append(f"wkhtmltoimage unavailable: {e}")

    # --- Backend 3: browserless (only when configured) ---
    browserless = os.environ.get("BROWSERLESS_URL")
    if browserless:
        try:
            payload = json.dumps({
                "url": url,
                "options": {"type": "png", "fullPage": False},
            })
            req = urllib.request.Request(
                f"{browserless}/screenshot",
                data=payload.encode(),
                headers={"Content-Type": "application/json"},
            )
            with urllib.request.urlopen(req, timeout=30) as resp:
                img_data = resp.read()
            target.write_bytes(img_data)
            if target.stat().st_size > 1000:
                check.status = Severity.PASS
                check.message = "Screenshot captured via browserless"
                check.details = ""
                return check
            notes.append("browserless returned an implausibly small image")
        except Exception as e:
            notes.append(f"browserless failed: {e}")

    check.status = Severity.WARN
    check.message = "No screenshot backend available"
    hint = "Install Playwright: pip install playwright && playwright install chromium"
    check.details = "; ".join(notes + [hint])
    return check
# === Vision Analysis ===
# Prompt text sent together with the screenshot by run_vision_check(); the
# model's reply is parsed as the JSON object sketched at the end of the prompt.
# NOTE(review): the checklist names six items (including TOWER) while the JSON
# template lists only five checks — confirm whether a "Tower" entry is intended.
VISION_PROMPT = """You are a web QA engineer. Analyze this screenshot of The Nexus (a Three.js 3D world).
Check for:
1. LAYOUT: Is the page layout correct? Is content centered, not broken or overlapping?
2. THREE.JS RENDER: Is there a visible 3D canvas/scene? Any black/blank areas where rendering failed?
3. NAVIGATION: Are navigation elements (buttons, links, menu) visible and properly placed?
4. TEXT: Is text readable? Any missing text, garbled characters, or font issues?
5. ERRORS: Any visible error messages, 404 pages, or broken images?
6. TOWER: Is the Tower or entry portal visible in the scene?
Respond as JSON:
{
"status": "PASS|FAIL|WARN",
"checks": [
{"name": "Layout", "status": "pass|fail|warn", "message": "..."},
{"name": "Three.js Render", "status": "pass|fail|warn", "message": "..."},
{"name": "Navigation", "status": "pass|fail|warn", "message": "..."},
{"name": "Text Readability", "status": "pass|fail|warn", "message": "..."},
{"name": "Error Messages", "status": "pass|fail|warn", "message": "..."}
],
"summary": "brief overall assessment"
}"""
def run_vision_check(screenshot_path: str, model: str = VISION_MODEL) -> list[SmokeCheck]:
    """Ask the vision model to review the screenshot and convert its reply to checks.

    The image is base64-encoded and posted to ``{OLLAMA_BASE}/api/chat``
    in Ollama's native message format: a string ``content`` plus an
    ``images`` list of base64 strings. The reply's ``message.content`` is
    parsed as JSON, and each entry of its ``checks`` array becomes a
    ``SmokeCheck`` named ``"Vision: <name>"``.

    Args:
        screenshot_path: Path of the image file to analyse.
        model: Model name placed in the request.

    Returns:
        One check per entry reported by the model. If the request or
        parsing fails, or no usable entries come back, a single WARN check
        named "Vision Analysis" is returned. This function never raises
        and never produces FAIL on its own.
    """
    checks = []
    try:
        b64 = base64.b64encode(Path(screenshot_path).read_bytes()).decode()
        payload = json.dumps({
            "model": model,
            # Native /api/chat takes images as a sibling of the string content,
            # not as OpenAI-style content parts.
            "messages": [{"role": "user", "content": VISION_PROMPT, "images": [b64]}],
            "stream": False,
            "options": {"temperature": 0.1},
        }).encode()
        req = urllib.request.Request(
            f"{OLLAMA_BASE}/api/chat",
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=120) as resp:
            result = json.loads(resp.read())
        content = result.get("message", {}).get("content", "")
        parsed = _parse_json_response(content)
        entries = parsed.get("checks", []) if isinstance(parsed, dict) else []
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            # Models often answer "PASS"/"Fail"; normalise, and treat anything
            # unrecognised as WARN instead of discarding every check.
            raw_status = str(entry.get("status", "warn")).strip().lower()
            try:
                status = Severity(raw_status)
            except ValueError:
                status = Severity.WARN
            checks.append(SmokeCheck(
                name=f"Vision: {entry.get('name', 'Unknown')}",
                status=status,
                message=str(entry.get("message", "")),
            ))
        if not checks:
            checks.append(SmokeCheck(
                name="Vision Analysis",
                status=Severity.WARN,
                message="Vision model returned no structured checks",
            ))
    except Exception as e:
        # Best effort by design: the vision step is optional and must not fail the run.
        checks.append(SmokeCheck(
            name="Vision Analysis",
            status=Severity.WARN,
            message=f"Vision check failed: {e}",
        ))
    return checks
# === Baseline Comparison ===
def compare_baseline(current_path: str, baseline_path: str) -> SmokeCheck:
    """Compare the current screenshot against a baseline image.

    Two signals are used. File-size difference is always computed:
    more than 50% is FAIL, more than 20% is WARN. When ImageMagick's
    ``compare`` is runnable, the absolute-error pixel count can raise the
    verdict: more than 10000 differing pixels is FAIL, more than 1000 is
    WARN. The pixel step is best effort and is skipped silently when the
    tool is missing, errors out, or prints something unparseable.

    Args:
        current_path: Screenshot taken during this run.
        baseline_path: Reference screenshot.

    Returns:
        A "Baseline Comparison" check. WARN when the baseline is missing
        or empty, FAIL when there is no current screenshot.
    """
    check = SmokeCheck(name="Baseline Comparison")
    current = Path(current_path)
    baseline = Path(baseline_path)

    # is_file() rather than exists(): a directory (e.g. an empty path, which
    # resolves to ".") is not a usable image.
    if not baseline.is_file():
        check.status = Severity.WARN
        check.message = f"Baseline not found: {baseline_path}"
        return check
    if not current.is_file():
        check.status = Severity.FAIL
        check.message = "No current screenshot to compare"
        return check

    # Simple file size comparison (rough regression indicator)
    baseline_size = baseline.stat().st_size
    current_size = current.stat().st_size
    if baseline_size == 0:
        check.status = Severity.WARN
        check.message = "Baseline is empty"
        return check

    diff_pct = abs(current_size - baseline_size) / baseline_size * 100
    if diff_pct > 50:
        check.status = Severity.FAIL
        check.message = f"Major visual change: {diff_pct:.0f}% file size difference"
    elif diff_pct > 20:
        check.status = Severity.WARN
        check.message = f"Significant visual change: {diff_pct:.0f}% file size difference"
    else:
        check.status = Severity.PASS
        check.message = f"Visual consistency: {diff_pct:.1f}% difference"
    check.details = f"Baseline: {baseline_size}B, Current: {current_size}B"

    # Pixel-level diff using ImageMagick (if available).
    # Derive the diff path from the stem so it can never equal current_path,
    # which str.replace(".png", ...) did for any other extension.
    diff_output = str(current.with_name(f"{current.stem}-diff.png"))
    try:
        result = subprocess.run(
            ["compare", "-metric", "AE", current_path, baseline_path, diff_output],
            capture_output=True, text=True, timeout=15,
        )
    except (OSError, subprocess.SubprocessError):
        return check  # tool missing or timed out: keep the size-based verdict

    # compare exits 0 (similar) or 1 (dissimilar); anything else is an error.
    if result.returncode not in (0, 1):
        return check

    # The metric goes to stderr and may look like "1234", "1.2e+06" or
    # "1234 (0.018)" depending on the ImageMagick version; take the leading number.
    metric = re.match(r"\s*(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)", result.stderr)
    if metric is None:
        return check
    pixels_diff = int(float(metric.group(1)))

    check.details += f" | Pixel diff: {pixels_diff}"
    if pixels_diff > 10000:
        check.status = Severity.FAIL
        check.message = f"Major visual regression: {pixels_diff} pixels changed"
    elif pixels_diff > 1000:
        check.status = Severity.WARN
        check.message = f"Visual change detected: {pixels_diff} pixels changed"
    return check
# === Helpers ===
def _parse_json_response(text: str) -> dict:
    """Extract a JSON object from a model reply, tolerating fences and chatter.

    A leading Markdown code fence (with its closing fence, if present) is
    stripped first. The remaining text is parsed as JSON; if that does not
    yield an object, the span from the first ``{`` to the last ``}`` is
    tried.

    Returns:
        The parsed object, or ``{}`` when no JSON *object* can be
        recovered. Valid JSON that is not an object (list, number, string)
        also yields ``{}``, so callers can rely on ``.get``.
    """
    cleaned = (text or "").strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (which may carry a language tag).
        lines = cleaned.split("\n")[1:]
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        cleaned = "\n".join(lines)

    candidates = [cleaned]
    start = cleaned.find("{")
    end = cleaned.rfind("}")
    if start >= 0 and end > start:
        candidates.append(cleaned[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return {}
# === Main Smoke Test ===
def run_smoke_test(url: str, vision: bool = False, baseline: Optional[str] = None,
                   model: str = VISION_MODEL) -> SmokeResult:
    """Run the full visual smoke test suite against *url*.

    Steps, each logged to stderr: page load, HTML content, screenshot,
    optional vision analysis, optional baseline comparison.

    Args:
        url: Page to test.
        vision: Run the vision-model analysis when a screenshot was captured.
        baseline: Path of a baseline image to compare against, if any.
        model: Vision model name forwarded to ``run_vision_check``.

    Returns:
        A ``SmokeResult`` whose status is FAIL if any check failed, WARN if
        any check warned, PASS otherwise. ``screenshot_path`` is set only
        when a non-empty screenshot was produced; that file is left on
        disk for the caller.
    """
    import time  # local import: `time` is not among the module-level imports

    start = time.monotonic()
    result = SmokeResult(url=url)

    # 1. Page loads
    print(" [1/5] Checking page loads...", file=sys.stderr)
    result.checks.append(check_page_loads(url))

    # 2. HTML content
    print(" [2/5] Checking HTML content...", file=sys.stderr)
    html_check, _html = check_html_content(url)
    result.checks.append(html_check)

    # 3. Screenshot — reserve a temp path; the backends write to it.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        screenshot_path = tmp.name
    print(" [3/5] Taking screenshot...", file=sys.stderr)
    result.checks.append(take_screenshot(url, screenshot_path))

    # The placeholder file always exists, so existence alone does not mean a
    # screenshot was captured. Remove an empty placeholder so later steps
    # don't analyse or diff a zero-byte image.
    shot = Path(screenshot_path)
    captured = shot.exists() and shot.stat().st_size > 0
    if captured:
        result.screenshot_path = screenshot_path
    elif shot.exists():
        shot.unlink()

    # 4. Vision analysis (optional)
    if vision and captured:
        print(" [4/5] Running vision analysis...", file=sys.stderr)
        result.checks.extend(run_vision_check(screenshot_path, model))
    else:
        print(" [4/5] Vision analysis skipped", file=sys.stderr)

    # 5. Baseline comparison (optional)
    if baseline:
        print(" [5/5] Comparing against baseline...", file=sys.stderr)
        result.checks.append(compare_baseline(screenshot_path, baseline))
    else:
        print(" [5/5] Baseline comparison skipped", file=sys.stderr)

    # Determine overall status: any FAIL wins, then any WARN.
    fails = sum(1 for c in result.checks if c.status == Severity.FAIL)
    warns = sum(1 for c in result.checks if c.status == Severity.WARN)
    if fails > 0:
        result.status = Severity.FAIL
    elif warns > 0:
        result.status = Severity.WARN
    else:
        result.status = Severity.PASS

    result.summary = (
        f"{result.status.value.upper()}: {len(result.checks)} checks, "
        f"{fails} failures, {warns} warnings"
    )
    result.duration_ms = int((time.monotonic() - start) * 1000)
    return result
# === Output ===
def format_result(result: SmokeResult, fmt: str = "json") -> str:
    """Render a smoke-test result as JSON or as a plain-text report.

    Args:
        result: The result to render.
        fmt: ``"json"`` for an indented JSON document, ``"text"`` for a
            human-readable report with one line per check.

    Returns:
        The rendered string, or ``""`` for an unrecognised *fmt*.
    """
    if fmt == "json":
        data = {
            "url": result.url,
            "status": result.status.value,
            "summary": result.summary,
            "duration_ms": result.duration_ms,
            "screenshot": result.screenshot_path,
            "checks": [asdict(c) for c in result.checks],
        }
        # asdict() keeps enum members; flatten them to their string values.
        for c in data["checks"]:
            if hasattr(c["status"], "value"):
                c["status"] = c["status"].value
        return json.dumps(data, indent=2)

    if fmt == "text":
        lines = [
            "=" * 50,
            " NEXUS VISUAL SMOKE TEST",
            "=" * 50,
            f" URL: {result.url}",
            f" Status: {result.status.value.upper()}",
            f" Duration: {result.duration_ms}ms",
            "",
        ]
        # Every status needs a visible marker; empty icons made PASS and FAIL
        # lines indistinguishable.
        icons = {"pass": "✅", "warn": "⚠️", "fail": "❌"}
        for c in result.checks:
            key = c.status.value if hasattr(c.status, "value") else str(c.status)
            icon = icons.get(key, "?")
            lines.append(f" {icon} {c.name}: {c.message}")
            if c.details:
                lines.append(f" {c.details}")
        lines.append("")
        lines.append(f" {result.summary}")
        lines.append("=" * 50)
        return "\n".join(lines)

    return ""
# === CLI ===
def main():
    """Command-line entry point.

    Parses arguments, runs the smoke test, writes the formatted report to
    ``--output`` or stdout, and exits with status 1 when the overall
    result is FAIL. Warnings do not fail the process.
    """
    parser = argparse.ArgumentParser(
        description="Visual Smoke Test for The Nexus — layout + regression verification"
    )
    parser.add_argument("--url", default=DEFAULT_URL, help=f"Nexus URL (default: {DEFAULT_URL})")
    parser.add_argument("--vision", action="store_true", help="Include vision model analysis")
    # Documented in the module usage text; previously rejected by argparse.
    parser.add_argument(
        "--programmatic", action="store_true",
        help="Programmatic checks only; never call the vision model (CI-safe)",
    )
    parser.add_argument("--baseline", help="Baseline screenshot for regression comparison")
    parser.add_argument("--model", default=VISION_MODEL, help=f"Vision model (default: {VISION_MODEL})")
    parser.add_argument("--format", choices=["json", "text"], default="json")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")
    args = parser.parse_args()

    # --programmatic wins over --vision so CI can force the model off.
    use_vision = args.vision and not args.programmatic

    print(f"Running smoke test on {args.url}...", file=sys.stderr)
    result = run_smoke_test(args.url, vision=use_vision, baseline=args.baseline, model=args.model)
    output = format_result(result, args.format)

    if args.output:
        # Explicit encoding: the text report contains non-ASCII characters.
        Path(args.output).write_text(output, encoding="utf-8")
        print(f"Results written to {args.output}", file=sys.stderr)
    else:
        print(output)

    if result.status == Severity.FAIL:
        sys.exit(1)
    elif result.status == Severity.WARN:
        sys.exit(0)  # Warnings don't fail CI


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,123 @@
#!/usr/bin/env python3
"""Tests for nexus_smoke_test.py — verifies smoke test logic."""
import json
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
from nexus_smoke_test import (
Severity, SmokeCheck, SmokeResult,
format_result, _parse_json_response,
)
def test_parse_json_clean():
    """A bare JSON object string is parsed directly."""
    parsed = _parse_json_response('{"status": "PASS", "summary": "ok"}')
    assert parsed["status"] == "PASS"
    print(" PASS: test_parse_json_clean")
def test_parse_json_fenced():
    """A JSON object wrapped in a Markdown code fence is unwrapped and parsed."""
    fenced_reply = '```json\n{"status": "FAIL"}\n```'
    parsed = _parse_json_response(fenced_reply)
    assert parsed["status"] == "FAIL"
    print(" PASS: test_parse_json_fenced")
def test_parse_json_garbage():
    """Text containing no JSON yields an empty dict."""
    parsed = _parse_json_response("no json here")
    assert parsed == {}
    print(" PASS: test_parse_json_garbage")
def test_smoke_check_dataclass():
    """SmokeCheck stores the name and status it was constructed with."""
    check = SmokeCheck(name="Test", status=Severity.PASS, message="All good")
    assert check.name == "Test"
    assert check.status == Severity.PASS
    print(" PASS: test_smoke_check_dataclass")
def test_smoke_result_dataclass():
    """SmokeResult keeps its URL and accumulates appended checks."""
    outcome = SmokeResult(url="https://example.com", status=Severity.PASS)
    page_check = SmokeCheck(name="Page Loads", status=Severity.PASS)
    outcome.checks.append(page_check)
    assert len(outcome.checks) == 1
    assert outcome.url == "https://example.com"
    print(" PASS: test_smoke_result_dataclass")
def test_format_json():
    """JSON output round-trips the URL, lowercase status, and check list."""
    outcome = SmokeResult(
        url="https://test.com",
        status=Severity.PASS,
        summary="All good",
        duration_ms=100,
    )
    outcome.checks.append(SmokeCheck(name="Test", status=Severity.PASS, message="OK"))
    decoded = json.loads(format_result(outcome, "json"))
    assert decoded["status"] == "pass"
    assert decoded["url"] == "https://test.com"
    assert len(decoded["checks"]) == 1
    print(" PASS: test_format_json")
def test_format_text():
    """Text output contains the banner, the URL, and the uppercase status."""
    outcome = SmokeResult(
        url="https://test.com",
        status=Severity.WARN,
        summary="1 warning",
        duration_ms=200,
    )
    outcome.checks.append(
        SmokeCheck(name="Screenshot", status=Severity.WARN, message="No backend")
    )
    report = format_result(outcome, "text")
    for expected in ("NEXUS VISUAL SMOKE TEST", "https://test.com", "WARN"):
        assert expected in report
    print(" PASS: test_format_text")
def test_format_text_pass():
    """Text output for an all-PASS result lists every check with its message.

    The previous ``assert "" in output`` was always true, so the
    assertions now pin text that format_result actually emits.
    """
    r = SmokeResult(url="https://test.com", status=Severity.PASS, summary="All clear")
    r.checks.append(SmokeCheck(name="Page Loads", status=Severity.PASS, message="HTTP 200"))
    r.checks.append(SmokeCheck(name="HTML Content", status=Severity.PASS, message="Valid"))
    output = format_result(r, "text")
    assert "Status: PASS" in output
    assert "Page Loads: HTTP 200" in output
    assert "HTML Content: Valid" in output
    assert "All clear" in output
    print(" PASS: test_format_text_pass")
def test_severity_enum():
    """Each Severity member carries its lowercase string value."""
    expected_values = (
        (Severity.PASS, "pass"),
        (Severity.FAIL, "fail"),
        (Severity.WARN, "warn"),
    )
    for member, value in expected_values:
        assert member.value == value
    print(" PASS: test_severity_enum")
def test_overall_status_logic():
    """Counting FAIL/WARN checks on a result gives the expected tallies."""

    def tally(checks, level):
        return len([c for c in checks if c.status == level])

    # All pass
    outcome = SmokeResult()
    outcome.checks = [
        SmokeCheck(name="a", status=Severity.PASS),
        SmokeCheck(name="b", status=Severity.PASS),
    ]
    fails = tally(outcome.checks, Severity.FAIL)
    warns = tally(outcome.checks, Severity.WARN)
    assert fails == 0 and warns == 0

    # One fail
    outcome.checks.append(SmokeCheck(name="c", status=Severity.FAIL))
    fails = tally(outcome.checks, Severity.FAIL)
    assert fails == 1
    print(" PASS: test_overall_status_logic")
def run_all():
    """Run every test function in order and print a summary.

    Returns:
        True when all tests passed, False otherwise.
    """
    print("=== nexus_smoke_test tests ===")
    tests = [
        test_parse_json_clean, test_parse_json_fenced, test_parse_json_garbage,
        test_smoke_check_dataclass, test_smoke_result_dataclass,
        test_format_json, test_format_text, test_format_text_pass,
        test_severity_enum, test_overall_status_logic,
    ]
    passed = failed = 0
    for t in tests:
        try:
            t()
            passed += 1
        except Exception as e:
            # AssertionError often has an empty message; include the type and
            # separate it from the test name so the line stays readable.
            print(f" FAIL: {t.__name__}: {type(e).__name__}: {e}")
            failed += 1
    print(f"\n{'ALL PASSED' if failed == 0 else f'{failed} FAILED'}: {passed}/{len(tests)}")
    return failed == 0


if __name__ == "__main__":
    sys.exit(0 if run_all() else 1)