# timmy-config/scripts/nexus_smoke_test.py

#!/usr/bin/env python3
"""
nexus_smoke_test.py — Visual Smoke Test for The Nexus.

Takes screenshots of The Nexus landing page, verifies layout consistency
using both programmatic checks (DOM structure, element presence) and
optional vision model analysis (visual regression detection).

The Nexus is the Three.js 3D world frontend at nexus.alexanderwhitestone.com.
This test ensures the landing page renders correctly on every push.

Usage:
    # Full smoke test (programmatic + optional vision)
    python scripts/nexus_smoke_test.py

    # Programmatic only (no vision model needed, CI-safe)
    python scripts/nexus_smoke_test.py --programmatic

    # With vision model regression check
    python scripts/nexus_smoke_test.py --vision

    # Against a specific URL
    python scripts/nexus_smoke_test.py --url https://nexus.alexanderwhitestone.com

    # With baseline comparison
    python scripts/nexus_smoke_test.py --baseline screenshots/nexus-baseline.png

Checks:
    1. Page loads without errors (HTTP 200, no console errors)
    2. Key elements present (Three.js canvas, title, navigation)
    3. No 404/error messages visible
    4. JavaScript bundle loaded (window.__nexus or scene exists)
    5. Screenshot captured successfully
    6. Vision model layout verification (optional)
    7. Baseline comparison for visual regression (optional)

Refs: timmy-config#490
"""

from __future__ import annotations

import argparse
import base64
import json
import os
import re
import subprocess
import sys
import tempfile
import urllib.error
import urllib.request
from dataclasses import dataclass, field, asdict
from enum import Enum
from pathlib import Path
from typing import Optional


# === Configuration ===

# Page under test; override with the NEXUS_URL environment variable.
DEFAULT_URL = os.getenv("NEXUS_URL", "https://nexus.alexanderwhitestone.com")
# Base URL of the Ollama server used for vision analysis (OLLAMA_BASE_URL).
OLLAMA_BASE = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
# Default vision model name sent with each request (VISUAL_REVIEW_MODEL).
VISION_MODEL = os.getenv("VISUAL_REVIEW_MODEL", "gemma3:12b")


class Severity(str, Enum):
    """Outcome level used for individual checks and for the overall result.

    The ``str`` mixin makes each member compare equal to its lowercase value
    (``Severity.PASS == "pass"``).
    """
    # Check succeeded.
    PASS = "pass"
    # Something looks off; main() still exits 0 for an overall WARN.
    WARN = "warn"
    # Check failed; an overall FAIL makes main() exit with status 1.
    FAIL = "fail"


@dataclass
class SmokeCheck:
    """Outcome of a single smoke-test check.

    A new instance starts as PASS with empty text; the check functions in
    this module then overwrite ``status``, ``message`` and ``details``.
    """
    # Label shown in reports, e.g. "Page Loads".
    name: str
    # PASS, WARN or FAIL for this check.
    status: Severity = Severity.PASS
    # One-line outcome printed next to the name in the text report.
    message: str = ""
    # Optional extra context; the text report prints it on its own line
    # only when it is non-empty.
    details: str = ""


@dataclass
class SmokeResult:
    """Aggregate result of one smoke-test run, as built by run_smoke_test()."""
    # URL that was tested.
    url: str = ""
    # Overall status: FAIL if any check failed, else WARN if any warned,
    # else PASS (computed in run_smoke_test()).
    status: Severity = Severity.PASS
    # Individual check outcomes in the order they were executed.
    checks: list[SmokeCheck] = field(default_factory=list)
    # Path of the screenshot file used for this run.
    screenshot_path: str = ""
    # Human-readable one-line summary with check, failure and warning counts.
    summary: str = ""
    # Total run time in milliseconds.
    duration_ms: int = 0


# === HTTP/Network Checks ===

def check_page_loads(url: str) -> SmokeCheck:
    """Request *url* once and report whether it answered with HTTP 200.

    Args:
        url: Address to fetch (15 second timeout, custom User-Agent).

    Returns:
        A "Page Loads" check: PASS for status 200, WARN for any other status
        that urllib hands back without raising, FAIL for an HTTPError or any
        other exception raised while building or sending the request.
    """
    outcome = SmokeCheck(name="Page Loads")
    try:
        # Request() itself rejects malformed URLs, so it stays inside the try.
        request = urllib.request.Request(url, headers={"User-Agent": "NexusSmokeTest/1.0"})
        with urllib.request.urlopen(request, timeout=15) as response:
            code = response.status
    except urllib.error.HTTPError as http_err:
        outcome.status = Severity.FAIL
        outcome.message = f"HTTP {http_err.code}: {http_err.reason}"
        return outcome
    except Exception as exc:
        outcome.status = Severity.FAIL
        outcome.message = f"Connection failed: {exc}"
        return outcome

    if code != 200:
        outcome.status = Severity.WARN
        outcome.message = f"HTTP {code} (expected 200)"
        return outcome

    outcome.status = Severity.PASS
    outcome.message = f"HTTP {code}"
    return outcome


def check_html_content(url: str) -> tuple[SmokeCheck, str]:
    """Fetch the page HTML and run heuristic structure checks on it.

    The checks are plain substring/regex heuristics on the raw markup:
    a Three.js reference, a ``<canvas>`` element, a ``<title>`` mentioning
    "nexus" or "tower", error-like words near the start or end of the
    document, and the presence of at least one ``<script>`` tag.

    Args:
        url: Page to download (15 second timeout, custom User-Agent).

    Returns:
        ``(check, html)`` where ``html`` is the decoded body, or ``""`` when
        the download failed (the check is then FAIL). Otherwise the check is
        PASS with no issues, WARN with one or two, FAIL with three or more.
    """
    check = SmokeCheck(name="HTML Content")
    html = ""
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "NexusSmokeTest/1.0"})
        with urllib.request.urlopen(req, timeout=15) as resp:
            html = resp.read().decode("utf-8", errors="replace")
    except Exception as e:
        check.status = Severity.FAIL
        check.message = f"Failed to fetch: {e}"
        return check, html

    # Lower-case once: every substring heuristic below is case-insensitive.
    html_lower = html.lower()
    issues = []
    detail_parts = []

    # Check for Three.js ("three" also covers "THREE" and "threejs").
    if "three" not in html_lower:
        issues.append("No Three.js reference found")

    # Check for canvas element
    if "<canvas" not in html_lower:
        issues.append("No <canvas> element found")

    # Check title
    title_match = re.search(r"<title[^>]*>(.*?)</title>", html, re.IGNORECASE | re.DOTALL)
    if title_match:
        title = title_match.group(1).strip()
        detail_parts.append(f"Title: {title}")
        if "nexus" not in title.lower() and "tower" not in title.lower():
            issues.append(f"Title doesn't reference Nexus: '{title}'")
    else:
        issues.append("No <title> element")

    # Check for error messages; only the first and last 500 characters are
    # scanned, which is where an error page's heading or footer would sit.
    error_patterns = ["404", "not found", "error", "500 internal", "connection refused"]
    head, tail = html_lower[:500], html_lower[-500:]
    for pattern in error_patterns:
        if pattern in head or pattern in tail:
            issues.append(f"Possible error message in HTML: '{pattern}'")

    # Check for script tags (app loaded)
    script_count = html_lower.count("<script")
    if script_count == 0:
        issues.append("No <script> tags found")
    else:
        detail_parts.append(f"Scripts: {script_count}")

    # Joining avoids a dangling " | " prefix when the page has no <title>.
    check.details = " | ".join(detail_parts)

    if issues:
        check.status = Severity.FAIL if len(issues) > 2 else Severity.WARN
        check.message = "; ".join(issues)
    else:
        check.status = Severity.PASS
        check.message = "HTML structure looks correct"

    return check, html


# === Screenshot Capture ===

# Files at or below this size are treated as failed captures.
_MIN_SCREENSHOT_BYTES = 1000

# Executed in a child interpreter via ``python -c``. The URL, output path and
# viewport size arrive through sys.argv rather than being spliced into the
# source, so quotes or backslashes in them cannot break or alter the script.
_PLAYWRIGHT_SCRIPT = """
import json
import sys

try:
    from playwright.sync_api import sync_playwright
except ImportError:
    sys.exit(2)

url, output_path = sys.argv[1], sys.argv[2]
width, height = int(sys.argv[3]), int(sys.argv[4])

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    page = browser.new_page(viewport={"width": width, "height": height})

    errors = []
    page.on("pageerror", lambda e: errors.append(str(e)))
    page.on("console", lambda m: errors.append(f"console.{m.type}: {m.text}") if m.type == "error" else None)

    page.goto(url, wait_until="networkidle", timeout=30000)
    page.wait_for_timeout(3000)  # Wait for Three.js to render
    page.screenshot(path=output_path, full_page=False)

    # Check for Three.js scene
    has_canvas = page.evaluate("() => !!document.querySelector('canvas')")
    has_three = page.evaluate("() => typeof THREE !== 'undefined' || !!document.querySelector('canvas')")
    title = page.title()

    browser.close()

    print(json.dumps({"has_canvas": has_canvas, "has_three": has_three, "title": title, "errors": errors[:5]}))
"""


def _screenshot_written(output_path: str) -> bool:
    """Return True when *output_path* exists and is larger than the minimum size."""
    target = Path(output_path)
    return target.exists() and target.stat().st_size > _MIN_SCREENSHOT_BYTES


def _parse_playwright_report(stdout: str) -> dict:
    """Return the JSON object printed by the Playwright script, or ``{}``."""
    for line in stdout.strip().split("\n"):
        if line.startswith("{"):
            try:
                info = json.loads(line)
            except json.JSONDecodeError:
                return {}
            return info if isinstance(info, dict) else {}
    return {}


def take_screenshot(url: str, output_path: str, width: int = 1280, height: int = 720) -> SmokeCheck:
    """Capture a screenshot of *url* into *output_path*.

    Backends are tried in order until one writes a file larger than
    ``_MIN_SCREENSHOT_BYTES``: Playwright (in a child interpreter),
    ``wkhtmltoimage``, then a browserless service when the
    ``BROWSERLESS_URL`` environment variable is set.

    Args:
        url: Page to capture.
        output_path: Destination PNG path.
        width: Viewport width in pixels.
        height: Viewport height in pixels (used by Playwright only).

    Returns:
        A "Screenshot Capture" check: PASS on success, WARN when Playwright
        captured the page but reported JavaScript errors, and WARN when no
        backend produced a usable file. Never raises for backend failures.
    """
    check = SmokeCheck(name="Screenshot Capture")
    playwright_note = ""  # diagnostic kept for the final "no backend" report

    # Try Playwright
    try:
        result = subprocess.run(
            # sys.executable keeps the child in the same environment as this
            # script, where Playwright is most likely installed.
            [sys.executable or "python3", "-c", _PLAYWRIGHT_SCRIPT,
             url, output_path, str(width), str(height)],
            capture_output=True, text=True, timeout=60
        )

        if result.returncode == 0:
            info = _parse_playwright_report(result.stdout)
            js_errors = info.get("errors") or []
            extras = []
            if info.get("has_canvas"):
                extras.append("canvas present")
            if js_errors:
                extras.append(f"{len(js_errors)} JS errors")
                check.status = Severity.WARN
                check.message = f"JS errors detected: {str(js_errors[0])[:100]}"
            else:
                check.message = "Screenshot captured via Playwright"
            check.details = "; ".join(extras) if extras else "Playwright capture"

            if _screenshot_written(output_path):
                return check
            playwright_note = "Playwright ran but produced no usable screenshot"
        elif result.returncode != 2:
            # Exit code 2 is the script's "Playwright not installed" signal,
            # which the install hint at the end already covers.
            playwright_note = f"Playwright failed: {result.stderr[:200]}"
    except Exception as e:
        playwright_note = f"Playwright error: {e}"

    # Try wkhtmltoimage
    try:
        result = subprocess.run(
            ["wkhtmltoimage", "--width", str(width), "--quality", "90", url, output_path],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode == 0 and _screenshot_written(output_path):
            check.status = Severity.PASS
            check.message = "Screenshot captured via wkhtmltoimage"
            check.details = ""
            return check
    except Exception:
        # Best-effort fallback: a missing binary or timeout just means we
        # move on to the next backend.
        pass

    # Try curl + browserless (if available)
    browserless = os.environ.get("BROWSERLESS_URL")
    if browserless:
        try:
            payload = json.dumps({
                "url": url,
                "options": {"type": "png", "fullPage": False}
            })
            req = urllib.request.Request(
                f"{browserless}/screenshot",
                data=payload.encode(),
                headers={"Content-Type": "application/json"}
            )
            with urllib.request.urlopen(req, timeout=30) as resp:
                img_data = resp.read()
                Path(output_path).write_bytes(img_data)
                if Path(output_path).stat().st_size > _MIN_SCREENSHOT_BYTES:
                    check.status = Severity.PASS
                    check.message = "Screenshot captured via browserless"
                    check.details = ""
                    return check
        except Exception:
            # Best-effort fallback: fall through to the "no backend" report.
            pass

    check.status = Severity.WARN
    check.message = "No screenshot backend available"
    hint = "Install Playwright: pip install playwright && playwright install chromium"
    # Keep the Playwright diagnostic so an installed-but-failing browser is
    # not misreported as simply missing.
    check.details = f"{hint} | {playwright_note}" if playwright_note else hint
    return check


# === Vision Analysis ===

# Instruction text sent to the vision model together with the screenshot by
# run_vision_check(). It asks for a JSON object whose "checks" entries each
# carry a name, a lowercase pass/fail/warn status and a message.
# NOTE(review): item 6 (TOWER) has no matching entry in the example schema
# below, so the model may not report it as a separate check — confirm intent.
VISION_PROMPT = """You are a web QA engineer. Analyze this screenshot of The Nexus (a Three.js 3D world).

Check for:
1. LAYOUT: Is the page layout correct? Is content centered, not broken or overlapping?
2. THREE.JS RENDER: Is there a visible 3D canvas/scene? Any black/blank areas where rendering failed?
3. NAVIGATION: Are navigation elements (buttons, links, menu) visible and properly placed?
4. TEXT: Is text readable? Any missing text, garbled characters, or font issues?
5. ERRORS: Any visible error messages, 404 pages, or broken images?
6. TOWER: Is the Tower or entry portal visible in the scene?

Respond as JSON:
{
    "status": "PASS|FAIL|WARN",
    "checks": [
        {"name": "Layout", "status": "pass|fail|warn", "message": "..."},
        {"name": "Three.js Render", "status": "pass|fail|warn", "message": "..."},
        {"name": "Navigation", "status": "pass|fail|warn", "message": "..."},
        {"name": "Text Readability", "status": "pass|fail|warn", "message": "..."},
        {"name": "Error Messages", "status": "pass|fail|warn", "message": "..."}
    ],
    "summary": "brief overall assessment"
}"""


def _coerce_severity(raw: object) -> Severity:
    """Map a model-supplied status to a Severity, defaulting to WARN."""
    try:
        # Models frequently answer "PASS" or " Pass "; normalise before lookup.
        return Severity(str(raw).strip().lower())
    except ValueError:
        return Severity.WARN


def run_vision_check(screenshot_path: str, model: str = VISION_MODEL) -> list[SmokeCheck]:
    """Ask the Ollama vision model to review the screenshot.

    The image is sent base64-encoded to ``{OLLAMA_BASE}/api/chat`` together
    with ``VISION_PROMPT``; the JSON reply is converted to one check per
    reported item.

    Args:
        screenshot_path: PNG file to analyse.
        model: Ollama model name.

    Returns:
        A list of checks named ``"Vision: <name>"``. If the model returns no
        usable checks, or the request fails for any reason, a single WARN
        "Vision Analysis" check is returned instead. Never raises.
    """
    checks = []
    try:
        b64 = base64.b64encode(Path(screenshot_path).read_bytes()).decode()
        payload = json.dumps({
            "model": model,
            # Ollama's native chat endpoint expects string content plus a
            # separate list of base64 images, not OpenAI-style content parts.
            "messages": [{"role": "user", "content": VISION_PROMPT, "images": [b64]}],
            "stream": False,
            "options": {"temperature": 0.1}
        }).encode()

        req = urllib.request.Request(
            f"{OLLAMA_BASE}/api/chat",
            data=payload,
            headers={"Content-Type": "application/json"}
        )
        with urllib.request.urlopen(req, timeout=120) as resp:
            result = json.loads(resp.read())
        content = (result.get("message") or {}).get("content", "")

        parsed = _parse_json_response(content)
        raw_checks = parsed.get("checks", []) if isinstance(parsed, dict) else []
        if not isinstance(raw_checks, list):
            raw_checks = []

        for c in raw_checks:
            if not isinstance(c, dict):
                continue  # ignore malformed entries instead of dropping the whole reply
            checks.append(SmokeCheck(
                name=f"Vision: {c.get('name', 'Unknown')}",
                status=_coerce_severity(c.get("status", "warn")),
                message=str(c.get("message", ""))
            ))

        if not checks:
            checks.append(SmokeCheck(
                name="Vision Analysis",
                status=Severity.WARN,
                message="Vision model returned no structured checks"
            ))

    except Exception as e:
        # Vision analysis is optional: report the problem as a warning.
        checks.append(SmokeCheck(
            name="Vision Analysis",
            status=Severity.WARN,
            message=f"Vision check failed: {e}"
        ))

    return checks


# === Baseline Comparison ===

def compare_baseline(current_path: str, baseline_path: str) -> SmokeCheck:
    """Compare the current screenshot against a baseline image.

    Two signals are used: the relative difference in file size (always), and
    the ImageMagick ``compare -metric AE`` changed-pixel count when that tool
    is available and its output can be parsed. The pixel count can raise the
    result to WARN (> 1000 pixels) or FAIL (> 10000 pixels).

    Args:
        current_path: Screenshot produced by this run.
        baseline_path: Reference screenshot.

    Returns:
        A "Baseline Comparison" check. WARN when the baseline is missing or
        empty, FAIL when there is no current screenshot, otherwise
        PASS/WARN/FAIL according to the thresholds above. The diff image is
        written next to the current screenshot as ``<stem>-diff.png``.
    """
    check = SmokeCheck(name="Baseline Comparison")

    if not Path(baseline_path).exists():
        check.status = Severity.WARN
        check.message = f"Baseline not found: {baseline_path}"
        return check

    if not Path(current_path).exists():
        check.status = Severity.FAIL
        check.message = "No current screenshot to compare"
        return check

    # Simple file size comparison (rough regression indicator)
    baseline_size = Path(baseline_path).stat().st_size
    current_size = Path(current_path).stat().st_size

    if baseline_size == 0:
        check.status = Severity.WARN
        check.message = "Baseline is empty"
        return check

    diff_pct = abs(current_size - baseline_size) / baseline_size * 100

    if diff_pct > 50:
        check.status = Severity.FAIL
        check.message = f"Major visual change: {diff_pct:.0f}% file size difference"
    elif diff_pct > 20:
        check.status = Severity.WARN
        check.message = f"Significant visual change: {diff_pct:.0f}% file size difference"
    else:
        check.status = Severity.PASS
        check.message = f"Visual consistency: {diff_pct:.1f}% difference"

    check.details = f"Baseline: {baseline_size}B, Current: {current_size}B"

    # Pixel-level diff using ImageMagick (if available)
    try:
        # Build the diff path from the file stem so it can never resolve to
        # the screenshot itself (str.replace did when the name lacked ".png").
        current = Path(current_path)
        diff_output = str(current.with_name(f"{current.stem}-diff.png"))
        result = subprocess.run(
            ["compare", "-metric", "AE", current_path, baseline_path, diff_output],
            capture_output=True, text=True, timeout=15
        )
        # compare exits 0 (similar) or 1 (dissimilar); 2 signals an error.
        if result.returncode < 2:
            # The metric may be printed as "1234", "1.2e+06" or "1234 (0.01)".
            metric = re.match(r"\s*(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)", result.stderr)
            if metric:
                pixels_diff = int(float(metric.group(1)))
                check.details += f" | Pixel diff: {pixels_diff}"
                if pixels_diff > 10000:
                    check.status = Severity.FAIL
                    check.message = f"Major visual regression: {pixels_diff} pixels changed"
                elif pixels_diff > 1000:
                    check.status = Severity.WARN
                    check.message = f"Visual change detected: {pixels_diff} pixels changed"
    except (OSError, subprocess.SubprocessError, ValueError, OverflowError):
        # ImageMagick is optional: a missing binary, timeout or unparsable
        # output leaves the file-size verdict in place.
        pass

    return check


# === Helpers ===

def _parse_json_response(text: str) -> dict:
    """Extract a JSON object from a model reply.

    Handles replies wrapped in a Markdown code fence and replies where the
    object is surrounded by prose (the outermost ``{...}`` span is tried).

    Args:
        text: Raw reply text; ``None`` or empty is treated as no content.

    Returns:
        The decoded object, or ``{}`` when no JSON object can be recovered.
        Valid JSON that is not an object (list, string, number) also yields
        ``{}`` unless an object can be extracted from inside it, so callers
        can always use ``.get``.
    """
    cleaned = (text or "").strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (e.g. ```json) and a closing fence.
        lines = cleaned.split("\n")[1:]
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        cleaned = "\n".join(lines)

    candidates = [cleaned]
    start = cleaned.find("{")
    end = cleaned.rfind("}")
    if start >= 0 and end > start:
        candidates.append(cleaned[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return {}


# === Main Smoke Test ===

def run_smoke_test(url: str, vision: bool = False, baseline: Optional[str] = None,
                   model: str = VISION_MODEL) -> SmokeResult:
    """Run the full visual smoke test suite.

    Steps: page-load check, HTML content check, screenshot capture, optional
    vision analysis, optional baseline comparison. Progress is written to
    stderr.

    Args:
        url: Page to test.
        vision: Run the vision-model analysis on the screenshot.
        baseline: Path of a baseline screenshot to compare against, if any.
        model: Vision model name passed to run_vision_check().

    Returns:
        A SmokeResult whose status is FAIL if any check failed, WARN if any
        warned, otherwise PASS. ``screenshot_path`` is left empty when no
        screenshot could be captured.
    """
    import time
    # Monotonic clock: the duration is unaffected by system clock changes.
    start = time.monotonic()

    result = SmokeResult(url=url)

    # 1. Page loads
    print("  [1/5] Checking page loads...", file=sys.stderr)
    result.checks.append(check_page_loads(url))

    # 2. HTML content
    print("  [2/5] Checking HTML content...", file=sys.stderr)
    html_check, _html = check_html_content(url)
    result.checks.append(html_check)

    # 3. Screenshot
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        screenshot_path = tmp.name
    print("  [3/5] Taking screenshot...", file=sys.stderr)
    result.checks.append(take_screenshot(url, screenshot_path))

    # The temp file always exists (it was just created), so existence alone
    # says nothing; only a non-empty file means a backend wrote an image.
    shot = Path(screenshot_path)
    has_screenshot = shot.exists() and shot.stat().st_size > 0
    if has_screenshot:
        result.screenshot_path = screenshot_path
    else:
        try:
            shot.unlink()  # remove the empty placeholder
        except OSError:
            pass  # cleanup only; a leftover empty temp file is harmless

    # 4. Vision analysis (optional)
    if vision and has_screenshot:
        print("  [4/5] Running vision analysis...", file=sys.stderr)
        result.checks.extend(run_vision_check(screenshot_path, model))
    elif vision:
        print("  [4/5] Vision analysis skipped (no screenshot)", file=sys.stderr)
        result.checks.append(SmokeCheck(
            name="Vision Analysis",
            status=Severity.WARN,
            message="Skipped: no screenshot was captured"
        ))
    else:
        print("  [4/5] Vision analysis skipped", file=sys.stderr)

    # 5. Baseline comparison (optional)
    if baseline:
        print("  [5/5] Comparing against baseline...", file=sys.stderr)
        # With the placeholder removed, a missing screenshot is reported as
        # such instead of as a 100% file-size regression.
        result.checks.append(compare_baseline(screenshot_path, baseline))
    else:
        print("  [5/5] Baseline comparison skipped", file=sys.stderr)

    # Determine overall status
    fails = sum(1 for c in result.checks if c.status == Severity.FAIL)
    warns = sum(1 for c in result.checks if c.status == Severity.WARN)

    if fails > 0:
        result.status = Severity.FAIL
    elif warns > 0:
        result.status = Severity.WARN
    else:
        result.status = Severity.PASS

    result.summary = (
        f"{result.status.value.upper()}: {len(result.checks)} checks, "
        f"{fails} failures, {warns} warnings"
    )
    result.duration_ms = int((time.monotonic() - start) * 1000)

    return result


# === Output ===

def format_result(result: SmokeResult, fmt: str = "json") -> str:
    """Render a smoke-test result as JSON or as a plain-text report.

    Args:
        result: The result to render.
        fmt: ``"json"`` for an indented JSON document, ``"text"`` for a
            human-readable report with one line per check.

    Returns:
        The rendered string, or ``""`` for any other *fmt* value.
    """
    if fmt == "json":
        serialized_checks = []
        for entry in result.checks:
            row = asdict(entry)
            # Enum members are replaced by their plain string value.
            if hasattr(row["status"], "value"):
                row["status"] = row["status"].value
            serialized_checks.append(row)
        document = {
            "url": result.url,
            "status": result.status.value,
            "summary": result.summary,
            "duration_ms": result.duration_ms,
            "screenshot": result.screenshot_path,
            "checks": serialized_checks,
        }
        return json.dumps(document, indent=2)

    if fmt != "text":
        return ""

    rule = "=" * 50
    icons = {"pass": "✅", "warn": "⚠️", "fail": "❌"}
    report = [
        rule,
        "  NEXUS VISUAL SMOKE TEST",
        rule,
        f"  URL: {result.url}",
        f"  Status: {result.status.value.upper()}",
        f"  Duration: {result.duration_ms}ms",
        "",
    ]
    for entry in result.checks:
        if hasattr(entry.status, "value"):
            status_key = entry.status.value
        else:
            status_key = str(entry.status)
        marker = icons.get(status_key, "?")
        report.append(f"  {marker} {entry.name}: {entry.message}")
        if entry.details:
            report.append(f"     {entry.details}")
    report.extend(["", f"  {result.summary}", rule])
    return "\n".join(report)


# === CLI ===

def main(argv: Optional[list[str]] = None) -> None:
    """Command-line entry point.

    Parses arguments, runs the smoke test, writes the formatted result to
    stdout or to ``--output``, and exits with status 1 when the overall
    result is FAIL. Warnings do not fail the run.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Visual Smoke Test for The Nexus — layout + regression verification"
    )
    parser.add_argument("--url", default=DEFAULT_URL, help=f"Nexus URL (default: {DEFAULT_URL})")
    # --programmatic appears in the module usage text; without it argparse
    # rejected that invocation. It cannot be combined with --vision.
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument("--vision", action="store_true", help="Include vision model analysis")
    mode.add_argument("--programmatic", action="store_true",
                      help="Programmatic checks only; never call the vision model (CI-safe)")
    parser.add_argument("--baseline", help="Baseline screenshot for regression comparison")
    parser.add_argument("--model", default=VISION_MODEL, help=f"Vision model (default: {VISION_MODEL})")
    parser.add_argument("--format", choices=["json", "text"], default="json")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")

    args = parser.parse_args(argv)

    print(f"Running smoke test on {args.url}...", file=sys.stderr)
    result = run_smoke_test(
        args.url,
        vision=args.vision and not args.programmatic,
        baseline=args.baseline,
        model=args.model,
    )
    output = format_result(result, args.format)

    if args.output:
        # The text report contains emoji; force UTF-8 so writing does not
        # depend on the platform's default encoding.
        Path(args.output).write_text(output, encoding="utf-8")
        print(f"Results written to {args.output}", file=sys.stderr)
    else:
        print(output)

    if result.status == Severity.FAIL:
        sys.exit(1)
    elif result.status == Severity.WARN:
        sys.exit(0)  # Warnings don't fail CI


if __name__ == "__main__":
    main()