# timmy-config/bin/matrix_glitch_detector.py

#!/usr/bin/env python3
"""
Matrix 3D World Glitch Detector

Scans a 3D web world for visual artifacts using browser automation
and vision AI analysis. Produces structured glitch reports.

Usage:
    python matrix_glitch_detector.py <url> [--angles 4] [--output report.json]
    python matrix_glitch_detector.py --demo  # Run with synthetic test data

Ref: timmy-config#491
"""

import argparse
import base64
import json
import os
import sys
import time
import uuid
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

# Put this script's own directory on sys.path so the sibling glitch_patterns module imports
sys.path.insert(0, str(Path(__file__).resolve().parent))
from glitch_patterns import (
    GlitchCategory,
    GlitchPattern,
    GlitchSeverity,
    MATRIX_GLITCH_PATTERNS,
    build_vision_prompt,
    get_patterns_by_severity,
)


@dataclass
class DetectedGlitch:
    """One glitch finding together with where and when it was observed.

    Only the first six fields are required; the rest default to "no
    location, first screenshot, front angle". An empty ``timestamp`` is
    replaced with the current UTC time when the instance is created.
    """

    # --- what was found ---
    id: str
    category: str
    name: str
    description: str
    severity: str
    confidence: float
    # --- where in the image (both optional) ---
    location_x: Optional[float] = None  # percentage across image
    location_y: Optional[float] = None  # percentage down image
    # --- which capture it came from ---
    screenshot_index: int = 0
    screenshot_angle: str = "front"
    # ISO-8601 string; filled in by __post_init__ when left empty
    timestamp: str = ""

    def __post_init__(self) -> None:
        """Stamp the finding with the current UTC time unless one was given."""
        if self.timestamp:
            return
        now_utc = datetime.now(timezone.utc)
        self.timestamp = now_utc.isoformat()


@dataclass
class ScanResult:
    """Everything recorded about one scan of a 3D world URL.

    The three trailing containers default to fresh empty objects, so
    instances never share them.
    """

    scan_id: str
    url: str
    timestamp: str
    total_screenshots: int
    angles_captured: list[str]
    glitches: list[dict] = field(default_factory=list)
    summary: dict = field(default_factory=dict)
    metadata: dict = field(default_factory=dict)

    def to_json(self, indent: int = 2) -> str:
        """Serialize the whole result to a JSON string.

        Args:
            indent: Indentation passed through to ``json.dumps``.

        Returns:
            The JSON text for this result, fields in declaration order.
        """
        as_plain_dict = asdict(self)
        serialized = json.dumps(as_plain_dict, indent=indent)
        return serialized


def generate_scan_angles(num_angles: int) -> list[dict]:
    """Generate camera angle configurations for multi-angle scanning.

    The first eight angles come from a fixed preset list (four compass
    views, then low/high/diagonal variants). Requests beyond eight are
    padded with level (pitch 0) views labelled ``angle_<i>`` whose yaw is
    ``i * (360 // num_angles)``; these extra yaws are not de-duplicated
    against the presets (e.g. 12 angles yields a second yaw-270 view).

    Args:
        num_angles: How many angles to produce. Zero or a negative number
            yields an empty list.

    Returns:
        A list of ``{"yaw", "pitch", "label"}`` dicts, ``num_angles`` long
        (empty when ``num_angles`` is not positive).
    """
    # A negative count must not reach the slice below: base_angles[:-1]
    # would silently return seven presets instead of none.
    if num_angles <= 0:
        return []

    base_angles = [
        {"yaw": 0, "pitch": 0, "label": "front"},
        {"yaw": 90, "pitch": 0, "label": "right"},
        {"yaw": 180, "pitch": 0, "label": "back"},
        {"yaw": 270, "pitch": 0, "label": "left"},
        {"yaw": 0, "pitch": -30, "label": "front_low"},
        {"yaw": 45, "pitch": -15, "label": "front_right_low"},
        {"yaw": 0, "pitch": 30, "label": "front_high"},
        {"yaw": 45, "pitch": 0, "label": "front_right"},
    ]

    if num_angles <= len(base_angles):
        return base_angles[:num_angles]

    # Integer step keeps yaw values whole; hoisted because it is loop-invariant.
    yaw_step = 360 // num_angles
    extra_angles = [
        {"yaw": i * yaw_step, "pitch": 0, "label": f"angle_{i}"}
        for i in range(len(base_angles), num_angles)
    ]
    return base_angles + extra_angles


def capture_screenshots(url: str, angles: list[dict], output_dir: Path) -> list[Path]:
    """Capture one screenshot per camera angle, always producing a file.

    For each angle a real capture is attempted through ``_browser_capture``.
    If that reports failure or raises, a placeholder image is written
    instead, so the returned list always has one path per angle.

    Args:
        url: Address of the 3D world to capture.
        angles: Angle dicts; each must provide ``label`` (used in the file
            name) plus whatever ``_browser_capture`` reads from it.
        output_dir: Directory for the image files; created if missing.

    Returns:
        Paths named ``screenshot_<index>_<label>.png``, in angle order.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    screenshots = []

    for i, angle in enumerate(angles):
        filename = output_dir / f"screenshot_{i:03d}_{angle['label']}.png"

        captured = False
        try:
            captured = bool(_browser_capture(url, angle, filename))
        except Exception as exc:
            # Best-effort: a broken or slow browser tool must not abort the
            # scan, but the failure is reported instead of being swallowed.
            print(
                f"  [!] Browser capture failed for {filename.name}: {exc}",
                file=sys.stderr,
            )

        if not captured:
            # Offline/test fallback so later stages still get a file to read.
            _generate_placeholder_screenshot(filename, angle)

        screenshots.append(filename)

    return screenshots


def _browser_capture(url: str, angle: dict, output_path: Path) -> bool:
    """Try to take a real screenshot by running an external capture script.

    The script location is read from the ``BROWSER_VISION_SCRIPT``
    environment variable. When the variable is unset, empty, or points at a
    path that does not exist, nothing is run and the result is False.

    Args:
        url: Page to capture.
        angle: Dict providing ``yaw`` and ``pitch`` for the camera rotation.
        output_path: Where the script is asked to write the image.

    Returns:
        True only if the script exited with status 0 and ``output_path``
        exists afterwards.

    Raises:
        subprocess.TimeoutExpired: If the script runs longer than 30 seconds.
    """
    script = os.environ.get("BROWSER_VISION_SCRIPT")
    if not script or not Path(script).exists():
        return False

    # Imported lazily: only needed when a capture script is configured.
    import subprocess

    argv = [sys.executable, script]
    argv += ["--url", url]
    argv += ["--screenshot", str(output_path)]
    argv += ["--rotate-yaw", str(angle["yaw"])]
    argv += ["--rotate-pitch", str(angle["pitch"])]

    completed = subprocess.run(argv, capture_output=True, text=True, timeout=30)
    if completed.returncode != 0:
        return False
    return output_path.exists()


def _generate_placeholder_screenshot(path: Path, angle: dict) -> None:
    """Write a valid 1x1 fully transparent RGBA PNG to *path*.

    The image is assembled chunk by chunk so that every length and CRC is
    computed rather than hand-typed; a hard-coded byte string is easy to
    corrupt by a single dropped byte, which makes the file undecodable.

    Args:
        path: Destination file; overwritten if it exists.
        angle: Accepted for signature parity with the real capture path;
            not used.
    """
    # Local imports: nothing else in this module needs these two.
    import struct
    import zlib

    def _chunk(tag: bytes, payload: bytes) -> bytes:
        # PNG chunk layout: big-endian length, type, data, CRC-32 of type+data.
        checksum = zlib.crc32(tag + payload) & 0xFFFFFFFF
        return struct.pack(">I", len(payload)) + tag + payload + struct.pack(">I", checksum)

    # IHDR: width 1, height 1, 8 bits per channel, colour type 6 (RGBA),
    # default compression, filter and (no) interlace.
    header = struct.pack(">IIBBBBB", 1, 1, 8, 6, 0, 0, 0)
    # One scanline: filter byte 0 followed by a single (0, 0, 0, 0) pixel.
    scanline = b"\x00" + b"\x00\x00\x00\x00"

    png_data = (
        b"\x89PNG\r\n\x1a\n"
        + _chunk(b"IHDR", header)
        + _chunk(b"IDAT", zlib.compress(scanline))
        + _chunk(b"IEND", b"")
    )
    path.write_bytes(png_data)


def analyze_with_vision(
    screenshot_paths: list[Path],
    angles: list[dict],
    patterns: list[GlitchPattern] | None = None,
) -> list[DetectedGlitch]:
    """Analyze every screenshot and collect all glitches that were reported.

    A single detection prompt is built from the pattern set and reused for
    each image. Screenshots are paired with angles positionally; if the two
    lists differ in length, the surplus entries of the longer one are
    ignored. Each image is handed to ``_vision_analyze_image``, and whatever
    it returns (possibly nothing) is appended in screenshot order.

    Args:
        screenshot_paths: Image files to analyze.
        angles: Angle dicts; ``label`` is recorded on each finding.
        patterns: Pattern set for the prompt; ``None`` selects
            ``MATRIX_GLITCH_PATTERNS``.

    Returns:
        All findings across all screenshots, as one flat list.
    """
    active_patterns = MATRIX_GLITCH_PATTERNS if patterns is None else patterns
    prompt = build_vision_prompt(active_patterns)

    paired = zip(screenshot_paths, angles)
    return [
        finding
        for index, (shot, angle) in enumerate(paired)
        for finding in _vision_analyze_image(shot, prompt, index, angle["label"])
    ]


def _vision_analyze_image(
    image_path: Path,
    prompt: str,
    screenshot_index: int,
    angle_label: str,
) -> list[DetectedGlitch]:
    """Analyze one screenshot, degrading to "no findings" on any problem.

    The API key is taken from ``VISION_API_KEY``, falling back to
    ``OPENAI_API_KEY``; the endpoint base from ``VISION_API_BASE``
    (default ``https://api.openai.com/v1``). Without a key no request is
    made. If the API call raises, the error is printed to stderr and the
    image is treated as having no findings.

    Args:
        image_path: Screenshot to analyze.
        prompt: Detection prompt to send with the image.
        screenshot_index: Index recorded on each finding.
        angle_label: Angle label recorded on each finding.

    Returns:
        The parsed findings, or an empty list when no key is configured or
        the call failed.
    """
    key = os.environ.get("VISION_API_KEY") or os.environ.get("OPENAI_API_KEY")
    if not key:
        # No vision backend available
        return []

    endpoint_base = os.environ.get("VISION_API_BASE", "https://api.openai.com/v1")
    try:
        findings = _call_vision_api(
            image_path, prompt, screenshot_index, angle_label, key, endpoint_base
        )
    except Exception as e:
        print(f"  [!] Vision API error for {image_path.name}: {e}", file=sys.stderr)
        return []
    return findings


def _call_vision_api(
    image_path: Path,
    prompt: str,
    screenshot_index: int,
    angle_label: str,
    api_key: str,
    api_base: str,
) -> list[DetectedGlitch]:
    """Send one screenshot to an OpenAI-compatible chat completions endpoint.

    The image is embedded as a base64 ``data:image/png`` URL next to the
    text prompt in a single user message. The model name comes from the
    ``VISION_MODEL`` environment variable (default ``gpt-4o``).

    Args:
        image_path: Screenshot file to upload.
        prompt: Detection prompt text.
        screenshot_index: Index recorded on each finding.
        angle_label: Angle label recorded on each finding.
        api_key: Bearer token for the ``Authorization`` header.
        api_base: Base URL; ``/chat/completions`` is appended. A trailing
            slash is tolerated.

    Returns:
        Findings parsed from the first choice's message content.

    Raises:
        OSError: If the image cannot be read or the request fails
            (``urllib.error.URLError`` is an ``OSError``).
        KeyError, IndexError: If the response lacks the expected
            ``choices[0].message.content`` structure.
    """
    import urllib.request

    image_data = base64.b64encode(image_path.read_bytes()).decode()

    payload = json.dumps({
        "model": os.environ.get("VISION_MODEL", "gpt-4o"),
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_data}",
                            "detail": "high",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 4096,
    }).encode()

    # Strip trailing slashes so a base like ".../v1/" does not produce
    # ".../v1//chat/completions".
    endpoint = f"{api_base.rstrip('/')}/chat/completions"

    req = urllib.request.Request(
        endpoint,
        data=payload,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        },
    )

    with urllib.request.urlopen(req, timeout=60) as resp:
        result = json.loads(resp.read())

    # The content field may be null (e.g. a refusal); treat that as an
    # empty reply instead of handing None to the text parser.
    content = result["choices"][0]["message"]["content"] or ""
    return _parse_vision_response(content, screenshot_index, angle_label)


def _add_glitch_from_dict(
    item: dict,
    glitches: list[DetectedGlitch],
    screenshot_index: int,
    angle_label: str,
) -> None:
    """Convert one detection dict into a DetectedGlitch and append it.

    Several alternative key spellings are accepted: ``category``/``type``,
    ``confidence``/``score``, ``name``/``label``, ``description``/``detail``
    and ``location_x``/``x``, ``location_y``/``y``. Malformed values are
    coerced instead of raising, so one odd entry cannot discard the other
    detections parsed from the same response.

    Args:
        item: Detection dict taken from the model's JSON output.
        glitches: List that receives the new finding (mutated in place).
        screenshot_index: Index of the screenshot the finding belongs to.
        angle_label: Label of the camera angle the finding belongs to.
    """
    raw_category = item.get("category", item.get("type", "unknown"))
    # A JSON null or non-string category would break the string matching in
    # severity inference; normalize to text up front.
    cat = "unknown" if raw_category is None else str(raw_category)

    raw_confidence = item.get("confidence", item.get("score", 0.5))
    try:
        conf = float(raw_confidence)
    except (TypeError, ValueError):
        # e.g. null or "high": fall back to the same neutral default used
        # when the key is absent.
        conf = 0.5

    severity = item.get("severity")
    if not isinstance(severity, str) or not severity.strip():
        # Infer only when the model gave no usable label.
        severity = _infer_severity(cat, conf)

    glitch = DetectedGlitch(
        id=str(uuid.uuid4())[:8],
        category=cat,
        name=item.get("name", item.get("label", cat)),
        description=item.get("description", item.get("detail", "")),
        severity=severity,
        confidence=conf,
        location_x=item.get("location_x", item.get("x")),
        location_y=item.get("location_y", item.get("y")),
        screenshot_index=screenshot_index,
        screenshot_angle=angle_label,
    )
    glitches.append(glitch)


def _append_if_json(raw: str, sink: list) -> None:
    """Parse *raw* as JSON and append the value to *sink*; ignore invalid JSON."""
    try:
        sink.append(json.loads(raw))
    except json.JSONDecodeError:
        pass


def _extract_json_payloads(text: str) -> list:
    """Collect JSON values from ``` fenced blocks, then from the whole text."""
    payloads = []
    fenced_lines = []
    inside_fence = False

    for line in text.split("\n"):
        if line.strip().startswith("```"):
            # A fence line toggles state; on closing, parse what was buffered.
            if inside_fence and fenced_lines:
                _append_if_json("\n".join(fenced_lines), payloads)
                fenced_lines = []
            inside_fence = not inside_fence
            continue
        if inside_fence:
            fenced_lines.append(line)

    # An unterminated fence still gets a chance to parse.
    if inside_fence and fenced_lines:
        _append_if_json("\n".join(fenced_lines), payloads)

    # A reply that is bare JSON has no fences. Text containing a fence line
    # is never valid JSON as a whole, so nothing is counted twice.
    _append_if_json(text, payloads)
    return payloads


def _iter_detection_dicts(payload):
    """Yield detection dicts from a parsed value, unwrapping lists and wrappers."""
    if isinstance(payload, list):
        for element in payload:
            yield from _iter_detection_dicts(element)
    elif isinstance(payload, dict):
        for wrapper_key in ("glitches", "detections"):
            if wrapper_key in payload:
                # Wrapper object: its contents are the detections, the
                # wrapper itself is not a glitch.
                yield from _iter_detection_dicts(payload[wrapper_key])
                return
        yield payload
    # Scalars and null carry no detections and are skipped.


def _parse_vision_response(
    text: str, screenshot_index: int, angle_label: str
) -> list[DetectedGlitch]:
    """Parse a vision model reply into structured glitch detections.

    JSON is looked for in ``` fenced blocks and, failing that, in the reply
    as a whole. Every parsed value is normalized the same way: lists are
    flattened, and an object with a ``glitches`` (or else ``detections``)
    key is replaced by that key's contents. This holds for fenced blocks as
    well, so a fenced ``{"glitches": []}`` yields no findings rather than a
    spurious "unknown" one.

    Args:
        text: Raw message content returned by the model.
        screenshot_index: Index recorded on each finding.
        angle_label: Angle label recorded on each finding.

    Returns:
        One DetectedGlitch per detection dict found; empty if the reply is
        empty, not a string, or contains no parseable JSON.
    """
    glitches = []
    if not isinstance(text, str) or not text:
        return glitches

    for payload in _extract_json_payloads(text):
        for entry in _iter_detection_dicts(payload):
            _add_glitch_from_dict(entry, glitches, screenshot_index, angle_label)

    return glitches


def _infer_severity(category: str, confidence: float) -> str:
    """Pick a severity label for a finding that did not come with one.

    The category is matched case-insensitively by substring against two
    groups of keywords. Within a group, confidence strictly above the
    group's threshold selects the stronger of two labels; anything that
    matches neither group is "medium" above 0.6 and "low" otherwise.

    Args:
        category: Category text reported for the finding.
        confidence: Confidence score reported for the finding.

    Returns:
        One of "critical", "high", "medium" or "low".
    """
    # (keywords, threshold, label above threshold, label otherwise);
    # checked in order, first matching tier wins.
    tiers = (
        (("missing_textures", "clipping"), 0.7, "critical", "high"),
        (("floating_assets", "broken_normals"), 0.7, "high", "medium"),
    )

    lowered = category.lower()
    for keywords, threshold, stronger, weaker in tiers:
        matched = any(keyword in lowered for keyword in keywords)
        if matched:
            if confidence > threshold:
                return stronger
            return weaker

    if confidence > 0.6:
        return "medium"
    return "low"


def build_report(
    url: str,
    angles: list[dict],
    screenshots: list[Path],
    glitches: list[DetectedGlitch],
) -> ScanResult:
    """Build the final structured scan report.

    Args:
        url: The scanned URL.
        angles: Angle dicts; their ``label`` values are recorded.
        screenshots: Captured screenshot paths; only the count is used.
        glitches: Findings to include and summarize.

    Returns:
        A ScanResult with a fresh scan id and UTC timestamp, the findings
        as plain dicts, and a summary holding the total, per-severity and
        per-category counts, the worst severity present ("none" when there
        are no findings) and the number of screenshots without findings.
    """
    # Ordering matches the --min-severity choices. Comparing the label
    # strings directly would be alphabetical ("medium" > "critical").
    severity_rank = {"info": 0, "low": 1, "medium": 2, "high": 3, "critical": 4}

    severity_counts = {}
    category_counts = {}
    flagged_indices = set()

    for g in glitches:
        severity_counts[g.severity] = severity_counts.get(g.severity, 0) + 1
        category_counts[g.category] = category_counts.get(g.category, 0) + 1
        flagged_indices.add(g.screenshot_index)

    # Labels outside the known scale rank below "info" so they never mask
    # a recognized severity.
    highest_severity = max(
        severity_counts,
        key=lambda label: severity_rank.get(str(label).lower(), -1),
        default="none",
    )

    # Screenshot positions that no finding points at.
    clean_screenshots = len(set(range(len(screenshots))) - flagged_indices)

    report = ScanResult(
        scan_id=str(uuid.uuid4()),
        url=url,
        timestamp=datetime.now(timezone.utc).isoformat(),
        total_screenshots=len(screenshots),
        angles_captured=[a["label"] for a in angles],
        glitches=[asdict(g) for g in glitches],
        summary={
            "total_glitches": len(glitches),
            "by_severity": severity_counts,
            "by_category": category_counts,
            "highest_severity": highest_severity,
            "clean_screenshots": clean_screenshots,
        },
        metadata={
            "detector_version": "0.1.0",
            "pattern_count": len(MATRIX_GLITCH_PATTERNS),
            "reference": "timmy-config#491",
        },
    )

    return report


def run_demo(output_path: Optional[Path] = None) -> ScanResult:
    """Produce a sample report from four hard-coded findings.

    Screenshots are still captured (or replaced by placeholders) for a
    fixed example URL and four angles, but no analysis is run: the findings
    are canned, one per screenshot. Progress is printed to stdout.

    Args:
        output_path: If given, the JSON report is written to this file.

    Returns:
        The report built from the canned findings.
    """
    print("[*] Running Matrix glitch detection demo...")

    url = "https://matrix.example.com/world/alpha"
    angles = generate_scan_angles(4)
    screenshots_dir = Path("/tmp/matrix_glitch_screenshots")

    print(f"[*] Capturing {len(angles)} screenshots from: {url}")
    screenshots = capture_screenshots(url, angles, screenshots_dir)
    print(f"[*] Captured {len(screenshots)} screenshots")

    # Canned findings, one per screenshot in order:
    # (category, name, description, severity, confidence, x, y, angle)
    samples = (
        (
            "floating_assets",
            "Floating Chair",
            "Office chair floating 0.3m above floor in sector 7",
            "high", 0.87, 35.2, 62.1, "front",
        ),
        (
            "z_fighting",
            "Wall Texture Flicker",
            "Z-fighting between wall panel and decorative overlay",
            "medium", 0.72, 58.0, 40.5, "right",
        ),
        (
            "missing_textures",
            "Placeholder Texture",
            "Bright magenta surface on door frame — missing asset reference",
            "critical", 0.95, 72.3, 28.8, "back",
        ),
        (
            "clipping",
            "Desk Through Wall",
            "Desk corner clipping through adjacent wall geometry",
            "high", 0.81, 15.0, 55.0, "left",
        ),
    )

    demo_glitches = []
    for shot_index, sample in enumerate(samples):
        category, name, description, severity, confidence, x, y, angle = sample
        demo_glitches.append(
            DetectedGlitch(
                id=str(uuid.uuid4())[:8],
                category=category,
                name=name,
                description=description,
                severity=severity,
                confidence=confidence,
                location_x=x,
                location_y=y,
                screenshot_index=shot_index,
                screenshot_angle=angle,
            )
        )

    print(f"[*] Detected {len(demo_glitches)} glitches")
    report = build_report(url, angles, screenshots, demo_glitches)

    if not output_path:
        return report

    output_path.write_text(report.to_json())
    print(f"[*] Report saved to: {output_path}")
    return report


def main():
    """Command-line entry point: parse arguments, then run a demo or a scan.

    With ``--demo`` a canned report is written (to ``--output`` or
    ``glitch_report_demo.json``) and a short summary is printed. Otherwise a
    URL is required: screenshots are captured, analyzed with the patterns
    selected by ``--min-severity``, and the report is written to ``--output``
    or printed to stdout.
    """
    cli = argparse.ArgumentParser(
        description="Matrix 3D World Glitch Detector — scan for visual artifacts",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s https://matrix.example.com/world/alpha
  %(prog)s https://matrix.example.com/world/alpha --angles 8 --output report.json
  %(prog)s --demo
        """,
    )
    cli.add_argument("url", nargs="?", help="URL of the 3D world to scan")
    cli.add_argument(
        "--angles", type=int, default=4, help="Number of camera angles to capture (default: 4)"
    )
    cli.add_argument("--output", "-o", type=str, help="Output file path for JSON report")
    cli.add_argument("--demo", action="store_true", help="Run demo with simulated data")
    cli.add_argument(
        "--min-severity",
        choices=["info", "low", "medium", "high", "critical"],
        default="info",
        help="Minimum severity to include in report",
    )
    cli.add_argument("--verbose", "-v", action="store_true", help="Verbose output")

    opts = cli.parse_args()

    # --- demo mode: canned findings, then a short summary ---
    if opts.demo:
        demo_target = Path(opts.output or "glitch_report_demo.json")
        report = run_demo(demo_target)
        summary_lines = (
            "\n=== Scan Summary ===",
            f"URL: {report.url}",
            f"Screenshots: {report.total_screenshots}",
            f"Glitches found: {report.summary['total_glitches']}",
            f"By severity: {report.summary['by_severity']}",
        )
        for text in summary_lines:
            print(text)
        return

    # --- real scan: a target URL is mandatory from here on ---
    if not opts.url:
        cli.error("URL required (or use --demo)")

    scan_id = str(uuid.uuid4())[:8]
    print(f"[*] Matrix Glitch Detector — Scan {scan_id}")
    print(f"[*] Target: {opts.url}")

    # Camera angles
    angles = generate_scan_angles(opts.angles)
    print(f"[*] Capturing {len(angles)} screenshots...")

    # Screenshots go to a per-scan directory
    shots_dir = Path(f"/tmp/matrix_glitch_{scan_id}")
    screenshots = capture_screenshots(opts.url, angles, shots_dir)
    print(f"[*] Captured {len(screenshots)} screenshots")

    # Pattern selection by severity
    patterns = get_patterns_by_severity(GlitchSeverity(opts.min_severity))

    # Vision analysis
    print(f"[*] Analyzing with vision AI ({len(patterns)} patterns)...")
    glitches = analyze_with_vision(screenshots, angles, patterns)

    # Report: to file when requested, else to stdout
    report = build_report(opts.url, angles, screenshots, glitches)
    report_json = report.to_json()

    if opts.output:
        Path(opts.output).write_text(report_json)
        print(f"[*] Report saved: {opts.output}")
    else:
        print(report_json)

    print(f"\n[*] Done — {len(glitches)} glitches detected")


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()