# Timmy-time-dashboard/scripts/visual_state_verifier.py

"""
Visual State Verification Module for Game Agents
=================================================

Provides screenshot-based environmental state verification for game agents
(Morrowind, Minecraft, or any game with a screenshot API). Uses multimodal
analysis to confirm agent expectations match actual game state.

Usage:
    from scripts.visual_state_verifier import VisualStateVerifier

    verifier = VisualStateVerifier()
    result = verifier.verify_state(
        screenshot_path="/tmp/game_screenshot.png",
        expected_state={"location": "Balmora", "health_above": 50, "has_weapon": True},
        context="Player should be in Balmora with a weapon equipped"
    )
    print(result.verified)  # True/False
    print(result.details)   # Human-readable analysis
"""

import json
import os
import subprocess
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Optional


class VerificationStatus(Enum):
    """Status of a visual state verification.

    The string values are the serialized form of each status.
    """
    # The analysis reported the expected state as verified.
    VERIFIED = "verified"
    # The analysis reported the expected state as not verified.
    FAILED = "failed"
    # No usable verdict: analysis is still pending or could not be parsed.
    UNCERTAIN = "uncertain"
    # Verification could not be attempted (e.g. the screenshot is missing).
    ERROR = "error"


@dataclass
class VerificationResult:
    """Result of a visual state verification.

    Field order is part of the constructor signature; the last three fields
    are optional and default to empty / None.
    """
    # Outcome category of the check.
    status: VerificationStatus
    # Convenience flag; the verifier in this module sets it False for every
    # outcome other than a verified one.
    verified: bool
    # Confidence in the verdict, intended range 0.0 - 1.0.
    confidence: float  # 0.0 - 1.0
    # Human-readable analysis or error/pending message.
    details: str
    # The expected-state dict the screenshot was checked against.
    expected: dict
    # Observed state; defaults to an empty dict (nothing in this module
    # populates it).
    observed: dict = field(default_factory=dict)
    # Conditions reported as failed; defaults to an empty list.
    mismatches: list = field(default_factory=list)
    # Path of the screenshot that was (or was meant to be) analyzed.
    screenshot_path: Optional[str] = None


class VisualStateVerifier:
    """
    Verifies game state by analyzing screenshots against expected conditions.

    Supports any game that can produce screenshots. Designed for integration
    with MCP screenshot tools and vision analysis capabilities.

    The class does not invoke a vision model itself: ``_analyze_screenshot``
    only packages the prompt for the calling agent, so ``verify_state``
    currently yields an UNCERTAIN "pending" result for any existing
    screenshot. ``_parse_analysis`` additionally understands a model's answer
    (a raw JSON verdict or text containing a fenced ```json block).
    """

    def __init__(self, vision_backend: str = "builtin"):
        """
        Args:
            vision_backend: "builtin" for MCP vision, "ollama" for local model.
                The value is stored on the instance; no method of this class
                reads it yet.
        """
        self.vision_backend = vision_backend

    def verify_state(
        self,
        screenshot_path: str,
        expected_state: dict,
        context: str = "",
        game: str = "generic"
    ) -> VerificationResult:
        """
        Verify a game screenshot matches expected state conditions.

        Args:
            screenshot_path: Path to the screenshot file
            expected_state: Dict of expected conditions, e.g.:
                {
                    "location": "Balmora",
                    "health_above": 50,
                    "has_weapon": True,
                    "time_of_day": "day",
                    "nearby_npcs": ["Caius Cosades"]
                }
            context: Additional context for the vision model
            game: Game name for context ("morrowind", "minecraft", "generic")

        Returns:
            VerificationResult with status, confidence, and details. A missing
            screenshot yields an ERROR result rather than an exception.
        """
        if not Path(screenshot_path).exists():
            return VerificationResult(
                status=VerificationStatus.ERROR,
                verified=False,
                confidence=0.0,
                details=f"Screenshot not found: {screenshot_path}",
                expected=expected_state,
                screenshot_path=screenshot_path
            )

        # Prompt -> (deferred) analysis -> structured result.
        prompt = self._build_prompt(expected_state, context, game)
        analysis = self._analyze_screenshot(screenshot_path, prompt)
        return self._parse_analysis(analysis, expected_state, screenshot_path)

    def _build_prompt(self, expected: dict, context: str, game: str) -> str:
        """
        Build a structured verification prompt for the vision model.

        Each expected condition becomes one "- key: requirement" bullet:
        booleans render as yes/no, numbers as "<n> or better", lists as
        "should include a, b", anything else via its string form.

        Args:
            expected: Mapping of condition name to expected value.
            context: Free-text context; a placeholder sentence is used if empty.
            game: Game name interpolated into the first line of the prompt.

        Returns:
            The full prompt text, ending with the JSON verdict template the
            model is asked to fill in.
        """
        conditions = []
        for key, value in expected.items():
            # bool must be tested before int/float: bool is a subclass of int.
            if isinstance(value, bool):
                requirement = "yes" if value else "no"
            elif isinstance(value, (int, float)):
                requirement = f"{value} or better"
            elif isinstance(value, list):
                requirement = "should include " + ", ".join(str(v) for v in value)
            else:
                requirement = f"{value}"
            conditions.append(f"- {key}: {requirement}")

        condition_block = "\n".join(conditions)
        context_line = context if context else "No additional context provided."

        return f"""Analyze this {game} game screenshot and verify the following conditions:

{condition_block}

Context: {context_line}

For each condition, state VERIFIED, FAILED, or UNCERTAIN with a brief reason.
End with a JSON block:
```json
{{
  "verified": true/false,
  "confidence": 0.0-1.0,
  "details": "brief summary",
  "mismatches": ["list of failed conditions"]
}}
```
"""

    def _analyze_screenshot(self, path: str, prompt: str) -> str:
        """
        Send screenshot to vision backend for analysis.

        In a live agent context, this would call the MCP vision tool.
        For standalone use, it returns the prompt for manual invocation.

        Returns:
            A JSON object (as a string) with the keys "prompt",
            "screenshot_path" and "instruction"; ``_parse_analysis`` treats the
            presence of "instruction" as "analysis still pending".
        """
        # Return structured prompt for the calling agent to process
        return json.dumps({
            "prompt": prompt,
            "screenshot_path": str(path),
            "instruction": "Use vision_analyze tool with this prompt and screenshot_path"
        })

    def _parse_analysis(
        self, analysis: str, expected: dict, screenshot_path: str
    ) -> VerificationResult:
        """
        Parse vision analysis into a VerificationResult.

        Recognized inputs, tried in this order:
          1. A JSON object containing "instruction" (the placeholder produced
             by ``_analyze_screenshot``) -> UNCERTAIN "pending" result.
          2. A raw JSON object containing "verified" -> VERIFIED / FAILED.
          3. Free text containing fenced ```json blocks -> the first block that
             parses to a JSON object is used as the verdict.
          4. Anything else -> UNCERTAIN with the first 500 characters of the
             analysis as details.

        Args:
            analysis: Text returned by the analysis step.
            expected: Expected-state dict, echoed into the result.
            screenshot_path: Screenshot path, echoed into the result.
        """
        import re  # local import keeps this class independent of module imports

        try:
            data = json.loads(analysis)
        except json.JSONDecodeError:
            data = None

        # json.loads may legally return a list, string or number; only a JSON
        # object can carry the keys inspected below.
        if isinstance(data, dict):
            if "instruction" in data:
                # Not yet analyzed - return pending
                pending_path = data.get("screenshot_path", screenshot_path)
                prompt_preview = str(data.get("prompt", ""))[:100]
                # Single-quoted f-strings so the embedded double quotes do not
                # terminate the literal.
                details = (
                    f'Pending analysis. Run: vision_analyze("{pending_path}", '
                    f'"{prompt_preview}...")'
                )
                return VerificationResult(
                    status=VerificationStatus.UNCERTAIN,
                    verified=False,
                    confidence=0.0,
                    details=details,
                    expected=expected,
                    screenshot_path=screenshot_path
                )
            if "verified" in data:
                # The model answered with a bare JSON verdict (no code fence).
                return self._result_from_verdict(data, expected, screenshot_path)

        # Parse text analysis for fenced JSON blocks. Later blocks are tried
        # when an earlier one is not valid JSON (e.g. an echoed template).
        for block in re.finditer(r"```json\s*({.*?})\s*```", analysis, re.DOTALL):
            try:
                verdict = json.loads(block.group(1))
            except json.JSONDecodeError:
                continue
            if isinstance(verdict, dict):
                return self._result_from_verdict(verdict, expected, screenshot_path)

        # Fallback: return as uncertain
        return VerificationResult(
            status=VerificationStatus.UNCERTAIN,
            verified=False,
            confidence=0.3,
            details=analysis[:500],
            expected=expected,
            screenshot_path=screenshot_path
        )

    @staticmethod
    def _result_from_verdict(
        verdict: dict, expected: dict, screenshot_path: str
    ) -> VerificationResult:
        """Convert a parsed JSON verdict object into a VerificationResult."""
        # Only a JSON boolean true counts as verified, so a malformed value
        # (e.g. the string "false") can never pass verification; status and
        # verified are derived from the same flag and cannot disagree.
        verified = verdict.get("verified") is True

        try:
            confidence = float(verdict.get("confidence", 0.0))
        except (TypeError, ValueError):
            confidence = 0.0
        # Clamp into the documented 0.0 - 1.0 range.
        confidence = min(1.0, max(0.0, confidence))

        mismatches = verdict.get("mismatches") or []
        if not isinstance(mismatches, list):
            mismatches = [str(mismatches)]

        return VerificationResult(
            status=VerificationStatus.VERIFIED if verified else VerificationStatus.FAILED,
            verified=verified,
            confidence=confidence,
            details=str(verdict.get("details") or ""),
            expected=expected,
            mismatches=mismatches,
            screenshot_path=screenshot_path
        )

    @staticmethod
    def morrowind_state(
        location: Optional[str] = None,
        health_min: Optional[int] = None,
        has_weapon: Optional[bool] = None,
        is_indoors: Optional[bool] = None,
        time_of_day: Optional[str] = None,
        nearby_npcs: Optional[list] = None,
        **extra
    ) -> dict:
        """
        Build expected state dict for Morrowind.

        Only supplied arguments produce keys. ``health_min`` maps to
        "health_above" and ``is_indoors`` to "indoors"; the other arguments
        keep their names. Empty strings / empty lists are omitted, while 0 and
        False are kept. ``extra`` keyword arguments are merged last and may
        override the keys above.
        """
        state = {}
        if location:
            state["location"] = location
        # "is not None" so that 0 and False are still recorded.
        if health_min is not None:
            state["health_above"] = health_min
        if has_weapon is not None:
            state["has_weapon"] = has_weapon
        if is_indoors is not None:
            state["indoors"] = is_indoors
        if time_of_day:
            state["time_of_day"] = time_of_day
        if nearby_npcs:
            state["nearby_npcs"] = nearby_npcs
        state.update(extra)
        return state


# --- Example Verification Flows ---

# Example snippet stored as a string (nothing in this module executes it):
# verify a single Morrowind screenshot using the morrowind_state() helper and
# report either the confirmation details or the mismatches.
EXAMPLE_MORROWIND_VERIFICATION = """
# Verify player is in Balmora with a weapon
verifier = VisualStateVerifier()
result = verifier.verify_state(
    screenshot_path="/tmp/morrowind_screenshot.png",
    expected_state=VisualStateVerifier.morrowind_state(
        location="Balmora",
        health_min=50,
        has_weapon=True
    ),
    context="After completing the first Caius Cosades quest",
    game="morrowind"
)

if result.verified:
    print(f"State confirmed: {result.details}")
else:
    print(f"State mismatch: {result.mismatches}")
"""

# Example snippet stored as a string (nothing in this module executes it):
# verify several screenshots in sequence. Each state dict is splatted into
# verify_state(**state, ...), so its keys must be that method's parameter
# names (screenshot_path, expected_state, context).
EXAMPLE_BATCH_VERIFICATION = """
# Verify multiple game states in sequence
states = [
    {"screenshot_path": "screen1.png", "expected_state": {"location": "Seyda Neen"}, "context": "After character creation"},
    {"screenshot_path": "screen2.png", "expected_state": {"location": "Balmora", "has_weapon": True}, "context": "After buying weapon"},
    {"screenshot_path": "screen3.png", "expected_state": {"health_above": 80}, "context": "After resting"},
]

verifier = VisualStateVerifier()
for state in states:
    result = verifier.verify_state(**state, game="morrowind")
    print(f"{state['context']}: {'PASS' if result.verified else 'FAIL'} (confidence: {result.confidence:.0%})")
"""

if __name__ == "__main__":
    # Demo: run a single Morrowind verification and print the textual outcome.
    # Unless a file exists at the demo path, this prints the "Screenshot not
    # found" message produced by verify_state().
    demo_expected = VisualStateVerifier.morrowind_state(
        location="Balmora",
        health_min=50,
        has_weapon=True,
        nearby_npcs=["Caius Cosades"],
    )
    demo_request = {
        "screenshot_path": "/tmp/demo_screenshot.png",
        "expected_state": demo_expected,
        "context": "Player should have completed the first quest",
        "game": "morrowind",
    }
    demo_result = VisualStateVerifier().verify_state(**demo_request)
    print(demo_result.details)