"""
Visual State Verification Module for Game Agents
=================================================

Provides screenshot-based environmental state verification for game agents
(Morrowind, Minecraft, or any game with a screenshot API). Uses multimodal
analysis to confirm agent expectations match actual game state.

Usage:
    from scripts.visual_state_verifier import VisualStateVerifier

    verifier = VisualStateVerifier()
    result = verifier.verify_state(
        screenshot_path="/tmp/game_screenshot.png",
        expected_state={"location": "Balmora", "health_above": 50, "has_weapon": True},
        context="Player should be in Balmora with a weapon equipped"
    )
    print(result.verified)  # True/False
    print(result.details)   # Human-readable analysis
"""

import json
import os
import re
import subprocess
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Callable, Optional

# Matches the fenced ```json { ... } ``` verdict block the prompt asks the
# vision model to append. Compiled once instead of on every parse.
_JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)


class VerificationStatus(Enum):
    """Status of a visual state verification."""

    VERIFIED = "verified"
    FAILED = "failed"
    UNCERTAIN = "uncertain"
    ERROR = "error"


@dataclass
class VerificationResult:
    """Result of a visual state verification."""

    status: VerificationStatus
    verified: bool
    confidence: float  # 0.0 - 1.0
    details: str
    expected: dict
    observed: dict = field(default_factory=dict)
    mismatches: list = field(default_factory=list)
    screenshot_path: Optional[str] = None


class VisualStateVerifier:
    """
    Verifies game state by analyzing screenshots against expected conditions.

    Supports any game that can produce screenshots. Designed for integration
    with MCP screenshot tools and vision analysis capabilities.
    """

    def __init__(
        self,
        vision_backend: str = "builtin",
        analyzer: Optional[Callable[[str, str], str]] = None,
    ):
        """
        Args:
            vision_backend: "builtin" for MCP vision, "ollama" for local model.
                Stored for callers to inspect; this class does not dispatch
                on it.
            analyzer: Optional callable ``(screenshot_path, prompt) -> str``
                that performs the actual vision analysis and returns the
                model's text response. When omitted, ``verify_state`` returns
                a pending (UNCERTAIN) result carrying the prompt so the
                calling agent can run the analysis itself.
        """
        self.vision_backend = vision_backend
        self.analyzer = analyzer

    def verify_state(
        self,
        screenshot_path: str,
        expected_state: dict,
        context: str = "",
        game: str = "generic"
    ) -> VerificationResult:
        """
        Verify a game screenshot matches expected state conditions.

        Args:
            screenshot_path: Path to the screenshot file
            expected_state: Dict of expected conditions, e.g.:
                {
                    "location": "Balmora",
                    "health_above": 50,
                    "has_weapon": True,
                    "time_of_day": "day",
                    "nearby_npcs": ["Caius Cosades"]
                }
            context: Additional context for the vision model
            game: Game name for context ("morrowind", "minecraft", "generic")

        Returns:
            VerificationResult with status, confidence, and details. The
            status is ERROR when the screenshot file does not exist.
        """
        # is_file() rather than exists(): a directory is not a screenshot.
        if not Path(screenshot_path).is_file():
            return VerificationResult(
                status=VerificationStatus.ERROR,
                verified=False,
                confidence=0.0,
                details=f"Screenshot not found: {screenshot_path}",
                expected=expected_state,
                screenshot_path=screenshot_path
            )

        # Build verification prompt
        prompt = self._build_prompt(expected_state, context, game)

        # Analyze screenshot
        analysis = self._analyze_screenshot(screenshot_path, prompt)

        # Parse results
        return self._parse_analysis(analysis, expected_state, screenshot_path)

    @staticmethod
    def _describe_condition(key: str, value) -> str:
        """Render one expected-state entry as a bullet line for the prompt."""
        # bool must be tested before int/float: bool is a subclass of int.
        if isinstance(value, bool):
            return f"- {key}: {'yes' if value else 'no'}"
        if isinstance(value, (int, float)):
            return f"- {key}: {value} or better"
        if isinstance(value, list):
            return f"- {key}: should include {', '.join(str(v) for v in value)}"
        return f"- {key}: {value}"

    def _build_prompt(self, expected: dict, context: str, game: str) -> str:
        """Build a structured verification prompt for the vision model.

        Args:
            expected: Expected conditions; each entry becomes one bullet.
            context: Free-form context; a placeholder sentence is used when
                empty.
            game: Game name interpolated into the opening sentence.

        Returns:
            The prompt text, ending with the JSON verdict template that
            ``_parse_analysis`` looks for in the response.
        """
        condition_lines = "\n".join(
            self._describe_condition(key, value) for key, value in expected.items()
        )
        context_text = context if context else "No additional context provided."

        return f"""Analyze this {game} game screenshot and verify the following conditions:

{condition_lines}

Context: {context_text}

For each condition, state VERIFIED, FAILED, or UNCERTAIN with a brief reason.
End with a JSON block:

```json
{{
  "verified": true/false,
  "confidence": 0.0-1.0,
  "details": "brief summary",
  "mismatches": ["list of failed conditions"]
}}
```
"""

    def _analyze_screenshot(self, path: str, prompt: str) -> str:
        """
        Send screenshot to vision backend for analysis.

        When an ``analyzer`` callable was supplied to the constructor it is
        invoked with ``(path, prompt)`` and its return value is passed
        through; any exception it raises propagates to the caller. Otherwise
        a JSON envelope containing the prompt, the screenshot path and an
        instruction is returned for manual invocation.
        """
        if self.analyzer is not None:
            return self.analyzer(str(path), prompt)

        # Return structured prompt for the calling agent to process
        return json.dumps({
            "prompt": prompt,
            "screenshot_path": str(path),
            "instruction": "Use vision_analyze tool with this prompt and screenshot_path"
        })

    @staticmethod
    def _load_json_object(text: str) -> Optional[dict]:
        """Return ``text`` decoded as a JSON object, or None if it is not one."""
        try:
            data = json.loads(text)
        except (json.JSONDecodeError, TypeError):
            return None
        return data if isinstance(data, dict) else None

    @staticmethod
    def _coerce_confidence(value) -> float:
        """Convert a model-reported confidence to a float within [0.0, 1.0].

        Anything that is not a usable number (missing, bool, text, NaN)
        becomes 0.0; out-of-range numbers are clamped.
        """
        if isinstance(value, bool):
            return 0.0
        try:
            confidence = float(value)
        except (TypeError, ValueError):
            return 0.0
        if confidence != confidence:  # NaN is the only value unequal to itself
            return 0.0
        return min(1.0, max(0.0, confidence))

    def _result_from_payload(
        self,
        payload: dict,
        expected: dict,
        screenshot_path: str
    ) -> VerificationResult:
        """Build a VERIFIED/FAILED result from a decoded verdict object."""
        # Only a real JSON ``true`` counts: a string such as "false" is
        # truthy and must not be reported as verified.
        verified = payload.get("verified") is True

        raw_mismatches = payload.get("mismatches")
        if isinstance(raw_mismatches, (list, tuple)):
            mismatches = list(raw_mismatches)
        elif raw_mismatches:
            mismatches = [raw_mismatches]
        else:
            mismatches = []

        details = payload.get("details", "")
        return VerificationResult(
            status=VerificationStatus.VERIFIED if verified else VerificationStatus.FAILED,
            verified=verified,
            confidence=self._coerce_confidence(payload.get("confidence", 0.0)),
            details=details if isinstance(details, str) else str(details),
            expected=expected,
            mismatches=mismatches,
            screenshot_path=screenshot_path
        )

    def _parse_analysis(
        self,
        analysis: str,
        expected: dict,
        screenshot_path: str
    ) -> VerificationResult:
        """Parse vision analysis into a VerificationResult.

        Handles, in order:
          1. the pending envelope produced by ``_analyze_screenshot`` when no
             analyzer is configured (UNCERTAIN, confidence 0.0);
          2. a response that is itself a JSON object with a "verified" key;
          3. free text containing a fenced json verdict block;
          4. anything else (UNCERTAIN, confidence 0.3, first 500 characters
             of the text as details).
        """
        analysis = "" if analysis is None else str(analysis)

        payload = self._load_json_object(analysis)
        if payload is not None and "instruction" in payload:
            # Not yet analyzed - return pending
            pending_path = payload.get("screenshot_path", screenshot_path)
            prompt_preview = str(payload.get("prompt", ""))[:100]
            return VerificationResult(
                status=VerificationStatus.UNCERTAIN,
                verified=False,
                confidence=0.0,
                details=(
                    f'Pending analysis. Run: vision_analyze("{pending_path}", '
                    f'"{prompt_preview}...")'
                ),
                expected=expected,
                screenshot_path=screenshot_path
            )

        if payload is not None and "verified" in payload:
            return self._result_from_payload(payload, expected, screenshot_path)

        # Parse text analysis for JSON block
        block = _JSON_BLOCK_RE.search(analysis)
        if block:
            fenced = self._load_json_object(block.group(1))
            if fenced is not None:
                return self._result_from_payload(fenced, expected, screenshot_path)

        # Fallback: return as uncertain
        return VerificationResult(
            status=VerificationStatus.UNCERTAIN,
            verified=False,
            confidence=0.3,
            details=analysis[:500],
            expected=expected,
            screenshot_path=screenshot_path
        )

    @staticmethod
    def morrowind_state(
        location: Optional[str] = None,
        health_min: Optional[int] = None,
        has_weapon: Optional[bool] = None,
        is_indoors: Optional[bool] = None,
        time_of_day: Optional[str] = None,
        nearby_npcs: Optional[list] = None,
        **extra
    ) -> dict:
        """Build expected state dict for Morrowind.

        Only arguments that were supplied appear in the result. ``health_min``
        is stored under "health_above" and ``is_indoors`` under "indoors";
        any ``extra`` keyword arguments are merged in last and therefore
        override the named fields on a key clash.
        """
        state = {}
        if location:
            state["location"] = location
        # "is not None" so that 0 and False are kept as real expectations.
        if health_min is not None:
            state["health_above"] = health_min
        if has_weapon is not None:
            state["has_weapon"] = has_weapon
        if is_indoors is not None:
            state["indoors"] = is_indoors
        if time_of_day:
            state["time_of_day"] = time_of_day
        if nearby_npcs:
            state["nearby_npcs"] = nearby_npcs
        state.update(extra)
        return state


# --- Example Verification Flows ---

EXAMPLE_MORROWIND_VERIFICATION = """
# Verify player is in Balmora with a weapon
verifier = VisualStateVerifier()
result = verifier.verify_state(
    screenshot_path="/tmp/morrowind_screenshot.png",
    expected_state=VisualStateVerifier.morrowind_state(
        location="Balmora",
        health_min=50,
        has_weapon=True
    ),
    context="After completing the first Caius Cosades quest",
    game="morrowind"
)

if result.verified:
    print(f"State confirmed: {result.details}")
else:
    print(f"State mismatch: {result.mismatches}")
"""

# The dict keys match verify_state's parameter names so that **state unpacks
# cleanly.
EXAMPLE_BATCH_VERIFICATION = """
# Verify multiple game states in sequence
states = [
    {"screenshot_path": "screen1.png", "expected_state": {"location": "Seyda Neen"}, "context": "After character creation"},
    {"screenshot_path": "screen2.png", "expected_state": {"location": "Balmora", "has_weapon": True}, "context": "After buying weapon"},
    {"screenshot_path": "screen3.png", "expected_state": {"health_above": 80}, "context": "After resting"},
]

verifier = VisualStateVerifier()
for state in states:
    result = verifier.verify_state(**state, game="morrowind")
    print(f"{state['context']}: {'PASS' if result.verified else 'FAIL'} (confidence: {result.confidence:.0%})")
"""


def main() -> None:
    """Demo: run a verification and print the resulting details."""
    verifier = VisualStateVerifier()
    expected = verifier.morrowind_state(
        location="Balmora",
        health_min=50,
        has_weapon=True,
        nearby_npcs=["Caius Cosades"]
    )
    result = verifier.verify_state(
        screenshot_path="/tmp/demo_screenshot.png",
        expected_state=expected,
        context="Player should have completed the first quest",
        game="morrowind"
    )
    print(result.details)


if __name__ == "__main__":
    main()