feat: add visual state verification module for game agents
Multimodal screenshot-based state verification:
- Generic verifier for any game with screenshots
- Morrowind-specific state builder
- Structured prompt generation for vision models
- JSON result parsing with confidence scoring
- Batch verification examples

Relates to #1482
This commit is contained in:
285
scripts/visual_state_verifier.py
Normal file
285
scripts/visual_state_verifier.py
Normal file
@@ -0,0 +1,285 @@
|
||||
"""
|
||||
Visual State Verification Module for Game Agents
|
||||
=================================================
|
||||
|
||||
Provides screenshot-based environmental state verification for game agents
|
||||
(Morrowind, Minecraft, or any game with a screenshot API). Uses multimodal
|
||||
analysis to confirm agent expectations match actual game state.
|
||||
|
||||
Usage:
|
||||
from scripts.visual_state_verifier import VisualStateVerifier
|
||||
|
||||
verifier = VisualStateVerifier()
|
||||
result = verifier.verify_state(
|
||||
screenshot_path="/tmp/game_screenshot.png",
|
||||
expected_state={"location": "Balmora", "health_above": 50, "has_weapon": True},
|
||||
context="Player should be in Balmora with a weapon equipped"
|
||||
)
|
||||
print(result.verified) # True/False
|
||||
print(result.details) # Human-readable analysis
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class VerificationStatus(Enum):
    """Possible outcomes of one visual state verification.

    Each member's value is the lowercase string form of its name.
    """

    # The verdict parsed from the analysis was positive.
    VERIFIED = "verified"
    # The verdict parsed from the analysis was negative.
    FAILED = "failed"
    # No verdict is available (analysis pending or unparseable).
    UNCERTAIN = "uncertain"
    # Verification could not be attempted (e.g. screenshot missing).
    ERROR = "error"
|
||||
|
||||
|
||||
@dataclass
class VerificationResult:
    """Outcome record produced by a visual state verification.

    The first five fields are required; the remaining ones default to an
    empty container (a fresh one per instance) or ``None``.
    """

    # Overall outcome category.
    status: VerificationStatus
    # Boolean verdict: True when the expected state was confirmed.
    verified: bool
    # Confidence score, 0.0 - 1.0.
    confidence: float
    # Human-readable analysis or error message.
    details: str
    # The expected-state conditions that were checked.
    expected: dict
    # Observed state; empty dict unless supplied by the caller.
    observed: dict = field(default_factory=dict)
    # Conditions reported as not matching; empty list unless supplied.
    mismatches: list = field(default_factory=list)
    # Path of the screenshot this result refers to, if any.
    screenshot_path: Optional[str] = None
|
||||
|
||||
|
||||
class VisualStateVerifier:
    """
    Verifies game state by analyzing screenshots against expected conditions.

    The verifier turns a dict of expected conditions into a structured prompt
    for a vision model, hands the screenshot and prompt to
    ``_analyze_screenshot`` and converts the textual answer into a
    ``VerificationResult``.

    Note: ``_analyze_screenshot`` in this class does not call a vision model
    itself. It returns a JSON "request envelope" for the calling agent, so
    ``verify_state`` reports a pending/UNCERTAIN result until a real analysis
    text is fed to ``_parse_analysis``.
    """

    # Pattern for the fenced JSON block the prompt asks the model to end with.
    _JSON_BLOCK_PATTERN = r"```json\s*({.*?})\s*```"

    def __init__(self, vision_backend: str = "builtin"):
        """
        Args:
            vision_backend: "builtin" for MCP vision, "ollama" for local model.
                The value is stored on the instance; no method of this class
                reads it yet.
        """
        self.vision_backend = vision_backend

    def verify_state(
        self,
        screenshot_path: str,
        expected_state: dict,
        context: str = "",
        game: str = "generic"
    ) -> "VerificationResult":
        """
        Verify a game screenshot matches expected state conditions.

        Args:
            screenshot_path: Path to the screenshot file.
            expected_state: Dict of expected conditions, e.g.:
                {
                    "location": "Balmora",
                    "health_above": 50,
                    "has_weapon": True,
                    "time_of_day": "day",
                    "nearby_npcs": ["Caius Cosades"]
                }
            context: Additional context for the vision model.
            game: Game name for context ("morrowind", "minecraft", "generic").

        Returns:
            VerificationResult with status, confidence, and details. When the
            screenshot is not an existing file the result has status ERROR.
        """
        # is_file() rather than exists(): a directory is not a screenshot.
        if not Path(screenshot_path).is_file():
            return VerificationResult(
                status=VerificationStatus.ERROR,
                verified=False,
                confidence=0.0,
                details=f"Screenshot not found: {screenshot_path}",
                expected=expected_state,
                screenshot_path=screenshot_path
            )

        prompt = self._build_prompt(expected_state, context, game)
        analysis = self._analyze_screenshot(screenshot_path, prompt)
        return self._parse_analysis(analysis, expected_state, screenshot_path)

    @staticmethod
    def _format_condition(key, value) -> str:
        """Render one expected condition as a bullet line for the prompt."""
        # bool must be tested before int/float: bool is a subclass of int.
        if isinstance(value, bool):
            answer = "yes" if value else "no"
            return f"- {key}: {answer}"
        if isinstance(value, (int, float)):
            return f"- {key}: {value} or better"
        if isinstance(value, list):
            members = ", ".join(str(item) for item in value)
            return f"- {key}: should include {members}"
        return f"- {key}: {value}"

    def _build_prompt(self, expected: dict, context: str, game: str) -> str:
        """Build a structured verification prompt for the vision model.

        Args:
            expected: Mapping of condition name to expected value.
            context: Free-form context; a placeholder sentence is used when
                it is empty.
            game: Game name interpolated into the first line of the prompt.

        Returns:
            The prompt text, ending with the JSON answer template that
            ``_parse_analysis`` looks for.
        """
        conditions = [
            self._format_condition(key, value) for key, value in expected.items()
        ]
        context_text = context if context else "No additional context provided."

        lines = [
            f"Analyze this {game} game screenshot and verify the following conditions:",
            "",
            "\n".join(conditions),
            "",
            f"Context: {context_text}",
            "",
            "For each condition, state VERIFIED, FAILED, or UNCERTAIN with a brief reason.",
            "End with a JSON block:",
            "```json",
            "{",
            '  "verified": true/false,',
            '  "confidence": 0.0-1.0,',
            '  "details": "brief summary",',
            '  "mismatches": ["list of failed conditions"]',
            "}",
            "```",
            "",
        ]
        return "\n".join(lines)

    def _analyze_screenshot(self, path: str, prompt: str) -> str:
        """
        Package the screenshot and prompt for a vision backend.

        In a live agent context, this would call the MCP vision tool.
        For standalone use, it returns the prompt for manual invocation.

        Returns:
            A JSON string with the keys "prompt", "screenshot_path" and
            "instruction". ``_parse_analysis`` recognises the "instruction"
            key as "analysis still pending".
        """
        return json.dumps({
            "prompt": prompt,
            "screenshot_path": str(path),
            "instruction": "Use vision_analyze tool with this prompt and screenshot_path"
        })

    @staticmethod
    def _load_json_object(text: str) -> Optional[dict]:
        """Parse ``text`` as JSON; return it only if it is a JSON object."""
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            return None
        # A scalar or array is valid JSON but is neither an envelope nor a verdict.
        return data if isinstance(data, dict) else None

    @staticmethod
    def _extract_verdict(analysis: str) -> Optional[dict]:
        """Find the model's JSON verdict inside ``analysis``.

        Accepts either a bare JSON object containing a "verified" key or a
        fenced ```json ... ``` block embedded in free text.

        Returns:
            The verdict dict, or None when no parseable verdict is present.
        """
        import re

        bare = VisualStateVerifier._load_json_object(analysis)
        if bare is not None and "verified" in bare:
            return bare

        fenced = re.search(
            VisualStateVerifier._JSON_BLOCK_PATTERN, analysis, re.DOTALL
        )
        if fenced:
            return VisualStateVerifier._load_json_object(fenced.group(1))
        return None

    @staticmethod
    def _coerce_confidence(value) -> float:
        """Convert a model-supplied confidence to a float within [0.0, 1.0].

        Values that cannot be converted (or NaN) become 0.0.
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            return 0.0
        if number != number:  # NaN is the only value not equal to itself
            return 0.0
        return min(1.0, max(0.0, number))

    def _parse_analysis(
        self, analysis: str, expected: dict, screenshot_path: str
    ) -> "VerificationResult":
        """Parse vision analysis into a VerificationResult.

        Three kinds of input are handled, in this order:

        1. The request envelope from ``_analyze_screenshot`` (a JSON object
           with an "instruction" key): UNCERTAIN, confidence 0.0, with a
           hint on how to run the analysis.
        2. A JSON verdict, bare or fenced: VERIFIED or FAILED, using the
           verdict's confidence, details and mismatches.
        3. Anything else: UNCERTAIN, confidence 0.3, with the first 500
           characters of the analysis as details.
        """
        envelope = self._load_json_object(analysis)
        if envelope is not None and "instruction" in envelope:
            # Not yet analyzed - return pending
            pending_path = envelope.get("screenshot_path", screenshot_path)
            prompt_preview = str(envelope.get("prompt", ""))[:100]
            return VerificationResult(
                status=VerificationStatus.UNCERTAIN,
                verified=False,
                confidence=0.0,
                details=(
                    f'Pending analysis. Run: vision_analyze("{pending_path}", '
                    f'"{prompt_preview}...")'
                ),
                expected=expected,
                screenshot_path=screenshot_path
            )

        verdict = self._extract_verdict(analysis)
        if verdict is not None:
            # Only a JSON ``true`` counts; a string such as "false" is truthy
            # and must not be treated as a positive verdict.
            is_verified = verdict.get("verified") is True
            mismatches = verdict.get("mismatches") or []
            if not isinstance(mismatches, list):
                mismatches = [mismatches]
            return VerificationResult(
                status=(
                    VerificationStatus.VERIFIED
                    if is_verified
                    else VerificationStatus.FAILED
                ),
                verified=is_verified,
                confidence=self._coerce_confidence(verdict.get("confidence", 0.0)),
                details=str(verdict.get("details", "")),
                expected=expected,
                mismatches=mismatches,
                screenshot_path=screenshot_path
            )

        # Fallback: return as uncertain
        return VerificationResult(
            status=VerificationStatus.UNCERTAIN,
            verified=False,
            confidence=0.3,
            details=analysis[:500],
            expected=expected,
            screenshot_path=screenshot_path
        )

    @staticmethod
    def morrowind_state(
        location: Optional[str] = None,
        health_min: Optional[int] = None,
        has_weapon: Optional[bool] = None,
        is_indoors: Optional[bool] = None,
        time_of_day: Optional[str] = None,
        nearby_npcs: Optional[list] = None,
        **extra
    ) -> dict:
        """Build expected state dict for Morrowind.

        Only arguments that were actually supplied end up in the dict.
        ``health_min`` is stored under "health_above" and ``is_indoors``
        under "indoors". Empty strings and empty lists are treated as not
        supplied, while ``0`` and ``False`` are kept. Any ``extra`` keyword
        arguments are merged in last and may override the named keys.
        """
        state = {}
        if location:
            state["location"] = location
        if health_min is not None:
            state["health_above"] = health_min
        if has_weapon is not None:
            state["has_weapon"] = has_weapon
        if is_indoors is not None:
            state["indoors"] = is_indoors
        if time_of_day:
            state["time_of_day"] = time_of_day
        if nearby_npcs:
            state["nearby_npcs"] = nearby_npcs
        state.update(extra)
        return state
|
||||
|
||||
|
||||
# --- Example Verification Flows ---
|
||||
|
||||
# Documentation-only snippet (never executed by this module): a single
# Morrowind verification using the morrowind_state() builder. It is kept as
# valid, properly indented Python so it can be pasted and run as-is.
EXAMPLE_MORROWIND_VERIFICATION = """
# Verify player is in Balmora with a weapon
verifier = VisualStateVerifier()
result = verifier.verify_state(
    screenshot_path="/tmp/morrowind_screenshot.png",
    expected_state=VisualStateVerifier.morrowind_state(
        location="Balmora",
        health_min=50,
        has_weapon=True
    ),
    context="After completing the first Caius Cosades quest",
    game="morrowind"
)

if result.verified:
    print(f"State confirmed: {result.details}")
else:
    print(f"State mismatch: {result.mismatches}")
"""
|
||||
|
||||
# Documentation-only snippet (never executed by this module): verifying a
# sequence of screenshots. The per-state dict keys ("screenshot", "expected")
# differ from verify_state()'s parameter names, so they are mapped explicitly
# instead of being splatted with **state, which would raise TypeError.
EXAMPLE_BATCH_VERIFICATION = """
# Verify multiple game states in sequence
states = [
    {"screenshot": "screen1.png", "expected": {"location": "Seyda Neen"}, "context": "After character creation"},
    {"screenshot": "screen2.png", "expected": {"location": "Balmora", "has_weapon": True}, "context": "After buying weapon"},
    {"screenshot": "screen3.png", "expected": {"health_above": 80}, "context": "After resting"},
]

verifier = VisualStateVerifier()
for state in states:
    result = verifier.verify_state(
        screenshot_path=state["screenshot"],
        expected_state=state["expected"],
        context=state["context"],
        game="morrowind",
    )
    print(f"{state['context']}: {'PASS' if result.verified else 'FAIL'} (confidence: {result.confidence:.0%})")
"""
|
||||
|
||||
if __name__ == "__main__":
    # Demo: run one Morrowind verification against a sample screenshot path
    # and print the `details` message of the resulting VerificationResult.
    demo_verifier = VisualStateVerifier()

    # Expected conditions, assembled with the Morrowind state builder.
    demo_expected = demo_verifier.morrowind_state(
        location="Balmora",
        health_min=50,
        has_weapon=True,
        nearby_npcs=["Caius Cosades"],
    )

    demo_result = demo_verifier.verify_state(
        screenshot_path="/tmp/demo_screenshot.png",
        expected_state=demo_expected,
        context="Player should have completed the first quest",
        game="morrowind",
    )
    print(demo_result.details)
|
||||
Reference in New Issue
Block a user