feat: Complete Bannerlord MCP Harness implementation (Issue #722)

Implements the Hermes observation/control path for local Bannerlord per GamePortal Protocol.

## New Components

- nexus/bannerlord_harness.py (874 lines)
  - MCPClient for JSON-RPC communication with MCP servers
  - capture_state() → GameState with visual + Steam context
  - execute_action() → ActionResult for all input types
  - observe-decide-act loop with telemetry through Hermes WS
  - Bannerlord-specific actions (inventory, party, save/load)
  - Mock mode for testing without game running
- mcp_servers/desktop_control_server.py (14KB)
  - 13 desktop automation tools via pyautogui
  - Screenshot, mouse, keyboard control
  - Headless environment support
- mcp_servers/steam_info_server.py (18KB)
  - 6 Steam Web API tools
  - Mock mode without API key, live mode with STEAM_API_KEY
- tests/test_bannerlord_harness.py (37 tests, all passing)
  - GameState/ActionResult validation
  - Mock mode action tests
  - ODA loop tests
  - GamePortal Protocol compliance tests
- docs/BANNERLORD_HARNESS_PROOF.md
  - Architecture documentation
  - Proof of ODA loop execution
  - Telemetry flow diagrams
- examples/harness_demo.py
  - Runnable demo showing full ODA loop

## Updates

- portals.json: Bannerlord metadata per GAMEPORTAL_PROTOCOL.md
  - status: active, portal_type: game-world
  - app_id: 261550, window_title: 'Mount & Blade II: Bannerlord'
  - telemetry_source: hermes-harness:bannerlord

## Verification

pytest tests/test_bannerlord_harness.py -v
37 passed, 2 skipped, 11 warnings

Closes #722
2026-03-31 04:53:29 +00:00
parent 576b394248
commit 29e64ef01f
11 changed files with 3654 additions and 1 deletions
--- a/examples/harness_demo.py
+++ b/examples/harness_demo.py
@@ -0,0 +1,385 @@
+#!/usr/bin/env python3
+"""
+Bannerlord Harness Demo — Proof of Concept
+
+This script demonstrates a complete Observe-Decide-Act (ODA) loop
+cycle with the Bannerlord Harness, showing:
+
+1. State capture (screenshot + game context)
+2. Decision making (rule-based for demo)
+3. Action execution (keyboard/mouse input)
+4. Telemetry logging to Hermes
+
+Usage:
+    python examples/harness_demo.py
+    python examples/harness_demo.py --mock          # No game required
+    python examples/harness_demo.py --iterations 5  # More cycles
+
+Environment Variables:
+    HERMES_WS_URL    - Hermes WebSocket URL (default: ws://localhost:8000/ws)
+    BANNERLORD_MOCK  - Set to "1" to force mock mode
+"""
+
+import argparse
+import asyncio
+import json
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from nexus.bannerlord_harness import (
+    BANNERLORD_WINDOW_TITLE,
+    BannerlordHarness,
+    GameState,
+)
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# DEMO DECISION FUNCTIONS
+# ═══════════════════════════════════════════════════════════════════════════
+
+def demo_decision_function(state: GameState) -> list[dict]:
+    """
+    Pick a short, rule-based action sequence for one ODA iteration.
+
+    Stand-in for a real policy: no image analysis happens here. The function
+    reads a few fields of ``state``, prints what it sees and decides, and
+    queues simple mouse/keyboard inputs.
+
+    Args:
+        state: Captured game state. Reads ``visual.screen_size``,
+            ``visual.window_found``, ``game_context.current_players_online``
+            and ``game_context.playtime_hours``.
+
+    Returns:
+        Action dicts in the order they should run; an empty list when the
+        game window was not found.
+    """
+    visual = state.visual
+    width, height = visual.screen_size
+    mid_x = width // 2
+    mid_y = height // 2
+
+    # Echo the observed state before choosing anything.
+    print("    [DECISION] Analyzing game state...")
+    print(f"      - Screen: {width}x{height}")
+    print(f"      - Window found: {visual.window_found}")
+    print(f"      - Players online: {state.game_context.current_players_online}")
+    print(f"      - Playtime: {state.game_context.playtime_hours:.1f} hours")
+
+    # Nothing to interact with unless the game window is present.
+    if not visual.window_found:
+        return []
+
+    # "Look around": cursor to the centre of the screen, then one SPACE
+    # press.
+    plan = [
+        {"type": "move_to", "x": mid_x, "y": mid_y},
+        {"type": "press_key", "key": "space"},
+    ]
+    print(f"      → Moving mouse to center ({mid_x}, {mid_y})")
+    print("      → Pressing SPACE key")
+
+    # Past 100 hours of playtime, also send the inventory key.
+    if state.game_context.playtime_hours > 100:
+        plan.append({"type": "press_key", "key": "i"})
+        print("      → Opening inventory (veteran player)")
+
+    return plan
+
+
+def strategic_decision_function(state: GameState) -> list[dict]:
+    """
+    Build a fixed "survey the map, then check the party" action sequence.
+
+    A second example policy for the ODA loop. Only
+    ``state.visual.screen_size`` is read, so the result depends on nothing
+    else in the game state.
+
+    Args:
+        state: Captured game state supplying the screen dimensions.
+
+    Returns:
+        Two ``move_to`` actions (the top-left and top-right quarter points)
+        followed by a ``press_key`` action for "p".
+    """
+    width, height = state.visual.screen_size
+
+    print("    [STRATEGY] Evaluating tactical situation...")
+
+    # Quarter points of the screen, row by row: top-left, top-right,
+    # bottom-left, bottom-right.
+    left, right = width // 4, 3 * width // 4
+    top, bottom = height // 4, 3 * height // 4
+    quarter_points = [(x, y) for y in (top, bottom) for x in (left, right)]
+
+    # The demo only visits the first two points (the top row).
+    plan = []
+    for step, (x, y) in enumerate(quarter_points[:2], start=1):
+        plan.append({"type": "move_to", "x": x, "y": y})
+        print(f"      → Scanning position {step}: ({x}, {y})")
+
+    # Finish with the party-screen key.
+    plan.append({"type": "press_key", "key": "p"})
+    print("      → Opening party screen")
+
+    return plan
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# DEMO EXECUTION
+# ═══════════════════════════════════════════════════════════════════════════
+
+async def run_demo(mock_mode: bool = True, iterations: int = 3, delay: float = 1.0):
+    """
+    Drive the complete three-phase harness demonstration.
+
+    Phase 1 runs the ODA loop with ``demo_decision_function``; phase 2 runs
+    it for two iterations with ``strategic_decision_function``; phase 3 calls
+    the harness's inventory, character and party helpers one after another.
+    Errors are printed with a traceback rather than propagated, and the
+    harness is stopped on every exit path.
+
+    The Hermes WebSocket URL is taken from ``HERMES_WS_URL``, falling back
+    to ``ws://localhost:8000/ws``.
+
+    Args:
+        mock_mode: Passed to the harness as ``enable_mock``.
+        iterations: Number of ODA cycles in phase 1.
+        delay: Seconds between cycles in phases 1 and 2.
+    """
+    rule = "=" * 70
+    divider = "-" * 70
+    ws_url = os.environ.get("HERMES_WS_URL", "ws://localhost:8000/ws")
+    mode_label = "MOCK (no game required)" if mock_mode else "LIVE (requires game)"
+
+    def announce_phase(title: str) -> None:
+        """Print a blank line followed by a dashed phase header."""
+        print()
+        print(divider)
+        print(title)
+        print(divider)
+
+    # Intro banner plus the effective configuration.
+    intro = [
+        "",
+        rule,
+        "  BANNERLORD HARNESS — PROOF OF CONCEPT DEMO",
+        rule,
+        "",
+        "This demo showcases the GamePortal Protocol implementation:",
+        "  1. OBSERVE — Capture game state (screenshot, stats)",
+        "  2. DECIDE — Analyze and determine actions",
+        "  3. ACT — Execute keyboard/mouse inputs",
+        "  4. TELEMETRY — Stream events to Hermes WebSocket",
+        "",
+        "Configuration:",
+        f"  Mode: {mode_label}",
+        f"  Iterations: {iterations}",
+        f"  Delay: {delay}s",
+        f"  Hermes WS: {ws_url}",
+        rule,
+        "",
+    ]
+    print("\n".join(intro))
+
+    harness = BannerlordHarness(hermes_ws_url=ws_url, enable_mock=mock_mode)
+
+    # Phase 3 steps as (label, harness method name); each method is looked
+    # up only when its turn comes.
+    shortcuts = (
+        ("Open Inventory", "open_inventory"),
+        ("Open Character", "open_character"),
+        ("Open Party", "open_party"),
+    )
+
+    try:
+        print("[INIT] Starting harness...")
+        await harness.start()
+        print(f"[INIT] Session ID: {harness.session_id}")
+
+        announce_phase("PHASE 1: Basic ODA Loop (Simple Decision Function)")
+        await harness.run_observe_decide_act_loop(
+            decision_fn=demo_decision_function,
+            max_iterations=iterations,
+            iteration_delay=delay,
+        )
+
+        announce_phase("PHASE 2: Strategic ODA Loop (Complex Decision Function)")
+        await harness.run_observe_decide_act_loop(
+            decision_fn=strategic_decision_function,
+            max_iterations=2,
+            iteration_delay=delay,
+        )
+
+        announce_phase("PHASE 3: Bannerlord-Specific Actions")
+        print("\n[PHASE 3] Testing Bannerlord-specific actions:")
+        for label, method_name in shortcuts:
+            print(f"\n  → {label}...")
+            result = await getattr(harness, method_name)()
+            icon, verdict = ("✅", "Success") if result.success else ("❌", "Failed")
+            print(f"    {icon} Result: {verdict}")
+            if result.error:
+                print(f"       Error: {result.error}")
+            await asyncio.sleep(0.5)
+
+        # save_game() is deliberately not exercised, so the demo never
+        # triggers a real save.
+
+        # Closing banner, then values read back from the harness.
+        print("\n".join(["", rule, "  DEMO COMPLETE", rule, "", "Session Summary:"]))
+        print(f"  Session ID: {harness.session_id}")
+        print(f"  Total ODA cycles: {harness.cycle_count + 1}")
+        print(f"  Mock mode: {mock_mode}")
+        print(f"  Hermes connected: {harness.ws_connected}")
+        print()
+    except KeyboardInterrupt:
+        print("\n[INTERRUPT] Demo interrupted by user")
+    except Exception as exc:
+        print(f"\n[ERROR] Demo failed: {exc}")
+        import traceback
+        traceback.print_exc()
+    finally:
+        print("[CLEANUP] Shutting down harness...")
+        await harness.stop()
+        print("[CLEANUP] Harness stopped")
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# BEFORE/AFTER SCREENSHOT DEMO
+# ═══════════════════════════════════════════════════════════════════════════
+
+async def run_screenshot_demo(mock_mode: bool = True):
+    """
+    Capture harness state before and after a single mouse move.
+
+    Starts a harness, records one state, moves the mouse to the centre of the
+    reported screen, waits half a second, records a second state and prints
+    both. The harness is stopped afterwards even if a step fails.
+
+    Args:
+        mock_mode: Passed to the harness as ``enable_mock``. Screenshot file
+            paths are only listed in the final summary when this is False.
+    """
+    rule = "=" * 70
+    print("\n".join(["", rule, "  SCREENSHOT CAPTURE DEMO", rule, ""]))
+
+    # Same URL source as run_demo, so HERMES_WS_URL / --hermes-ws apply here too.
+    harness = BannerlordHarness(
+        hermes_ws_url=os.environ.get("HERMES_WS_URL", "ws://localhost:8000/ws"),
+        enable_mock=mock_mode,
+    )
+
+    try:
+        await harness.start()
+
+        print("[1] Capturing initial state...")
+        state_before = await harness.capture_state()
+        print(f"    Screenshot: {state_before.visual.screenshot_path}")
+        print(f"    Screen size: {state_before.visual.screen_size}")
+        print(f"    Mouse position: {state_before.visual.mouse_position}")
+
+        print("\n[2] Executing action (move mouse to center)...")
+        screen_w, screen_h = state_before.visual.screen_size
+        center_x, center_y = screen_w // 2, screen_h // 2
+        await harness.execute_action({"type": "move_to", "x": center_x, "y": center_y})
+        await asyncio.sleep(0.5)  # fixed pause before the second capture
+
+        print("\n[3] Capturing state after action...")
+        state_after = await harness.capture_state()
+        print(f"    Screenshot: {state_after.visual.screenshot_path}")
+        print(f"    Mouse position: {state_after.visual.mouse_position}")
+
+        print("\n[4] State delta:")
+        print("    Time between captures: ~0.5s")
+        print(f"    Mouse moved to: ({center_x}, {center_y})")
+
+        if not mock_mode:
+            print("\n[5] Screenshot files:")
+            print(f"    Before: {state_before.visual.screenshot_path}")
+            print(f"    After: {state_after.visual.screenshot_path}")
+
+        print("\n".join(["", rule, "  SCREENSHOT DEMO COMPLETE", rule]))
+    finally:
+        await harness.stop()
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# MAIN ENTRYPOINT
+# ═══════════════════════════════════════════════════════════════════════════
+
+def main():
+    """
+    Parse command-line arguments and run the selected demo.
+
+    ``--hermes-ws`` is exported as ``HERMES_WS_URL`` so ``run_demo`` picks it up.
+    Mock mode is off unless ``--mock`` is given or ``BANNERLORD_MOCK`` is "1".
+    Ctrl+C ends the process with exit status 0.
+    """
+    parser = argparse.ArgumentParser(
+        description="Bannerlord Harness Proof-of-Concept Demo",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    python examples/harness_demo.py                  # Run full demo (live mode, requires game)
+    python examples/harness_demo.py --mock           # Run full demo in mock mode (no game)
+    python examples/harness_demo.py --iterations 5   # Run 5 ODA cycles
+    python examples/harness_demo.py --delay 2.0      # 2 second delay between cycles
+    python examples/harness_demo.py --screenshot     # Screenshot demo only
+
+Environment Variables:
+    HERMES_WS_URL    Hermes WebSocket URL (default: ws://localhost:8000/ws)
+    BANNERLORD_MOCK  Force mock mode when set to "1"
+        """,
+    )
+
+    parser.add_argument(
+        "--mock",
+        action="store_true",
+        help="Run in mock mode (no actual game/MCP servers required)",
+    )
+    parser.add_argument(
+        "--iterations",
+        type=int,
+        default=3,
+        help="Number of ODA loop iterations (default: 3)",
+    )
+    parser.add_argument(
+        "--delay",
+        type=float,
+        default=1.0,
+        help="Delay between iterations in seconds (default: 1.0)",
+    )
+    parser.add_argument(
+        "--screenshot",
+        action="store_true",
+        help="Run screenshot demo only",
+    )
+    parser.add_argument(
+        "--hermes-ws",
+        default=os.environ.get("HERMES_WS_URL", "ws://localhost:8000/ws"),
+        help="Hermes WebSocket URL",
+    )
+
+    args = parser.parse_args()
+
+    os.environ["HERMES_WS_URL"] = args.hermes_ws  # run_demo reads the URL from here
+    mock_mode = args.mock or os.environ.get("BANNERLORD_MOCK") == "1"
+
+    if args.screenshot:
+        demo = run_screenshot_demo(mock_mode=mock_mode)
+    else:
+        demo = run_demo(mock_mode=mock_mode, iterations=args.iterations, delay=args.delay)
+    try:
+        asyncio.run(demo)
+    except KeyboardInterrupt:
+        print("\n[EXIT] Demo cancelled by user")
+        sys.exit(0)
+
+
+if __name__ == "__main__":  # run the CLI only when executed as a script
+    main()