""" Phase 1 Demo — Desktop Automation via Hermes (#1125) Demonstrates the computer_use primitives end-to-end: 1. Take a baseline screenshot 2. Open a browser and navigate to the Gitea forge 3. Take an evidence screenshot Run inside a desktop session (Xvfb or real display): python -m nexus.computer_use_demo Or via Docker: docker compose -f docker-compose.desktop.yml run hermes-desktop \ python -m nexus.computer_use_demo """ from __future__ import annotations import logging import sys import time from pathlib import Path from nexus.computer_use import ( computer_click, computer_screenshot, computer_type, read_action_log, ) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") log = logging.getLogger(__name__) GITEA_URL = "https://forge.alexanderwhitestone.com" EVIDENCE_DIR = Path.home() / ".nexus" / "computer_use_evidence" def run_demo() -> bool: """Execute the Phase 1 demo. Returns True on success.""" EVIDENCE_DIR.mkdir(parents=True, exist_ok=True) log.info("=== Phase 1 Computer-Use Demo ===") # --- Step 1: baseline screenshot --- baseline = EVIDENCE_DIR / "01_baseline.png" log.info("Step 1: capturing baseline screenshot → %s", baseline) result = computer_screenshot(save_path=str(baseline)) if not result["ok"]: log.error("Baseline screenshot failed: %s", result["error"]) return False log.info(" ✓ baseline saved") # --- Step 2: open browser --- log.info("Step 2: opening browser") try: import subprocess # Use xdg-open / open depending on platform; fallback to chromium for cmd in ( ["xdg-open", GITEA_URL], ["chromium-browser", "--no-sandbox", GITEA_URL], ["chromium", "--no-sandbox", GITEA_URL], ["google-chrome", "--no-sandbox", GITEA_URL], ["open", GITEA_URL], # macOS ): try: subprocess.Popen(cmd, stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) log.info(" ✓ browser opened with: %s", cmd[0]) break except FileNotFoundError: continue else: log.warning(" ⚠ no browser found — skipping open step") except Exception as exc: log.warning(" ⚠ could not open browser: %s", exc) # Give the browser time to load time.sleep(3) # --- Step 3: click address bar and navigate (best-effort) --- log.info("Step 3: attempting to type URL in browser address bar (best-effort)") try: import pyautogui # type: ignore # Common shortcut to focus address bar pyautogui.hotkey("ctrl", "l") time.sleep(0.3) result_type = computer_type(GITEA_URL) if result_type["ok"]: pyautogui.press("enter") time.sleep(2) log.info(" ✓ URL typed") else: log.warning(" ⚠ type failed: %s", result_type["error"]) except ImportError: log.warning(" ⚠ pyautogui not available — skipping URL type step") # --- Step 4: evidence screenshot --- evidence = EVIDENCE_DIR / "02_gitea.png" log.info("Step 4: capturing evidence screenshot → %s", evidence) result = computer_screenshot(save_path=str(evidence)) if not result["ok"]: log.error("Evidence screenshot failed: %s", result["error"]) return False log.info(" ✓ evidence saved") # --- Step 5: summary --- log.info("Step 5: recent action log") for entry in read_action_log(n=10): log.info(" %s %s ok=%s", entry["ts"], entry["action"], entry["result"].get("ok")) log.info("=== Demo complete — evidence in %s ===", EVIDENCE_DIR) return True if __name__ == "__main__": success = run_demo() sys.exit(0 if success else 1)