Implements Phase 1 and Phase 2 tooling from issue #1125:

- nexus/computer_use.py: four Hermes tools with poka-yoke safety
  * computer_screenshot() — capture & base64-encode desktop snapshot
  * computer_click(x, y, button, confirm) — right/middle require confirm=True
  * computer_type(text, confirm) — sensitive keywords blocked without
    confirm=True; text value is never written to audit log
  * computer_scroll(x, y, amount) — scroll wheel
  * read_action_log() — inspect recent JSONL audit entries
  * pyautogui.FAILSAFE=True; all tools degrade gracefully when headless
- nexus/computer_use_demo.py: Phase 1 demo (baseline screenshot → open
  browser → navigate to Gitea forge → evidence screenshot)
- tests/test_computer_use.py: 32 unit tests, fully headless (pyautogui
  mocked), all passing
- docs/computer-use.md: API reference, safety table, phase roadmap, pilot
  recipes
- docker-compose.desktop.yml: sandboxed Xvfb + noVNC container

Fixes #1125

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
119 lines
3.8 KiB
Python
119 lines
3.8 KiB
Python
"""
Phase 1 Demo — Desktop Automation via Hermes (#1125)

Demonstrates the computer_use primitives end-to-end:
  1. Take a baseline screenshot
  2. Open a browser and navigate to the Gitea forge
  3. Take an evidence screenshot

Run inside a desktop session (Xvfb or real display):

    python -m nexus.computer_use_demo

Or via Docker:

    docker compose -f docker-compose.desktop.yml run hermes-desktop \
        python -m nexus.computer_use_demo
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
from nexus.computer_use import (
|
|
computer_click,
|
|
computer_screenshot,
|
|
computer_type,
|
|
read_action_log,
|
|
)
|
|
|
|
# Base URL of the Gitea forge the demo opens in the browser.
GITEA_URL = "https://forge.alexanderwhitestone.com"
# Directory (under the current user's home) where the demo writes its screenshots.
EVIDENCE_DIR = Path.home().joinpath(".nexus", "computer_use_evidence")

# Logging is configured at import time so every progress line carries a
# timestamp and level; the module logger is named after this module.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
log = logging.getLogger(__name__)
|
|
|
|
|
|
def _launch_browser(url: str) -> bool:
    """Start a browser on *url* with the first launcher command that works.

    Each candidate command is tried in order. A missing executable is skipped
    silently; any other OS-level launch failure (e.g. a non-executable file)
    is logged and the next candidate is tried instead of aborting the search.

    Returns:
        True once a launcher process has been spawned, False if none could be.
        A spawned launcher is not waited on, so True does not prove that a
        browser window actually appeared.
    """
    import subprocess  # function-local, as in the original demo

    # Use xdg-open / open depending on platform; fall back to Chromium/Chrome.
    launchers = (
        ["xdg-open", url],
        ["chromium-browser", "--no-sandbox", url],
        ["chromium", "--no-sandbox", url],
        ["google-chrome", "--no-sandbox", url],
        ["open", url],  # macOS
    )
    for cmd in launchers:
        try:
            subprocess.Popen(cmd, stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
        except FileNotFoundError:
            continue
        except OSError as exc:
            log.warning(" ⚠ could not open browser: %s", exc)
            continue
        log.info(" ✓ browser opened with: %s", cmd[0])
        return True

    log.warning(" ⚠ no browser found — skipping open step")
    return False


def _type_url_in_address_bar(url: str) -> None:
    """Best-effort: focus the browser address bar, type *url*, press Enter.

    Never raises. A missing pyautogui, a rejected ``computer_type`` call, or
    any runtime error from the GUI automation layer is logged as a warning so
    the caller can still capture its evidence screenshot.
    """
    try:
        import pyautogui  # type: ignore

        # The "focus address bar" shortcut is Cmd+L on macOS, Ctrl+L elsewhere.
        modifier = "command" if sys.platform == "darwin" else "ctrl"
        pyautogui.hotkey(modifier, "l")
        time.sleep(0.3)
        result_type = computer_type(url)
        if result_type["ok"]:
            pyautogui.press("enter")
            time.sleep(2)
            log.info(" ✓ URL typed")
        else:
            log.warning(" ⚠ type failed: %s", result_type["error"])
    except ImportError:
        log.warning(" ⚠ pyautogui not available — skipping URL type step")
    except Exception as exc:
        # This step is explicitly best-effort: a GUI automation error (for
        # example pyautogui's fail-safe trigger) must not abort the demo.
        log.warning(" ⚠ could not type URL: %s", exc)


def run_demo() -> bool:
    """Execute the Phase 1 demo. Returns True on success.

    Steps:
        1. Capture a baseline screenshot into ``EVIDENCE_DIR``.
        2. Launch a browser on ``GITEA_URL``.
        3. If a browser was launched, wait for it and type the URL into its
           address bar (best-effort).
        4. Capture an evidence screenshot.
        5. Log the ten most recent action-log entries.

    Returns:
        False if either screenshot fails; True otherwise. Problems in the
        browser-launch and URL-typing steps are logged but do not fail the
        demo.
    """
    EVIDENCE_DIR.mkdir(parents=True, exist_ok=True)
    log.info("=== Phase 1 Computer-Use Demo ===")

    # --- Step 1: baseline screenshot ---
    baseline = EVIDENCE_DIR / "01_baseline.png"
    log.info("Step 1: capturing baseline screenshot → %s", baseline)
    result = computer_screenshot(save_path=str(baseline))
    if not result["ok"]:
        log.error("Baseline screenshot failed: %s", result["error"])
        return False
    log.info(" ✓ baseline saved")

    # --- Step 2: open browser ---
    log.info("Step 2: opening browser")
    browser_launched = _launch_browser(GITEA_URL)

    # --- Step 3: click address bar and navigate (best-effort) ---
    if browser_launched:
        # Give the browser time to load
        time.sleep(3)
        log.info("Step 3: attempting to type URL in browser address bar (best-effort)")
        _type_url_in_address_bar(GITEA_URL)
    else:
        # Without a freshly launched browser the keystrokes (shortcut, URL,
        # Enter) would be delivered to whatever window currently has focus.
        log.warning("Step 3: skipped — no browser was launched")

    # --- Step 4: evidence screenshot ---
    evidence = EVIDENCE_DIR / "02_gitea.png"
    log.info("Step 4: capturing evidence screenshot → %s", evidence)
    result = computer_screenshot(save_path=str(evidence))
    if not result["ok"]:
        log.error("Evidence screenshot failed: %s", result["error"])
        return False
    log.info(" ✓ evidence saved")

    # --- Step 5: summary ---
    log.info("Step 5: recent action log")
    for entry in read_action_log(n=10):
        log.info(" %s %s ok=%s", entry["ts"], entry["action"], entry["result"].get("ok"))

    log.info("=== Demo complete — evidence in %s ===", EVIDENCE_DIR)
    return True
|
|
|
|
|
|
if __name__ == "__main__":
    # Translate the demo's boolean outcome into a process exit status:
    # 0 when run_demo() reports success, 1 otherwise.
    sys.exit(0 if run_demo() else 1)
|