119 lines
3.8 KiB
Python
119 lines
3.8 KiB
Python
|
|
"""
|
||
|
|
Phase 1 Demo — Desktop Automation via Hermes (#1125)
|
||
|
|
|
||
|
|
Demonstrates the computer_use primitives end-to-end:
|
||
|
|
1. Take a baseline screenshot
|
||
|
|
2. Open a browser and navigate to the Gitea forge
|
||
|
|
3. Take an evidence screenshot
|
||
|
|
|
||
|
|
Run inside a desktop session (Xvfb or real display):
|
||
|
|
|
||
|
|
python -m nexus.computer_use_demo
|
||
|
|
|
||
|
|
Or via Docker:
|
||
|
|
|
||
|
|
docker compose -f docker-compose.desktop.yml run hermes-desktop \
|
||
|
|
python -m nexus.computer_use_demo
|
||
|
|
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import logging
|
||
|
|
import sys
|
||
|
|
import time
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
from nexus.computer_use import (
|
||
|
|
computer_click,
|
||
|
|
computer_screenshot,
|
||
|
|
computer_type,
|
||
|
|
read_action_log,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Emit timestamped, level-tagged log lines at INFO and above for the demo run.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
log = logging.getLogger(__name__)

# Forge instance the demo points the browser at.
GITEA_URL = "https://forge.alexanderwhitestone.com"

# Directory (under the user's home) where the demo writes its screenshots.
EVIDENCE_DIR = Path.home().joinpath(".nexus", "computer_use_evidence")
|
||
|
|
|
||
|
|
|
||
|
|
def _launch_browser(url: str) -> bool:
    """Start the first launchable browser command pointed at *url*.

    Candidates are tried in order. A candidate that is missing or cannot be
    executed is skipped so the remaining candidates still get a chance.

    Returns True if a launcher process was started, False if none could be.
    """
    import subprocess

    # Use xdg-open / open depending on platform; fall back to Chromium/Chrome.
    candidates = (
        ["xdg-open", url],
        ["chromium-browser", "--no-sandbox", url],
        ["chromium", "--no-sandbox", url],
        ["google-chrome", "--no-sandbox", url],
        ["open", url],  # macOS
    )
    for cmd in candidates:
        try:
            subprocess.Popen(cmd, stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
        except OSError:
            # Covers FileNotFoundError (not installed) as well as other launch
            # failures such as PermissionError; move on to the next candidate.
            continue
        log.info(" ✓ browser opened with: %s", cmd[0])
        return True

    log.warning(" ⚠ no browser found — skipping open step")
    return False


def _navigate_via_keyboard(url: str) -> None:
    """Best-effort: focus the address bar, type *url*, and press Enter.

    Never raises. A missing pyautogui, a failed ``computer_type`` call, or any
    other error while driving the keyboard is logged as a warning so the
    caller can still capture its evidence screenshot.
    """
    try:
        import pyautogui  # type: ignore

        # Common shortcut to focus the address bar.
        pyautogui.hotkey("ctrl", "l")
        time.sleep(0.3)
        typed = computer_type(url)
        if typed["ok"]:
            pyautogui.press("enter")
            time.sleep(2)
            log.info(" ✓ URL typed")
        else:
            log.warning(" ⚠ type failed: %s", typed["error"])
    except ImportError:
        log.warning(" ⚠ pyautogui not available — skipping URL type step")
    except Exception as exc:
        # This step is explicitly best-effort: anything else that goes wrong
        # here must not abort the demo.
        log.warning(" ⚠ could not type URL: %s", exc)


def run_demo() -> bool:
    """Execute the Phase 1 demo.

    Steps: capture a baseline screenshot, open a browser, try to navigate to
    ``GITEA_URL`` via the keyboard, capture an evidence screenshot, and log
    the most recent action-log entries. Screenshots are written under
    ``EVIDENCE_DIR``, which is created if needed.

    Returns:
        True when both screenshots succeed; False as soon as either
        screenshot reports failure. Browser and typing problems are only
        logged as warnings and do not affect the return value.
    """
    EVIDENCE_DIR.mkdir(parents=True, exist_ok=True)
    log.info("=== Phase 1 Computer-Use Demo ===")

    # --- Step 1: baseline screenshot ---
    baseline = EVIDENCE_DIR / "01_baseline.png"
    log.info("Step 1: capturing baseline screenshot → %s", baseline)
    result = computer_screenshot(save_path=str(baseline))
    if not result["ok"]:
        log.error("Baseline screenshot failed: %s", result["error"])
        return False
    log.info(" ✓ baseline saved")

    # --- Step 2: open browser ---
    log.info("Step 2: opening browser")
    try:
        _launch_browser(GITEA_URL)
    except Exception as exc:
        log.warning(" ⚠ could not open browser: %s", exc)

    # Give the browser time to load
    time.sleep(3)

    # --- Step 3: click address bar and navigate (best-effort) ---
    log.info("Step 3: attempting to type URL in browser address bar (best-effort)")
    _navigate_via_keyboard(GITEA_URL)

    # --- Step 4: evidence screenshot ---
    evidence = EVIDENCE_DIR / "02_gitea.png"
    log.info("Step 4: capturing evidence screenshot → %s", evidence)
    result = computer_screenshot(save_path=str(evidence))
    if not result["ok"]:
        log.error("Evidence screenshot failed: %s", result["error"])
        return False
    log.info(" ✓ evidence saved")

    # --- Step 5: summary ---
    log.info("Step 5: recent action log")
    for entry in read_action_log(n=10):
        log.info(" %s %s ok=%s", entry["ts"], entry["action"], entry["result"].get("ok"))

    log.info("=== Demo complete — evidence in %s ===", EVIDENCE_DIR)
    return True
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Translate the demo's boolean outcome into a process exit status:
    # 0 when the demo succeeded, 1 when it failed.
    exit_status = 0 if run_demo() else 1
    sys.exit(exit_status)
|