Files
the-nexus/tests/test_computer_use.py
Alexander Whitestone a3a28aa4c2
Some checks failed
CI / test (pull_request) Failing after 20s
CI / validate (pull_request) Failing after 25s
Review Approval Gate / verify-review (pull_request) Failing after 5s
feat: add desktop automation primitives to Hermes (#1125)
Implements Phase 1 & 2 of the [COMPUTER_USE] epic:

- nexus/computer_use.py — four Hermes tools with safety guards and
  JSONL action logging:
    computer_screenshot(), computer_click(), computer_type(), computer_scroll()
  Poka-yoke: right/middle clicks require confirm=True; text containing
  password/token/key keywords is refused without confirm=True.
  pyautogui.FAILSAFE=True enabled globally (corner-abort).

- nexus/computer_use_demo.py — end-to-end Phase 1 demo: baseline
  screenshot → open browser → navigate to Gitea → evidence screenshot.

- tests/test_computer_use.py — 29 unit tests, fully headless (pyautogui
  mocked); all pass.

- docs/computer-use.md — full Phase 1–3 documentation including API
  reference, safety table, action-log format, and pilot recipes.

- docker-compose.desktop.yml — sandboxed Xvfb + noVNC container for
  safe headless desktop automation.

The existing mcp_servers/desktop_control_server.py is unchanged; it
remains available for external/MCP callers (Bannerlord harness etc).

Fixes #1125
2026-04-08 06:29:27 -04:00

281 lines
9.7 KiB
Python

"""
tests/test_computer_use.py — Unit tests for nexus.computer_use
All tests run without a real display by patching pyautogui.
"""
from __future__ import annotations
import importlib
import json
import sys
import types
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Helpers: stub pyautogui so tests run headless
# ---------------------------------------------------------------------------
def _make_pyautogui_stub() -> MagicMock:
"""Return a minimal pyautogui mock with the attributes we use."""
stub = MagicMock()
stub.FAILSAFE = True
stub.PAUSE = 0.05
# screenshot() → PIL-like object with .save()
img = MagicMock()
img.save = MagicMock()
stub.screenshot.return_value = img
stub.size.return_value = (1920, 1080)
stub.position.return_value = (100, 200)
return stub
def _reload_module(pyautogui_stub=None):
    """Import a fresh copy of ``nexus.computer_use``.

    Args:
        pyautogui_stub: Object to register as ``sys.modules["pyautogui"]``
            before the import. When ``None``, any cached ``pyautogui`` entry
            is dropped instead, so the import resolves it from scratch.

    Returns:
        The newly imported ``nexus.computer_use`` module.
    """
    # Evict every cached module whose dotted name contains the target, so the
    # import below executes the module body again instead of reusing a cache.
    stale_names = [name for name in sys.modules if "nexus.computer_use" in name]
    for name in stale_names:
        del sys.modules[name]

    if pyautogui_stub is None:
        sys.modules.pop("pyautogui", None)
    else:
        sys.modules["pyautogui"] = pyautogui_stub

    import nexus.computer_use as cu

    return cu
@pytest.fixture()
def cu(tmp_path, monkeypatch):
    """Yield ``nexus.computer_use`` wired to a pyautogui stub and a tmp log dir.

    The module is re-imported for every test, then its module-level state is
    overridden so that logging lands in ``tmp_path`` and the stub is the
    ``pyautogui`` object the tools talk to.
    """
    fake_gui = _make_pyautogui_stub()
    module = _reload_module(pyautogui_stub=fake_gui)

    # Keep test output away from the user's real ~/.nexus directory.
    monkeypatch.setenv("NEXUS_HOME", str(tmp_path))

    overrides = {
        "_LOG_DIR": tmp_path,
        "_ACTION_LOG": None,  # cleared so the new log dir is picked up
        "_PYAUTOGUI_OK": True,
        "pyautogui": fake_gui,
    }
    for attribute, value in overrides.items():
        setattr(module, attribute, value)

    yield module

    # Teardown: drop the stub and the reloaded module so later tests start
    # from a clean import state.
    sys.modules.pop("pyautogui", None)
    for name in [cached for cached in sys.modules if "nexus.computer_use" in cached]:
        del sys.modules[name]
# ---------------------------------------------------------------------------
# computer_screenshot
# ---------------------------------------------------------------------------
class TestComputerScreenshot:
    """Tests for ``computer_screenshot``: result payload, capture and logging."""

    def test_returns_ok_with_path(self, cu, tmp_path):
        """A default call succeeds and reports a ``.png`` path."""
        result = cu.computer_screenshot()
        assert result["ok"] is True
        assert result["tool"] == "computer_screenshot"
        assert result["path"].endswith(".png")

    def test_respects_custom_path(self, cu, tmp_path):
        """An explicit ``output_path`` is echoed back unchanged."""
        target = str(tmp_path / "custom.png")
        result = cu.computer_screenshot(output_path=target)
        assert result["ok"] is True
        assert result["path"] == target

    def test_saves_screenshot(self, cu, tmp_path):
        """The tool grabs a screenshot and calls ``.save()`` on the image."""
        cu.computer_screenshot()
        cu.pyautogui.screenshot.assert_called()
        cu.pyautogui.screenshot.return_value.save.assert_called()

    def test_writes_action_log(self, cu, tmp_path):
        """Exactly one JSONL record is appended for one screenshot call."""
        cu.computer_screenshot()
        log_path = tmp_path / "computer_use_log.jsonl"
        assert log_path.exists()
        records = [
            json.loads(line)
            for line in log_path.read_text().splitlines()
            if line.strip()
        ]
        assert len(records) == 1
        assert records[0]["tool"] == "computer_screenshot"

    def test_error_when_unavailable(self, tmp_path, monkeypatch):
        """With pyautogui flagged unavailable the tool reports an error."""
        # Fresh import without a stub, via the shared helper rather than a
        # hand-rolled copy of its sys.modules bookkeeping.
        cu_mod = _reload_module()
        # Apply the overrides through monkeypatch so they are undone at
        # teardown and the module left in sys.modules is not stuck in the
        # "unavailable" state for whoever imports it next.
        monkeypatch.setattr(cu_mod, "_PYAUTOGUI_OK", False, raising=False)
        monkeypatch.setattr(cu_mod, "pyautogui", None, raising=False)
        monkeypatch.setattr(cu_mod, "_LOG_DIR", tmp_path, raising=False)
        monkeypatch.setattr(cu_mod, "_ACTION_LOG", None, raising=False)
        result = cu_mod.computer_screenshot()
        assert result["ok"] is False
        assert "error" in result
# ---------------------------------------------------------------------------
# computer_click
# ---------------------------------------------------------------------------
class TestComputerClick:
    """Tests for ``computer_click``: buttons, confirm guard, capture, logging."""

    @staticmethod
    def _logged_records(log_dir):
        """Parse every non-blank line of the JSONL action log in *log_dir*."""
        raw_text = (log_dir / "computer_use_log.jsonl").read_text()
        return [json.loads(entry) for entry in raw_text.splitlines() if entry.strip()]

    def test_left_click_ok(self, cu):
        """A plain left click succeeds and is forwarded to ``pyautogui.click``."""
        outcome = cu.computer_click(100, 200)
        assert outcome["ok"] is True
        assert outcome["x"] == 100
        assert outcome["y"] == 200
        assert outcome["button"] == "left"
        cu.pyautogui.click.assert_called_once_with(100, 200)

    def test_right_click_requires_confirm(self, cu):
        """A right click without ``confirm`` is refused with a hint."""
        outcome = cu.computer_click(10, 10, button="right")
        assert outcome["ok"] is False
        assert "confirm=True" in outcome["error"]

    def test_right_click_with_confirm(self, cu):
        """A confirmed right click goes through ``pyautogui.rightClick``."""
        outcome = cu.computer_click(10, 10, button="right", confirm=True)
        assert outcome["ok"] is True
        cu.pyautogui.rightClick.assert_called_once_with(10, 10)

    def test_middle_click_requires_confirm(self, cu):
        """A middle click without ``confirm`` is refused."""
        outcome = cu.computer_click(10, 10, button="middle")
        assert outcome["ok"] is False

    def test_invalid_button(self, cu):
        """An unrecognised button name yields an "Unknown button" error."""
        outcome = cu.computer_click(10, 10, button="superclick", confirm=True)
        assert outcome["ok"] is False
        assert "Unknown button" in outcome["error"]

    def test_screenshots_captured(self, cu):
        """At least two screenshots (before + after) accompany a click."""
        cu.computer_click(50, 50)
        assert cu.pyautogui.screenshot.call_count >= 2

    def test_writes_log_on_success(self, cu, tmp_path):
        """A successful click leaves a ``computer_click`` record in the log."""
        cu.computer_click(1, 2)
        logged = self._logged_records(tmp_path)
        assert any(entry["tool"] == "computer_click" for entry in logged)

    def test_writes_log_on_poka_yoke_rejection(self, cu, tmp_path):
        """A refused click is still logged, with ``ok`` set to ``False``."""
        cu.computer_click(1, 2, button="right")
        logged = self._logged_records(tmp_path)
        assert any(entry["ok"] is False for entry in logged)
# ---------------------------------------------------------------------------
# computer_type
# ---------------------------------------------------------------------------
class TestComputerType:
    """Tests for ``computer_type``: typing and the sensitive-text guard."""

    def test_type_plain_text(self, cu):
        """Plain text is typed via ``typewrite`` and its length reported."""
        phrase = "hello world"
        reply = cu.computer_type(phrase)
        assert reply["ok"] is True
        assert reply["length"] == len(phrase)
        cu.pyautogui.typewrite.assert_called_once_with("hello world", interval=0.02)

    def test_sensitive_text_rejected_without_confirm(self, cu):
        """Text containing "password" is refused unless confirmed."""
        reply = cu.computer_type("mypassword123")
        assert reply["ok"] is False
        assert "confirm=True" in reply["error"]

    def test_sensitive_text_allowed_with_confirm(self, cu):
        """The same text is accepted once ``confirm=True`` is passed."""
        reply = cu.computer_type("mypassword123", confirm=True)
        assert reply["ok"] is True

    def test_token_keyword_triggers_poka_yoke(self, cu):
        """Text containing "token" is refused without confirmation."""
        reply = cu.computer_type("Bearer token abc123")
        assert reply["ok"] is False

    def test_key_keyword_triggers_poka_yoke(self, cu):
        """Text containing "key" is refused without confirmation."""
        reply = cu.computer_type("api_key=secret")
        assert reply["ok"] is False

    def test_plain_text_no_confirm_needed(self, cu):
        """Ordinary text needs no confirmation."""
        reply = cu.computer_type("navigate to settings")
        assert reply["ok"] is True

    def test_length_in_result(self, cu):
        """``length`` in the result equals the number of characters sent."""
        sample = "hello"
        reply = cu.computer_type(sample)
        assert reply["length"] == len(sample)
# ---------------------------------------------------------------------------
# computer_scroll
# ---------------------------------------------------------------------------
class TestComputerScroll:
    """Tests for ``computer_scroll``: pointer move, scroll amount and logging."""

    def test_scroll_down(self, cu):
        """A negative amount moves the pointer, then scrolls by that amount."""
        result = cu.computer_scroll(100, 200, -3)
        assert result["ok"] is True
        assert result["amount"] == -3
        cu.pyautogui.moveTo.assert_called_once_with(100, 200)
        cu.pyautogui.scroll.assert_called_once_with(-3)

    def test_scroll_up(self, cu):
        """A positive amount is accepted and echoed in the result."""
        result = cu.computer_scroll(0, 0, 5)
        assert result["ok"] is True
        assert result["amount"] == 5

    def test_scroll_zero(self, cu):
        """A zero amount is not treated as an error."""
        result = cu.computer_scroll(0, 0, 0)
        assert result["ok"] is True

    def test_writes_log(self, cu, tmp_path):
        """A scroll leaves a ``computer_scroll`` record in the JSONL log."""
        cu.computer_scroll(10, 20, 2)
        log_path = tmp_path / "computer_use_log.jsonl"
        records = [
            json.loads(line)
            for line in log_path.read_text().splitlines()
            if line.strip()
        ]
        assert any(r["tool"] == "computer_scroll" for r in records)

    def test_error_when_unavailable(self, tmp_path, monkeypatch):
        """With pyautogui flagged unavailable the tool reports failure."""
        # Fresh import without a stub, via the shared helper rather than a
        # hand-rolled copy of its sys.modules bookkeeping.
        cu_mod = _reload_module()
        # Apply the overrides through monkeypatch so they are undone at
        # teardown and the module left in sys.modules is not stuck in the
        # "unavailable" state for whoever imports it next.
        monkeypatch.setattr(cu_mod, "_PYAUTOGUI_OK", False, raising=False)
        monkeypatch.setattr(cu_mod, "pyautogui", None, raising=False)
        monkeypatch.setattr(cu_mod, "_LOG_DIR", tmp_path, raising=False)
        monkeypatch.setattr(cu_mod, "_ACTION_LOG", None, raising=False)
        result = cu_mod.computer_scroll(0, 0, 1)
        assert result["ok"] is False
# ---------------------------------------------------------------------------
# read_action_log
# ---------------------------------------------------------------------------
class TestReadActionLog:
    """Tests for ``read_action_log``: empty log, growth and ``last_n``."""

    def test_empty_log(self, cu, tmp_path):
        """Before any tool call the log reads back as an empty list."""
        assert cu.read_action_log() == []

    def test_returns_records_after_actions(self, cu, tmp_path):
        """Two tool calls produce at least two readable records."""
        cu.computer_screenshot()
        cu.computer_click(1, 1)
        entries = cu.read_action_log()
        assert len(entries) >= 2

    def test_last_n_respected(self, cu, tmp_path):
        """``last_n=3`` returns exactly three records out of ten written."""
        calls_made = 0
        while calls_made < 10:
            cu.computer_screenshot()
            calls_made += 1
        entries = cu.read_action_log(last_n=3)
        assert len(entries) == 3

    def test_records_have_tool_field(self, cu, tmp_path):
        """Every record read back carries a ``tool`` key."""
        cu.computer_screenshot()
        entries = cu.read_action_log()
        assert not any("tool" not in entry for entry in entries)