Files
the-nexus/tests/test_computer_use.py
Alexander Whitestone 220f20c794
Some checks failed
CI / test (pull_request) Failing after 8s
CI / validate (pull_request) Failing after 10s
Review Approval Gate / verify-review (pull_request) Failing after 2s
feat: add desktop automation primitives to Hermes (#1125)
Implements Phase 1 and Phase 2 tooling from issue #1125:

- nexus/computer_use.py: four Hermes desktop-action tools plus an audit-log reader, with poka-yoke safety
    * computer_screenshot() — capture & base64-encode desktop snapshot
    * computer_click(x, y, button, confirm) — right/middle require confirm=True
    * computer_type(text, confirm) — sensitive keywords blocked without confirm=True;
      text value is never written to audit log
    * computer_scroll(x, y, amount) — scroll wheel
    * read_action_log() — inspect recent JSONL audit entries
    * pyautogui.FAILSAFE=True; all tools degrade gracefully when headless

- nexus/computer_use_demo.py: Phase 1 demo (baseline screenshot →
  open browser → navigate to Gitea forge → evidence screenshot)

- tests/test_computer_use.py: 32 unit tests, fully headless
  (pyautogui mocked), all passing

- docs/computer-use.md: API reference, safety table, phase roadmap,
  pilot recipes

- docker-compose.desktop.yml: sandboxed Xvfb + noVNC container

Fixes #1125

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-10 05:45:27 -04:00

363 lines
13 KiB
Python

"""
Tests for nexus.computer_use — Desktop Automation Primitives (#1125)
All tests run fully headless: pyautogui is mocked throughout.
No display is required.
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
from unittest.mock import MagicMock, patch, call
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent))
from nexus.computer_use import (
_DANGEROUS_BUTTONS,
_SENSITIVE_KEYWORDS,
computer_click,
computer_screenshot,
computer_scroll,
computer_type,
read_action_log,
)
# ---------------------------------------------------------------------------
# Helpers / fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def tmp_log(tmp_path):
    """Provide a per-test location for the JSONL audit log.

    The path lives inside pytest's ``tmp_path`` directory; only the path is
    built here, the file itself is not created.
    """
    return tmp_path.joinpath("actions.jsonl")
def _last_log_entry(log_path: Path) -> dict:
    """Return the most recent entry of a JSONL audit log, parsed from JSON.

    Blank and whitespace-only lines are ignored, so a trailing newline at the
    end of the log does not hide the last real entry.

    Args:
        log_path: Path of the JSONL file to inspect.

    Returns:
        The decoded JSON value of the last non-blank line.

    Raises:
        FileNotFoundError: If ``log_path`` does not exist.
        IndexError: If the file holds no non-blank lines.
        json.JSONDecodeError: If the last non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    content = log_path.read_text(encoding="utf-8")
    # Walk backwards: only the final non-blank line is needed.
    for raw_line in reversed(content.splitlines()):
        candidate = raw_line.strip()
        if candidate:
            return json.loads(candidate)
    raise IndexError(f"audit log {log_path} contains no entries")
def _make_mock_pag(screenshot_raises=None):
    """Build a minimal stand-in for the ``pyautogui`` module.

    Args:
        screenshot_raises: Optional value installed as the ``side_effect`` of
            ``screenshot()`` (typically an exception instance or class).
            When ``None`` (the default), ``screenshot()`` instead returns a
            mock image object with a stubbed ``save`` method.

    Returns:
        A ``MagicMock`` with ``FAILSAFE`` set to ``True`` and ``PAUSE`` set to
        ``0.05``.
    """
    pag = MagicMock()
    pag.FAILSAFE = True
    pag.PAUSE = 0.05
    # Compare against None rather than relying on truthiness, so a falsy but
    # deliberate side effect is still honoured.
    if screenshot_raises is not None:
        pag.screenshot.side_effect = screenshot_raises
    else:
        image = MagicMock()
        image.save = MagicMock()
        pag.screenshot.return_value = image
    return pag
# ---------------------------------------------------------------------------
# computer_screenshot
# ---------------------------------------------------------------------------
class TestComputerScreenshot:
    """Checks for ``computer_screenshot`` using a mocked pyautogui."""

    @staticmethod
    def _with_pag(pag):
        """Patch the module's pyautogui accessor so it returns *pag*."""
        return patch("nexus.computer_use._get_pyautogui", return_value=pag)

    def test_returns_b64_when_no_save_path(self, tmp_log):
        """Without a save path the result carries base64 data and no file path."""
        import io

        pag = _make_mock_pag()
        # Stand-in for the image's save(): copy fake PNG bytes into the target.
        png_source = io.BytesIO(b"\x89PNG\r\n\x1a\n" + b"\x00" * 20)

        def fake_save(obj, format=None):
            obj.write(png_source.getvalue())

        pag.screenshot.return_value.save = MagicMock(side_effect=fake_save)
        with self._with_pag(pag):
            outcome = computer_screenshot(log_path=tmp_log)
        assert outcome["ok"] is True
        assert outcome["image_b64"] is not None
        assert outcome["saved_to"] is None
        assert outcome["error"] is None

    def test_saves_to_path(self, tmp_log, tmp_path):
        """With a save path the image is saved there and no base64 is returned."""
        pag = _make_mock_pag()
        out_png = tmp_path / "shot.png"
        with self._with_pag(pag):
            outcome = computer_screenshot(save_path=str(out_png), log_path=tmp_log)
        assert outcome["ok"] is True
        assert outcome["saved_to"] == str(out_png)
        assert outcome["image_b64"] is None
        pag.screenshot.return_value.save.assert_called_once_with(str(out_png))

    def test_logs_action(self, tmp_log):
        """A screenshot call appends a timestamped ``screenshot`` log entry."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            computer_screenshot(log_path=tmp_log)
        logged = _last_log_entry(tmp_log)
        assert logged["action"] == "screenshot"
        assert "ts" in logged

    def test_returns_error_when_headless(self, tmp_log):
        """When the accessor yields ``None`` the call reports an error."""
        with self._with_pag(None):
            outcome = computer_screenshot(log_path=tmp_log)
        assert outcome["ok"] is False
        assert "unavailable" in outcome["error"]

    def test_handles_screenshot_exception(self, tmp_log):
        """An exception from ``screenshot()`` surfaces in the error field."""
        pag = _make_mock_pag(screenshot_raises=RuntimeError("display error"))
        with self._with_pag(pag):
            outcome = computer_screenshot(log_path=tmp_log)
        assert outcome["ok"] is False
        assert "display error" in outcome["error"]

    def test_image_b64_not_written_to_log(self, tmp_log):
        """The (potentially huge) base64 blob must NOT appear in the audit log."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            computer_screenshot(log_path=tmp_log)
        log_text = tmp_log.read_text()
        assert "image_b64" not in log_text
# ---------------------------------------------------------------------------
# computer_click
# ---------------------------------------------------------------------------
class TestComputerClick:
    """Checks for ``computer_click`` using a mocked pyautogui."""

    @staticmethod
    def _with_pag(pag):
        """Patch the module's pyautogui accessor so it returns *pag*."""
        return patch("nexus.computer_use._get_pyautogui", return_value=pag)

    def test_left_click_succeeds(self, tmp_log):
        """A default click is forwarded to pyautogui as a left click."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            outcome = computer_click(100, 200, log_path=tmp_log)
        assert outcome["ok"] is True
        pag.click.assert_called_once_with(100, 200, button="left")

    def test_right_click_blocked_without_confirm(self, tmp_log):
        """A right click without confirmation is refused and never performed."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            outcome = computer_click(100, 200, button="right", log_path=tmp_log)
        assert outcome["ok"] is False
        assert "confirm=True" in outcome["error"]
        pag.click.assert_not_called()

    def test_right_click_allowed_with_confirm(self, tmp_log):
        """A confirmed right click is forwarded to pyautogui."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            outcome = computer_click(
                100, 200, button="right", confirm=True, log_path=tmp_log
            )
        assert outcome["ok"] is True
        pag.click.assert_called_once_with(100, 200, button="right")

    def test_middle_click_blocked_without_confirm(self, tmp_log):
        """A middle click without confirmation is refused."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            outcome = computer_click(50, 50, button="middle", log_path=tmp_log)
        assert outcome["ok"] is False

    def test_middle_click_allowed_with_confirm(self, tmp_log):
        """A confirmed middle click succeeds."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            outcome = computer_click(
                50, 50, button="middle", confirm=True, log_path=tmp_log
            )
        assert outcome["ok"] is True

    def test_unknown_button_rejected(self, tmp_log):
        """An unrecognised button name produces an ``Unknown button`` error."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            outcome = computer_click(0, 0, button="turbo", log_path=tmp_log)
        assert outcome["ok"] is False
        assert "Unknown button" in outcome["error"]

    def test_logs_click_action(self, tmp_log):
        """A click is logged with its coordinates under ``params``."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            computer_click(10, 20, log_path=tmp_log)
        logged = _last_log_entry(tmp_log)
        assert logged["action"] == "click"
        assert logged["params"]["x"] == 10
        assert logged["params"]["y"] == 20

    def test_returns_error_when_headless(self, tmp_log):
        """When the accessor yields ``None`` the click reports failure."""
        with self._with_pag(None):
            outcome = computer_click(0, 0, log_path=tmp_log)
        assert outcome["ok"] is False

    def test_handles_click_exception(self, tmp_log):
        """An exception raised by ``click`` surfaces in the error field."""
        pag = _make_mock_pag()
        pag.click.side_effect = Exception("out of bounds")
        with self._with_pag(pag):
            outcome = computer_click(99999, 99999, log_path=tmp_log)
        assert outcome["ok"] is False
        assert "out of bounds" in outcome["error"]
# ---------------------------------------------------------------------------
# computer_type
# ---------------------------------------------------------------------------
class TestComputerType:
    """Checks for ``computer_type`` using a mocked pyautogui."""

    @staticmethod
    def _with_pag(pag):
        """Patch the module's pyautogui accessor so it returns *pag*."""
        return patch("nexus.computer_use._get_pyautogui", return_value=pag)

    def test_plain_text_succeeds(self, tmp_log):
        """Ordinary text is passed to ``typewrite`` with a 0.02 interval."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            outcome = computer_type("hello world", log_path=tmp_log)
        assert outcome["ok"] is True
        pag.typewrite.assert_called_once_with("hello world", interval=0.02)

    def test_sensitive_text_blocked_without_confirm(self, tmp_log):
        """Sensitive-looking text is refused without confirmation and not typed."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            outcome = computer_type("mypassword123", log_path=tmp_log)
        assert outcome["ok"] is False
        assert "confirm=True" in outcome["error"]
        pag.typewrite.assert_not_called()

    def test_sensitive_text_allowed_with_confirm(self, tmp_log):
        """The same sensitive-looking text succeeds once confirmed."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            outcome = computer_type("mypassword123", confirm=True, log_path=tmp_log)
        assert outcome["ok"] is True

    def test_sensitive_keywords_all_blocked(self, tmp_log):
        """Every entry of ``_SENSITIVE_KEYWORDS`` triggers the block when embedded."""
        pag = _make_mock_pag()
        for keyword in _SENSITIVE_KEYWORDS:
            with self._with_pag(pag):
                outcome = computer_type(f"my{keyword}value", log_path=tmp_log)
            assert outcome["ok"] is False, f"keyword {keyword!r} should be blocked"

    def test_text_not_logged(self, tmp_log):
        """Actual typed text must NOT appear in the audit log."""
        pag = _make_mock_pag()
        secret = "super_secret_value_xyz"
        with self._with_pag(pag):
            computer_type(secret, confirm=True, log_path=tmp_log)
        log_text = tmp_log.read_text()
        assert secret not in log_text

    def test_logs_length_not_content(self, tmp_log):
        """The log entry records the length of the typed text."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            computer_type("hello", log_path=tmp_log)
        logged = _last_log_entry(tmp_log)
        assert logged["params"]["length"] == 5

    def test_returns_error_when_headless(self, tmp_log):
        """When the accessor yields ``None`` typing reports failure."""
        with self._with_pag(None):
            outcome = computer_type("abc", log_path=tmp_log)
        assert outcome["ok"] is False

    def test_handles_type_exception(self, tmp_log):
        """An exception raised by ``typewrite`` surfaces in the error field."""
        pag = _make_mock_pag()
        pag.typewrite.side_effect = Exception("keyboard error")
        with self._with_pag(pag):
            outcome = computer_type("hello", log_path=tmp_log)
        assert outcome["ok"] is False
        assert "keyboard error" in outcome["error"]
# ---------------------------------------------------------------------------
# computer_scroll
# ---------------------------------------------------------------------------
class TestComputerScroll:
    """Checks for ``computer_scroll`` using a mocked pyautogui."""

    @staticmethod
    def _with_pag(pag):
        """Patch the module's pyautogui accessor so it returns *pag*."""
        return patch("nexus.computer_use._get_pyautogui", return_value=pag)

    def test_scroll_up(self, tmp_log):
        """A positive amount is forwarded unchanged with the target coordinates."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            outcome = computer_scroll(400, 300, amount=5, log_path=tmp_log)
        assert outcome["ok"] is True
        pag.scroll.assert_called_once_with(5, x=400, y=300)

    def test_scroll_down_negative(self, tmp_log):
        """A negative amount is forwarded unchanged with the target coordinates."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            outcome = computer_scroll(400, 300, amount=-3, log_path=tmp_log)
        assert outcome["ok"] is True
        pag.scroll.assert_called_once_with(-3, x=400, y=300)

    def test_logs_scroll_action(self, tmp_log):
        """A scroll is logged with its amount under ``params``."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            computer_scroll(10, 20, amount=2, log_path=tmp_log)
        logged = _last_log_entry(tmp_log)
        assert logged["action"] == "scroll"
        assert logged["params"]["amount"] == 2

    def test_returns_error_when_headless(self, tmp_log):
        """When the accessor yields ``None`` scrolling reports failure."""
        with self._with_pag(None):
            outcome = computer_scroll(0, 0, log_path=tmp_log)
        assert outcome["ok"] is False

    def test_handles_scroll_exception(self, tmp_log):
        """An exception raised by ``scroll`` is reported as a failed result."""
        pag = _make_mock_pag()
        pag.scroll.side_effect = Exception("scroll error")
        with self._with_pag(pag):
            outcome = computer_scroll(0, 0, log_path=tmp_log)
        assert outcome["ok"] is False
# ---------------------------------------------------------------------------
# read_action_log
# ---------------------------------------------------------------------------
class TestReadActionLog:
    """Checks for ``read_action_log`` against real temporary log files."""

    @staticmethod
    def _with_pag(pag):
        """Patch the module's pyautogui accessor so it returns *pag*."""
        return patch("nexus.computer_use._get_pyautogui", return_value=pag)

    def test_returns_empty_list_when_no_log(self, tmp_path):
        """A log path that does not exist yields an empty list."""
        missing = tmp_path / "nonexistent.jsonl"
        entries = read_action_log(log_path=missing)
        assert entries == []

    def test_returns_recent_entries(self, tmp_log):
        """Asking for ``n=2`` after three logged clicks returns two entries."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            for coord in (1, 2, 3):
                computer_click(coord, coord, log_path=tmp_log)
        entries = read_action_log(n=2, log_path=tmp_log)
        assert len(entries) == 2

    def test_newest_first(self, tmp_log):
        """Entries come back in reverse order of the calls that logged them."""
        pag = _make_mock_pag()
        with self._with_pag(pag):
            computer_click(1, 1, log_path=tmp_log)
            computer_scroll(5, 5, log_path=tmp_log)
        entries = read_action_log(log_path=tmp_log)
        # The scroll was logged last, so it must lead the list.
        assert entries[0]["action"] == "scroll"
        assert entries[1]["action"] == "click"

    def test_skips_malformed_lines(self, tmp_log):
        """A non-JSON line is dropped while the valid entry is kept."""
        tmp_log.parent.mkdir(parents=True, exist_ok=True)
        tmp_log.write_text(
            '{"action": "click", "ts": "2026-01-01", "params": {}, "result": {}}\nNOT JSON\n'
        )
        entries = read_action_log(log_path=tmp_log)
        assert len(entries) == 1