Implements Phase 1 & 2 of the [COMPUTER_USE] epic:
- nexus/computer_use.py — four Hermes tools with safety guards and
  JSONL action logging:
    computer_screenshot(), computer_click(), computer_type(), computer_scroll()
  Poka-yoke: right/middle clicks require confirm=True; text containing
  password/token/key keywords is refused without confirm=True.
  pyautogui.FAILSAFE=True enabled globally (corner-abort).
- nexus/computer_use_demo.py — end-to-end Phase 1 demo: baseline
  screenshot → open browser → navigate to Gitea → evidence screenshot.
- tests/test_computer_use.py — 29 unit tests, fully headless (pyautogui
  mocked); all pass.
- docs/computer-use.md — full Phase 1–3 documentation including API
  reference, safety table, action-log format, and pilot recipes.
- docker-compose.desktop.yml — sandboxed Xvfb + noVNC container for
  safe headless desktop automation.

The existing mcp_servers/desktop_control_server.py is unchanged; it
remains available for external/MCP callers (Bannerlord harness etc).
Fixes #1125
281 lines
9.7 KiB
Python
281 lines
9.7 KiB
Python
"""
tests/test_computer_use.py — Unit tests for nexus.computer_use

All tests run without a real display by patching pyautogui.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import importlib
|
|
import json
|
|
import sys
|
|
import types
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
# ---------------------------------------------------------------------------
# Helpers: stub pyautogui so tests run headless
# ---------------------------------------------------------------------------
|
|
|
|
def _make_pyautogui_stub() -> MagicMock:
    """Return a minimal pyautogui mock with the attributes we use.

    Returns:
        A ``MagicMock`` whose ``FAILSAFE`` and ``PAUSE`` attributes are set,
        whose ``screenshot()`` returns an image-like mock exposing ``save()``,
        and whose ``size()`` / ``position()`` return fixed tuples.
    """
    stub = MagicMock()
    stub.FAILSAFE = True
    stub.PAUSE = 0.05

    # screenshot() → PIL-like object with .save()
    img = MagicMock()
    img.save = MagicMock()
    stub.screenshot.return_value = img

    # Fixed geometry so tests get deterministic values.
    stub.size.return_value = (1920, 1080)
    stub.position.return_value = (100, 200)
    return stub
|
|
|
|
|
|
def _reload_module(pyautogui_stub=None):
    """Reload ``nexus.computer_use`` with an optional pyautogui stub.

    Args:
        pyautogui_stub: Object to install as ``sys.modules["pyautogui"]``
            before the import. When ``None``, any cached ``pyautogui`` entry
            is removed from ``sys.modules`` instead.

    Returns:
        The freshly imported ``nexus.computer_use`` module.
    """
    # Remove cached module so we get a clean import. The substring match also
    # evicts any other cached module whose name contains "nexus.computer_use".
    for key in [name for name in sys.modules if "nexus.computer_use" in name]:
        del sys.modules[key]

    if pyautogui_stub is not None:
        sys.modules["pyautogui"] = pyautogui_stub
    else:
        sys.modules.pop("pyautogui", None)

    # Imported here (not at file top) so the import runs after the
    # sys.modules manipulation above.
    import nexus.computer_use as cu

    return cu
|
|
|
|
|
|
@pytest.fixture()
def cu(tmp_path, monkeypatch):
    """Fixture: computer_use module with pyautogui stubbed and log dir in tmp.

    Yields:
        A freshly imported ``nexus.computer_use`` module whose ``pyautogui``
        attribute is the stub and whose ``_LOG_DIR`` points at ``tmp_path``.
    """
    stub = _make_pyautogui_stub()
    mod = _reload_module(pyautogui_stub=stub)

    # Redirect log dir to tmp so tests don't write to ~/.nexus.
    # NOTE(review): NEXUS_HOME is set only after the module was imported, so
    # the module-level attributes are overwritten directly as well.
    monkeypatch.setenv("NEXUS_HOME", str(tmp_path))
    mod._LOG_DIR = tmp_path
    mod._ACTION_LOG = None  # reset so it picks up new dir
    mod._PYAUTOGUI_OK = True
    mod.pyautogui = stub

    yield mod

    # Cleanup: remove stub from sys.modules so other tests aren't affected
    sys.modules.pop("pyautogui", None)
    for key in list(sys.modules.keys()):
        if "nexus.computer_use" in key:
            del sys.modules[key]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# computer_screenshot
# ---------------------------------------------------------------------------
|
|
|
|
class TestComputerScreenshot:
    """Tests for ``computer_screenshot``."""

    def test_returns_ok_with_path(self, cu, tmp_path):
        """A default call succeeds and reports a ``.png`` path."""
        result = cu.computer_screenshot()
        assert result["ok"] is True
        assert result["tool"] == "computer_screenshot"
        assert result["path"].endswith(".png")

    def test_respects_custom_path(self, cu, tmp_path):
        """An explicit ``output_path`` is echoed back in the result."""
        target = str(tmp_path / "custom.png")
        result = cu.computer_screenshot(output_path=target)
        assert result["ok"] is True
        assert result["path"] == target

    def test_saves_screenshot(self, cu, tmp_path):
        """The captured image is saved via the stubbed pyautogui."""
        cu.computer_screenshot()
        # pyautogui.screenshot().save should have been called
        cu.pyautogui.screenshot.assert_called()
        cu.pyautogui.screenshot.return_value.save.assert_called()

    def test_writes_action_log(self, cu, tmp_path):
        """One call appends exactly one JSONL record naming the tool."""
        cu.computer_screenshot()
        log_path = tmp_path / "computer_use_log.jsonl"
        assert log_path.exists()
        records = [
            json.loads(line)
            for line in log_path.read_text().splitlines()
            if line.strip()
        ]
        assert len(records) == 1
        assert records[0]["tool"] == "computer_screenshot"

    def test_error_when_unavailable(self, tmp_path, monkeypatch):
        """With pyautogui marked unavailable the tool returns an error result."""
        # Fresh import without a pyautogui stub in sys.modules.
        cu_mod = _reload_module()
        # monkeypatch restores these attributes at teardown, so the patched
        # state does not leak into tests that import the module later.
        monkeypatch.setattr(cu_mod, "_PYAUTOGUI_OK", False, raising=False)
        monkeypatch.setattr(cu_mod, "pyautogui", None, raising=False)
        monkeypatch.setattr(cu_mod, "_LOG_DIR", tmp_path, raising=False)
        monkeypatch.setattr(cu_mod, "_ACTION_LOG", None, raising=False)

        result = cu_mod.computer_screenshot()
        assert result["ok"] is False
        assert "error" in result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# computer_click
# ---------------------------------------------------------------------------
|
|
|
|
class TestComputerClick:
    """Tests for ``computer_click``: buttons, confirm guard, screenshots, logging."""

    def test_left_click_ok(self, cu):
        """A default click reports the coordinates and uses ``pyautogui.click``."""
        result = cu.computer_click(100, 200)
        assert result["ok"] is True
        assert result["x"] == 100
        assert result["y"] == 200
        assert result["button"] == "left"
        cu.pyautogui.click.assert_called_once_with(100, 200)

    def test_right_click_requires_confirm(self, cu):
        """A right click without ``confirm`` is rejected with a hint."""
        result = cu.computer_click(10, 10, button="right")
        assert result["ok"] is False
        assert "confirm=True" in result["error"]

    def test_right_click_with_confirm(self, cu):
        """A confirmed right click goes through ``pyautogui.rightClick``."""
        result = cu.computer_click(10, 10, button="right", confirm=True)
        assert result["ok"] is True
        cu.pyautogui.rightClick.assert_called_once_with(10, 10)

    def test_middle_click_requires_confirm(self, cu):
        """A middle click without ``confirm`` is rejected."""
        result = cu.computer_click(10, 10, button="middle")
        assert result["ok"] is False

    def test_invalid_button(self, cu):
        """An unrecognised button name is rejected even when confirmed."""
        result = cu.computer_click(10, 10, button="superclick", confirm=True)
        assert result["ok"] is False
        assert "Unknown button" in result["error"]

    def test_screenshots_captured(self, cu):
        """A click takes at least two screenshots."""
        cu.computer_click(50, 50)
        # screenshot() should be called twice (before + after)
        assert cu.pyautogui.screenshot.call_count >= 2

    def test_writes_log_on_success(self, cu, tmp_path):
        """A successful click leaves a ``computer_click`` record in the log."""
        cu.computer_click(1, 2)
        log_path = tmp_path / "computer_use_log.jsonl"
        records = [
            json.loads(line)
            for line in log_path.read_text().splitlines()
            if line.strip()
        ]
        assert any(r["tool"] == "computer_click" for r in records)

    def test_writes_log_on_poka_yoke_rejection(self, cu, tmp_path):
        """A rejected click is still logged, with ``ok`` set to ``False``."""
        cu.computer_click(1, 2, button="right")
        log_path = tmp_path / "computer_use_log.jsonl"
        records = [
            json.loads(line)
            for line in log_path.read_text().splitlines()
            if line.strip()
        ]
        assert any(r["ok"] is False for r in records)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# computer_type
# ---------------------------------------------------------------------------
|
|
|
|
class TestComputerType:
    """Tests for ``computer_type``: plain typing and the sensitive-text guard."""

    def test_type_plain_text(self, cu):
        """Plain text is typed via ``pyautogui.typewrite`` with a 0.02 interval."""
        result = cu.computer_type("hello world")
        assert result["ok"] is True
        assert result["length"] == len("hello world")
        cu.pyautogui.typewrite.assert_called_once_with("hello world", interval=0.02)

    def test_sensitive_text_rejected_without_confirm(self, cu):
        """Text containing "password" is rejected unless confirmed."""
        result = cu.computer_type("mypassword123")
        assert result["ok"] is False
        assert "confirm=True" in result["error"]

    def test_sensitive_text_allowed_with_confirm(self, cu):
        """The same sensitive text is accepted with ``confirm=True``."""
        result = cu.computer_type("mypassword123", confirm=True)
        assert result["ok"] is True

    def test_token_keyword_triggers_poka_yoke(self, cu):
        """Text containing "token" is rejected without confirmation."""
        result = cu.computer_type("Bearer token abc123")
        assert result["ok"] is False

    def test_key_keyword_triggers_poka_yoke(self, cu):
        """Text containing "key" is rejected without confirmation."""
        result = cu.computer_type("api_key=secret")
        assert result["ok"] is False

    def test_plain_text_no_confirm_needed(self, cu):
        """Text without sensitive keywords needs no confirmation."""
        result = cu.computer_type("navigate to settings")
        assert result["ok"] is True

    def test_length_in_result(self, cu):
        """The result's ``length`` equals the number of characters typed."""
        text = "hello"
        result = cu.computer_type(text)
        assert result["length"] == len(text)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# computer_scroll
# ---------------------------------------------------------------------------
|
|
|
|
class TestComputerScroll:
    """Tests for ``computer_scroll``: direction, logging, and fallback."""

    def test_scroll_down(self, cu):
        """A negative amount moves the pointer, then scrolls by that amount."""
        result = cu.computer_scroll(100, 200, -3)
        assert result["ok"] is True
        assert result["amount"] == -3
        cu.pyautogui.moveTo.assert_called_once_with(100, 200)
        cu.pyautogui.scroll.assert_called_once_with(-3)

    def test_scroll_up(self, cu):
        """A positive amount is accepted and echoed back."""
        result = cu.computer_scroll(0, 0, 5)
        assert result["ok"] is True
        assert result["amount"] == 5

    def test_scroll_zero(self, cu):
        """A zero amount is still a successful call."""
        result = cu.computer_scroll(0, 0, 0)
        assert result["ok"] is True

    def test_writes_log(self, cu, tmp_path):
        """A scroll leaves a ``computer_scroll`` record in the log."""
        cu.computer_scroll(10, 20, 2)
        log_path = tmp_path / "computer_use_log.jsonl"
        records = [
            json.loads(line)
            for line in log_path.read_text().splitlines()
            if line.strip()
        ]
        assert any(r["tool"] == "computer_scroll" for r in records)

    def test_error_when_unavailable(self, tmp_path, monkeypatch):
        """With pyautogui marked unavailable the tool returns an error result."""
        # Fresh import without a pyautogui stub in sys.modules.
        cu_mod = _reload_module()
        # monkeypatch restores these attributes at teardown, so the patched
        # state does not leak into tests that import the module later.
        monkeypatch.setattr(cu_mod, "_PYAUTOGUI_OK", False, raising=False)
        monkeypatch.setattr(cu_mod, "pyautogui", None, raising=False)
        monkeypatch.setattr(cu_mod, "_LOG_DIR", tmp_path, raising=False)
        monkeypatch.setattr(cu_mod, "_ACTION_LOG", None, raising=False)

        result = cu_mod.computer_scroll(0, 0, 1)
        assert result["ok"] is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# read_action_log
# ---------------------------------------------------------------------------
|
|
|
|
class TestReadActionLog:
    """Tests for ``read_action_log``."""

    def test_empty_log(self, cu, tmp_path):
        """With no prior actions the log reads back as an empty list."""
        records = cu.read_action_log()
        assert records == []

    def test_returns_records_after_actions(self, cu, tmp_path):
        """Two tool calls yield at least two records."""
        cu.computer_screenshot()
        cu.computer_click(1, 1)
        records = cu.read_action_log()
        assert len(records) >= 2

    def test_last_n_respected(self, cu, tmp_path):
        """``last_n`` caps the number of records returned."""
        for _ in range(10):
            cu.computer_screenshot()
        records = cu.read_action_log(last_n=3)
        assert len(records) == 3

    def test_records_have_tool_field(self, cu, tmp_path):
        """Every returned record carries a ``tool`` key."""
        cu.computer_screenshot()
        records = cu.read_action_log()
        assert all("tool" in r for r in records)
|