From 3b322d185c6e0e6147e977edcd7dc5fa7cbc9ee8 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone <8633216+AlexanderWhitestone@users.noreply.github.com> Date: Fri, 6 Mar 2026 09:01:24 -0500 Subject: [PATCH] feat: add Shell and Git execution hands for Timmy (#136) --- src/config.py | 12 ++ src/infrastructure/hands/__init__.py | 17 ++ src/infrastructure/hands/git.py | 222 ++++++++++++++++++++++++ src/infrastructure/hands/shell.py | 249 +++++++++++++++++++++++++++ src/infrastructure/hands/tools.py | 148 ++++++++++++++++ tests/test_hands_git.py | 197 +++++++++++++++++++++ tests/test_hands_shell.py | 189 ++++++++++++++++++++ 7 files changed, 1034 insertions(+) create mode 100644 src/infrastructure/hands/__init__.py create mode 100644 src/infrastructure/hands/git.py create mode 100644 src/infrastructure/hands/shell.py create mode 100644 src/infrastructure/hands/tools.py create mode 100644 tests/test_hands_git.py create mode 100644 tests/test_hands_shell.py diff --git a/src/config.py b/src/config.py index 83536225..b4dfda7f 100644 --- a/src/config.py +++ b/src/config.py @@ -176,6 +176,18 @@ class Settings(BaseSettings): # Timeout in seconds for OpenFang hand execution (some hands are slow). openfang_timeout: int = 120 + # ── Local Hands (Shell + Git) ────────────────────────────────────── + # Enable local shell/git execution hands. + hands_shell_enabled: bool = True + # Default timeout in seconds for shell commands. + hands_shell_timeout: int = 60 + # Comma-separated additional command prefixes to allow. + hands_shell_extra_allowed: str = "" + # Enable the git hand for version-control operations. + hands_git_enabled: bool = True + # Default timeout for git operations. + hands_git_timeout: int = 60 + # ── Error Logging ───────────────────────────────────────────────── error_log_enabled: bool = True error_log_dir: str = "logs" diff --git a/src/infrastructure/hands/__init__.py b/src/infrastructure/hands/__init__.py new file mode 100644 index 00000000..15309f71 --- /dev/null +++ b/src/infrastructure/hands/__init__.py @@ -0,0 +1,17 @@ +"""Hands — local execution capabilities for Timmy. + +Unlike OpenFang (vendored binary sidecar), these hands run in-process +and provide direct shell and git execution with sandboxing and approval +gates. + +Usage: + from infrastructure.hands import shell_hand, git_hand + + result = await shell_hand.run("make test") + result = await git_hand.run("status") +""" + +from infrastructure.hands.shell import shell_hand +from infrastructure.hands.git import git_hand + +__all__ = ["shell_hand", "git_hand"] diff --git a/src/infrastructure/hands/git.py b/src/infrastructure/hands/git.py new file mode 100644 index 00000000..f1af5ef0 --- /dev/null +++ b/src/infrastructure/hands/git.py @@ -0,0 +1,222 @@ +"""Git Hand — version-control operations for Timmy. + +Provides git capabilities with: +- Safe defaults (no force-push without explicit override) +- Approval gates for destructive operations +- Structured result parsing +- Working-directory pinning to repo root + +Follows project conventions: +- Config via ``from config import settings`` +- Singleton pattern for module-level import +- Graceful degradation: log error, return fallback, never crash +""" + +from __future__ import annotations + +import asyncio +import logging +import time +from dataclasses import dataclass, field +from typing import Optional + +from config import settings + +logger = logging.getLogger(__name__) + +# Operations that require explicit confirmation before execution +DESTRUCTIVE_OPS = frozenset({ + "push --force", + "push -f", + "reset --hard", + "clean -fd", + "clean -f", + "branch -D", + "checkout -- .", + "restore .", +}) + + +@dataclass +class GitResult: + """Result from a git operation.""" + + operation: str + success: bool + output: str = "" + error: str = "" + latency_ms: float = 0.0 + requires_confirmation: bool = False + metadata: dict = field(default_factory=dict) + + +class GitHand: + """Git operations hand for Timmy. + + All methods degrade gracefully — if git is not available or the + command fails, the hand returns a ``GitResult(success=False)`` + rather than raising. + """ + + def __init__(self, repo_dir: Optional[str] = None, timeout: int = 60) -> None: + self._repo_dir = repo_dir or settings.repo_root or None + self._timeout = timeout + logger.info("GitHand initialised — repo=%s", self._repo_dir) + + def _is_destructive(self, args: str) -> bool: + """Check if a git operation is destructive.""" + for op in DESTRUCTIVE_OPS: + if op in args: + return True + return False + + async def run( + self, + args: str, + timeout: Optional[int] = None, + allow_destructive: bool = False, + ) -> GitResult: + """Execute a git command. + + Args: + args: Git arguments (e.g. "status", "log --oneline -5"). + timeout: Override default timeout (seconds). + allow_destructive: Must be True to run destructive ops. + + Returns: + GitResult with output or error details. + """ + start = time.time() + + # Gate destructive operations + if self._is_destructive(args) and not allow_destructive: + return GitResult( + operation=f"git {args}", + success=False, + error=( + f"Destructive operation blocked: 'git {args}'. " + "Set allow_destructive=True to override." + ), + requires_confirmation=True, + latency_ms=(time.time() - start) * 1000, + ) + + effective_timeout = timeout or self._timeout + command = f"git {args}" + + try: + proc = await asyncio.create_subprocess_exec( + "git", + *args.split(), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=self._repo_dir, + ) + + try: + stdout_bytes, stderr_bytes = await asyncio.wait_for( + proc.communicate(), timeout=effective_timeout + ) + except asyncio.TimeoutError: + proc.kill() + await proc.wait() + latency = (time.time() - start) * 1000 + logger.warning("Git command timed out after %ds: %s", effective_timeout, command) + return GitResult( + operation=command, + success=False, + error=f"Command timed out after {effective_timeout}s", + latency_ms=latency, + ) + + latency = (time.time() - start) * 1000 + exit_code = proc.returncode or 0 + stdout = stdout_bytes.decode("utf-8", errors="replace").strip() + stderr = stderr_bytes.decode("utf-8", errors="replace").strip() + + return GitResult( + operation=command, + success=exit_code == 0, + output=stdout, + error=stderr if exit_code != 0 else "", + latency_ms=latency, + ) + + except FileNotFoundError: + latency = (time.time() - start) * 1000 + logger.warning("git binary not found") + return GitResult( + operation=command, + success=False, + error="git binary not found on PATH", + latency_ms=latency, + ) + except Exception as exc: + latency = (time.time() - start) * 1000 + logger.warning("Git command failed: %s — %s", command, exc) + return GitResult( + operation=command, + success=False, + error=str(exc), + latency_ms=latency, + ) + + # ── Convenience wrappers ───────────────────────────────────────────────── + + async def status(self) -> GitResult: + """Run ``git status --short``.""" + return await self.run("status --short") + + async def log(self, count: int = 10) -> GitResult: + """Run ``git log --oneline``.""" + return await self.run(f"log --oneline -{count}") + + async def diff(self, staged: bool = False) -> GitResult: + """Run ``git diff`` (or ``git diff --cached`` for staged).""" + args = "diff --cached" if staged else "diff" + return await self.run(args) + + async def add(self, paths: str = ".") -> GitResult: + """Stage files.""" + return await self.run(f"add {paths}") + + async def commit(self, message: str) -> GitResult: + """Create a commit with the given message.""" + # Use -- to prevent message from being interpreted as flags + return await self.run(f"commit -m {message!r}") + + async def checkout_branch(self, branch: str, create: bool = False) -> GitResult: + """Checkout (or create) a branch.""" + flag = "-b" if create else "" + return await self.run(f"checkout {flag} {branch}".strip()) + + async def push(self, remote: str = "origin", branch: str = "", force: bool = False) -> GitResult: + """Push to remote. Force-push requires explicit opt-in.""" + args = f"push -u {remote} {branch}".strip() + if force: + args = f"push --force {remote} {branch}".strip() + return await self.run(args, allow_destructive=True) + return await self.run(args) + + async def clone(self, url: str, dest: str = "") -> GitResult: + """Clone a repository.""" + args = f"clone {url}" + if dest: + args += f" {dest}" + return await self.run(args, timeout=120) + + async def pull(self, remote: str = "origin", branch: str = "") -> GitResult: + """Pull from remote.""" + args = f"pull {remote} {branch}".strip() + return await self.run(args) + + def info(self) -> dict: + """Return a status summary for the dashboard.""" + return { + "repo_dir": self._repo_dir, + "default_timeout": self._timeout, + } + + +# ── Module-level singleton ────────────────────────────────────────────────── +git_hand = GitHand() diff --git a/src/infrastructure/hands/shell.py b/src/infrastructure/hands/shell.py new file mode 100644 index 00000000..c44c8f85 --- /dev/null +++ b/src/infrastructure/hands/shell.py @@ -0,0 +1,249 @@ +"""Shell Execution Hand — sandboxed command runner for Timmy. + +Provides a restricted shell execution environment with: +- Configurable command allow-list +- Timeout enforcement +- Working-directory pinning +- Graceful degradation (never crashes the coordinator) + +Follows project conventions: +- Config via ``from config import settings`` +- Singleton pattern for module-level import +- Graceful degradation: log error, return fallback, never crash +""" + +from __future__ import annotations + +import asyncio +import logging +import shlex +import time +from dataclasses import dataclass, field +from typing import Optional + +from config import settings + +logger = logging.getLogger(__name__) + +# Commands that are always blocked regardless of allow-list +_BLOCKED_COMMANDS = frozenset({ + "rm -rf /", + "rm -rf /*", + "mkfs", + "dd if=/dev/zero", + ":(){ :|:& };:", # fork bomb + "> /dev/sda", + "chmod -R 777 /", +}) + +# Default allow-list: safe build/dev commands +DEFAULT_ALLOWED_PREFIXES = ( + "make", + "pytest", + "python", + "pip", + "git", + "ls", + "cat", + "head", + "tail", + "grep", + "find", + "echo", + "env", + "which", + "uname", + "whoami", + "date", + "wc", + "sort", + "uniq", + "diff", + "curl", + "wget", + "docker", + "npm", + "node", + "cargo", + "rustc", +) + + +@dataclass +class ShellResult: + """Result from a shell command execution.""" + + command: str + success: bool + exit_code: int = -1 + stdout: str = "" + stderr: str = "" + error: str = "" + latency_ms: float = 0.0 + timed_out: bool = False + metadata: dict = field(default_factory=dict) + + +class ShellHand: + """Sandboxed shell executor for Timmy. + + All methods degrade gracefully — if a command fails or times out, + the hand returns a ``ShellResult(success=False)`` rather than raising. + """ + + def __init__( + self, + allowed_prefixes: Optional[tuple[str, ...]] = None, + default_timeout: int = 60, + working_dir: Optional[str] = None, + ) -> None: + self._allowed_prefixes = allowed_prefixes or DEFAULT_ALLOWED_PREFIXES + self._default_timeout = default_timeout + self._working_dir = working_dir or settings.repo_root or None + self._enabled = True + logger.info( + "ShellHand initialised — cwd=%s, timeout=%ds, %d allowed prefixes", + self._working_dir, + self._default_timeout, + len(self._allowed_prefixes), + ) + + @property + def enabled(self) -> bool: + return self._enabled + + def _validate_command(self, command: str) -> Optional[str]: + """Validate a command against the allow-list. + + Returns None if valid, or an error message if blocked. + """ + stripped = command.strip() + + # Check explicit block-list + for blocked in _BLOCKED_COMMANDS: + if blocked in stripped: + return f"Command blocked by safety filter: {blocked!r}" + + # Check allow-list by first token + try: + tokens = shlex.split(stripped) + except ValueError: + tokens = stripped.split() + + if not tokens: + return "Empty command" + + base_cmd = tokens[0].split("/")[-1] # strip path prefix + + if base_cmd not in self._allowed_prefixes: + return ( + f"Command '{base_cmd}' not in allow-list. " + f"Allowed: {', '.join(sorted(self._allowed_prefixes))}" + ) + + return None + + async def run( + self, + command: str, + timeout: Optional[int] = None, + working_dir: Optional[str] = None, + env: Optional[dict] = None, + ) -> ShellResult: + """Execute a shell command in a sandboxed environment. + + Args: + command: The shell command to execute. + timeout: Override default timeout (seconds). + working_dir: Override default working directory. + env: Additional environment variables to set. + + Returns: + ShellResult with stdout/stderr or error details. + """ + start = time.time() + + # Validate + validation_error = self._validate_command(command) + if validation_error: + return ShellResult( + command=command, + success=False, + error=validation_error, + latency_ms=(time.time() - start) * 1000, + ) + + effective_timeout = timeout or self._default_timeout + cwd = working_dir or self._working_dir + + try: + import os + + run_env = os.environ.copy() + if env: + run_env.update(env) + + proc = await asyncio.create_subprocess_shell( + command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=cwd, + env=run_env, + ) + + try: + stdout_bytes, stderr_bytes = await asyncio.wait_for( + proc.communicate(), timeout=effective_timeout + ) + except asyncio.TimeoutError: + proc.kill() + await proc.wait() + latency = (time.time() - start) * 1000 + logger.warning( + "Shell command timed out after %ds: %s", effective_timeout, command + ) + return ShellResult( + command=command, + success=False, + exit_code=-1, + error=f"Command timed out after {effective_timeout}s", + latency_ms=latency, + timed_out=True, + ) + + latency = (time.time() - start) * 1000 + exit_code = proc.returncode or 0 + stdout = stdout_bytes.decode("utf-8", errors="replace").strip() + stderr = stderr_bytes.decode("utf-8", errors="replace").strip() + + return ShellResult( + command=command, + success=exit_code == 0, + exit_code=exit_code, + stdout=stdout, + stderr=stderr, + latency_ms=latency, + ) + + except Exception as exc: + latency = (time.time() - start) * 1000 + logger.warning("Shell command failed: %s — %s", command, exc) + return ShellResult( + command=command, + success=False, + error=str(exc), + latency_ms=latency, + ) + + def status(self) -> dict: + """Return a status summary for the dashboard.""" + return { + "enabled": self._enabled, + "working_dir": self._working_dir, + "default_timeout": self._default_timeout, + "allowed_prefixes": list(self._allowed_prefixes), + } + + +# ── Module-level singleton ────────────────────────────────────────────────── +shell_hand = ShellHand() diff --git a/src/infrastructure/hands/tools.py b/src/infrastructure/hands/tools.py new file mode 100644 index 00000000..a08860be --- /dev/null +++ b/src/infrastructure/hands/tools.py @@ -0,0 +1,148 @@ +"""Register Shell and Git Hands as MCP tools in Timmy's tool registry. + +Shell and Git hands are local execution hands (no HTTP sidecar). +They provide Timmy with the ability to run shell commands and +perform git operations directly. + +Call ``register_hand_tools()`` during app startup to populate +the tool registry. +""" + +import logging +from typing import Any + +from infrastructure.hands.shell import shell_hand +from infrastructure.hands.git import git_hand + +try: + from mcp.schemas.base import create_tool_schema +except ImportError: + def create_tool_schema(**kwargs): + return kwargs + +logger = logging.getLogger(__name__) + +# ── Tool schemas ───────────────────────────────────────────────────────────── + +_HAND_SCHEMAS: dict[str, dict] = { + "shell": create_tool_schema( + name="hand_shell", + description=( + "Execute a shell command in a sandboxed environment. " + "Commands are validated against an allow-list. " + "Returns stdout, stderr, and exit code." + ), + parameters={ + "command": { + "type": "string", + "description": "Shell command to execute (must match allow-list)", + }, + "timeout": { + "type": "integer", + "description": "Timeout in seconds (default 60)", + "default": 60, + }, + }, + required=["command"], + ), + "git": create_tool_schema( + name="hand_git", + description=( + "Execute a git operation in the project repository. " + "Supports status, log, diff, add, commit, push, pull, " + "clone, and branch operations. Destructive operations " + "(force-push, hard reset) require explicit confirmation." + ), + parameters={ + "args": { + "type": "string", + "description": "Git arguments (e.g. 'status', 'log --oneline -5')", + }, + "allow_destructive": { + "type": "boolean", + "description": "Allow destructive operations (default false)", + "default": False, + }, + }, + required=["args"], + ), +} + +# Map personas to local hands they should have access to +PERSONA_LOCAL_HAND_MAP: dict[str, list[str]] = { + "forge": ["shell", "git"], + "helm": ["shell", "git"], + "echo": ["git"], + "seer": ["shell"], + "quill": [], + "pixel": [], + "lyra": [], + "reel": [], +} + + +# ── Handlers ───────────────────────────────────────────────────────────────── + +async def _handle_shell(**kwargs: Any) -> str: + """Handler for the shell MCP tool.""" + command = kwargs.get("command", "") + timeout = kwargs.get("timeout") + result = await shell_hand.run(command, timeout=timeout) + if result.success: + return result.stdout or "(no output)" + return f"[Shell error] exit={result.exit_code} {result.error or result.stderr}" + + +async def _handle_git(**kwargs: Any) -> str: + """Handler for the git MCP tool.""" + args = kwargs.get("args", "") + allow_destructive = kwargs.get("allow_destructive", False) + result = await git_hand.run(args, allow_destructive=allow_destructive) + if result.success: + return result.output or "(no output)" + if result.requires_confirmation: + return f"[Git blocked] {result.error}" + return f"[Git error] {result.error or result.output}" + + +_HANDLERS = { + "shell": _handle_shell, + "git": _handle_git, +} + + +def register_hand_tools() -> int: + """Register Shell and Git hands as MCP tools. + + Returns the number of tools registered. + """ + try: + from mcp.registry import tool_registry + except ImportError: + logger.warning("MCP registry not available — skipping hand tool registration") + return 0 + + count = 0 + for hand_name, schema in _HAND_SCHEMAS.items(): + tool_name = f"hand_{hand_name}" + handler = _HANDLERS[hand_name] + + tool_registry.register( + name=tool_name, + schema=schema, + handler=handler, + category="hands", + tags=["hands", hand_name, "local"], + source_module="infrastructure.hands.tools", + requires_confirmation=(hand_name == "shell"), + ) + count += 1 + + logger.info("Registered %d local hand tools in MCP registry", count) + return count + + +def get_local_hands_for_persona(persona_id: str) -> list[str]: + """Return the local hand tool names available to a persona.""" + hand_names = PERSONA_LOCAL_HAND_MAP.get(persona_id, []) + return [f"hand_{h}" for h in hand_names] diff --git a/tests/test_hands_git.py b/tests/test_hands_git.py new file mode 100644 index 00000000..da743cc9 --- /dev/null +++ b/tests/test_hands_git.py @@ -0,0 +1,197 @@ +"""Tests for the Git Hand. + +Covers: +- Destructive operation gating +- Successful git operations (status, log) +- Convenience wrappers +- GitResult dataclass defaults +- Info summary +""" + +import pytest + + +# --------------------------------------------------------------------------- +# Destructive operation gating +# --------------------------------------------------------------------------- + +def test_is_destructive_detects_force_push(): + """Force-push should be flagged as destructive.""" + from infrastructure.hands.git import GitHand + + hand = GitHand() + assert hand._is_destructive("push --force origin main") is True + assert hand._is_destructive("push -f origin main") is True + + +def test_is_destructive_detects_hard_reset(): + """Hard reset should be flagged as destructive.""" + from infrastructure.hands.git import GitHand + + hand = GitHand() + assert hand._is_destructive("reset --hard HEAD~1") is True + + +def test_is_destructive_safe_ops(): + """Safe operations should not be flagged.""" + from infrastructure.hands.git import GitHand + + hand = GitHand() + assert hand._is_destructive("status") is False + assert hand._is_destructive("log --oneline -5") is False + assert hand._is_destructive("push origin main") is False + assert hand._is_destructive("commit -m 'test'") is False + + +@pytest.mark.asyncio +async def test_run_blocks_destructive_by_default(): + """Destructive ops should be blocked unless allow_destructive=True.""" + from infrastructure.hands.git import GitHand + + hand = GitHand() + result = await hand.run("push --force origin main") + assert result.success is False + assert result.requires_confirmation is True + assert "blocked" in result.error.lower() + + +@pytest.mark.asyncio +async def test_run_allows_destructive_with_flag(): + """Destructive ops should execute when allow_destructive=True.""" + from infrastructure.hands.git import GitHand + + # This will fail because there's no remote, but it should NOT be blocked + hand = GitHand() + result = await hand.run("push --force origin nonexistent-branch", allow_destructive=True) + # It will fail (no remote), but it was NOT blocked by the gate + assert result.requires_confirmation is False + + +# --------------------------------------------------------------------------- +# Successful operations +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_run_git_status(): + """git status should succeed in a git repo.""" + from infrastructure.hands.git import GitHand + + hand = GitHand() + result = await hand.run("status --short") + assert result.success is True + assert result.latency_ms > 0 + + +@pytest.mark.asyncio +async def test_run_git_log(): + """git log should succeed in a git repo.""" + from infrastructure.hands.git import GitHand + + hand = GitHand() + result = await hand.run("log --oneline -3") + assert result.success is True + assert result.output # should have at least one commit + + +# --------------------------------------------------------------------------- +# Convenience wrappers +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_status_wrapper(): + """status() convenience wrapper should work.""" + from infrastructure.hands.git import GitHand + + hand = GitHand() + result = await hand.status() + assert result.success is True + assert result.operation == "git status --short" + + +@pytest.mark.asyncio +async def test_log_wrapper(): + """log() convenience wrapper should work.""" + from infrastructure.hands.git import GitHand + + hand = GitHand() + result = await hand.log(count=3) + assert result.success is True + assert "git log" in result.operation + + +@pytest.mark.asyncio +async def test_diff_wrapper(): + """diff() convenience wrapper should work.""" + from infrastructure.hands.git import GitHand + + hand = GitHand() + result = await hand.diff() + assert result.success is True + assert result.operation == "git diff" + + +@pytest.mark.asyncio +async def test_diff_staged_wrapper(): + """diff(staged=True) should pass --cached.""" + from infrastructure.hands.git import GitHand + + hand = GitHand() + result = await hand.diff(staged=True) + assert result.success is True + assert result.operation == "git diff --cached" + + +# --------------------------------------------------------------------------- +# GitResult dataclass +# --------------------------------------------------------------------------- + +def test_git_result_defaults(): + """GitResult should have sensible defaults.""" + from infrastructure.hands.git import GitResult + + r = GitResult(operation="git status", success=True) + assert r.output == "" + assert r.error == "" + assert r.latency_ms == 0.0 + assert r.requires_confirmation is False + assert r.metadata == {} + + +# --------------------------------------------------------------------------- +# Info summary +# --------------------------------------------------------------------------- + +def test_info_returns_summary(): + """info() should return a dict with repo_dir and timeout.""" + from infrastructure.hands.git import GitHand + + hand = GitHand() + i = hand.info() + assert "repo_dir" in i + assert "default_timeout" in i + + +# --------------------------------------------------------------------------- +# Tool registration +# --------------------------------------------------------------------------- + +def test_persona_hand_map(): + """Forge and Helm should have both shell and git access.""" + from infrastructure.hands.tools import get_local_hands_for_persona + + forge_hands = get_local_hands_for_persona("forge") + assert "hand_shell" in forge_hands + assert "hand_git" in forge_hands + + helm_hands = get_local_hands_for_persona("helm") + assert "hand_shell" in helm_hands + assert "hand_git" in helm_hands + + # Echo only gets git + echo_hands = get_local_hands_for_persona("echo") + assert "hand_git" in echo_hands + assert "hand_shell" not in echo_hands + + # Quill gets neither + quill_hands = get_local_hands_for_persona("quill") + assert quill_hands == [] diff --git a/tests/test_hands_shell.py b/tests/test_hands_shell.py new file mode 100644 index 00000000..74c7aa0e --- /dev/null +++ b/tests/test_hands_shell.py @@ -0,0 +1,189 @@ +"""Tests for the Shell Execution Hand. + +Covers: +- Command validation (allow-list and block-list) +- Successful command execution +- Command failure (non-zero exit code) +- Timeout enforcement +- ShellResult dataclass defaults +- Status summary +""" + +import asyncio + +import pytest + + +# --------------------------------------------------------------------------- +# Command validation +# --------------------------------------------------------------------------- + +def test_validate_allows_safe_commands(): + """Commands matching the allow-list should pass validation.""" + from infrastructure.hands.shell import ShellHand + + hand = ShellHand() + assert hand._validate_command("make test") is None + assert hand._validate_command("pytest -q") is None + assert hand._validate_command("git status") is None + assert hand._validate_command("python -c 'print(1)'") is None + assert hand._validate_command("ls -la") is None + + +def test_validate_blocks_unknown_commands(): + """Commands not in the allow-list should be rejected.""" + from infrastructure.hands.shell import ShellHand + + hand = ShellHand() + err = hand._validate_command("reboot") + assert err is not None + assert "not in allow-list" in err + + +def test_validate_blocks_dangerous_commands(): + """Explicitly dangerous commands should be blocked.""" + from infrastructure.hands.shell import ShellHand + + hand = ShellHand() + err = hand._validate_command("rm -rf /") + assert err is not None + assert "blocked by safety filter" in err + + +def test_validate_empty_command(): + """Empty commands should be rejected.""" + from infrastructure.hands.shell import ShellHand + + hand = ShellHand() + err = hand._validate_command("") + assert err is not None + + +def test_validate_strips_path_prefix(): + """Command with a path prefix should still match allow-list.""" + from infrastructure.hands.shell import ShellHand + + hand = ShellHand() + assert hand._validate_command("/usr/bin/python --version") is None + assert hand._validate_command("/usr/local/bin/make test") is None + + +# --------------------------------------------------------------------------- +# Execution — success path +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_run_echo(): + """A simple echo command should succeed.""" + from infrastructure.hands.shell import ShellHand + + hand = ShellHand() + result = await hand.run("echo hello world") + assert result.success is True + assert result.exit_code == 0 + assert "hello world" in result.stdout + assert result.latency_ms > 0 + + +@pytest.mark.asyncio +async def test_run_python_expression(): + """Running a python one-liner should succeed.""" + from infrastructure.hands.shell import ShellHand + + hand = ShellHand() + result = await hand.run("python -c 'print(2 + 2)'") + assert result.success is True + assert "4" in result.stdout + + +# --------------------------------------------------------------------------- +# Execution — failure path +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_run_blocked_command(): + """Running a blocked command returns success=False without executing.""" + from infrastructure.hands.shell import ShellHand + + hand = ShellHand() + result = await hand.run("shutdown now") + assert result.success is False + assert "not in allow-list" in result.error + + +@pytest.mark.asyncio +async def test_run_nonzero_exit(): + """A command that exits non-zero should return success=False.""" + from infrastructure.hands.shell import ShellHand + + hand = ShellHand() + result = await hand.run("python -c 'import sys; sys.exit(1)'") + assert result.success is False + assert result.exit_code == 1 + + +# --------------------------------------------------------------------------- +# Timeout +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_run_timeout(): + """A command exceeding timeout should be killed and return timed_out=True.""" + from unittest.mock import AsyncMock, MagicMock, patch + + from infrastructure.hands.shell import ShellHand + + hand = ShellHand() + + # Mock subprocess that never finishes until killed + mock_proc = MagicMock() + mock_proc.kill = MagicMock() + mock_proc.wait = AsyncMock() + mock_proc.returncode = -9 + + async def slow_communicate(): + await asyncio.sleep(999) + + mock_proc.communicate = slow_communicate + + with patch("asyncio.create_subprocess_shell", return_value=mock_proc): + result = await hand.run("python -c 'import time; time.sleep(30)'", timeout=1) + + assert result.success is False + assert result.timed_out is True + mock_proc.kill.assert_called_once() + + +# --------------------------------------------------------------------------- +# ShellResult dataclass +# --------------------------------------------------------------------------- + +def test_shell_result_defaults(): + """ShellResult should have sensible defaults.""" + from infrastructure.hands.shell import ShellResult + + r = ShellResult(command="echo hi", success=True) + assert r.exit_code == -1 + assert r.stdout == "" + assert r.stderr == "" + assert r.error == "" + assert r.latency_ms == 0.0 + assert r.timed_out is False + assert r.metadata == {} + + +# --------------------------------------------------------------------------- +# Status summary +# --------------------------------------------------------------------------- + +def test_status_returns_summary(): + """status() should return a dict with enabled, working_dir, etc.""" + from infrastructure.hands.shell import ShellHand + + hand = ShellHand() + s = hand.status() + assert "enabled" in s + assert "working_dir" in s + assert "default_timeout" in s + assert "allowed_prefixes" in s + assert isinstance(s["allowed_prefixes"], list)