feat: add Shell and Git execution hands for Timmy (#136)

2026-03-06 09:01:24 -05:00
parent 39461858a0
commit 3b322d185c
7 changed files with 1034 additions and 0 deletions
--- a/src/config.py
+++ b/src/config.py
@@ -176,6 +176,18 @@ class Settings(BaseSettings):
    # Timeout in seconds for OpenFang hand execution (some hands are slow).
    openfang_timeout: int = 120

+    # ── Local Hands (Shell + Git) ──────────────────────────────────────
+    # Enable local shell/git execution hands.
+    hands_shell_enabled: bool = True
+    # Default timeout in seconds for shell commands.
+    hands_shell_timeout: int = 60
+    # Comma-separated additional command prefixes to allow.
+    hands_shell_extra_allowed: str = ""
+    # Enable the git hand for version-control operations.
+    hands_git_enabled: bool = True
+    # Default timeout for git operations.
+    hands_git_timeout: int = 60
+
    # ── Error Logging ─────────────────────────────────────────────────
    error_log_enabled: bool = True
    error_log_dir: str = "logs"
--- a/src/infrastructure/hands/init.py
+++ b/src/infrastructure/hands/init.py
@@ -0,0 +1,17 @@
+"""Hands — local execution capabilities for Timmy.
+
+Unlike OpenFang (vendored binary sidecar), these hands run in-process
+and provide direct shell and git execution with sandboxing and approval
+gates.
+
+Usage:
+    from infrastructure.hands import shell_hand, git_hand
+
+    result = await shell_hand.run("make test")
+    result = await git_hand.run("status")
+"""
+
+from infrastructure.hands.shell import shell_hand
+from infrastructure.hands.git import git_hand
+
+__all__ = ["shell_hand", "git_hand"]
--- a/src/infrastructure/hands/git.py
+++ b/src/infrastructure/hands/git.py
@@ -0,0 +1,222 @@
+"""Git Hand — version-control operations for Timmy.
+
+Provides git capabilities with:
+- Safe defaults (no force-push without explicit override)
+- Approval gates for destructive operations
+- Structured result parsing
+- Working-directory pinning to repo root
+
+Follows project conventions:
+- Config via ``from config import settings``
+- Singleton pattern for module-level import
+- Graceful degradation: log error, return fallback, never crash
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import time
+from dataclasses import dataclass, field
+from typing import Optional
+
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+# Operations that require explicit confirmation before execution
+DESTRUCTIVE_OPS = frozenset({
+    "push --force",
+    "push -f",
+    "reset --hard",
+    "clean -fd",
+    "clean -f",
+    "branch -D",
+    "checkout -- .",
+    "restore .",
+})
+
+
+@dataclass
+class GitResult:
+    """Result from a git operation."""
+
+    operation: str
+    success: bool
+    output: str = ""
+    error: str = ""
+    latency_ms: float = 0.0
+    requires_confirmation: bool = False
+    metadata: dict = field(default_factory=dict)
+
+
+class GitHand:
+    """Git operations hand for Timmy.
+
+    All methods degrade gracefully — if git is not available or the
+    command fails, the hand returns a ``GitResult(success=False)``
+    rather than raising.
+    """
+
+    def __init__(self, repo_dir: Optional[str] = None, timeout: int = 60) -> None:
+        self._repo_dir = repo_dir or settings.repo_root or None
+        self._timeout = timeout
+        logger.info("GitHand initialised — repo=%s", self._repo_dir)
+
+    def _is_destructive(self, args: str) -> bool:
+        """Check if a git operation is destructive."""
+        for op in DESTRUCTIVE_OPS:
+            if op in args:
+                return True
+        return False
+
+    async def run(
+        self,
+        args: str,
+        timeout: Optional[int] = None,
+        allow_destructive: bool = False,
+    ) -> GitResult:
+        """Execute a git command.
+
+        Args:
+            args: Git arguments (e.g. "status", "log --oneline -5").
+            timeout: Override default timeout (seconds).
+            allow_destructive: Must be True to run destructive ops.
+
+        Returns:
+            GitResult with output or error details.
+        """
+        start = time.time()
+
+        # Gate destructive operations
+        if self._is_destructive(args) and not allow_destructive:
+            return GitResult(
+                operation=f"git {args}",
+                success=False,
+                error=(
+                    f"Destructive operation blocked: 'git {args}'. "
+                    "Set allow_destructive=True to override."
+                ),
+                requires_confirmation=True,
+                latency_ms=(time.time() - start) * 1000,
+            )
+
+        effective_timeout = timeout or self._timeout
+        command = f"git {args}"
+
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                "git",
+                *args.split(),
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                cwd=self._repo_dir,
+            )
+
+            try:
+                stdout_bytes, stderr_bytes = await asyncio.wait_for(
+                    proc.communicate(), timeout=effective_timeout
+                )
+            except asyncio.TimeoutError:
+                proc.kill()
+                await proc.wait()
+                latency = (time.time() - start) * 1000
+                logger.warning("Git command timed out after %ds: %s", effective_timeout, command)
+                return GitResult(
+                    operation=command,
+                    success=False,
+                    error=f"Command timed out after {effective_timeout}s",
+                    latency_ms=latency,
+                )
+
+            latency = (time.time() - start) * 1000
+            exit_code = proc.returncode or 0
+            stdout = stdout_bytes.decode("utf-8", errors="replace").strip()
+            stderr = stderr_bytes.decode("utf-8", errors="replace").strip()
+
+            return GitResult(
+                operation=command,
+                success=exit_code == 0,
+                output=stdout,
+                error=stderr if exit_code != 0 else "",
+                latency_ms=latency,
+            )
+
+        except FileNotFoundError:
+            latency = (time.time() - start) * 1000
+            logger.warning("git binary not found")
+            return GitResult(
+                operation=command,
+                success=False,
+                error="git binary not found on PATH",
+                latency_ms=latency,
+            )
+        except Exception as exc:
+            latency = (time.time() - start) * 1000
+            logger.warning("Git command failed: %s — %s", command, exc)
+            return GitResult(
+                operation=command,
+                success=False,
+                error=str(exc),
+                latency_ms=latency,
+            )
+
+    # ── Convenience wrappers ─────────────────────────────────────────────────
+
+    async def status(self) -> GitResult:
+        """Run ``git status --short``."""
+        return await self.run("status --short")
+
+    async def log(self, count: int = 10) -> GitResult:
+        """Run ``git log --oneline``."""
+        return await self.run(f"log --oneline -{count}")
+
+    async def diff(self, staged: bool = False) -> GitResult:
+        """Run ``git diff`` (or ``git diff --cached`` for staged)."""
+        args = "diff --cached" if staged else "diff"
+        return await self.run(args)
+
+    async def add(self, paths: str = ".") -> GitResult:
+        """Stage files."""
+        return await self.run(f"add {paths}")
+
+    async def commit(self, message: str) -> GitResult:
+        """Create a commit with the given message."""
+        # Use -- to prevent message from being interpreted as flags
+        return await self.run(f"commit -m {message!r}")
+
+    async def checkout_branch(self, branch: str, create: bool = False) -> GitResult:
+        """Checkout (or create) a branch."""
+        flag = "-b" if create else ""
+        return await self.run(f"checkout {flag} {branch}".strip())
+
+    async def push(self, remote: str = "origin", branch: str = "", force: bool = False) -> GitResult:
+        """Push to remote. Force-push requires explicit opt-in."""
+        args = f"push -u {remote} {branch}".strip()
+        if force:
+            args = f"push --force {remote} {branch}".strip()
+            return await self.run(args, allow_destructive=True)
+        return await self.run(args)
+
+    async def clone(self, url: str, dest: str = "") -> GitResult:
+        """Clone a repository."""
+        args = f"clone {url}"
+        if dest:
+            args += f" {dest}"
+        return await self.run(args, timeout=120)
+
+    async def pull(self, remote: str = "origin", branch: str = "") -> GitResult:
+        """Pull from remote."""
+        args = f"pull {remote} {branch}".strip()
+        return await self.run(args)
+
+    def info(self) -> dict:
+        """Return a status summary for the dashboard."""
+        return {
+            "repo_dir": self._repo_dir,
+            "default_timeout": self._timeout,
+        }
+
+
+# ── Module-level singleton ──────────────────────────────────────────────────
+git_hand = GitHand()
--- a/src/infrastructure/hands/shell.py
+++ b/src/infrastructure/hands/shell.py
@@ -0,0 +1,249 @@
+"""Shell Execution Hand — sandboxed command runner for Timmy.
+
+Provides a restricted shell execution environment with:
+- Configurable command allow-list
+- Timeout enforcement
+- Working-directory pinning
+- Graceful degradation (never crashes the coordinator)
+
+Follows project conventions:
+- Config via ``from config import settings``
+- Singleton pattern for module-level import
+- Graceful degradation: log error, return fallback, never crash
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import shlex
+import time
+from dataclasses import dataclass, field
+from typing import Optional
+
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+# Commands that are always blocked regardless of allow-list
+_BLOCKED_COMMANDS = frozenset({
+    "rm -rf /",
+    "rm -rf /*",
+    "mkfs",
+    "dd if=/dev/zero",
+    ":(){ :|:& };:",  # fork bomb
+    "> /dev/sda",
+    "chmod -R 777 /",
+})
+
+# Default allow-list: safe build/dev commands
+DEFAULT_ALLOWED_PREFIXES = (
+    "make",
+    "pytest",
+    "python",
+    "pip",
+    "git",
+    "ls",
+    "cat",
+    "head",
+    "tail",
+    "grep",
+    "find",
+    "echo",
+    "env",
+    "which",
+    "uname",
+    "whoami",
+    "date",
+    "wc",
+    "sort",
+    "uniq",
+    "diff",
+    "curl",
+    "wget",
+    "docker",
+    "npm",
+    "node",
+    "cargo",
+    "rustc",
+)
+
+
+@dataclass
+class ShellResult:
+    """Result from a shell command execution."""
+
+    command: str
+    success: bool
+    exit_code: int = -1
+    stdout: str = ""
+    stderr: str = ""
+    error: str = ""
+    latency_ms: float = 0.0
+    timed_out: bool = False
+    metadata: dict = field(default_factory=dict)
+
+
+class ShellHand:
+    """Sandboxed shell executor for Timmy.
+
+    All methods degrade gracefully — if a command fails or times out,
+    the hand returns a ``ShellResult(success=False)`` rather than raising.
+    """
+
+    def __init__(
+        self,
+        allowed_prefixes: Optional[tuple[str, ...]] = None,
+        default_timeout: int = 60,
+        working_dir: Optional[str] = None,
+    ) -> None:
+        self._allowed_prefixes = allowed_prefixes or DEFAULT_ALLOWED_PREFIXES
+        self._default_timeout = default_timeout
+        self._working_dir = working_dir or settings.repo_root or None
+        self._enabled = True
+        logger.info(
+            "ShellHand initialised — cwd=%s, timeout=%ds, %d allowed prefixes",
+            self._working_dir,
+            self._default_timeout,
+            len(self._allowed_prefixes),
+        )
+
+    @property
+    def enabled(self) -> bool:
+        return self._enabled
+
+    def _validate_command(self, command: str) -> Optional[str]:
+        """Validate a command against the allow-list.
+
+        Returns None if valid, or an error message if blocked.
+        """
+        stripped = command.strip()
+
+        # Check explicit block-list
+        for blocked in _BLOCKED_COMMANDS:
+            if blocked in stripped:
+                return f"Command blocked by safety filter: {blocked!r}"
+
+        # Check allow-list by first token
+        try:
+            tokens = shlex.split(stripped)
+        except ValueError:
+            tokens = stripped.split()
+
+        if not tokens:
+            return "Empty command"
+
+        base_cmd = tokens[0].split("/")[-1]  # strip path prefix
+
+        if base_cmd not in self._allowed_prefixes:
+            return (
+                f"Command '{base_cmd}' not in allow-list. "
+                f"Allowed: {', '.join(sorted(self._allowed_prefixes))}"
+            )
+
+        return None
+
+    async def run(
+        self,
+        command: str,
+        timeout: Optional[int] = None,
+        working_dir: Optional[str] = None,
+        env: Optional[dict] = None,
+    ) -> ShellResult:
+        """Execute a shell command in a sandboxed environment.
+
+        Args:
+            command: The shell command to execute.
+            timeout: Override default timeout (seconds).
+            working_dir: Override default working directory.
+            env: Additional environment variables to set.
+
+        Returns:
+            ShellResult with stdout/stderr or error details.
+        """
+        start = time.time()
+
+        # Validate
+        validation_error = self._validate_command(command)
+        if validation_error:
+            return ShellResult(
+                command=command,
+                success=False,
+                error=validation_error,
+                latency_ms=(time.time() - start) * 1000,
+            )
+
+        effective_timeout = timeout or self._default_timeout
+        cwd = working_dir or self._working_dir
+
+        try:
+            import os
+
+            run_env = os.environ.copy()
+            if env:
+                run_env.update(env)
+
+            proc = await asyncio.create_subprocess_shell(
+                command,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                cwd=cwd,
+                env=run_env,
+            )
+
+            try:
+                stdout_bytes, stderr_bytes = await asyncio.wait_for(
+                    proc.communicate(), timeout=effective_timeout
+                )
+            except asyncio.TimeoutError:
+                proc.kill()
+                await proc.wait()
+                latency = (time.time() - start) * 1000
+                logger.warning(
+                    "Shell command timed out after %ds: %s", effective_timeout, command
+                )
+                return ShellResult(
+                    command=command,
+                    success=False,
+                    exit_code=-1,
+                    error=f"Command timed out after {effective_timeout}s",
+                    latency_ms=latency,
+                    timed_out=True,
+                )
+
+            latency = (time.time() - start) * 1000
+            exit_code = proc.returncode or 0
+            stdout = stdout_bytes.decode("utf-8", errors="replace").strip()
+            stderr = stderr_bytes.decode("utf-8", errors="replace").strip()
+
+            return ShellResult(
+                command=command,
+                success=exit_code == 0,
+                exit_code=exit_code,
+                stdout=stdout,
+                stderr=stderr,
+                latency_ms=latency,
+            )
+
+        except Exception as exc:
+            latency = (time.time() - start) * 1000
+            logger.warning("Shell command failed: %s — %s", command, exc)
+            return ShellResult(
+                command=command,
+                success=False,
+                error=str(exc),
+                latency_ms=latency,
+            )
+
+    def status(self) -> dict:
+        """Return a status summary for the dashboard."""
+        return {
+            "enabled": self._enabled,
+            "working_dir": self._working_dir,
+            "default_timeout": self._default_timeout,
+            "allowed_prefixes": list(self._allowed_prefixes),
+        }
+
+
+# ── Module-level singleton ──────────────────────────────────────────────────
+shell_hand = ShellHand()
--- a/src/infrastructure/hands/tools.py
+++ b/src/infrastructure/hands/tools.py
@@ -0,0 +1,148 @@
+"""Register Shell and Git Hands as MCP tools in Timmy's tool registry.
+
+Shell and Git hands are local execution hands (no HTTP sidecar).
+They provide Timmy with the ability to run shell commands and
+perform git operations directly.
+
+Call ``register_hand_tools()`` during app startup to populate
+the tool registry.
+"""
+
+import logging
+from typing import Any
+
+from infrastructure.hands.shell import shell_hand
+from infrastructure.hands.git import git_hand
+
+try:
+    from mcp.schemas.base import create_tool_schema
+except ImportError:
+    def create_tool_schema(**kwargs):
+        return kwargs
+
+logger = logging.getLogger(__name__)
+
+# ── Tool schemas ─────────────────────────────────────────────────────────────
+
+_HAND_SCHEMAS: dict[str, dict] = {
+    "shell": create_tool_schema(
+        name="hand_shell",
+        description=(
+            "Execute a shell command in a sandboxed environment. "
+            "Commands are validated against an allow-list. "
+            "Returns stdout, stderr, and exit code."
+        ),
+        parameters={
+            "command": {
+                "type": "string",
+                "description": "Shell command to execute (must match allow-list)",
+            },
+            "timeout": {
+                "type": "integer",
+                "description": "Timeout in seconds (default 60)",
+                "default": 60,
+            },
+        },
+        required=["command"],
+    ),
+    "git": create_tool_schema(
+        name="hand_git",
+        description=(
+            "Execute a git operation in the project repository. "
+            "Supports status, log, diff, add, commit, push, pull, "
+            "clone, and branch operations. Destructive operations "
+            "(force-push, hard reset) require explicit confirmation."
+        ),
+        parameters={
+            "args": {
+                "type": "string",
+                "description": "Git arguments (e.g. 'status', 'log --oneline -5')",
+            },
+            "allow_destructive": {
+                "type": "boolean",
+                "description": "Allow destructive operations (default false)",
+                "default": False,
+            },
+        },
+        required=["args"],
+    ),
+}
+
+# Map personas to local hands they should have access to
+PERSONA_LOCAL_HAND_MAP: dict[str, list[str]] = {
+    "forge": ["shell", "git"],
+    "helm": ["shell", "git"],
+    "echo": ["git"],
+    "seer": ["shell"],
+    "quill": [],
+    "pixel": [],
+    "lyra": [],
+    "reel": [],
+}
+
+
+# ── Handlers ─────────────────────────────────────────────────────────────────
+
+async def _handle_shell(**kwargs: Any) -> str:
+    """Handler for the shell MCP tool."""
+    command = kwargs.get("command", "")
+    timeout = kwargs.get("timeout")
+    result = await shell_hand.run(command, timeout=timeout)
+    if result.success:
+        return result.stdout or "(no output)"
+    return f"[Shell error] exit={result.exit_code} {result.error or result.stderr}"
+
+
+async def _handle_git(**kwargs: Any) -> str:
+    """Handler for the git MCP tool."""
+    args = kwargs.get("args", "")
+    allow_destructive = kwargs.get("allow_destructive", False)
+    result = await git_hand.run(args, allow_destructive=allow_destructive)
+    if result.success:
+        return result.output or "(no output)"
+    if result.requires_confirmation:
+        return f"[Git blocked] {result.error}"
+    return f"[Git error] {result.error or result.output}"
+
+
+_HANDLERS = {
+    "shell": _handle_shell,
+    "git": _handle_git,
+}
+
+
+def register_hand_tools() -> int:
+    """Register Shell and Git hands as MCP tools.
+
+    Returns the number of tools registered.
+    """
+    try:
+        from mcp.registry import tool_registry
+    except ImportError:
+        logger.warning("MCP registry not available — skipping hand tool registration")
+        return 0
+
+    count = 0
+    for hand_name, schema in _HAND_SCHEMAS.items():
+        tool_name = f"hand_{hand_name}"
+        handler = _HANDLERS[hand_name]
+
+        tool_registry.register(
+            name=tool_name,
+            schema=schema,
+            handler=handler,
+            category="hands",
+            tags=["hands", hand_name, "local"],
+            source_module="infrastructure.hands.tools",
+            requires_confirmation=(hand_name == "shell"),
+        )
+        count += 1
+
+    logger.info("Registered %d local hand tools in MCP registry", count)
+    return count
+
+
+def get_local_hands_for_persona(persona_id: str) -> list[str]:
+    """Return the local hand tool names available to a persona."""
+    hand_names = PERSONA_LOCAL_HAND_MAP.get(persona_id, [])
+    return [f"hand_{h}" for h in hand_names]
--- a/tests/test_hands_git.py
+++ b/tests/test_hands_git.py
@@ -0,0 +1,197 @@
+"""Tests for the Git Hand.
+
+Covers:
+- Destructive operation gating
+- Successful git operations (status, log)
+- Convenience wrappers
+- GitResult dataclass defaults
+- Info summary
+"""
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Destructive operation gating
+# ---------------------------------------------------------------------------
+
+def test_is_destructive_detects_force_push():
+    """Force-push should be flagged as destructive."""
+    from infrastructure.hands.git import GitHand
+
+    hand = GitHand()
+    assert hand._is_destructive("push --force origin main") is True
+    assert hand._is_destructive("push -f origin main") is True
+
+
+def test_is_destructive_detects_hard_reset():
+    """Hard reset should be flagged as destructive."""
+    from infrastructure.hands.git import GitHand
+
+    hand = GitHand()
+    assert hand._is_destructive("reset --hard HEAD~1") is True
+
+
+def test_is_destructive_safe_ops():
+    """Safe operations should not be flagged."""
+    from infrastructure.hands.git import GitHand
+
+    hand = GitHand()
+    assert hand._is_destructive("status") is False
+    assert hand._is_destructive("log --oneline -5") is False
+    assert hand._is_destructive("push origin main") is False
+    assert hand._is_destructive("commit -m 'test'") is False
+
+
+@pytest.mark.asyncio
+async def test_run_blocks_destructive_by_default():
+    """Destructive ops should be blocked unless allow_destructive=True."""
+    from infrastructure.hands.git import GitHand
+
+    hand = GitHand()
+    result = await hand.run("push --force origin main")
+    assert result.success is False
+    assert result.requires_confirmation is True
+    assert "blocked" in result.error.lower()
+
+
+@pytest.mark.asyncio
+async def test_run_allows_destructive_with_flag():
+    """Destructive ops should execute when allow_destructive=True."""
+    from infrastructure.hands.git import GitHand
+
+    # This will fail because there's no remote, but it should NOT be blocked
+    hand = GitHand()
+    result = await hand.run("push --force origin nonexistent-branch", allow_destructive=True)
+    # It will fail (no remote), but it was NOT blocked by the gate
+    assert result.requires_confirmation is False
+
+
+# ---------------------------------------------------------------------------
+# Successful operations
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_run_git_status():
+    """git status should succeed in a git repo."""
+    from infrastructure.hands.git import GitHand
+
+    hand = GitHand()
+    result = await hand.run("status --short")
+    assert result.success is True
+    assert result.latency_ms > 0
+
+
+@pytest.mark.asyncio
+async def test_run_git_log():
+    """git log should succeed in a git repo."""
+    from infrastructure.hands.git import GitHand
+
+    hand = GitHand()
+    result = await hand.run("log --oneline -3")
+    assert result.success is True
+    assert result.output  # should have at least one commit
+
+
+# ---------------------------------------------------------------------------
+# Convenience wrappers
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_status_wrapper():
+    """status() convenience wrapper should work."""
+    from infrastructure.hands.git import GitHand
+
+    hand = GitHand()
+    result = await hand.status()
+    assert result.success is True
+    assert result.operation == "git status --short"
+
+
+@pytest.mark.asyncio
+async def test_log_wrapper():
+    """log() convenience wrapper should work."""
+    from infrastructure.hands.git import GitHand
+
+    hand = GitHand()
+    result = await hand.log(count=3)
+    assert result.success is True
+    assert "git log" in result.operation
+
+
+@pytest.mark.asyncio
+async def test_diff_wrapper():
+    """diff() convenience wrapper should work."""
+    from infrastructure.hands.git import GitHand
+
+    hand = GitHand()
+    result = await hand.diff()
+    assert result.success is True
+    assert result.operation == "git diff"
+
+
+@pytest.mark.asyncio
+async def test_diff_staged_wrapper():
+    """diff(staged=True) should pass --cached."""
+    from infrastructure.hands.git import GitHand
+
+    hand = GitHand()
+    result = await hand.diff(staged=True)
+    assert result.success is True
+    assert result.operation == "git diff --cached"
+
+
+# ---------------------------------------------------------------------------
+# GitResult dataclass
+# ---------------------------------------------------------------------------
+
+def test_git_result_defaults():
+    """GitResult should have sensible defaults."""
+    from infrastructure.hands.git import GitResult
+
+    r = GitResult(operation="git status", success=True)
+    assert r.output == ""
+    assert r.error == ""
+    assert r.latency_ms == 0.0
+    assert r.requires_confirmation is False
+    assert r.metadata == {}
+
+
+# ---------------------------------------------------------------------------
+# Info summary
+# ---------------------------------------------------------------------------
+
+def test_info_returns_summary():
+    """info() should return a dict with repo_dir and timeout."""
+    from infrastructure.hands.git import GitHand
+
+    hand = GitHand()
+    i = hand.info()
+    assert "repo_dir" in i
+    assert "default_timeout" in i
+
+
+# ---------------------------------------------------------------------------
+# Tool registration
+# ---------------------------------------------------------------------------
+
+def test_persona_hand_map():
+    """Forge and Helm should have both shell and git access."""
+    from infrastructure.hands.tools import get_local_hands_for_persona
+
+    forge_hands = get_local_hands_for_persona("forge")
+    assert "hand_shell" in forge_hands
+    assert "hand_git" in forge_hands
+
+    helm_hands = get_local_hands_for_persona("helm")
+    assert "hand_shell" in helm_hands
+    assert "hand_git" in helm_hands
+
+    # Echo only gets git
+    echo_hands = get_local_hands_for_persona("echo")
+    assert "hand_git" in echo_hands
+    assert "hand_shell" not in echo_hands
+
+    # Quill gets neither
+    quill_hands = get_local_hands_for_persona("quill")
+    assert quill_hands == []
--- a/tests/test_hands_shell.py
+++ b/tests/test_hands_shell.py
@@ -0,0 +1,189 @@
+"""Tests for the Shell Execution Hand.
+
+Covers:
+- Command validation (allow-list and block-list)
+- Successful command execution
+- Command failure (non-zero exit code)
+- Timeout enforcement
+- ShellResult dataclass defaults
+- Status summary
+"""
+
+import asyncio
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Command validation
+# ---------------------------------------------------------------------------
+
+def test_validate_allows_safe_commands():
+    """Commands matching the allow-list should pass validation."""
+    from infrastructure.hands.shell import ShellHand
+
+    hand = ShellHand()
+    assert hand._validate_command("make test") is None
+    assert hand._validate_command("pytest -q") is None
+    assert hand._validate_command("git status") is None
+    assert hand._validate_command("python -c 'print(1)'") is None
+    assert hand._validate_command("ls -la") is None
+
+
+def test_validate_blocks_unknown_commands():
+    """Commands not in the allow-list should be rejected."""
+    from infrastructure.hands.shell import ShellHand
+
+    hand = ShellHand()
+    err = hand._validate_command("reboot")
+    assert err is not None
+    assert "not in allow-list" in err
+
+
+def test_validate_blocks_dangerous_commands():
+    """Explicitly dangerous commands should be blocked."""
+    from infrastructure.hands.shell import ShellHand
+
+    hand = ShellHand()
+    err = hand._validate_command("rm -rf /")
+    assert err is not None
+    assert "blocked by safety filter" in err
+
+
+def test_validate_empty_command():
+    """Empty commands should be rejected."""
+    from infrastructure.hands.shell import ShellHand
+
+    hand = ShellHand()
+    err = hand._validate_command("")
+    assert err is not None
+
+
+def test_validate_strips_path_prefix():
+    """Command with a path prefix should still match allow-list."""
+    from infrastructure.hands.shell import ShellHand
+
+    hand = ShellHand()
+    assert hand._validate_command("/usr/bin/python --version") is None
+    assert hand._validate_command("/usr/local/bin/make test") is None
+
+
+# ---------------------------------------------------------------------------
+# Execution — success path
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_run_echo():
+    """A simple echo command should succeed."""
+    from infrastructure.hands.shell import ShellHand
+
+    hand = ShellHand()
+    result = await hand.run("echo hello world")
+    assert result.success is True
+    assert result.exit_code == 0
+    assert "hello world" in result.stdout
+    assert result.latency_ms > 0
+
+
+@pytest.mark.asyncio
+async def test_run_python_expression():
+    """Running a python one-liner should succeed."""
+    from infrastructure.hands.shell import ShellHand
+
+    hand = ShellHand()
+    result = await hand.run("python -c 'print(2 + 2)'")
+    assert result.success is True
+    assert "4" in result.stdout
+
+
+# ---------------------------------------------------------------------------
+# Execution — failure path
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_run_blocked_command():
+    """Running a blocked command returns success=False without executing."""
+    from infrastructure.hands.shell import ShellHand
+
+    hand = ShellHand()
+    result = await hand.run("shutdown now")
+    assert result.success is False
+    assert "not in allow-list" in result.error
+
+
+@pytest.mark.asyncio
+async def test_run_nonzero_exit():
+    """A command that exits non-zero should return success=False."""
+    from infrastructure.hands.shell import ShellHand
+
+    hand = ShellHand()
+    result = await hand.run("python -c 'import sys; sys.exit(1)'")
+    assert result.success is False
+    assert result.exit_code == 1
+
+
+# ---------------------------------------------------------------------------
+# Timeout
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_run_timeout():
+    """A command exceeding timeout should be killed and return timed_out=True."""
+    from unittest.mock import AsyncMock, MagicMock, patch
+
+    from infrastructure.hands.shell import ShellHand
+
+    hand = ShellHand()
+
+    # Mock subprocess that never finishes until killed
+    mock_proc = MagicMock()
+    mock_proc.kill = MagicMock()
+    mock_proc.wait = AsyncMock()
+    mock_proc.returncode = -9
+
+    async def slow_communicate():
+        await asyncio.sleep(999)
+
+    mock_proc.communicate = slow_communicate
+
+    with patch("asyncio.create_subprocess_shell", return_value=mock_proc):
+        result = await hand.run("python -c 'import time; time.sleep(30)'", timeout=1)
+
+    assert result.success is False
+    assert result.timed_out is True
+    mock_proc.kill.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# ShellResult dataclass
+# ---------------------------------------------------------------------------
+
+def test_shell_result_defaults():
+    """ShellResult should have sensible defaults."""
+    from infrastructure.hands.shell import ShellResult
+
+    r = ShellResult(command="echo hi", success=True)
+    assert r.exit_code == -1
+    assert r.stdout == ""
+    assert r.stderr == ""
+    assert r.error == ""
+    assert r.latency_ms == 0.0
+    assert r.timed_out is False
+    assert r.metadata == {}
+
+
+# ---------------------------------------------------------------------------
+# Status summary
+# ---------------------------------------------------------------------------
+
+def test_status_returns_summary():
+    """status() should return a dict with enabled, working_dir, etc."""
+    from infrastructure.hands.shell import ShellHand
+
+    hand = ShellHand()
+    s = hand.status()
+    assert "enabled" in s
+    assert "working_dir" in s
+    assert "default_timeout" in s
+    assert "allowed_prefixes" in s
+    assert isinstance(s["allowed_prefixes"], list)