forked from Rockachopa/Timmy-time-dashboard
Wire orchestrator pipe into task runner + pipe-verifying integration tests (#134)
This commit is contained in:
committed by
GitHub
parent
d10cff333a
commit
87dc5eadfe
205
tests/integrations/test_paperclip_bridge.py
Normal file
205
tests/integrations/test_paperclip_bridge.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""Tests for the Paperclip bridge (CEO orchestration logic)."""
|
||||
|
||||
from unittest.mock import AsyncMock, patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from integrations.paperclip.bridge import PaperclipBridge
|
||||
from integrations.paperclip.client import PaperclipClient
|
||||
from integrations.paperclip.models import PaperclipAgent, PaperclipGoal, PaperclipIssue
|
||||
|
||||
|
||||
@pytest.fixture
def mock_client():
    """PaperclipClient stub with every awaited method pre-wired as an AsyncMock."""
    stub = MagicMock(spec=PaperclipClient)
    stub.healthy = AsyncMock(return_value=True)
    # Listing endpoints default to empty collections …
    for name in (
        "list_agents",
        "list_issues",
        "list_goals",
        "list_approvals",
        "list_heartbeat_runs",
        "list_comments",
    ):
        setattr(stub, name, AsyncMock(return_value=[]))
    # … and every lookup/mutation defaults to "nothing happened".
    for name in (
        "get_issue",
        "get_org",
        "create_issue",
        "update_issue",
        "add_comment",
        "wake_agent",
        "create_goal",
        "approve",
        "reject",
        "cancel_run",
    ):
        setattr(stub, name, AsyncMock(return_value=None))
    return stub


@pytest.fixture
def bridge(mock_client):
    """Bridge under test, wired to the stubbed client."""
    return PaperclipBridge(client=mock_client)
||||
|
||||
|
||||
# ── status ───────────────────────────────────────────────────────────────────


async def test_status_when_disabled(bridge):
    """With the integration switched off, status reports enabled=False."""
    with patch("integrations.paperclip.bridge.settings") as cfg:
        cfg.paperclip_enabled = False
        cfg.paperclip_url = "http://localhost:3100"
        status = await bridge.get_status()
    assert status.enabled is False


async def test_status_when_connected(bridge, mock_client):
    """Healthy backend yields connected=True plus agent/issue counts."""
    mock_client.healthy.return_value = True
    mock_client.list_agents.return_value = [PaperclipAgent(id="a1", name="Codex")]
    mock_client.list_issues.return_value = [
        PaperclipIssue(id="i1", title="Bug"),
        PaperclipIssue(id="i2", title="Feature"),
    ]

    with patch("integrations.paperclip.bridge.settings") as cfg:
        cfg.paperclip_enabled = True
        cfg.paperclip_url = "http://vps:3100"
        cfg.paperclip_company_id = "comp-1"
        status = await bridge.get_status()

    assert status.enabled is True
    assert status.connected is True
    assert status.agent_count == 1
    assert status.issue_count == 2


async def test_status_when_disconnected(bridge, mock_client):
    """Unreachable backend yields connected=False with an explanatory error."""
    mock_client.healthy.return_value = False

    with patch("integrations.paperclip.bridge.settings") as cfg:
        cfg.paperclip_enabled = True
        cfg.paperclip_url = "http://vps:3100"
        cfg.paperclip_company_id = "comp-1"
        status = await bridge.get_status()

    assert status.enabled is True
    assert status.connected is False
    assert "Cannot reach" in status.error
||||
|
||||
|
||||
# ── create and assign ────────────────────────────────────────────────────────


async def test_create_and_assign_with_wake(bridge, mock_client):
    """Creating with wake=True wakes the assignee for the new issue."""
    created = PaperclipIssue(id="i1", title="Deploy v2")
    mock_client.create_issue.return_value = created
    mock_client.wake_agent.return_value = {"status": "queued"}

    result = await bridge.create_and_assign(
        title="Deploy v2",
        assignee_id="agent-codex",
        wake=True,
    )

    assert result is not None
    assert result.id == "i1"
    # The wake call must reference the freshly created issue.
    mock_client.wake_agent.assert_awaited_once_with("agent-codex", issue_id="i1")


async def test_create_and_assign_no_wake(bridge, mock_client):
    """wake=False creates the issue without waking anyone."""
    mock_client.create_issue.return_value = PaperclipIssue(id="i2", title="Research task")

    result = await bridge.create_and_assign(
        title="Research task",
        assignee_id="agent-research",
        wake=False,
    )

    assert result is not None
    mock_client.wake_agent.assert_not_awaited()


async def test_create_and_assign_failure(bridge, mock_client):
    """A failed create propagates as None."""
    mock_client.create_issue.return_value = None

    result = await bridge.create_and_assign(title="Will fail")
    assert result is None
||||
|
||||
|
||||
# ── delegate ─────────────────────────────────────────────────────────────────


async def test_delegate_issue(bridge, mock_client):
    """Delegation reassigns the issue, posts a comment, and wakes the agent."""
    mock_client.update_issue.return_value = PaperclipIssue(id="i1", title="Task")
    mock_client.wake_agent.return_value = {"status": "queued"}

    ok = await bridge.delegate_issue("i1", "agent-codex", message="Handle this")

    assert ok is True
    mock_client.add_comment.assert_awaited_once()
    mock_client.wake_agent.assert_awaited_once()


async def test_delegate_issue_update_fails(bridge, mock_client):
    """If the reassignment update fails, delegation reports failure."""
    mock_client.update_issue.return_value = None

    ok = await bridge.delegate_issue("i1", "agent-codex")
    assert ok is False
||||
|
||||
|
||||
# ── close issue ──────────────────────────────────────────────────────────────


async def test_close_issue(bridge, mock_client):
    """Closing with a comment marks the issue done and posts the comment."""
    mock_client.update_issue.return_value = PaperclipIssue(id="i1", title="Done")

    ok = await bridge.close_issue("i1", comment="Shipped!")

    assert ok is True
    mock_client.add_comment.assert_awaited_once()
||||
|
||||
|
||||
# ── goals ────────────────────────────────────────────────────────────────────


async def test_set_goal(bridge, mock_client):
    """set_goal returns the goal created by the client."""
    mock_client.create_goal.return_value = PaperclipGoal(id="g1", title="Ship MVP")

    goal = await bridge.set_goal("Ship MVP")

    assert goal is not None
    assert goal.title == "Ship MVP"
||||
|
||||
|
||||
# ── approvals ────────────────────────────────────────────────────────────────


async def test_approve(bridge, mock_client):
    """A successful approval returns True."""
    mock_client.approve.return_value = {"status": "approved"}
    assert await bridge.approve("ap1") is True


async def test_reject(bridge, mock_client):
    """A successful rejection returns True."""
    mock_client.reject.return_value = {"status": "rejected"}
    assert await bridge.reject("ap1", comment="Needs work") is True


async def test_approve_failure(bridge, mock_client):
    """A failed approval (client returned None) returns False."""
    mock_client.approve.return_value = None
    assert await bridge.approve("ap1") is False
||||
|
||||
|
||||
# ── runs ─────────────────────────────────────────────────────────────────────


async def test_active_runs(bridge, mock_client):
    """active_runs surfaces the client's heartbeat runs."""
    mock_client.list_heartbeat_runs.return_value = [{"id": "r1", "status": "running"}]

    runs = await bridge.active_runs()

    assert len(runs) == 1


async def test_cancel_run(bridge, mock_client):
    """A successful cancellation returns True."""
    mock_client.cancel_run.return_value = {"status": "cancelled"}
    assert await bridge.cancel_run("r1") is True
||||
180
tests/integrations/test_paperclip_client.py
Normal file
180
tests/integrations/test_paperclip_client.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Tests for the Paperclip API client."""
|
||||
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from integrations.paperclip.client import PaperclipClient
|
||||
from integrations.paperclip.models import CreateIssueRequest
|
||||
|
||||
|
||||
@pytest.fixture
def client():
    """Client under test, pointed at a non-existent host (all HTTP is patched)."""
    return PaperclipClient(base_url="http://fake:3100", api_key="test-key")
|
||||
|
||||
|
||||
# ── health ───────────────────────────────────────────────────────────────────


async def test_healthy_returns_true_on_success(client):
    """Any payload from the health endpoint counts as healthy."""
    with patch.object(client, "_get", new_callable=AsyncMock, return_value={"status": "ok"}):
        assert await client.healthy() is True


async def test_healthy_returns_false_on_failure(client):
    """A None transport result means the service is unreachable."""
    with patch.object(client, "_get", new_callable=AsyncMock, return_value=None):
        assert await client.healthy() is False
|
||||
|
||||
|
||||
# ── agents ───────────────────────────────────────────────────────────────────


async def test_list_agents_returns_list(client):
    """Raw agent dicts are parsed into PaperclipAgent models."""
    payload = [{"id": "a1", "name": "Codex", "role": "engineer", "status": "active"}]
    with patch.object(client, "_get", new_callable=AsyncMock, return_value=payload):
        with patch("integrations.paperclip.client.settings") as cfg:
            cfg.paperclip_company_id = "comp-1"
            agents = await client.list_agents(company_id="comp-1")
    assert len(agents) == 1
    assert agents[0].name == "Codex"


async def test_list_agents_graceful_on_none(client):
    """Transport failure degrades to an empty list, not an exception."""
    with patch.object(client, "_get", new_callable=AsyncMock, return_value=None):
        agents = await client.list_agents(company_id="comp-1")
    assert agents == []
|
||||
|
||||
|
||||
# ── issues ───────────────────────────────────────────────────────────────────


async def test_list_issues(client):
    """Raw issue dicts are parsed into PaperclipIssue models."""
    payload = [{"id": "i1", "title": "Fix bug"}]
    with patch.object(client, "_get", new_callable=AsyncMock, return_value=payload):
        issues = await client.list_issues(company_id="comp-1")
    assert len(issues) == 1
    assert issues[0].title == "Fix bug"


async def test_get_issue(client):
    """A found issue is returned as a parsed model."""
    payload = {"id": "i1", "title": "Fix bug", "description": "It's broken"}
    with patch.object(client, "_get", new_callable=AsyncMock, return_value=payload):
        issue = await client.get_issue("i1")
    assert issue is not None
    assert issue.id == "i1"


async def test_get_issue_not_found(client):
    """A missing issue yields None rather than raising."""
    with patch.object(client, "_get", new_callable=AsyncMock, return_value=None):
        issue = await client.get_issue("nonexistent")
    assert issue is None


async def test_create_issue(client):
    """Creation POSTs the request and returns the parsed issue."""
    payload = {"id": "i2", "title": "New feature"}
    with patch.object(client, "_post", new_callable=AsyncMock, return_value=payload):
        request = CreateIssueRequest(title="New feature")
        issue = await client.create_issue(request, company_id="comp-1")
    assert issue is not None
    assert issue.id == "i2"


async def test_create_issue_no_company_id(client):
    """Without a company id (argument or settings) creation is refused."""
    with patch("integrations.paperclip.client.settings") as cfg:
        cfg.paperclip_company_id = ""
        issue = await client.create_issue(CreateIssueRequest(title="Test"))
    assert issue is None


async def test_delete_issue(client):
    """Deletion passes through the transport's boolean result."""
    with patch.object(client, "_delete", new_callable=AsyncMock, return_value=True):
        assert await client.delete_issue("i1") is True
|
||||
|
||||
|
||||
# ── comments ─────────────────────────────────────────────────────────────────


async def test_add_comment(client):
    """Posting a comment returns the parsed comment model."""
    payload = {"id": "c1", "issue_id": "i1", "content": "Done"}
    with patch.object(client, "_post", new_callable=AsyncMock, return_value=payload):
        comment = await client.add_comment("i1", "Done")
    assert comment is not None
    assert comment.content == "Done"


async def test_list_comments(client):
    """Raw comment dicts are parsed into a list of models."""
    payload = [{"id": "c1", "issue_id": "i1", "content": "LGTM"}]
    with patch.object(client, "_get", new_callable=AsyncMock, return_value=payload):
        comments = await client.list_comments("i1")
    assert len(comments) == 1
|
||||
|
||||
|
||||
# ── goals ────────────────────────────────────────────────────────────────────


async def test_list_goals(client):
    """Raw goal dicts are parsed into PaperclipGoal models."""
    payload = [{"id": "g1", "title": "Ship MVP"}]
    with patch.object(client, "_get", new_callable=AsyncMock, return_value=payload):
        goals = await client.list_goals(company_id="comp-1")
    assert len(goals) == 1
    assert goals[0].title == "Ship MVP"


async def test_create_goal(client):
    """Creating a goal returns the parsed model."""
    payload = {"id": "g2", "title": "Scale to 1000 users"}
    with patch.object(client, "_post", new_callable=AsyncMock, return_value=payload):
        goal = await client.create_goal("Scale to 1000 users", company_id="comp-1")
    assert goal is not None
|
||||
|
||||
|
||||
# ── wake agent ───────────────────────────────────────────────────────────────


async def test_wake_agent(client):
    """Waking an agent returns the raw queue acknowledgement."""
    payload = {"status": "queued"}
    with patch.object(client, "_post", new_callable=AsyncMock, return_value=payload):
        result = await client.wake_agent("a1", issue_id="i1")
    assert result == {"status": "queued"}


async def test_wake_agent_failure(client):
    """Transport failure surfaces as None."""
    with patch.object(client, "_post", new_callable=AsyncMock, return_value=None):
        result = await client.wake_agent("a1")
    assert result is None
|
||||
|
||||
|
||||
# ── approvals ────────────────────────────────────────────────────────────────


async def test_approve(client):
    """A successful approval returns the response payload."""
    payload = {"status": "approved"}
    with patch.object(client, "_post", new_callable=AsyncMock, return_value=payload):
        result = await client.approve("ap1", comment="LGTM")
    assert result is not None


async def test_reject(client):
    """A successful rejection returns the response payload."""
    payload = {"status": "rejected"}
    with patch.object(client, "_post", new_callable=AsyncMock, return_value=payload):
        result = await client.reject("ap1", comment="Needs work")
    assert result is not None
|
||||
|
||||
|
||||
# ── heartbeat runs ───────────────────────────────────────────────────────────


async def test_list_heartbeat_runs(client):
    """Heartbeat runs pass through as raw dicts."""
    payload = [{"id": "r1", "agent_id": "a1", "status": "running"}]
    with patch.object(client, "_get", new_callable=AsyncMock, return_value=payload):
        runs = await client.list_heartbeat_runs(company_id="comp-1")
    assert len(runs) == 1


async def test_cancel_run(client):
    """A successful cancellation returns the response payload."""
    payload = {"status": "cancelled"}
    with patch.object(client, "_post", new_callable=AsyncMock, return_value=payload):
        result = await client.cancel_run("r1")
    assert result is not None
|
||||
848
tests/integrations/test_paperclip_task_runner.py
Normal file
848
tests/integrations/test_paperclip_task_runner.py
Normal file
@@ -0,0 +1,848 @@
|
||||
"""Integration tests for the Paperclip task runner — full green-path workflow.
|
||||
|
||||
Tests the complete autonomous cycle with a StubOrchestrator that exercises
|
||||
the real pipe (TaskRunner → orchestrator.execute_task → bridge → client)
|
||||
while stubbing only the LLM intelligence layer.
|
||||
|
||||
Green path:
|
||||
1. Timmy grabs first task in queue
|
||||
2. Orchestrator.execute_task processes it (stub returns input-aware response)
|
||||
3. Timmy posts completion comment and marks issue done
|
||||
4. Timmy creates a recursive follow-up task for himself
|
||||
|
||||
The stub is deliberately input-aware — it echoes back task metadata so
|
||||
assertions can prove data actually flowed through the pipe, not just that
|
||||
methods were called.
|
||||
|
||||
Live-LLM tests (``@pytest.mark.ollama``) are at the bottom; they hit a real
|
||||
tiny model via Ollama and are skipped when Ollama is not running.
|
||||
Run them with: ``tox -e ollama`` or ``pytest -m ollama``
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from integrations.paperclip.bridge import PaperclipBridge
|
||||
from integrations.paperclip.client import PaperclipClient
|
||||
from integrations.paperclip.models import (
|
||||
PaperclipIssue,
|
||||
)
|
||||
from integrations.paperclip.task_runner import TaskRunner
|
||||
|
||||
|
||||
# ── Constants ─────────────────────────────────────────────────────────────────

# Agent identity Timmy runs under, and the company scoping every Paperclip
# call; both are injected into the patched settings by the fixtures below.
TIMMY_AGENT_ID = "agent-timmy"
COMPANY_ID = "comp-1"
|
||||
|
||||
|
||||
# ── StubOrchestrator: exercises the pipe, stubs the intelligence ──────────────


class StubOrchestrator:
    """Deterministic orchestrator that proves data flows through the pipe.

    Returns responses that reference input metadata — so tests can assert
    the pipe actually connected (task_id, title, priority all appear in output).
    Tracks every call for post-hoc inspection.
    """

    def __init__(self) -> None:
        # One record per execute_task invocation, in call order.
        self.calls: list[dict] = []

    async def execute_task(
        self, task_id: str, description: str, context: dict
    ) -> dict:
        # Snapshot the inputs; copy the context so later mutation can't
        # retroactively change what we recorded.
        self.calls.append(
            {
                "task_id": task_id,
                "description": description,
                "context": dict(context),
            }
        )

        # Echo back task metadata so assertions can prove end-to-end flow.
        title = context.get("title", description[:50])
        priority = context.get("priority", "normal")

        return {
            "task_id": task_id,
            "agent": "orchestrator",
            "result": (
                f"[Orchestrator] Processed '{title}'. "
                f"Task {task_id} handled with priority {priority}. "
                "Self-reflection: my task automation loop is functioning. "
                "I should create a follow-up to review this pattern."
            ),
            "status": "completed",
        }
|
||||
|
||||
|
||||
# ── Fixtures ──────────────────────────────────────────────────────────────────


@pytest.fixture
def stub_orchestrator():
    """Fresh input-aware orchestrator stub per test."""
    return StubOrchestrator()


@pytest.fixture
def mock_client():
    """Fully stubbed PaperclipClient with async methods."""
    stub = MagicMock(spec=PaperclipClient)
    stub.healthy = AsyncMock(return_value=True)
    stub.delete_issue = AsyncMock(return_value=True)
    stub.checkout_issue = AsyncMock(return_value={"ok": True})
    stub.release_issue = AsyncMock(return_value={"ok": True})
    # Listing endpoints default to empty collections …
    for name in (
        "list_issues",
        "list_comments",
        "list_agents",
        "list_goals",
        "list_approvals",
        "list_heartbeat_runs",
    ):
        setattr(stub, name, AsyncMock(return_value=[]))
    # … and every lookup/mutation defaults to "nothing happened".
    for name in (
        "get_issue",
        "create_issue",
        "update_issue",
        "add_comment",
        "wake_agent",
        "create_goal",
        "cancel_run",
        "approve",
        "reject",
    ):
        setattr(stub, name, AsyncMock(return_value=None))
    return stub


@pytest.fixture
def bridge(mock_client):
    """Bridge wired to the stubbed client."""
    return PaperclipBridge(client=mock_client)


@pytest.fixture
def settings_patch():
    """Patch settings for all task runner tests."""
    with patch("integrations.paperclip.task_runner.settings") as runner_settings, \
            patch("integrations.paperclip.bridge.settings") as bridge_settings:
        for cfg in (runner_settings, bridge_settings):
            cfg.paperclip_enabled = True
            cfg.paperclip_agent_id = TIMMY_AGENT_ID
            cfg.paperclip_company_id = COMPANY_ID
            cfg.paperclip_url = "http://fake:3100"
            cfg.paperclip_poll_interval = 0  # never sleep between polls in tests
        yield runner_settings
|
||||
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────


def _make_issue(
    id: str = "issue-1",
    title: str = "Muse about task automation",
    description: str = "Reflect on how you handle tasks and write a recursive self-improvement task.",
    status: str = "open",
    assignee_id: str = TIMMY_AGENT_ID,
    priority: str = "normal",
    labels: list[str] | None = None,
) -> PaperclipIssue:
    """Open issue assigned to Timmy; any field can be overridden per test."""
    return PaperclipIssue(
        id=id,
        title=title,
        description=description,
        status=status,
        assignee_id=assignee_id,
        priority=priority,
        labels=labels or [],  # fresh list each call, never a shared default
    )


def _make_done(id: str = "issue-1", title: str = "Done") -> PaperclipIssue:
    """Issue already in the terminal 'done' state."""
    return PaperclipIssue(id=id, title=title, status="done")


def _make_follow_up(id: str = "issue-2") -> PaperclipIssue:
    """The follow-up issue the runner is expected to create for itself."""
    return PaperclipIssue(
        id=id,
        title="Follow-up: Muse about task automation",
        description="Automated follow-up from completed task",
        status="open",
        assignee_id=TIMMY_AGENT_ID,
        priority="normal",
    )
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
# PIPE WIRING: verify orchestrator is actually connected
# ═══════════════════════════════════════════════════════════════════════════════


class TestOrchestratorWiring:
    """Verify the orchestrator parameter actually connects to the pipe."""

    async def test_orchestrator_execute_task_is_called(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """When orchestrator is wired, process_task calls execute_task."""
        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        result = await runner.process_task(_make_issue())

        # Exactly one call, carrying the issue's id and title through the pipe.
        assert len(stub_orchestrator.calls) == 1
        recorded = stub_orchestrator.calls[0]
        assert recorded["task_id"] == "issue-1"
        assert recorded["context"]["title"] == "Muse about task automation"

    async def test_orchestrator_receives_full_context(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Context dict passed to execute_task includes all issue metadata."""
        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        await runner.process_task(
            _make_issue(
                id="ctx-test",
                title="Context verification",
                priority="high",
                labels=["automation", "meta"],
            )
        )

        ctx = stub_orchestrator.calls[0]["context"]
        assert ctx["issue_id"] == "ctx-test"
        assert ctx["title"] == "Context verification"
        assert ctx["priority"] == "high"
        assert ctx["labels"] == ["automation", "meta"]

    async def test_orchestrator_dict_result_unwrapped(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """When execute_task returns a dict, the 'result' key is extracted."""
        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        result = await runner.process_task(_make_issue())

        # StubOrchestrator wraps its text in {"result": ...}; the runner unwraps it.
        assert "[Orchestrator]" in result
        assert "issue-1" in result

    async def test_orchestrator_string_result_passthrough(
        self, mock_client, bridge, settings_patch,
    ):
        """When execute_task returns a plain string, it passes through."""

        class StringOrchestrator:
            async def execute_task(self, task_id, description, context):
                return f"Plain string result for {task_id}"

        runner = TaskRunner(bridge=bridge, orchestrator=StringOrchestrator())
        result = await runner.process_task(_make_issue())

        assert result == "Plain string result for issue-1"

    async def test_process_fn_overrides_orchestrator(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Explicit process_fn takes priority over orchestrator."""

        async def override(task_id, desc, ctx):
            return "override wins"

        runner = TaskRunner(
            bridge=bridge, orchestrator=stub_orchestrator, process_fn=override,
        )
        result = await runner.process_task(_make_issue())

        assert result == "override wins"
        assert len(stub_orchestrator.calls) == 0  # orchestrator NOT called
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 1: Timmy grabs the first task in queue
# ═══════════════════════════════════════════════════════════════════════════════


class TestGrabNextTask:
    """Verify Timmy picks the first open issue assigned to him."""

    async def test_grabs_first_assigned_issue(self, mock_client, bridge, settings_patch):
        mock_client.list_issues.return_value = [_make_issue()]

        runner = TaskRunner(bridge=bridge)
        grabbed = await runner.grab_next_task()

        assert grabbed is not None
        assert grabbed.id == "issue-1"
        assert grabbed.assignee_id == TIMMY_AGENT_ID
        # Only open issues should ever be queried.
        mock_client.list_issues.assert_awaited_once_with(status="open")

    async def test_skips_issues_not_assigned_to_timmy(self, mock_client, bridge, settings_patch):
        # Someone else's issue sits ahead of Timmy's in the queue.
        mock_client.list_issues.return_value = [
            _make_issue(id="other-1", assignee_id="agent-codex"),
            _make_issue(id="timmy-1"),
        ]

        runner = TaskRunner(bridge=bridge)
        grabbed = await runner.grab_next_task()

        assert grabbed.id == "timmy-1"

    async def test_returns_none_when_queue_empty(self, mock_client, bridge, settings_patch):
        mock_client.list_issues.return_value = []
        runner = TaskRunner(bridge=bridge)
        assert await runner.grab_next_task() is None

    async def test_returns_none_when_no_agent_id(self, mock_client, bridge, settings_patch):
        # Without an agent identity there is nothing to grab — and no API call.
        settings_patch.paperclip_agent_id = ""
        runner = TaskRunner(bridge=bridge)
        assert await runner.grab_next_task() is None
        mock_client.list_issues.assert_not_awaited()

    async def test_grabs_first_of_multiple(self, mock_client, bridge, settings_patch):
        mock_client.list_issues.return_value = [
            _make_issue(id=f"t-{n}", title=f"Task {n}") for n in range(3)
        ]

        runner = TaskRunner(bridge=bridge)
        assert (await runner.grab_next_task()).id == "t-0"
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 2: Timmy processes the task through the orchestrator
# ═══════════════════════════════════════════════════════════════════════════════


class TestProcessTask:
    """Verify checkout + orchestrator invocation + result flow."""

    async def test_checkout_before_orchestrator(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Issue must be checked out before orchestrator runs."""
        seen = {"checked_out_first": False}
        inner_execute = stub_orchestrator.execute_task

        async def spying_execute(task_id, desc, ctx):
            # Record whether the checkout had already happened when we ran.
            seen["checked_out_first"] = (
                mock_client.checkout_issue.await_count > 0
            )
            return await inner_execute(task_id, desc, ctx)

        stub_orchestrator.execute_task = spying_execute

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        await runner.process_task(_make_issue())

        assert seen["checked_out_first"], "checkout must happen before execute_task"

    async def test_orchestrator_output_flows_to_result(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """The string returned by process_task comes from the orchestrator."""
        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        result = await runner.process_task(
            _make_issue(id="flow-1", title="Flow verification", priority="high")
        )

        # The stub echoes its inputs — their presence proves the pipe carried them.
        assert "Flow verification" in result
        assert "flow-1" in result
        assert "high" in result

    async def test_default_fallback_without_orchestrator(
        self, mock_client, bridge, settings_patch,
    ):
        """Without orchestrator or process_fn, a default message is returned."""
        runner = TaskRunner(bridge=bridge)  # no orchestrator, no process_fn
        result = await runner.process_task(_make_issue(title="Fallback test"))
        assert "Fallback test" in result
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
# STEP 3: Timmy completes the task — comment + close
# ═══════════════════════════════════════════════════════════════════════════════


class TestCompleteTask:
    """Verify orchestrator output flows into the completion comment."""

    async def test_orchestrator_output_in_comment(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """The comment posted to Paperclip contains the orchestrator's output."""
        issue = _make_issue(id="cmt-1", title="Comment pipe test")
        mock_client.update_issue.return_value = _make_done("cmt-1")

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        output = await runner.process_task(issue)   # produce orchestrator output
        await runner.complete_task(issue, output)   # post it as the closing comment

        posted = mock_client.add_comment.call_args[0][1]
        assert "[Timmy]" in posted
        assert "[Orchestrator]" in posted
        assert "Comment pipe test" in posted

    async def test_marks_issue_done(
        self, mock_client, bridge, settings_patch,
    ):
        mock_client.update_issue.return_value = _make_done()

        runner = TaskRunner(bridge=bridge)
        ok = await runner.complete_task(_make_issue(), "any result")

        assert ok is True
        # Second positional arg to update_issue is the update request payload.
        update_req = mock_client.update_issue.call_args[0][1]
        assert update_req.status == "done"

    async def test_returns_false_on_close_failure(
        self, mock_client, bridge, settings_patch,
    ):
        mock_client.update_issue.return_value = None
        runner = TaskRunner(bridge=bridge)
        assert await runner.complete_task(_make_issue(), "result") is False
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# STEP 4: Follow-up creation with orchestrator output embedded
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
class TestCreateFollowUp:
    """Verify orchestrator output flows into the follow-up description."""

    async def test_follow_up_contains_orchestrator_output(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """The follow-up description includes the orchestrator's result text."""
        source = _make_issue(id="fu-1", title="Follow-up pipe test")
        mock_client.create_issue.return_value = _make_follow_up()

        task_runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        orch_output = await task_runner.process_task(source)
        await task_runner.create_follow_up(source, orch_output)

        request = mock_client.create_issue.call_args[0][0]
        # The orchestrator's text must be embedded in the new description.
        assert "[Orchestrator]" in request.description
        assert "fu-1" in request.description

    async def test_follow_up_assigned_to_self(
        self, mock_client, bridge, settings_patch,
    ):
        """New follow-ups are assigned back to Timmy himself."""
        mock_client.create_issue.return_value = _make_follow_up()
        await TaskRunner(bridge=bridge).create_follow_up(_make_issue(), "result")

        request = mock_client.create_issue.call_args[0][0]
        assert request.assignee_id == TIMMY_AGENT_ID

    async def test_follow_up_preserves_priority(
        self, mock_client, bridge, settings_patch,
    ):
        """The follow-up inherits the originating issue's priority."""
        mock_client.create_issue.return_value = _make_follow_up()
        await TaskRunner(bridge=bridge).create_follow_up(
            _make_issue(priority="high"), "result"
        )

        request = mock_client.create_issue.call_args[0][0]
        assert request.priority == "high"

    async def test_follow_up_not_woken(self, mock_client, bridge, settings_patch):
        """Creating a follow-up must not wake any agent."""
        mock_client.create_issue.return_value = _make_follow_up()
        await TaskRunner(bridge=bridge).create_follow_up(_make_issue(), "result")
        mock_client.wake_agent.assert_not_awaited()

    async def test_returns_none_on_failure(self, mock_client, bridge, settings_patch):
        """If the client cannot create the issue, None comes back."""
        mock_client.create_issue.return_value = None
        task_runner = TaskRunner(bridge=bridge)
        assert await task_runner.create_follow_up(_make_issue(), "r") is None


# ═══════════════════════════════════════════════════════════════════════════════
# FULL GREEN PATH: orchestrator wired end-to-end
# ═══════════════════════════════════════════════════════════════════════════════


class TestGreenPathWithOrchestrator:
    """Full pipe: TaskRunner → StubOrchestrator → bridge → mock_client.

    Proves orchestrator output propagates to every downstream artefact:
    the comment, the follow-up description, and the summary dict.
    """

    async def test_full_cycle_orchestrator_output_everywhere(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Orchestrator result appears in comment, follow-up, and summary."""
        seed = _make_issue(
            id="green-1",
            title="Muse about task automation and write a recursive task",
            description="Reflect on your task processing. Create a follow-up.",
            priority="high",
        )
        mock_client.list_issues.return_value = [seed]
        mock_client.update_issue.return_value = _make_done("green-1")
        mock_client.create_issue.return_value = _make_follow_up("green-fu")

        task_runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        summary = await task_runner.run_once()

        # The orchestrator received exactly one call carrying the task data.
        assert len(stub_orchestrator.calls) == 1
        recorded = stub_orchestrator.calls[0]
        assert recorded["task_id"] == "green-1"
        assert recorded["context"]["priority"] == "high"
        assert "Reflect on your task processing" in recorded["description"]

        # The run summary reflects the orchestrator's output.
        assert summary is not None
        assert summary["original_issue_id"] == "green-1"
        assert summary["completed"] is True
        assert summary["follow_up_issue_id"] == "green-fu"
        assert "[Orchestrator]" in summary["result"]
        assert "green-1" in summary["result"]

        # The Paperclip comment embeds the orchestrator output.
        posted = mock_client.add_comment.call_args[0][1]
        assert "[Timmy]" in posted
        assert "[Orchestrator]" in posted
        assert "high" in posted  # priority flowed through

        # So does the follow-up's description.
        fu_request = mock_client.create_issue.call_args[0][0]
        assert "[Orchestrator]" in fu_request.description
        assert "green-1" in fu_request.description
        assert fu_request.priority == "high"
        assert fu_request.assignee_id == TIMMY_AGENT_ID

        # And the client was driven in the expected order.
        mock_client.list_issues.assert_awaited_once()
        mock_client.checkout_issue.assert_awaited_once_with("green-1")
        mock_client.add_comment.assert_awaited_once()
        mock_client.update_issue.assert_awaited_once()
        assert mock_client.create_issue.await_count == 1

    async def test_no_tasks_returns_none(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """With nothing to do, run_once yields None and skips the orchestrator."""
        mock_client.list_issues.return_value = []
        task_runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        assert await task_runner.run_once() is None
        assert len(stub_orchestrator.calls) == 0

    async def test_close_failure_still_creates_follow_up(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Even when closing the issue fails, the follow-up is still filed."""
        mock_client.list_issues.return_value = [_make_issue()]
        mock_client.update_issue.return_value = None  # close fails
        mock_client.create_issue.return_value = _make_follow_up()

        task_runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        summary = await task_runner.run_once()

        assert summary["completed"] is False
        assert summary["follow_up_issue_id"] == "issue-2"
        assert len(stub_orchestrator.calls) == 1


# ═══════════════════════════════════════════════════════════════════════════════
# EXTERNAL INJECTION: task from Paperclip API → orchestrator processes it
# ═══════════════════════════════════════════════════════════════════════════════


class TestExternalTaskInjection:
    """External system creates a task → Timmy's orchestrator processes it."""

    async def test_external_task_flows_through_orchestrator(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """A task injected via the API is processed end-to-end."""
        injected = _make_issue(
            id="ext-1",
            title="Review quarterly metrics",
            description="Analyze Q1 metrics and prepare summary.",
        )
        mock_client.list_issues.return_value = [injected]
        mock_client.update_issue.return_value = _make_done("ext-1")
        mock_client.create_issue.return_value = _make_follow_up("ext-fu")

        task_runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        summary = await task_runner.run_once()

        # The orchestrator saw the externally created task...
        assert stub_orchestrator.calls[0]["task_id"] == "ext-1"
        assert "Analyze Q1 metrics" in stub_orchestrator.calls[0]["description"]

        # ...and its output made it back to Paperclip.
        assert "[Orchestrator]" in summary["result"]
        assert "Review quarterly metrics" in summary["result"]

    async def test_skips_tasks_for_other_agents(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Only issues assigned to Timmy are picked up."""
        foreign = _make_issue(id="other-1", assignee_id="agent-codex")
        own = _make_issue(id="mine-1", title="My task")
        mock_client.list_issues.return_value = [foreign, own]
        mock_client.update_issue.return_value = _make_done("mine-1")
        mock_client.create_issue.return_value = _make_follow_up()

        task_runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        summary = await task_runner.run_once()

        assert summary["original_issue_id"] == "mine-1"
        mock_client.checkout_issue.assert_awaited_once_with("mine-1")


# ═══════════════════════════════════════════════════════════════════════════════
# RECURSIVE CHAIN: follow-up → grabbed → orchestrator → follow-up → ...
# ═══════════════════════════════════════════════════════════════════════════════


class TestRecursiveChain:
    """Multi-cycle chains where each follow-up becomes the next task."""

    async def test_two_cycle_chain(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Two back-to-back cycles: A spawns B, then B spawns C."""
        task_a = _make_issue(id="A", title="Initial musing")
        fu_b = PaperclipIssue(
            id="B", title="Follow-up: Initial musing",
            description="Continue", status="open",
            assignee_id=TIMMY_AGENT_ID, priority="normal",
        )
        fu_c = PaperclipIssue(
            id="C", title="Follow-up: Follow-up",
            status="open", assignee_id=TIMMY_AGENT_ID,
        )

        task_runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)

        # First cycle: process A, which yields follow-up B.
        mock_client.list_issues.return_value = [task_a]
        mock_client.update_issue.return_value = _make_done("A")
        mock_client.create_issue.return_value = fu_b
        first = await task_runner.run_once()
        assert first["original_issue_id"] == "A"
        assert first["follow_up_issue_id"] == "B"

        # Second cycle: B is now the open task and yields C.
        mock_client.list_issues.return_value = [fu_b]
        mock_client.update_issue.return_value = _make_done("B")
        mock_client.create_issue.return_value = fu_c
        second = await task_runner.run_once()
        assert second["original_issue_id"] == "B"
        assert second["follow_up_issue_id"] == "C"

        # The orchestrator handled each cycle exactly once, in order.
        assert len(stub_orchestrator.calls) == 2
        assert stub_orchestrator.calls[0]["task_id"] == "A"
        assert stub_orchestrator.calls[1]["task_id"] == "B"

    async def test_three_cycle_chain_all_through_orchestrator(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Three cycles — every task goes through the orchestrator pipe."""
        chain = [_make_issue(id=f"c-{i}", title=f"Chain {i}") for i in range(3)]
        next_issues = [
            PaperclipIssue(
                id=f"c-{i + 1}", title=f"Follow-up: Chain {i}",
                status="open", assignee_id=TIMMY_AGENT_ID,
            )
            for i in range(3)
        ]

        task_runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        processed = []

        for step, task in enumerate(chain):
            mock_client.list_issues.return_value = [task]
            mock_client.update_issue.return_value = _make_done(task.id)
            mock_client.create_issue.return_value = next_issues[step]

            summary = await task_runner.run_once()
            processed.append(summary["original_issue_id"])

        assert processed == ["c-0", "c-1", "c-2"]
        assert len(stub_orchestrator.calls) == 3


# ═══════════════════════════════════════════════════════════════════════════════
# LIFECYCLE: start/stop
# ═══════════════════════════════════════════════════════════════════════════════


class TestLifecycle:
    """Runner start/stop behaviour."""

    async def test_stop_halts_loop(self, mock_client, bridge, settings_patch):
        """stop() clears the running flag so the poll loop exits."""
        task_runner = TaskRunner(bridge=bridge)
        task_runner._running = True
        task_runner.stop()
        assert task_runner._running is False

    async def test_start_disabled_when_interval_zero(
        self, mock_client, bridge, settings_patch,
    ):
        """A zero poll interval disables polling: start() never hits the API."""
        settings_patch.paperclip_poll_interval = 0
        task_runner = TaskRunner(bridge=bridge)
        await task_runner.start()
        mock_client.list_issues.assert_not_awaited()


# ═══════════════════════════════════════════════════════════════════════════════
# LIVE LLM (manual e2e): runs only when Ollama is available
# ═══════════════════════════════════════════════════════════════════════════════


def _ollama_reachable() -> tuple[bool, list[str]]:
    """Return (reachable, model_names)."""
    try:
        import httpx

        response = httpx.get("http://localhost:11434/api/tags", timeout=3)
        response.raise_for_status()
        listed = response.json().get("models", [])
        return True, [entry["name"] for entry in listed]
    except Exception:
        # Any failure at all (server down, bad payload, httpx missing)
        # simply means "not reachable" — callers use this to skip tests.
        return False, []
def _pick_tiny_model(available: list[str]) -> str | None:
|
||||
"""Pick the smallest model available for e2e tests."""
|
||||
candidates = ["tinyllama", "phi", "qwen2:0.5b", "llama3.2:1b", "gemma:2b"]
|
||||
for candidate in candidates:
|
||||
for name in available:
|
||||
if candidate in name:
|
||||
return name
|
||||
return None
|
||||
|
||||
|
||||
class LiveOllamaOrchestrator:
    """Thin orchestrator that calls Ollama directly — no Agno dependency."""

    def __init__(self, model_name: str) -> None:
        # Track every task handed to us so tests can assert invocation.
        self.model_name = model_name
        self.calls: list[dict] = []

    async def execute_task(
        self, task_id: str, description: str, context: dict
    ) -> str:
        import httpx as hx

        self.calls.append({"task_id": task_id, "description": description})

        # Keep generation short (num_predict) so e2e runs stay fast.
        payload = {
            "model": self.model_name,
            "prompt": (
                f"You are Timmy, a task automation agent. "
                f"Task: {description}\n"
                f"Respond in 1-2 sentences about what you did."
            ),
            "stream": False,
            "options": {"num_predict": 64},
        }
        async with hx.AsyncClient(timeout=60) as client:
            response = await client.post(
                "http://localhost:11434/api/generate",
                json=payload,
            )
            response.raise_for_status()
            return response.json()["response"]
@pytest.mark.ollama
class TestLiveOllamaGreenPath:
    """Green-path with a real tiny LLM via Ollama.

    Run with: ``tox -e ollama`` or ``pytest -m ollama``
    Requires: Ollama running with a small model.
    """

    async def test_live_full_cycle(self, mock_client, bridge, settings_patch):
        """Wire a real tiny LLM through the full pipe and verify output."""
        reachable, models = _ollama_reachable()
        if not reachable:
            pytest.skip("Ollama not reachable at localhost:11434")
        model = _pick_tiny_model(models)
        if not model:
            pytest.skip(f"No tiny model found (have: {models[:5]})")

        task = _make_issue(
            id="live-1",
            title="Reflect on task automation",
            description="Muse about how you process tasks and suggest improvements.",
        )
        mock_client.list_issues.return_value = [task]
        mock_client.update_issue.return_value = _make_done("live-1")
        mock_client.create_issue.return_value = _make_follow_up("live-fu")

        live_orch = LiveOllamaOrchestrator(model)
        task_runner = TaskRunner(bridge=bridge, orchestrator=live_orch)
        summary = await task_runner.run_once()

        # The LLM produced *something* non-empty and the cycle completed.
        assert summary is not None
        assert len(summary["result"]) > 0
        assert summary["completed"] is True
        assert summary["follow_up_issue_id"] == "live-fu"

        # The orchestrator was actually invoked with our task.
        assert len(live_orch.calls) == 1
        assert live_orch.calls[0]["task_id"] == "live-1"

        # LLM output flowed into the Paperclip comment...
        posted = mock_client.add_comment.call_args[0][1]
        assert "[Timmy]" in posted
        assert len(posted) > len("[Timmy] Task completed.\n\n")

        # ...and into the follow-up description.
        fu_request = mock_client.create_issue.call_args[0][0]
        assert len(fu_request.description) > 0
        assert fu_request.assignee_id == TIMMY_AGENT_ID

    async def test_live_recursive_chain(self, mock_client, bridge, settings_patch):
        """Two-cycle chain with a real LLM — each cycle produces real output."""
        reachable, models = _ollama_reachable()
        if not reachable:
            pytest.skip("Ollama not reachable")
        model = _pick_tiny_model(models)
        if not model:
            pytest.skip("No tiny model found")

        task_a = _make_issue(id="live-A", title="Initial reflection")
        fu_b = PaperclipIssue(
            id="live-B", title="Follow-up: Initial reflection",
            description="Continue reflecting", status="open",
            assignee_id=TIMMY_AGENT_ID, priority="normal",
        )
        fu_c = PaperclipIssue(
            id="live-C", title="Follow-up: Follow-up",
            status="open", assignee_id=TIMMY_AGENT_ID,
        )

        live_orch = LiveOllamaOrchestrator(model)
        task_runner = TaskRunner(bridge=bridge, orchestrator=live_orch)

        # Cycle 1: live-A → follow-up live-B.
        mock_client.list_issues.return_value = [task_a]
        mock_client.update_issue.return_value = _make_done("live-A")
        mock_client.create_issue.return_value = fu_b
        first = await task_runner.run_once()
        assert first is not None
        assert len(first["result"]) > 0

        # Cycle 2: live-B → follow-up live-C.
        mock_client.list_issues.return_value = [fu_b]
        mock_client.update_issue.return_value = _make_done("live-B")
        mock_client.create_issue.return_value = fu_c
        second = await task_runner.run_once()
        assert second is not None
        assert len(second["result"]) > 0

        # Both cycles went through the real LLM.
        assert len(live_orch.calls) == 2
Reference in New Issue
Block a user