Timmy-time-dashboard/tests/integrations/test_paperclip_task_runner.py

"""Integration tests for the Paperclip task runner — full green-path workflow.

Tests the complete autonomous cycle with a StubOrchestrator that exercises
the real pipe (TaskRunner → orchestrator.execute_task → bridge → client)
while stubbing only the LLM intelligence layer.

Green path:
  1. Timmy grabs first task in queue
  2. Orchestrator.execute_task processes it (stub returns input-aware response)
  3. Timmy posts completion comment and marks issue done
  4. Timmy creates a recursive follow-up task for himself

The stub is deliberately input-aware — it echoes back task metadata so
assertions can prove data actually flowed through the pipe, not just that
methods were called.

Live-LLM tests (``@pytest.mark.ollama``) are at the bottom; they hit a real
tiny model via Ollama and are skipped when Ollama is not running.
Run them with: ``tox -e ollama`` or ``pytest -m ollama``
"""

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from integrations.paperclip.bridge import PaperclipBridge
from integrations.paperclip.client import PaperclipClient
from integrations.paperclip.models import (
    PaperclipIssue,
)
from integrations.paperclip.task_runner import TaskRunner


# ── Constants ─────────────────────────────────────────────────────────────────

TIMMY_AGENT_ID = "agent-timmy"
COMPANY_ID = "comp-1"


# ── StubOrchestrator: exercises the pipe, stubs the intelligence ──────────────


class StubOrchestrator:
    """Deterministic orchestrator that proves data flows through the pipe.

    Returns responses that reference input metadata — so tests can assert
    the pipe actually connected (task_id, title, priority all appear in output).
    Tracks every call for post-hoc inspection.
    """

    def __init__(self) -> None:
        self.calls: list[dict] = []

    async def execute_task(
        self, task_id: str, description: str, context: dict
    ) -> dict:
        call_record = {
            "task_id": task_id,
            "description": description,
            "context": dict(context),
        }
        self.calls.append(call_record)

        title = context.get("title", description[:50])
        priority = context.get("priority", "normal")

        return {
            "task_id": task_id,
            "agent": "orchestrator",
            "result": (
                f"[Orchestrator] Processed '{title}'. "
                f"Task {task_id} handled with priority {priority}. "
                "Self-reflection: my task automation loop is functioning. "
                "I should create a follow-up to review this pattern."
            ),
            "status": "completed",
        }


# ── Fixtures ──────────────────────────────────────────────────────────────────


@pytest.fixture
def stub_orchestrator():
    return StubOrchestrator()


@pytest.fixture
def mock_client():
    """Fully stubbed PaperclipClient with async methods."""
    client = MagicMock(spec=PaperclipClient)
    client.healthy = AsyncMock(return_value=True)
    client.list_issues = AsyncMock(return_value=[])
    client.get_issue = AsyncMock(return_value=None)
    client.create_issue = AsyncMock(return_value=None)
    client.update_issue = AsyncMock(return_value=None)
    client.delete_issue = AsyncMock(return_value=True)
    client.add_comment = AsyncMock(return_value=None)
    client.list_comments = AsyncMock(return_value=[])
    client.checkout_issue = AsyncMock(return_value={"ok": True})
    client.release_issue = AsyncMock(return_value={"ok": True})
    client.wake_agent = AsyncMock(return_value=None)
    client.list_agents = AsyncMock(return_value=[])
    client.list_goals = AsyncMock(return_value=[])
    client.create_goal = AsyncMock(return_value=None)
    client.list_approvals = AsyncMock(return_value=[])
    client.list_heartbeat_runs = AsyncMock(return_value=[])
    client.cancel_run = AsyncMock(return_value=None)
    client.approve = AsyncMock(return_value=None)
    client.reject = AsyncMock(return_value=None)
    return client


@pytest.fixture
def bridge(mock_client):
    return PaperclipBridge(client=mock_client)


@pytest.fixture
def settings_patch():
    """Patch settings for all task runner tests."""
    with patch("integrations.paperclip.task_runner.settings") as ts, \
         patch("integrations.paperclip.bridge.settings") as bs:
        for s in (ts, bs):
            s.paperclip_enabled = True
            s.paperclip_agent_id = TIMMY_AGENT_ID
            s.paperclip_company_id = COMPANY_ID
            s.paperclip_url = "http://fake:3100"
            s.paperclip_poll_interval = 0
        yield ts


# ── Helpers ───────────────────────────────────────────────────────────────────


def _make_issue(
    id: str = "issue-1",
    title: str = "Muse about task automation",
    description: str = "Reflect on how you handle tasks and write a recursive self-improvement task.",
    status: str = "open",
    assignee_id: str = TIMMY_AGENT_ID,
    priority: str = "normal",
    labels: list[str] | None = None,
) -> PaperclipIssue:
    return PaperclipIssue(
        id=id,
        title=title,
        description=description,
        status=status,
        assignee_id=assignee_id,
        priority=priority,
        labels=labels or [],
    )


def _make_done(id: str = "issue-1", title: str = "Done") -> PaperclipIssue:
    return PaperclipIssue(id=id, title=title, status="done")


def _make_follow_up(id: str = "issue-2") -> PaperclipIssue:
    return PaperclipIssue(
        id=id,
        title="Follow-up: Muse about task automation",
        description="Automated follow-up from completed task",
        status="open",
        assignee_id=TIMMY_AGENT_ID,
        priority="normal",
    )


# ═══════════════════════════════════════════════════════════════════════════════
# PIPE WIRING: verify orchestrator is actually connected
# ═══════════════════════════════════════════════════════════════════════════════


class TestOrchestratorWiring:
    """Verify the orchestrator parameter actually connects to the pipe."""

    async def test_orchestrator_execute_task_is_called(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """When orchestrator is wired, process_task calls execute_task."""
        issue = _make_issue()

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        result = await runner.process_task(issue)

        assert len(stub_orchestrator.calls) == 1
        call = stub_orchestrator.calls[0]
        assert call["task_id"] == "issue-1"
        assert call["context"]["title"] == "Muse about task automation"

    async def test_orchestrator_receives_full_context(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Context dict passed to execute_task includes all issue metadata."""
        issue = _make_issue(
            id="ctx-test",
            title="Context verification",
            priority="high",
            labels=["automation", "meta"],
        )

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        await runner.process_task(issue)

        ctx = stub_orchestrator.calls[0]["context"]
        assert ctx["issue_id"] == "ctx-test"
        assert ctx["title"] == "Context verification"
        assert ctx["priority"] == "high"
        assert ctx["labels"] == ["automation", "meta"]

    async def test_orchestrator_dict_result_unwrapped(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """When execute_task returns a dict, the 'result' key is extracted."""
        issue = _make_issue()

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        result = await runner.process_task(issue)

        # StubOrchestrator returns dict with "result" key
        assert "[Orchestrator]" in result
        assert "issue-1" in result

    async def test_orchestrator_string_result_passthrough(
        self, mock_client, bridge, settings_patch,
    ):
        """When execute_task returns a plain string, it passes through."""

        class StringOrchestrator:
            async def execute_task(self, task_id, description, context):
                return f"Plain string result for {task_id}"

        runner = TaskRunner(bridge=bridge, orchestrator=StringOrchestrator())
        result = await runner.process_task(_make_issue())

        assert result == "Plain string result for issue-1"

    async def test_process_fn_overrides_orchestrator(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Explicit process_fn takes priority over orchestrator."""

        async def override(task_id, desc, ctx):
            return "override wins"

        runner = TaskRunner(
            bridge=bridge, orchestrator=stub_orchestrator, process_fn=override,
        )
        result = await runner.process_task(_make_issue())

        assert result == "override wins"
        assert len(stub_orchestrator.calls) == 0  # orchestrator NOT called


# ═══════════════════════════════════════════════════════════════════════════════
# STEP 1: Timmy grabs the first task in queue
# ═══════════════════════════════════════════════════════════════════════════════


class TestGrabNextTask:
    """Verify Timmy picks the first open issue assigned to him."""

    async def test_grabs_first_assigned_issue(self, mock_client, bridge, settings_patch):
        issue = _make_issue()
        mock_client.list_issues.return_value = [issue]

        runner = TaskRunner(bridge=bridge)
        grabbed = await runner.grab_next_task()

        assert grabbed is not None
        assert grabbed.id == "issue-1"
        assert grabbed.assignee_id == TIMMY_AGENT_ID
        mock_client.list_issues.assert_awaited_once_with(status="open")

    async def test_skips_issues_not_assigned_to_timmy(self, mock_client, bridge, settings_patch):
        other = _make_issue(id="other-1", assignee_id="agent-codex")
        mine = _make_issue(id="timmy-1")
        mock_client.list_issues.return_value = [other, mine]

        runner = TaskRunner(bridge=bridge)
        grabbed = await runner.grab_next_task()

        assert grabbed.id == "timmy-1"

    async def test_returns_none_when_queue_empty(self, mock_client, bridge, settings_patch):
        mock_client.list_issues.return_value = []
        runner = TaskRunner(bridge=bridge)
        assert await runner.grab_next_task() is None

    async def test_returns_none_when_no_agent_id(self, mock_client, bridge, settings_patch):
        settings_patch.paperclip_agent_id = ""
        runner = TaskRunner(bridge=bridge)
        assert await runner.grab_next_task() is None
        mock_client.list_issues.assert_not_awaited()

    async def test_grabs_first_of_multiple(self, mock_client, bridge, settings_patch):
        issues = [_make_issue(id=f"t-{i}", title=f"Task {i}") for i in range(3)]
        mock_client.list_issues.return_value = issues

        runner = TaskRunner(bridge=bridge)
        assert (await runner.grab_next_task()).id == "t-0"


# ═══════════════════════════════════════════════════════════════════════════════
# STEP 2: Timmy processes the task through the orchestrator
# ═══════════════════════════════════════════════════════════════════════════════


class TestProcessTask:
    """Verify checkout + orchestrator invocation + result flow."""

    async def test_checkout_before_orchestrator(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Issue must be checked out before orchestrator runs."""
        issue = _make_issue()
        checkout_happened = {"before_execute": False}

        original_execute = stub_orchestrator.execute_task

        async def tracking_execute(task_id, desc, ctx):
            checkout_happened["before_execute"] = (
                mock_client.checkout_issue.await_count > 0
            )
            return await original_execute(task_id, desc, ctx)

        stub_orchestrator.execute_task = tracking_execute

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        await runner.process_task(issue)

        assert checkout_happened["before_execute"], "checkout must happen before execute_task"

    async def test_orchestrator_output_flows_to_result(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """The string returned by process_task comes from the orchestrator."""
        issue = _make_issue(id="flow-1", title="Flow verification", priority="high")

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        result = await runner.process_task(issue)

        # Verify orchestrator's output arrived — it references the input
        assert "Flow verification" in result
        assert "flow-1" in result
        assert "high" in result

    async def test_default_fallback_without_orchestrator(
        self, mock_client, bridge, settings_patch,
    ):
        """Without orchestrator or process_fn, a default message is returned."""
        issue = _make_issue(title="Fallback test")
        runner = TaskRunner(bridge=bridge)  # no orchestrator
        result = await runner.process_task(issue)
        assert "Fallback test" in result


# ═══════════════════════════════════════════════════════════════════════════════
# STEP 3: Timmy completes the task — comment + close
# ═══════════════════════════════════════════════════════════════════════════════


class TestCompleteTask:
    """Verify orchestrator output flows into the completion comment."""

    async def test_orchestrator_output_in_comment(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """The comment posted to Paperclip contains the orchestrator's output."""
        issue = _make_issue(id="cmt-1", title="Comment pipe test")
        mock_client.update_issue.return_value = _make_done("cmt-1")

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        # Process to get orchestrator output
        result = await runner.process_task(issue)
        # Complete to post it as comment
        await runner.complete_task(issue, result)

        comment_content = mock_client.add_comment.call_args[0][1]
        assert "[Timmy]" in comment_content
        assert "[Orchestrator]" in comment_content
        assert "Comment pipe test" in comment_content

    async def test_marks_issue_done(
        self, mock_client, bridge, settings_patch,
    ):
        issue = _make_issue()
        mock_client.update_issue.return_value = _make_done()

        runner = TaskRunner(bridge=bridge)
        ok = await runner.complete_task(issue, "any result")

        assert ok is True
        update_req = mock_client.update_issue.call_args[0][1]
        assert update_req.status == "done"

    async def test_returns_false_on_close_failure(
        self, mock_client, bridge, settings_patch,
    ):
        mock_client.update_issue.return_value = None
        runner = TaskRunner(bridge=bridge)
        assert await runner.complete_task(_make_issue(), "result") is False


# ═══════════════════════════════════════════════════════════════════════════════
# STEP 4: Follow-up creation with orchestrator output embedded
# ═══════════════════════════════════════════════════════════════════════════════


class TestCreateFollowUp:
    """Verify orchestrator output flows into the follow-up description."""

    async def test_follow_up_contains_orchestrator_output(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """The follow-up description includes the orchestrator's result text."""
        issue = _make_issue(id="fu-1", title="Follow-up pipe test")
        mock_client.create_issue.return_value = _make_follow_up()

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        result = await runner.process_task(issue)
        await runner.create_follow_up(issue, result)

        create_req = mock_client.create_issue.call_args[0][0]
        # Orchestrator output should be embedded in description
        assert "[Orchestrator]" in create_req.description
        assert "fu-1" in create_req.description

    async def test_follow_up_assigned_to_self(
        self, mock_client, bridge, settings_patch,
    ):
        mock_client.create_issue.return_value = _make_follow_up()
        runner = TaskRunner(bridge=bridge)
        await runner.create_follow_up(_make_issue(), "result")

        req = mock_client.create_issue.call_args[0][0]
        assert req.assignee_id == TIMMY_AGENT_ID

    async def test_follow_up_preserves_priority(
        self, mock_client, bridge, settings_patch,
    ):
        mock_client.create_issue.return_value = _make_follow_up()
        runner = TaskRunner(bridge=bridge)
        await runner.create_follow_up(_make_issue(priority="high"), "result")

        req = mock_client.create_issue.call_args[0][0]
        assert req.priority == "high"

    async def test_follow_up_not_woken(self, mock_client, bridge, settings_patch):
        mock_client.create_issue.return_value = _make_follow_up()
        runner = TaskRunner(bridge=bridge)
        await runner.create_follow_up(_make_issue(), "result")
        mock_client.wake_agent.assert_not_awaited()

    async def test_returns_none_on_failure(self, mock_client, bridge, settings_patch):
        mock_client.create_issue.return_value = None
        runner = TaskRunner(bridge=bridge)
        assert await runner.create_follow_up(_make_issue(), "r") is None


# ═══════════════════════════════════════════════════════════════════════════════
# FULL GREEN PATH: orchestrator wired end-to-end
# ═══════════════════════════════════════════════════════════════════════════════


class TestGreenPathWithOrchestrator:
    """Full pipe: TaskRunner → StubOrchestrator → bridge → mock_client.

    Proves orchestrator output propagates to every downstream artefact:
    the comment, the follow-up description, and the summary dict.
    """

    async def test_full_cycle_orchestrator_output_everywhere(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Orchestrator result appears in comment, follow-up, and summary."""
        original = _make_issue(
            id="green-1",
            title="Muse about task automation and write a recursive task",
            description="Reflect on your task processing. Create a follow-up.",
            priority="high",
        )
        mock_client.list_issues.return_value = [original]
        mock_client.update_issue.return_value = _make_done("green-1")
        mock_client.create_issue.return_value = _make_follow_up("green-fu")

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        summary = await runner.run_once()

        # ── Orchestrator was called with correct data
        assert len(stub_orchestrator.calls) == 1
        call = stub_orchestrator.calls[0]
        assert call["task_id"] == "green-1"
        assert call["context"]["priority"] == "high"
        assert "Reflect on your task processing" in call["description"]

        # ── Summary contains orchestrator output
        assert summary is not None
        assert summary["original_issue_id"] == "green-1"
        assert summary["completed"] is True
        assert summary["follow_up_issue_id"] == "green-fu"
        assert "[Orchestrator]" in summary["result"]
        assert "green-1" in summary["result"]

        # ── Comment posted contains orchestrator output
        comment_content = mock_client.add_comment.call_args[0][1]
        assert "[Timmy]" in comment_content
        assert "[Orchestrator]" in comment_content
        assert "high" in comment_content  # priority flowed through

        # ── Follow-up description contains orchestrator output
        follow_up_req = mock_client.create_issue.call_args[0][0]
        assert "[Orchestrator]" in follow_up_req.description
        assert "green-1" in follow_up_req.description
        assert follow_up_req.priority == "high"
        assert follow_up_req.assignee_id == TIMMY_AGENT_ID

        # ── Correct ordering of API calls
        mock_client.list_issues.assert_awaited_once()
        mock_client.checkout_issue.assert_awaited_once_with("green-1")
        mock_client.add_comment.assert_awaited_once()
        mock_client.update_issue.assert_awaited_once()
        assert mock_client.create_issue.await_count == 1

    async def test_no_tasks_returns_none(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        mock_client.list_issues.return_value = []
        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        assert await runner.run_once() is None
        assert len(stub_orchestrator.calls) == 0

    async def test_close_failure_still_creates_follow_up(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        mock_client.list_issues.return_value = [_make_issue()]
        mock_client.update_issue.return_value = None  # close fails
        mock_client.create_issue.return_value = _make_follow_up()

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        summary = await runner.run_once()

        assert summary["completed"] is False
        assert summary["follow_up_issue_id"] == "issue-2"
        assert len(stub_orchestrator.calls) == 1


# ═══════════════════════════════════════════════════════════════════════════════
# EXTERNAL INJECTION: task from Paperclip API → orchestrator processes it
# ═══════════════════════════════════════════════════════════════════════════════


class TestExternalTaskInjection:
    """External system creates a task → Timmy's orchestrator processes it."""

    async def test_external_task_flows_through_orchestrator(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        external = _make_issue(
            id="ext-1",
            title="Review quarterly metrics",
            description="Analyze Q1 metrics and prepare summary.",
        )
        mock_client.list_issues.return_value = [external]
        mock_client.update_issue.return_value = _make_done("ext-1")
        mock_client.create_issue.return_value = _make_follow_up("ext-fu")

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        summary = await runner.run_once()

        # Orchestrator received the external task
        assert stub_orchestrator.calls[0]["task_id"] == "ext-1"
        assert "Analyze Q1 metrics" in stub_orchestrator.calls[0]["description"]

        # Its output flowed to Paperclip
        assert "[Orchestrator]" in summary["result"]
        assert "Review quarterly metrics" in summary["result"]

    async def test_skips_tasks_for_other_agents(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        other = _make_issue(id="other-1", assignee_id="agent-codex")
        mine = _make_issue(id="mine-1", title="My task")
        mock_client.list_issues.return_value = [other, mine]
        mock_client.update_issue.return_value = _make_done("mine-1")
        mock_client.create_issue.return_value = _make_follow_up()

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        summary = await runner.run_once()

        assert summary["original_issue_id"] == "mine-1"
        mock_client.checkout_issue.assert_awaited_once_with("mine-1")


# ═══════════════════════════════════════════════════════════════════════════════
# RECURSIVE CHAIN: follow-up → grabbed → orchestrator → follow-up → ...
# ═══════════════════════════════════════════════════════════════════════════════


class TestRecursiveChain:
    """Multi-cycle chains where each follow-up becomes the next task."""

    async def test_two_cycle_chain(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        task_a = _make_issue(id="A", title="Initial musing")
        fu_b = PaperclipIssue(
            id="B", title="Follow-up: Initial musing",
            description="Continue", status="open",
            assignee_id=TIMMY_AGENT_ID, priority="normal",
        )
        fu_c = PaperclipIssue(
            id="C", title="Follow-up: Follow-up",
            status="open", assignee_id=TIMMY_AGENT_ID,
        )

        # Cycle 1
        mock_client.list_issues.return_value = [task_a]
        mock_client.update_issue.return_value = _make_done("A")
        mock_client.create_issue.return_value = fu_b

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        s1 = await runner.run_once()
        assert s1["original_issue_id"] == "A"
        assert s1["follow_up_issue_id"] == "B"

        # Cycle 2: follow-up B is now the task
        mock_client.list_issues.return_value = [fu_b]
        mock_client.update_issue.return_value = _make_done("B")
        mock_client.create_issue.return_value = fu_c

        s2 = await runner.run_once()
        assert s2["original_issue_id"] == "B"
        assert s2["follow_up_issue_id"] == "C"

        # Orchestrator was called twice — once per cycle
        assert len(stub_orchestrator.calls) == 2
        assert stub_orchestrator.calls[0]["task_id"] == "A"
        assert stub_orchestrator.calls[1]["task_id"] == "B"

    async def test_three_cycle_chain_all_through_orchestrator(
        self, mock_client, bridge, stub_orchestrator, settings_patch,
    ):
        """Three cycles — every task goes through the orchestrator pipe."""
        tasks = [_make_issue(id=f"c-{i}", title=f"Chain {i}") for i in range(3)]
        follow_ups = [
            PaperclipIssue(
                id=f"c-{i + 1}", title=f"Follow-up: Chain {i}",
                status="open", assignee_id=TIMMY_AGENT_ID,
            )
            for i in range(3)
        ]

        runner = TaskRunner(bridge=bridge, orchestrator=stub_orchestrator)
        ids = []

        for i in range(3):
            mock_client.list_issues.return_value = [tasks[i]]
            mock_client.update_issue.return_value = _make_done(tasks[i].id)
            mock_client.create_issue.return_value = follow_ups[i]

            s = await runner.run_once()
            ids.append(s["original_issue_id"])

        assert ids == ["c-0", "c-1", "c-2"]
        assert len(stub_orchestrator.calls) == 3


# ═══════════════════════════════════════════════════════════════════════════════
# LIFECYCLE: start/stop
# ═══════════════════════════════════════════════════════════════════════════════


class TestLifecycle:

    async def test_stop_halts_loop(self, mock_client, bridge, settings_patch):
        runner = TaskRunner(bridge=bridge)
        runner._running = True
        runner.stop()
        assert runner._running is False

    async def test_start_disabled_when_interval_zero(
        self, mock_client, bridge, settings_patch,
    ):
        settings_patch.paperclip_poll_interval = 0
        runner = TaskRunner(bridge=bridge)
        await runner.start()
        mock_client.list_issues.assert_not_awaited()


# ═══════════════════════════════════════════════════════════════════════════════
# LIVE LLM (manual e2e): runs only when Ollama is available
# ═══════════════════════════════════════════════════════════════════════════════


def _ollama_reachable() -> tuple[bool, list[str]]:
    """Return (reachable, model_names)."""
    try:
        import httpx
        resp = httpx.get("http://localhost:11434/api/tags", timeout=3)
        resp.raise_for_status()
        names = [m["name"] for m in resp.json().get("models", [])]
        return True, names
    except Exception:
        return False, []


def _pick_tiny_model(available: list[str]) -> str | None:
    """Pick the smallest model available for e2e tests."""
    candidates = ["tinyllama", "phi", "qwen2:0.5b", "llama3.2:1b", "gemma:2b"]
    for candidate in candidates:
        for name in available:
            if candidate in name:
                return name
    return None


class LiveOllamaOrchestrator:
    """Thin orchestrator that calls Ollama directly — no Agno dependency."""

    def __init__(self, model_name: str) -> None:
        self.model_name = model_name
        self.calls: list[dict] = []

    async def execute_task(
        self, task_id: str, description: str, context: dict
    ) -> str:
        import httpx as hx

        self.calls.append({"task_id": task_id, "description": description})

        async with hx.AsyncClient(timeout=60) as client:
            resp = await client.post(
                "http://localhost:11434/api/generate",
                json={
                    "model": self.model_name,
                    "prompt": (
                        f"You are Timmy, a task automation agent. "
                        f"Task: {description}\n"
                        f"Respond in 1-2 sentences about what you did."
                    ),
                    "stream": False,
                    "options": {"num_predict": 64},
                },
            )
            resp.raise_for_status()
            return resp.json()["response"]


@pytest.mark.ollama
class TestLiveOllamaGreenPath:
    """Green-path with a real tiny LLM via Ollama.

    Run with: ``tox -e ollama`` or ``pytest -m ollama``
    Requires: Ollama running with a small model.
    """

    async def test_live_full_cycle(self, mock_client, bridge, settings_patch):
        """Wire a real tiny LLM through the full pipe and verify output."""
        reachable, models = _ollama_reachable()
        if not reachable:
            pytest.skip("Ollama not reachable at localhost:11434")

        chosen = _pick_tiny_model(models)
        if not chosen:
            pytest.skip(f"No tiny model found (have: {models[:5]})")

        issue = _make_issue(
            id="live-1",
            title="Reflect on task automation",
            description="Muse about how you process tasks and suggest improvements.",
        )
        mock_client.list_issues.return_value = [issue]
        mock_client.update_issue.return_value = _make_done("live-1")
        mock_client.create_issue.return_value = _make_follow_up("live-fu")

        live_orch = LiveOllamaOrchestrator(chosen)
        runner = TaskRunner(bridge=bridge, orchestrator=live_orch)
        summary = await runner.run_once()

        # The LLM produced *something* non-empty
        assert summary is not None
        assert len(summary["result"]) > 0
        assert summary["completed"] is True
        assert summary["follow_up_issue_id"] == "live-fu"

        # Orchestrator was actually called
        assert len(live_orch.calls) == 1
        assert live_orch.calls[0]["task_id"] == "live-1"

        # LLM output flowed into the Paperclip comment
        comment = mock_client.add_comment.call_args[0][1]
        assert "[Timmy]" in comment
        assert len(comment) > len("[Timmy] Task completed.\n\n")

        # LLM output flowed into the follow-up description
        fu_req = mock_client.create_issue.call_args[0][0]
        assert len(fu_req.description) > 0
        assert fu_req.assignee_id == TIMMY_AGENT_ID

    async def test_live_recursive_chain(self, mock_client, bridge, settings_patch):
        """Two-cycle chain with a real LLM — each cycle produces real output."""
        reachable, models = _ollama_reachable()
        if not reachable:
            pytest.skip("Ollama not reachable")

        chosen = _pick_tiny_model(models)
        if not chosen:
            pytest.skip("No tiny model found")

        task_a = _make_issue(id="live-A", title="Initial reflection")
        fu_b = PaperclipIssue(
            id="live-B", title="Follow-up: Initial reflection",
            description="Continue reflecting", status="open",
            assignee_id=TIMMY_AGENT_ID, priority="normal",
        )
        fu_c = PaperclipIssue(
            id="live-C", title="Follow-up: Follow-up",
            status="open", assignee_id=TIMMY_AGENT_ID,
        )

        live_orch = LiveOllamaOrchestrator(chosen)
        runner = TaskRunner(bridge=bridge, orchestrator=live_orch)

        # Cycle 1
        mock_client.list_issues.return_value = [task_a]
        mock_client.update_issue.return_value = _make_done("live-A")
        mock_client.create_issue.return_value = fu_b

        s1 = await runner.run_once()
        assert s1 is not None
        assert len(s1["result"]) > 0

        # Cycle 2
        mock_client.list_issues.return_value = [fu_b]
        mock_client.update_issue.return_value = _make_done("live-B")
        mock_client.create_issue.return_value = fu_c

        s2 = await runner.run_once()
        assert s2 is not None
        assert len(s2["result"]) > 0

        # Both cycles went through the LLM
        assert len(live_orch.calls) == 2