1
0

[claude] Auto-create Gitea issues from research findings (#977) (#1060)

Co-authored-by: Claude (Opus 4.6) <claude@hermes.local>
Co-committed-by: Claude (Opus 4.6) <claude@hermes.local>
This commit is contained in:
2026-03-23 15:09:18 +00:00
committed by rockachopa
parent 6a674bf9e0
commit ab36149fa5
2 changed files with 717 additions and 0 deletions

View File

@@ -0,0 +1,369 @@
"""Research triage — extract action items from research reports and file Gitea issues.
Closes the loop: research → knowledge → actionable engineering work.
The LLM extracts action items during synthesis (not post-processed), then
each item is filed as a Gitea issue with appropriate labels, source links,
and evidence from the original research.
Usage::
from timmy.research_triage import triage_research_report
results = await triage_research_report(
report="## Findings\\n...",
source_issue=946,
)
"""
from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass, field
from typing import Any
import httpx
from config import settings
logger = logging.getLogger(__name__)
# Regex to strip markdown code fences from LLM output
# NOTE(review): appears unused in this module — _parse_llm_response strips
# fences with str.split/rsplit instead; confirm no external user before removing.
_FENCE_RE = re.compile(r"^```(?:json)?\s*\n?", re.MULTILINE)
@dataclass
class ActionItem:
    """A single actionable item extracted from a research report."""

    # Issue title, ideally with an area prefix (see extraction prompt).
    title: str
    # Markdown issue body produced by the LLM.
    body: str
    labels: list[str] = field(default_factory=list)
    priority: str = "medium"
    source_urls: list[str] = field(default_factory=list)

    def to_issue_body(self, source_issue: int | None = None) -> str:
        """Format for a Gitea issue body with source attribution.

        Args:
            source_issue: Optional parent research issue to link back to.

        Returns:
            Markdown body: item text, optional evidence links, optional
            origin section, and a trailing auto-triage footer.
        """
        sections: list[str] = [self.body]
        if self.source_urls:
            sections.append("\n### Source Evidence")
            sections.extend(f"- {url}" for url in self.source_urls)
        if source_issue:
            sections.append(
                f"\n### Origin\nExtracted from research in #{source_issue}"
            )
        sections.append("\n---\n*Auto-triaged from research findings by Timmy*")
        return "\n".join(sections)
def _build_extraction_prompt(report: str) -> str:
    """Build the LLM prompt for extracting action items from a research report.

    Args:
        report: Raw markdown research report, embedded verbatim at the end
            of the prompt.

    Returns:
        A single prompt string instructing the model to emit ONLY a JSON
        array (possibly empty) of action-item objects.
    """
    # NOTE: the output contract stated here (JSON array, 0-5 items, keys
    # title/body/labels/priority/source_urls) is exactly what
    # _parse_llm_response and _validate_action_item rely on downstream.
    return (
        "You are triaging a research report for actionable engineering work.\n"
        "Extract 0-5 CONCRETE action items — bugs to fix, features to build,\n"
        "infrastructure to set up, or investigations to run.\n\n"
        "Rules:\n"
        "- Only include items that map to real engineering tasks\n"
        "- Skip vague recommendations or philosophical observations\n"
        "- Each item should be specific enough to become a Gitea issue\n"
        "- Include evidence/URLs from the report in source_urls\n"
        "- Priority: high (blocking or critical), medium (important), low (nice-to-have)\n"
        "- Labels: pick from [actionable, research, bug, feature, infrastructure, "
        "performance, security, kimi-ready]\n"
        "  - 'kimi-ready' means a well-scoped task suitable for an AI agent\n"
        "  - 'actionable' should be on every item (these are all actionable)\n\n"
        "For each item return:\n"
        '- "title": Clear, specific title with area prefix '
        '(e.g. "[MCP] Restore tool server with FastMCP")\n'
        '- "body": Detailed markdown body with:\n'
        "  **What:** What needs to be done\n"
        "  **Why:** Why this matters (link to research finding)\n"
        "  **Suggested approach:** How to implement\n"
        "  **Acceptance criteria:** How to verify\n"
        '- "labels": Array of label strings\n'
        '- "priority": One of high, medium, low\n'
        '- "source_urls": Array of URLs referenced in the research\n\n'
        "Return ONLY a JSON array of objects. Return [] if nothing is actionable.\n\n"
        f"Research report:\n{report}\n\nJSON array:"
    )
def _parse_llm_response(raw: str) -> list[dict[str, Any]]:
"""Parse LLM JSON response, stripping code fences if present."""
cleaned = raw.strip()
# Strip markdown code fences
if cleaned.startswith("```"):
cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
items = json.loads(cleaned)
if not isinstance(items, list):
return []
return items
def _validate_action_item(raw_item: dict[str, Any]) -> ActionItem | None:
    """Validate and convert a raw dict to an ActionItem, or None if invalid.

    Applies minimum-quality gates (title >= 10 chars, body >= 20 chars),
    normalizes labels and priority, and guarantees the 'actionable' label.

    Args:
        raw_item: One object from the LLM's JSON array. Fields may be
            missing, null, or wrongly typed — all are handled here.

    Returns:
        A validated ActionItem, or None when the item should be dropped.
    """
    if not isinstance(raw_item, dict):
        return None
    # BUG FIX: LLMs occasionally emit JSON null or non-string values for
    # these fields; the previous .strip() on the raw value raised
    # AttributeError and aborted the whole extraction. Coerce defensively.
    raw_title = raw_item.get("title")
    title = raw_title.strip() if isinstance(raw_title, str) else ""
    raw_body = raw_item.get("body")
    body = raw_body.strip() if isinstance(raw_body, str) else ""
    if len(title) < 10:
        return None
    if len(body) < 20:
        return None
    labels = raw_item.get("labels", [])
    if isinstance(labels, str):
        # Accept a comma-separated string as a convenience.
        labels = [part.strip() for part in labels.split(",") if part.strip()]
    if not isinstance(labels, list):
        labels = []
    # Ensure 'actionable' label is always present
    if "actionable" not in labels:
        labels.insert(0, "actionable")
    raw_priority = raw_item.get("priority")
    priority = raw_priority.strip().lower() if isinstance(raw_priority, str) else "medium"
    if priority not in ("high", "medium", "low"):
        priority = "medium"
    source_urls = raw_item.get("source_urls", [])
    if not isinstance(source_urls, list):
        source_urls = []
    return ActionItem(
        title=title,
        body=body,
        labels=labels,
        priority=priority,
        source_urls=source_urls,
    )
async def extract_action_items(
    report: str,
    llm_caller: Any | None = None,
) -> list[ActionItem]:
    """Extract actionable engineering items from a research report.

    Uses the LLM to identify concrete tasks, bugs, features, and
    infrastructure work from structured research output.

    Args:
        report: The research report text (markdown).
        llm_caller: Optional async callable(prompt) -> str for LLM.
            Falls back to the cascade router.

    Returns:
        List of validated ActionItem objects (0-5 items).
    """
    if not report or not report.strip():
        return []
    prompt = _build_extraction_prompt(report)
    try:
        if llm_caller is None:
            raw = await _call_llm(prompt)
        else:
            raw = await llm_caller(prompt)
    except Exception as exc:
        logger.warning("LLM extraction failed: %s", exc)
        return []
    if not raw or not raw.strip():
        return []
    try:
        candidates = _parse_llm_response(raw)
    except (json.JSONDecodeError, ValueError) as exc:
        logger.warning("Failed to parse LLM action items: %s", exc)
        return []
    # Validate at most five candidates (safety cap) and drop malformed ones.
    items = [
        item
        for item in (_validate_action_item(c) for c in candidates[:5])
        if item is not None
    ]
    logger.info("Extracted %d action items from research report", len(items))
    return items
async def _call_llm(prompt: str) -> str:
    """Call the cascade router for LLM completion.

    Falls back gracefully if the router is unavailable.
    """
    # Imported lazily so this module stays importable without the router.
    from infrastructure.router import get_router

    response = await get_router().complete(
        messages=[{"role": "user", "content": prompt}],
        temperature=0.1,
    )
    # Routers may return either a dict payload or a bare value.
    if isinstance(response, dict):
        return response.get("content", "")
    return str(response)
async def create_gitea_issue(
    item: ActionItem,
    source_issue: int | None = None,
) -> dict[str, Any] | None:
    """Create a Gitea issue from an ActionItem via the REST API.

    Args:
        item: The action item to file.
        source_issue: Parent research issue number to link back to.

    Returns:
        The created issue dict from Gitea API, or None on failure
        (Gitea disabled, misconfigured repo, HTTP error, or network error).
    """
    if not settings.gitea_enabled or not settings.gitea_token:
        logger.debug("Gitea not configured — skipping issue creation")
        return None
    # BUG FIX: gitea_repo must be "owner/repo"; without this guard a bare
    # repo name makes the tuple unpack below raise ValueError.
    if "/" not in settings.gitea_repo:
        logger.warning("Invalid gitea_repo (expected 'owner/repo'): %s", settings.gitea_repo)
        return None
    owner, repo = settings.gitea_repo.split("/", 1)
    api_url = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/issues"
    body = item.to_issue_body(source_issue=source_issue)
    payload: dict[str, Any] = {
        "title": item.title,
        "body": body,
    }
    # Resolve label names to IDs (the issue payload carries integer IDs).
    label_ids = await _resolve_label_ids(item.labels, owner, repo)
    if label_ids:
        payload["labels"] = label_ids
    try:
        async with httpx.AsyncClient(timeout=15) as client:
            resp = await client.post(
                api_url,
                headers={
                    "Authorization": f"token {settings.gitea_token}",
                    "Content-Type": "application/json",
                },
                json=payload,
            )
            if resp.status_code in (200, 201):
                issue_data = resp.json()
                logger.info(
                    "Created Gitea issue #%s: %s",
                    issue_data.get("number", "?"),
                    item.title[:60],
                )
                return issue_data
            logger.warning(
                "Gitea issue creation failed (HTTP %s): %s",
                resp.status_code,
                resp.text[:200],
            )
            return None
    except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc:
        # Network-level failures are expected when Gitea is down; warn only.
        logger.warning("Gitea connection failed: %s", exc)
        return None
    except Exception as exc:
        logger.error("Unexpected error creating Gitea issue: %s", exc)
        return None
async def _resolve_label_ids(
    label_names: list[str],
    owner: str,
    repo: str,
) -> list[int]:
    """Resolve label names to Gitea label IDs, creating missing labels.

    Returns a list of integer label IDs for the issue payload. Any failure
    (HTTP error, network error) degrades to an empty list — the issue is
    then filed without labels rather than not at all.
    """
    if not label_names:
        return []
    labels_url = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/labels"
    headers = {
        "Authorization": f"token {settings.gitea_token}",
        "Content-Type": "application/json",
    }
    try:
        async with httpx.AsyncClient(timeout=10) as client:
            # One GET up front so each already-known label costs no request.
            resp = await client.get(labels_url, headers=headers)
            if resp.status_code != 200:
                return []
            known = {entry["name"]: entry["id"] for entry in resp.json()}
            resolved: list[int] = []
            for name in label_names:
                if name in known:
                    resolved.append(known[name])
                    continue
                # Auto-create missing labels with a default color
                created = await client.post(
                    labels_url,
                    headers=headers,
                    json={"name": name, "color": "#0075ca"},
                )
                if created.status_code in (200, 201):
                    resolved.append(created.json()["id"])
            return resolved
    except Exception as exc:
        logger.debug("Label resolution failed: %s", exc)
        return []
async def triage_research_report(
    report: str,
    source_issue: int | None = None,
    llm_caller: Any | None = None,
    dry_run: bool = False,
) -> list[dict[str, Any]]:
    """End-to-end: extract action items from research and file Gitea issues.

    This is the main entry point that closes the research → backlog loop.

    Args:
        report: Research report text (markdown).
        source_issue: The Gitea issue number that produced this research.
        llm_caller: Optional async callable(prompt) -> str for LLM calls.
        dry_run: If True, extract items but don't create issues.

    Returns:
        List of dicts with 'action_item' and 'gitea_issue' (or None) keys.
    """
    items = await extract_action_items(report, llm_caller=llm_caller)
    if not items:
        logger.info("No action items extracted from research report")
        return []
    results: list[dict[str, Any]] = []
    created_count = 0
    for item in items:
        issue_data = None
        if not dry_run:
            issue_data = await create_gitea_issue(item, source_issue=source_issue)
            if issue_data is not None:
                created_count += 1
        results.append({"action_item": item, "gitea_issue": issue_data})
    logger.info(
        "Research triage complete: %d items extracted, %d issues created",
        len(results),
        created_count,
    )
    return results

View File

@@ -0,0 +1,348 @@
"""Tests for research triage — action item extraction and Gitea issue filing."""
import json
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
from timmy.research_triage import (
ActionItem,
_parse_llm_response,
_resolve_label_ids,
_validate_action_item,
create_gitea_issue,
extract_action_items,
triage_research_report,
)
# ---------------------------------------------------------------------------
# ActionItem
# ---------------------------------------------------------------------------
# A realistic research report used as the LLM input across the test suite.
SAMPLE_REPORT = """
## Research: MCP Abstraction Layer
### Finding 1: FastMCP overhead is negligible
FastMCP averages 26.45ms per tool call. Total overhead <3% of budget.
### Finding 2: Agno tool calling is broken
Agno issues #2231, #2625 document persistent breakage with Ollama.
Fix: Use Ollama's `format` parameter with Pydantic JSON schemas.
### Recommendation
Implement three-tier router for structured output.
"""
# Canned LLM output: a JSON array containing one well-formed action item
# (valid title/body lengths, labels, priority, and a source URL).
SAMPLE_LLM_RESPONSE = json.dumps(
    [
        {
            "title": "[Router] Implement three-tier structured output router",
            "body": (
                "**What:** Build a three-tier router that uses Ollama's "
                "`format` parameter for structured output.\n"
                "**Why:** Agno's native tool calling is broken (#2231, #2625). "
                "Pydantic JSON schemas with `format` bypass the issue.\n"
                "**Suggested approach:** Add format parameter support to "
                "CascadeRouter.\n"
                "**Acceptance criteria:** Tool calls return valid JSON matching "
                "the Pydantic schema."
            ),
            "labels": ["actionable", "feature", "kimi-ready"],
            "priority": "high",
            "source_urls": ["https://github.com/agno-agi/agno/issues/2231"],
        },
    ]
)
class TestActionItem:
    """Formatting behavior of ActionItem.to_issue_body."""

    def test_to_issue_body_basic(self):
        rendered = ActionItem(title="Test", body="Test body").to_issue_body()
        assert "Test body" in rendered
        assert "Auto-triaged" in rendered

    def test_to_issue_body_with_source_issue(self):
        rendered = ActionItem(title="Test", body="Test body").to_issue_body(
            source_issue=946
        )
        assert "#946" in rendered
        assert "Origin" in rendered

    def test_to_issue_body_with_source_urls(self):
        item = ActionItem(
            title="Test",
            body="Body",
            source_urls=["https://example.com/finding"],
        )
        rendered = item.to_issue_body()
        assert "https://example.com/finding" in rendered
        assert "Source Evidence" in rendered
# ---------------------------------------------------------------------------
# _parse_llm_response
# ---------------------------------------------------------------------------
class TestParseLlmResponse:
    """JSON parsing and fence stripping in _parse_llm_response."""

    def test_plain_json(self):
        parsed = _parse_llm_response('[{"title": "foo"}]')
        assert len(parsed) == 1
        assert parsed[0]["title"] == "foo"

    def test_fenced_json(self):
        parsed = _parse_llm_response('```json\n[{"title": "bar"}]\n```')
        assert len(parsed) == 1
        assert parsed[0]["title"] == "bar"

    def test_empty_array(self):
        assert _parse_llm_response("[]") == []

    def test_non_array_returns_empty(self):
        assert _parse_llm_response('{"title": "not an array"}') == []

    def test_invalid_json_raises(self):
        with pytest.raises(json.JSONDecodeError):
            _parse_llm_response("not json at all")
# ---------------------------------------------------------------------------
# _validate_action_item
# ---------------------------------------------------------------------------
class TestValidateActionItem:
    """Quality gates and normalization rules in _validate_action_item."""

    def test_valid_item(self):
        item = _validate_action_item(
            {
                "title": "[Area] A specific clear title",
                "body": "Detailed body with enough content to be useful.",
                "labels": ["actionable", "bug"],
                "priority": "high",
            }
        )
        assert item is not None
        assert item.title == "[Area] A specific clear title"
        assert item.priority == "high"
        assert "actionable" in item.labels

    def test_short_title_rejected(self):
        rejected = _validate_action_item(
            {"title": "Short", "body": "Detailed body with enough content here."}
        )
        assert rejected is None

    def test_short_body_rejected(self):
        rejected = _validate_action_item(
            {"title": "A perfectly fine title here", "body": "Too short"}
        )
        assert rejected is None

    def test_missing_title_rejected(self):
        rejected = _validate_action_item(
            {"body": "Detailed body with enough content to be useful."}
        )
        assert rejected is None

    def test_non_dict_rejected(self):
        assert _validate_action_item("not a dict") is None

    def test_actionable_label_auto_added(self):
        item = _validate_action_item(
            {
                "title": "A perfectly fine title here",
                "body": "Detailed body with enough content to be useful.",
                "labels": ["bug"],
            }
        )
        assert item is not None
        assert "actionable" in item.labels

    def test_labels_as_csv_string(self):
        item = _validate_action_item(
            {
                "title": "A perfectly fine title here",
                "body": "Detailed body with enough content to be useful.",
                "labels": "bug, feature",
            }
        )
        assert item is not None
        assert "bug" in item.labels
        assert "feature" in item.labels

    def test_invalid_priority_defaults_medium(self):
        item = _validate_action_item(
            {
                "title": "A perfectly fine title here",
                "body": "Detailed body with enough content to be useful.",
                "priority": "urgent",
            }
        )
        assert item is not None
        assert item.priority == "medium"
# ---------------------------------------------------------------------------
# extract_action_items
# ---------------------------------------------------------------------------
class TestExtractActionItems:
    """Extraction pipeline with a mocked LLM caller."""

    @pytest.mark.asyncio
    async def test_extracts_items_from_report(self):
        fake_llm = AsyncMock(return_value=SAMPLE_LLM_RESPONSE)
        items = await extract_action_items(SAMPLE_REPORT, llm_caller=fake_llm)
        assert len(items) == 1
        assert "three-tier" in items[0].title.lower()
        assert items[0].priority == "high"
        fake_llm.assert_called_once()

    @pytest.mark.asyncio
    async def test_empty_report_returns_empty(self):
        assert await extract_action_items("") == []

    @pytest.mark.asyncio
    async def test_llm_failure_returns_empty(self):
        fake_llm = AsyncMock(side_effect=RuntimeError("LLM down"))
        assert await extract_action_items(SAMPLE_REPORT, llm_caller=fake_llm) == []

    @pytest.mark.asyncio
    async def test_llm_returns_empty_string(self):
        fake_llm = AsyncMock(return_value="")
        assert await extract_action_items(SAMPLE_REPORT, llm_caller=fake_llm) == []

    @pytest.mark.asyncio
    async def test_llm_returns_invalid_json(self):
        fake_llm = AsyncMock(return_value="not valid json")
        assert await extract_action_items(SAMPLE_REPORT, llm_caller=fake_llm) == []

    @pytest.mark.asyncio
    async def test_caps_at_five_items(self):
        # Ten valid candidates in; the safety cap must keep at most five.
        payload = json.dumps(
            [
                {
                    "title": f"[Area] Action item number {i} is specific",
                    "body": f"Detailed body for action item {i} with enough words.",
                    "labels": ["actionable"],
                    "priority": "medium",
                }
                for i in range(10)
            ]
        )
        fake_llm = AsyncMock(return_value=payload)
        items = await extract_action_items(SAMPLE_REPORT, llm_caller=fake_llm)
        assert len(items) <= 5
# ---------------------------------------------------------------------------
# create_gitea_issue
# ---------------------------------------------------------------------------
class TestCreateGiteaIssue:
    """HTTP-level behavior of create_gitea_issue with mocked settings/httpx."""

    @pytest.mark.asyncio
    async def test_creates_issue_via_api(self):
        """Happy path: a 201 response yields the created issue dict."""
        item = ActionItem(
            title="[Test] Create a test issue",
            body="This is a test issue body with details.",
            labels=["actionable"],
        )
        # Fake Gitea response for the POST /issues call.
        issue_resp = MagicMock()
        issue_resp.status_code = 201
        issue_resp.json.return_value = {"number": 42, "title": item.title}
        mock_client = AsyncMock()
        mock_client.post.return_value = issue_resp
        with (
            patch("timmy.research_triage.settings") as mock_settings,
            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[1]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
            mock_settings.gitea_token = "test-token"
            mock_settings.gitea_repo = "owner/repo"
            mock_settings.gitea_url = "http://localhost:3000"
            # Route `async with httpx.AsyncClient(...)` to the mock client.
            mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
            mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await create_gitea_issue(item, source_issue=946)
            assert result is not None
            assert result["number"] == 42

    @pytest.mark.asyncio
    async def test_returns_none_when_disabled(self):
        """Short-circuits to None when Gitea is disabled / unconfigured."""
        item = ActionItem(title="[Test] Disabled test", body="Body content here.")
        with patch("timmy.research_triage.settings") as mock_settings:
            mock_settings.gitea_enabled = False
            mock_settings.gitea_token = ""
            result = await create_gitea_issue(item)
            assert result is None

    @pytest.mark.asyncio
    async def test_handles_connection_error(self):
        """Network failure (ConnectError) is swallowed and returns None."""
        item = ActionItem(
            title="[Test] Connection fail",
            body="Body content for connection test.",
        )
        mock_client = AsyncMock()
        mock_client.post.side_effect = httpx.ConnectError("refused")
        with (
            patch("timmy.research_triage.settings") as mock_settings,
            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
            mock_settings.gitea_token = "test-token"
            mock_settings.gitea_repo = "owner/repo"
            mock_settings.gitea_url = "http://localhost:3000"
            mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
            mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await create_gitea_issue(item)
            assert result is None
# ---------------------------------------------------------------------------
# triage_research_report (integration)
# ---------------------------------------------------------------------------
class TestTriageResearchReport:
    """Integration behavior of the full triage entry point."""

    @pytest.mark.asyncio
    async def test_dry_run_extracts_without_filing(self):
        """dry_run=True extracts items but files no Gitea issues."""
        mock_llm = AsyncMock(return_value=SAMPLE_LLM_RESPONSE)
        results = await triage_research_report(
            SAMPLE_REPORT, source_issue=946, llm_caller=mock_llm, dry_run=True
        )
        assert len(results) == 1
        assert results[0]["action_item"] is not None
        assert results[0]["gitea_issue"] is None

    @pytest.mark.asyncio
    async def test_empty_report_returns_empty(self):
        """Empty input short-circuits before any LLM call matters."""
        results = await triage_research_report("", llm_caller=AsyncMock(return_value="[]"))
        assert results == []

    @pytest.mark.asyncio
    async def test_end_to_end_with_mock_gitea(self):
        """Extraction plus issue creation against a fully mocked Gitea API."""
        mock_llm = AsyncMock(return_value=SAMPLE_LLM_RESPONSE)
        issue_resp = MagicMock()
        issue_resp.status_code = 201
        issue_resp.json.return_value = {"number": 99, "title": "test"}
        mock_client = AsyncMock()
        mock_client.post.return_value = issue_resp
        with (
            patch("timmy.research_triage.settings") as mock_settings,
            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
            mock_settings.gitea_token = "test-token"
            mock_settings.gitea_repo = "owner/repo"
            mock_settings.gitea_url = "http://localhost:3000"
            # Route `async with httpx.AsyncClient(...)` to the mock client.
            mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
            mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)
            results = await triage_research_report(
                SAMPLE_REPORT, source_issue=946, llm_caller=mock_llm
            )
            assert len(results) == 1
            assert results[0]["gitea_issue"]["number"] == 99